golex

v0.1.3-alpha

Published: Aug 26, 2024 License: GPL-3.0 Imports: 8 Imported by: 0

README

Golex

A lexing and parsing toolkit for Go

Features

  • Multiple Tokenizers: Supports built-in tokenizers for comments, literals, numbers, booleans, strings, and symbols, with the ability to add custom tokenizers.
  • Flexible Lexer Options: Configure the lexer with options like retaining whitespace or customizing keyword sets.
WIP

This package is still a work in progress. The lexer is largely complete, but the parsing tools are not yet fully implemented.

Installation

go get github.com/cornejong/golex

Include the library in your project:

import "github.com/cornejong/golex"

Usage

Basic Example

Here's an example of how to use the lexer to tokenize a simple source string:

package main

import (
    "fmt"
    "os"

    "github.com/cornejong/golex"
)

func main() {
    source := `func() { test = "SomeStringValue"; test = 1.2; test = 88 }`
    lexer := golex.NewLexer()

    for token, err := range lexer.Iterate(source) {
        if err != nil {
            fmt.Println(err)
            os.Exit(1)
        }

        token.Dump()
    }
}

// Output:
//   1:   2 -> Symbol                func                  (<nil>)
//   1:   6 -> OpenParenthesis       (                     (<nil>)
//   1:   7 -> CloseParenthesis      )                     (<nil>)
//   1:   9 -> OpenCurlyBracket      {                     (<nil>)
//   1:  11 -> Symbol                test                  (<nil>)
//   1:  16 -> Assign                =                     (<nil>)
//   1:  18 -> DoubleQuoteString     "SomeStringValue"     (SomeStringValue)
//   1:  35 -> Semicolon             ;                     (<nil>)
//   1:  37 -> Symbol                test                  (<nil>)
//   1:  42 -> Assign                =                     (<nil>)
//   1:  44 -> Float                 1.2                   (1.2)
//   1:  47 -> Semicolon             ;                     (<nil>)
//   1:  49 -> Symbol                test                  (<nil>)
//   1:  54 -> Assign                =                     (<nil>)
//   1:  56 -> Integer               88                    (88)
//   1:  59 -> CloseCurlyBracket     }                     (<nil>)
//   1:  60 -> EndOfFile                                   (<nil>)

Lexer Options

lexer := NewLexer(
    // Print each token as it is parsed
    DebugPrintTokens(),

    // Don't add the token position to the token
    OmitTokenPosition(),

    // Ignore specific token types. They are still parsed, but skipped;
    // lexer.NextToken returns the next non-ignored token
    IgnoreTokens(TypeComment),

    // Retain whitespace tokens
    RetainWhitespace(),

    // Turn symbols into keyword tokens
    WithKeywords("func", "const", "def"),

    // Specify the symbol character maps
    // - arg1: the start characters of a symbol
    // - arg2: the continuation characters of a symbol
    SymbolCharacterMap("a-zA-Z_", "a-zA-Z0-9_"),

    // Register a custom tokenizer (sketched below)
    WithTokenizer(InsertBefore(TypeStringTokenizer, TokenizerType("MyCustomTokenizer"), MyCustomTokenizer{})),

    // Extend the literal tokens (MyTokenType is sketched below)
    WithLiteralTokens(LiteralToken{Type: MyTokenType("MyLiteralToken"), Literal: "__!__"}),

    // Unset built-in literal tokens
    WithoutLiteralTokens(TypeEllipses, TypeSemicolon),

    // Add comment syntaxes
    WithCommentSyntax(CommentSyntax{Opener: "#"}, CommentSyntax{Opener: "/*", Closer: "*/"}),

    // Unset a built-in comment syntax
    WithoutCommentSyntax(CommentSyntax{Opener: "//"}),

    // Add a string enclosure
    WithStringEnclosure(StringEnclosure{Enclosure: "```"}),

    // Unset a built-in enclosure (takes the enclosure strings)
    WithoutStringEnclosure("\""),
)
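
The options above reference MyCustomTokenizer and MyTokenType, which are not part of golex. Here is a minimal sketch of both, assuming a tokenizer that consumes a single '@' character; the exact cursor contract of Tokenize is an assumption, so treat this as illustrative rather than definitive:

// MyTokenType is a hypothetical custom token type; any type with a
// String() method satisfies the TokenType interface.
type MyTokenType string

func (t MyTokenType) String() string { return string(t) }

// MyCustomTokenizer is a hypothetical tokenizer implementing the
// Tokenizer interface (CanTokenize / Tokenize).
type MyCustomTokenizer struct{}

// CanTokenize reports whether this tokenizer applies at the cursor.
func (MyCustomTokenizer) CanTokenize(l *Lexer) bool {
    return l.CharAtCursor() == '@'
}

// Tokenize consumes the '@' and returns it as a token.
func (MyCustomTokenizer) Tokenize(l *Lexer) (Token, error) {
    token := Token{Type: MyTokenType("AtSign"), Position: l.GetPosition()}
    token.AppendChar(l.CharAtCursor())
    l.IncrementCursor(1) // assumption: advance past the consumed character
    return token, nil
}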

Tokens

type Token struct {
    // The token Type
    Type     TokenType
    // The literal representation of the token
    Literal  string
    // The parsed value (if available)
    // Currently just for strings, numbers and booleans
    Value    any
    // The token Position within the source
    Position Position
}
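
Tokens can be inspected through these fields directly, or via the comparison helpers documented below (Is, TypeIs, LiteralIs, and friends). A small sketch:

for token, err := range lexer.Iterate(source) {
    if err != nil {
        break
    }
    // TypeIs compares only the token type
    if token.TypeIs(TypeInteger) {
        fmt.Printf("integer %v at %s\n", token.Value, token.Position)
    }
}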

Built-in Types

All basic token types are built-in and can be unset or extended using the lexer options. For a full list of built-in types, see build_in_types.go.

TODO:

  • Better lexer errors with positional info

Documentation

Index

Constants

This section is empty.

Variables

var (
	EOF rune = rune(byte(0x03))

	ErrNoEOFTokenType     error = errors.New("no eof token type specified. Specify an EOF token type or use build-in types")
	ErrNoInvalidTokenType error = errors.New("no eof token type specified. Specify an EOF token type or use build-in types")
)
var (
	SlashSingleLineCommentSyntax   = CommentSyntax{Opener: "//"}
	SlashMultilineCommentSyntax    = CommentSyntax{Opener: "/*", Closer: "*/"}
	HashtagSingleLineCommentSyntax = CommentSyntax{Opener: "#"}
)

Functions

This section is empty.

Types

type BooleanTokenizer

type BooleanTokenizer struct{}

func (BooleanTokenizer) CanTokenize

func (b BooleanTokenizer) CanTokenize(l *Lexer) bool

func (BooleanTokenizer) Tokenize

func (b BooleanTokenizer) Tokenize(l *Lexer) (Token, error)

type BuildInType

type BuildInType string
const (
	TypeSof     BuildInType = "StartOfFile"
	TypeEof     BuildInType = "EndOfFile"
	TypeInvalid BuildInType = "Invalid"

	TypeString               BuildInType = "String"
	TypeDoubleQuoteString    BuildInType = "DoubleQuoteString"
	TypeSingleQuoteString    BuildInType = "SingleQuoteString"
	TypeBacktickString       BuildInType = "BacktickString"
	TypeTripleBacktickString BuildInType = "TripleBacktickString"
	TypeNumber               BuildInType = "Number"
	TypeInteger              BuildInType = "Integer"
	TypeFloat                BuildInType = "Float"
	TypeBool                 BuildInType = "Boolean"
	TypeNull                 BuildInType = "Null"
	TypeNil                  BuildInType = "Nil"

	TypeComment    BuildInType = "Comment"
	TypeKeyword    BuildInType = "Keyword"
	TypeIdentifier BuildInType = "Identifier"
	TypeSymbol     BuildInType = "Symbol"

	TypePlus               BuildInType = "Plus"               // +
	TypeMinus              BuildInType = "Minus"              // -
	TypeMultiply           BuildInType = "Multiply"           // *
	TypeDivide             BuildInType = "Divide"             // /
	TypeModulo             BuildInType = "Modulo"             // %
	TypeAssign             BuildInType = "Assign"             // =
	TypeEqual              BuildInType = "Equal"              // ==
	TypeNotEqual           BuildInType = "NotEqual"           // !=
	TypeLessThan           BuildInType = "LessThan"           // <
	TypeGreaterThan        BuildInType = "GreaterThan"        // >
	TypeLessThanOrEqual    BuildInType = "LessThanOrEqual"    // <=
	TypeGreaterThanOrEqual BuildInType = "GreaterThanOrEqual" // >=
	TypeAnd                BuildInType = "And"                // &&
	TypeOr                 BuildInType = "Or"                 // ||
	TypeNot                BuildInType = "Not"                // !

	TypeOpenParen   BuildInType = "OpenParenthesis"    // (
	TypeCloseParen  BuildInType = "CloseParenthesis"   // )
	TypeOpenCurly   BuildInType = "OpenCurlyBracket"   // {
	TypeCloseCurly  BuildInType = "CloseCurlyBracket"  // }
	TypeOpenSquare  BuildInType = "OpenSquareBracket"  // [
	TypeCloseSquare BuildInType = "CloseSquareBracket" // ]
	TypeComma       BuildInType = "Comma"              // ,
	TypeDot         BuildInType = "Dot"                // .
	TypeColon       BuildInType = "Colon"              // :
	TypeSemicolon   BuildInType = "Semicolon"          // ;

	TypeArrowRight   BuildInType = "ArrowRight"   // ->
	TypeArrowLeft    BuildInType = "ArrowLeft"    // <-
	TypeQuestionMark BuildInType = "QuestionMark" // ?
	TypeTilde        BuildInType = "Tilde"        // ~
	TypeAmpersand    BuildInType = "Ampersand"    // &
	TypePipe         BuildInType = "Pipe"         // |
	TypeCaret        BuildInType = "Caret"        // ^
	TypeDollar       BuildInType = "Dollar"       // $
	TypeHash         BuildInType = "Hash"         // #
	TypeAt           BuildInType = "At"           // @
	TypeEllipses     BuildInType = "Ellipses"     //...

	TypeSpace          BuildInType = "Space"
	TypeTab            BuildInType = "Tab"
	TypeNewline        BuildInType = "Newline"
	TypeCarriageReturn BuildInType = "CarriageReturn"
	TypeFormFeed       BuildInType = "FormFeed"

	// AnyTokenType represents a wildcard for
	// token comparison using Token.Is()
	// and should never be returned by the lexer.
	AnyTokenType BuildInType = "AnyTokenType"
)

func (BuildInType) String

func (bit BuildInType) String() string

type CommentSyntax

type CommentSyntax struct {
	Opener string
	Closer string
}

type CommentTokenizer

type CommentTokenizer struct{}

func (CommentTokenizer) CanTokenize

func (c CommentTokenizer) CanTokenize(l *Lexer) bool

func (CommentTokenizer) Tokenize

func (c CommentTokenizer) Tokenize(l *Lexer) (Token, error)

type Diff

type Diff struct {
	Field  string
	Expect interface{}
	Got    interface{}
}

Diff stores the differences between two values

func (Diff) String

func (d Diff) String() string

type Differ

type Differ struct {
	Diffs []Diff
}

Differ is the main diff engine

func (*Differ) Compare

func (d *Differ) Compare(expected, got any)

Compare is the entry point for comparing two values

func (*Differ) HasDifference

func (d *Differ) HasDifference() bool

func (*Differ) String

func (d *Differ) String() string

type Lexer

type Lexer struct {
	CommentTokenizer CommentTokenizer
	LiteralTokenizer LiteralTokenizer
	NumberTokenizer  NumberTokenizer
	BooleanTokenizer BooleanTokenizer
	SymbolTokenizer  SymbolTokenizer
	StringTokenizer  StringTokenizer

	// options
	LiteralTokens    []LiteralToken
	StringEnclosures []StringEnclosure
	CommentSyntaxes  []CommentSyntax
	Keywords         []string
	IgnoreTokens     []TokenType

	IgnoreWhitespace           bool
	IgnoreComments             bool
	UseBuiltinTypes            bool
	CheckForKeywords           bool
	SymbolStartCharacterMap    string
	SymbolContinueCharacterMap string
	DebugPrintTokens           bool
	OmitTokenPosition          bool
	// contains filtered or unexported fields
}

func NewLexer

func NewLexer(options ...LexerOptionFunc) *Lexer

func (*Lexer) CharAtCursor

func (l *Lexer) CharAtCursor() rune

CharAtCursor returns the rune at the current cursor position

func (*Lexer) CharAtPosition

func (l *Lexer) CharAtPosition(pos int) rune

CharAtPosition returns the rune at the provided absolute position

func (*Lexer) CharAtRelativePosition

func (l *Lexer) CharAtRelativePosition(pos int) rune

CharAtRelativePosition returns the rune at the relative position to the cursor

func (*Lexer) CollectAnyTokenDelimited

func (l *Lexer) CollectAnyTokenDelimited(delimiter TokenType) (Tokens, error)

func (*Lexer) CollectTokensBetween

func (l *Lexer) CollectTokensBetween(open TokenType, close TokenType) (Tokens, int, int, error)

CollectTokensBetween collects all the tokens between the open and close type, starting at the offset start; it assumes that the offset start contains the opening token. Nested openers and closers will be contained in the output tokens until the matching closer is found. In addition, it returns the start and end cursor positions for the collected portion.
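
For example, a sketch that collects everything between a pair of parentheses, assuming TokenizeManual loads the source for manual stepping and that the cursor sits on the opening token as described above:

l.TokenizeManual(`(a, (b), c)`)
tokens, start, end, err := l.CollectTokensBetween(TypeOpenParen, TypeCloseParen)
if err != nil {
    // handle the error
}
fmt.Println(tokens, start, end) // nested tokens included, plus cursor bounds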

func (*Lexer) CollectTokensBetweenCurlyBraces

func (l *Lexer) CollectTokensBetweenCurlyBraces() (Tokens, int, int, error)

CollectTokensBetweenCurlyBraces collects all the tokens between the opening and closing curly braces, starting at the offset start; it assumes that the offset start contains the opening curly brace. Nested curly braces will be contained in the output tokens until the matching closing curly brace is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*Lexer) CollectTokensBetweenParentheses

func (l *Lexer) CollectTokensBetweenParentheses() (Tokens, int, int, error)

CollectTokensBetweenParentheses collects all the tokens between the opening and closing parentheses, starting at the offset start; it assumes that the offset start contains the opening parenthesis. Nested parentheses will be contained in the output tokens until the matching closing parenthesis is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*Lexer) CollectTokensDelimited

func (l *Lexer) CollectTokensDelimited(tokenType TokenType, delimiter TokenType) (Tokens, error)

func (Lexer) CurrentToken

func (l Lexer) CurrentToken() Token

func (*Lexer) CursorIsOutOfBounds

func (l *Lexer) CursorIsOutOfBounds() bool

func (Lexer) GetCurrentLine

func (l Lexer) GetCurrentLine() (int, int)

func (*Lexer) GetCursor

func (l *Lexer) GetCursor() int

func (*Lexer) GetPosition

func (l *Lexer) GetPosition() Position

func (*Lexer) GetSourceSubsString

func (l *Lexer) GetSourceSubsString(start int, end int) string

func (*Lexer) GetState

func (l *Lexer) GetState() State

func (*Lexer) IncrementCursor

func (l *Lexer) IncrementCursor(amount int)

func (*Lexer) Iterate

func (l *Lexer) Iterate(content string) iter.Seq2[Token, error]

func (*Lexer) IterateAnyTokenDelimited

func (l *Lexer) IterateAnyTokenDelimited(delimiter TokenType) iter.Seq2[Token, error]

func (*Lexer) IterateTokensBetween

func (l *Lexer) IterateTokensBetween(open TokenType, close TokenType) (iter.Seq2[Token, error], *int, *int, error)

IterateTokensBetween returns an iterator over all the tokens between the open and close type, starting at the offset start; it assumes that the offset start contains the opening token. Nested openers and closers will be contained in the output tokens until the matching closer is found. In addition, it returns the start and end cursor positions for the iterated portion.

func (*Lexer) IterateTokensBetweenCurlyBraces

func (l *Lexer) IterateTokensBetweenCurlyBraces() (iter.Seq2[Token, error], *int, *int, error)

IterateTokensBetweenCurlyBraces returns an iterator over all the tokens between the opening and closing curly braces, starting at the offset start; it assumes that the offset start contains the opening curly brace. Nested curly braces will be contained in the output tokens until the matching closing curly brace is found. In addition, it returns the start and end cursor positions for the iterated portion.

func (*Lexer) IterateTokensBetweenParentheses

func (l *Lexer) IterateTokensBetweenParentheses() (iter.Seq2[Token, error], *int, *int, error)

IterateTokensBetweenParentheses returns an iterator over all the tokens between the opening and closing parentheses, starting at the offset start; it assumes that the offset start contains the opening parenthesis. Nested parentheses will be contained in the output tokens until the matching closing parenthesis is found. In addition, it returns the start and end cursor positions for the iterated portion.

func (*Lexer) IterateTokensDelimited

func (l *Lexer) IterateTokensDelimited(tokenType TokenType, delimiter TokenType) iter.Seq2[Token, error]

func (*Lexer) Lookahead

func (l *Lexer) Lookahead(offset int) Token

Lookahead returns the token at the given offset from the cursor without consuming it

func (*Lexer) LookaheadIterator

func (l *Lexer) LookaheadIterator(count int) iter.Seq[Token]

LookaheadIterator returns an iterator over the next count tokens without consuming them
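
A sketch of peeking at upcoming tokens without consuming them, assuming offset 1 refers to the token immediately after the cursor:

if l.Lookahead(1).TypeIs(TypeAssign) {
    // an assignment follows the current token
}

// Inspect the next three tokens without consuming them
for token := range l.LookaheadIterator(3) {
    fmt.Println(token.Type, token.Literal)
}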

func (*Lexer) NextCharsAre

func (l *Lexer) NextCharsAre(chars []rune) bool

NextCharsAre checks whether the characters following the cursor match the provided characters, without consuming them

func (*Lexer) NextToken

func (l *Lexer) NextToken() (Token, error)

func (*Lexer) NextTokenIs

func (l *Lexer) NextTokenIs(token Token) bool

NextTokenIs checks if the next token is the same as the provided token without consuming the token

func (*Lexer) NextTokenIsAnyOf

func (l *Lexer) NextTokenIsAnyOf(tokens ...Token) bool

NextTokenIsAnyOf checks if the next token is of any of the provided tokens without consuming the token

func (*Lexer) NextTokenSequenceIs

func (l *Lexer) NextTokenSequenceIs(tokens ...Token) bool

NextTokenSequenceIs checks if the next sequence of tokens in the lexer matches the provided token sequence without consuming the tokens
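
Together with the AnyTokenType wildcard (see BuildInType), this can match a shape such as "<symbol> = <anything>". A sketch, assuming Token.Is treats an empty Literal as matching any literal:

isAssignment := l.NextTokenSequenceIs(
    Token{Type: TypeSymbol}, // assumption: empty Literal matches any symbol
    Token{Type: TypeAssign},
    Token{Type: AnyTokenType}, // wildcard type, per the BuildInType docs
)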

func (Lexer) ReachedEOF

func (l Lexer) ReachedEOF() bool

func (*Lexer) RemoveTokenizer

func (l *Lexer) RemoveTokenizer(tokenizerType TokenizerType)

func (*Lexer) SetCursor

func (l *Lexer) SetCursor(cursor int)

func (*Lexer) SetState

func (l *Lexer) SetState(state State)

func (*Lexer) SkipWhitespace

func (l *Lexer) SkipWhitespace()

func (*Lexer) TokenizeManual

func (l *Lexer) TokenizeManual(content string)

func (*Lexer) TokenizeToSlice

func (l *Lexer) TokenizeToSlice(content string) ([]Token, error)
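
TokenizeToSlice is the eager counterpart to Iterate: it tokenizes the whole input up front and returns the tokens as a slice. For example:

tokens, err := l.TokenizeToSlice(`test = 1.2`)
if err != nil {
    // handle the error
}
for _, token := range tokens {
    token.Dump()
}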

type LexerError

type LexerError struct {
	Message  string
	Position Position
	Cursor   int
	Snippet  string
}

LexerError represents an error that occurred during lexical analysis.

func NewLexerError

func NewLexerError(message string, position Position, cursor int, input []rune) *LexerError

NewLexerError creates a new LexerError with a snippet from the input.

func (*LexerError) Error

func (e *LexerError) Error() string

Error implements the error interface for LexerError

type LexerOptionFunc

type LexerOptionFunc func(*Lexer)

func DebugPrintTokens

func DebugPrintTokens() LexerOptionFunc

func IgnoreTokens

func IgnoreTokens(types ...TokenType) LexerOptionFunc

func OmitTokenPosition

func OmitTokenPosition() LexerOptionFunc

func RetainWhitespace

func RetainWhitespace() LexerOptionFunc

func SymbolCharacterMap

func SymbolCharacterMap(startCharMap, continueCharMap string) LexerOptionFunc

func WithCommentSyntax

func WithCommentSyntax(syntaxes ...CommentSyntax) LexerOptionFunc

func WithKeywords

func WithKeywords(keywords ...string) LexerOptionFunc

func WithLiteralTokens

func WithLiteralTokens(literalTokens ...LiteralToken) LexerOptionFunc

func WithStringEnclosure

func WithStringEnclosure(enclosures ...StringEnclosure) LexerOptionFunc

func WithTokenizer

func WithTokenizer(inserter TokenizerInserter) LexerOptionFunc

func WithoutCommentSyntax

func WithoutCommentSyntax(syntaxes ...CommentSyntax) LexerOptionFunc

func WithoutLiteralTokens

func WithoutLiteralTokens(literalTokens ...TokenType) LexerOptionFunc

func WithoutStringEnclosure

func WithoutStringEnclosure(enclosures ...string) LexerOptionFunc

type LiteralToken

type LiteralToken struct {
	Type    TokenType
	Literal string
}

func SortLiteralTokens

func SortLiteralTokens(tokens []LiteralToken) []LiteralToken

type LiteralTokenizer

type LiteralTokenizer struct{}

func (LiteralTokenizer) CanTokenize

func (t LiteralTokenizer) CanTokenize(l *Lexer) bool

func (LiteralTokenizer) Tokenize

func (t LiteralTokenizer) Tokenize(l *Lexer) (Token, error)

type LiteralTokenizerCacheKey

type LiteralTokenizerCacheKey string

type LookaheadCache

type LookaheadCache struct {
	// contains filtered or unexported fields
}

func (*LookaheadCache) AddItem

func (lc *LookaheadCache) AddItem(token Token) bool

func (*LookaheadCache) ContainsItems

func (lc *LookaheadCache) ContainsItems() bool

func (*LookaheadCache) GetFirstItem

func (lc *LookaheadCache) GetFirstItem() Token

func (*LookaheadCache) GetItem

func (lc *LookaheadCache) GetItem(pos int) Token

func (*LookaheadCache) ItemCount

func (lc *LookaheadCache) ItemCount() int

func (*LookaheadCache) PluckItem

func (lc *LookaheadCache) PluckItem() Token

TODO: this does not check for out-of-bounds positions; probably not what we want.

type NumberTokenizer

type NumberTokenizer struct{}

func (NumberTokenizer) CanTokenize

func (n NumberTokenizer) CanTokenize(l *Lexer) bool

func (NumberTokenizer) Tokenize

func (n NumberTokenizer) Tokenize(l *Lexer) (Token, error)

type Position

type Position struct {
	Row int
	Col int
}


func (Position) String

func (p Position) String() string

type State

type State struct {
	Content       []rune
	ContentLength int
	Cursor        int

	CachedPositionCursor int
	CachedPosition       Position

	LineIndexes      []int
	LineIndexesCount int
	CurrentToken     *Token
	LookaheadCache   LookaheadCache
}

func NewState

func NewState(content string) State

type StringEnclosure

type StringEnclosure struct {
	Type      TokenType
	Enclosure string
	Escapable bool
}
var (
	DoubleQuoteStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeDoubleQuoteString,
		Enclosure: "\"",
		Escapable: true,
	}
	SingleQuoteStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeSingleQuoteString,
		Enclosure: "'",
	}
	BacktickStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeBacktickString,
		Enclosure: "`",
	}
	TripleBacktickStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeTripleBacktickString,
		Enclosure: "```",
	}
)

func (StringEnclosure) Tokenize

func (se StringEnclosure) Tokenize(l *Lexer) (Token, error)

func (StringEnclosure) TokenizeEscapable

func (se StringEnclosure) TokenizeEscapable(l *Lexer) (Token, error)

func (StringEnclosure) TokenizeNotEscapableMultiChar

func (se StringEnclosure) TokenizeNotEscapableMultiChar(l *Lexer) (Token, error)

func (StringEnclosure) TokenizeNotEscapableSingleChar

func (se StringEnclosure) TokenizeNotEscapableSingleChar(l *Lexer) (Token, error)

type StringTokenizer

type StringTokenizer struct{}

func (StringTokenizer) CanTokenize

func (s StringTokenizer) CanTokenize(l *Lexer) bool

func (StringTokenizer) Tokenize

func (s StringTokenizer) Tokenize(l *Lexer) (Token, error)

type SymbolTokenizer

type SymbolTokenizer struct{}

func (SymbolTokenizer) CanTokenize

func (s SymbolTokenizer) CanTokenize(l *Lexer) bool

func (SymbolTokenizer) Tokenize

func (s SymbolTokenizer) Tokenize(l *Lexer) (Token, error)

type Token

type Token struct {
	Type     TokenType
	Literal  string
	Value    any
	Position Position
}


func (*Token) AppendChar

func (t *Token) AppendChar(char ...rune)

func (Token) Dump

func (t Token) Dump()

func (Token) Is

func (t Token) Is(token Token) bool

func (Token) IsAnyOf

func (t Token) IsAnyOf(tokens ...Token) bool

func (Token) LiteralIs

func (t Token) LiteralIs(literal string) bool

func (Token) LiteralIsAnyOf

func (t Token) LiteralIsAnyOf(literals ...string) bool

func (Token) TypeIs

func (t Token) TypeIs(tt TokenType) bool

func (Token) TypeIsAnyOf

func (t Token) TypeIsAnyOf(tokenTypes ...TokenType) bool

type TokenCollection

type TokenCollection struct {
	// contains filtered or unexported fields
}

TokenCollection represents an iterable collection of tokens

func NewTokenCollection

func NewTokenCollection(tokens Tokens) TokenCollection
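
A TokenCollection wraps an already-tokenized slice so it can be walked with cursor-style helpers after lexing. A sketch:

tokens, err := l.TokenizeToSlice(`(a, b)`)
if err != nil {
    // handle the error
}
collection := NewTokenCollection(tokens)
for i, token := range collection.Iter() {
    fmt.Println(i, token.Literal)
}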

func (*TokenCollection) CollectAnyTokensDelimited

func (t *TokenCollection) CollectAnyTokensDelimited(delimiter TokenType) ([]Token, error)

func (*TokenCollection) CollectTokensBetween

func (t *TokenCollection) CollectTokensBetween(open TokenType, close TokenType) (Tokens, int, int, error)

CollectTokensBetween collects all the tokens between the open and close type, starting at the offset start; it assumes that the offset start contains the opening token. Nested openers and closers will be contained in the output tokens until the matching closer is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*TokenCollection) CollectTokensBetweenCurlyBraces

func (t *TokenCollection) CollectTokensBetweenCurlyBraces() (Tokens, int, int, error)

CollectTokensBetweenCurlyBraces collects all the tokens between the opening and closing curly braces, starting at the offset start; it assumes that the offset start contains the opening curly brace. Nested curly braces will be contained in the output tokens until the matching closing curly brace is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*TokenCollection) CollectTokensBetweenParentheses

func (t *TokenCollection) CollectTokensBetweenParentheses() (Tokens, int, int, error)

CollectTokensBetweenParentheses collects all the tokens between the opening and closing parentheses, starting at the offset start; it assumes that the offset start contains the opening parenthesis. Nested parentheses will be contained in the output tokens until the matching closing parenthesis is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*TokenCollection) CollectTokensDelimited

func (t *TokenCollection) CollectTokensDelimited(tokenType TokenType, delimiter TokenType) (Tokens, error)

func (*TokenCollection) CollectTokensUntil

func (t *TokenCollection) CollectTokensUntil(delimiter TokenType) ([]Token, error)

func (TokenCollection) CursorIsOutOfBounds

func (t TokenCollection) CursorIsOutOfBounds() bool

func (*TokenCollection) IncrementCursor

func (t *TokenCollection) IncrementCursor(amount int)

IncrementCursor increments the cursor by the amount

func (*TokenCollection) Iter

func (ti *TokenCollection) Iter() iter.Seq2[int, Token]

func (*TokenCollection) NextToken

func (t *TokenCollection) NextToken() Token

NextToken increments the cursor position by 1 and returns the token at that position

func (TokenCollection) ReachedEOF

func (t TokenCollection) ReachedEOF() bool

func (*TokenCollection) TokenAtCursor

func (t *TokenCollection) TokenAtCursor() Token

TokenAtCursor returns the token at the current cursor position

func (*TokenCollection) TokenAtPosition

func (t *TokenCollection) TokenAtPosition(pos int) Token

TokenAtPosition returns the token at the absolute position

func (*TokenCollection) TokenAtRelativePosition

func (t *TokenCollection) TokenAtRelativePosition(pos int) Token

TokenAtRelativePosition returns the token at the position relative to the cursor

type TokenType

type TokenType interface {
	String() string
}
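
Any type with a String() method satisfies TokenType; BuildInType is one implementation, and custom types can be defined the same way. A sketch with a hypothetical type:

// EmojiType is a hypothetical custom token type
type EmojiType string

func (e EmojiType) String() string { return string(e) }

// Tokens can then carry it directly
var emojiToken = Token{Type: EmojiType("Emoji"), Literal: "🙂"}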


type Tokenizer

type Tokenizer interface {
	CanTokenize(*Lexer) bool
	Tokenize(*Lexer) (Token, error)
}

type TokenizerInserter

type TokenizerInserter struct {
	Before TokenizerType
	After  TokenizerType
	// contains filtered or unexported fields
}

func InsertAfter

func InsertAfter(after TokenizerType, tokenizerType TokenizerType, tokenizer Tokenizer) TokenizerInserter

func InsertBefore

func InsertBefore(before TokenizerType, tokenizerType TokenizerType, tokenizer Tokenizer) TokenizerInserter

func (TokenizerInserter) Insert

func (ti TokenizerInserter) Insert(tokenizers map[TokenizerType]Tokenizer, tokenizationOrder []TokenizerType) (map[TokenizerType]Tokenizer, []TokenizerType)

type TokenizerType

type TokenizerType string
const (
	TypeNoTokenizer      TokenizerType = ""
	TypeCommentTokenizer TokenizerType = "BuildInCommentTokenizer"
	TypeStringTokenizer  TokenizerType = "BuildInStringTokenizer"
	TypeNumberTokenizer  TokenizerType = "BuildInNumberTokenizer"
	TypeLiteralTokenizer TokenizerType = "BuildInLiteralTokenizer"
	TypeSymbolTokenizer  TokenizerType = "BuildInSymbolTokenizer"
	TypeBooleanTokenizer TokenizerType = "BuildInBooleanTokenizer"
)

type Tokens

type Tokens []Token

Tokens represents a set of tokens
