golex

v0.1.3-alpha

Published: Aug 26, 2024 License: GPL-3.0 Imports: 8 Imported by: 0

README

Golex

A lexing and parsing toolkit for Go

Features

  • Multiple Tokenizers: Supports built-in tokenizers for comments, literals, numbers, booleans, strings, and symbols, with the ability to add custom tokenizers.
  • Flexible Lexer Options: Configure the lexer with options like retaining whitespace or customizing keyword sets.
WIP

This package is still a work in progress. The lexer is largely complete, but the parsing tools are not yet fully implemented.

Installation

go get github.com/cornejong/golex

Include the library in your project:

import "github.com/cornejong/golex"

Usage

Basic Example

Here's an example of how to use the lexer to tokenize a simple source string:

package main

import (
    "fmt"
    "os"

    "github.com/cornejong/golex"
)

func main() {
    source := `func() { test = "SomeStringValue"; test = 1.2; test = 88 }`
    lexer := golex.NewLexer()

    for token, err := range lexer.Iterate(source) {
        if err != nil {
            fmt.Println(err)
            os.Exit(1)
        }

        token.Dump()
    }
}

// Output:
//   1:   2 -> Symbol                func                  (<nil>)
//   1:   6 -> OpenParenthesis       (                     (<nil>)
//   1:   7 -> CloseParenthesis      )                     (<nil>)
//   1:   9 -> OpenCurlyBracket      {                     (<nil>)
//   1:  11 -> Symbol                test                  (<nil>)
//   1:  16 -> Assign                =                     (<nil>)
//   1:  18 -> DoubleQuoteString     "SomeStringValue"     (SomeStringValue)
//   1:  35 -> Semicolon             ;                     (<nil>)
//   1:  37 -> Symbol                test                  (<nil>)
//   1:  42 -> Assign                =                     (<nil>)
//   1:  44 -> Float                 1.2                   (1.2)
//   1:  47 -> Semicolon             ;                     (<nil>)
//   1:  49 -> Symbol                test                  (<nil>)
//   1:  54 -> Assign                =                     (<nil>)
//   1:  56 -> Integer               88                    (88)
//   1:  59 -> CloseCurlyBracket     }                     (<nil>)
//   1:  60 -> EndOfFile                                   (<nil>)

Lexer Options

lexer := NewLexer(
    // Print each token as it is parsed
    DebugPrintTokens(),

    // Don't add the token position to the token
    OmitTokenPosition(),

    // Ignore specific token types. They are still parsed, but skipped;
    // lexer.NextToken returns the next non-ignored token
    IgnoreTokens(TypeComment),

    // Retain whitespace tokens
    RetainWhitespace(),

    // Turn symbols into keyword tokens
    WithKeywords("func", "const", "def"),

    // Specify the symbol character maps
    // - arg1: the start characters of a symbol
    // - arg2: the continuation characters of a symbol
    SymbolCharacterMap("a-zA-Z_", "a-zA-Z0-9_"),

    // Register a custom tokenizer (sketched below)
    WithTokenizer(InsertBefore(TypeStringTokenizer, TokenizerType("MyCustomTokenizer"), MyCustomTokenizer{})),

    // Extend the literal tokens (MyTokenType is sketched below)
    WithLiteralTokens(LiteralToken{Type: MyTokenType("MyLiteralToken"), Literal: "__!__"}),

    // Unset built-in literal tokens
    WithoutLiteralTokens(TypeEllipses, TypeSemicolon),

    // Add comment syntaxes
    WithCommentSyntax(CommentSyntax{Opener: "#"}, CommentSyntax{Opener: "/*", Closer: "*/"}),

    // Unset a built-in comment syntax
    WithoutCommentSyntax(CommentSyntax{Opener: "//"}),

    // Add a string enclosure
    WithStringEnclosure(StringEnclosure{Enclosure: "```"}),

    // Unset a built-in enclosure (takes the enclosure strings)
    WithoutStringEnclosure("\""),
)
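
The options above reference MyCustomTokenizer and MyTokenType, which are not part of golex. Here is a minimal sketch of both, assuming a tokenizer that consumes a single '@' character; the exact cursor contract of Tokenize is an assumption, so treat this as illustrative rather than definitive:

// MyTokenType is a hypothetical custom token type; any type with a
// String() method satisfies the TokenType interface.
type MyTokenType string

func (t MyTokenType) String() string { return string(t) }

// MyCustomTokenizer is a hypothetical tokenizer implementing the
// Tokenizer interface (CanTokenize / Tokenize).
type MyCustomTokenizer struct{}

// CanTokenize reports whether this tokenizer applies at the cursor.
func (MyCustomTokenizer) CanTokenize(l *Lexer) bool {
    return l.CharAtCursor() == '@'
}

// Tokenize consumes the '@' and returns it as a token.
func (MyCustomTokenizer) Tokenize(l *Lexer) (Token, error) {
    token := Token{Type: MyTokenType("AtSign"), Position: l.GetPosition()}
    token.AppendChar(l.CharAtCursor())
    l.IncrementCursor(1) // assumption: advance past the consumed character
    return token, nil
}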

Tokens

type Token struct {
    // The token Type
    Type     TokenType
    // The literal representation of the token
    Literal  string
    // The parsed value (if available)
    // Currently just for strings, numbers and booleans
    Value    any
    // The token Position within the source
    Position Position
}
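
Tokens can be inspected through these fields directly, or via the comparison helpers documented below (Is, TypeIs, LiteralIs, and friends). A small sketch:

for token, err := range lexer.Iterate(source) {
    if err != nil {
        break
    }
    // TypeIs compares only the token type
    if token.TypeIs(TypeInteger) {
        fmt.Printf("integer %v at %s\n", token.Value, token.Position)
    }
}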

Built-in Types

All basic token types are built-in and can be unset or extended using the lexer options. For a full list of built-in types, see build_in_types.go.

TODO:

  • Better lexer errors with positional info

Documentation

Index

Constants

This section is empty.

Variables

var (
	EOF rune = rune(byte(0x03))

	ErrNoEOFTokenType     error = errors.New("no eof token type specified. Specify an EOF token type or use build-in types")
	ErrNoInvalidTokenType error = errors.New("no eof token type specified. Specify an EOF token type or use build-in types")
)
var (
	SlashSingleLineCommentSyntax   = CommentSyntax{Opener: "//"}
	SlashMultilineCommentSyntax    = CommentSyntax{Opener: "/*", Closer: "*/"}
	HashtagSingleLineCommentSyntax = CommentSyntax{Opener: "#"}
)

Functions

This section is empty.

Types

type BooleanTokenizer

type BooleanTokenizer struct{}

func (BooleanTokenizer) CanTokenize

func (b BooleanTokenizer) CanTokenize(l *Lexer) bool

func (BooleanTokenizer) Tokenize

func (b BooleanTokenizer) Tokenize(l *Lexer) (Token, error)

type BuildInType

type BuildInType string
const (
	TypeSof     BuildInType = "StartOfFile"
	TypeEof     BuildInType = "EndOfFile"
	TypeInvalid BuildInType = "Invalid"

	TypeString               BuildInType = "String"
	TypeDoubleQuoteString    BuildInType = "DoubleQuoteString"
	TypeSingleQuoteString    BuildInType = "SingleQuoteString"
	TypeBacktickString       BuildInType = "BacktickString"
	TypeTripleBacktickString BuildInType = "TripleBacktickString"
	TypeNumber               BuildInType = "Number"
	TypeInteger              BuildInType = "Integer"
	TypeFloat                BuildInType = "Float"
	TypeBool                 BuildInType = "Boolean"
	TypeNull                 BuildInType = "Null"
	TypeNil                  BuildInType = "Nil"

	TypeComment    BuildInType = "Comment"
	TypeKeyword    BuildInType = "Keyword"
	TypeIdentifier BuildInType = "Identifier"
	TypeSymbol     BuildInType = "Symbol"

	TypePlus               BuildInType = "Plus"               // +
	TypeMinus              BuildInType = "Minus"              // -
	TypeMultiply           BuildInType = "Multiply"           // *
	TypeDivide             BuildInType = "Divide"             // /
	TypeModulo             BuildInType = "Modulo"             // %
	TypeAssign             BuildInType = "Assign"             // =
	TypeEqual              BuildInType = "Equal"              // ==
	TypeNotEqual           BuildInType = "NotEqual"           // !=
	TypeLessThan           BuildInType = "LessThan"           // <
	TypeGreaterThan        BuildInType = "GreaterThan"        // >
	TypeLessThanOrEqual    BuildInType = "LessThanOrEqual"    // <=
	TypeGreaterThanOrEqual BuildInType = "GreaterThanOrEqual" // >=
	TypeAnd                BuildInType = "And"                // &&
	TypeOr                 BuildInType = "Or"                 // ||
	TypeNot                BuildInType = "Not"                // !

	TypeOpenParen   BuildInType = "OpenParenthesis"    // (
	TypeCloseParen  BuildInType = "CloseParenthesis"   // )
	TypeOpenCurly   BuildInType = "OpenCurlyBracket"   // {
	TypeCloseCurly  BuildInType = "CloseCurlyBracket"  // }
	TypeOpenSquare  BuildInType = "OpenSquareBracket"  // [
	TypeCloseSquare BuildInType = "CloseSquareBracket" // ]
	TypeComma       BuildInType = "Comma"              // ,
	TypeDot         BuildInType = "Dot"                // .
	TypeColon       BuildInType = "Colon"              // :
	TypeSemicolon   BuildInType = "Semicolon"          // ;

	TypeArrowRight   BuildInType = "ArrowRight"   // ->
	TypeArrowLeft    BuildInType = "ArrowLeft"    // <-
	TypeQuestionMark BuildInType = "QuestionMark" // ?
	TypeTilde        BuildInType = "Tilde"        // ~
	TypeAmpersand    BuildInType = "Ampersand"    // &
	TypePipe         BuildInType = "Pipe"         // |
	TypeCaret        BuildInType = "Caret"        // ^
	TypeDollar       BuildInType = "Dollar"       // $
	TypeHash         BuildInType = "Hash"         // #
	TypeAt           BuildInType = "At"           // @
	TypeEllipses     BuildInType = "Ellipses"     //...

	TypeSpace          BuildInType = "Space"
	TypeTab            BuildInType = "Tab"
	TypeNewline        BuildInType = "Newline"
	TypeCarriageReturn BuildInType = "CarriageReturn"
	TypeFormFeed       BuildInType = "FormFeed"

	// AnyTokenType represents a wildcard for
	// token comparison using Token.Is()
	// and should never be returned by the lexer.
	AnyTokenType BuildInType = "AnyTokenType"
)

func (BuildInType) String

func (bit BuildInType) String() string

type CommentSyntax

type CommentSyntax struct {
	Opener string
	Closer string
}

type CommentTokenizer

type CommentTokenizer struct{}

func (CommentTokenizer) CanTokenize

func (c CommentTokenizer) CanTokenize(l *Lexer) bool

func (CommentTokenizer) Tokenize

func (c CommentTokenizer) Tokenize(l *Lexer) (Token, error)

type Diff

type Diff struct {
	Field  string
	Expect interface{}
	Got    interface{}
}

Diff stores the differences between two values

func (Diff) String

func (d Diff) String() string

type Differ

type Differ struct {
	Diffs []Diff
}

Differ is the main diff engine

func (*Differ) Compare

func (d *Differ) Compare(expected, got any)

Compare is the entry point for comparing two values

func (*Differ) HasDifference

func (d *Differ) HasDifference() bool

func (*Differ) String

func (d *Differ) String() string

type Lexer

type Lexer struct {
	CommentTokenizer CommentTokenizer
	LiteralTokenizer LiteralTokenizer
	NumberTokenizer  NumberTokenizer
	BooleanTokenizer BooleanTokenizer
	SymbolTokenizer  SymbolTokenizer
	StringTokenizer  StringTokenizer

	// options
	LiteralTokens    []LiteralToken
	StringEnclosures []StringEnclosure
	CommentSyntaxes  []CommentSyntax
	Keywords         []string
	IgnoreTokens     []TokenType

	IgnoreWhitespace           bool
	IgnoreComments             bool
	UseBuiltinTypes            bool
	CheckForKeywords           bool
	SymbolStartCharacterMap    string
	SymbolContinueCharacterMap string
	DebugPrintTokens           bool
	OmitTokenPosition          bool
	// contains filtered or unexported fields
}

func NewLexer

func NewLexer(options ...LexerOptionFunc) *Lexer

func (*Lexer) CharAtCursor

func (l *Lexer) CharAtCursor() rune

CharAtCursor returns the rune at the current cursor position

func (*Lexer) CharAtPosition

func (l *Lexer) CharAtPosition(pos int) rune

CharAtPosition returns the rune at the provided absolute position

func (*Lexer) CharAtRelativePosition

func (l *Lexer) CharAtRelativePosition(pos int) rune

CharAtRelativePosition returns the rune at the relative position to the cursor

func (*Lexer) CollectAnyTokenDelimited

func (l *Lexer) CollectAnyTokenDelimited(delimiter TokenType) (Tokens, error)

func (*Lexer) CollectTokensBetween

func (l *Lexer) CollectTokensBetween(open TokenType, close TokenType) (Tokens, int, int, error)

CollectTokensBetween collects all the tokens between the open and close type, starting at the offset start; it assumes that the offset start contains the opening token. Nested openers and closers will be contained in the output tokens until the matching closer is found. In addition, it returns the start and end cursor positions for the collected portion.
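
For example, a sketch that collects everything between a pair of parentheses, assuming TokenizeManual loads the source for manual stepping and that the cursor sits on the opening token as described above:

l.TokenizeManual(`(a, (b), c)`)
tokens, start, end, err := l.CollectTokensBetween(TypeOpenParen, TypeCloseParen)
if err != nil {
    // handle the error
}
fmt.Println(tokens, start, end) // nested tokens included, plus cursor bounds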

func (*Lexer) CollectTokensBetweenCurlyBraces

func (l *Lexer) CollectTokensBetweenCurlyBraces() (Tokens, int, int, error)

CollectTokensBetweenCurlyBraces collects all the tokens between the opening and closing curly braces, starting at the offset start; it assumes that the offset start contains the opening curly brace. Nested curly braces will be contained in the output tokens until the matching closing curly brace is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*Lexer) CollectTokensBetweenParentheses

func (l *Lexer) CollectTokensBetweenParentheses() (Tokens, int, int, error)

CollectTokensBetweenParentheses collects all the tokens between the opening and closing parentheses, starting at the offset start; it assumes that the offset start contains the opening parenthesis. Nested parentheses will be contained in the output tokens until the matching closing parenthesis is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*Lexer) CollectTokensDelimited

func (l *Lexer) CollectTokensDelimited(tokenType TokenType, delimiter TokenType) (Tokens, error)

func (Lexer) CurrentToken

func (l Lexer) CurrentToken() Token

func (*Lexer) CursorIsOutOfBounds

func (l *Lexer) CursorIsOutOfBounds() bool

func (Lexer) GetCurrentLine

func (l Lexer) GetCurrentLine() (int, int)

func (*Lexer) GetCursor

func (l *Lexer) GetCursor() int

func (*Lexer) GetPosition

func (l *Lexer) GetPosition() Position

func (*Lexer) GetSourceSubsString

func (l *Lexer) GetSourceSubsString(start int, end int) string

func (*Lexer) GetState

func (l *Lexer) GetState() State

func (*Lexer) IncrementCursor

func (l *Lexer) IncrementCursor(amount int)

func (*Lexer) Iterate

func (l *Lexer) Iterate(content string) iter.Seq2[Token, error]

func (*Lexer) IterateAnyTokenDelimited

func (l *Lexer) IterateAnyTokenDelimited(delimiter TokenType) iter.Seq2[Token, error]

func (*Lexer) IterateTokensBetween

func (l *Lexer) IterateTokensBetween(open TokenType, close TokenType) (iter.Seq2[Token, error], *int, *int, error)

IterateTokensBetween returns an iterator over all the tokens between the open and close type, starting at the offset start; it assumes that the offset start contains the opening token. Nested openers and closers will be contained in the output tokens until the matching closer is found. In addition, it returns the start and end cursor positions for the iterated portion.

func (*Lexer) IterateTokensBetweenCurlyBraces

func (l *Lexer) IterateTokensBetweenCurlyBraces() (iter.Seq2[Token, error], *int, *int, error)

IterateTokensBetweenCurlyBraces returns an iterator over all the tokens between the opening and closing curly braces, starting at the offset start; it assumes that the offset start contains the opening curly brace. Nested curly braces will be contained in the output tokens until the matching closing curly brace is found. In addition, it returns the start and end cursor positions for the iterated portion.

func (*Lexer) IterateTokensBetweenParentheses

func (l *Lexer) IterateTokensBetweenParentheses() (iter.Seq2[Token, error], *int, *int, error)

IterateTokensBetweenParentheses returns an iterator over all the tokens between the opening and closing parentheses, starting at the offset start; it assumes that the offset start contains the opening parenthesis. Nested parentheses will be contained in the output tokens until the matching closing parenthesis is found. In addition, it returns the start and end cursor positions for the iterated portion.

func (*Lexer) IterateTokensDelimited

func (l *Lexer) IterateTokensDelimited(tokenType TokenType, delimiter TokenType) iter.Seq2[Token, error]

func (*Lexer) Lookahead

func (l *Lexer) Lookahead(offset int) Token

Lookahead returns the token at the given offset from the cursor without consuming it

func (*Lexer) LookaheadIterator

func (l *Lexer) LookaheadIterator(count int) iter.Seq[Token]

LookaheadIterator returns an iterator over the next count tokens without consuming them
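
A sketch of peeking at upcoming tokens without consuming them, assuming offset 1 refers to the token immediately after the cursor:

if l.Lookahead(1).TypeIs(TypeAssign) {
    // an assignment follows the current token
}

// Inspect the next three tokens without consuming them
for token := range l.LookaheadIterator(3) {
    fmt.Println(token.Type, token.Literal)
}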

func (*Lexer) NextCharsAre

func (l *Lexer) NextCharsAre(chars []rune) bool

NextCharsAre checks whether the characters following the cursor match the provided characters, without consuming them

func (*Lexer) NextToken

func (l *Lexer) NextToken() (Token, error)

func (*Lexer) NextTokenIs

func (l *Lexer) NextTokenIs(token Token) bool

NextTokenIs checks if the next token is the same as the provided token without consuming the token

func (*Lexer) NextTokenIsAnyOf

func (l *Lexer) NextTokenIsAnyOf(tokens ...Token) bool

NextTokenIsAnyOf checks if the next token is of any of the provided tokens without consuming the token

func (*Lexer) NextTokenSequenceIs

func (l *Lexer) NextTokenSequenceIs(tokens ...Token) bool

NextTokenSequenceIs checks if the next sequence of tokens in the lexer matches the provided token sequence without consuming the tokens
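
Together with the AnyTokenType wildcard (see BuildInType), this can match a shape such as "<symbol> = <anything>". A sketch, assuming Token.Is treats an empty Literal as matching any literal:

isAssignment := l.NextTokenSequenceIs(
    Token{Type: TypeSymbol}, // assumption: empty Literal matches any symbol
    Token{Type: TypeAssign},
    Token{Type: AnyTokenType}, // wildcard type, per the BuildInType docs
)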

func (Lexer) ReachedEOF

func (l Lexer) ReachedEOF() bool

func (*Lexer) RemoveTokenizer

func (l *Lexer) RemoveTokenizer(tokenizerType TokenizerType)

func (*Lexer) SetCursor

func (l *Lexer) SetCursor(cursor int)

func (*Lexer) SetState

func (l *Lexer) SetState(state State)

func (*Lexer) SkipWhitespace

func (l *Lexer) SkipWhitespace()

func (*Lexer) TokenizeManual

func (l *Lexer) TokenizeManual(content string)

func (*Lexer) TokenizeToSlice

func (l *Lexer) TokenizeToSlice(content string) ([]Token, error)
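
TokenizeToSlice is the eager counterpart to Iterate: it tokenizes the whole input up front and returns the tokens as a slice. For example:

tokens, err := l.TokenizeToSlice(`test = 1.2`)
if err != nil {
    // handle the error
}
for _, token := range tokens {
    token.Dump()
}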

type LexerError

type LexerError struct {
	Message  string
	Position Position
	Cursor   int
	Snippet  string
}

LexerError represents an error that occurred during lexical analysis.

func NewLexerError

func NewLexerError(message string, position Position, cursor int, input []rune) *LexerError

NewLexerError creates a new LexerError with a snippet from the input.

func (*LexerError) Error

func (e *LexerError) Error() string

Error implements the error interface for LexerError

type LexerOptionFunc

type LexerOptionFunc func(*Lexer)

func DebugPrintTokens

func DebugPrintTokens() LexerOptionFunc

func IgnoreTokens

func IgnoreTokens(types ...TokenType) LexerOptionFunc

func OmitTokenPosition

func OmitTokenPosition() LexerOptionFunc

func RetainWhitespace

func RetainWhitespace() LexerOptionFunc

func SymbolCharacterMap

func SymbolCharacterMap(startCharMap, continueCharMap string) LexerOptionFunc

func WithCommentSyntax

func WithCommentSyntax(syntaxes ...CommentSyntax) LexerOptionFunc

func WithKeywords

func WithKeywords(keywords ...string) LexerOptionFunc

func WithLiteralTokens

func WithLiteralTokens(literalTokens ...LiteralToken) LexerOptionFunc

func WithStringEnclosure

func WithStringEnclosure(enclosures ...StringEnclosure) LexerOptionFunc

func WithTokenizer

func WithTokenizer(inserter TokenizerInserter) LexerOptionFunc

func WithoutCommentSyntax

func WithoutCommentSyntax(syntaxes ...CommentSyntax) LexerOptionFunc

func WithoutLiteralTokens

func WithoutLiteralTokens(literalTokens ...TokenType) LexerOptionFunc

func WithoutStringEnclosure

func WithoutStringEnclosure(enclosures ...string) LexerOptionFunc

type LiteralToken

type LiteralToken struct {
	Type    TokenType
	Literal string
}

func SortLiteralTokens

func SortLiteralTokens(tokens []LiteralToken) []LiteralToken

type LiteralTokenizer

type LiteralTokenizer struct{}

func (LiteralTokenizer) CanTokenize

func (t LiteralTokenizer) CanTokenize(l *Lexer) bool

func (LiteralTokenizer) Tokenize

func (t LiteralTokenizer) Tokenize(l *Lexer) (Token, error)

type LiteralTokenizerCacheKey

type LiteralTokenizerCacheKey string

type LookaheadCache

type LookaheadCache struct {
	// contains filtered or unexported fields
}

func (*LookaheadCache) AddItem

func (lc *LookaheadCache) AddItem(token Token) bool

func (*LookaheadCache) ContainsItems

func (lc *LookaheadCache) ContainsItems() bool

func (*LookaheadCache) GetFirstItem

func (lc *LookaheadCache) GetFirstItem() Token

func (*LookaheadCache) GetItem

func (lc *LookaheadCache) GetItem(pos int) Token

func (*LookaheadCache) ItemCount

func (lc *LookaheadCache) ItemCount() int

func (*LookaheadCache) PluckItem

func (lc *LookaheadCache) PluckItem() Token

TODO: this does not check for out-of-bounds positions; probably not what we want.

type NumberTokenizer

type NumberTokenizer struct{}

func (NumberTokenizer) CanTokenize

func (n NumberTokenizer) CanTokenize(l *Lexer) bool

func (NumberTokenizer) Tokenize

func (n NumberTokenizer) Tokenize(l *Lexer) (Token, error)

type Position

type Position struct {
	Row int
	Col int
}


func (Position) String

func (p Position) String() string

type State

type State struct {
	Content       []rune
	ContentLength int
	Cursor        int

	CachedPositionCursor int
	CachedPosition       Position

	LineIndexes      []int
	LineIndexesCount int
	CurrentToken     *Token
	LookaheadCache   LookaheadCache
}

func NewState

func NewState(content string) State

type StringEnclosure

type StringEnclosure struct {
	Type      TokenType
	Enclosure string
	Escapable bool
}
var (
	DoubleQuoteStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeDoubleQuoteString,
		Enclosure: "\"",
		Escapable: true,
	}
	SingleQuoteStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeSingleQuoteString,
		Enclosure: "'",
	}
	BacktickStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeBacktickString,
		Enclosure: "`",
	}
	TripleBacktickStringEnclosure StringEnclosure = StringEnclosure{
		Type:      TypeTripleBacktickString,
		Enclosure: "```",
	}
)

func (StringEnclosure) Tokenize

func (se StringEnclosure) Tokenize(l *Lexer) (Token, error)

func (StringEnclosure) TokenizeEscapable

func (se StringEnclosure) TokenizeEscapable(l *Lexer) (Token, error)

func (StringEnclosure) TokenizeNotEscapableMultiChar

func (se StringEnclosure) TokenizeNotEscapableMultiChar(l *Lexer) (Token, error)

func (StringEnclosure) TokenizeNotEscapableSingleChar

func (se StringEnclosure) TokenizeNotEscapableSingleChar(l *Lexer) (Token, error)

type StringTokenizer

type StringTokenizer struct{}

func (StringTokenizer) CanTokenize

func (s StringTokenizer) CanTokenize(l *Lexer) bool

func (StringTokenizer) Tokenize

func (s StringTokenizer) Tokenize(l *Lexer) (Token, error)

type SymbolTokenizer

type SymbolTokenizer struct{}

func (SymbolTokenizer) CanTokenize

func (s SymbolTokenizer) CanTokenize(l *Lexer) bool

func (SymbolTokenizer) Tokenize

func (s SymbolTokenizer) Tokenize(l *Lexer) (Token, error)

type Token

type Token struct {
	Type     TokenType
	Literal  string
	Value    any
	Position Position
}


func (*Token) AppendChar

func (t *Token) AppendChar(char ...rune)

func (Token) Dump

func (t Token) Dump()

func (Token) Is

func (t Token) Is(token Token) bool

func (Token) IsAnyOf

func (t Token) IsAnyOf(tokens ...Token) bool

func (Token) LiteralIs

func (t Token) LiteralIs(literal string) bool

func (Token) LiteralIsAnyOf

func (t Token) LiteralIsAnyOf(literals ...string) bool

func (Token) TypeIs

func (t Token) TypeIs(tt TokenType) bool

func (Token) TypeIsAnyOf

func (t Token) TypeIsAnyOf(tokenTypes ...TokenType) bool

type TokenCollection

type TokenCollection struct {
	// contains filtered or unexported fields
}

TokenCollection represents an iterable collection of tokens

func NewTokenCollection

func NewTokenCollection(tokens Tokens) TokenCollection
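
A TokenCollection wraps an already-tokenized slice so it can be walked with cursor-style helpers after lexing. A sketch:

tokens, err := l.TokenizeToSlice(`(a, b)`)
if err != nil {
    // handle the error
}
collection := NewTokenCollection(tokens)
for i, token := range collection.Iter() {
    fmt.Println(i, token.Literal)
}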

func (*TokenCollection) CollectAnyTokensDelimited

func (t *TokenCollection) CollectAnyTokensDelimited(delimiter TokenType) ([]Token, error)

func (*TokenCollection) CollectTokensBetween

func (t *TokenCollection) CollectTokensBetween(open TokenType, close TokenType) (Tokens, int, int, error)

CollectTokensBetween collects all the tokens between the open and close type, starting at the offset start; it assumes that the offset start contains the opening token. Nested openers and closers will be contained in the output tokens until the matching closer is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*TokenCollection) CollectTokensBetweenCurlyBraces

func (t *TokenCollection) CollectTokensBetweenCurlyBraces() (Tokens, int, int, error)

CollectTokensBetweenCurlyBraces collects all the tokens between the opening and closing curly braces, starting at the offset start; it assumes that the offset start contains the opening curly brace. Nested curly braces will be contained in the output tokens until the matching closing curly brace is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*TokenCollection) CollectTokensBetweenParentheses

func (t *TokenCollection) CollectTokensBetweenParentheses() (Tokens, int, int, error)

CollectTokensBetweenParentheses collects all the tokens between the opening and closing parentheses, starting at the offset start; it assumes that the offset start contains the opening parenthesis. Nested parentheses will be contained in the output tokens until the matching closing parenthesis is found. In addition, it returns the start and end cursor positions for the collected portion.

func (*TokenCollection) CollectTokensDelimited

func (t *TokenCollection) CollectTokensDelimited(tokenType TokenType, delimiter TokenType) (Tokens, error)

func (*TokenCollection) CollectTokensUntil

func (t *TokenCollection) CollectTokensUntil(delimiter TokenType) ([]Token, error)

func (TokenCollection) CursorIsOutOfBounds

func (t TokenCollection) CursorIsOutOfBounds() bool

func (*TokenCollection) IncrementCursor

func (t *TokenCollection) IncrementCursor(amount int)

IncrementCursor increments the cursor by the amount

func (*TokenCollection) Iter

func (ti *TokenCollection) Iter() iter.Seq2[int, Token]

func (*TokenCollection) NextToken

func (t *TokenCollection) NextToken() Token

NextToken increments the cursor position by 1 and returns the token at that position

func (TokenCollection) ReachedEOF

func (t TokenCollection) ReachedEOF() bool

func (*TokenCollection) TokenAtCursor

func (t *TokenCollection) TokenAtCursor() Token

TokenAtCursor returns the token at the current cursor position

func (*TokenCollection) TokenAtPosition

func (t *TokenCollection) TokenAtPosition(pos int) Token

TokenAtPosition returns the token at the absolute position

func (*TokenCollection) TokenAtRelativePosition

func (t *TokenCollection) TokenAtRelativePosition(pos int) Token

TokenAtRelativePosition returns the token at the position relative to the cursor

type TokenType

type TokenType interface {
	String() string
}
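
Any type with a String() method satisfies TokenType; BuildInType is one implementation, and custom types can be defined the same way. A sketch with a hypothetical type:

// EmojiType is a hypothetical custom token type
type EmojiType string

func (e EmojiType) String() string { return string(e) }

// Tokens can then carry it directly
var emojiToken = Token{Type: EmojiType("Emoji"), Literal: "🙂"}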


type Tokenizer

type Tokenizer interface {
	CanTokenize(*Lexer) bool
	Tokenize(*Lexer) (Token, error)
}

type TokenizerInserter

type TokenizerInserter struct {
	Before TokenizerType
	After  TokenizerType
	// contains filtered or unexported fields
}

func InsertAfter

func InsertAfter(after TokenizerType, tokenizerType TokenizerType, tokenizer Tokenizer) TokenizerInserter

func InsertBefore

func InsertBefore(before TokenizerType, tokenizerType TokenizerType, tokenizer Tokenizer) TokenizerInserter

func (TokenizerInserter) Insert

func (ti TokenizerInserter) Insert(tokenizers map[TokenizerType]Tokenizer, tokenizationOrder []TokenizerType) (map[TokenizerType]Tokenizer, []TokenizerType)

type TokenizerType

type TokenizerType string
const (
	TypeNoTokenizer      TokenizerType = ""
	TypeCommentTokenizer TokenizerType = "BuildInCommentTokenizer"
	TypeStringTokenizer  TokenizerType = "BuildInStringTokenizer"
	TypeNumberTokenizer  TokenizerType = "BuildInNumberTokenizer"
	TypeLiteralTokenizer TokenizerType = "BuildInLiteralTokenizer"
	TypeSymbolTokenizer  TokenizerType = "BuildInSymbolTokenizer"
	TypeBooleanTokenizer TokenizerType = "BuildInBooleanTokenizer"
)

type Tokens

type Tokens []Token

Tokens represents a set of tokens
