Documentation
¶
Overview ¶
Example (BasicCalculator) ¶
// Let's write a small example for parsing a really basic calculator. // The calculator understands input that looks like: // // 10 + 20 - 8+4 // // So positive numbers that can be either added or substracted, and whitespace // is ignored. package main import ( "fmt" "strconv" "git.makaay.nl/mauricem/go-parsekit" ) // When writing a parser, it's a good start to use the parser/combinator // functionality of parsekit to create some TokenHandler functions. These functions // can later be used in the parser state machine to check for matching strings // on the input data. // // For the calculator, we only need a definition of "number, surrounded by // optional whitespace". Skipping whitespace could be a part of the StateHandler // functions below too, but including it in a TokenHandler makes things really // practical. func createNumberMatcher() parsekit.TokenHandler { // Easy access to parsekit definition. c, a, m := parsekit.C, parsekit.A, parsekit.M whitespace := m.Drop(c.Opt(a.Whitespace)) return c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace) } var calcNumber = createNumberMatcher() // We need to define the ItemTypes that we will use for emitting Items // during the parsing process. const ( numberType parsekit.ItemType = iota addType subtractType ) // We also need to define the state machine for parsing the input. // The state machine is built up from functions that match the StateHandler // signature: func(*parsekit.ParseAPI) // The ParseAPI struct holds the internal state for the parser and it provides // some methods that form the API for your StateHandler implementation. // State: expect a number. When a number is found on the input, // it is accepted in the parser's string buffer, after which that buffer is // emitted as a numberType item. Then we tell the state machine to continue // with the calcWaitForOperatorOrEndOfInput state. // When no number is found, the parser will emit an error, explaining that // "a number" was expected. 
func calcWaitForNumber(p *parsekit.ParseAPI) { p.Expects("a number") if p.On(calcNumber).Accept() { p.EmitLiteral(numberType) p.RouteTo(calcWaitForOperatorOrEndOfInput) } } // State: expect a plus or minus operator. When one of those is found, the // appropriate Item is emitted and the parser is sent back to the // numberHandler to find the next number on the input. When no operator is // found, then the parser is told to expect the end of the input. When more // input data are available (which are obviously wrong data since they do // not match our syntax), the parser will emit an error. func calcWaitForOperatorOrEndOfInput(p *parsekit.ParseAPI) { switch { case p.On(a.Plus).Accept(): p.EmitLiteral(addType) p.RouteTo(calcWaitForNumber) case p.On(a.Minus).Accept(): p.EmitLiteral(subtractType) p.RouteTo(calcWaitForNumber) default: p.ExpectEndOfFile() } } // All is ready for our parser. We now can create a new Parser struct. // We need to tell it what StateHandler to start with. In our case, it is the // calcWaitForNumber state, since the calculation must start with a number. var calcParser = parsekit.NewParser(calcWaitForNumber) func main() { // Let's feed the parser some input to work with. This provides us with // a parse run for that input. run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ") // We can now step through the results of the parsing process by repeated // calls to run.Next(). Next() returns either the next parse item, a parse // error or an end of file. Let's dump the parse results and handle the // computation while we're at it. // TODO this in convoluted for people using the parser code I think. Maybe use three output data types instead? 
sum := 0 op := +1 for { item, err, ok := run.Next() switch { case !ok && err == nil: fmt.Println("End of file reached") fmt.Println("Outcome of computation:", sum) return case !ok: fmt.Printf("Error: %s\n", err) return default: fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value) switch { case item.Type == addType: op = +1 case item.Type == subtractType: op = -1 case item.Type == numberType: nr, err := strconv.Atoi(item.Value) if err != nil { fmt.Printf("Error: invalid number %s: %s\n", item.Value, err) return } sum += op * nr } } } }
Output: Type: 0, Value: "153" Type: 1, Value: "+" Type: 0, Value: "22" Type: 1, Value: "+" Type: 0, Value: "31" Type: 2, Value: "-" Type: 0, Value: "4" Type: 2, Value: "-" Type: 0, Value: "6" Type: 1, Value: "+" Type: 0, Value: "42" End of file reached Outcome of computation: 238
Example (DutchPostcodeUsingMatcher) ¶
// In this example, a Parser is created which can parse and normalize Dutch postcodes // The implementation uses only TokenHandler functions and does not implement a // full-fledged state-based Parser for it. package main import ( "fmt" "git.makaay.nl/mauricem/go-parsekit" ) func createPostcodeMatcher() *parsekit.Matcher { // Easy access to the parsekit definitions. c, a, m := parsekit.C, parsekit.A, parsekit.M // TokenHandler functions are created and combined to satisfy these rules: // - A Dutch postcode consists of 4 digits and 2 letters (1234XX). // - The first digit is never a zero. // - A space between letters and digits is optional. // - It is good form to write the letters in upper case. // - It is good form to use a single space between digits and letters. digitNotZero := c.Except(c.Rune('0'), a.Digit) pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit)) pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) space := m.Replace(c.Opt(a.Whitespace), " ") postcode := c.Seq(pcDigits, space, pcLetters) // Create a Matcher, which wraps the 'postcode' TokenHandler and allows // us to match some input against that handler. return parsekit.NewMatcher(postcode, "a Dutch postcode") } func main() { pcParser := createPostcodeMatcher() for i, input := range []string{ "1234 AB", "2233Ab", "1001\t\tab", "1818ab", "1234", "huh", } { output, err, ok := pcParser.Parse(input) if !ok { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) } } }
Output: [0] Input: "1234 AB" Output: 1234 AB [1] Input: "2233Ab" Output: 2233 AB [2] Input: "1001\t\tab" Output: 1001 AB [3] Input: "1818ab" Output: 1818 AB [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode)
Example (HelloWorldUsingMatcher) ¶
// In this example, a parser is created that is able to parse input that looks // like "Hello, <name>!", and that extracts the name from it. // // The implementation uses only parser/combinator TokenHandler functions and does // not implement a full-fledged state-based Parser for it. If you want to see the // same kind of functionality, implementated using a Paser, take a look at the // HelloWorldUsingParser example. package main import ( "fmt" "git.makaay.nl/mauricem/go-parsekit" ) func createHelloMatcher() *parsekit.Matcher { // Easy access to parsekit definition. c, a, m := parsekit.C, parsekit.A, parsekit.M // Using the parser/combinator support of parsekit, we create a TokenHandler function // that does all the work. The 'greeting' TokenHandler matches the whole input and // drops all but the name from it. hello := c.StrNoCase("hello") comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace)) separator := c.Any(comma, a.Whitespace) name := c.OneOrMore(c.Not(a.Excl)) greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl)) // Create a Matcher, which wraps the 'greeting' TokenHandler and allows // us to match some input against that handler. return parsekit.NewMatcher(greeting, "a friendly greeting") } func main() { parser := createHelloMatcher() for i, input := range []string{ "Hello, world!", "HELLO ,Johnny!", "hello , Bob123!", "hello Pizza!", "Oh no!", "Hello, world", } { output, err, ok := parser.Parse(input) if !ok { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) } } }
Output: [0] Input: "Hello, world!" Output: world [1] Input: "HELLO ,Johnny!" Output: Johnny [2] Input: "hello , Bob123!" Output: Bob123 [3] Input: "hello Pizza!" Output: Pizza [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
Example (HelloWorldUsingParser) ¶
// In this example, a parser is created that is able to parse input that looks // like "Hello, <name>!", and that extracts the name from it. // // This implementation uses a state-based Parser for it, and it does not implement // any custom parser/combinator TokenHandler functions. Note that things are much // easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher // example for this). Doing this fully parser-based implementation is mainly for your // learning pleasure. // // One big difference between the Matcher-based example and this one, is that the // state-based parser reports errors much more fine-grained. This might or might // not be useful for your specific use case. package main import ( "fmt" "strings" "git.makaay.nl/mauricem/go-parsekit" ) const greeteeItem parsekit.ItemType = 1 func stateStartOfGreeting(p *parsekit.ParseAPI) { c := parsekit.C p.Expects("hello") if p.On(c.StrNoCase("hello")).Skip() { p.RouteTo(stateComma) } } func stateComma(p *parsekit.ParseAPI) { a := parsekit.A p.Expects("comma") switch { case p.On(a.Whitespace).Skip(): p.RouteRepeat() case p.On(a.Comma).Skip(): p.RouteTo(stateName) } } func stateName(p *parsekit.ParseAPI) { a := parsekit.A p.Expects("name") switch { case p.On(a.Excl).Skip(): p.RouteTo(stateEndOfGreeting) case p.On(a.AnyRune).Accept(): p.RouteRepeat() } } func stateEndOfGreeting(p *parsekit.ParseAPI) { p.Expects("end of greeting") if p.On(a.EndOfFile).Stay() { name := strings.TrimSpace(p.BufLiteral()) if name == "" { p.EmitError("The name cannot be empty") } else { p.Emit(greeteeItem, name) } } } func createHelloParser() *parsekit.Parser { return parsekit.NewParser(stateStartOfGreeting) } func main() { parser := createHelloParser() for i, input := range []string{ "Hello, world!", "HELLO ,Johnny!", "hello , Bob123!", "hello Pizza!", "", " ", "hello", "hello,", "hello , ", "hello , Droopy", "hello , Droopy!", "hello , \t \t Droopy \t !", "Oh no!", "hello,!", } { item, err, ok := 
parser.Parse(input).Next() if !ok { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value) } } }
Output: [0] Input: "Hello, world!" Output: world [1] Input: "HELLO ,Johnny!" Output: Johnny [2] Input: "hello , Bob123!" Output: Bob123 [3] Input: "hello Pizza!" Error: unexpected character 'P' (expected comma) [4] Input: "" Error: unexpected end of file (expected hello) [5] Input: " " Error: unexpected character ' ' (expected hello) [6] Input: "hello" Error: unexpected end of file (expected comma) [7] Input: "hello," Error: unexpected end of file (expected name) [8] Input: "hello , " Error: unexpected end of file (expected name) [9] Input: "hello , Droopy" Error: unexpected end of file (expected name) [10] Input: "hello , Droopy!" Output: Droopy [11] Input: "hello , \t \t Droopy \t !" Output: Droopy [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello) [13] Input: "hello,!" Error: The name cannot be empty
Index ¶
- Variables
- type Error
- type Item
- type ItemType
- type MatchAction
- type Matcher
- type ParseAPI
- func (p *ParseAPI) BufInterpreted() (string, bool)
- func (p *ParseAPI) BufLiteral() string
- func (p *ParseAPI) Emit(t ItemType, v string)
- func (p *ParseAPI) EmitError(format string, args ...interface{})
- func (p *ParseAPI) EmitInterpreted(t ItemType) bool
- func (p *ParseAPI) EmitLiteral(t ItemType)
- func (p *ParseAPI) ExpectEndOfFile()
- func (p *ParseAPI) Expects(description string)
- func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction
- func (p *ParseAPI) RouteRepeat()
- func (p *ParseAPI) RouteReturn()
- func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction
- func (p *ParseAPI) UnexpectedInput()
- type ParseRun
- type Parser
- type RouteFollowupAction
- type StateHandler
- type TokenAPI
- type TokenHandler
- func MatchAny(handlers ...TokenHandler) TokenHandler
- func MatchAnyRune() TokenHandler
- func MatchEndOfFile() TokenHandler
- func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler
- func MatchMax(max int, handler TokenHandler) TokenHandler
- func MatchMin(min int, handler TokenHandler) TokenHandler
- func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler
- func MatchNot(handler TokenHandler) TokenHandler
- func MatchOneOrMore(handler TokenHandler) TokenHandler
- func MatchOpt(handler TokenHandler) TokenHandler
- func MatchRep(times int, handler TokenHandler) TokenHandler
- func MatchRune(expected rune) TokenHandler
- func MatchRuneRange(start rune, end rune) TokenHandler
- func MatchRunes(expected ...rune) TokenHandler
- func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler
- func MatchSeq(handlers ...TokenHandler) TokenHandler
- func MatchStr(expected string) TokenHandler
- func MatchStrNoCase(expected string) TokenHandler
- func MatchZeroOrMore(handler TokenHandler) TokenHandler
- func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler
- func ModifyDrop(handler TokenHandler) TokenHandler
- func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler
- func ModifyToLower(handler TokenHandler) TokenHandler
- func ModifyToUpper(handler TokenHandler) TokenHandler
- func ModifyTrim(handler TokenHandler, cutset string) TokenHandler
- func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler
- func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var A = struct { EndOfFile TokenHandler AnyRune TokenHandler Space TokenHandler Tab TokenHandler CR TokenHandler LF TokenHandler CRLF TokenHandler Excl TokenHandler DoubleQuote TokenHandler Hash TokenHandler Dollar TokenHandler Percent TokenHandler Amp TokenHandler SingleQuote TokenHandler RoundOpen TokenHandler RoundClose TokenHandler Asterisk TokenHandler Plus TokenHandler Comma TokenHandler Minus TokenHandler Dot TokenHandler Slash TokenHandler Colon TokenHandler Semicolon TokenHandler AngleOpen TokenHandler Equal TokenHandler AngleClose TokenHandler Question TokenHandler At TokenHandler SquareOpen TokenHandler Backslash TokenHandler SquareClose TokenHandler Caret TokenHandler Underscore TokenHandler Backquote TokenHandler CurlyOpen TokenHandler Pipe TokenHandler CurlyClose TokenHandler Tilde TokenHandler Newline TokenHandler Whitespace TokenHandler WhitespaceAndNewlines TokenHandler EndOfLine TokenHandler Digit TokenHandler ASCII TokenHandler ASCIILower TokenHandler ASCIIUpper TokenHandler HexDigit TokenHandler }{ EndOfFile: MatchEndOfFile(), AnyRune: MatchAnyRune(), Space: C.Rune(' '), Tab: C.Rune('\t'), CR: C.Rune('\r'), LF: C.Rune('\n'), CRLF: C.Str("\r\n"), Excl: C.Rune('!'), DoubleQuote: C.Rune('"'), Hash: C.Rune('#'), Dollar: C.Rune('$'), Percent: C.Rune('%'), Amp: C.Rune('&'), SingleQuote: C.Rune('\''), RoundOpen: C.Rune('('), RoundClose: C.Rune(')'), Asterisk: C.Rune('*'), Plus: C.Rune('+'), Comma: C.Rune(','), Minus: C.Rune('-'), Dot: C.Rune('.'), Slash: C.Rune('/'), Colon: C.Rune(':'), Semicolon: C.Rune(';'), AngleOpen: C.Rune('<'), Equal: C.Rune('='), AngleClose: C.Rune('>'), Question: C.Rune('?'), At: C.Rune('@'), SquareOpen: C.Rune('['), Backslash: C.Rune('\\'), SquareClose: C.Rune(']'), Caret: C.Rune('^'), Underscore: C.Rune('_'), Backquote: C.Rune('`'), CurlyOpen: C.Rune('{'), Pipe: C.Rune('|'), CurlyClose: C.Rune('}'), Tilde: C.Rune('~'), Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))), WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' 
'), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))), EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()), Digit: C.RuneRange('0', '9'), ASCII: C.RuneRange('\x00', '\x7F'), ASCIILower: C.RuneRange('a', 'z'), ASCIIUpper: C.RuneRange('A', 'Z'), HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')), }
A provides convenient access to a range of atoms that can be used to build TokenHandlers or parser rules.
In parsekit, an atom is defined as a ready-to-use TokenHandler function.
When using A in your own parser, then it is advised to create a variable to reference it:
var a = parsekit.A
Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct { Rune func(rune) TokenHandler Runes func(...rune) TokenHandler RuneRange func(rune, rune) TokenHandler Str func(string) TokenHandler StrNoCase func(string) TokenHandler Any func(...TokenHandler) TokenHandler Not func(TokenHandler) TokenHandler Opt func(TokenHandler) TokenHandler Seq func(...TokenHandler) TokenHandler Rep func(times int, handler TokenHandler) TokenHandler Min func(min int, handler TokenHandler) TokenHandler Max func(max int, handler TokenHandler) TokenHandler ZeroOrMore func(TokenHandler) TokenHandler OneOrMore func(TokenHandler) TokenHandler MinMax func(min int, max int, handler TokenHandler) TokenHandler Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency Except func(except TokenHandler, handler TokenHandler) TokenHandler }{ Rune: MatchRune, Runes: MatchRunes, RuneRange: MatchRuneRange, Str: MatchStr, StrNoCase: MatchStrNoCase, Opt: MatchOpt, Any: MatchAny, Not: MatchNot, Seq: MatchSeq, Rep: MatchRep, Min: MatchMin, Max: MatchMax, ZeroOrMore: MatchZeroOrMore, OneOrMore: MatchOneOrMore, MinMax: MatchMinMax, Separated: MatchSeparated, Except: MatchExcept, }
C provides convenient access to a range of parser/combinators that can be used to construct TokenHandler functions.
When using C in your own parser, then it is advised to create a variable to reference it:
var c = parsekit.C
Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct { Drop func(TokenHandler) TokenHandler Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? ToLower func(TokenHandler) TokenHandler ToUpper func(TokenHandler) TokenHandler Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments? ModifyByCallback func(TokenHandler, func(string) string) TokenHandler }{ Drop: ModifyDrop, Trim: ModifyTrim, TrimLeft: ModifyTrimLeft, TrimRight: ModifyTrimRight, ToLower: ModifyToLower, ToUpper: ModifyToUpper, Replace: ModifyReplace, ModifyByCallback: ModifyByCallback, }
M provides convenient access to a range of modifiers (which in their nature are parser/combinators) that can be used when creating TokenHandler functions.
In parsekit, a modifier is defined as a TokenHandler function that modifies the resulting output of another TokenHandler in some way. It does not do any matching against input of its own.
When using M in your own parser, then it is advised to create a variable to reference it:
var m = parsekit.M
Doing so saves you a lot of typing, and it makes your code a lot cleaner.
Functions ¶
This section is empty.
Types ¶
type Error ¶
Error is used as the error type when parsing errors occur. The error includes some extra meta information to allow for useful error messages to the user.
Example ¶
err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } fmt.Println(err.Error()) fmt.Printf("%s\n", err) fmt.Println(err.ErrorFull())
Output: it broke down it broke down it broke down after line 10, column 42
func (*Error) Error ¶
Example ¶
err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } fmt.Println(err.Error()) fmt.Printf("%s\n", err)
Output: it broke down it broke down
func (*Error) ErrorFull ¶
ErrorFull returns the current error message, including information about the position in the input where the error occurred.
Example ¶
err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } fmt.Println(err.ErrorFull())
Output: it broke down after line 10, column 42
type Item ¶
Item represents an item that can be emitted from the parser.
Example ¶
// Easy access to the parsekit definitions. c := parsekit.C // You define your own item types for your specific parser. const QuestionItem = parsekit.ItemType(42) // A StateHandler function can use the defined item type by means of // the p.Emit* methods on parsekit.P. // When errors occur, or the end of the file is reached, then the built-in // types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit. stateHandler := func(p *parsekit.ParseAPI) { if p.On(c.Str("question")).Accept() { p.EmitLiteral(QuestionItem) } p.ExpectEndOfFile() } // Successful match item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next() fmt.Println(ok, item.Type == QuestionItem, item.Value) // End of file reached item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next() fmt.Println(ok, item.Type == parsekit.ItemEOF) // An error occurred item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next() fmt.Println(ok, item.Type == parsekit.ItemError, err)
Output: true true question false true false true unexpected character 'a' (expected end of file)
type ItemType ¶
type ItemType int
ItemType represents the type of a parser Item.
When creating your own ItemType values, then make use of non-negative integer values (zero or higher, e.g. by starting at iota). Negative values are possible, but they are reserved for internal use by parsekit.
Example ¶
// Make use of positive values. Ideally, define your ItemTypes using // iota for easy automatic value management like this: const ( ItemWord parsekit.ItemType = iota ItemNumber ItemBlob // ... )
Output:
const ItemEOF ItemType = -1
ItemEOF is a built-in parser item type that is used for flagging that the end of the input was reached.
const ItemError ItemType = -2
ItemError is a built-in parser item type that is used for flagging that an error has occurred during parsing.
type MatchAction ¶ added in v0.0.2
type MatchAction struct {
// contains filtered or unexported fields
}
MatchAction is a struct that is used for building the On()-method chain. The On() method will return an initialized struct of this type.
func (*MatchAction) Accept ¶ added in v0.0.2
func (a *MatchAction) Accept() bool
Accept tells the parser to move the cursor past a match that was found, and to store the input that matched in the parser's string buffer. When no match was found, then no action is taken.
Returns true in case a match was found. When no match was found, then no action is taken and false is returned.
func (*MatchAction) Skip ¶ added in v0.0.2
func (a *MatchAction) Skip() bool
Skip tells the parser to move the cursor past a match that was found, without storing the actual match in the parser's string buffer.
Returns true in case a match was found. When no match was found, then no action is taken and false is returned.
func (*MatchAction) Stay ¶ added in v0.0.2
func (a *MatchAction) Stay() bool
Stay tells the parser to not move the cursor after finding a match. Returns true in case a match was found, false otherwise.
type Matcher ¶
type Matcher struct {
// contains filtered or unexported fields
}
Matcher is the top-level struct that holds the configuration for a parser that is based solely on a TokenHandler function. The Matcher can be instantiated using the parsekit.NewMatcher() method.
To match input data against the wrapped Matcher function, use the method Matcher.Parse().
func NewMatcher ¶
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher
NewMatcher instantiates a new Matcher.
This is a simple wrapper around a TokenHandler function. It can be used to match an input string against that TokenHandler function and retrieve the results in a straightforward way.
The 'expects' parameter is used for creating an error message in case parsed input does not match the TokenHandler.
type ParseAPI ¶ added in v0.0.2
type ParseAPI struct { LastMatch string // a string representation of the last matched input data // contains filtered or unexported fields }
ParseAPI holds the internal state of a parse run and provides an API to StateHandler methods to communicate with the parser.
func (*ParseAPI) BufInterpreted ¶ added in v0.0.2
BufInterpreted retrieves the contents of the parser's string buffer (all the runes that were added to it using On(...).Accept()) as an interpreted string.
Interpreted means that the contents are treated as a Go double quoted interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). If the input had for example the subsequent runes '\' and 'n' in it, then the interpreted string would have an actual linefeed (ASCII char 10) in it.
This method returns a boolean value, indicating whether or not the string interpretation was successful. On invalid string data, an error will automatically be emitted and the boolean return value will be false.
Retrieving the buffer contents will not affect the buffer itself. New runes can still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
func (*ParseAPI) BufLiteral ¶ added in v0.0.2
BufLiteral retrieves the contents of the parser's string buffer (all the runes that were added to it using On(...).Accept()) as a literal string.
Literal means that if the input had for example the subsequent runes '\' and 'n' in it, then the literal string would have a backslash and an 'n' in it, not a linefeed (ASCII char 10).
Retrieving the buffer contents will not affect the buffer itself. New runes can still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
func (*ParseAPI) Emit ¶ added in v0.0.2
Emit passes a Parser item to the client, including the provided string.
func (*ParseAPI) EmitInterpreted ¶ added in v0.0.2
EmitInterpreted passes a Parser item to the client, including the accumulated string buffer data as a Go double quoted interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). This method returns a boolean value, indicating whether or not the string interpretation was successful. On invalid string data, an error will automatically be emitted and false will be returned.
func (*ParseAPI) EmitLiteral ¶ added in v0.0.2
EmitLiteral passes a parser Item to the client, including the accumulated string buffer data as a literal string.
func (*ParseAPI) ExpectEndOfFile ¶ added in v0.0.2
func (p *ParseAPI) ExpectEndOfFile()
ExpectEndOfFile can be used from a StateHandler function to indicate that your parser expects to be at the end of the file. This will schedule a parsekit-provided StateHandler which will do the actual check for this.
func (*ParseAPI) Expects ¶ added in v0.0.2
Expects is used to let a StateHandler function describe what input it is expecting. This expectation is used in error messages to make them more descriptive.
When defining an expectation inside a StateHandler, you do not need to handle unexpected input yourself. When the end of the function is reached without setting the next state, an automatic error will be emitted. This error can differentiate between the following issues:
1) there is valid data on input, but it was not accepted by the function
2) there is an invalid UTF8 character on input
3) the end of the file was reached.
func (*ParseAPI) On ¶ added in v0.0.2
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction
On checks if the input at the current cursor position matches the provided TokenHandler. On must be chained with another method, which tells the parser what action to perform when a match was found:
1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
2) On(...).Accept() - Move cursor forward, add runes to the parser's string buffer.
3) On(...).Stay() - Do nothing, the cursor stays at the same position.
So an example chain could look like this:
p.On(parsekit.A.Whitespace).Skip()
The chain as a whole returns a boolean, which indicates whether or not a match was found. When no match was found, false is returned and Skip() and Accept() will have no effect. Because of this, typical use of an On() chain is as the condition of a conditional statement (if, switch/case, for). E.g.:
// Skip multiple exclamation marks. for p.On(parsekit.A.Excl).Skip() { } // Fork a route based on the input. switch { case p.On(parsekit.A.Excl).Stay() p.RouteTo(stateHandlerA) case p.On(parsekit.A.Colon).Stay(): p.RouteTo(stateHandlerB) default: p.RouteTo(stateHandlerC) } // When there's a "hi" on input, emit a parser item for it. if p.On(parsekit.C.Str("hi")).Accept() { p.Emit(SomeItemType, p.BufLiteral()) }
func (*ParseAPI) RouteRepeat ¶ added in v0.0.2
func (p *ParseAPI) RouteRepeat()
RouteRepeat tells the parser that on the next parsing cycle, the current StateHandler must be reinvoked.
func (*ParseAPI) RouteReturn ¶ added in v0.0.2
func (p *ParseAPI) RouteReturn()
RouteReturn tells the parser that on the next cycle the last StateHandler that was pushed on the route stack must be invoked.
Using this method is optional. When implementing a StateHandler that is used as a sort of subroutine (using constructions like p.RouteTo(subroutine).ThenReturnHere()), you can refrain from providing an explicit routing decision from that handler. The parser will automatically assume a RouteReturn() in that case.
func (*ParseAPI) RouteTo ¶ added in v0.0.2
func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction
RouteTo tells the parser what StateHandler function to invoke on the next parse cycle.
func (*ParseAPI) UnexpectedInput ¶ added in v0.0.2
func (p *ParseAPI) UnexpectedInput()
UnexpectedInput is used by a StateHandler function to emit an error item that tells the client that an unexpected rune was encountered in the input.
type ParseRun ¶ added in v0.0.2
type ParseRun struct {
// contains filtered or unexported fields
}
ParseRun represents a single parse run for a Parser.
func (*ParseRun) Next ¶ added in v0.0.2
Next retrieves the next parsed item for a parse run.
When a valid item was found, then the boolean return parameter will be true. On error or when successfully reaching the end of the input, false is returned. When an error occurred, false will be returned and the error return value will be set (default is nil).
type Parser ¶
type Parser struct {
// contains filtered or unexported fields
}
Parser is the top-level struct that holds the configuration for a parser. The Parser can be instantiated using the parsekit.NewParser() method.
func NewParser ¶
func NewParser(startState StateHandler) *Parser
NewParser instantiates a new Parser.
The Parser is a state machine-style recursive descent parser, in which StateHandler functions are used to move the state machine forward during parsing. This style of parser is typically used for parsing languages and structured data formats (like json, toml, etc.)
To start parsing input data, use the method Parser.Parse().
type RouteFollowupAction ¶ added in v0.0.2
type RouteFollowupAction struct {
// contains filtered or unexported fields
}
RouteFollowupAction chains parsing routes. It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
func (*RouteFollowupAction) ThenReturnHere ¶ added in v0.0.2
func (a *RouteFollowupAction) ThenReturnHere()
ThenReturnHere schedules the current StateHandler to be invoked after the RouteTo StateHandler has been completed. For example:
p.RouteTo(handlerA).ThenReturnHere()
func (*RouteFollowupAction) ThenTo ¶ added in v0.0.2
func (a *RouteFollowupAction) ThenTo(state StateHandler)
ThenTo schedules a StateHandler that must be invoked after the RouteTo StateHandler has been completed. For example:
p.RouteTo(handlerA).ThenTo(handlerB)
type StateHandler ¶
type StateHandler func(*ParseAPI)
StateHandler defines the type of function that must be implemented to handle a parsing state in a Parser state machine.
A StateHandler function gets a ParseAPI struct as its input. This struct holds all the internal state for the parsing state machine and provides the interface that the StateHandler uses to interact with the parser.
type TokenAPI ¶ added in v0.0.2
type TokenAPI struct {
// contains filtered or unexported fields
}
TokenAPI is used by TokenHandler functions to retrieve runes from the input to match against and to report back results.
Basic operation:
To retrieve the next rune from the input, the TokenHandler function can call the TokenAPI.NextRune() method.
The TokenHandler function can then evaluate the retrieved rune and either accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
the rune will not be added to the output. It is mandatory for a TokenHandler to call either Accept() or Skip() after retrieving a rune, before calling NextRune() again.
Eventually, the TokenHandler function must return a boolean value, indicating whether or not a match was found. When true, then the calling code will use the runes that were accepted into the TokenAPI's resulting output.
Forking operation for easy lookahead support:
Sometimes, a TokenHandler function must be able to perform a lookahead, which might either succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be brought back to the original state.
The way in which this is supported, is by forking a TokenAPI by calling TokenAPI.Fork(). This will return a child TokenAPI, with an empty output buffer, but using the same input cursor position as the forked parent.
The TokenHandler function can then use the same interface as described for normal operation to retrieve runes from the input and to fill the resulting output. When the TokenHandler function decides that the lookahead was successful, then the method TokenAPI.Merge() can be called on the forked child to append the resulting output from the child to the parent's resulting output, and to update the parent input cursor position to that of the child.
When the TokenHandler function decides that the lookahead was unsuccessful, then it can simply discard the forked child. The parent TokenAPI was never modified, so a new match can be safely started using that parent, as if the lookahead never happened.
func (*TokenAPI) Accept ¶ added in v0.0.2
func (t *TokenAPI) Accept()
Accept will add the last rune as read by TokenAPI.NextRune() to the resulting output of the TokenAPI.
func (*TokenAPI) ClearInput ¶ added in v0.0.2
func (t *TokenAPI) ClearInput()
ClearInput clears the input for the TokenAPI, but it keeps the output and input offset as-is.
func (*TokenAPI) ClearOutput ¶ added in v0.0.2
func (t *TokenAPI) ClearOutput()
ClearOutput clears the resulting output for the TokenAPI, but it keeps the input and input offset as-is.
func (*TokenAPI) Fork ¶ added in v0.0.2
Fork splits off a child TokenAPI, containing the same input cursor position
as the parent TokenAPI, but with all other data in a fresh state.
By forking, a TokenHandler function can freely work with a TokenAPI, without affecting the parent TokenAPI. This is for example useful when the TokenHandler function must perform some form of lookahead.
When a successful match was found, the TokenHandler function can call TokenAPI.Merge() on the forked child to have the resulting output added to the parent TokenAPI.
When no match was found, the forked child can simply be discarded.
Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'. This is done in 4 steps and only after finishing all steps, the TokenHandler function can confirm a successful match. The TokenHandler function for this case could look like this (yes, it's naive, but it shows the point): TODO make proper tested example
func MatchAbcd(t *TokenAPI) bool { child := t.Fork() // fork to keep t untouched for _, letter := range []rune {'a', 'b', 'c', 'd'} { if r, ok := child.NextRune(); !ok || r != letter { return false // report mismatch, t is left untouched } child.Accept() // add rune to child output } child.Merge() // we have a match, add resulting output to parent return true // and report the successful match }
func (*TokenAPI) Merge ¶ added in v0.0.2
Merge merges the resulting output from a forked child TokenAPI back into its parent: The runes that are accepted in the child are added to the parent runes and the parent's input cursor position is advanced to the child's cursor position.
After the merge, the child TokenAPI is reset so it can immediately be reused for performing another match (all data are cleared, except for the input offset which is kept at its current position).
func (*TokenAPI) NextRune ¶ added in v0.0.2
NextRune retrieves the next rune from the input.
It returns the rune and a boolean. The boolean will be false in case an invalid UTF8 rune or the end of the file was encountered.
After using NextRune() to retrieve a rune, Accept() or Skip() can be called to respectively add the rune to the TokenAPI's resulting output or to fully ignore it. This way, a TokenHandler has full control over what runes are significant for the resulting output of that TokenHandler.
After using NextRune(), this method can not be reinvoked, until the last read rune is explicitly accepted or skipped as described above.
type TokenHandler ¶ added in v0.0.2
TokenHandler is the function type that is involved in turning a low level stream of UTF8 runes into parsing tokens. Its purpose is to check if input data matches some kind of pattern and to report back the match.
A TokenHandler is to be used in conjunction with parsekit.P.On() or parsekit.Matcher().
A TokenHandler function gets a TokenAPI as its input and returns a boolean to indicate whether or not it found a match on the input. The TokenAPI is used for retrieving input data to match against and for reporting back results.
func MatchAny ¶
func MatchAny(handlers ...TokenHandler) TokenHandler
MatchAny creates a TokenHandler that checks if any of the provided TokenHandlers can be applied. They are applied in their provided order. The first TokenHandler that applies is used for reporting back a match.
func MatchAnyRune ¶
func MatchAnyRune() TokenHandler
MatchAnyRune creates a TokenHandler function that checks if a valid rune can be read from the input. It reports back a successful match if the end of the input has not yet been reached and the upcoming input is a valid UTF8 rune.
Example ¶
// Easy access to the parsekit definitions. a := parsekit.A stateHandler := func(p *parsekit.ParseAPI) { p.Expects("Any valid rune") if p.On(a.AnyRune).Accept() { p.EmitLiteral(TestItem) p.RouteRepeat() } } parser := parsekit.NewParser(stateHandler) run := parser.Parse("¡Any / valid / character will dö!") for i := 0; i < 5; i++ { match, _, _ := run.Next() fmt.Printf("Match = %q\n", match.Value) }
Output: Match = "¡" Match = "A" Match = "n" Match = "y" Match = " "
func MatchEndOfFile ¶
func MatchEndOfFile() TokenHandler
MatchEndOfFile creates a TokenHandler that checks if the end of the input data has been reached. This TokenHandler will never produce output. It only reports a successful or a failing match through its boolean return value.
func MatchExcept ¶
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler
MatchExcept creates a TokenHandler that checks if the provided TokenHandler can be applied to the upcoming input. It also checks if the except TokenHandler can be applied. If the handler applies, but the except TokenHandler applies as well, then the match as a whole will be treated as a mismatch.
func MatchMax ¶
func MatchMax(max int, handler TokenHandler) TokenHandler
MatchMax creates a TokenHandler that checks if the provided TokenHandler can be applied at maximum the provided maximum number of times. When more matches are possible, these will be included in the output. Zero matches are considered a successful match.
func MatchMin ¶
func MatchMin(min int, handler TokenHandler) TokenHandler
MatchMin creates a TokenHandler that checks if the provided TokenHandler can be applied at least the provided minimum number of times. When more matches are possible, these will be included in the output.
func MatchMinMax ¶
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler
MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can be applied between the provided minimum and maximum number of times, inclusive. All matches will be included in the output.
func MatchNot ¶
func MatchNot(handler TokenHandler) TokenHandler
MatchNot creates a TokenHandler that checks if the provided TokenHandler applies to the current input. If it does, then a failed match will be reported. If it does not, then the next rune from the input will be reported as a match.
func MatchOneOrMore ¶
func MatchOneOrMore(handler TokenHandler) TokenHandler
MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can be applied one or more times. All matches will be included in the output.
func MatchOpt ¶
func MatchOpt(handler TokenHandler) TokenHandler
MatchOpt creates a TokenHandler that makes the provided TokenHandler optional. When the provided TokenHandler applies, then its output is used, otherwise no output is generated but still a successful match is reported.
func MatchRep ¶
func MatchRep(times int, handler TokenHandler) TokenHandler
MatchRep creates a TokenHandler that checks if the provided TokenHandler can be applied exactly the provided amount of times.
Note that the input can contain more than the provided number of matches, e.g.:
MatchRep(4, MatchRune('X'))
will not match input "XXX", it will match input "XXXX", but also "XXXXXX". In that last case, there will be a remainder "XX" on the input.
func MatchRune ¶
func MatchRune(expected rune) TokenHandler
MatchRune creates a TokenHandler function that checks if the next rune from the input matches the provided rune.
func MatchRuneRange ¶
func MatchRuneRange(start rune, end rune) TokenHandler
MatchRuneRange creates a TokenHandler function that checks if the next rune from the input is contained by the provided rune range.
The rune range is defined by a start and an end rune, inclusive, so:
MatchRuneRange('g', 'k')
creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
func MatchRunes ¶
func MatchRunes(expected ...rune) TokenHandler
MatchRunes creates a TokenHandler function that checks if the next rune from the input is one of the provided runes.
func MatchSeparated ¶
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler
MatchSeparated creates a TokenHandler that checks for a pattern of one or more TokenHandlers of one type (the separated), separated by a TokenHandler of another type (the separator). All matches (separated + separator) are included in the output.
func MatchSeq ¶
func MatchSeq(handlers ...TokenHandler) TokenHandler
MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be applied in their exact order. Only if all handlers apply, the sequence reports a successful match.
func MatchStr ¶
func MatchStr(expected string) TokenHandler
MatchStr creates a TokenHandler that will check if the upcoming runes on the input match the provided string. TODO make this a more efficient string-level match?
func MatchStrNoCase ¶
func MatchStrNoCase(expected string) TokenHandler
MatchStrNoCase creates a TokenHandler that will check if the upcoming runes on the input match the provided string in a case-insensitive manner. TODO make this a more efficient string-level match?
func MatchZeroOrMore ¶
func MatchZeroOrMore(handler TokenHandler) TokenHandler
MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can be applied zero or more times. All matches will be included in the output. Zero matches are considered a successful match.
func ModifyByCallback ¶
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler
ModifyByCallback creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is taken and it is fed to the provided modfunc. This is a simple function that takes a string on input and returns a possibly modified string on output. The return value of the modfunc will replace the resulting output.
func ModifyDrop ¶
func ModifyDrop(handler TokenHandler) TokenHandler
ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is discarded completely.
Note that if the TokenHandler does not apply, a mismatch will be reported back, even though we would have dropped the output anyway. So if you would like to drop optional whitespace, then use something like:
M.Drop(C.Opt(A.Whitespace))
instead of:
M.Drop(A.Whitespace)
Since whitespace is defined as "1 or more spaces and/or tabs", the input string "bork" would not match against the second form, but " bork" would. In both cases, it would match the first form.
func ModifyReplace ¶
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler
ModifyReplace creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is replaced by the provided string.
func ModifyToLower ¶
func ModifyToLower(handler TokenHandler) TokenHandler
ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is taken and converted into lower case.
func ModifyToUpper ¶
func ModifyToUpper(handler TokenHandler) TokenHandler
ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is taken and converted into upper case.
func ModifyTrim ¶
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler
ModifyTrim creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is taken and characters from the provided cutset are trimmed from both the left and the right of the output.
func ModifyTrimLeft ¶
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler
ModifyTrimLeft creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is taken and characters from the provided cutset are trimmed from the left of the output.
func ModifyTrimRight ¶
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler
ModifyTrimRight creates a TokenHandler that checks if the provided TokenHandler applies. If it does, then its output is taken and characters from the provided cutset are trimmed from the right of the output.