sqllexer

package module
v0.1.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 22, 2025 License: MIT Imports: 4 Imported by: 4

README

go-sqllexer

This repository contains a hand written SQL Lexer that tokenizes SQL queries with a focus on obfuscating and normalization. The lexer is written in Go with no external dependencies. Note This is NOT a SQL parser, it only tokenizes SQL queries.

Features

  • 🚀 Fast and lightweight tokenization (not regex based)
  • 🔒 Obfuscates sensitive data (e.g. numbers, strings, specific literals like dollar quoted strings in Postgres, etc.)
  • 📖 Even works with truncated queries
  • 🌐 UTF-8 support
  • 🔧 Normalizes obfuscated queries

Installation

go get github.com/DataDog/go-sqllexer

Usage

Tokenize
import "github.com/DataDog/go-sqllexer"

func main() {
    query := "SELECT * FROM users WHERE id = 1"
    lexer := sqllexer.New(query)
    for {
        token := lexer.Scan()
        if token.Type == EOF {
            break
        }
        fmt.Println(token)
    }
}
Obfuscate
import (
    "fmt"
    "github.com/DataDog/go-sqllexer"
)

func main() {
    query := "SELECT * FROM users WHERE id = 1"
    obfuscator := sqllexer.NewObfuscator()
    obfuscated := obfuscator.Obfuscate(query)
    // "SELECT * FROM users WHERE id = ?"
    fmt.Println(obfuscated)
}
Normalize
import (
    "fmt"
    "github.com/DataDog/go-sqllexer"
)

func main() {
    query := "SELECT * FROM users WHERE id in (?, ?)"
    normalizer := sqllexer.NewNormalizer(
        WithCollectComments(true),
        WithCollectCommands(true),
        WithCollectTables(true),
        WithKeepSQLAlias(false),
    )
    normalized, statementMetadata, err := normalizer.Normalize(query)
    // "SELECT * FROM users WHERE id in (?)"
    fmt.Println(normalized)
}

Testing

go test -v ./...

Benchmarks

go test -bench=. -benchmem ./...

License

MIT License

Documentation

Index

Examples

Constants

View Source
const (
	StringPlaceholder = "?"
	NumberPlaceholder = "?"
)

Variables

This section is empty.

Functions

func WithCollectCommands

func WithCollectCommands(collectCommands bool) normalizerOption

func WithCollectComments

func WithCollectComments(collectComments bool) normalizerOption

func WithCollectProcedures added in v0.0.7

func WithCollectProcedures(collectProcedure bool) normalizerOption

func WithCollectTables

func WithCollectTables(collectTables bool) normalizerOption

func WithDBMS

func WithDBMS(dbms DBMSType) lexerOption

func WithDollarQuotedFunc

func WithDollarQuotedFunc(dollarQuotedFunc bool) obfuscatorOption

func WithKeepIdentifierQuotation added in v0.0.9

func WithKeepIdentifierQuotation(keepIdentifierQuotation bool) normalizerOption

func WithKeepJsonPath added in v0.0.15

func WithKeepJsonPath(keepJsonPath bool) obfuscatorOption

func WithKeepSQLAlias

func WithKeepSQLAlias(keepSQLAlias bool) normalizerOption

func WithKeepTrailingSemicolon added in v0.0.9

func WithKeepTrailingSemicolon(keepTrailingSemicolon bool) normalizerOption

func WithRemoveSpaceBetweenParentheses added in v0.0.8

func WithRemoveSpaceBetweenParentheses(removeSpaceBetweenParentheses bool) normalizerOption

func WithReplaceBindParameter added in v0.0.18

func WithReplaceBindParameter(replaceBindParameter bool) obfuscatorOption

func WithReplaceBoolean added in v0.0.3

func WithReplaceBoolean(replaceBoolean bool) obfuscatorOption

func WithReplaceDigits

func WithReplaceDigits(replaceDigits bool) obfuscatorOption

func WithReplaceNull added in v0.0.3

func WithReplaceNull(replaceNull bool) obfuscatorOption

func WithReplacePositionalParameter added in v0.0.3

func WithReplacePositionalParameter(replacePositionalParameter bool) obfuscatorOption

func WithUppercaseKeywords added in v0.0.2

func WithUppercaseKeywords(uppercaseKeywords bool) normalizerOption

Types

type DBMSType added in v0.0.2

type DBMSType string
const (
	// DBMSSQLServer is a MS SQL
	DBMSSQLServer       DBMSType = "mssql"
	DBMSSQLServerAlias1 DBMSType = "sql-server" // .Net tracer
	DBMSSQLServerAlias2 DBMSType = "sqlserver"  // Java tracer
	// DBMSPostgres is a PostgreSQL Server
	DBMSPostgres       DBMSType = "postgresql"
	DBMSPostgresAlias1 DBMSType = "postgres" // Ruby, JavaScript tracers
	// DBMSMySQL is a MySQL Server
	DBMSMySQL DBMSType = "mysql"
	// DBMSOracle is a Oracle Server
	DBMSOracle DBMSType = "oracle"
	// DBMSSnowflake is a Snowflake Server
	DBMSSnowflake DBMSType = "snowflake"
)

type LastValueToken added in v0.1.0

type LastValueToken struct {
	Type  TokenType
	Value string
	// contains filtered or unexported fields
}

type Lexer

type Lexer struct {
	// contains filtered or unexported fields
}

SQL Lexer inspired from Rob Pike's talk on Lexical Scanning in Go

Example
query := "SELECT * FROM users WHERE id = 1"
lexer := New(query)

// Print tokens one by one
for {
	token := lexer.Scan()
	if token.Type == EOF {
		break
	}
	fmt.Println(token)
}
Output:

func New

func New(input string, opts ...lexerOption) *Lexer

func (*Lexer) Scan

func (s *Lexer) Scan() *Token

Scan scans the next token and returns it.

type LexerConfig added in v0.0.2

type LexerConfig struct {
	DBMS DBMSType `json:"dbms,omitempty"`
}

type Normalizer

type Normalizer struct {
	// contains filtered or unexported fields
}
Example
normalizer := NewNormalizer(
	WithCollectComments(true),
	WithCollectCommands(true),
	WithCollectTables(true),
	WithCollectProcedures(true),
	WithKeepSQLAlias(false),
)

normalizedSQL, statementMetadata, _ := normalizer.Normalize(
	`
		/* this is a comment */
		SELECT * FROM users WHERE id in (?, ?)
		`,
)

fmt.Println(normalizedSQL)
fmt.Println(statementMetadata)
Output:

SELECT * FROM users WHERE id in ( ? )
&{34 [users] [/* this is a comment */] [SELECT] []}

func NewNormalizer

func NewNormalizer(opts ...normalizerOption) *Normalizer

func (*Normalizer) Normalize

func (n *Normalizer) Normalize(input string, lexerOpts ...lexerOption) (normalizedSQL string, statementMetadata *StatementMetadata, err error)

type Obfuscator

type Obfuscator struct {
	// contains filtered or unexported fields
}
Example
obfuscator := NewObfuscator()
obfuscated := obfuscator.Obfuscate("SELECT * FROM users WHERE id = 1")
fmt.Println(obfuscated)
Output:

SELECT * FROM users WHERE id = ?

func NewObfuscator

func NewObfuscator(opts ...obfuscatorOption) *Obfuscator

func (*Obfuscator) Obfuscate

func (o *Obfuscator) Obfuscate(input string, lexerOpts ...lexerOption) string

Obfuscate takes an input SQL string and returns an obfuscated SQL string. The obfuscator replaces all literal values with a single placeholder

func (*Obfuscator) ObfuscateTokenValue added in v0.0.3

func (o *Obfuscator) ObfuscateTokenValue(token *Token, lastValueToken *LastValueToken, lexerOpts ...lexerOption)

type StatementMetadata

type StatementMetadata struct {
	Size       int      `json:"size"`
	Tables     []string `json:"tables"`
	Comments   []string `json:"comments"`
	Commands   []string `json:"commands"`
	Procedures []string `json:"procedures"`
}

func ObfuscateAndNormalize added in v0.0.3

func ObfuscateAndNormalize(input string, obfuscator *Obfuscator, normalizer *Normalizer, lexerOpts ...lexerOption) (normalizedSQL string, statementMetadata *StatementMetadata, err error)

ObfuscateAndNormalize takes an input SQL string and returns an normalized SQL string with metadata This function is a convenience function that combines the Obfuscator and Normalizer in one pass

type Token

type Token struct {
	Type  TokenType
	Value string
	// contains filtered or unexported fields
}

Token represents a SQL token with its type and value.

type TokenType

type TokenType int
const (
	ERROR TokenType = iota
	EOF
	SPACE                  // space or newline
	STRING                 // string literal
	INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
	NUMBER                 // number literal
	IDENT                  // identifier
	QUOTED_IDENT           // quoted identifier
	OPERATOR               // operator
	WILDCARD               // wildcard *
	COMMENT                // comment
	MULTILINE_COMMENT      // multiline comment
	PUNCTUATION            // punctuation
	DOLLAR_QUOTED_FUNCTION // dollar quoted function
	DOLLAR_QUOTED_STRING   // dollar quoted string
	POSITIONAL_PARAMETER   // numbered parameter
	BIND_PARAMETER         // bind parameter
	FUNCTION               // function
	SYSTEM_VARIABLE        // system variable
	UNKNOWN                // unknown token
	COMMAND                // SQL commands like SELECT, INSERT
	KEYWORD                // Other SQL keywords
	JSON_OP                // JSON operators
	BOOLEAN                // boolean literal
	NULL                   // null literal
	PROC_INDICATOR         // procedure indicator
	CTE_INDICATOR          // CTE indicator
	ALIAS_INDICATOR        // alias indicator
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL