document

package
v0.0.0-...-73d4566 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2025 License: GPL-3.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AddTypeID

func AddTypeID[WordID, VariantID comparable](id uint32)

func Parse

func Parse(str string) (string, []string)

func Root

func Root(str string) string

Root finds the prefix of the string containing letters and numbers.

Types

type ChangeSet

type ChangeSet[T any] struct {
	Added, Removed slice.Slice[T]
}

ChangeSet shows the IDs that were added and removed from a Document.

type Decoder

type Decoder[WordID, VariantID any] interface {
	IDToWord(WordID) string
	IDToVariant(VariantID) Variant
}

Decoder supplies the necessary decoding information to translate IDs into strings.

type Document

type Document[WordID, VariantID comparable] struct {
	ID
	entity.Key
	Start            string
	ByteLen, WordLen int
	// Words holds the root words present in the document. This slice can
	// be reordered without affecting the encoding.
	Words    []Locations[WordID]
	Variants *huffslice.Slice[VariantID]
	// contains filtered or unexported fields
}

Document is a string encoded as the root words. This makes identifying which words are in a document fast.

func (*Document[WordID, VariantID]) Bale

func (doc *Document[WordID, VariantID]) Bale() *DocumentBale[WordID, VariantID]

func (*Document[WordID, VariantID]) EntLoad

func (doc *Document[WordID, VariantID]) EntLoad(k entity.Key, data []byte) error

func (*Document[WordID, VariantID]) EntVal

func (doc *Document[WordID, VariantID]) EntVal(buf []byte) ([]byte, error)

func (*Document[WordID, VariantID]) Save

func (doc *Document[WordID, VariantID]) Save() (*entity.Ref[Document[WordID, VariantID], *Document[WordID, VariantID]], error)

func (*Document[WordID, VariantID]) WordIDs

func (doc *Document[WordID, VariantID]) WordIDs() []WordID

WordIDs returns a slice with all the WordIDs in the document.

type DocumentBale

type DocumentBale[WordID, VariantID comparable] struct {
	ID
	Start            string
	ByteLen, WordLen int
	// Words holds the root words present in the document. This slice can
	// be reordered without affecting the encoding.
	Words    []Locations[WordID]
	Variants *huffslice.SliceBale[VariantID]
}

func (*DocumentBale[WordID, VariantID]) EntRefs

func (bale *DocumentBale[WordID, VariantID]) EntRefs() []entity.Key

func (*DocumentBale[WordID, VariantID]) TypeID32

func (bale *DocumentBale[WordID, VariantID]) TypeID32() uint32

func (*DocumentBale[WordID, VariantID]) UnbaleTo

func (bale *DocumentBale[WordID, VariantID]) UnbaleTo(doc *Document[WordID, VariantID])

type DocumentDecoder

type DocumentDecoder[WordID, VariantID comparable] struct {
	Decoder[WordID, VariantID]
	WordSingleToken WordID
	VarSingleToken  VariantID
}

DocumentDecoder can decode a Document into a string.

func (DocumentDecoder[WordID, VariantID]) Decode

func (dec DocumentDecoder[WordID, VariantID]) Decode(doc *Document[WordID, VariantID]) string

Decode decodes a Document to a string.

type DocumentEncoder

type DocumentEncoder[WordID, VariantID comparable] struct {
	Encoder[WordID, VariantID]
	Splitter        func(string) (string, []string)
	RootVariant     func(string) (string, Variant)
	WordSingleToken WordID
	VarSingleToken  VariantID
}

DocumentEncoder can encode a string into a Document.

func (DocumentEncoder[WordID, VariantID]) AddTypeID

func (enc DocumentEncoder[WordID, VariantID]) AddTypeID(id uint32)

func (DocumentEncoder[WordID, VariantID]) Build

func (enc DocumentEncoder[WordID, VariantID]) Build(str string) *Document[WordID, VariantID]

Build takes a string and encodes it to a Document.

func (DocumentEncoder[WordID, VariantID]) Update

func (enc DocumentEncoder[WordID, VariantID]) Update(doc *Document[WordID, VariantID], str string) *ChangeSet[WordID]

Update updates the encoding of a document and returns a ChangeSet.

type Encoder

type Encoder[WordID, VariantID any] interface {
	WordToID(string) WordID
	VariantToID(Variant) VariantID
}

Encoder supplies the necessary encoding information to translate strings into IDs.

type ID

type ID uint32

func (ID) DocID

func (id ID) DocID() ID

ID fulfills DocIDer.

type Locations

type Locations[T comparable] struct {
	ID   T
	Idxs []uint32
}

Locations holds an ID and the index locations where that ID occurs.

type Variant

type Variant []byte

Variant encodes the casing of a word and the non-alphanumeric characters that follow the word.

func RootVariant

func RootVariant(str string) (string, Variant)

RootVariant finds the prefix of the string containing letters and numbers and the Variant needed to convert the root back to the original input.

func (Variant) Apply

func (v Variant) Apply(root string, buf []byte) []byte

Apply applies a variant to a word. It is expected that the root is all lower case. The casing will be changed according to the variant and non-alphanumeric runes will be appended.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL