index

package
v0.0.0-...-82d9017 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 20, 2023 License: BSD-3-Clause Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func GetVocab

func GetVocab(s string) (words []string)

func LoadPageRank

func LoadPageRank(file string) (map[keys.Key]float32, error)

func StorePageRank

func StorePageRank(file string, scores map[keys.Key]float32) error

Types

type CounterVocab

type CounterVocab map[string]int

A simple crawler.Processor that counts vocabulary occurrences.

func (CounterVocab) Frequency

func (counter CounterVocab) Frequency() (list []WordFrequency)

Get the word frequency list sorted in reverse order.

func (CounterVocab) Process

func (counter CounterVocab) Process(page *crawler.Page)

func (CounterVocab) Sum

func (counter CounterVocab) Sum() (sum int)

Total count of vocabulary across all documents. Must be called after all CounterVocab.Process() calls.

type KeyFloat32

type KeyFloat32 struct {
	Key keys.Key
	F32 float32
}
type Links struct {
	// contains filtered or unexported fields
}
func NewLinks(redirection map[keys.Key]keys.Key) Links

func (*Links) DevStats

func (pr *Links) DevStats(logger *slog.Logger)

func (*Links) PageRank

func (links *Links) PageRank(repeat int, epsilon float32) (int, map[keys.Key]float32)

func (*Links) Process

func (links *Links) Process(page *crawler.Page)

type ReverseIndex

type ReverseIndex map[keys.Key][]KeyFloat32

func LoadReverseIndex

func LoadReverseIndex(file string) (ReverseIndex, error)

func (ReverseIndex) Process

func (index ReverseIndex) Process(page *crawler.Page)

func (ReverseIndex) Sort

func (advanced ReverseIndex) Sort()

Sorts the map items by the order of the key.

func (ReverseIndex) Store

func (advanced ReverseIndex) Store(file string) error

type SortPageRankItem

type SortPageRankItem struct {
	keys.Key
	Rank float32
	URL  string
}

One item returned by SortPageRank.

func SortPageRank

func SortPageRank(db *crawldatabase.Database[crawler.Page], scores map[keys.Key]float32, limit int) ([]SortPageRankItem, error)

Returns the `limit` highest-ranked pages.

type WordFrequency

type WordFrequency struct {
	Word  string
	Count int
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL