Documentation ¶
Overview ¶
Package reader provides document loading functionality for go-llamaindex.
Index ¶
- func ExtractTextFromPDF(filePath string) (string, error)
- func ExtractTextFromPDFByPage(filePath string) ([]string, error)
- func GetPDFMetadata(filePath string) (map[string]string, error)
- func GetPDFPageCount(filePath string) (int, error)
- type CSVReader
- func (r *CSVReader) LoadData() ([]schema.Node, error)
- func (r *CSVReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *CSVReader) Metadata() ReaderMetadata
- func (r *CSVReader) WithConcatRows(concat bool) *CSVReader
- func (r *CSVReader) WithDelimiter(delimiter rune) *CSVReader
- func (r *CSVReader) WithHeader(hasHeader bool) *CSVReader
- func (r *CSVReader) WithMetadataColumns(columns ...string) *CSVReader
- func (r *CSVReader) WithRowSeparator(sep string) *CSVReader
- func (r *CSVReader) WithTextColumns(columns ...string) *CSVReader
- type CSVStreamReader
- type DocxReader
- func (r *DocxReader) LoadData() ([]schema.Node, error)
- func (r *DocxReader) LoadFromBytes(content []byte, sourceName string) ([]schema.Node, error)
- func (r *DocxReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *DocxReader) Metadata() ReaderMetadata
- func (r *DocxReader) WithExtractImages(extract bool) *DocxReader
- func (r *DocxReader) WithExtractMetadata(extract bool) *DocxReader
- func (r *DocxReader) WithExtractTables(extract bool) *DocxReader
- func (r *DocxReader) WithPreserveParagraphs(preserve bool) *DocxReader
- type ExcelReader
- func (r *ExcelReader) LoadData() ([]schema.Node, error)
- func (r *ExcelReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *ExcelReader) Metadata() ReaderMetadata
- func (r *ExcelReader) WithConcatRows(concat bool) *ExcelReader
- func (r *ExcelReader) WithConcatSheets(concat bool) *ExcelReader
- func (r *ExcelReader) WithHeader(hasHeader bool) *ExcelReader
- func (r *ExcelReader) WithMetadataColumns(columns ...string) *ExcelReader
- func (r *ExcelReader) WithRowSeparator(sep string) *ExcelReader
- func (r *ExcelReader) WithSheetSeparator(sep string) *ExcelReader
- func (r *ExcelReader) WithSheets(sheets ...string) *ExcelReader
- func (r *ExcelReader) WithTextColumns(columns ...string) *ExcelReader
- type FileReader
- type HTMLReader
- func (r *HTMLReader) LoadData() ([]schema.Node, error)
- func (r *HTMLReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *HTMLReader) Metadata() ReaderMetadata
- func (r *HTMLReader) WithPreserveWhitespace(preserve bool) *HTMLReader
- func (r *HTMLReader) WithTagsToExtract(tags ...string) *HTMLReader
- func (r *HTMLReader) WithTagsToRemove(tags ...string) *HTMLReader
- type JSONReader
- func (r *JSONReader) LoadData() ([]schema.Node, error)
- func (r *JSONReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *JSONReader) Metadata() ReaderMetadata
- func (r *JSONReader) WithJSONL(isJSONL bool) *JSONReader
- func (r *JSONReader) WithMetadataKeys(keys ...string) *JSONReader
- func (r *JSONReader) WithTextContentKey(key string) *JSONReader
- type LazyReader
- type MarkdownReader
- func (r *MarkdownReader) LoadData() ([]schema.Node, error)
- func (r *MarkdownReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *MarkdownReader) Metadata() ReaderMetadata
- func (r *MarkdownReader) WithRemoveHyperlinks(remove bool) *MarkdownReader
- func (r *MarkdownReader) WithRemoveImages(remove bool) *MarkdownReader
- func (r *MarkdownReader) WithSplitByHeaders(split bool, levels ...int) *MarkdownReader
- type PDFReader
- func (r *PDFReader) LazyLoadData() (<-chan schema.Node, <-chan error)
- func (r *PDFReader) LoadData() ([]schema.Node, error)
- func (r *PDFReader) LoadDataWithContext(ctx context.Context) ([]schema.Node, error)
- func (r *PDFReader) LoadFromFile(filePath string) ([]schema.Node, error)
- func (r *PDFReader) Metadata() ReaderMetadata
- func (r *PDFReader) WithExtraMetadata(metadata map[string]interface{}) *PDFReader
- func (r *PDFReader) WithSplitByPage(split bool) *PDFReader
- type PDFReaderOption
- func WithPDFExtraMetadata(metadata map[string]interface{}) PDFReaderOption
- func WithPDFInputDir(dir string) PDFReaderOption
- func WithPDFInputFiles(files ...string) PDFReaderOption
- func WithPDFPasswordFunc(fn func(filePath string) string) PDFReaderOption
- func WithPDFRecursive(recursive bool) PDFReaderOption
- func WithPDFSplitByPage(split bool) PDFReaderOption
- type Reader
- type ReaderError
- type ReaderMetadata
- type ReaderOptions
- type ReaderWithContext
- type ReaderWithMetadata
- type SimpleDirectoryReader
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
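func ExtractTextFromPDF ¶
func ExtractTextFromPDF(filePath string) (string, error)
ExtractTextFromPDF is a utility function to extract text from a PDF file.
func ExtractTextFromPDFByPage ¶
func ExtractTextFromPDFByPage(filePath string) ([]string, error)
ExtractTextFromPDFByPage extracts text from a PDF file, returning text per page.
func GetPDFMetadata ¶
func GetPDFMetadata(filePath string) (map[string]string, error)
GetPDFMetadata extracts metadata from a PDF file.
func GetPDFPageCount ¶
func GetPDFPageCount(filePath string) (int, error)
GetPDFPageCount returns the number of pages in a PDF file.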
Types ¶
type CSVReader ¶
type CSVReader struct {
// InputFiles is a list of CSV file paths to read
InputFiles []string
// InputDir is a directory containing CSV files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// Delimiter is the field delimiter (default: comma)
Delimiter rune
// HasHeader indicates if the first row is a header row
HasHeader bool
// TextColumns are column names or indices to use as document text.
// If empty, all columns are concatenated as text.
TextColumns []string
// MetadataColumns are column names or indices to extract as metadata.
// If empty, all non-text columns are used as metadata.
MetadataColumns []string
// ConcatRows determines if all rows should be concatenated into a single document.
// If false (default), each row becomes a separate document.
ConcatRows bool
// RowSeparator is used when ConcatRows is true (default: newline)
RowSeparator string
}
CSVReader reads CSV files and converts them to documents.
func NewCSVReader ¶
NewCSVReader creates a new CSVReader for specific files.
func NewCSVReaderFromDir ¶
NewCSVReaderFromDir creates a new CSVReader for a directory.
func (*CSVReader) LoadFromFile ¶
func (r *CSVReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single CSV file.
func (*CSVReader) Metadata ¶
func (r *CSVReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
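func (*CSVReader) WithConcatRows ¶
func (r *CSVReader) WithConcatRows(concat bool) *CSVReader
WithConcatRows sets whether to concatenate all rows into a single document.
func (*CSVReader) WithDelimiter ¶
func (r *CSVReader) WithDelimiter(delimiter rune) *CSVReader
WithDelimiter sets the field delimiter.
func (*CSVReader) WithHeader ¶
func (r *CSVReader) WithHeader(hasHeader bool) *CSVReader
WithHeader sets whether the first row is a header.
func (*CSVReader) WithMetadataColumns ¶
func (r *CSVReader) WithMetadataColumns(columns ...string) *CSVReader
WithMetadataColumns sets which columns to extract as metadata.
func (*CSVReader) WithRowSeparator ¶
func (r *CSVReader) WithRowSeparator(sep string) *CSVReader
WithRowSeparator sets the separator used when concatenating rows.
func (*CSVReader) WithTextColumns ¶
func (r *CSVReader) WithTextColumns(columns ...string) *CSVReader
WithTextColumns sets which columns to use as document text.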
type CSVStreamReader ¶
type CSVStreamReader struct {
*CSVReader
// contains filtered or unexported fields
}
CSVStreamReader provides streaming CSV reading for large files.
func NewCSVStreamReader ¶
func NewCSVStreamReader(filePath string) (*CSVStreamReader, error)
NewCSVStreamReader creates a streaming CSV reader.
func (*CSVStreamReader) Close ¶
func (r *CSVStreamReader) Close() error
Close closes the underlying file.
func (*CSVStreamReader) LazyLoadData ¶
func (r *CSVStreamReader) LazyLoadData() (<-chan schema.Node, <-chan error)
LazyLoadData returns a channel that yields documents one at a time.
func (*CSVStreamReader) ReadHeaders ¶
func (r *CSVStreamReader) ReadHeaders() ([]string, error)
ReadHeaders reads and returns the header row.
type DocxReader ¶
type DocxReader struct {
// InputFiles is a list of DOCX file paths to read
InputFiles []string
// InputDir is a directory containing DOCX files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// ExtractImages determines if images should be extracted as separate nodes
ExtractImages bool
// PreserveParagraphs keeps paragraph breaks in the output
PreserveParagraphs bool
// ExtractMetadata extracts document properties (author, title, etc.)
ExtractMetadata bool
// ExtractTables extracts table content
ExtractTables bool
}
DocxReader reads Microsoft Word (.docx) files and converts them to documents.
func NewDocxReader ¶
func NewDocxReader(inputFiles ...string) *DocxReader
NewDocxReader creates a new DocxReader for specific files.
func NewDocxReaderFromDir ¶
func NewDocxReaderFromDir(inputDir string, recursive bool) *DocxReader
NewDocxReaderFromDir creates a new DocxReader for a directory.
func (*DocxReader) LoadData ¶
func (r *DocxReader) LoadData() ([]schema.Node, error)
LoadData loads DOCX files and returns documents.
func (*DocxReader) LoadFromBytes ¶
func (r *DocxReader) LoadFromBytes(content []byte, sourceName string) ([]schema.Node, error)
LoadFromBytes loads a DOCX from byte content.
func (*DocxReader) LoadFromFile ¶
func (r *DocxReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single DOCX file.
func (*DocxReader) Metadata ¶
func (r *DocxReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
func (*DocxReader) WithExtractImages ¶
func (r *DocxReader) WithExtractImages(extract bool) *DocxReader
WithExtractImages enables image extraction.
func (*DocxReader) WithExtractMetadata ¶
func (r *DocxReader) WithExtractMetadata(extract bool) *DocxReader
WithExtractMetadata sets whether to extract document properties.
func (*DocxReader) WithExtractTables ¶
func (r *DocxReader) WithExtractTables(extract bool) *DocxReader
WithExtractTables sets whether to extract table content.
func (*DocxReader) WithPreserveParagraphs ¶
func (r *DocxReader) WithPreserveParagraphs(preserve bool) *DocxReader
WithPreserveParagraphs sets whether to preserve paragraph breaks.
type ExcelReader ¶
type ExcelReader struct {
// InputFiles is a list of Excel file paths to read
InputFiles []string
// InputDir is a directory containing Excel files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// SheetNames specifies which sheets to read. If empty, all sheets are read.
SheetNames []string
// HasHeader indicates if the first row is a header row
HasHeader bool
// TextColumns are column names or indices to use as document text.
// If empty, all columns are concatenated as text.
TextColumns []string
// MetadataColumns are column names or indices to extract as metadata.
// If empty, all non-text columns are used as metadata.
MetadataColumns []string
// ConcatRows determines if all rows should be concatenated into a single document.
// If false (default), each row becomes a separate document.
ConcatRows bool
// ConcatSheets determines if all sheets should be concatenated into a single document.
// If false (default), each sheet is processed separately.
ConcatSheets bool
// RowSeparator is used when ConcatRows is true (default: newline)
RowSeparator string
// SheetSeparator is used when ConcatSheets is true (default: double newline)
SheetSeparator string
}
ExcelReader reads Excel files (.xlsx, .xlsm) and converts them to documents.
func NewExcelReader ¶
func NewExcelReader(inputFiles ...string) *ExcelReader
NewExcelReader creates a new ExcelReader for specific files.
func NewExcelReaderFromDir ¶
func NewExcelReaderFromDir(inputDir string, recursive bool) *ExcelReader
NewExcelReaderFromDir creates a new ExcelReader for a directory.
func (*ExcelReader) LoadData ¶
func (r *ExcelReader) LoadData() ([]schema.Node, error)
LoadData loads Excel files and returns documents.
func (*ExcelReader) LoadFromFile ¶
func (r *ExcelReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single Excel file.
func (*ExcelReader) Metadata ¶
func (r *ExcelReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
func (*ExcelReader) WithConcatRows ¶
func (r *ExcelReader) WithConcatRows(concat bool) *ExcelReader
WithConcatRows sets whether to concatenate all rows into a single document.
func (*ExcelReader) WithConcatSheets ¶
func (r *ExcelReader) WithConcatSheets(concat bool) *ExcelReader
WithConcatSheets sets whether to concatenate all sheets into a single document.
func (*ExcelReader) WithHeader ¶
func (r *ExcelReader) WithHeader(hasHeader bool) *ExcelReader
WithHeader sets whether the first row is a header.
func (*ExcelReader) WithMetadataColumns ¶
func (r *ExcelReader) WithMetadataColumns(columns ...string) *ExcelReader
WithMetadataColumns sets which columns to extract as metadata.
func (*ExcelReader) WithRowSeparator ¶
func (r *ExcelReader) WithRowSeparator(sep string) *ExcelReader
WithRowSeparator sets the separator used when concatenating rows.
func (*ExcelReader) WithSheetSeparator ¶
func (r *ExcelReader) WithSheetSeparator(sep string) *ExcelReader
WithSheetSeparator sets the separator used when concatenating sheets.
func (*ExcelReader) WithSheets ¶
func (r *ExcelReader) WithSheets(sheets ...string) *ExcelReader
WithSheets sets which sheets to read.
func (*ExcelReader) WithTextColumns ¶
func (r *ExcelReader) WithTextColumns(columns ...string) *ExcelReader
WithTextColumns sets which columns to use as document text.
type FileReader ¶
type FileReader interface {
Reader
// LoadFromFile loads a document from a specific file path.
LoadFromFile(filePath string) ([]schema.Node, error)
}
FileReader is a Reader that loads from file paths.
type HTMLReader ¶
type HTMLReader struct {
// InputFiles is a list of HTML file paths to read
InputFiles []string
// InputDir is a directory containing HTML files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// TagsToExtract specifies which HTML tags to extract text from.
// If empty, extracts from body. Common values: "p", "div", "article", "main"
TagsToExtract []string
// TagsToRemove specifies which HTML tags to remove entirely (e.g., "script", "style")
TagsToRemove []string
// PreserveWhitespace keeps original whitespace formatting
PreserveWhitespace bool
}
HTMLReader reads HTML files and extracts text content.
func NewHTMLReader ¶
func NewHTMLReader(inputFiles ...string) *HTMLReader
NewHTMLReader creates a new HTMLReader for specific files.
func NewHTMLReaderFromDir ¶
func NewHTMLReaderFromDir(inputDir string, recursive bool) *HTMLReader
NewHTMLReaderFromDir creates a new HTMLReader for a directory.
func (*HTMLReader) LoadData ¶
func (r *HTMLReader) LoadData() ([]schema.Node, error)
LoadData loads HTML files and returns documents.
func (*HTMLReader) LoadFromFile ¶
func (r *HTMLReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single HTML file.
func (*HTMLReader) Metadata ¶
func (r *HTMLReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
func (*HTMLReader) WithPreserveWhitespace ¶
func (r *HTMLReader) WithPreserveWhitespace(preserve bool) *HTMLReader
WithPreserveWhitespace enables whitespace preservation.
func (*HTMLReader) WithTagsToExtract ¶
func (r *HTMLReader) WithTagsToExtract(tags ...string) *HTMLReader
WithTagsToExtract sets which tags to extract text from.
func (*HTMLReader) WithTagsToRemove ¶
func (r *HTMLReader) WithTagsToRemove(tags ...string) *HTMLReader
WithTagsToRemove sets which tags to remove entirely.
type JSONReader ¶
type JSONReader struct {
// InputFiles is a list of JSON file paths to read
InputFiles []string
// InputDir is a directory containing JSON files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// TextContentKey is the JSON key to use as document text content.
// If empty, the entire JSON is serialized as text.
TextContentKey string
// MetadataKeys are JSON keys to extract as document metadata.
// If empty, all non-text keys are used as metadata.
MetadataKeys []string
// IsJSONL indicates if files are JSON Lines format (one JSON object per line)
IsJSONL bool
}
JSONReader reads JSON files and converts them to documents.
func NewJSONReader ¶
func NewJSONReader(inputFiles ...string) *JSONReader
NewJSONReader creates a new JSONReader for specific files.
func NewJSONReaderFromDir ¶
func NewJSONReaderFromDir(inputDir string, recursive bool) *JSONReader
NewJSONReaderFromDir creates a new JSONReader for a directory.
func (*JSONReader) LoadData ¶
func (r *JSONReader) LoadData() ([]schema.Node, error)
LoadData loads JSON files and returns documents.
func (*JSONReader) LoadFromFile ¶
func (r *JSONReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single JSON file.
func (*JSONReader) Metadata ¶
func (r *JSONReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
func (*JSONReader) WithJSONL ¶
func (r *JSONReader) WithJSONL(isJSONL bool) *JSONReader
WithJSONL enables JSON Lines format parsing.
func (*JSONReader) WithMetadataKeys ¶
func (r *JSONReader) WithMetadataKeys(keys ...string) *JSONReader
WithMetadataKeys sets the keys to extract as metadata.
func (*JSONReader) WithTextContentKey ¶
func (r *JSONReader) WithTextContentKey(key string) *JSONReader
WithTextContentKey sets the key to use for document text content.
type LazyReader ¶
type LazyReader interface {
Reader
// LazyLoadData returns a channel that yields documents one at a time.
// The channel is closed when all documents have been loaded or an error occurs.
LazyLoadData() (<-chan schema.Node, <-chan error)
}
LazyReader is a Reader that can load documents lazily via a channel.
type MarkdownReader ¶
type MarkdownReader struct {
// InputFiles is a list of Markdown file paths to read
InputFiles []string
// InputDir is a directory containing Markdown files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// RemoveHyperlinks removes hyperlinks from the text
RemoveHyperlinks bool
// RemoveImages removes image references from the text
RemoveImages bool
// SplitByHeaders splits document into multiple nodes by headers
SplitByHeaders bool
// HeadersToSplitOn specifies which header levels to split on (e.g., []int{1, 2})
HeadersToSplitOn []int
}
MarkdownReader reads Markdown files and converts them to documents.
func NewMarkdownReader ¶
func NewMarkdownReader(inputFiles ...string) *MarkdownReader
NewMarkdownReader creates a new MarkdownReader for specific files.
func NewMarkdownReaderFromDir ¶
func NewMarkdownReaderFromDir(inputDir string, recursive bool) *MarkdownReader
NewMarkdownReaderFromDir creates a new MarkdownReader for a directory.
func (*MarkdownReader) LoadData ¶
func (r *MarkdownReader) LoadData() ([]schema.Node, error)
LoadData loads Markdown files and returns documents.
func (*MarkdownReader) LoadFromFile ¶
func (r *MarkdownReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single Markdown file.
func (*MarkdownReader) Metadata ¶
func (r *MarkdownReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
func (*MarkdownReader) WithRemoveHyperlinks ¶
func (r *MarkdownReader) WithRemoveHyperlinks(remove bool) *MarkdownReader
WithRemoveHyperlinks enables hyperlink removal.
func (*MarkdownReader) WithRemoveImages ¶
func (r *MarkdownReader) WithRemoveImages(remove bool) *MarkdownReader
WithRemoveImages enables image reference removal.
func (*MarkdownReader) WithSplitByHeaders ¶
func (r *MarkdownReader) WithSplitByHeaders(split bool, levels ...int) *MarkdownReader
WithSplitByHeaders enables splitting by headers.
type PDFReader ¶
type PDFReader struct {
// InputFiles is a list of PDF file paths to read
InputFiles []string
// InputDir is a directory containing PDF files
InputDir string
// Recursive determines if subdirectories should be searched
Recursive bool
// SplitByPage creates separate nodes for each page
SplitByPage bool
// ExtraMetadata is additional metadata to add to all documents
ExtraMetadata map[string]interface{}
// PasswordFunc is a function that returns the password for a PDF file.
// The function receives the file path and should return the password.
PasswordFunc func(filePath string) string
}
PDFReader reads PDF files and converts them to documents. It uses the ledongthuc/pdf library for text extraction.
func NewPDFReader ¶
NewPDFReader creates a new PDFReader for specific files.
func NewPDFReaderFromDir ¶
NewPDFReaderFromDir creates a new PDFReader for a directory.
func NewPDFReaderWithOptions ¶
func NewPDFReaderWithOptions(opts ...PDFReaderOption) *PDFReader
NewPDFReaderWithOptions creates a new PDFReader with options.
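func (*PDFReader) LazyLoadData ¶
func (r *PDFReader) LazyLoadData() (<-chan schema.Node, <-chan error)
LazyLoadData returns a channel that yields documents one at a time.
func (*PDFReader) LoadDataWithContext ¶
func (r *PDFReader) LoadDataWithContext(ctx context.Context) ([]schema.Node, error)
LoadDataWithContext loads PDF files with context support.
func (*PDFReader) LoadFromFile ¶
func (r *PDFReader) LoadFromFile(filePath string) ([]schema.Node, error)
LoadFromFile loads a single PDF file.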
func (*PDFReader) Metadata ¶
func (r *PDFReader) Metadata() ReaderMetadata
Metadata returns reader metadata.
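func (*PDFReader) WithExtraMetadata ¶
func (r *PDFReader) WithExtraMetadata(metadata map[string]interface{}) *PDFReader
WithExtraMetadata sets extra metadata (fluent API).
func (*PDFReader) WithSplitByPage ¶
func (r *PDFReader) WithSplitByPage(split bool) *PDFReader
WithSplitByPage enables splitting by page (fluent API).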
type PDFReaderOption ¶
type PDFReaderOption func(*PDFReader)
PDFReaderOption configures PDFReader.
func WithPDFExtraMetadata ¶
func WithPDFExtraMetadata(metadata map[string]interface{}) PDFReaderOption
WithPDFExtraMetadata sets extra metadata.
func WithPDFInputDir ¶
func WithPDFInputDir(dir string) PDFReaderOption
WithPDFInputDir sets the input directory.
func WithPDFInputFiles ¶
func WithPDFInputFiles(files ...string) PDFReaderOption
WithPDFInputFiles sets the input files.
func WithPDFPasswordFunc ¶
func WithPDFPasswordFunc(fn func(filePath string) string) PDFReaderOption
WithPDFPasswordFunc sets the password function.
func WithPDFRecursive ¶
func WithPDFRecursive(recursive bool) PDFReaderOption
WithPDFRecursive enables recursive directory scanning.
func WithPDFSplitByPage ¶
func WithPDFSplitByPage(split bool) PDFReaderOption
WithPDFSplitByPage enables splitting by page.
type Reader ¶
type Reader interface {
// LoadData loads documents and returns them as a slice.
LoadData() ([]schema.Node, error)
}
Reader is the interface for document loaders. Implementations should load documents from various sources (files, URLs, etc.).
type ReaderError ¶
type ReaderError struct {
Source string // File path or URL that caused the error
Message string
Err error
}
ReaderError represents an error during document loading.
func NewReaderError ¶
func NewReaderError(source, message string, err error) *ReaderError
NewReaderError creates a new ReaderError.
func (*ReaderError) Error ¶
func (e *ReaderError) Error() string
func (*ReaderError) Unwrap ¶
func (e *ReaderError) Unwrap() error
type ReaderMetadata ¶
type ReaderMetadata struct {
// Name is the reader name (e.g., "JSONReader", "PDFReader")
Name string
// SupportedExtensions lists file extensions this reader supports
SupportedExtensions []string
// Description describes what this reader does
Description string
}
ReaderMetadata contains metadata about a reader.
type ReaderOptions ¶
type ReaderOptions struct {
// Recursive determines if directory readers should recurse into subdirectories
Recursive bool
// FileExtensions filters which file extensions to process
FileExtensions []string
// ExcludePatterns are glob patterns for files/dirs to exclude
ExcludePatterns []string
// IncludeHidden determines if hidden files should be included
IncludeHidden bool
// NumWorkers is the number of concurrent workers for parallel loading
NumWorkers int
// ExtraMetadata is additional metadata to add to all loaded documents
ExtraMetadata map[string]interface{}
}
ReaderOptions contains common options for readers.
func DefaultReaderOptions ¶
func DefaultReaderOptions() ReaderOptions
DefaultReaderOptions returns default reader options.
type ReaderWithContext ¶
type ReaderWithContext interface {
Reader
// LoadDataWithContext loads documents with context support.
LoadDataWithContext(ctx context.Context) ([]schema.Node, error)
}
ReaderWithContext is a Reader that supports context for cancellation.
type ReaderWithMetadata ¶
type ReaderWithMetadata interface {
Reader
// Metadata returns information about this reader.
Metadata() ReaderMetadata
}
ReaderWithMetadata is a Reader that provides metadata about itself.
type SimpleDirectoryReader ¶
type SimpleDirectoryReader struct {
// contains filtered or unexported fields
}
SimpleDirectoryReader reads files from a directory.
func NewSimpleDirectoryReader ¶
func NewSimpleDirectoryReader(inputDir string, extensions ...string) *SimpleDirectoryReader
NewSimpleDirectoryReader creates a new SimpleDirectoryReader.