rag

package module
v0.0.0-...-f761944 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 5, 2025 License: AGPL-3.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AddChunkRequest

// AddChunkRequest is the JSON request body for adding a chunk.
// Content is always serialized; ImportantKeywords and Questions are left out
// of the JSON when empty (omitempty).
type AddChunkRequest struct {
	Content           string   `json:"content"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Questions         []string `json:"questions,omitempty"`
}

AddChunkRequest 添加分块请求

type AddChunkResponse

// AddChunkResponse is the JSON envelope for an add-chunk response: a numeric
// code plus a data object that wraps one Chunk under the "chunk" key.
// NOTE(review): the meaning of individual Code values is not visible here.
type AddChunkResponse struct {
	Code int `json:"code"`
	Data struct {
		Chunk Chunk `json:"chunk"`
	} `json:"data"`
}

type AddModelConfigRequest

// AddModelConfigRequest is the JSON request body for adding a model
// configuration. Fields from Provider through Enabled are always serialized;
// the remaining fields are omitted from the JSON when zero or empty.
// Config is carried as raw, undecoded JSON.
// NOTE(review): units for Timeout, CreateTime and UpdateTime are not visible
// here — confirm against the server.
type AddModelConfigRequest struct {
	Provider    string          `json:"provider"` // provider identifier, e.g. openai-compatible-api
	Name        string          `json:"name"`
	TaskType    string          `json:"task_type"` // embedding, rerank, chat
	ApiBase     string          `json:"api_base"`
	ApiKey      string          `json:"api_key"`
	MaxTokens   int             `json:"max_tokens"`
	IsDefault   bool            `json:"is_default"` // whether this is the default
	Enabled     bool            `json:"enabled"`    // whether it is enabled
	Config      json.RawMessage `json:"config,omitempty"`
	Description string          `json:"description,omitempty"`
	Version     string          `json:"version,omitempty"`
	Timeout     int             `json:"timeout,omitempty"`
	CreateTime  int64           `json:"create_time,omitempty"`
	UpdateTime  int64           `json:"update_time,omitempty"`
	Owner       string          `json:"owner,omitempty"`
	QuotaLimit  int             `json:"quota_limit,omitempty"`
}

type AddModelConfigResponse

// AddModelConfigResponse is the JSON envelope for an add-model-config
// response: a numeric code plus a ModelConfig under the "data" key.
type AddModelConfigResponse struct {
	Code int         `json:"code"`
	Data ModelConfig `json:"data"`
}

type ChatMessage

// ChatMessage is the chat message structure: a role and that role's content.
// NOTE(review): the set of accepted Role values is not visible here.
type ChatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

ChatMessage 聊天消息结构

type Chunk

// Chunk represents a chunk object.
// CreateTime is carried as a string and CreateTimestamp as a float64.
// NOTE(review): their format and unit are not visible here.
type Chunk struct {
	ID                string   `json:"id"`                 // chunk ID
	Content           string   `json:"content"`            // chunk content
	DocumentID        string   `json:"document_id"`        // ID of the owning document
	DatasetID         string   `json:"dataset_id"`         // ID of the owning dataset
	GroupIDs          []int    `json:"group_ids"`          // permission groups
	ImportantKeywords []string `json:"important_keywords"` // keywords
	Questions         []string `json:"questions"`          // related questions
	Available         bool     `json:"available"`          // whether the chunk is available
	CreateTime        string   `json:"create_time"`
	CreateTimestamp   float64  `json:"create_timestamp"`
}

Chunk 表示一个分块对象

type Client

// Client is the unified client for all of the package's APIs.
// Its fields are unexported; values are obtained from New.
type Client struct {
	// contains filtered or unexported fields
}

Client 是所有API的统一客户端

func New

func New(apiBase string, apiKey string, opts ...ClientOption) *Client

New 创建一个新的API客户端

func (*Client) AddChunk

func (c *Client) AddChunk(ctx context.Context, datasetID, documentID string, req AddChunkRequest) (*Chunk, error)

AddChunk 向指定文档添加分块

func (*Client) AddModelConfig

func (c *Client) AddModelConfig(ctx context.Context, req AddModelConfigRequest) (*ModelConfig, error)

AddModelConfig 添加模型配置

func (*Client) CreateDataset

func (c *Client) CreateDataset(ctx context.Context, req CreateDatasetRequest) (*Dataset, error)

CreateDataset 创建数据集

func (*Client) DeleteChunks

func (c *Client) DeleteChunks(ctx context.Context, datasetID, documentID string, chunkIDs []string) error

DeleteChunks 删除指定文档的分块(支持批量)

func (*Client) DeleteDatasets

func (c *Client) DeleteDatasets(ctx context.Context, ids []string) error

DeleteDatasets 删除数据集(支持批量)

func (*Client) DeleteDocuments

func (c *Client) DeleteDocuments(ctx context.Context, datasetID string, ids []string) error

DeleteDocuments 删除文档(支持批量)

func (*Client) DeleteModelConfig

func (c *Client) DeleteModelConfig(ctx context.Context, models []ModelItem) error

func (*Client) DownloadDocument

func (c *Client) DownloadDocument(ctx context.Context, datasetID, documentID, outputPath string) error

DownloadDocument 下载文档到本地

func (*Client) GetModelConfigList

func (c *Client) GetModelConfigList(ctx context.Context) ([]ModelConfig, error)

func (*Client) ListChunks

func (c *Client) ListChunks(ctx context.Context, datasetID, documentID string, params map[string]string) ([]Chunk, int, error)

ListChunks 列出指定文档的分块

func (*Client) ListDatasets

func (c *Client) ListDatasets(ctx context.Context, req ListDatasetsRequest) ([]Dataset, error)

ListDatasets 列出数据集

func (*Client) ListDocuments

func (c *Client) ListDocuments(ctx context.Context, datasetID string, params map[string]string) ([]Document, int, error)

ListDocuments 列出文档

func (*Client) ParseDocuments

func (c *Client) ParseDocuments(ctx context.Context, datasetID string, documentIDs []string) error

ParseDocuments 解析指定文档(批量)

func (*Client) RelatedQuestions

func (c *Client) RelatedQuestions(ctx context.Context, loginToken string, req RelatedQuestionsRequest) ([]string, error)

RelatedQuestions 生成相关问题(多样化检索) 注意:该接口需要 Bearer Login Token,通常与API Key不同

func (*Client) RetrieveChunks

func (c *Client) RetrieveChunks(ctx context.Context, req RetrievalRequest) ([]RetrievalChunk, int, string, error)

RetrieveChunks 检索分块(向量/关键词检索)

func (*Client) StopParseDocuments

func (c *Client) StopParseDocuments(ctx context.Context, datasetID string, documentIDs []string) error

StopParseDocuments 停止解析指定文档(批量)

func (*Client) UpdateChunk

func (c *Client) UpdateChunk(ctx context.Context, datasetID, documentID, chunkID string, req UpdateChunkRequest) error

UpdateChunk 更新指定分块内容

func (*Client) UpdateDataset

func (c *Client) UpdateDataset(ctx context.Context, datasetID string, req UpdateDatasetRequest) error

UpdateDataset 更新数据集

func (*Client) UpdateDocument

func (c *Client) UpdateDocument(ctx context.Context, datasetID, documentID string, reqBody UpdateDocumentRequest) error

UpdateDocument 更新文档

func (*Client) UpdateDocumentGroupIDs

func (c *Client) UpdateDocumentGroupIDs(ctx context.Context, datasetID, documentID string, groupIDs []int) error

UpdateDocumentGroupIDs 更新单个文档的权限

func (*Client) UpdateDocumentText

func (c *Client) UpdateDocumentText(ctx context.Context, datasetID string, documentID string, content string) (*Document, error)

UpdateDocumentText 更新文档内容 由于后端不支持直接更新文档,此函数会先删除旧文档,然后创建新文档

func (*Client) UpdateDocumentsGroupIDsBatch

func (c *Client) UpdateDocumentsGroupIDsBatch(ctx context.Context, datasetID string, documentIDs []string, groupIDs []int) error

UpdateDocumentsGroupIDsBatch 批量更新文档的权限

func (*Client) UploadDocumentText

func (c *Client) UploadDocumentText(ctx context.Context, datasetID string, jsonStr string) ([]Document, error)

UploadDocumentText 上传文本内容为文档 jsonStr 形如 {"filename": "xxx.txt", "content": "...", "file_type": "text/plain", "group_ids": [1,2,3]}

func (*Client) UploadDocumentTextAndParse

func (c *Client) UploadDocumentTextAndParse(ctx context.Context, datasetID string, jsonStr string) ([]Document, error)

UploadDocumentTextAndParse 上传文本内容为文档并解析

func (*Client) UploadDocuments

func (c *Client) UploadDocuments(ctx context.Context, datasetID string, filePaths []string, groupIDs []int) ([]Document, error)

UploadDocuments 上传文档(支持多文件和权限设置)

func (*Client) UploadDocumentsAndParse

func (c *Client) UploadDocumentsAndParse(ctx context.Context, datasetID string, filePaths []string, groupIDs []int) ([]Document, error)

UploadDocumentsAndParse 上传文档并解析(支持多文件和权限设置)

type ClientOption

// ClientOption is a function that receives a *Client; New accepts any number
// of them as trailing arguments, and WithHTTPClient returns one.
type ClientOption func(*Client)

func WithHTTPClient

func WithHTTPClient(httpClient *http.Client) ClientOption

WithHTTPClient 自定义http.Client

type CommonResponse

// CommonResponse is a minimal JSON response envelope holding a numeric code
// and a message string.
// NOTE(review): which Code value denotes success is not visible here.
type CommonResponse struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

type CreateDatasetRequest

// CreateDatasetRequest is the JSON request body for creating a dataset.
// Name is always serialized; the string and int fields after it are omitted
// when empty or zero.
//
// NOTE(review): ParserConfig is a struct value, and encoding/json's omitempty
// never treats a struct as empty, so a "parser_config" object is serialized
// even when nothing was set (as {} because all of its own fields are
// omitempty). Confirm the server tolerates that.
type CreateDatasetRequest struct {
	Name           string       `json:"name"`
	Avatar         string       `json:"avatar,omitempty"`
	Description    string       `json:"description,omitempty"`
	EmbeddingModel string       `json:"embedding_model,omitempty"`
	Permission     string       `json:"permission,omitempty"`
	ChunkMethod    string       `json:"chunk_method,omitempty"`
	Pagerank       int          `json:"pagerank,omitempty"`
	ParserConfig   ParserConfig `json:"parser_config,omitempty"`
}

CreateDatasetRequest 创建数据集请求

type CreateDatasetResponse

// CreateDatasetResponse is the JSON envelope for a create-dataset response:
// a numeric code plus a Dataset under the "data" key.
type CreateDatasetResponse struct {
	Code int     `json:"code"`
	Data Dataset `json:"data"`
}

type Dataset

// Dataset represents a dataset object and carries all of its basic
// attributes. Every field is always serialized (no omitempty).
// NOTE(review): CreateDate/UpdateDate are strings and CreateTime/UpdateTime
// are int64; their format and unit are not visible here.
type Dataset struct {
	ID                     string       `json:"id"`              // dataset ID
	Name                   string       `json:"name"`            // dataset name
	Avatar                 string       `json:"avatar"`          // avatar (Base64)
	Description            string       `json:"description"`     // description
	EmbeddingModel         string       `json:"embedding_model"` // embedding model
	Permission             string       `json:"permission"`      // permission
	ChunkMethod            string       `json:"chunk_method"`    // chunking method
	Pagerank               int          `json:"pagerank"`        // PageRank
	ParserConfig           ParserConfig `json:"parser_config"`   // parser configuration
	ChunkCount             int          `json:"chunk_count"`     // number of chunks
	CreateDate             string       `json:"create_date"`
	CreateTime             int64        `json:"create_time"`
	CreatedBy              string       `json:"created_by"`
	DocumentCount          int          `json:"document_count"`
	Language               string       `json:"language"`
	SimilarityThreshold    float64      `json:"similarity_threshold"`
	Status                 string       `json:"status"`
	TenantID               string       `json:"tenant_id"`
	TokenNum               int          `json:"token_num"`
	UpdateDate             string       `json:"update_date"`
	UpdateTime             int64        `json:"update_time"`
	VectorSimilarityWeight float64      `json:"vector_similarity_weight"`
}

Dataset 表示一个数据集对象 包含所有基础属性

type DeleteChunksRequest

// DeleteChunksRequest is the JSON request body for deleting chunks.
// The chunk IDs are serialized under the "chunk_ids" key.
type DeleteChunksRequest struct {
	ChunkIDs []string `json:"chunk_ids"`
}

DeleteChunksRequest 删除分块请求

type DeleteChunksResponse

// DeleteChunksResponse is the JSON envelope for a delete-chunks response;
// it declares only the numeric code field.
type DeleteChunksResponse struct {
	Code int `json:"code"`
}

type DeleteDatasetsRequest

// DeleteDatasetsRequest is the JSON request body for deleting datasets.
// The dataset IDs are serialized under the "ids" key.
type DeleteDatasetsRequest struct {
	IDs []string `json:"ids"`
}

DeleteDatasetsRequest 删除数据集请求

type DeleteDatasetsResponse

// DeleteDatasetsResponse is the JSON envelope for a delete-datasets response;
// it declares only the numeric code field.
type DeleteDatasetsResponse struct {
	Code int `json:"code"`
}

type DeleteDocumentsRequest

// DeleteDocumentsRequest is the JSON request body for deleting documents.
// The document IDs are serialized under the "ids" key.
type DeleteDocumentsRequest struct {
	IDs []string `json:"ids"`
}

DeleteDocumentsRequest 删除文档请求

type DeleteDocumentsResponse

// DeleteDocumentsResponse is the JSON envelope for a delete-documents
// response; it declares only the numeric code field.
type DeleteDocumentsResponse struct {
	Code int `json:"code"`
}

type DeleteModelConfigsRequest

// DeleteModelConfigsRequest is a JSON request body that can name model
// configurations either by ID (serialized under "ids") or as ModelItem
// values (serialized under "models"). Each list is omitted when empty.
// NOTE(review): how the server treats a body with both lists set, or neither,
// is not visible here.
type DeleteModelConfigsRequest struct {
	ModelIDs []string    `json:"ids,omitempty"`
	Models   []ModelItem `json:"models,omitempty"`
}

type Document

// Document represents a document object. Every field is always serialized
// (no omitempty). ParserConfig is left untyped, so it decodes to whatever
// generic JSON value is received.
//
// The JSON key for ProcessDuration is spelled "process_duation" in the tag.
// NOTE(review): presumably this mirrors the server's field name — confirm
// before "correcting" it, since changing the tag changes the wire format.
type Document struct {
	ID              string  `json:"id"`            // document ID
	Name            string  `json:"name"`          // document name
	Location        string  `json:"location"`      // storage location
	DatasetID       string  `json:"dataset_id"`    // ID of the owning dataset
	GroupIDs        []int   `json:"group_ids"`     // permission groups
	CreatedBy       string  `json:"created_by"`    // creator
	ChunkMethod     string  `json:"chunk_method"`  // chunking method
	ParserConfig    any     `json:"parser_config"` // parser configuration
	Run             string  `json:"run"`           // processing state
	Size            int64   `json:"size"`          // file size
	Thumbnail       string  `json:"thumbnail"`     // thumbnail
	Type            string  `json:"type"`          // type
	Status          string  `json:"status"`        // status
	CreateDate      string  `json:"create_date"`
	CreateTime      int64   `json:"create_time"`
	UpdateDate      string  `json:"update_date"`
	UpdateTime      int64   `json:"update_time"`
	ChunkCount      int     `json:"chunk_count"`
	TokenCount      int     `json:"token_count"`
	SourceType      string  `json:"source_type"`
	ProcessBeginAt  string  `json:"process_begin_at"`
	ProcessDuration float64 `json:"process_duation"`
	Progress        float64 `json:"progress"`
	ProgressMsg     string  `json:"progress_msg"`
}

Document 表示一个文档对象

type GraphragConfig

// GraphragConfig is the GraphRAG configuration. Per the package docs it is
// meant to match the Python version's fields: use_graphrag, entity_types,
// method, community, resolution.
// UseGraphRAG is always serialized; the other fields are omitted from the
// JSON when empty or false, so an explicit false for Community or Resolution
// is never sent.
type GraphragConfig struct {
	UseGraphRAG bool     `json:"use_graphrag"`
	EntityTypes []string `json:"entity_types,omitempty"`
	Method      string   `json:"method,omitempty"`
	Community   bool     `json:"community,omitempty"`
	Resolution  bool     `json:"resolution,omitempty"`
}

GraphragConfig 配置 完全适配 Python 版本 use_graphrag, entity_types, method, community, resolution

type ListChunksResponse

// ListChunksResponse is the JSON envelope for a chunk-list response: a
// numeric code plus a data object holding the chunks and a total count.
// NOTE(review): whether Total counts all matches or only this page is not
// visible here.
type ListChunksResponse struct {
	Code int `json:"code"`
	Data struct {
		Chunks []Chunk `json:"chunks"`
		Total  int     `json:"total"`
	} `json:"data"`
}

ListChunksResponse 分块列表响应

type ListDatasetsRequest

// ListDatasetsRequest holds the parameters for listing datasets.
// Every field is tagged omitempty, so zero values (page 0, Desc=false, empty
// strings) are dropped whenever this struct is JSON-encoded. Note that the
// OrderBy key is spelled "orderby", without an underscore.
// NOTE(review): how the client actually transmits these parameters (body or
// query string) is not visible here.
type ListDatasetsRequest struct {
	Page     int    `json:"page,omitempty"`
	PageSize int    `json:"page_size,omitempty"`
	OrderBy  string `json:"orderby,omitempty"`
	Desc     bool   `json:"desc,omitempty"`
	Name     string `json:"name,omitempty"`
	ID       string `json:"id,omitempty"`
}

ListDatasetsRequest 列表请求参数

type ListDatasetsResponse

// ListDatasetsResponse is the JSON envelope for a dataset-list response:
// a numeric code plus a list of Dataset values under the "data" key.
type ListDatasetsResponse struct {
	Code int       `json:"code"`
	Data []Dataset `json:"data"`
}

type ListDocumentsResponse

// ListDocumentsResponse is the JSON envelope for a document-list response:
// a numeric code plus a data object holding the documents (under "docs") and
// a total count.
// NOTE(review): whether Total counts all matches or only this page is not
// visible here.
type ListDocumentsResponse struct {
	Code int `json:"code"`
	Data struct {
		Docs  []Document `json:"docs"`
		Total int        `json:"total"`
	} `json:"data"`
}

ListDocumentsResponse 文档列表响应

type ListModelConfigsResponse

// ListModelConfigsResponse is the JSON envelope for a model-config-list
// response: a numeric code plus a list of ModelConfig values under "data".
type ListModelConfigsResponse struct {
	Code int           `json:"code"`
	Data []ModelConfig `json:"data"`
}

type ModelConfig

// ModelConfig is a model configuration. Fields from ID through Enabled are
// always serialized; the remaining fields are omitted from the JSON when zero
// or empty. Config is carried as raw, undecoded JSON.
// NOTE(review): units for Timeout, CreateTime and UpdateTime are not visible
// here — confirm against the server.
type ModelConfig struct {
	ID          string          `json:"id"`
	Provider    string          `json:"provider"` // provider identifier, e.g. openai-compatible-api
	Name        string          `json:"name"`
	TaskType    string          `json:"task_type"` // embedding, rerank, chat
	ApiBase     string          `json:"api_base"`
	ApiKey      string          `json:"api_key"`
	MaxTokens   int             `json:"max_tokens"`
	IsDefault   bool            `json:"is_default"`
	Enabled     bool            `json:"enabled"`
	Config      json.RawMessage `json:"config,omitempty"`
	Description string          `json:"description,omitempty"`
	Version     string          `json:"version,omitempty"`
	Timeout     int             `json:"timeout,omitempty"`
	CreateTime  int64           `json:"create_time,omitempty"`
	UpdateTime  int64           `json:"update_time,omitempty"`
	Owner       string          `json:"owner,omitempty"`
	QuotaLimit  int             `json:"quota_limit,omitempty"`
}

ModelConfig 模型配置

type ModelItem

// ModelItem names a model by its name and API base. DeleteModelConfig takes
// a slice of these.
// NOTE(review): presumably the (name, api_base) pair identifies a model
// configuration on the server — confirm.
type ModelItem struct {
	Name    string `json:"name"`
	ApiBase string `json:"api_base"`
}

type ParseDocumentsRequest

// ParseDocumentsRequest is the JSON request body for parsing documents.
// Per the package docs it is sent as
// POST /api/v1/datasets/{dataset_id}/chunks with a body of the form
// {"document_ids": ["id1", "id2"]}.
type ParseDocumentsRequest struct {
	DocumentIDs []string `json:"document_ids"`
}

ParseDocumentsRequest 解析文档请求 POST /api/v1/datasets/{dataset_id}/chunks Body: {"document_ids": ["id1", "id2"]}

type ParseDocumentsResponse

// ParseDocumentsResponse is the JSON envelope for a parse-documents response;
// it declares only the numeric code field.
type ParseDocumentsResponse struct {
	Code int `json:"code"`
}

type ParserConfig

// ParserConfig is the parser configuration; per the package docs, which
// fields apply varies with chunk_method. Every field is tagged omitempty, so
// a zero-valued ParserConfig serializes as an empty JSON object.
// FilenameEmbdWeight, TaskPageSize and Pages are pointers, which lets an
// explicit zero value be serialized (only a nil pointer is omitted).
type ParserConfig struct {
	AutoKeywords       int             `json:"auto_keywords,omitempty"`        // number of auto-generated keywords
	AutoQuestions      int             `json:"auto_questions,omitempty"`       // number of auto-generated questions
	ChunkTokenNum      int             `json:"chunk_token_num,omitempty"`      // tokens per chunk
	Delimiter          string          `json:"delimiter,omitempty"`            // delimiter
	Graphrag           *GraphragConfig `json:"graphrag,omitempty"`             // GraphRAG configuration
	HTML4Excel         bool            `json:"html4excel,omitempty"`           // convert Excel to HTML
	LayoutRecognize    string          `json:"layout_recognize,omitempty"`     // layout recognition
	Raptor             *RaptorConfig   `json:"raptor,omitempty"`               // Raptor configuration
	TagKBIDs           []string        `json:"tag_kb_ids,omitempty"`           // tag knowledge-base IDs
	TopnTags           int             `json:"topn_tags,omitempty"`            // top-N tags
	FilenameEmbdWeight *float64        `json:"filename_embd_weight,omitempty"` // filename embedding weight
	TaskPageSize       *int            `json:"task_page_size,omitempty"`       // PDF pagination
	Pages              *[][]int        `json:"pages,omitempty"`                // page ranges
}

ParserConfig 解析配置,随 chunk_method 变化

type RaptorConfig

// RaptorConfig is the Raptor configuration. Per the package docs it is meant
// to match the Python version's fields: use_raptor, prompt, max_token,
// threshold, max_cluster, random_seed.
// UseRaptor is always serialized; the other fields are omitted from the JSON
// when zero or empty, so an explicit 0 for Threshold or RandomSeed is never
// sent.
type RaptorConfig struct {
	UseRaptor  bool    `json:"use_raptor"`
	Prompt     string  `json:"prompt,omitempty"`
	MaxToken   int     `json:"max_token,omitempty"`
	Threshold  float64 `json:"threshold,omitempty"`
	MaxCluster int     `json:"max_cluster,omitempty"`
	RandomSeed int     `json:"random_seed,omitempty"`
}

RaptorConfig 配置 完全适配 Python 版本 use_raptor, prompt, max_token, threshold, max_cluster, random_seed

type RelatedQuestionsRequest

// RelatedQuestionsRequest is the JSON request body for generating related
// questions; it carries the single source question.
type RelatedQuestionsRequest struct {
	Question string `json:"question"`
}

RelatedQuestionsRequest 相关问题请求

type RelatedQuestionsResponse

// RelatedQuestionsResponse is the JSON envelope for a related-questions
// response: a numeric code, the list of question strings under "data", and a
// message string.
type RelatedQuestionsResponse struct {
	Code    int      `json:"code"`
	Data    []string `json:"data"`
	Message string   `json:"message"`
}

RelatedQuestionsResponse 相关问题响应

type RetrievalChunk

// RetrievalChunk is one chunk in a retrieval result. Every field is always
// serialized (no omitempty). Positions is left untyped per element, so each
// entry decodes to whatever generic JSON value is received.
// NOTE(review): the ranges of the three similarity scores, and how Similarity
// relates to TermSimilarity and VectorSimilarity, are not visible here.
type RetrievalChunk struct {
	ID                string   `json:"id"`
	Content           string   `json:"content"`
	ContentLtks       string   `json:"content_ltks"`
	DocumentID        string   `json:"document_id"`
	DocumentKeyword   string   `json:"document_keyword"`
	Highlight         string   `json:"highlight"`
	ImageID           string   `json:"image_id"`
	ImportantKeywords []string `json:"important_keywords"`
	KBID              string   `json:"kb_id"`
	Positions         []any    `json:"positions"`
	Similarity        float64  `json:"similarity"`
	TermSimilarity    float64  `json:"term_similarity"`
	VectorSimilarity  float64  `json:"vector_similarity"`
}

RetrievalChunk 检索结果分块

type RetrievalRequest

// RetrievalRequest is the JSON request body for chunk retrieval.
// Question is always serialized; every other field is tagged omitempty, so
// zero values (0, 0.0, false, empty lists) are left out of the JSON and an
// explicit zero threshold or weight is never sent.
type RetrievalRequest struct {
	Question               string        `json:"question"`                           // query question
	DatasetIDs             []string      `json:"dataset_ids,omitempty"`              // dataset ID list
	DocumentIDs            []string      `json:"document_ids,omitempty"`             // document ID list
	UserGroupIDs           []int         `json:"user_group_ids,omitempty"`           // user permission groups
	Page                   int           `json:"page,omitempty"`                     // page number
	PageSize               int           `json:"page_size,omitempty"`                // items per page
	SimilarityThreshold    float64       `json:"similarity_threshold,omitempty"`     // similarity threshold
	VectorSimilarityWeight float64       `json:"vector_similarity_weight,omitempty"` // vector similarity weight
	TopK                   int           `json:"top_k,omitempty"`                    // top K taking part in vector computation
	RerankID               string        `json:"rerank_id,omitempty"`                // rerank model ID
	Keyword                bool          `json:"keyword,omitempty"`                  // whether to enable keyword matching
	Highlight              bool          `json:"highlight,omitempty"`                // whether to highlight
	ChatMessages           []ChatMessage `json:"chat_messages,omitempty"`            // chat messages, used for question rewriting
}

RetrievalRequest 检索请求

type RetrievalResponse

// RetrievalResponse is the JSON envelope for a retrieval response: a numeric
// code plus a data object holding the matching chunks, a total count and the
// rewritten query.
type RetrievalResponse struct {
	Code int `json:"code"`
	Data struct {
		Chunks         []RetrievalChunk `json:"chunks"`
		Total          int              `json:"total"`
		RewrittenQuery string           `json:"rewritten_query"` // rewritten question; an empty string is returned when no rewrite was needed
	} `json:"data"`
}

RetrievalResponse 检索响应

type StopParseDocumentsRequest

// StopParseDocumentsRequest is the JSON request body for stopping document
// parsing. Per the package docs it is sent as
// DELETE /api/v1/datasets/{dataset_id}/chunks with a body of the form
// {"document_ids": ["id1", "id2"]}.
type StopParseDocumentsRequest struct {
	DocumentIDs []string `json:"document_ids"`
}

StopParseDocumentsRequest 停止解析文档请求 DELETE /api/v1/datasets/{dataset_id}/chunks Body: {"document_ids": ["id1", "id2"]}

type StopParseDocumentsResponse

// StopParseDocumentsResponse is the JSON envelope for a stop-parsing
// response; it declares only the numeric code field.
type StopParseDocumentsResponse struct {
	Code int `json:"code"`
}

type UpdateChunkRequest

// UpdateChunkRequest is the JSON request body for updating a chunk.
// Every field is omitted from the JSON when empty. Available is a *bool, so
// an explicit false can be serialized: only a nil pointer is omitted.
type UpdateChunkRequest struct {
	Content           string   `json:"content,omitempty"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Available         *bool    `json:"available,omitempty"`
}

UpdateChunkRequest 更新分块请求

type UpdateChunkResponse

// UpdateChunkResponse is the JSON envelope for an update-chunk response;
// it declares only the numeric code field.
type UpdateChunkResponse struct {
	Code int `json:"code"`
}

type UpdateDatasetRequest

// UpdateDatasetRequest is the JSON request body for updating a dataset.
// The string and int fields are omitted from the JSON when empty or zero.
//
// NOTE(review): ParserConfig is a struct value, and encoding/json's omitempty
// never treats a struct as empty, so a "parser_config" object is serialized
// on every update even when nothing was set (as {} because all of its own
// fields are omitempty). Confirm the server does not read that as a request
// to reset the parser configuration.
type UpdateDatasetRequest struct {
	Name           string       `json:"name,omitempty"`
	Avatar         string       `json:"avatar,omitempty"`
	Description    string       `json:"description,omitempty"`
	EmbeddingModel string       `json:"embedding_model,omitempty"`
	Permission     string       `json:"permission,omitempty"`
	ChunkMethod    string       `json:"chunk_method,omitempty"`
	Pagerank       int          `json:"pagerank,omitempty"`
	ParserConfig   ParserConfig `json:"parser_config,omitempty"`
}

UpdateDatasetRequest 更新数据集请求

type UpdateDatasetResponse

// UpdateDatasetResponse is the JSON envelope for an update-dataset response;
// it declares only the numeric code field.
type UpdateDatasetResponse struct {
	Code int `json:"code"`
}

type UpdateDocumentRequest

// UpdateDocumentRequest is the JSON request body for updating a document.
// Every field is tagged omitempty, so a zero-valued request serializes as an
// empty JSON object. MetaFields and ParserConfig are free-form maps whose
// keys are not constrained by this type.
type UpdateDocumentRequest struct {
	Name         string         `json:"name,omitempty"`
	MetaFields   map[string]any `json:"meta_fields,omitempty"`
	ChunkMethod  string         `json:"chunk_method,omitempty"`
	ParserConfig map[string]any `json:"parser_config,omitempty"`
}

UpdateDocumentRequest 更新文档请求

type UpdateDocumentResponse

// UpdateDocumentResponse is the JSON envelope for an update-document
// response; it declares only the numeric code field.
type UpdateDocumentResponse struct {
	Code int `json:"code"`
}

type UploadDocumentResponse

// UploadDocumentResponse is the JSON envelope for an upload-document
// response: a numeric code plus a list of Document values under "data".
type UploadDocumentResponse struct {
	Code int        `json:"code"`
	Data []Document `json:"data"`
}

UploadDocumentResponse 上传文档响应

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL