Files
common/ragflow/chunk.go

181 lines
6.1 KiB
Go
Raw Normal View History

2025-11-27 09:50:12 +08:00
package ragflow
import (
"context"
"github.com/gogf/gf/v2/errors/gerror"
2025-11-27 09:50:12 +08:00
)
2025-11-27 17:38:42 +08:00
// Management of chunks inside a dataset.
// Reference: https://ragflow.com.cn/docs/dev/http_api_reference#数据集内知识块管理

// Chunk is a single knowledge chunk that belongs to a document of a dataset.
//
// Every field is bound to the JSON key named in its struct tag.
// NOTE(review): Positions is declared as []string here; confirm against the
// actual API payload that positions are really delivered as strings.
type Chunk struct {
	Id                string   `json:"id"`
	Content           string   `json:"content"`
	DocumentId        string   `json:"document_id"`
	DatasetId         string   `json:"dataset_id"`
	CreateTime        string   `json:"create_time"`
	CreateTimestamp   float64  `json:"create_timestamp"`
	ImportantKeywords []string `json:"important_keywords"`
	Questions         []string `json:"questions"`
	Available         bool     `json:"available"`
	ImageId           string   `json:"image_id"`
	Positions         []string `json:"positions"`
}
// AddChunkReq is the request payload for adding a chunk to a document.
//
// Content is always present in the encoded payload; ImportantKeywords and
// Questions carry omitempty and are left out when empty.
type AddChunkReq struct {
	Content           string   `json:"content"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Questions         []string `json:"questions,omitempty"`
}
// ListChunksReq holds the optional filters for listing the chunks of a
// document: a keyword filter, paging (Page / PageSize) and a chunk id.
// All fields are optional; their tags carry omitempty.
type ListChunksReq struct {
	Keywords string `json:"keywords,omitempty"`
	Page     int    `json:"page,omitempty"`
	PageSize int    `json:"page_size,omitempty"`
	Id       string `json:"id,omitempty"`
}
// ListChunksRes 列出知识块响应
2025-12-02 14:59:07 +08:00
// 注意:响应结构包含 chunks知识块列表、doc关联文档信息和 total总数
2025-11-27 09:50:12 +08:00
type ListChunksRes struct {
2025-12-02 14:59:07 +08:00
Code int `json:"code"` // 状态码0 表示成功
2025-11-27 09:50:12 +08:00
Data struct {
2025-12-02 14:59:07 +08:00
Chunks []*Chunk `json:"chunks"` // 知识块列表
Doc interface{} `json:"doc"` // 关联文档信息(完整的 Document 对象)
Total int `json:"total"` // 知识块总数
2025-11-27 09:50:12 +08:00
} `json:"data"`
}
// DeleteChunksReq is the request payload for deleting chunks of a document.
type DeleteChunksReq struct {
	// ChunkIds lists the chunks to delete. If it is empty, all chunks are
	// deleted.
	ChunkIds []string `json:"chunk_ids,omitempty"`
}
// UpdateChunkReq is the request payload for updating a chunk.
//
// Every field carries omitempty. Available is a pointer so that an explicit
// false can be told apart from "not set" (nil).
type UpdateChunkReq struct {
	Content           string   `json:"content,omitempty"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Available         *bool    `json:"available,omitempty"`
}
// RetrieveChunksReq is the request payload for retrieving chunks.
//
// Question is the only field without omitempty. For all others a zero value
// (0, false, "", empty slice or map) is dropped from the encoded payload, so
// e.g. a SimilarityThreshold of exactly 0 cannot be transmitted explicitly.
type RetrieveChunksReq struct {
	Question               string                 `json:"question"`
	DatasetIds             []string               `json:"dataset_ids,omitempty"`
	DocumentIds            []string               `json:"document_ids,omitempty"`
	Page                   int                    `json:"page,omitempty"`
	PageSize               int                    `json:"page_size,omitempty"`
	SimilarityThreshold    float64                `json:"similarity_threshold,omitempty"`
	VectorSimilarityWeight float64                `json:"vector_similarity_weight,omitempty"`
	TopK                   int                    `json:"top_k,omitempty"`
	RerankId               string                 `json:"rerank_id,omitempty"`
	Keyword                bool                   `json:"keyword,omitempty"`
	Highlight              bool                   `json:"highlight,omitempty"`
	CrossLanguages         []string               `json:"cross_languages,omitempty"`
	MetadataCondition      map[string]interface{} `json:"metadata_condition,omitempty"`
}
// RetrieveChunksRes is the response envelope of the retrieval call.
//
// The real structure is fairly complex and is simplified for now; adjust it
// to what the service actually returns. The official documentation gives no
// detailed response schema, so a plain list of chunks is assumed.
type RetrieveChunksRes struct {
	Code int `json:"code"`
	Data struct {
		// Chunks is left untyped because retrieval results may carry
		// additional information.
		Chunks []interface{} `json:"chunks"`
		Total  int           `json:"total"`
	} `json:"data"`
}
// AddChunk 添加知识块
func (c *Client) AddChunk(ctx context.Context, datasetId, documentId string, req *AddChunkReq) (*Chunk, error) {
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"
2025-11-27 09:50:12 +08:00
var res struct {
Code int `json:"code"`
Data struct {
Chunk *Chunk `json:"chunk"`
} `json:"data"`
Msg string `json:"message"`
}
if err := c.request(ctx, "POST", path, req, &res); err != nil {
return nil, err
}
if res.Code != 0 {
return nil, gerror.Newf("add chunk failed: %s", res.Msg)
2025-11-27 09:50:12 +08:00
}
return res.Data.Chunk, nil
}
// ListChunks 列出知识块
func (c *Client) ListChunks(ctx context.Context, datasetId, documentId string, req *ListChunksReq) (*ListChunksRes, error) {
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"
2025-11-27 09:50:12 +08:00
params := map[string]interface{}{}
if req.Keywords != "" {
params["keywords"] = req.Keywords
}
if req.Page > 0 {
params["page"] = req.Page
}
if req.PageSize > 0 {
params["page_size"] = req.PageSize
}
if req.Id != "" {
params["id"] = req.Id
}
2025-11-27 17:38:42 +08:00
query := buildQueryString(params)
if query != "" {
path += "?" + query
2025-11-27 09:50:12 +08:00
}
var res ListChunksRes
if err := c.request(ctx, "GET", path, nil, &res); err != nil {
return nil, err
}
if res.Code != 0 {
return nil, gerror.Newf("list chunks failed: code=%d", res.Code)
2025-11-27 09:50:12 +08:00
}
return &res, nil
}
// DeleteChunks 删除知识块
func (c *Client) DeleteChunks(ctx context.Context, datasetId, documentId string, chunkIds []string) (err error) {
2025-11-27 09:50:12 +08:00
req := DeleteChunksReq{ChunkIds: chunkIds}
var res CommonResponse
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"
if err = c.request(ctx, "DELETE", path, req, &res); err != nil {
return
2025-11-27 09:50:12 +08:00
}
if !res.IsSuccess() {
return gerror.Newf("delete chunks failed: %s", res.Message)
2025-11-27 09:50:12 +08:00
}
return
2025-11-27 09:50:12 +08:00
}
// UpdateChunk 更新知识块
func (c *Client) UpdateChunk(ctx context.Context, datasetId, documentId, chunkId string, req *UpdateChunkReq) (err error) {
2025-11-27 09:50:12 +08:00
var res CommonResponse
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks/" + chunkId
if err = c.request(ctx, "PUT", path, req, &res); err != nil {
return
2025-11-27 09:50:12 +08:00
}
if !res.IsSuccess() {
return gerror.Newf("update chunk failed: %s", res.Message)
2025-11-27 09:50:12 +08:00
}
return
2025-11-27 09:50:12 +08:00
}
// RetrieveChunks 检索知识块
func (c *Client) RetrieveChunks(ctx context.Context, req *RetrieveChunksReq) (*RetrieveChunksRes, error) {
var res RetrieveChunksRes
if err := c.request(ctx, "POST", "/api/v1/retrieval", req, &res); err != nil {
return nil, err
}
if res.Code != 0 {
return nil, gerror.Newf("retrieve chunks failed: code=%d", res.Code)
2025-11-27 09:50:12 +08:00
}
return &res, nil
}