2025-11-27 09:50:12 +08:00
|
|
|
|
package ragflow
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"context"
|
2025-12-06 18:04:29 +08:00
|
|
|
|
|
|
|
|
|
|
"github.com/gogf/gf/v2/errors/gerror"
|
2025-11-27 09:50:12 +08:00
|
|
|
|
)
|
|
|
|
|
|
|
2025-11-27 17:38:42 +08:00
|
|
|
|
// 数据集内知识块管理
|
|
|
|
|
|
// 参考: https://ragflow.com.cn/docs/dev/http_api_reference#数据集内知识块管理
|
|
|
|
|
|
|
|
|
|
|
|
// Chunk is a single knowledge chunk as it appears in API payloads.
//
// Every field is always serialized (no omitempty), so a zero-valued Chunk
// still marshals all eleven keys.
type Chunk struct {
	Id                string   `json:"id"`                 // Chunk ID.
	Content           string   `json:"content"`            // Text content of the chunk.
	DocumentId        string   `json:"document_id"`        // ID of the document the chunk belongs to.
	DatasetId         string   `json:"dataset_id"`         // ID of the dataset the chunk belongs to.
	CreateTime        string   `json:"create_time"`        // Creation time as a string (format not shown here).
	CreateTimestamp   float64  `json:"create_timestamp"`   // Creation time as a number (unit not shown here).
	ImportantKeywords []string `json:"important_keywords"` // Keywords attached to the chunk.
	Questions         []string `json:"questions"`          // Questions attached to the chunk.
	Available         bool     `json:"available"`          // Availability flag.
	ImageId           string   `json:"image_id"`           // ID of an associated image.
	Positions         []string `json:"positions"`          // NOTE(review): element type assumed to be string — confirm against real responses.
}
|
|
|
|
|
|
|
|
|
|
|
|
// AddChunkReq is the request body for adding a chunk to a document.
type AddChunkReq struct {
	Content           string   `json:"content"`                      // Chunk text; always serialized, even when empty.
	ImportantKeywords []string `json:"important_keywords,omitempty"` // Optional; left out of the body when empty.
	Questions         []string `json:"questions,omitempty"`          // Optional; left out of the body when empty.
}
|
|
|
|
|
|
|
|
|
|
|
|
// ListChunksReq holds the optional filters for listing chunks.
//
// All fields are optional; zero values are omitted when the struct is
// marshaled to JSON.
type ListChunksReq struct {
	Keywords string `json:"keywords,omitempty"`  // Keyword filter.
	Page     int    `json:"page,omitempty"`      // Page number.
	PageSize int    `json:"page_size,omitempty"` // Number of chunks per page.
	Id       string `json:"id,omitempty"`        // ID of a specific chunk.
}
|
|
|
|
|
|
|
|
|
|
|
|
// ListChunksRes 列出知识块响应
|
2025-12-02 14:59:07 +08:00
|
|
|
|
// 注意:响应结构包含 chunks(知识块列表)、doc(关联文档信息)和 total(总数)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
type ListChunksRes struct {
|
2025-12-02 14:59:07 +08:00
|
|
|
|
Code int `json:"code"` // 状态码,0 表示成功
|
2025-11-27 09:50:12 +08:00
|
|
|
|
Data struct {
|
2025-12-02 14:59:07 +08:00
|
|
|
|
Chunks []*Chunk `json:"chunks"` // 知识块列表
|
|
|
|
|
|
Doc interface{} `json:"doc"` // 关联文档信息(完整的 Document 对象)
|
|
|
|
|
|
Total int `json:"total"` // 知识块总数
|
2025-11-27 09:50:12 +08:00
|
|
|
|
} `json:"data"`
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// DeleteChunksReq is the request body for deleting chunks.
type DeleteChunksReq struct {
	// ChunkIds lists the chunks to delete. An empty list is dropped from the
	// body; per the original note this means "delete all".
	ChunkIds []string `json:"chunk_ids,omitempty"`
}
|
|
|
|
|
|
|
|
|
|
|
|
// UpdateChunkReq is the request body for updating a chunk.
//
// Only the fields that are set are serialized.
type UpdateChunkReq struct {
	Content           string   `json:"content,omitempty"`            // New chunk text; omitted when empty.
	ImportantKeywords []string `json:"important_keywords,omitempty"` // New keywords; omitted when empty.
	Available         *bool    `json:"available,omitempty"`          // Pointer so an explicit false is still sent; nil omits the key.
}
|
|
|
|
|
|
|
|
|
|
|
|
// RetrieveChunksReq is the request body for retrieving chunks.
//
// Question is always serialized. Every other field uses omitempty, so a zero
// value (0, "", false, empty slice or map) is left out of the body. As a
// consequence an explicit 0 or false cannot be sent with this type.
type RetrieveChunksReq struct {
	Question               string                 `json:"question"`                           // Query text; always serialized.
	DatasetIds             []string               `json:"dataset_ids,omitempty"`              // Dataset IDs; omitted when empty.
	DocumentIds            []string               `json:"document_ids,omitempty"`             // Document IDs; omitted when empty.
	Page                   int                    `json:"page,omitempty"`                     // Page number; omitted when 0.
	PageSize               int                    `json:"page_size,omitempty"`                // Page size; omitted when 0.
	SimilarityThreshold    float64                `json:"similarity_threshold,omitempty"`     // Omitted when 0.
	VectorSimilarityWeight float64                `json:"vector_similarity_weight,omitempty"` // Omitted when 0.
	TopK                   int                    `json:"top_k,omitempty"`                    // Omitted when 0.
	RerankId               string                 `json:"rerank_id,omitempty"`                // Rerank model ID; omitted when empty.
	Keyword                bool                   `json:"keyword,omitempty"`                  // Omitted when false.
	Highlight              bool                   `json:"highlight,omitempty"`                // Omitted when false.
	CrossLanguages         []string               `json:"cross_languages,omitempty"`          // Omitted when empty.
	MetadataCondition      map[string]interface{} `json:"metadata_condition,omitempty"`       // Free-form condition object; omitted when empty.
}
|
|
|
|
|
|
|
|
|
|
|
|
// RetrieveChunksRes is the response returned when retrieving chunks.
//
// The shape is deliberately simplified: per the original author's note the
// official documentation does not spell out the response structure, so a
// chunk list is assumed and should be adjusted to the actual payload.
type RetrieveChunksRes struct {
	Code int `json:"code"`
	Data struct {
		Chunks []interface{} `json:"chunks"` // Untyped: retrieval results may carry extra information.
		Total  int           `json:"total"`
	} `json:"data"`
}
|
|
|
|
|
|
|
|
|
|
|
|
// AddChunk 添加知识块
|
|
|
|
|
|
func (c *Client) AddChunk(ctx context.Context, datasetId, documentId string, req *AddChunkReq) (*Chunk, error) {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"
|
2025-11-27 09:50:12 +08:00
|
|
|
|
var res struct {
|
|
|
|
|
|
Code int `json:"code"`
|
|
|
|
|
|
Data struct {
|
|
|
|
|
|
Chunk *Chunk `json:"chunk"`
|
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
|
Msg string `json:"message"`
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := c.request(ctx, "POST", path, req, &res); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
if res.Code != 0 {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return nil, gerror.Newf("add chunk failed: %s", res.Msg)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
return res.Data.Chunk, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ListChunks 列出知识块
|
|
|
|
|
|
func (c *Client) ListChunks(ctx context.Context, datasetId, documentId string, req *ListChunksReq) (*ListChunksRes, error) {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"
|
2025-11-27 09:50:12 +08:00
|
|
|
|
params := map[string]interface{}{}
|
|
|
|
|
|
if req.Keywords != "" {
|
|
|
|
|
|
params["keywords"] = req.Keywords
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Page > 0 {
|
|
|
|
|
|
params["page"] = req.Page
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.PageSize > 0 {
|
|
|
|
|
|
params["page_size"] = req.PageSize
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Id != "" {
|
|
|
|
|
|
params["id"] = req.Id
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-27 17:38:42 +08:00
|
|
|
|
query := buildQueryString(params)
|
|
|
|
|
|
if query != "" {
|
|
|
|
|
|
path += "?" + query
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
var res ListChunksRes
|
|
|
|
|
|
if err := c.request(ctx, "GET", path, nil, &res); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
if res.Code != 0 {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return nil, gerror.Newf("list chunks failed: code=%d", res.Code)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
return &res, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// DeleteChunks 删除知识块
|
2025-12-06 18:04:29 +08:00
|
|
|
|
func (c *Client) DeleteChunks(ctx context.Context, datasetId, documentId string, chunkIds []string) (err error) {
|
2025-11-27 09:50:12 +08:00
|
|
|
|
req := DeleteChunksReq{ChunkIds: chunkIds}
|
|
|
|
|
|
var res CommonResponse
|
2025-12-06 18:04:29 +08:00
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"
|
|
|
|
|
|
if err = c.request(ctx, "DELETE", path, req, &res); err != nil {
|
|
|
|
|
|
return
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
if !res.IsSuccess() {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return gerror.Newf("delete chunks failed: %s", res.Message)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// UpdateChunk 更新知识块
|
2025-12-06 18:04:29 +08:00
|
|
|
|
func (c *Client) UpdateChunk(ctx context.Context, datasetId, documentId, chunkId string, req *UpdateChunkReq) (err error) {
|
2025-11-27 09:50:12 +08:00
|
|
|
|
var res CommonResponse
|
2025-12-06 18:04:29 +08:00
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks/" + chunkId
|
|
|
|
|
|
if err = c.request(ctx, "PUT", path, req, &res); err != nil {
|
|
|
|
|
|
return
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
if !res.IsSuccess() {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return gerror.Newf("update chunk failed: %s", res.Message)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RetrieveChunks 检索知识块
|
|
|
|
|
|
func (c *Client) RetrieveChunks(ctx context.Context, req *RetrieveChunksReq) (*RetrieveChunksRes, error) {
|
|
|
|
|
|
var res RetrieveChunksRes
|
|
|
|
|
|
if err := c.request(ctx, "POST", "/api/v1/retrieval", req, &res); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
if res.Code != 0 {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return nil, gerror.Newf("retrieve chunks failed: code=%d", res.Code)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
return &res, nil
|
|
|
|
|
|
}
|