2025-12-22 17:50:53 +08:00
|
|
|
|
// Package ragflow - RAGFlow文档管理
|
|
|
|
|
|
// 功能:RAGFlow知识库文档的上传、列表、删除操作
|
2025-11-27 09:50:12 +08:00
|
|
|
|
package ragflow
|
|
|
|
|
|
|
|
|
|
|
|
import (
	"bytes"
	"context"
	"encoding/json"
	"mime/multipart"
	"net/url"
	"strings"

	commonHttp "gitee.com/red-future---jilin-g/common/http"
	"github.com/gogf/gf/v2/errors/gerror"
	"github.com/gogf/gf/v2/frame/g"
)
|
|
|
|
|
|
|
2025-11-27 17:38:42 +08:00
|
|
|
|
// 数据集内文件管理
|
|
|
|
|
|
// 参考: https://ragflow.com.cn/docs/dev/http_api_reference#数据集内文件管理
|
|
|
|
|
|
|
2025-12-02 14:59:07 +08:00
|
|
|
|
// ... (rest of the code remains the same)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
type Document struct {
|
|
|
|
|
|
Id string `json:"id"`
|
|
|
|
|
|
DatasetId string `json:"dataset_id"`
|
|
|
|
|
|
Name string `json:"name"`
|
|
|
|
|
|
Size int64 `json:"size"`
|
|
|
|
|
|
Location string `json:"location"`
|
|
|
|
|
|
CreatedBy string `json:"created_by"`
|
|
|
|
|
|
CreateTime int64 `json:"create_time"`
|
|
|
|
|
|
Thumbnail string `json:"thumbnail"`
|
|
|
|
|
|
Type string `json:"type"`
|
|
|
|
|
|
RunStatus string `json:"run_status"` // 对应 API 返回的 "run" 字段,可能需要确认
|
|
|
|
|
|
Status string `json:"status"`
|
|
|
|
|
|
ChunkMethod string `json:"chunk_method"`
|
|
|
|
|
|
ParserConfig map[string]interface{} `json:"parser_config"`
|
|
|
|
|
|
TokenNum int `json:"token_num"`
|
|
|
|
|
|
ChunkCount int `json:"chunk_count"`
|
|
|
|
|
|
ProcessBegin int64 `json:"process_begin"`
|
|
|
|
|
|
ProcessDu int64 `json:"process_du"`
|
|
|
|
|
|
Progress float64 `json:"progress"`
|
|
|
|
|
|
ProgressMsg string `json:"progress_msg"`
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// UploadDocumentReq 上传文档请求
|
|
|
|
|
|
// 注意:上传文件通常需要 multipart/form-data,这里仅定义结构,实际逻辑在方法中处理
|
|
|
|
|
|
type UploadDocumentReq struct {
|
|
|
|
|
|
FilePaths []string // 本地文件路径列表
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-24 18:33:11 +08:00
|
|
|
|
// UploadDocumentRes 上传文档响应
|
|
|
|
|
|
type UploadDocumentRes struct {
|
|
|
|
|
|
Id string `json:"id"` // 文档ID
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-27 09:50:12 +08:00
|
|
|
|
// ListDocumentsReq 列出文档请求
|
|
|
|
|
|
type ListDocumentsReq struct {
|
2025-12-02 14:59:07 +08:00
|
|
|
|
Page int `json:"page,omitempty"` // 页码,默认 1
|
|
|
|
|
|
PageSize int `json:"page_size,omitempty"` // 每页数量,默认 30
|
|
|
|
|
|
OrderBy string `json:"orderby,omitempty"` // 排序字段:create_time(默认)或 update_time
|
|
|
|
|
|
Desc bool `json:"desc,omitempty"` // 是否降序,默认 true
|
|
|
|
|
|
Keywords string `json:"keywords,omitempty"` // 关键词过滤(匹配文档标题)
|
|
|
|
|
|
Id string `json:"id,omitempty"` // 文档 ID 过滤
|
|
|
|
|
|
Name string `json:"name,omitempty"` // 文档名称过滤
|
|
|
|
|
|
CreateTimeFrom int64 `json:"create_time_from,omitempty"` // 创建时间起始(Unix 时间戳),0 表示无限制
|
|
|
|
|
|
CreateTimeTo int64 `json:"create_time_to,omitempty"` // 创建时间截止(Unix 时间戳),0 表示无限制
|
|
|
|
|
|
Suffix []string `json:"suffix,omitempty"` // 文件后缀过滤,如 ["pdf", "txt", "docx"]
|
|
|
|
|
|
Run []string `json:"run,omitempty"` // 处理状态过滤,支持 ["UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL"] 或数字格式 ["0", "1", "2", "3", "4"]
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ListDocumentsRes 列出文档响应
|
2025-12-02 14:59:07 +08:00
|
|
|
|
// 注意:响应结构与其他 List 接口不同,data 是一个对象而非数组
|
2025-11-27 09:50:12 +08:00
|
|
|
|
type ListDocumentsRes struct {
|
2025-12-02 14:59:07 +08:00
|
|
|
|
Code int `json:"code"` // 状态码,0 表示成功
|
|
|
|
|
|
Data struct {
|
|
|
|
|
|
Docs []*Document `json:"docs"` // 文档列表
|
|
|
|
|
|
TotalDatasets int `json:"total_datasets"` // 总文档数
|
|
|
|
|
|
} `json:"data"`
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// DeleteDocumentsReq 删除文档请求
|
|
|
|
|
|
type DeleteDocumentsReq struct {
|
|
|
|
|
|
Ids []string `json:"ids"`
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ListDocuments 列出文档
|
|
|
|
|
|
func (c *Client) ListDocuments(ctx context.Context, datasetId string, req *ListDocumentsReq) (*ListDocumentsRes, error) {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/documents"
|
2025-11-27 09:50:12 +08:00
|
|
|
|
params := map[string]interface{}{}
|
|
|
|
|
|
if req.Page > 0 {
|
|
|
|
|
|
params["page"] = req.Page
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.PageSize > 0 {
|
|
|
|
|
|
params["page_size"] = req.PageSize
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.OrderBy != "" {
|
|
|
|
|
|
params["orderby"] = req.OrderBy
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Desc {
|
|
|
|
|
|
params["desc"] = "true"
|
|
|
|
|
|
} else {
|
|
|
|
|
|
params["desc"] = "false"
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Keywords != "" {
|
|
|
|
|
|
params["keywords"] = req.Keywords
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Id != "" {
|
|
|
|
|
|
params["id"] = req.Id
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Name != "" {
|
|
|
|
|
|
params["name"] = req.Name
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.CreateTimeFrom > 0 {
|
|
|
|
|
|
params["create_time_from"] = req.CreateTimeFrom
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.CreateTimeTo > 0 {
|
|
|
|
|
|
params["create_time_to"] = req.CreateTimeTo
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-02 14:59:07 +08:00
|
|
|
|
// 构造查询字符串
|
2025-11-27 17:38:42 +08:00
|
|
|
|
query := buildQueryString(params)
|
2025-12-02 14:59:07 +08:00
|
|
|
|
var queryParts []string
|
2025-11-27 17:38:42 +08:00
|
|
|
|
if query != "" {
|
2025-12-02 14:59:07 +08:00
|
|
|
|
queryParts = append(queryParts, query)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-02 14:59:07 +08:00
|
|
|
|
// 处理数组参数:suffix(文件后缀过滤)
|
|
|
|
|
|
// API 要求多个值时重复参数名,如:suffix=pdf&suffix=txt
|
|
|
|
|
|
for _, suffix := range req.Suffix {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
queryParts = append(queryParts, "suffix="+suffix)
|
2025-12-02 14:59:07 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 处理数组参数:run(处理状态过滤)
|
|
|
|
|
|
// 支持数字格式("0"-"4")或文本格式("UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL")
|
|
|
|
|
|
for _, run := range req.Run {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
queryParts = append(queryParts, "run="+run)
|
2025-12-02 14:59:07 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 构造最终请求路径
|
|
|
|
|
|
if len(queryParts) > 0 {
|
|
|
|
|
|
path += "?" + strings.Join(queryParts, "&")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 发送请求并处理响应
|
2025-11-27 09:50:12 +08:00
|
|
|
|
var res ListDocumentsRes
|
|
|
|
|
|
if err := c.request(ctx, "GET", path, nil, &res); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
if res.Code != 0 {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return nil, gerror.Newf("list documents failed: code=%d", res.Code)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
return &res, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-22 17:50:53 +08:00
|
|
|
|
// UploadDocumentFromText 上传文本内容作为文档
|
|
|
|
|
|
func (c *Client) UploadDocumentFromText(ctx context.Context, datasetId, content, filename string) (documentId string, err error) {
|
|
|
|
|
|
if datasetId == "" {
|
|
|
|
|
|
return "", gerror.New("datasetId不能为空")
|
|
|
|
|
|
}
|
|
|
|
|
|
if content == "" {
|
|
|
|
|
|
return "", gerror.New("文档内容不能为空")
|
|
|
|
|
|
}
|
|
|
|
|
|
if filename == "" {
|
|
|
|
|
|
filename = "document.txt"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-09 17:57:14 +08:00
|
|
|
|
// 构造URL(使用负载均衡)
|
|
|
|
|
|
endpoint := c.getNextEndpoint()
|
|
|
|
|
|
if endpoint == "" {
|
|
|
|
|
|
return "", gerror.New("RAGFlow endpoints not configured")
|
|
|
|
|
|
}
|
|
|
|
|
|
url := endpoint + "/api/v1/datasets/" + datasetId + "/documents"
|
2025-12-22 17:50:53 +08:00
|
|
|
|
|
2025-12-22 20:05:01 +08:00
|
|
|
|
// 创建multipart writer
|
|
|
|
|
|
body := &bytes.Buffer{}
|
|
|
|
|
|
writer := multipart.NewWriter(body)
|
|
|
|
|
|
|
|
|
|
|
|
// 添加文件字段
|
|
|
|
|
|
part, err := writer.CreateFormFile("file", filename)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return "", gerror.Wrap(err, "创建form file失败")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 写入内容
|
|
|
|
|
|
if _, err = part.Write([]byte(content)); err != nil {
|
|
|
|
|
|
return "", gerror.Wrap(err, "写入文件内容失败")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 关闭multipart writer
|
|
|
|
|
|
if err = writer.Close(); err != nil {
|
|
|
|
|
|
return "", gerror.Wrap(err, "关闭multipart writer失败")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 发送请求
|
2026-01-08 15:55:44 +08:00
|
|
|
|
client := commonHttp.Httpclient.Clone()
|
2025-12-22 17:50:53 +08:00
|
|
|
|
client.SetHeader("Authorization", "Bearer "+c.APIKey)
|
2025-12-22 20:05:01 +08:00
|
|
|
|
client.SetHeader("Content-Type", writer.FormDataContentType())
|
2025-12-22 17:50:53 +08:00
|
|
|
|
|
2025-12-22 20:05:01 +08:00
|
|
|
|
resp, err := client.Post(ctx, url, body.Bytes())
|
2025-12-22 17:50:53 +08:00
|
|
|
|
if err != nil {
|
2025-12-22 20:05:01 +08:00
|
|
|
|
return "", gerror.Wrap(err, "上传文档请求失败")
|
2025-12-22 17:50:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
defer resp.Close()
|
|
|
|
|
|
|
|
|
|
|
|
// 解析响应
|
2025-12-24 18:33:11 +08:00
|
|
|
|
var response struct {
|
|
|
|
|
|
Code int `json:"code"`
|
|
|
|
|
|
Message string `json:"message"`
|
|
|
|
|
|
Data []UploadDocumentRes `json:"data"` // RAGFlow返回数组
|
2025-12-22 17:50:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-06 17:23:03 +08:00
|
|
|
|
respBody := resp.ReadAll()
|
2025-12-22 17:50:53 +08:00
|
|
|
|
|
2026-01-06 17:23:03 +08:00
|
|
|
|
if err := json.Unmarshal(respBody, &response); err != nil {
|
|
|
|
|
|
g.Log().Errorf(ctx, "解析RAGFlow响应失败: %v, 原始响应: %s", err, string(respBody))
|
|
|
|
|
|
return "", gerror.Newf("json Decode failed: %v", err)
|
2025-12-22 17:50:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-06 17:23:03 +08:00
|
|
|
|
// 先检查code,再检查data
|
2025-12-24 18:33:11 +08:00
|
|
|
|
if response.Code != 0 {
|
2026-01-06 17:23:03 +08:00
|
|
|
|
g.Log().Errorf(ctx, "RAGFlow返回错误: code=%d, message=%s", response.Code, response.Message)
|
2025-12-24 18:33:11 +08:00
|
|
|
|
return "", gerror.Newf("上传文档失败 (code=%d): %s", response.Code, response.Message)
|
2025-12-22 17:50:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-06 17:23:03 +08:00
|
|
|
|
if len(response.Data) == 0 {
|
|
|
|
|
|
g.Log().Errorf(ctx, "RAGFlow返回data为空, 完整响应: %s", string(respBody))
|
|
|
|
|
|
return "", gerror.New("上传文档返回data为空")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-24 18:33:11 +08:00
|
|
|
|
return response.Data[0].Id, nil
|
2025-12-22 17:50:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// UploadDocument 上传文档(保留兼容)
|
2025-12-06 18:04:29 +08:00
|
|
|
|
func (c *Client) UploadDocument(ctx context.Context, datasetId string, filePaths []string) (err error) {
|
2025-12-22 17:50:53 +08:00
|
|
|
|
return gerror.New("upload document from file not implemented yet, use UploadDocumentFromText instead")
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-23 17:03:58 +08:00
|
|
|
|
// ParseDocumentsReq 解析文档请求
|
|
|
|
|
|
type ParseDocumentsReq struct {
|
|
|
|
|
|
DocumentIds []string `json:"document_ids"` // 要解析的文档ID列表
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ParseDocuments 解析文档(上传后必须调用此接口才会开始解析)
|
|
|
|
|
|
func (c *Client) ParseDocuments(ctx context.Context, datasetId string, documentIds []string) error {
|
|
|
|
|
|
if datasetId == "" {
|
|
|
|
|
|
return gerror.New("datasetId不能为空")
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(documentIds) == 0 {
|
|
|
|
|
|
return gerror.New("documentIds不能为空")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
req := ParseDocumentsReq{DocumentIds: documentIds}
|
|
|
|
|
|
var res CommonResponse
|
|
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/chunks"
|
|
|
|
|
|
if err := c.request(ctx, "POST", path, req, &res); err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
if !res.IsSuccess() {
|
|
|
|
|
|
return gerror.Newf("解析文档失败: %s", res.Message)
|
|
|
|
|
|
}
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-27 09:50:12 +08:00
|
|
|
|
// DeleteDocument 删除文档
|
2025-12-06 18:04:29 +08:00
|
|
|
|
func (c *Client) DeleteDocument(ctx context.Context, datasetId string, ids []string) (err error) {
|
2025-11-27 09:50:12 +08:00
|
|
|
|
req := DeleteDocumentsReq{Ids: ids}
|
|
|
|
|
|
var res CommonResponse
|
2025-12-06 18:04:29 +08:00
|
|
|
|
path := "/api/v1/datasets/" + datasetId + "/documents"
|
|
|
|
|
|
if err = c.request(ctx, "DELETE", path, req, &res); err != nil {
|
|
|
|
|
|
return
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
if !res.IsSuccess() {
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return gerror.Newf("delete document failed: %s", res.Message)
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|
2025-12-06 18:04:29 +08:00
|
|
|
|
return
|
2025-11-27 09:50:12 +08:00
|
|
|
|
}
|