refactor: 重构文档处理流程和任务管理

This commit is contained in:
2026-04-09 09:11:43 +08:00
parent b6896f3fb4
commit 7f894745e9
34 changed files with 1216 additions and 1056 deletions

View File

@@ -84,12 +84,6 @@ type ProcessDocumentReq struct {
DatasetId int64 `json:"datasetId" v:"required#数据集ID不能为空"`
}
// ProcessDocumentRes is the response body for a document-processing request.
type ProcessDocumentRes struct {
	// ChunkCount is emitted under the "chunkCount" JSON key.
	// NOTE(review): presumably the number of chunks produced — confirm with the producer.
	ChunkCount int64 `json:"chunkCount"`
	// CostTime is emitted under the "costTime" JSON key.
	// NOTE(review): the time unit is not visible here — confirm before relying on it.
	CostTime int64 `json:"costTime"`
}
// ListDocumentChunkRPC wraps a slice of document chunk pointers, serialized
// under the "list" JSON key.
type ListDocumentChunkRPC struct {
	// List holds the chunk entries.
	List []*DocumentChunkRPC `json:"list"`
}

View File

@@ -52,6 +52,7 @@ type ListKeywordReq struct {
DatasetId int64 `json:"datasetId"`
DocumentId int64 `json:"documentId"`
Word string `json:"word"`
Words []string `json:"words"`
Keyword string `json:"keyword" dc:"关键词搜索"`
}
@@ -62,9 +63,11 @@ type ListKeywordRes struct {
}
// KeywordVO is the view object for a keyword: its ID, text, weight, the
// dataset and document IDs, and the creation/update timestamps.
//
// Every field is declared exactly once; Go rejects a struct type that repeats
// a field name.
type KeywordVO struct {
	// Id is the keyword ID; the ",string" option encodes it as a JSON string.
	Id int64 `json:"id,string" dc:"id"`
	// Word is the keyword text.
	Word string `json:"word" dc:"关键词名称"`
	// Weight is the keyword weight.
	Weight int16 `json:"weight" dc:"权重"`
	// DatasetId is the dataset ID, encoded as a JSON string.
	DatasetId int64 `json:"datasetId,string" dc:"数据集ID"`
	// DocumentId is the document ID, encoded as a JSON string.
	DocumentId int64 `json:"documentId,string" dc:"文档ID"`
	// CreatedAt is the creation time.
	CreatedAt *gtime.Time `json:"createdAt" dc:"创建时间"`
	// UpdatedAt is the last update time.
	UpdatedAt *gtime.Time `json:"updatedAt" dc:"更新时间"`
}

21
model/dto/rag_query.go Normal file
View File

@@ -0,0 +1,21 @@
package dto
import (
"github.com/gogf/gf/v2/frame/g"
)
// RAGQueryReq is the request payload for executing a RAG query.
type RAGQueryReq struct {
	// Meta carries the route metadata declared in its tag (path /ragQuery, method post).
	g.Meta `path:"/ragQuery" method:"post" tags:"RAG查询" summary:"执行RAG查询" dc:"执行RAG查询"`
	// Content is the user's question; its validation tag marks it as required.
	Content string `json:"content" v:"required#查询内容不能为空" dc:"用户问题"`
	// DatasetIds lists the dataset IDs supplied with the query.
	DatasetIds []int64 `json:"datasetIds" dc:"数据集ID"`
	// TopK is the retrieval top-K; its tag declares a default of 5.
	TopK int `json:"topK" d:"5" dc:"检索topK默认5"`
}
// RAGQueryRes is the response payload of a RAG query.
type RAGQueryRes struct {
	// Answer is the generated answer.
	Answer string `json:"answer" dc:"生成的答案"`
	// DatasetId is the ID of the dataset that was used.
	DatasetId string `json:"datasetId" dc:"使用的数据集ID"`
	// Sources lists the reference sources.
	Sources []string `json:"sources" dc:"参考来源"`
}

65
model/dto/task.go Normal file
View File

@@ -0,0 +1,65 @@
package dto
import (
"rag/common/task"
)
// WriteTaskProgressReq is the request for writing task progress.
type WriteTaskProgressReq struct {
	// TaskType is the task type.
	TaskType task.TaskType `json:"taskType" dc:"任务类型"`
	// Status is the task status.
	Status task.TaskStatus `json:"status" dc:"任务状态"`
	// TaskId is the task ID.
	TaskId int64 `json:"taskId" dc:"任务ID"`
	// Remark is a free-form remark.
	Remark string `json:"remark" dc:"备注"`
}
// CreateTaskReq is the request for creating a task.
type CreateTaskReq struct {
	// TaskType is the task type.
	TaskType task.TaskType `json:"taskType" dc:"任务类型"`
	// Status is the task status.
	Status task.TaskStatus `json:"status" dc:"任务状态"`
	// TaskId is the task ID.
	TaskId int64 `json:"taskId" dc:"任务ID"`
	// Remark is a free-form remark.
	Remark string `json:"remark" dc:"备注"`
}
// UpdateTaskReq is the request for updating a task.
type UpdateTaskReq struct {
	// Id is described as the task ID by its dc tag.
	// NOTE(review): Id and TaskId carry the same description; confirm which
	// one is the record key and which one is the business task ID.
	Id int64 `json:"id" dc:"任务ID"`
	// TaskId is the task ID.
	TaskId int64 `json:"taskId" dc:"任务ID"`
	// Status is the task status.
	Status task.TaskStatus `json:"status" dc:"任务状态"`
	// Remark is a free-form remark.
	Remark string `json:"remark" dc:"备注"`
}
// DeleteTaskByTaskIdReq is the request for deleting a task by its task ID.
type DeleteTaskByTaskIdReq struct {
	// TaskId is the task ID; its validation tag marks it as required.
	TaskId int64 `json:"taskId" v:"required#任务id不能为空"`
}
// GetTaskReq is the request for fetching a task.
type GetTaskReq struct {
	// Id is described as the task ID by its dc tag.
	// NOTE(review): Id and TaskId carry the same description; confirm how
	// the two differ.
	Id int64 `json:"id" dc:"任务ID"`
	// TaskId is the task ID.
	TaskId int64 `json:"taskId" dc:"任务ID"`
	// TaskType is the task type.
	TaskType task.TaskType `json:"taskType" dc:"任务类型"`
}
// TaskVO is the view object for a task.
type TaskVO struct {
	// Id is the task ID.
	Id int64 `json:"id" dc:"任务ID"`
	// TaskType is the task type.
	TaskType task.TaskType `json:"taskType" dc:"任务类型"`
	// Status is the task status.
	Status task.TaskStatus `json:"status" dc:"任务状态"`
	// Priority is the task priority.
	Priority task.TaskPriority `json:"priority" dc:"任务优先级"`
	// ParentTaskID is the parent task ID.
	ParentTaskID int64 `json:"parentTaskId" dc:"父任务ID"`
	// TotalItems is the total number of items.
	TotalItems int64 `json:"totalItems" dc:"总项数"`
	// ProcessedItems is the number of items already processed.
	ProcessedItems int64 `json:"processedItems" dc:"已处理项数"`
	// Progress is the progress percentage.
	Progress float64 `json:"progress" dc:"进度百分比"`
	// StartTime is the start timestamp; it is a pointer, so it may be absent.
	StartTime *int64 `json:"startTime" dc:"开始时间戳"`
	// EndTime is the end timestamp; it is a pointer, so it may be absent.
	EndTime *int64 `json:"endTime" dc:"结束时间戳"`
	// Duration is the elapsed time in milliseconds.
	Duration int64 `json:"duration" dc:"耗时(毫秒)"`
	// SuccessCount is the number of successes.
	SuccessCount int64 `json:"successCount" dc:"成功数"`
	// FailCount is the number of failures.
	FailCount int64 `json:"failCount" dc:"失败数"`
	// Executor is the executor.
	Executor string `json:"executor" dc:"执行器"`
	// DocumentID is the document ID.
	DocumentID int64 `json:"documentId" dc:"文档ID"`
	// Remark is a free-form remark.
	Remark string `json:"remark" dc:"备注"`
	// Creator is the creator.
	Creator string `json:"creator" dc:"创建人"`
	// CreatedAt is the creation time.
	CreatedAt int64 `json:"createdAt" dc:"创建时间"`
	// Updater is the last updater.
	Updater string `json:"updater" dc:"更新人"`
	// UpdatedAt is the last update time.
	UpdatedAt int64 `json:"updatedAt" dc:"更新时间"`
}

66
model/entity/task.go Normal file
View File

@@ -0,0 +1,66 @@
package entity
import (
"rag/common/task"
"gitea.com/red-future/common/beans"
)
// taskCol groups the column-name strings for the Task model. It embeds
// beans.SQLBaseCol and adds one string field per task-specific column.
type taskCol struct {
	beans.SQLBaseCol
	TaskId   string
	TaskType string
	Status   string
	Executor string
	Remark   string
	// The fields below are currently disabled and kept only as commented-out code.
	//Priority string
	//ParentTaskId string
	//TotalItems string
	//ProcessedItems string
	//Progress string
	//StartTime string
	//EndTime string
	//Duration string
	//SuccessCount string
	//FailCount string
}
// TaskCol holds the column names for the Task model; the values match the
// orm tags declared on Task, and the base columns come from beans.DefSQLBaseCol.
var TaskCol = taskCol{
	SQLBaseCol: beans.DefSQLBaseCol,
	TaskId:     "task_id",
	TaskType:   "task_type",
	Status:     "status",
	Executor:   "executor",
	Remark:     "remark",
	// The entries below are currently disabled and kept only as commented-out code.
	//Priority: "priority",
	//ParentTaskId: "parent_task_id",
	//TotalItems: "total_items",
	//ProcessedItems: "processed_items",
	//Progress: "progress",
	//StartTime: "start_time",
	//EndTime: "end_time",
	//Duration: "duration",
	//SuccessCount: "success_count",
	//FailCount: "fail_count",
}
// Task is the model for the task record table.
type Task struct {
	// SQLBaseDO supplies the shared base fields; the orm tag marks it inline.
	beans.SQLBaseDO `orm:",inline"`
	// TaskId is the task ID (column task_id).
	TaskId int64 `orm:"task_id" json:"taskId" dc:"任务ID"`
	// TaskType is the task type (column task_type).
	TaskType task.TaskType `orm:"task_type" json:"taskType" dc:"任务类型"`
	// Status is the task status (column status).
	Status task.TaskStatus `orm:"status" json:"status" dc:"任务状态"`
	// Executor is the executor (column executor).
	Executor string `orm:"executor" json:"executor" dc:"执行器"`
	// Remark is a free-form remark (column remark).
	Remark string `orm:"remark" json:"remark" dc:"备注"`
	// The fields below are currently disabled and kept only as commented-out code.
	//Priority task.TaskPriority `orm:"priority" json:"priority" dc:"任务优先级"`
	//ParentTaskId int64 `orm:"parent_task_id" json:"parentTaskId" dc:"父任务ID"`
	//TotalItems int64 `orm:"total_items" json:"totalItems" dc:"总项数"`
	//ProcessedItems int64 `orm:"processed_items" json:"processedItems" dc:"已处理项数"`
	//SuccessCount int64 `orm:"success_count" json:"successCount" dc:"成功数"`
	//FailCount int64 `orm:"fail_count" json:"failCount" dc:"失败数"`
	//Progress float64 `orm:"progress" json:"progress" dc:"进度百分比"`
	//StartTime *gtime.Time `orm:"start_time" json:"startTime" dc:"开始时间戳"`
	//EndTime *gtime.Time `orm:"end_time" json:"endTime" dc:"结束时间戳"`
	//Duration int64 `orm:"duration" json:"duration" dc:"耗时(毫秒)"`
}