refactor: 重构文档向量相关代码结构

This commit is contained in:
2026-04-10 13:12:19 +08:00
parent a7b8713e26
commit 94df015aa9
30 changed files with 335 additions and 506 deletions

View File

@@ -51,7 +51,7 @@ func (d *datasetIndexDao) InsertIndex(ctx context.Context, indexName string) (er
USING ivfflat (vector vector_cosine_ops)
WITH (lists = 100)
WHERE vector IS NOT NULL;
`, indexName, gfdb.TablePrefix+public.TableNameDocumentChunk)
`, indexName, gfdb.TablePrefix+public.TableNameDocumentVector)
_, err = db.Exec(ctx, sqlStr)
return
}

View File

@@ -15,17 +15,17 @@ import (
"github.com/pgvector/pgvector-go"
)
// DocumentChunk is the package-level instance of documentChunkDao.
var DocumentChunk = &documentChunkDao{}

// DocumentVector is the package-level instance of documentVectorDao.
var DocumentVector = &documentVectorDao{}

// documentChunkDao is a stateless receiver type; it declares no fields.
type documentChunkDao struct{}

// documentVectorDao is a stateless receiver type; it declares no fields.
type documentVectorDao struct{}
// BatchInsert 批量插入文件块
func (d *documentChunkDao) BatchInsert(ctx context.Context, req []*dto.VectorDocumentChunkMsg) (rows int64, err error) {
var res []*entity.DocumentChunk
func (d *documentVectorDao) BatchInsert(ctx context.Context, req []*dto.VectorDocumentVectorMsg) (rows int64, err error) {
var res []*entity.DocumentVector
if err = gconv.Structs(req, &res); err != nil {
return
}
r, err := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentChunk).Data(&res).Insert()
r, err := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentVector).Data(&res).Insert()
if err != nil {
return
}
@@ -33,9 +33,9 @@ func (d *documentChunkDao) BatchInsert(ctx context.Context, req []*dto.VectorDoc
}
// Update 更新文件块
func (d *documentChunkDao) Update(ctx context.Context, req *dto.UpdateDocumentChunkReq) (rows int64, err error) {
model := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentChunk)
r, err := model.Data(&req).Where(entity.DocumentChunkCol.Id, req.Id).Update()
func (d *documentVectorDao) Update(ctx context.Context, req *dto.UpdateDocumentVectorReq) (rows int64, err error) {
model := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentVector)
r, err := model.Data(&req).Where(entity.DocumentVectorCol.Id, req.Id).Update()
if err != nil {
return
}
@@ -43,13 +43,13 @@ func (d *documentChunkDao) Update(ctx context.Context, req *dto.UpdateDocumentCh
}
// List 文件块列表
func (d *documentChunkDao) List(ctx context.Context, req *dto.ListDocumentChunkReq, fields ...string) (res []*entity.DocumentChunk, total int, err error) {
model := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentChunk).Fields(fields).OmitEmpty().
Where(entity.DocumentChunkCol.DatasetId, req.DatasetId).
Where(entity.DocumentChunkCol.DocumentId, req.DocumentId).
Where(entity.DocumentChunkCol.Status, req.Status).
Where(entity.DocumentChunkCol.VectorStatus, req.VectorStatus).
OrderDesc(entity.DocumentChunkCol.CreatedAt)
func (d *documentVectorDao) List(ctx context.Context, req *dto.ListDocumentVectorReq, fields ...string) (res []*entity.DocumentVector, total int, err error) {
model := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentVector).Fields(fields).OmitEmpty().
Where(entity.DocumentVectorCol.DatasetId, req.DatasetId).
Where(entity.DocumentVectorCol.DocumentId, req.DocumentId).
Where(entity.DocumentVectorCol.Status, req.Status).
Where(entity.DocumentVectorCol.VectorStatus, req.VectorStatus).
OrderDesc(entity.DocumentVectorCol.CreatedAt)
if req.Page != nil {
model.Page(int(req.Page.PageNum), int(req.Page.PageSize))
}
@@ -61,27 +61,22 @@ func (d *documentChunkDao) List(ctx context.Context, req *dto.ListDocumentChunkR
return
}
func (d *documentChunkDao) GetAllByVector(ctx context.Context, datasetId []int64, queryVec pgvector.Vector, topK int) (list gdb.List, err error) {
sql := `
SELECT id, content, dataset_id, document_id,
vector <=> ? AS distance
FROM rag_vector_document_chunk
WHERE dataset_id IN (?)
AND vector IS NOT NULL
ORDER BY distance ASC
LIMIT ?
`
// 顺序vector, dataset_id, topK
result, err := gfdb.DB(ctx, public.DbNameVector).GetAll(ctx, sql, queryVec, datasetId, topK)
// GetAllByVector returns up to topK rows from the document vector table whose
// dataset id is in datasetIds and whose vector column is not NULL, ordered by
// ascending "distance" (the value of `vector <=> ?` for the supplied vector).
//
// Each returned row carries id, content, dataset_id, document_id and distance.
// On a query error the list is nil and the error is returned unchanged.
//
// NOTE(review): this relies on Fields binding the trailing vector argument to
// the `?` placeholder in the select expression — confirm against the
// gfdb/GoFrame version in use.
func (d *documentVectorDao) GetAllByVector(ctx context.Context, datasetIds []int64, vector pgvector.Vector, topK int) (list gdb.List, err error) {
	// Select expression; the `?` is expected to receive the query vector.
	const selectExpr = "id, content, dataset_id, document_id, vector <=> ? AS distance"

	table := gfdb.DB(ctx, public.DbNameVector).Model(ctx, public.TableNameDocumentVector)

	// Restrict to the requested datasets, drop rows without a vector, and keep
	// the topK rows with the smallest distance.
	nearest := table.
		Fields(selectExpr, vector).
		WhereIn(entity.DocumentVectorCol.DatasetId, datasetIds).
		WhereNotNull(entity.DocumentVectorCol.Vector).
		OrderAsc("distance").
		Limit(topK)

	rows, err := nearest.All()
	if err != nil {
		return nil, err
	}
	return rows.List(), nil
}
// SearchByKeywords 通过关键词全文检索文档块
func (d *documentChunkDao) SearchByKeywords(ctx context.Context, query string, datasetIds []int64, topK int) (list gdb.List, err error) {
func (d *documentVectorDao) SearchByKeywords(ctx context.Context, query string, datasetIds []int64, topK int) (list gdb.List, err error) {
// 构建 meilisearch 查询参数
searchParams := &meilisearch.SearchParams{
Query: query,