feat: rag初始版

This commit is contained in:
2026-04-03 09:16:53 +08:00
commit 6f5c80da16
38 changed files with 3840 additions and 0 deletions

37
model/entity/dataset.go Normal file
View File

@@ -0,0 +1,37 @@
package entity
import (
"gitea.com/red-future/common/beans"
)
// datasetCol is the shape of the column-name holder used for Dataset rows:
// the embedded beans.SQLBaseCol plus one string slot per Dataset-specific
// column. The populated instance is DatasetCol.
type datasetCol struct {
	beans.SQLBaseCol

	Name, Description           string
	Embedding, Dimension        string
	DocumentCount, DocumentSize string
}
// DatasetCol carries the column name for every Dataset field, so callers can
// reference columns through identifiers instead of repeating string literals.
// The shared base columns are taken from beans.DefSQLBaseCol.
var DatasetCol = datasetCol{
	SQLBaseCol:    beans.DefSQLBaseCol,
	Name:          "name",
	Description:   "description",
	Embedding:     "embedding",
	Dimension:     "dimension",
	DocumentCount: "document_count",
	DocumentSize:  "document_size",
}
// Dataset is the row model for the dataset table.
//
// Per the dc tags: Name and Description label the dataset, Embedding names the
// vector model, Dimension is the vector dimension, and DocumentCount and
// DocumentSize hold the document count and document size. Base columns are
// inlined from beans.SQLBaseDO.
type Dataset struct {
	beans.SQLBaseDO `orm:",inline"`

	Name          string `orm:"name" json:"name" dc:"数据集名称"`
	Description   string `orm:"description" json:"description" dc:"数据集描述"`
	Embedding     string `orm:"embedding" json:"embedding" dc:"向量模型"`
	Dimension     int    `orm:"dimension" json:"dimension" dc:"向量维度"`
	DocumentCount int64  `orm:"document_count" json:"documentCount" dc:"文档数量"`
	DocumentSize  int64  `orm:"document_size" json:"documentSize" dc:"文档大小"`
}

View File

@@ -0,0 +1,46 @@
package entity
import "gitea.com/red-future/common/beans"
// datasetIndexCol is the shape of the column-name holder used for
// DatasetIndex rows: the embedded beans.SQLBaseCol plus one string slot per
// index-specific column. The populated instance is DatasetIndexCol.
type datasetIndexCol struct {
	beans.SQLBaseCol

	Status, VectorStatus     string
	DatasetId, Name          string
	Collection, Dimension    string
	FieldType, MetricType    string
	VectorCount, Description string
}
// DatasetIndexCol carries the column name for every datasetIndexCol slot, so
// callers can reference columns through identifiers instead of string
// literals. The shared base columns are taken from beans.DefSQLBaseCol.
var DatasetIndexCol = datasetIndexCol{
	SQLBaseCol:   beans.DefSQLBaseCol,
	Status:       "status",
	VectorStatus: "vector_status",
	DatasetId:    "dataset_id",
	Name:         "name",
	Collection:   "collection",
	Dimension:    "dimension",
	FieldType:    "field_type",
	MetricType:   "metric_type",
	VectorCount:  "vector_count",
	Description:  "description",
}
// DatasetIndex is the dataset index entity.
//
// Per the dc tags: DatasetId is the owning dataset ID, Name the index name,
// Collection the vector collection name, Dimension the vector dimension,
// FieldType the field type (float, binary), MetricType the metric type
// (L2, IP, COSINE), VectorCount the number of vectors, and Description a
// free-form description. Base columns are inlined from beans.SQLBaseDO.
//
// NOTE(review): Status is declared as *int8 while its dc tag lists textual
// states (creating, ready, error) — confirm the intended encoding.
// NOTE(review): datasetIndexCol declares a VectorStatus column but this
// struct has no matching field — confirm whether that is intentional.
type DatasetIndex struct {
	beans.SQLBaseDO `orm:",inline"`

	DatasetId   int64  `orm:"dataset_id" json:"datasetId" dc:"数据集ID"`
	Name        string `orm:"name" json:"name" dc:"索引名称"`
	Collection  string `orm:"collection" json:"collection" dc:"向量集合名称"`
	Dimension   int    `orm:"dimension" json:"dimension" dc:"向量维度"`
	FieldType   string `orm:"field_type" json:"fieldType" dc:"字段类型: float, binary"`
	MetricType  string `orm:"metric_type" json:"metricType" dc:"度量类型: L2, IP, COSINE"`
	Status      *int8  `orm:"status" json:"status" dc:"状态: creating, ready, error"`
	VectorCount int64  `orm:"vector_count" json:"vectorCount" dc:"向量数量"`
	Description string `orm:"description" json:"description" dc:"描述"`
}

64
model/entity/document.go Normal file
View File

@@ -0,0 +1,64 @@
package entity
import (
"gitea.com/red-future/common/beans"
"rag/consts/document"
)
// documentCol is the shape of the column-name holder used for Document rows:
// the embedded beans.SQLBaseCol plus one string slot per document-specific
// column. The populated instance is DocumentCol.
type documentCol struct {
	beans.SQLBaseCol

	DatasetId, Title, Content string
	Format, Source, SourceId  string
	Status, VectorStatus      string
	ChunkCount, FileSize      string
	FilePath, Metadata        string
}
// DocumentCol carries the column name for every Document field, so callers
// can reference columns through identifiers instead of string literals.
// The shared base columns are taken from beans.DefSQLBaseCol.
var DocumentCol = documentCol{
	SQLBaseCol:   beans.DefSQLBaseCol,
	DatasetId:    "dataset_id",
	Title:        "title",
	Content:      "content",
	Format:       "format",
	Source:       "source",
	SourceId:     "source_id",
	Status:       "status",
	VectorStatus: "vector_status",
	ChunkCount:   "chunk_count",
	FileSize:     "file_size",
	FilePath:     "file_path",
	Metadata:     "metadata",
}
// Document is the file entity.
//
// Per the dc tags: DatasetId is the owning dataset ID; Title, Content and
// Format are the file's title, content and format; Source and SourceId
// identify its origin; Status and VectorStatus are the status and vector
// status; ChunkCount is the number of chunks; FileSize is the file size;
// FilePath is the file storage path; Metadata is the file metadata. Base
// columns are inlined from beans.SQLBaseDO.
type Document struct {
	beans.SQLBaseDO `orm:",inline"`

	DatasetId    int64                 `orm:"dataset_id" json:"datasetId" dc:"数据集ID"`
	Title        string                `orm:"title" json:"title" dc:"文件标题"`
	Content      string                `orm:"content" json:"content" dc:"文件内容"`
	Format       string                `orm:"format" json:"format" dc:"文件格式"`
	Source       string                `orm:"source" json:"source" dc:"来源"`
	SourceId     string                `orm:"source_id" json:"sourceId" dc:"来源ID"`
	Status       document.Status       `orm:"status" json:"status" dc:"状态"`
	VectorStatus document.VectorStatus `orm:"vector_status" json:"vectorStatus" dc:"向量状态"`
	ChunkCount   int64                 `orm:"chunk_count" json:"chunkCount" dc:"切分块数量"`
	FileSize     int64                 `orm:"file_size" json:"fileSize" dc:"文件大小"`
	FilePath     string                `orm:"file_path" json:"filePath" dc:"文件存储路径"`
	Metadata     *Metadata             `orm:"metadata" json:"metadata" dc:"文件元信息"`
}
// Metadata is the file metadata attached to a Document.
//
// Per the dc tags: Author is the author, Tags the list of tags, and Custom a
// map of custom string fields.
type Metadata struct {
	Author string            `orm:"author" json:"author" dc:"作者"`
	Tags   []string          `orm:"tags" json:"tags" dc:"标签"`
	Custom map[string]string `orm:"custom" json:"custom" dc:"自定义字段"`
}

View File

@@ -0,0 +1,49 @@
package entity
import (
"rag/consts/document"
"gitea.com/red-future/common/beans"
"github.com/pgvector/pgvector-go"
)
// documentChunkCol is the shape of the column-name holder used for
// DocumentChunk rows: the embedded beans.SQLBaseCol plus one string slot per
// chunk-specific column. The populated instance is DocumentChunkCol.
type documentChunkCol struct {
	beans.SQLBaseCol

	Status, VectorStatus         string
	DatasetId, DocumentId        string
	Content, ContentHash         string
	ChunkIndex, Vector, Metadata string
}
// DocumentChunkCol carries the column name for every DocumentChunk field, so
// callers can reference columns through identifiers instead of string
// literals. The shared base columns are taken from beans.DefSQLBaseCol.
var DocumentChunkCol = documentChunkCol{
	SQLBaseCol:   beans.DefSQLBaseCol,
	Status:       "status",
	VectorStatus: "vector_status",
	DatasetId:    "dataset_id",
	DocumentId:   "document_id",
	Content:      "content",
	ContentHash:  "content_hash",
	ChunkIndex:   "chunk_index",
	Vector:       "vector",
	Metadata:     "metadata",
}
// DocumentChunk is the document chunk entity.
//
// Per the dc tags: Status and VectorStatus are the status and vector status;
// DatasetId and DocumentId are the dataset ID and file ID; Content is the
// chunk content and ContentHash its hash; ChunkIndex is the chunk index;
// Vector is the vector; Metadata is free-form metadata. Base columns are
// inlined from beans.SQLBaseDO.
type DocumentChunk struct {
	beans.SQLBaseDO `orm:",inline"`

	Status       document.Status        `orm:"status" json:"status" dc:"状态"`
	VectorStatus document.VectorStatus  `orm:"vector_status" json:"vectorStatus" dc:"向量状态"`
	DatasetId    int64                  `orm:"dataset_id" json:"datasetId" dc:"数据集ID"`
	DocumentId   int64                  `orm:"document_id" json:"documentId" dc:"文件ID"`
	Content      string                 `orm:"content" json:"content" dc:"切分块内容"`
	ContentHash  string                 `orm:"content_hash" json:"contentHash" dc:"切分块内容哈希"`
	ChunkIndex   int64                  `orm:"chunk_index" json:"chunkIndex" dc:"切分块索引"`
	Vector       pgvector.Vector        `orm:"vector" json:"vector" dc:"向量"`
	Metadata     map[string]interface{} `orm:"metadata" json:"metadata" dc:"元信息"`
}

27
model/entity/keyword.go Normal file
View File

@@ -0,0 +1,27 @@
package entity
import "gitea.com/red-future/common/beans"
// keywordCol is the shape of the column-name holder used for Keyword rows:
// the embedded beans.SQLBaseCol plus one string slot per keyword-specific
// column. The populated instance is KeywordCol.
type keywordCol struct {
	beans.SQLBaseCol

	DatasetId, DocumentId string
	Word, Weight          string
}
// KeywordCol carries the column name for every Keyword field, so callers can
// reference columns through identifiers instead of string literals. The
// shared base columns are taken from beans.DefSQLBaseCol.
var KeywordCol = keywordCol{
	SQLBaseCol: beans.DefSQLBaseCol,
	DatasetId:  "dataset_id",
	DocumentId: "document_id",
	Word:       "word",
	Weight:     "weight",
}
// Keyword is the keyword entity.
//
// Per the dc tags: DatasetId is the dataset ID, DocumentId the file ID, Word
// the keyword itself, and Weight its weight. Base columns are inlined from
// beans.SQLBaseDO.
type Keyword struct {
	beans.SQLBaseDO `orm:",inline"`

	DatasetId  int64  `orm:"dataset_id" json:"datasetId" dc:"数据集ID"`
	DocumentId int64  `orm:"document_id" json:"documentId" dc:"文件ID"`
	Word       string `orm:"word" json:"word" dc:"关键词"`
	Weight     int16  `orm:"weight" json:"weight" dc:"权重"`
}