diff --git a/db/gfdb/gfdb.go b/db/gfdb/gfdb.go
index f3cfe6f..d3c0aaa 100644
--- a/db/gfdb/gfdb.go
+++ b/db/gfdb/gfdb.go
@@ -440,7 +440,7 @@ func (d *dataBase) Model(ctx context.Context, tableNameOrStruct ...any) *model {
 		m.Sharding(shardingConfig).ShardingValue(user.TenantId)
 	}
 
-	m.OmitNil().OmitEmpty().Hook(catchSQLHook())
+	m.OmitNil().Hook(catchSQLHook())
 	return &model{
 		Model: m,
 	}
diff --git a/rag/eino/document_semantic.go b/rag/eino/document_semantic.go
new file mode 100644
index 0000000..1fa53bd
--- /dev/null
+++ b/rag/eino/document_semantic.go
@@ -0,0 +1,110 @@
+package eino
+
+import (
+	"context"
+	"sync"
+
+	"github.com/cloudwego/eino-ext/components/document/transformer/splitter/recursive"
+	"github.com/cloudwego/eino-ext/components/document/transformer/splitter/semantic"
+	"github.com/cloudwego/eino/components/document"
+	"github.com/cloudwego/eino/schema"
+	"github.com/gogf/gf/v2/frame/g"
+)
+
+// Each splitter is created lazily on first use and cached in its own variable.
+// A single shared variable would let whichever function ran first decide the
+// splitting strategy used by both.
+var (
+	splitterMu        sync.Mutex // guards semanticSplitter and recursiveSplitter
+	semanticSplitter  document.Transformer
+	recursiveSplitter document.Transformer
+)
+
+// defaultSeparators returns a fresh copy of the separators shared by both
+// splitters: paragraph and line breaks, then full-width (Chinese) and ASCII
+// sentence punctuation.
+func defaultSeparators() []string {
+	return []string{"\n\n", "\n", "。", "!", "?", ";", ".", "!", "?", ";"}
+}
+
+// SemanticSplitDocument splits docs with the embedding-based semantic splitter.
+//
+// The splitter is built on the first call and reused afterwards. When building
+// fails the error is returned, nothing is cached, and the next call retries.
+func SemanticSplitDocument(ctx context.Context, docs []*schema.Document) (res []*schema.Document, err error) {
+	s, err := loadSemanticSplitter(ctx)
+	if err != nil {
+		return nil, err
+	}
+	return s.Transform(ctx, docs)
+}
+
+// RecursiveSplitDocument splits docs with the recursive, separator-based
+// splitter (ChunkSize 1500, OverlapSize 300, KeepTypeNone).
+//
+// The splitter is built on the first call and reused afterwards. When building
+// fails the error is returned, nothing is cached, and the next call retries.
+func RecursiveSplitDocument(ctx context.Context, docs []*schema.Document) (res []*schema.Document, err error) {
+	s, err := loadRecursiveSplitter(ctx)
+	if err != nil {
+		return nil, err
+	}
+	return s.Transform(ctx, docs)
+}
+
+// loadSemanticSplitter returns the cached semantic splitter, creating it from
+// the eino.splitter.* configuration on first use.
+func loadSemanticSplitter(ctx context.Context) (document.Transformer, error) {
+	splitterMu.Lock()
+	defer splitterMu.Unlock()
+
+	if semanticSplitter != nil {
+		return semanticSplitter, nil
+	}
+
+	// bufferSize and percentile are passed through exactly as configured; only
+	// batchSize has a fallback.
+	cfg := g.Cfg()
+	bufferSize := cfg.MustGet(ctx, "eino.splitter.bufferSize").Int()
+	percentile := cfg.MustGet(ctx, "eino.splitter.percentile").Float64()
+	batchSize := cfg.MustGet(ctx, "eino.splitter.batchSize").Int()
+	if batchSize <= 0 {
+		batchSize = 10 // doubao-embedding-vision accepts at most 10 inputs per batch
+	}
+
+	s, err := semantic.NewSplitter(ctx, &semantic.Config{
+		// NewBatchEmbedder wraps Embedder with the configured batch size.
+		Embedding:  NewBatchEmbedder(Embedder, batchSize),
+		BufferSize: bufferSize,
+		Percentile: percentile,
+		Separators: defaultSeparators(),
+	})
+	if err != nil {
+		return nil, err
+	}
+	semanticSplitter = s
+	return s, nil
+}
+
+// loadRecursiveSplitter returns the cached recursive splitter, creating it on
+// first use.
+func loadRecursiveSplitter(ctx context.Context) (document.Transformer, error) {
+	splitterMu.Lock()
+	defer splitterMu.Unlock()
+
+	if recursiveSplitter != nil {
+		return recursiveSplitter, nil
+	}
+
+	s, err := recursive.NewSplitter(ctx, &recursive.Config{
+		ChunkSize:   1500,
+		OverlapSize: 300,
+		KeepType:    recursive.KeepTypeNone,
+		Separators:  defaultSeparators(),
+	})
+	if err != nil {
+		return nil, err
+	}
+	recursiveSplitter = s
+	return s, nil
+}