refactor: 优化数据库查询构建链式调用
This commit is contained in:
@@ -440,7 +440,7 @@ func (d *dataBase) Model(ctx context.Context, tableNameOrStruct ...any) *model {
|
|||||||
m.Sharding(shardingConfig).ShardingValue(user.TenantId)
|
m.Sharding(shardingConfig).ShardingValue(user.TenantId)
|
||||||
}
|
}
|
||||||
|
|
||||||
m.OmitNil().OmitEmpty().Hook(catchSQLHook())
|
m.OmitNil().Hook(catchSQLHook())
|
||||||
return &model{
|
return &model{
|
||||||
Model: m,
|
Model: m,
|
||||||
}
|
}
|
||||||
|
|||||||
63
rag/eino/document_semantic.go
Normal file
63
rag/eino/document_semantic.go
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
package eino
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/cloudwego/eino-ext/components/document/transformer/splitter/recursive"
|
||||||
|
"github.com/cloudwego/eino-ext/components/document/transformer/splitter/semantic"
|
||||||
|
"github.com/cloudwego/eino/components/document"
|
||||||
|
"github.com/cloudwego/eino/schema"
|
||||||
|
"github.com/gogf/gf/v2/frame/g"
|
||||||
|
)
|
||||||
|
|
||||||
|
// 全局只初始化一次
|
||||||
|
var (
|
||||||
|
splitter document.Transformer
|
||||||
|
)
|
||||||
|
|
||||||
|
// SemanticSplitDocument 语义分割文档
|
||||||
|
func SemanticSplitDocument(ctx context.Context, docs []*schema.Document) (res []*schema.Document, err error) {
|
||||||
|
if g.IsEmpty(splitter) {
|
||||||
|
// 默认分隔符(支持中英文)
|
||||||
|
separators := []string{"\n\n", "\n", "。", "!", "?", ";", ".", "!", "?", ";"}
|
||||||
|
// 读取配置,使用合理的默认值
|
||||||
|
bufferSize := g.Cfg().MustGet(ctx, "eino.splitter.bufferSize").Int()
|
||||||
|
percentile := g.Cfg().MustGet(ctx, "eino.splitter.percentile").Float64()
|
||||||
|
batchSize := g.Cfg().MustGet(ctx, "eino.splitter.batchSize").Int()
|
||||||
|
if batchSize <= 0 {
|
||||||
|
batchSize = 10 // doubao-embedding-vision 限制每批最多 10 个
|
||||||
|
}
|
||||||
|
|
||||||
|
// 使用批量包装器
|
||||||
|
batchEmbedder := NewBatchEmbedder(Embedder, batchSize)
|
||||||
|
|
||||||
|
splitter, err = semantic.NewSplitter(ctx, &semantic.Config{
|
||||||
|
Embedding: batchEmbedder,
|
||||||
|
BufferSize: bufferSize,
|
||||||
|
Percentile: percentile,
|
||||||
|
Separators: separators,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return splitter.Transform(ctx, docs)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecursiveSplitDocument 递归分割文档
|
||||||
|
func RecursiveSplitDocument(ctx context.Context, docs []*schema.Document) (res []*schema.Document, err error) {
|
||||||
|
if g.IsEmpty(splitter) {
|
||||||
|
// 默认分隔符(支持中英文)
|
||||||
|
separators := []string{"\n\n", "\n", "。", "!", "?", ";", ".", "!", "?", ";"}
|
||||||
|
splitter, err = recursive.NewSplitter(ctx, &recursive.Config{
|
||||||
|
ChunkSize: 1500,
|
||||||
|
OverlapSize: 300,
|
||||||
|
KeepType: recursive.KeepTypeNone,
|
||||||
|
Separators: separators,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return splitter.Transform(ctx, docs)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user