feat: 支持多租户多模型对话及文档去重优化
This commit is contained in:
@@ -2,6 +2,7 @@ package eino
|
||||
|
||||
import (
|
||||
"context"
|
||||
"rag/consts/model"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/document/transformer/splitter/recursive"
|
||||
"github.com/cloudwego/eino-ext/components/document/transformer/splitter/semantic"
|
||||
@@ -10,7 +11,7 @@ import (
|
||||
)
|
||||
|
||||
// SemanticSplitDocument 语义分割文档
|
||||
func SemanticSplitDocument(ctx context.Context, docs []*schema.Document) (res []*schema.Document, err error) {
|
||||
func SemanticSplitDocument(ctx context.Context, docs []*schema.Document, vectorModel model.ModelConfigType) (res []*schema.Document, err error) {
|
||||
// 默认分隔符(支持中英文)
|
||||
separators := []string{"\n\n", "\n", "。", "!", "?", ";", ".", "!", "?", ";"}
|
||||
// 读取配置,使用合理的默认值
|
||||
@@ -18,24 +19,14 @@ func SemanticSplitDocument(ctx context.Context, docs []*schema.Document) (res []
|
||||
minChunkSize := g.Cfg().MustGet(ctx, "eino.splitter.minChunkSize").Int()
|
||||
percentile := g.Cfg().MustGet(ctx, "eino.splitter.percentile").Float64()
|
||||
batchSize := g.Cfg().MustGet(ctx, "eino.splitter.batchSize").Int()
|
||||
if batchSize <= 0 {
|
||||
batchSize = 10 // doubao-embedding-vision 限制每批最多 10 个
|
||||
}
|
||||
|
||||
// 使用批量包装器
|
||||
var batchEmbedder *BatchEmbedder
|
||||
provider := g.Cfg().MustGet(ctx, "eino.embedding.provider").String()
|
||||
switch provider {
|
||||
case providerArk:
|
||||
batchEmbedder = NewBatchEmbedder(EmbedderArk, batchSize)
|
||||
case providerOpenai:
|
||||
batchEmbedder = NewBatchEmbedder(EmbedderOpenAI, batchSize)
|
||||
case providerDashscope:
|
||||
batchEmbedder = NewBatchEmbedder(EmbedderDashscope, batchSize)
|
||||
embedder, err := GetTenantEmbedderByType(ctx, vectorModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
splitter, err := semantic.NewSplitter(ctx, &semantic.Config{
|
||||
Embedding: batchEmbedder,
|
||||
Embedding: NewBatchEmbedder(embedder, batchSize),
|
||||
BufferSize: bufferSize,
|
||||
MinChunkSize: minChunkSize,
|
||||
Percentile: percentile,
|
||||
|
||||
Reference in New Issue
Block a user