feat: rag初始版
This commit is contained in:
107
common/eino/b.go
Normal file
107
common/eino/b.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package eino
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/elastic/go-elasticsearch/v8"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/indexer/es8"
|
||||
)
|
||||
|
||||
// Names used on the Elasticsearch side: the target index and the fields
// written for every indexed document.
const (
	// indexName is the index the example documents are stored in.
	indexName = "eino_example"
	// fieldContent is the ES field that receives the document text.
	fieldContent = "content"
	// fieldContentVector is the ES field that receives the embedding of the
	// document text; it is mapped as a dense_vector in the index spec.
	fieldContentVector = "content_vector"
	// fieldExtraLocation is the ES field that receives the location metadata.
	fieldExtraLocation = "location"
)

// docExtraLocation is the key under which a document's MetaData map carries
// its location value.
const docExtraLocation = "location"
|
||||
|
||||
func TestIndexer() {
|
||||
ctx := context.Background()
|
||||
|
||||
// 1. 创建 ES 客户端
|
||||
client, err := elasticsearch.NewClient(elasticsearch.Config{
|
||||
Addresses: []string{"http://localhost:9200"},
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("create client error: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 2. 定义 Index Spec(选填:如果索引不存在,将自动创建)
|
||||
indexSpec := &es8.IndexSpec{
|
||||
Settings: map[string]any{
|
||||
"number_of_shards": 1,
|
||||
"number_of_replicas": 0,
|
||||
},
|
||||
Mappings: map[string]any{
|
||||
"properties": map[string]any{
|
||||
fieldContentVector: map[string]any{
|
||||
"type": "dense_vector",
|
||||
"dims": 1024,
|
||||
"index": true,
|
||||
"similarity": "l2_norm",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// 4. 准备文档
|
||||
// 文档通常包含 ID 和 Content
|
||||
// 也可以包含额外的 Metadata 用于过滤或其他用途
|
||||
docs := []*schema.Document{
|
||||
{
|
||||
ID: "1",
|
||||
Content: "Eiffel Tower: Located in Paris, France.",
|
||||
MetaData: map[string]any{
|
||||
docExtraLocation: "France",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "2",
|
||||
Content: "The Great Wall: Located in China.",
|
||||
MetaData: map[string]any{
|
||||
docExtraLocation: "China",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// 5. 创建 ES 索引器组件
|
||||
indexer, err := es8.NewIndexer(ctx, &es8.IndexerConfig{
|
||||
Client: client,
|
||||
Index: indexName,
|
||||
IndexSpec: indexSpec, // 添加此项以启用自动索引创建
|
||||
BatchSize: 10,
|
||||
// DocumentToFields 指定如何将文档字段映射到 ES 字段
|
||||
DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]es8.FieldValue, err error) {
|
||||
return map[string]es8.FieldValue{
|
||||
fieldContent: {
|
||||
Value: doc.Content,
|
||||
EmbedKey: fieldContentVector, // 对文档内容进行向量化并保存到 "content_vector" 字段
|
||||
},
|
||||
fieldExtraLocation: {
|
||||
// 额外的 metadata 字段
|
||||
Value: doc.MetaData[docExtraLocation],
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
// 提供 embedding 组件用于向量化
|
||||
Embedding: EmbedderDashscope,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("create indexer error: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 6. 索引文档
|
||||
ids, err := indexer.Store(ctx, docs)
|
||||
if err != nil {
|
||||
fmt.Printf("index error: %v\n", err)
|
||||
return
|
||||
}
|
||||
fmt.Println("indexed ids:", ids)
|
||||
}
|
||||
Reference in New Issue
Block a user