108 lines
2.5 KiB
Go
108 lines
2.5 KiB
Go
package eino
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
|
||
"github.com/cloudwego/eino/schema"
|
||
"github.com/elastic/go-elasticsearch/v8"
|
||
|
||
"github.com/cloudwego/eino-ext/components/indexer/es8"
|
||
)
|
||
|
||
// Names shared by the indexing example below.
const (
	// indexName is the Elasticsearch index the example indexer writes to.
	indexName = "eino_example"

	// fieldContent is the ES field that receives the document content.
	fieldContent = "content"
	// fieldContentVector is the ES dense_vector field that receives the
	// embedding of the document content.
	fieldContentVector = "content_vector"
	// fieldExtraLocation is the ES field that receives the location metadata.
	fieldExtraLocation = "location"

	// docExtraLocation is the key under which the location is kept in a
	// document's MetaData map.
	docExtraLocation = "location"
)
|
||
|
||
func TestIndexer() {
|
||
ctx := context.Background()
|
||
|
||
// 1. 创建 ES 客户端
|
||
client, err := elasticsearch.NewClient(elasticsearch.Config{
|
||
Addresses: []string{"http://localhost:9200"},
|
||
})
|
||
|
||
if err != nil {
|
||
fmt.Printf("create client error: %v\n", err)
|
||
return
|
||
}
|
||
|
||
// 2. 定义 Index Spec(选填:如果索引不存在,将自动创建)
|
||
indexSpec := &es8.IndexSpec{
|
||
Settings: map[string]any{
|
||
"number_of_shards": 1,
|
||
"number_of_replicas": 0,
|
||
},
|
||
Mappings: map[string]any{
|
||
"properties": map[string]any{
|
||
fieldContentVector: map[string]any{
|
||
"type": "dense_vector",
|
||
"dims": 1024,
|
||
"index": true,
|
||
"similarity": "l2_norm",
|
||
},
|
||
},
|
||
},
|
||
}
|
||
|
||
// 4. 准备文档
|
||
// 文档通常包含 ID 和 Content
|
||
// 也可以包含额外的 Metadata 用于过滤或其他用途
|
||
docs := []*schema.Document{
|
||
{
|
||
ID: "1",
|
||
Content: "Eiffel Tower: Located in Paris, France.",
|
||
MetaData: map[string]any{
|
||
docExtraLocation: "France",
|
||
},
|
||
},
|
||
{
|
||
ID: "2",
|
||
Content: "The Great Wall: Located in China.",
|
||
MetaData: map[string]any{
|
||
docExtraLocation: "China",
|
||
},
|
||
},
|
||
}
|
||
|
||
// 5. 创建 ES 索引器组件
|
||
indexer, err := es8.NewIndexer(ctx, &es8.IndexerConfig{
|
||
Client: client,
|
||
Index: indexName,
|
||
IndexSpec: indexSpec, // 添加此项以启用自动索引创建
|
||
BatchSize: 10,
|
||
// DocumentToFields 指定如何将文档字段映射到 ES 字段
|
||
DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]es8.FieldValue, err error) {
|
||
return map[string]es8.FieldValue{
|
||
fieldContent: {
|
||
Value: doc.Content,
|
||
EmbedKey: fieldContentVector, // 对文档内容进行向量化并保存到 "content_vector" 字段
|
||
},
|
||
fieldExtraLocation: {
|
||
// 额外的 metadata 字段
|
||
Value: doc.MetaData[docExtraLocation],
|
||
},
|
||
}, nil
|
||
},
|
||
// 提供 embedding 组件用于向量化
|
||
Embedding: EmbedderDashscope,
|
||
})
|
||
|
||
if err != nil {
|
||
fmt.Printf("create indexer error: %v\n", err)
|
||
return
|
||
}
|
||
|
||
// 6. 索引文档
|
||
ids, err := indexer.Store(ctx, docs)
|
||
if err != nil {
|
||
fmt.Printf("index error: %v\n", err)
|
||
return
|
||
}
|
||
fmt.Println("indexed ids:", ids)
|
||
}
|