108 lines
2.5 KiB
Go
108 lines
2.5 KiB
Go
|
|
package eino
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"context"
|
|||
|
|
"fmt"
|
|||
|
|
|
|||
|
|
"github.com/cloudwego/eino/schema"
|
|||
|
|
"github.com/elastic/go-elasticsearch/v8"
|
|||
|
|
|
|||
|
|
"github.com/cloudwego/eino-ext/components/indexer/es8"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// Names on the Elasticsearch side: the target index and the fields written
// for every indexed document.
const (
	// indexName is the index the example indexer writes to.
	indexName = "eino_example"
	// fieldContent is the ES field that receives the document text.
	fieldContent = "content"
	// fieldContentVector is the ES field that receives the embedding of the
	// document text; it is also the field mapped as a dense vector.
	fieldContentVector = "content_vector"
	// fieldExtraLocation is the ES field that receives the location metadata.
	fieldExtraLocation = "location"
)

// docExtraLocation is the key under which a document's MetaData map carries
// its location value.
const docExtraLocation = "location"
|
|||
|
|
|
|||
|
|
func TestIndexer() {
|
|||
|
|
ctx := context.Background()
|
|||
|
|
|
|||
|
|
// 1. 创建 ES 客户端
|
|||
|
|
client, err := elasticsearch.NewClient(elasticsearch.Config{
|
|||
|
|
Addresses: []string{"http://localhost:9200"},
|
|||
|
|
})
|
|||
|
|
|
|||
|
|
if err != nil {
|
|||
|
|
fmt.Printf("create client error: %v\n", err)
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 2. 定义 Index Spec(选填:如果索引不存在,将自动创建)
|
|||
|
|
indexSpec := &es8.IndexSpec{
|
|||
|
|
Settings: map[string]any{
|
|||
|
|
"number_of_shards": 1,
|
|||
|
|
"number_of_replicas": 0,
|
|||
|
|
},
|
|||
|
|
Mappings: map[string]any{
|
|||
|
|
"properties": map[string]any{
|
|||
|
|
fieldContentVector: map[string]any{
|
|||
|
|
"type": "dense_vector",
|
|||
|
|
"dims": 1024,
|
|||
|
|
"index": true,
|
|||
|
|
"similarity": "l2_norm",
|
|||
|
|
},
|
|||
|
|
},
|
|||
|
|
},
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 4. 准备文档
|
|||
|
|
// 文档通常包含 ID 和 Content
|
|||
|
|
// 也可以包含额外的 Metadata 用于过滤或其他用途
|
|||
|
|
docs := []*schema.Document{
|
|||
|
|
{
|
|||
|
|
ID: "1",
|
|||
|
|
Content: "Eiffel Tower: Located in Paris, France.",
|
|||
|
|
MetaData: map[string]any{
|
|||
|
|
docExtraLocation: "France",
|
|||
|
|
},
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
ID: "2",
|
|||
|
|
Content: "The Great Wall: Located in China.",
|
|||
|
|
MetaData: map[string]any{
|
|||
|
|
docExtraLocation: "China",
|
|||
|
|
},
|
|||
|
|
},
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 5. 创建 ES 索引器组件
|
|||
|
|
indexer, err := es8.NewIndexer(ctx, &es8.IndexerConfig{
|
|||
|
|
Client: client,
|
|||
|
|
Index: indexName,
|
|||
|
|
IndexSpec: indexSpec, // 添加此项以启用自动索引创建
|
|||
|
|
BatchSize: 10,
|
|||
|
|
// DocumentToFields 指定如何将文档字段映射到 ES 字段
|
|||
|
|
DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]es8.FieldValue, err error) {
|
|||
|
|
return map[string]es8.FieldValue{
|
|||
|
|
fieldContent: {
|
|||
|
|
Value: doc.Content,
|
|||
|
|
EmbedKey: fieldContentVector, // 对文档内容进行向量化并保存到 "content_vector" 字段
|
|||
|
|
},
|
|||
|
|
fieldExtraLocation: {
|
|||
|
|
// 额外的 metadata 字段
|
|||
|
|
Value: doc.MetaData[docExtraLocation],
|
|||
|
|
},
|
|||
|
|
}, nil
|
|||
|
|
},
|
|||
|
|
// 提供 embedding 组件用于向量化
|
|||
|
|
Embedding: EmbedderDashscope,
|
|||
|
|
})
|
|||
|
|
|
|||
|
|
if err != nil {
|
|||
|
|
fmt.Printf("create indexer error: %v\n", err)
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 6. 索引文档
|
|||
|
|
ids, err := indexer.Store(ctx, docs)
|
|||
|
|
if err != nil {
|
|||
|
|
fmt.Printf("index error: %v\n", err)
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
fmt.Println("indexed ids:", ids)
|
|||
|
|
}
|