You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
159 lines
3.9 KiB
Go
159 lines
3.9 KiB
Go
/*
|
|
* Copyright 2025 CloudWeGo Authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io/fs"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/cloudwego/eino-ext/components/document/transformer/splitter/markdown"
|
|
"github.com/cloudwego/eino/components/document"
|
|
"github.com/cloudwego/eino/components/embedding"
|
|
"github.com/redis/go-redis/v9"
|
|
|
|
"github.com/cloudwego/eino-examples/quickstart/eino_assistant/eino/knowledgeindexing"
|
|
)
|
|
|
|
func main() {
|
|
ctx := context.Background()
|
|
|
|
err := indexMarkdownFiles(ctx, "./eino-docs")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
fmt.Println("index success")
|
|
}
|
|
|
|
func indexMarkdownFiles(ctx context.Context, dir string) error {
|
|
runner, err := knowledgeindexing.BuildKnowledgeIndexing(ctx, &knowledgeindexing.BuildConfig{
|
|
KnowledgeIndexing: &knowledgeindexing.KnowledgeIndexingBuildConfig{
|
|
MarkdownSplitterKeyOfDocumentTransformer: &markdown.HeaderConfig{
|
|
Headers: map[string]string{
|
|
"#": "title",
|
|
},
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("build index graph failed: %w", err)
|
|
}
|
|
|
|
// 遍历 dir 下的所有 markdown 文件
|
|
err = filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return fmt.Errorf("walk dir failed: %w", err)
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
if !strings.HasSuffix(path, ".md") {
|
|
fmt.Printf("[skip] not a markdown file: %s\n", path)
|
|
return nil
|
|
}
|
|
|
|
fmt.Printf("[start] indexing file: %s\n", path)
|
|
|
|
ids, err := runner.Invoke(ctx, document.Source{URI: path})
|
|
if err != nil {
|
|
return fmt.Errorf("invoke index graph failed: %w", err)
|
|
}
|
|
|
|
fmt.Printf("[done] indexing file: %s, len of parts: %d\n", path, len(ids))
|
|
|
|
return nil
|
|
})
|
|
|
|
return err
|
|
}
|
|
|
|
type RedisVectorStoreConfig struct {
|
|
RedisKeyPrefix string
|
|
IndexName string
|
|
Embedding embedding.Embedder
|
|
Dimension int
|
|
MinScore float64
|
|
RedisAddr string
|
|
}
|
|
|
|
func initVectorIndex(ctx context.Context, config *RedisVectorStoreConfig) (err error) {
|
|
if config.Embedding == nil {
|
|
return fmt.Errorf("embedding cannot be nil")
|
|
}
|
|
if config.Dimension <= 0 {
|
|
return fmt.Errorf("dimension must be positive")
|
|
}
|
|
|
|
client := redis.NewClient(&redis.Options{
|
|
Addr: config.RedisAddr,
|
|
})
|
|
|
|
// 确保在错误时关闭连接
|
|
defer func() {
|
|
if err != nil {
|
|
client.Close()
|
|
}
|
|
}()
|
|
|
|
if err = client.Ping(ctx).Err(); err != nil {
|
|
return fmt.Errorf("failed to connect to Redis: %w", err)
|
|
}
|
|
|
|
indexName := fmt.Sprintf("%s%s", config.RedisKeyPrefix, config.IndexName)
|
|
|
|
// 检查是否存在索引
|
|
exists, err := client.Do(ctx, "FT.INFO", indexName).Result()
|
|
if err != nil {
|
|
if !strings.Contains(err.Error(), "Unknown index name") {
|
|
return fmt.Errorf("failed to check if index exists: %w", err)
|
|
}
|
|
err = nil
|
|
} else if exists != nil {
|
|
return nil
|
|
}
|
|
|
|
// Create new index
|
|
createIndexArgs := []interface{}{
|
|
"FT.CREATE", indexName,
|
|
"ON", "HASH",
|
|
"PREFIX", "1", config.RedisKeyPrefix,
|
|
"SCHEMA",
|
|
"content", "TEXT",
|
|
"metadata", "TEXT",
|
|
"vector", "VECTOR", "FLAT",
|
|
"6",
|
|
"TYPE", "FLOAT32",
|
|
"DIM", config.Dimension,
|
|
"DISTANCE_METRIC", "COSINE",
|
|
}
|
|
|
|
if err = client.Do(ctx, createIndexArgs...).Err(); err != nil {
|
|
return fmt.Errorf("failed to create index: %w", err)
|
|
}
|
|
|
|
// 验证索引是否创建成功
|
|
if _, err = client.Do(ctx, "FT.INFO", indexName).Result(); err != nil {
|
|
return fmt.Errorf("failed to verify index creation: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|