You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

166 lines
4.0 KiB
Go

/*
* Copyright 2025 CloudWeGo Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"context"
"fmt"
"io/fs"
"path/filepath"
"strings"
"github.com/cloudwego/eino-examples/quickstart/eino_assistant/pkg/env"
"github.com/cloudwego/eino-ext/components/document/transformer/splitter/markdown"
"github.com/cloudwego/eino/components/document"
"github.com/cloudwego/eino/components/embedding"
"github.com/redis/go-redis/v9"
"github.com/cloudwego/eino-examples/quickstart/eino_assistant/eino/knowledgeindexing"
)
// init runs before main and passes the names of the environment variables
// this program relies on ("ARK_API_KEY" and "ARK_EMBEDDING_MODEL") to
// env.MustHasEnvs.
// NOTE(review): presumably MustHasEnvs aborts start-up when one of them is
// missing — confirm against the env package.
func init() {
// check some essential envs
env.MustHasEnvs("ARK_API_KEY", "ARK_EMBEDDING_MODEL")
}
// main indexes every markdown file found under ./eino-docs and prints a
// confirmation line once the whole tree has been processed. Any indexing
// error is fatal and surfaces as a panic.
func main() {
	if err := indexMarkdownFiles(context.Background(), "./eino-docs"); err != nil {
		panic(err)
	}

	fmt.Println("index success")
}
// indexMarkdownFiles builds the knowledge-indexing graph and invokes it once
// for every file under dir whose path ends in ".md".
//
// Directories are descended into, and files without the ".md" suffix are
// reported on stdout and skipped. The walk stops at the first error, which is
// returned wrapped with context: a graph build failure, a directory walk
// failure, or a failed graph invocation.
func indexMarkdownFiles(ctx context.Context, dir string) error {
	buildCfg := &knowledgeindexing.BuildConfig{
		KnowledgeIndexing: &knowledgeindexing.KnowledgeIndexingBuildConfig{
			MarkdownSplitterKeyOfDocumentTransformer: &markdown.HeaderConfig{
				Headers: map[string]string{"#": "title"},
			},
		},
	}

	graph, buildErr := knowledgeindexing.BuildKnowledgeIndexing(ctx, buildCfg)
	if buildErr != nil {
		return fmt.Errorf("build index graph failed: %w", buildErr)
	}

	// visit handles a single entry reported by the directory walk.
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			// Must be checked first: entry may be unusable when the walk failed.
			return fmt.Errorf("walk dir failed: %w", walkErr)
		case entry.IsDir():
			return nil
		case !strings.HasSuffix(path, ".md"):
			fmt.Printf("[skip] not a markdown file: %s\n", path)
			return nil
		}

		fmt.Printf("[start] indexing file: %s\n", path)

		partIDs, invokeErr := graph.Invoke(ctx, document.Source{URI: path})
		if invokeErr != nil {
			return fmt.Errorf("invoke index graph failed: %w", invokeErr)
		}

		fmt.Printf("[done] indexing file: %s, len of parts: %d\n", path, len(partIDs))
		return nil
	}

	// Walk every entry below dir, indexing the markdown files.
	return filepath.WalkDir(dir, visit)
}
// RedisVectorStoreConfig carries the settings initVectorIndex reads when it
// checks for, and if necessary creates, the vector index in Redis.
type RedisVectorStoreConfig struct {
// RedisKeyPrefix is prepended to IndexName to form the full index name and
// is also passed as the single PREFIX argument of FT.CREATE.
RedisKeyPrefix string
// IndexName is the index name without the key prefix.
IndexName string
// Embedding must be non-nil; initVectorIndex rejects a nil value but does
// not otherwise call it.
Embedding embedding.Embedder
// Dimension is sent as the DIM attribute of the "vector" field and must be
// greater than zero.
Dimension int
// MinScore is not read by initVectorIndex.
// NOTE(review): presumably a similarity threshold applied at query time —
// confirm against the code that consumes this config.
MinScore float64
// RedisAddr is used as the Addr option of the Redis client.
RedisAddr string
}
// initVectorIndex makes sure the search index described by config exists on
// the Redis server at config.RedisAddr, creating it when it is missing.
//
// The index is named config.RedisKeyPrefix + config.IndexName. It is created
// ON HASH with config.RedisKeyPrefix as its only key prefix and three schema
// fields: "content" (TEXT), "metadata" (TEXT) and "vector" (a FLAT FLOAT32
// vector of config.Dimension dimensions using the COSINE distance metric).
//
// It returns nil when the index already exists or was created and verified.
// It returns an error when config is nil, config.Embedding is nil,
// config.Dimension is not positive, Redis cannot be reached, or any of the
// FT.INFO / FT.CREATE commands fails unexpectedly.
//
// The Redis client opened here is private to this call and is always closed
// before returning.
func initVectorIndex(ctx context.Context, config *RedisVectorStoreConfig) error {
	if config == nil {
		return fmt.Errorf("config cannot be nil")
	}
	if config.Embedding == nil {
		return fmt.Errorf("embedding cannot be nil")
	}
	if config.Dimension <= 0 {
		return fmt.Errorf("dimension must be positive")
	}

	client := redis.NewClient(&redis.Options{
		Addr: config.RedisAddr,
	})
	// The client never leaves this function, so it must be released on every
	// path — success included — or its connection pool leaks. The Close error
	// is deliberately ignored: the outcome of the index setup is already
	// decided by the time it runs.
	defer func() {
		_ = client.Close()
	}()

	if err := client.Ping(ctx).Err(); err != nil {
		return fmt.Errorf("failed to connect to Redis: %w", err)
	}

	indexName := config.RedisKeyPrefix + config.IndexName

	// Check whether the index already exists: a successful FT.INFO means it
	// does and there is nothing left to do.
	infoErr := client.Do(ctx, "FT.INFO", indexName).Err()
	if infoErr == nil {
		return nil
	}
	// Only a "missing index" reply may fall through to creation. The match is
	// case-insensitive and accepts two phrasings.
	// NOTE(review): the exact wording returned for a missing index is assumed
	// to differ between search-module versions — confirm against the deployed
	// server.
	msg := strings.ToLower(infoErr.Error())
	if !strings.Contains(msg, "unknown index name") && !strings.Contains(msg, "no such index") {
		return fmt.Errorf("failed to check if index exists: %w", infoErr)
	}

	// Create the new index.
	createIndexArgs := []interface{}{
		"FT.CREATE", indexName,
		"ON", "HASH",
		"PREFIX", "1", config.RedisKeyPrefix,
		"SCHEMA",
		"content", "TEXT",
		"metadata", "TEXT",
		"vector", "VECTOR", "FLAT",
		// Number of attribute tokens that follow (three name/value pairs).
		"6",
		"TYPE", "FLOAT32",
		"DIM", config.Dimension,
		"DISTANCE_METRIC", "COSINE",
	}
	if err := client.Do(ctx, createIndexArgs...).Err(); err != nil {
		return fmt.Errorf("failed to create index: %w", err)
	}

	// Verify that the index is now visible to the server.
	if err := client.Do(ctx, "FT.INFO", indexName).Err(); err != nil {
		return fmt.Errorf("failed to verify index creation: %w", err)
	}
	return nil
}