/* * Copyright 2026 CloudWeGo Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Package rag provides an answer_from_document tool backed by a compose.Workflow. // // The workflow uses field mapping to share the user's question across non-adjacent // nodes (score, answer) without threading it through every intermediate output type: // // START{FilePath, Question} // │ (data via WithNoDirectDependency)──────────────────────────────────────────┐ // ▼ │ Question // [load] os.ReadFile → []*schema.Document │ // ▼ │ // [chunk] paragraph splitter → []*schema.Document │ // ▼ Chunks ─────────────────────────────────────────────────────────────► [score] // │ []scoredChunk // ▼ // [filter] top-k // │ TopK (may be empty) // ▼ // [answer] ◄─ Question (START) // (synthesize or not_found inline) // │ // END // // The [score] node wraps a BatchNode whose inner workflow scores each chunk with // a ChatModel call in parallel (MaxConcurrency=5). package rag import ( "context" "encoding/json" "fmt" "os" "sort" "strings" "github.com/cloudwego/eino/components/model" "github.com/cloudwego/eino/components/tool" "github.com/cloudwego/eino/compose" "github.com/cloudwego/eino/schema" "github.com/cloudwego/eino-examples/adk/common/tool/graphtool" "github.com/cloudwego/eino-examples/compose/batch/batch" ) // Input is the tool call argument struct. Its JSON tags are used by utils.InferTool // to generate the tool's parameter schema automatically. type Input struct { FilePath string `json:"file_path" jsonschema:"description=Absolute path to the uploaded document file"` Question string `json:"question" jsonschema:"description=The question to answer from the document"` } // Output is the structured result returned by the tool. type Output struct { Answer string `json:"answer"` Sources []string `json:"sources"` // key excerpts used to produce the answer } // scoreTask is the per-chunk input fed into the inner BatchNode workflow. type scoreTask struct { Text string Question string } // scoredChunk is the per-chunk result produced by the inner BatchNode workflow. type scoredChunk struct { Text string Score int // 0–10 relevance to the question Excerpt string // most relevant sentence or phrase from this chunk } // scoreIn is the input to the outer "score" Lambda node. // It is assembled by field mapping from two sources: // - Chunks: full output of "chunk" node ([]*schema.Document) // - Question: Question field of START (Input) type scoreIn struct { Chunks []*schema.Document Question string } // synthIn is the input to the "synthesize" Lambda node. // It is assembled by field mapping from two sources: // - TopK: full output of "filter" node ([]scoredChunk) // - Question: Question field of START (Input) type synthIn struct { TopK []scoredChunk Question string } // BuildTool constructs the answer_from_document tool backed by the RAG workflow. // It uses graphtool.NewInvokableGraphTool, which compiles the workflow per invocation // and supports interrupt/resume via a built-in checkpoint store. func BuildTool(ctx context.Context, cm model.BaseChatModel) (tool.BaseTool, error) { wf := buildWorkflow(cm) return graphtool.NewInvokableGraphTool[Input, Output]( wf, "answer_from_document", "Search a large uploaded document for content relevant to a question and synthesize a "+ "cited answer from the most relevant passages. "+ "Use this instead of read_file when the document may be too large to fit in context.", ) } // buildWorkflow constructs the RAG compose.Workflow (uncompiled). // graphtool.NewInvokableGraphTool compiles it per invocation. func buildWorkflow(cm model.BaseChatModel) *compose.Workflow[Input, Output] { scoreWF := newScoreWorkflow(cm) scorer := batch.NewBatchNode(&batch.NodeConfig[scoreTask, scoredChunk]{ Name: "ChunkScorer", InnerTask: scoreWF, MaxConcurrency: 5, }) wf := compose.NewWorkflow[Input, Output]() // load: read file from disk, emit a single Document. wf.AddLambdaNode("load", compose.InvokableLambda( func(ctx context.Context, in Input) ([]*schema.Document, error) { data, err := os.ReadFile(in.FilePath) if err != nil { return nil, fmt.Errorf("read %q: %w", in.FilePath, err) } return []*schema.Document{{Content: string(data)}}, nil }, )).AddInput(compose.START) // chunk: split each Document into ~800-char pieces. wf.AddLambdaNode("chunk", compose.InvokableLambda( func(ctx context.Context, docs []*schema.Document) ([]*schema.Document, error) { var out []*schema.Document for _, d := range docs { out = append(out, splitIntoChunks(d.Content, 800)...) } return out, nil }, )).AddInput("load") // score: score each chunk against the question in parallel via BatchNode. // Chunks comes from "chunk"; Question comes directly from START. // Both use WithNoDirectDependency because the execution order is already // established by the direct edges START→load→chunk→score. wf.AddLambdaNode("score", compose.InvokableLambda( func(ctx context.Context, in scoreIn) ([]scoredChunk, error) { tasks := make([]scoreTask, len(in.Chunks)) for i, c := range in.Chunks { tasks[i] = scoreTask{Text: c.Content, Question: in.Question} } return scorer.Invoke(ctx, tasks) }, )). AddInputWithOptions("chunk", []*compose.FieldMapping{compose.ToField("Chunks")}, compose.WithNoDirectDependency()). AddInputWithOptions(compose.START, []*compose.FieldMapping{compose.MapFields("Question", "Question")}, compose.WithNoDirectDependency()) // filter: sort descending by score, keep up to top-3 chunks with score ≥ 3. wf.AddLambdaNode("filter", compose.InvokableLambda( func(ctx context.Context, scored []scoredChunk) ([]scoredChunk, error) { sort.Slice(scored, func(i, j int) bool { return scored[i].Score > scored[j].Score }) const maxK = 3 var top []scoredChunk for _, c := range scored { if c.Score < 3 { break } top = append(top, c) if len(top) == maxK { break } } return top, nil }, )).AddInput("score") // answer: synthesize a response from top-k chunks, or return a not-found message if empty. // TopK comes from "filter"; Question comes directly from START. // Both use WithNoDirectDependency: "filter" governs execution order via its direct edge. wf.AddLambdaNode("answer", compose.InvokableLambda( func(ctx context.Context, in synthIn) (Output, error) { if len(in.TopK) == 0 { return Output{ Answer: fmt.Sprintf("No relevant content found in the document for: %q", in.Question), }, nil } return synthesize(ctx, cm, in) }, )). AddInputWithOptions("filter", []*compose.FieldMapping{compose.ToField("TopK")}, compose.WithNoDirectDependency()). AddInputWithOptions(compose.START, []*compose.FieldMapping{compose.MapFields("Question", "Question")}, compose.WithNoDirectDependency()) // END receives output from answer. wf.End(). AddInput("answer") return wf } // newScoreWorkflow builds the single-node inner workflow used by each BatchNode task. // It is intentionally trivial: BatchNode provides the parallelism, not the inner graph. func newScoreWorkflow(cm model.BaseChatModel) *compose.Workflow[scoreTask, scoredChunk] { wf := compose.NewWorkflow[scoreTask, scoredChunk]() wf.AddLambdaNode("score_chunk", compose.InvokableLambda( func(ctx context.Context, t scoreTask) (scoredChunk, error) { return scoreOneChunk(ctx, cm, t) }, )).AddInput(compose.START) wf.End().AddInput("score_chunk") return wf } // scoreOneChunk asks the model to rate the relevance of a single chunk (0–10) // and extract the most relevant excerpt. Parse errors are treated as score 0 // so a bad JSON response never aborts the pipeline. func scoreOneChunk(ctx context.Context, cm model.BaseChatModel, t scoreTask) (scoredChunk, error) { prompt := fmt.Sprintf(`Rate how relevant the following text chunk is to the question. Question: %s Chunk: %s Reply with JSON only — no explanation, no markdown fences: {"score": <0-10>, "excerpt": ""} Score guide: 0=completely irrelevant, 3=tangentially related, 7=clearly relevant, 10=directly answers the question.`, t.Question, t.Text) resp, err := cm.Generate(ctx, []*schema.Message{schema.UserMessage(prompt)}) if err != nil { // treat model error as irrelevant rather than aborting the batch return scoredChunk{Text: t.Text, Score: 0}, nil } content := strings.TrimSpace(resp.Content) // strip optional markdown code block wrapper content = strings.TrimPrefix(content, "```json") content = strings.TrimPrefix(content, "```") content = strings.TrimSuffix(content, "```") content = strings.TrimSpace(content) var sr struct { Score int `json:"score"` Excerpt string `json:"excerpt"` } if err := json.Unmarshal([]byte(content), &sr); err != nil { return scoredChunk{Text: t.Text, Score: 0}, nil } return scoredChunk{Text: t.Text, Score: sr.Score, Excerpt: sr.Excerpt}, nil } // synthesize builds a prompt from the top-k chunks and generates a cited answer. func synthesize(ctx context.Context, cm model.BaseChatModel, in synthIn) (Output, error) { var sb strings.Builder sb.WriteString("Answer the following question using only the provided document excerpts.\n\n") sb.WriteString("Question: ") sb.WriteString(in.Question) sb.WriteString("\n\nDocument excerpts:\n") sources := make([]string, len(in.TopK)) for i, c := range in.TopK { excerpt := c.Excerpt if excerpt == "" { excerpt = c.Text } sources[i] = excerpt fmt.Fprintf(&sb, "[%d] %s\n\n", i+1, excerpt) } sb.WriteString("Provide a clear, concise answer. Cite excerpt numbers like [1] when referencing sources.") resp, err := cm.Generate(ctx, []*schema.Message{schema.UserMessage(sb.String())}) if err != nil { return Output{}, fmt.Errorf("synthesize: %w", err) } return Output{Answer: resp.Content, Sources: sources}, nil } // splitIntoChunks splits text into chunks of at most chunkSize characters, // breaking on paragraph boundaries (\n\n) where possible, then on newlines. func splitIntoChunks(text string, chunkSize int) []*schema.Document { var chunks []*schema.Document var buf strings.Builder flush := func() { s := strings.TrimSpace(buf.String()) if s != "" { chunks = append(chunks, &schema.Document{Content: s}) } buf.Reset() } for _, para := range strings.Split(text, "\n\n") { para = strings.TrimSpace(para) if para == "" { continue } if buf.Len()+len(para)+2 > chunkSize && buf.Len() > 0 { flush() } // paragraph itself exceeds chunkSize: split by line if len(para) > chunkSize { for _, line := range strings.Split(para, "\n") { line = strings.TrimSpace(line) if line == "" { continue } if buf.Len()+len(line)+1 > chunkSize && buf.Len() > 0 { flush() } if buf.Len() > 0 { buf.WriteByte('\n') } buf.WriteString(line) } } else { if buf.Len() > 0 { buf.WriteString("\n\n") } buf.WriteString(para) } } flush() return chunks }