{
  "canonical_name": "Inferensys/contextful",
  "compilation_id": "pack_428d90bfb77c4487b254b70136b0f472",
  "created_at": "2026-05-24T13:29:49.612002+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=skill, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=skill, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `npx @inferensys/contextful` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "npx @inferensys/contextful",
      "sandbox_container_image": "node:22-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "deterministic_isolated_install",
      "sandbox_validation_id": "sbx_81c822c974f54856a37fab77c6cb7ef9"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_6d1d87fdbc61f0de69507804647868c0",
    "canonical_name": "Inferensys/contextful",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/Inferensys/contextful",
    "slug": "contextful",
    "source_packet_id": "phit_c356bced3c75459ebd1856c19e32d434",
    "source_validation_id": "dval_a57d47da1eb34bfc9406efeb91587234"
  },
  "merchandising": {
    "best_for": "需要信息检索与知识管理能力，并使用 claude的用户",
    "github_forks": 0,
    "github_stars": 0,
    "one_liner_en": "Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.",
    "one_liner_zh": "Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.",
    "primary_category": {
      "category_id": "research-knowledge",
      "confidence": "high",
      "name_en": "Research & Knowledge",
      "name_zh": "信息检索与知识管理",
      "reason": "matched_keywords:knowledge, graph, search"
    },
    "target_user": "使用 claude, claude_code 等宿主 AI 的用户",
    "title_en": "contextful",
    "title_zh": "contextful 能力包",
    "visible_tags": [
      {
        "label_en": "MCP Tools",
        "label_zh": "MCP 工具",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-mcp-tools",
        "type": "product_domain"
      },
      {
        "label_en": "Knowledge Base Q&A",
        "label_zh": "知识库问答",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-knowledge-base-q-a",
        "type": "user_job"
      },
      {
        "label_en": "Workflow Automation",
        "label_zh": "流程自动化",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-workflow-automation",
        "type": "core_capability"
      },
      {
        "label_en": "Node-based Workflow",
        "label_zh": "节点式流程编排",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-node-based-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Evaluation Suite",
        "label_zh": "评测体系",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-evaluation-suite",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_c356bced3c75459ebd1856c19e32d434",
  "page_model": {
    "artifacts": {
      "artifact_slug": "contextful",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "npx @inferensys/contextful",
          "label": "Node.js / npx · 官方安装入口",
          "source": "https://github.com/Inferensys/contextful#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "MCP 工具",
        "知识库问答",
        "流程自动化",
        "节点式流程编排",
        "评测体系"
      ],
      "eyebrow": "信息检索与知识管理",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要信息检索与知识管理能力，并使用 claude的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory."
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "claude, claude_code",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "skill, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "项目面向 Claude/Cursor/Codex/Gemini/OpenCode 等宿主，或安装命令涉及用户配置目录。",
            "category": "配置坑",
            "evidence": [
              "capability.host_targets | github_repo:1240001007 | https://github.com/Inferensys/contextful | host_targets=claude, claude_code"
            ],
            "severity": "medium",
            "suggested_check": "列出会写入的配置文件、目录和卸载/回滚步骤。",
            "title": "可能修改宿主 AI 配置",
            "user_impact": "安装可能改变本机 AI 工具行为，用户需要知道写入位置和回滚方法。"
          },
          {
            "body": "README/documentation is current enough for a first validation pass.",
            "category": "能力坑",
            "evidence": [
              "capability.assumptions | github_repo:1240001007 | https://github.com/Inferensys/contextful | README/documentation is current enough for a first validation pass."
            ],
            "severity": "medium",
            "suggested_check": "将假设转成下游验证清单。",
            "title": "能力判断依赖假设",
            "user_impact": "假设不成立时，用户拿不到承诺的能力。"
          },
          {
            "body": "未记录 last_activity_observed。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | last_activity_observed missing"
            ],
            "severity": "medium",
            "suggested_check": "补 GitHub 最近 commit、release、issue/PR 响应信号。",
            "title": "维护活跃度未知",
            "user_impact": "新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "downstream_validation.risk_items | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "进入安全/权限治理复核队列。",
            "title": "下游验证发现风险项",
            "user_impact": "下游已经要求复核，不能在页面中弱化。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "risks.scoring_risks | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "把风险写入边界卡，并确认是否需要人工复核。",
            "title": "存在评分风险",
            "user_impact": "风险会影响是否适合普通用户安装。"
          },
          {
            "body": "issue_or_pr_quality=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | issue_or_pr_quality=unknown"
            ],
            "severity": "low",
            "suggested_check": "抽样最近 issue/PR，判断是否长期无人处理。",
            "title": "issue/PR 响应质量未知",
            "user_impact": "用户无法判断遇到问题后是否有人维护。"
          },
          {
            "body": "release_recency=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | release_recency=unknown"
            ],
            "severity": "low",
            "suggested_check": "确认最近 release/tag 和 README 安装命令是否一致。",
            "title": "发布节奏不明确",
            "user_impact": "安装命令和文档可能落后于代码，用户踩坑概率升高。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 7 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：配置坑 - 可能修改宿主 AI 配置。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": 1,
        "forks": 0,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": 0
      },
      "source_url": "https://github.com/Inferensys/contextful",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.",
      "title": "contextful 能力包",
      "trial_prompt": "# contextful - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for Inferensys/contextful.\n\nProject:\n- Name: contextful\n- Repository: https://github.com/Inferensys/contextful\n- Summary: Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.\n- Host target: claude, claude_code\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. project-introduction: Project Introduction. 
Produce one small intermediate artifact and wait for confirmation.\n2. quick-start: Quick Start Guide. Produce one small intermediate artifact and wait for confirmation.\n3. high-level-architecture: High-Level Architecture. Produce one small intermediate artifact and wait for confirmation.\n4. search-engine: Search Engine. Produce one small intermediate artifact and wait for confirmation.\n5. context-packs: Context Packs. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/Inferensys/contextful\n- https://github.com/Inferensys/contextful#readme\n- README.md\n- package.json\n- src/index.ts\n- server.json\n- src/mcp-server.ts\n- src/indexer.ts\n- src/cli.ts\n- src/search.ts\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "voices": [
        {
          "body": "当前没有项目级社区来源；不会把未抓取讨论包装成社会证明。",
          "items": [],
          "status": "待发现 Agent 补证",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "信息检索与知识管理",
      "desc": "Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.",
      "effort": "安装已验证",
      "forks": 0,
      "icon": "search",
      "name": "contextful 能力包",
      "risk": "需复核",
      "slug": "contextful",
      "stars": 0,
      "tags": [
        "MCP 工具",
        "知识库问答",
        "流程自动化",
        "节点式流程编排",
        "评测体系"
      ],
      "thumb": "blue",
      "type": "Skill Pack"
    },
    "manual": {
      "markdown": "# https://github.com/Inferensys/contextful 项目说明书\n\n生成时间：2026-05-16 06:05:31 UTC\n\n## 目录\n\n- [Project Introduction](#project-introduction)\n- [Quick Start Guide](#quick-start)\n- [High-Level Architecture](#high-level-architecture)\n- [Runtime Components](#runtime-components)\n- [Search Engine](#search-engine)\n- [Context Packs](#context-packs)\n- [Memory Ledger](#memory-ledger)\n- [Graph Traversal and Analysis](#graph-traversal)\n- [SQLite Database Schema](#sqlite-database)\n- [Workspace Indexing System](#indexing-system)\n\n<a id='project-introduction'></a>\n\n## Project Introduction\n\n### 相关页面\n\n相关主题：[High-Level Architecture](#high-level-architecture), [Quick Start Guide](#quick-start)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n- [package.json](https://github.com/Inferensys/contextful/blob/main/package.json)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n</details>\n\n# Project Introduction\n\nContextful is an intelligent code context management system designed to provide AI agents with compact, evidence-backed information for codebase navigation and understanding. 
The project serves as a bridge between large codebases and AI-powered development tools by indexing source code, extracting symbols, tracking dependencies, and generating token-budgeted evidence packs for queries.\n\n## Purpose and Scope\n\nContextful solves the fundamental problem that AI coding assistants face when working with large repositories: excessive context requirements that lead to token waste and degraded performance. Instead of forcing agents to read dozens of random files, Contextful enables targeted, cited, and ranked context retrieval that maximizes the value of each token spent.\n\nThe system operates in three primary modes:\n\n1. **Indexing Mode** - Scans and indexes source code, extracting symbols, dependencies, and semantic chunks\n2. **Query Mode** - Creates evidence packs for natural language queries with token budgets\n3. **Search Mode** - Provides lightweight search across code, docs, symbols, and memory without full evidence compilation\n\n资料来源：[README.md:1-15]()\n\n## Architecture Overview\n\nThe Contextful system consists of several interconnected components that work together to provide context management capabilities.\n\n```mermaid\ngraph TD\n    A[Source Code] --> B[Indexing Engine]\n    B --> C[SQLite Kernel DB]\n    C --> D[Search Module]\n    C --> E[Graph Analysis]\n    C --> F[Memory Ledger]\n    \n    G[CLI / MCP Server] --> D\n    G --> E\n    G --> F\n    \n    D --> H[Evidence Pack]\n    E --> H\n    F --> H\n    \n    H --> I[AI Agent / User]\n```\n\n### Component Responsibilities\n\n| Component | File | Responsibility |\n|-----------|------|----------------|\n| Indexing Engine | `src/extract.ts` | Parse source files, extract symbols and dependencies |\n| Search Module | `src/search.ts` | Full-text search, intent classification, ranking |\n| Graph Analysis | `src/search.ts` | Trace dependencies and code paths |\n| Memory Ledger | `src/memory.ts` | Store evidence-backed lessons across sessions |\n| CLI Interface | `src/cli.ts` 
| Command-line interface for all operations |\n| MCP Server | `src/mcp-server.ts` | Model Context Protocol stdio server |\n\n资料来源：[src/extract.ts:1-50](), [src/search.ts:1-30](), [src/cli.ts:1-40]()\n\n## Supported Languages and File Types\n\nContextful supports multiple programming languages through pattern-based extraction. The indexing engine recognizes language-specific syntax for symbols and dependencies.\n\n### Language Support Matrix\n\n| Language | Functions | Classes | Types | Imports |\n|----------|-----------|---------|-------|---------|\n| TypeScript/JavaScript | ✓ | ✓ | ✓ | ✓ |\n| Python | ✓ | ✓ | - | ✓ |\n| Go | ✓ | ✓ | ✓ | ✓ |\n| Rust | ✓ | ✓ | ✓ | ✓ |\n| Markdown | - | - | Headings | - |\n| JSON | - | - | Config keys | - |\n\n资料来源：[src/extract.ts:15-80]()\n\n## Core MCP Tools\n\nContextful exposes its capabilities through the Model Context Protocol (MCP), providing AI agents with a standardized tool interface. The primary tools are designed to keep the agent surface small while providing maximum utility.\n\n```mermaid\ngraph LR\n    A[Agent] -->|context_pack| B[Evidence Pack Generator]\n    A -->|search_code| C[Code Search]\n    A -->|trace_path| D[Graph Traversal]\n    A -->|impact_analysis| E[Dependency Analyzer]\n    A -->|why_changed| F[Git History]\n    A -->|recall_memory| G[Memory Search]\n    A -->|write_lesson| H[Lesson Writer]\n```\n\n### Tool Descriptions\n\n| Tool | Purpose | Key Parameters |\n|------|---------|----------------|\n| `context_pack` | Returns ranked, cited, token-budgeted context bundles | `query`, `budget`, `scope` |\n| `search_code` | Powerful search across code, docs, symbols, and memory | `query`, `mode`, `filters` |\n| `trace_path` | Graph traversal across files, symbols, modules, and config | `from`, `to`, `edge_types` |\n| `impact_analysis` | Reverse dependencies and likely tests | `symbol_or_file` |\n| `why_changed` | Current evidence plus git history | `symbol_or_file` |\n| `recall_memory` | Search session 
learnings and durable lessons | `query`, `scope` |\n| `write_lesson` | Store evidence-backed lessons | `claim`, `evidence_refs`, `confidence` |\n\n资料来源：[README.md:25-45](), [src/mcp-server.ts:1-80]()\n\n## CLI Interface\n\nContextful provides a command-line interface through the `cxf` binary (with `contextful` as a readable alias). The CLI supports both one-shot operations and daemon mode for continuous indexing.\n\n### Command Reference\n\n| Command | Description | Key Options |\n|---------|-------------|-------------|\n| `index` | Index a workspace | `--workspace`, `--watch` |\n| `daemon` | Run local indexing daemon | `--workspace` |\n| `query` | Create evidence pack for query | `--workspace`, `--budget`, `--json` |\n| `search` | Search without full evidence pack | `--workspace`, `--limit`, `--kind` |\n| `report` | Generate context report | `--workspace`, `--format` |\n| `memory add` | Store evidence-backed lesson | `--claim`, `--evidence`, `--scope`, `--confidence` |\n| `server` | Run MCP stdio server | - |\n\n资料来源：[src/cli.ts:40-120](), [README.md:15-35]()\n\n### Example Usage\n\n```bash\n# Index a workspace\nnpx @inferensys/contextful index --workspace .\n\n# Query with token budget\nnpx @inferensys/contextful query \"where is user auth handled\" --workspace . --budget 2000\n\n# Run as MCP server\nnpx @inferensys/contextful server\n```\n\n资料来源：[README.md:8-15]()\n\n## Data Models\n\n### Evidence Pack Structure\n\nThe `EvidencePack` is the core data structure returned by query operations. 
It contains all necessary context for an agent to answer a query.\n\n```typescript\ninterface EvidencePack {\n  id: string;                    // Unique pack identifier\n  query: string;                 // Original query\n  scope: string;                 // Scope of the context\n  intent: SearchIntent;          // Classified query intent\n  summary: string;               // Human-readable summary\n  citations: SearchHit[];        // Ranked evidence items\n  files: FileContext[];          // Grouped file references\n  symbols: SymbolRecord[];       // Relevant symbols\n  graphPaths: GraphPath[];       // Dependency paths\n  memoryHits: SearchHit[];       // Memory matches\n  confidence: number;            // Confidence score (0.1-0.92)\n  tokenEstimate: number;         // Estimated token count\n  budget: number;                // Token budget\n  createdAt: string;             // ISO timestamp\n}\n```\n\n资料来源：[src/search.ts:200-250]()\n\n### Search Hit Structure\n\nEach search result is represented as a `SearchHit` with relevance ranking and excerpt information.\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `ref` | string | Reference identifier (e.g., `file:src/auth.ts:1-20`) |\n| `path` | string | File path |\n| `title` | string | Display title |\n| `excerpt` | string | Relevant text snippet |\n| `kind` | string | Type: `code`, `doc`, `symbol`, `memory` |\n| `rank` | number | BM25 relevance score |\n\n资料来源：[src/search.ts:50-80]()\n\n## Dependencies and Technology Stack\n\nContextful is built on a carefully selected set of dependencies that enable efficient code indexing and search.\n\n| Dependency | Version | Purpose |\n|------------|---------|---------|\n| `@modelcontextprotocol/sdk` | ^1.29.0 | MCP protocol implementation |\n| `better-sqlite3` | ^12.10.0 | SQLite database for indexing |\n| `commander` | ^14.0.3 | CLI argument parsing |\n| `fast-glob` | ^3.3.3 | File pattern matching |\n| `tree-sitter-wasms` | ^0.1.13 | Syntax parsing |\n| 
`web-tree-sitter` | ^0.20.8 | Tree-sitter bindings |\n| `zod` | ^4.4.3 | Schema validation |\n\n资料来源：[package.json:20-40]()\n\n### System Requirements\n\n- **Node.js**: >= 20\n- **License**: MIT\n- **Repository**: [inferensys/contextful](https://github.com/Inferensys/contextful)\n\n资料来源：[package.json:45-55]()\n\n## Supported IDE Integration\n\nContextful is designed to integrate with a wide range of AI-powered development tools:\n\n| IDE/Extension | Status |\n|---------------|--------|\n| GitHub Copilot | Supported |\n| VS Code | Supported |\n| Cursor | Supported |\n| Windsurf | Supported |\n| Cline | Supported |\n| Roo Code | Supported |\n| Continue | Supported |\n| Zed | Supported |\n\n资料来源：[package.json:10-20]()\n\n## Workflow: From Indexing to Query\n\nThe complete workflow demonstrates how Contextful transforms raw source code into actionable intelligence for AI agents.\n\n```mermaid\nsequenceDiagram\n    participant U as User/Agent\n    participant CLI as CLI/MCP Server\n    participant IDX as Indexer\n    participant DB as SQLite Kernel\n    participant SRCH as Search Engine\n    participant MEM as Memory Ledger\n\n    U->>CLI: index --workspace ./project\n    CLI->>IDX: Extract symbols & dependencies\n    IDX->>DB: Store in chunks_fts, symbols, edges\n    DB-->>CLI: Index complete\n\n    U->>CLI: query \"how is auth handled\"\n    CLI->>SRCH: classifyQuery() intent=exact\n    SRCH->>DB: FTS + BM25 search\n    DB-->>SRCH: Ranked hits\n    SRCH->>MEM: Check memory ledger\n    MEM-->>SRCH: Related lessons\n    CLI-->>U: EvidencePack (token-budgeted)\n\n    U->>CLI: write_lesson --claim \"Auth pattern\" --evidence file:...\n    CLI->>MEM: Store lesson with confidence\n    MEM-->>CLI: Lesson saved\n```\n\n资料来源：[src/search.ts:100-150](), [src/report.ts:80-120]()\n\n## Next Steps\n\nTo continue exploring Contextful:\n\n1. **Installation Guide** - Set up Contextful in your development environment\n2. 
**CLI Reference** - Detailed documentation of all CLI commands\n3. **MCP Tools API** - Complete reference for MCP tool interfaces\n4. **Configuration** - Workspace configuration and tuning options\n5. **Memory System** - Using the evidence-backed lesson system\n\n---\n\n<a id='quick-start'></a>\n\n## Quick Start Guide\n\n### 相关页面\n\n相关主题：[Project Introduction](#project-introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n</details>\n\n# Quick Start Guide\n\n## Overview\n\nContextful is a contextual indexing and search system designed to help AI agents efficiently retrieve relevant code evidence. Instead of forcing agents to perform dozens of random file reads, Contextful returns compact, ranked, and cited evidence packs that fit within a token budget.\n\n资料来源：[README.md:1-10]()\n\n## Installation\n\nInstall Contextful using npm. 
The package provides both the `cxf` binary and the full `contextful` alias.\n\n```bash\nnpm install -g @inferensys/contextful\n```\n\nAlternatively, run commands directly via `npx`:\n\n```bash\nnpx @inferensys/contextful index --workspace .\n```\n\n资料来源：[README.md:11-14]()\n\n## CLI Commands\n\nContextful provides a command-line interface with the following primary commands:\n\n| Command | Description |\n|---------|-------------|\n| `cxf index` | Index a workspace for search |\n| `cxf daemon` | Run a local indexing daemon |\n| `cxf query` | Create an evidence pack for a query |\n| `cxf search` | Search indexed context |\n| `cxf report` | Generate a context report |\n| `cxf memory add` | Store an evidence-backed lesson |\n| `cxf server` | Run the MCP stdio server |\n\n资料来源：[README.md:23-32]()\n\n## Basic Workflow\n\n### Step 1: Index Your Workspace\n\nBefore searching, you must index your codebase. This creates the searchable database:\n\n```bash\ncxf index --workspace .\n```\n\nFor continuous indexing as files change, use the daemon mode:\n\n```bash\ncxf daemon --workspace .\n```\n\n资料来源：[src/cli.ts:1-20]()\n\n### Step 2: Query for Context\n\nOnce indexed, ask questions about your codebase:\n\n```bash\ncxf query \"where is user auth handled\" --workspace . --budget 2000\n```\n\nThe `query` command returns a ranked evidence pack with citations and file references.\n\n#### Query Options\n\n| Option | Description | Default |\n|--------|-------------|---------|\n| `--workspace <path>` | Workspace path | Current directory |\n| `--budget <tokens>` | Approximate token budget | 2000 |\n| `--json` | Output as JSON instead of Markdown | false |\n\n资料来源：[src/cli.ts:22-30]()\n\n### Step 3: Search Without Building Evidence Packs\n\nFor quick lookups without compiling full evidence packs, use `search`:\n\n```bash\ncxf search \"authentication middleware\" --workspace . 
--limit 10 --kind code\n```\n\n#### Search Options\n\n| Option | Description | Default |\n|--------|-------------|---------|\n| `--workspace <path>` | Workspace path | Current directory |\n| `--limit <count>` | Maximum hits | 10 |\n| `--kind` | Filter: `all`, `code`, `docs`, `symbols`, `memory` | `all` |\n\n资料来源：[src/cli.ts:32-42]()\n\n### Step 4: Generate Reports\n\nGenerate comprehensive context reports in various formats:\n\n```bash\ncxf report --workspace . --format markdown\ncxf report --workspace . --format json\ncxf report --workspace . --format html\n```\n\n资料来源：[src/cli.ts:44-48]()\n\n## MCP Server Integration\n\nContextful can run as a Model Context Protocol (MCP) server, providing tools directly to AI agents.\n\n```bash\ncxf server\n```\n\n### Available MCP Tools\n\n| Tool | Purpose |\n|------|---------|\n| `context_pack` | Returns ranked, cited, token-budgeted evidence bundles |\n| `search_code` | Code, docs, symbol, and memory search |\n| `trace_path` | Graph traversal across files, symbols, modules, and config |\n| `impact_analysis` | Reverse dependencies and likely tests |\n| `why_changed` | Current evidence plus git history |\n| `recall_memory` | Search session learnings and durable project lessons |\n| `write_lesson` | Store evidence-backed lessons for future sessions |\n\n资料来源：[README.md:40-48]()\n\n### MCP Tool Parameters\n\n#### context_pack\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | string | Yes | Query to answer from indexed context |\n| `budget` | number | No | Token budget for the response |\n| `scope` | string | No | Search scope |\n\n资料来源：[src/mcp-server.ts:1-25]()\n\n#### search_code\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | string | Yes | Search query |\n| `mode` | string | No | Search mode |\n| `filters` | object | No | Search filters |\n| `workspace` | string | No | Workspace path |\n| `limit` | number | 
No | Maximum results |\n\n资料来源：[src/mcp-server.ts:26-40]()\n\n#### write_lesson\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `claim` | string | Yes | Lesson claim |\n| `evidence_refs` | array | Yes | Evidence references (e.g., `file:src/auth.ts:1-20`) |\n| `scope` | string | No | Memory scope |\n| `confidence` | number | No | Confidence from 0 to 1 |\n| `supersedes` | string | No | Previous lesson ID to supersede |\n\n资料来源：[src/mcp-server.ts:65-80]()\n\n## Memory System\n\nContextful includes an evidence-backed memory system for storing lessons across sessions.\n\n### Adding a Lesson\n\n```bash\ncxf memory add \\\n  --claim \"Always validate tokens in middleware\" \\\n  --evidence \"file:src/auth.ts:1-20\" \\\n  --workspace . \\\n  --confidence 0.8\n```\n\n#### Memory Command Options\n\n| Option | Required | Description |\n|--------|----------|-------------|\n| `--claim <text>` | Yes | The lesson or claim |\n| `--evidence <ref...>` | Yes | Evidence references |\n| `--workspace <path>` | No | Workspace path |\n| `--scope <scope>` | No | Memory scope (default: `repo`) |\n| `--confidence <number>` | No | Confidence from 0 to 1 (default: 0.7) |\n\n资料来源：[src/cli.ts:50-75]()\n\n## Output Formats\n\n### Markdown Output (Default)\n\n```bash\ncxf query \"where is auth handled\" --workspace .\n```\n\nReturns a formatted Markdown document with citations and graph paths.\n\n### JSON Output\n\n```bash\ncxf query \"where is auth handled\" --workspace . 
--json\n```\n\nReturns structured JSON data suitable for programmatic processing.\n\n资料来源：[src/cli.ts:22-30]()\n\n### Report Formats\n\n| Format | Description |\n|--------|-------------|\n| `markdown` | Human-readable Markdown report |\n| `json` | Structured JSON data |\n| `html` | Standalone HTML page |\n\n资料来源：[src/cli.ts:44-48]()\n\n## Architecture Overview\n\n```mermaid\ngraph TD\n    A[CLI / MCP Server] --> B[Workspace Indexer]\n    B --> C[SQLite Kernel DB]\n    C --> D[Full-Text Search]\n    C --> E[Symbol Index]\n    C --> F[Graph Edges]\n    G[Query Request] --> H[Search Context]\n    H --> I[Evidence Pack Builder]\n    I --> D\n    I --> E\n    I --> F\n    I --> J[Memory Ledger]\n    I --> K[Evidence Pack Output]\n    J --> J\n```\n\n## Common Usage Patterns\n\n### Pattern 1: Initial Setup\n\n```bash\n# Index the workspace\ncxf index --workspace /path/to/project --watch\n\n# Generate initial report\ncxf report --workspace /path/to/project --format html > report.html\n```\n\n### Pattern 2: Interactive Exploration\n\n```bash\n# Run as MCP server\ncxf server\n\n# Or use CLI directly\ncxf query \"how does the cache work\" --workspace . 
--budget 3000\n```\n\n### Pattern 3: Agent Memory Persistence\n\n```bash\n# Store learned lessons\ncxf memory add --claim \"Config validation happens in validate.ts\" --evidence \"file:src/config/validate.ts:1-50\"\n\n# Recall past lessons\n# Via MCP: recall_memory(query=\"config validation\")\n```\n\n## Next Steps\n\n- Explore [Architecture Documentation](architecture) for deep dive into indexing and search internals\n- Learn about [Memory System](memory) for evidence-backed knowledge persistence\n- Review [API Reference](api) for programmatic integration\n\n---\n\n<a id='high-level-architecture'></a>\n\n## High-Level Architecture\n\n### 相关页面\n\n相关主题：[Runtime Components](#runtime-components), [Search Engine](#search-engine), [SQLite Database Schema](#sqlite-database)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n</details>\n\n# High-Level Architecture\n\nContextful is a local-only indexing and context management tool designed to help AI coding assistants retrieve compact, evidence-backed context from workspace codebases. The system operates without external embedding APIs, instead relying on SQLite FTS5 full-text search, graph-based dependency tracking, and intent-classified query routing. 
资料来源：[README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n## System Overview\n\nContextful functions as a local daemon that continuously indexes workspace files, extracts code symbols and import relationships, and provides a structured context pack API to agents. The architecture follows a three-layer design:\n\n1. **Indexing Layer** - File parsing, symbol extraction, edge detection\n2. **Storage Layer** - SQLite kernel with FTS5 search and graph tables\n3. **Query Layer** - Intent classification, ranked search, evidence pack assembly\n\n资料来源：[src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n\n## Component Architecture\n\n```mermaid\ngraph TD\n    A[Workspace Files] --> B[Indexer]\n    B --> C[Symbol Extraction]\n    B --> D[Edge Detection]\n    B --> E[Chunk Generation]\n    C --> F[SQLite Kernel DB]\n    D --> F\n    E --> F\n    G[CLI / MCP Server] --> H[Search Module]\n    H --> F\n    H --> I[Context Pack Assembly]\n    I --> J[Evidence Pack Output]\n```\n\n### Core Components\n\n| Component | File | Responsibility |\n|-----------|------|----------------|\n| Indexer | `src/indexer.ts` | Recursively walks workspace, triggers file processing |\n| Extractor | `src/extract.ts` | Parses symbols, edges, and code chunks per file |\n| Search | `src/search.ts` | FTS5 queries, intent classification, ranking |\n| CLI | `src/cli.ts` | Command-line interface and MCP server entry point |\n| Report | `src/report.ts` | Generates workspace context reports |\n\n资料来源：[src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts), [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts), [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Indexing Pipeline\n\nThe indexing pipeline processes workspace files through multiple extraction stages. 
Each source file is read, classified by language, and passed through specialized extractors that produce structured records.\n\n```mermaid\ngraph LR\n    A[File Content] --> B[Language Detection]\n    B --> C[Symbol Extraction]\n    B --> D[Edge Extraction]\n    B --> E[Chunk Extraction]\n    C --> F[symbols table]\n    D --> G[edges table]\n    E --> H[chunks_fts table]\n```\n\n### Symbol Extraction\n\nThe `extractSymbols` function identifies named code entities based on language-specific patterns:\n\n| Language | Supported Symbols |\n|----------|-------------------|\n| TypeScript/JavaScript | functions, classes, interfaces, types, const arrow functions |\n| Python | functions, classes |\n| Go | functions, structs, interfaces |\n| Rust | functions, structs, enums, traits, impl blocks |\n| Markdown | headings |\n| JSON | config keys |\n\n资料来源：[src/extract.ts:1-80](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Edge Detection\n\nImport relationships are tracked as directed edges between modules. The `extractEdges` function processes different import syntaxes per language:\n\n- **TypeScript/JavaScript**: ES6 `import` and `require()` statements\n- **Python**: `from ... import` and `import` statements\n- **Go**: Import strings within double quotes\n- **Rust**: `use` and `mod` declarations\n- **JSON**: Top-level keys in configuration files\n\n资料来源：[src/extract.ts:100-160](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Chunk Generation\n\nCode files are split into semantic chunks for full-text search. 
The `codeChunks` function segments content into logical blocks based on:\n- Empty line boundaries\n- Token budget (target: ~300 tokens per chunk)\n- Language-specific token estimation via `estimateTokens`\n\n资料来源：[src/extract.ts:180-220](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Storage Layer\n\n### SQLite Kernel Schema\n\nThe kernel database uses SQLite with several specialized tables:\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| `files` | Tracked workspace files | `path`, `language`, `hash`, `indexed_at` |\n| `symbols` | Extracted code symbols | `ref`, `name`, `kind`, `file_path`, `line`, `signature`, `exported` |\n| `edges` | Import/dependency graph | `source_file`, `target_name`, `target_type`, `edge_type`, `line` |\n| `chunks_fts` | FTS5 virtual table for full-text search | `ref`, `path`, `title`, `text`, `kind` |\n| `memory` | Evidence-backed lessons | `id`, `claim`, `scope`, `confidence`, `created_at` |\n\n资料来源：[src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts), [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n\n## Query and Search System\n\n### Intent Classification\n\nQueries are classified into intents to optimize search strategy:\n\n| Intent | Trigger Keywords | Search Focus |\n|--------|------------------|--------------|\n| `code` | `function`, `class`, `implementation` | Symbol and code chunks |\n| `memory` | `memory`, `lesson`, `session` | Memory ledger |\n| `impact` | `impact`, `depends on`, `blast radius` | Dependency graph |\n| `historical` | `why`, `changed`, `commit` | Git history |\n| `architectural` | `architecture`, `flow`, `path`, `trace` | Graph traversal |\n| `docs` | `documentation`, `readme`, `guide` | Markdown chunks |\n| `exact` | symbols, paths, line references | Precise symbol matching |\n| `vague` | Default fallback | Broad FTS search 
|\n\n资料来源：[src/search.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n### Context Pack Assembly\n\nThe `createContextPack` function orchestrates the evidence gathering:\n\n1. Classify query intent\n2. Execute FTS5 search across chunks\n3. Apply query expansion with domain-specific term additions\n4. Score and rank hits using BM25 with intent-based bonuses\n5. Select hits within token budget\n6. Load related symbols and graph paths\n7. Assemble and return `EvidencePack`\n\n资料来源：[src/search.ts:200-280](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## CLI and MCP Integration\n\n### Command Structure\n\n| Command | Purpose | Key Options |\n|---------|---------|-------------|\n| `index` | Initial workspace indexing | `--workspace`, `--watch` |\n| `daemon` | Continuous indexing with file watching | `--workspace` |\n| `query` | Generate evidence pack | `--workspace`, `--budget`, `--json` |\n| `search` | Direct search without packing | `--workspace`, `--limit`, `--kind` |\n| `report` | Generate context report | `--workspace`, `--format` |\n| `memory add` | Store evidence-backed lessons | `--claim`, `--evidence`, `--scope` |\n| `server` | Start MCP stdio server | (none) |\n\n资料来源：[src/cli.ts:20-100](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n### MCP Server Tools\n\nThe MCP server exposes standardized tools for agent integration:\n\n- `context_pack(query, budget, scope)` - Primary killer tool returning ranked, cited evidence\n- `search_code(query, mode, filters)` - Code, docs, symbol, and memory search\n- `trace_path(from, to, edge_types)` - Graph traversal across the codebase\n- `impact_analysis(symbol_or_file)` - Reverse dependency analysis\n- `why_changed(symbol_or_file)` - Git history with current evidence\n- `recall_memory(query, scope)` - Search persistent lessons\n- `write_lesson(claim, evidence_refs, scope)` - Store new 
memories\n\n资料来源：[README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n## Report Generation\n\nThe report system aggregates workspace statistics and warnings:\n\n```mermaid\ngraph TD\n    A[generateReport] --> B[Index Status Check]\n    B --> C[File Statistics]\n    B --> D[Symbol Statistics]\n    B --> E[Edge Statistics]\n    B --> F[Warning Collection]\n    C --> G[renderMarkdown / renderHtml]\n    D --> G\n    E --> G\n    F --> G\n```\n\nReports support three output formats:\n- **markdown** - Plain text with markdown headings\n- **json** - Structured JSON with all report fields\n- **html** - Self-contained HTML document with styling\n\n资料来源：[src/report.ts:1-80](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n\n## Privacy and Security\n\nContextful operates entirely locally with no external API calls:\n\n- No embedding API calls for vector search\n- No source code uploads\n- No file editing or auto-fixes\n- No dependency installation in target workspace\n\nEvidence references are validated and stale references are rejected to maintain integrity of the memory system.\n\n资料来源：[README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n## Data Flow Summary\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant CLI as CLI/MCP Server\n    participant Indexer\n    participant Extractor\n    participant Search\n    participant Kernel as SQLite Kernel\n    \n    User->>CLI: index --workspace .\n    CLI->>Indexer: indexWorkspace()\n    Indexer->>Extractor: extractFile()\n    Extractor->>Kernel: Insert symbols, edges, chunks\n    Kernel-->>Indexer: Confirmation\n    \n    User->>CLI: query \"where is auth handled\"\n    CLI->>Search: searchContext()\n    Search->>Kernel: FTS5 query\n    Search->>Kernel: Graph traversal\n    Search->>Kernel: Memory search\n    Kernel-->>Search: Ranked hits\n    Search-->>CLI: EvidencePack\n    CLI-->>User: Compact context output\n```\n\n## Key Design Decisions\n\n| 
Decision | Rationale |\n|----------|-----------|\n| SQLite FTS5 over vector embeddings | Local-only operation, no external API dependencies |\n| Intent-based query routing | Optimizes search strategy based on query semantics |\n| BM25 scoring with bonuses | Balances relevance with domain-specific priorities |\n| Token-budgeted evidence packs | Prevents context overflow in LLM contexts |\n| Evidence refs as first-class citizens | Enables verifiable, traceable AI responses |\n\n资料来源：[src/search.ts:50-150](https://github.com/Inferensys/contextful/blob/main/src/search.ts), [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n\n---\n\n<a id='runtime-components'></a>\n\n## Runtime Components\n\n### 相关页面\n\n相关主题：[High-Level Architecture](#high-level-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n</details>\n\n# Runtime Components\n\n## Overview\n\nThe **Runtime Components** in Contextful encompass the services, daemons, and server processes that enable real-time code indexing, search, and context-aware information retrieval. 
These components operate as the execution layer of the application, providing persistent indexing, live workspace monitoring, and MCP (Model Context Protocol) server capabilities for AI agent integration.\n\nThe runtime layer bridges the gap between static code analysis and dynamic query resolution, allowing users and AI agents to query indexed repositories with token-budgeted evidence packs.\n\n---\n\n## Core Runtime Services\n\n### Indexing Daemon\n\nThe **Indexing Daemon** provides continuous workspace monitoring and automatic re-indexing when file changes are detected.\n\n#### Architecture\n\n```mermaid\ngraph TD\n    A[File System] -->|fs.watch| B[Debounce Timer]\n    B --> C{500ms elapsed?}\n    C -->|Yes| D[indexWorkspace]\n    D --> E[Kernel DB Update]\n    C -->|No| B\n    A -->|Initial| F[First Index]\n    F --> E\n```\n\n#### Key Functions\n\n| Function | Purpose | Location |\n|----------|---------|----------|\n| `watchWorkspace` | Monitors filesystem changes and triggers re-indexing | `src/indexer.ts:1-15` |\n| `indexWorkspace` | Performs full or incremental workspace indexing | `src/indexer.ts` |\n\n#### Implementation Details\n\nThe daemon uses Node.js `fs.watch()` with a 500ms debounce timer to batch rapid file changes into single indexing operations. 
This prevents excessive CPU usage during bulk file operations like git checkouts or build processes.\n\n```typescript\n// src/indexer.ts - Watch implementation pattern\nfs.watch(resolved, { recursive: true }, () => {\n  if (timer) clearTimeout(timer);\n  timer = setTimeout(async () => {\n    onIndex(await indexWorkspace({ workspace: resolved }));\n  }, 500);\n});\n```\n\nThe daemon outputs index results as JSON to stdout, making it suitable for IPC communication with parent processes.\n\n---\n\n### MCP Server (stdio Mode)\n\nThe **MCP Server** exposes Contextful's capabilities through the Model Context Protocol standard, enabling integration with AI coding assistants.\n\n#### Supported MCP Tools\n\n| Tool Name | Purpose | Input Parameters |\n|-----------|---------|------------------|\n| `context_pack` | Returns token-budgeted evidence bundle | `query`, `budget`, `scope` |\n| `search_code` | Code, docs, symbol, and memory search | `query`, `mode`, `filters` |\n| `trace_path` | Graph traversal across codebase | `from`, `to`, `edge_types` |\n| `impact_analysis` | Reverse dependency analysis | `symbol_or_file` |\n| `why_changed` | Git history with current evidence | `symbol_or_file` |\n| `recall_memory` | Search project lessons and sessions | `query`, `scope` |\n| `write_lesson` | Store evidence-backed lessons | `claim`, `evidence_refs`, `scope`, `confidence` |\n\n资料来源：[README.md:1-30](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n#### Server Execution\n\n```bash\n# Run as MCP stdio server\nnpx @inferensys/contextful server\n```\n\nThe server operates in stdio mode, accepting JSON-RPC requests and responding with JSON-RPC results through stdin/stdout streams.\n\n---\n\n## CLI Runtime Commands\n\nThe CLI provides multiple entry points for runtime operations.\n\n### Command Reference\n\n| Command | Description | Key Options |\n|---------|-------------|-------------|\n| `cxf daemon` | Run local indexing daemon | `--workspace <path>` |\n| `cxf query` | Create 
evidence pack for query | `--workspace`, `--budget`, `--json` |\n| `cxf search` | Search without evidence pack | `--workspace`, `--limit`, `--kind` |\n| `cxf report` | Generate context report | `--workspace`, `--format` |\n| `cxf server` | Run MCP stdio server | - |\n| `cxf memory add` | Store evidence-backed lesson | `--claim`, `--evidence`, `--scope`, `--confidence` |\n\n资料来源：[src/cli.ts:1-80](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n### Daemon Mode\n\n```typescript\n// src/cli.ts - Daemon command registration\nprogram\n  .command(\"daemon\")\n  .description(\"Run the local indexing daemon for a workspace.\")\n  .option(\"--workspace <path>\", \"Workspace path.\", process.cwd())\n  .action(async (options: { workspace: string }) => {\n    await watchWorkspace(options.workspace, (result) => {\n      process.stdout.write(`${JSON.stringify(result, null, 2)}\\n`);\n    });\n  });\n```\n\n### Query Command\n\nThe query command compiles an evidence pack based on a natural language query and token budget:\n\n```typescript\n// src/cli.ts - Query command\nprogram\n  .command(\"query\")\n  .description(\"Create an evidence pack for a query.\")\n  .argument(\"<query>\", \"Query to answer from indexed context.\")\n  .option(\"--workspace <path>\", \"Workspace path.\", process.cwd())\n  .option(\"--budget <tokens>\", \"Approximate token budget.\", parseInteger, 2000)\n  .option(\"--json\", \"Print JSON instead of Markdown.\")\n  .action(async (query: string, options) => {\n    const pack = await createContextPack({ workspace: options.workspace, query, budget: options.budget });\n    process.stdout.write(options.json ? 
`${JSON.stringify(pack, null, 2)}\\n` : renderEvidencePackMarkdown(pack));\n  });\n```\n\n---\n\n## Evidence Pack System\n\n### Pack Creation Flow\n\n```mermaid\ngraph LR\n    A[Query Input] --> B[classifyQuery]\n    B --> C[searchContext]\n    C --> D{Results Available?}\n    D -->|Yes| E[Select & Rank Hits]\n    D -->|No| F[Expand Search Terms]\n    F --> C\n    E --> G[Load Symbols & Graph]\n    G --> H[Build EvidencePack]\n    H --> I[Save to Kernel DB]\n    I --> J[Return Pack]\n```\n\n### Pack Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique pack identifier with `ctx_` prefix |\n| `query` | string | Original query text |\n| `intent` | SearchIntent | Classified query intent |\n| `summary` | string | Natural language summary |\n| `citations` | SearchHit[] | Ranked evidence items |\n| `files` | FileInfo[] | Referenced files with reasons |\n| `symbols` | SymbolRecord[] | Matched symbol definitions |\n| `graphPaths` | GraphPath[] | Module/import relationships |\n| `memoryHits` | SearchHit[] | Recallable memory matches |\n| `confidence` | number | 0.1-0.92 confidence score |\n| `tokenEstimate` | number | Estimated token count of the assembled pack |\n| `budget` | number | Maximum token budget |\n| `createdAt` | string | ISO timestamp |\n\n资料来源：[src/search.ts:150-200](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n### Confidence Calculation\n\nThe confidence score is computed using a clamped formula:\n\n```\nconfidence = clamp(0.25 + hits * 0.05 + graphPaths * 0.02 + memoryHits * 0.05, 0.1, 0.92)\n```\n\nThe clamp bounds the score to the range 10% to 92%. Because the base term is 0.25, a query with no hits still scores 25%, so the 10% floor acts only as a safety bound, while the 92% cap keeps a pack from ever claiming certainty (epistemic humility).\n\n资料来源：[src/search.ts:80-82](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n---\n\n## Workspace Resolution\n\n### Path Resolution Flow\n\n```mermaid\ngraph TD\n    A[CLI Input Path] --> B{Is Absolute?}\n    B -->|No| C[Resolve relative to cwd]\n    B -->|Yes| D[Use as-is]\n  
  C --> E[validateWorkspace]\n    D --> E\n    E --> F{Valid Directory?}\n    F -->|Yes| G[Load Kernel DB]\n    F -->|No| H[Create New Index]\n```\n\nThe `resolveWorkspace()` utility normalizes all workspace paths, while `ensureIndexed()` guarantees the workspace has been indexed before search operations proceed.\n\n资料来源：[src/util.ts:1-20](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n\n---\n\n## Report Generation\n\nThe report system generates comprehensive context reports in multiple formats.\n\n### Supported Formats\n\n| Format | Renderer Function |\n|--------|-------------------|\n| `markdown` | `renderMarkdown()` |\n| `json` | `JSON.stringify()` |\n| `html` | `renderHtml()` |\n\n### Report Contents\n\n- **Summary**: Overview of indexed state\n- **Statistics**: Token counts, file counts, index timestamps\n- **Warnings**: Potential issues (up to 20)\n- **Token Savings**: Estimated efficiency metrics\n\n资料来源：[src/report.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n\n---\n\n## Error Handling\n\n### Workspace Validation\n\nRuntime components validate workspace paths before operations:\n\n```typescript\n// Validation checks include:\n// 1. Directory exists and is readable\n// 2. Kernel DB can be opened or created\n// 3. 
Index state is consistent\n```\n\n### Broken JSON Handling\n\nWhen parsing `package.json` during indexing, broken JSON is handled gracefully:\n\n```typescript\n// src/extract.ts - JSON error handling\ntry {\n  const parsed = JSON.parse(content) as Record<string, unknown>;\n  // Process dependencies, devDependencies, scripts\n} catch {\n  // Broken JSON receives text chunks; syntax diagnostics out of scope\n}\n```\n\n---\n\n## Memory and Lessons\n\n### Lesson Storage\n\nLessons are evidence-backed statements stored for recall during future queries:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `claim` | string | required | The lesson statement |\n| `evidence` | string[] | required | File refs (e.g., `file:src/auth.ts:1-20`) |\n| `scope` | string | \"repo\" | Memory scope (repo, global) |\n| `confidence` | number | 0.7 | Confidence score (0-1) |\n\n资料来源：[src/cli.ts:60-80](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n### Memory Recall\n\nMemory hits are weighted in evidence pack generation, providing higher confidence when prior lessons match the query context.\n\n---\n\n## See Also\n\n- [CLI Reference](../cli.md) - Complete CLI command documentation\n- [Indexing System](../indexing.md) - Code analysis and symbol extraction\n- [Search API](../search-api.md) - Query classification and ranking\n\n---\n\n<a id='search-engine'></a>\n\n## Search Engine\n\n### 相关页面\n\n相关主题：[Context Packs](#context-packs), [SQLite Database Schema](#sqlite-database)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- 
[src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n</details>\n\n# Search Engine\n\n## Overview\n\nThe Search Engine is the core retrieval system in Contextful, designed to provide intelligent, evidence-backed context for agent queries. It combines full-text search (FTS), symbol indexing, dependency graph traversal, and memory recall to deliver ranked, cited results within a configurable token budget.\n\nThe system serves as the foundation for multiple interfaces: CLI commands (`query`, `search`), MCP server tools (`search_code`, `context_pack`), and report generation.\n\n资料来源：[src/search.ts:1-50]()\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Query Input] --> B[Query Classification]\n    B --> C{Intent Type}\n    C -->|code/docs| D[Full-Text Search]\n    C -->|symbols| E[Symbol Lookup]\n    C -->|memory| F[Memory Ledger Search]\n    C -->|impact| G[Graph Traversal]\n    C -->|historical| H[Git History + Search]\n    D --> I[BM25 Ranking]\n    E --> J[Symbol Index]\n    F --> K[Memory DB]\n    G --> L[Edge Database]\n    H --> M[Git Operations]\n    I --> N[Result Scoring]\n    J --> N\n    K --> N\n    L --> N\n    N --> O[Context Pack]\n```\n\n### Core Components\n\n| Component | File | Responsibility |\n|-----------|------|----------------|\n| Search Kernel | `src/search.ts` | Core search logic and ranking |\n| Query Classifier | `src/search.ts` | Intent detection |\n| FTS Engine | `src/search.ts` | Full-text search using SQLite FTS5 |\n| Graph Tracer | `src/search.ts` | Dependency graph traversal |\n| Memory Store | `src/memory.ts` | Evidence-backed memory recall |\n\n资料来源：[src/search.ts:50-120]()\n\n## Query Classification\n\nThe search engine classifies each query into one of nine intent types to optimize retrieval strategy.\n\n### SearchIntent Types\n\n| Intent | Trigger Keywords | Search Strategy |\n|--------|------------------|-----------------|\n| `code` | `code`, `function`, `class`, `impl` | FTS + symbol lookup 
|\n| `docs` | `resource`, `docs`, `readme`, `how to` | FTS on markdown/json |\n| `symbols` | `define`, `interface`, `type`, `symbol` | Direct symbol index |\n| `memory` | `remember`, `lesson`, `learned`, `session` | Memory ledger query |\n| `impact` | `impact`, `affected`, `depends`, `blast radius` | Reverse dependency graph |\n| `historical` | `why`, `changed`, `commit`, `history` | Git history + current search |\n| `architectural` | `architecture`, `flow`, `trace`, `connects` | Graph path analysis |\n| `exact` | Code patterns, paths, line refs | Direct file/symbol lookup |\n| `vague` | Default | Broad FTS + graph |\n\n```typescript\nfunction classifyQuery(query: string): SearchIntent {\n  const q = query.toLowerCase();\n  if (/\\b(code|function|class|implement|module)\\b/.test(q)) return \"code\";\n  if (/\\b(define|interface|type|symbol)\\b/.test(q)) return \"symbols\";\n  if (/\\b(memory|remember|lesson|learned|sessions?)\\b/.test(q)) return \"memory\";\n  // ... additional classifications\n}\n```\n\n资料来源：[src/search.ts:1-30]()\n\n## Search Flow\n\n### Main Search Pipeline\n\n```mermaid\nsequenceDiagram\n    participant CLI as CLI/MCP\n    participant Search as searchContext()\n    participant Kernel as Kernel DB\n    participant FTS as FTS5 Engine\n    participant Graph as Graph DB\n    participant Memory as Memory Store\n\n    CLI->>Search: query, workspace, limit\n    Search->>Kernel: ensureIndexed()\n    Search->>Kernel: addQuery()\n    Search->>FTS: ftsQuery(expandedTerms)\n    FTS-->>Search: ranked rows (BM25)\n    Search->>Search: scoreFromRank()\n    Search->>Graph: loadGraphPaths()\n    Search-->>CLI: {intent, hits}\n```\n\n### Full-Text Search Query Builder\n\nThe `ftsQuery` function transforms user queries into FTS5-compatible search strings:\n\n```typescript\nfunction ftsQuery(query: string): string {\n  const terms = expandedTerms(query);\n  return Array.from(new Set(terms.map((term) => term.toLowerCase())))\n    .filter((term) => 
!STOPWORDS.has(term))\n    .slice(0, 14)\n    .map((term) => `${term}*`)\n    .join(\" OR \");\n}\n```\n\nKey behaviors:\n- Expands terms based on query context (e.g., \"tool\" → \"server\", \"tool\", \"callTool\")\n- Filters stopwords: `where`, `what`, `which`, `when`, `how`, `are`, `the`, `for`, `with`, `and`, `or`, `to`\n- Limits to 14 terms maximum\n- Appends wildcard `*` for prefix matching\n\n资料来源：[src/search.ts:200-280]()\n\n## Scoring System\n\n### Rank-to-Score Transformation\n\nThe `scoreFromRank` function converts BM25 ranks into relevance scores (0-10 scale) with domain-specific bonuses:\n\n```typescript\nfunction scoreFromRank(rank: number, query: string, corpus: string): number {\n  const base = 10 / (1 + Math.abs(rank));\n  let bonus = 0;\n  \n  // Domain-specific bonuses\n  if (/\\b(tool|tools|registered|register)\\b/.test(q) && corpus.includes(\"server.tool(\")) {\n    bonus += 9;\n  }\n  if (/\\bmcp\\b/.test(q) && corpus.includes(\"mcp-server\")) {\n    bonus += 4;\n  }\n  \n  return clamp(base + bonus, 0.1, 10);\n}\n```\n\n### Scoring Bonuses Matrix\n\n| Query Pattern | Content Match | Bonus |\n|---------------|---------------|-------|\n| `tool/tools/register` | `server.tool(` | +9 |\n| `mcp` | `mcp-server` | +4 |\n| `where registered` | `function runMcpServer` | +4 |\n| `tool` query | `src/search.ts` | -8 |\n| `memory` query | `src/memory.ts` | +5 |\n| `memory` query | `src/search.ts` | -16 |\n\nThis anti-gaming mechanism penalizes results from the search implementation itself when irrelevant.\n\n资料来源：[src/search.ts:240-320]()\n\n## Term Expansion\n\nThe `expandedTerms` function intelligently expands query terms based on semantic context:\n\n```typescript\nfunction expandedTerms(query: string): string[] {\n  const lower = query.toLowerCase();\n  const additions: string[] = [];\n  \n  if (/\\b(tool|tools|registered|register)\\b/.test(lower)) {\n    additions.push(\"server\", \"tool\", \"tools\", \"callTool\");\n  }\n  if (/\\bmcp\\b/.test(lower)) 
{\n    additions.push(\"mcp\", \"server\", \"stdio\");\n  }\n  if (/\\bmemory|memories|remember|remembers|lesson|lessons|learned|session|sessions\\b/.test(lower)) {\n    additions.push(\"memory\", \"memories\", \"lesson\", \"lessons\", \"claim\", \"ledger\", \"evidence\");\n  }\n  if (/\\bimpact|depends|dependents|uses\\b/.test(lower)) {\n    additions.push(\"imports\", \"tests\", \"edges\");\n  }\n  \n  return [...terms, ...additions];\n}\n```\n\n资料来源：[src/search.ts:320-380]()\n\n## CLI Commands\n\n### Query Command\n\n```bash\ncxf query \"<query>\" --workspace <path> --budget <tokens> --json\n```\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `query` | string | required | Query to answer from indexed context |\n| `--workspace` | path | `cwd()` | Workspace path |\n| `--budget` | number | 2000 | Approximate token budget |\n| `--json` | flag | false | Output JSON instead of Markdown |\n\n### Search Command\n\n```bash\ncxf search \"<query>\" --workspace <path> --limit <count> --kind <kind>\n```\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `query` | string | required | Search query |\n| `--workspace` | path | `cwd()` | Workspace path |\n| `--limit` | number | 10 | Maximum hits |\n| `--kind` | enum | `all` | Search category: `all\\|code\\|docs\\|symbols\\|memory` |\n\n资料来源：[src/cli.ts:40-80]()\n\n## MCP Server Tools\n\nThe search engine exposes the following MCP tools:\n\n### search_code\n\n```typescript\nserver.tool(\"search_code\", \"Search indexed code, docs, symbols, and stored context\", {\n  query: z.string(),\n  mode: z.enum([\"all\", \"code\", \"docs\", \"symbols\", \"memory\"]).optional(),\n  limit: z.number().optional(),\n  filters: z.record(z.string(), z.unknown()).optional()\n});\n```\n\n### trace_path\n\n```typescript\nserver.tool(\"trace_path\", \"Trace graph relationships between files, symbols, modules\", {\n  from: z.string(),\n  to: z.string().optional(),\n  
edge_types: z.array(z.string()).optional(),\n  limit: z.number().optional()\n});\n```\n\n### impact_analysis\n\n```typescript\nserver.tool(\"impact_analysis\", \"Find likely dependents and tests\", {\n  symbol_or_file: z.string(),\n  limit: z.number().optional()\n});\n```\n\n### why_changed\n\n```typescript\nserver.tool(\"why_changed\", \"Explain why a file/symbol may have changed\", {\n  symbol_or_file: z.string(),\n  limit: z.number().optional()\n});\n```\n\n资料来源：[src/mcp-server.ts:1-80]()\n\n## Context Pack\n\nThe `createContextPack` function assembles comprehensive evidence bundles:\n\n```typescript\nexport async function createContextPack(options: {\n  workspace?: string;\n  query: string;\n  budget?: number;\n  scope?: string;\n}): Promise<EvidencePack>\n```\n\n### EvidencePack Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique pack identifier (`ctx_<hash>`) |\n| `query` | string | Original query |\n| `scope` | string | Search scope (default: `repo`) |\n| `intent` | SearchIntent | Classified intent |\n| `summary` | string | Human-readable summary |\n| `citations` | SearchHit[] | Ranked search results |\n| `files` | FileContext[] | Grouped file references |\n| `symbols` | SymbolRecord[] | Relevant symbols (≤20) |\n| `graphPaths` | GraphPath[] | Dependency connections (≤20) |\n| `memoryHits` | SearchHit[] | Memory matches |\n| `confidence` | number | Confidence score (0.1-0.92) |\n| `tokenEstimate` | number | Estimated token count |\n| `budget` | number | Token budget used |\n| `createdAt` | string | ISO timestamp |\n\n### Confidence Calculation\n\n```typescript\nfunction confidenceFor(hits: SearchHit[], graphPaths: GraphPath[], memoryHits: SearchHit[]): number {\n  return clamp(\n    0.25 + \n    hits.length * 0.05 + \n    graphPaths.length * 0.02 + \n    memoryHits.length * 0.05,\n    0.1,\n    0.92\n  );\n}\n```\n\n资料来源：[src/search.ts:400-480]()\n\n## Graph Traversal\n\nThe `traceGraph` function performs 
dependency graph analysis:\n\n```typescript\nexport async function traceGraph(options: {\n  workspace?: string;\n  from: string;\n  to?: string;\n  edgeTypes?: string[];\n  limit?: number;\n}): Promise<GraphPath[]>\n```\n\n### Edge Types\n\n| Edge Type | Direction | Description |\n|-----------|-----------|-------------|\n| `IMPORTS` | File → Module | Import/require statements |\n| `DEFINES` | File → Symbol | Symbol definitions |\n| `CONFIGURES` | File → Config | Configuration keys |\n| `TESTS` | Test → Source | Test file relationships |\n\n### Impact Analysis\n\n```typescript\nexport async function impactAnalysis(options: {\n  workspace?: string;\n  target: string;\n  limit?: number;\n}): Promise<{\n  target: string;\n  forward: string[];\n  reverse: string[];\n  tests: string[];\n}>\n```\n\nReturns forward dependencies, reverse dependents, and likely test files for a given symbol or file.\n\n资料来源：[src/search.ts:480-550]()\n\n## Utility Functions\n\n### lineRange\n\nExtracts a specific line range from text:\n\n```typescript\nexport function lineRange(text: string, startLine: number, endLine: number): string {\n  const lines = text.split(/\\r?\\n/);\n  return lines.slice(Math.max(0, startLine - 1), Math.min(lines.length, endLine)).join(\"\\n\");\n}\n```\n\n### clamp\n\nConstrains values within bounds:\n\n```typescript\nexport function clamp(value: number, min: number, max: number): number {\n  return Math.max(min, Math.min(max, value));\n}\n```\n\n### unique\n\nDeduplicates arrays:\n\n```typescript\nexport function unique<T>(items: T[]): T[] {\n  return Array.from(new Set(items));\n}\n```\n\n### isLikelyBinary\n\nDetects binary files by checking for null bytes:\n\n```typescript\nexport function isLikelyBinary(buffer: Buffer): boolean {\n  const sample = buffer.subarray(0, Math.min(buffer.length, 4096));\n  return sample.includes(0);\n}\n```\n\n资料来源：[src/util.ts:1-50]()\n\n## Data Models\n\n### SearchHit\n\n```typescript\ninterface SearchHit {\n  ref: string;        
// Format: \"file:path:start-end\"\n  path: string;       // File path\n  kind: string;       // \"chunk\", \"symbol\", \"memory\", \"doc\"\n  title: string;      // Display title\n  text: string;       // Content snippet\n  score: number;      // Relevance score\n  line?: number;      // Starting line number\n}\n```\n\n### SymbolRecord\n\n```typescript\ninterface SymbolRecord {\n  ref: string;\n  name: string;\n  kind: string;       // \"function\", \"class\", \"interface\", \"type\", etc.\n  filePath: string;\n  line: number;\n  signature?: string;\n  exported?: boolean;\n}\n```\n\n资料来源：[src/search.ts:100-150]()\n\n## Index Status\n\nThe `getIndexStatus` function returns workspace indexing metadata:\n\n```typescript\nexport async function getIndexStatus(options: { workspace?: string }): Promise<IndexStatus>\n```\n\n### IndexStatus Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `workspace` | string | Workspace path |\n| `languageCounts` | Record<string, number> | File count per language |\n| `warnings` | string[] | Index warnings |\n| `lastIndexed` | string | ISO timestamp of last index |\n| `totalChunks` | number | Total indexed chunks |\n\n资料来源：[src/search.ts:550-600]()\n\n## Summary\n\nThe Search Engine provides Contextful's intelligent retrieval capabilities through:\n\n1. **Intent Classification** - Automatically routes queries to optimal search strategies\n2. **Full-Text Search** - SQLite FTS5 with BM25 ranking and domain-specific scoring\n3. **Symbol Index** - Fast lookup of code definitions across languages\n4. **Graph Traversal** - Dependency analysis and impact tracking\n5. **Memory Integration** - Recall of past lessons and evidence-backed claims\n6. **Token Budgeting** - Constrains output to specified budget limits\n7. 
**Confidence Scoring** - Quantifies result reliability\n\nAll search operations flow through a unified kernel database that combines FTS chunks, symbol records, and edge relationships for comprehensive context retrieval.\n\n---\n\n<a id='context-packs'></a>\n\n## Context Packs\n\n### 相关页面\n\n相关主题：[Search Engine](#search-engine), [Memory Ledger](#memory-ledger)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/types.ts](https://github.com/Inferensys/contextful/blob/main/src/types.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n</details>\n\n# Context Packs\n\nContext Packs are the core output format of Contextful, providing AI agents with compact, ranked, and cited evidence bundles that fit within a specified token budget. Instead of forcing agents to read dozens of arbitrary files, Context Packs deliver precisely the evidence needed to answer a specific query.\n\n## Overview\n\nA Context Pack is a structured evidence package generated by the `context_pack()` MCP tool or the `cxf query` CLI command. 
It contains:\n\n- Ranked code and documentation citations matching the query\n- Related symbols (functions, classes, interfaces) from matching files\n- Graph paths connecting related components\n- Memory hits from evidence-backed lessons\n- A confidence score and token budget accounting\n\nThe pack is designed to be consumed directly by an LLM agent, providing traceable citations and a clear summary of what evidence was found.\n\n## Data Model\n\n### EvidencePack Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | `string` | Unique identifier (format: `ctx_<hash>`) |\n| `query` | `string` | The original search query |\n| `scope` | `string` | Search scope (e.g., \"repo\") |\n| `intent` | `SearchIntent` | Classified query intent |\n| `summary` | `string` | Human-readable summary of findings |\n| `citations` | `SearchHit[]` | Ranked evidence items |\n| `files` | `FileContext[]` | Grouped file references with reasons |\n| `symbols` | `SymbolRecord[]` | Relevant symbols from matched files |\n| `graphPaths` | `GraphPath[]` | Graph traversals between components |\n| `memoryHits` | `SearchHit[]` | Memory/lesson hits |\n| `confidence` | `number` | Estimated confidence (0.1-0.92) |\n| `tokenEstimate` | `number` | Estimated token count of pack |\n| `budget` | `number` | Requested token budget |\n| `createdAt` | `string` | ISO timestamp of creation |\n\n资料来源：[src/search.ts:search.ts]()\n\n### SearchHit Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `ref` | `string` | Reference identifier (e.g., `file:src/auth.ts:1-20`) |\n| `path` | `string` | File path |\n| `title` | `string` | Display title |\n| `kind` | `string` | Hit kind: code, doc, symbol, memory |\n| `excerpt` | `string` | Relevant text excerpt |\n| `score` | `number` | Relevance score |\n| `rank` | `number` | BM25 rank |\n\n### SearchIntent Enum\n\n| Intent | Trigger Keywords |\n|--------|-----------------|\n| `exact` | Code patterns, paths, symbol names 
with special chars |\n| `symbol` | Function names, class names, method calls |\n| `test` | test, spec, mock, fixture, unit |\n| `memory` | memory, lesson, learned, session |\n| `impact` | impact, affected, depends, blast radius |\n| `historical` | why, changed, commit, history, regression |\n| `architectural` | architecture, flow, trace, connects, imports |\n| `docs` | resource, docs, documentation, guide, readme |\n| `vague` | Default for generic queries |\n\n资料来源：[src/search.ts:search.ts]()\n\n## Creation Flow\n\nThe `createContextPack` function orchestrates the entire pack creation process:\n\n```mermaid\ngraph TD\n    A[createContextPack] --> B[searchContext]\n    B --> C[classifyQuery]\n    C --> D[ftsQuery + expandedTerms]\n    D --> E[FTS Search on chunks_fts]\n    E --> F[scoreFromRank]\n    F --> G[Select Hits within Budget]\n    G --> H[loadSymbolsForPaths]\n    G --> I[loadGraphPaths]\n    G --> J[Filter memoryHits]\n    H --> K[Build EvidencePack]\n    I --> K\n    J --> K\n    K --> L[saveEvidencePack]\n    L --> M[Return EvidencePack]\n```\n\n### Step 1: Search Context\n\nThe process begins by classifying the query intent and executing full-text search:\n\n```typescript\nconst search = await searchContext({ workspace, query, limit: budget * 2 });\nconst selected = selectWithinBudget(search.hits, budget);\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 2: Budget-Aware Selection\n\nHits are selected greedily until the token estimate exceeds the budget:\n\n```typescript\nfunction selectWithinBudget(hits: SearchHit[], budget: number): SearchHit[] {\n  const selected: SearchHit[] = [];\n  let tokenEstimate = 0;\n  for (const hit of hits) {\n    const est = estimateTokens(hit.excerpt || hit.title);\n    if (tokenEstimate + est >= budget) break;\n    selected.push(hit);\n    tokenEstimate += est;\n  }\n  return selected;\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 3: Symbol Loading\n\nFor each selected file, related symbols are loaded (up to 
20 total):\n\n```typescript\nconst symbols = loadSymbolsForPaths(kernel.db, paths).slice(0, 20);\n```\n\nThe symbols query joins against the `symbols` table:\n\n```typescript\nSELECT ref, name, kind, file_path, line, signature, exported \nFROM symbols \nWHERE file_path IN (...)\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 4: Graph Path Loading\n\nGraph paths connect files through import/dependency relationships:\n\n```typescript\nconst graphPaths = loadGraphPaths(kernel.db, paths, 20);\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 5: Memory Hit Extraction\n\nMemory hits are filtered from selected hits by kind:\n\n```typescript\nconst memoryHits = selected.filter((hit) => hit.kind === \"memory\");\n```\n\n### Step 6: Confidence Calculation\n\nConfidence is calculated using a clamped formula:\n\n```typescript\nfunction confidenceFor(hits, graphPaths, memoryHits): number {\n  return clamp(\n    0.25 + hits.length * 0.05 + graphPaths.length * 0.02 + memoryHits.length * 0.05,\n    0.1,\n    0.92\n  );\n}\n```\n\n- Base: 0.25\n- Each hit: +0.05\n- Each graph path: +0.02\n- Each memory hit: +0.05\n- Clamped to [0.1, 0.92]\n\n资料来源：[src/search.ts:search.ts]()\n\n## Query Classification\n\nThe `classifyQuery` function determines the search intent based on keywords:\n\n```typescript\nfunction classifyQuery(q: string): SearchIntent {\n  const lower = q.toLowerCase();\n  if (/[`\"'#.:/]/.test(q) || /\\b[A-Z][A-Za-z0-9_]{2,}\\b/.test(q)) return \"exact\";\n  if (/\\b(test|spec|mock|fixture)\\b/.test(q)) return \"test\";\n  if (/\\b(memory|lesson|learned|session|sessions)\\b/.test(q)) return \"memory\";\n  if (/\\b(impact|affected|depends|dependents|blast radius)\\b/.test(q)) return \"impact\";\n  if (/\\b(why|changed|commit|history|regression|introduced)\\b/.test(q)) return \"historical\";\n  if (/\\b(architecture|flow|path|trace|connects|calls|imports)\\b/.test(q)) return \"architectural\";\n  if (/\\b(resource|docs|documentation|guide|readme|how 
to|setup)\\b/.test(q)) return \"docs\";\n  return \"vague\";\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Term Expansion\n\nThe `expandedTerms` function adds related terms to improve recall for specific domains:\n\n```typescript\nfunction expandedTerms(query: string): string[] {\n  const additions: string[] = [];\n  if (/\\b(tool|tools|registered|register)\\b/.test(lower)) {\n    additions.push(\"server\", \"tool\", \"tools\", \"callTool\");\n  }\n  if (/\\bmcp\\b/.test(lower)) {\n    additions.push(\"mcp\", \"server\", \"stdio\");\n  }\n  if (/\\bmemory|memories|remember|remembers|lesson|lessons\\b/.test(lower)) {\n    additions.push(\"memory\", \"memories\", \"lesson\", \"lessons\", \"claim\", \"ledger\", \"evidence\");\n  }\n  if (/\\bimpact|depends|dependents|uses\\b/.test(lower)) {\n    additions.push(\"imports\", \"tests\", \"edges\");\n  }\n  return [...terms, ...additions];\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Scoring Algorithm\n\nThe `scoreFromRank` function calculates relevance scores:\n\n```typescript\nfunction scoreFromRank(rank: number, q: string): number {\n  let bonus = 0;\n  const lower = q.toLowerCase();\n  \n  if (/\\bmemory|memories|remember|remembers|lesson|lessons|sessions\\b/.test(q)) {\n    if (lower.includes(\"memory ledger\")) bonus += 7;\n    if (lower.includes(\"src/memory.ts\")) bonus += 5;\n    if (lower.includes(\"readme.md\")) bonus += 4;\n    if (lower.includes(\"src/search.ts\")) bonus -= 16;\n  }\n  if (/\\b(where|how)\\b/.test(q) && lower.includes(\"config-key\")) bonus -= 2;\n  \n  return 10 / (1 + Math.abs(rank)) + bonus;\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## CLI Usage\n\nThe `query` command creates Context Packs via CLI:\n\n```bash\ncxf query \"<query>\" --workspace <path> --budget 2000 --json\n```\n\n### Options\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `--workspace` | `path` | `cwd` | Workspace path |\n| `--budget` | `number` | `2000` | Approximate 
token budget |\n| `--json` | `flag` | `false` | Output as JSON instead of Markdown |\n\n### Example Output\n\n```\n# Context Pack ctx_abc123\n\nQuery: where is user auth handled\nIntent: architectural\nConfidence: 59%\nToken estimate: 1850/2000\n\nFound 5 evidence items for a architectural query, with 2 graph connections and 1 memory hit.\n\n## Citations\n- file:src/auth.ts:1-50 (auth module)\n  Handles user authentication via JWT tokens...\n- file:src/middleware/auth.ts:1-30 (auth middleware)\n  Express middleware for auth validation...\n\n## Graph Paths\n- src/auth.ts --IMPORTS--> src/utils/jwt.ts (src/auth.ts:5)\n- src/middleware/auth.ts --IMPORTS--> src/auth.ts (src/middleware/auth.ts:3)\n\n## Memory Hits\n- memory:lesson:1: JWT tokens should be validated on every protected route.\n```\n\n资料来源：[src/cli.ts:cli.ts]()\n\n## Rendering\n\nContext Packs are rendered to Markdown by `renderEvidencePackMarkdown` (pass `--json` to the CLI to get the raw pack as JSON instead):\n\n```typescript\nexport function renderEvidencePackMarkdown(pack: EvidencePack): string {\n  const lines = [\n    `# Context Pack ${pack.id}`,\n    \"\",\n    `Query: ${pack.query}`,\n    `Intent: ${pack.intent}`,\n    `Confidence: ${Math.round(pack.confidence * 100)}%`,\n    `Token estimate: ${pack.tokenEstimate}/${pack.budget}`,\n    \"\",\n    pack.summary,\n    \"\",\n    \"## Citations\"\n  ];\n  // ... 
citations, graph paths, memory hits\n}\n```\n\n资料来源：[src/report.ts:report.ts]()\n\n## Chunk Extraction\n\nContextual chunks are extracted during indexing for searchability:\n\n```mermaid\ngraph LR\n    A[Source File] --> B[Language Detection]\n    B --> C[extractSymbols]\n    B --> D[extractEdges]\n    B --> E[extractChunks]\n    C --> F[Symbol Table]\n    D --> G[Edge Table]\n    E --> H[Chunk Table]\n```\n\n### Supported Languages\n\n| Language | Symbol Patterns |\n|----------|-----------------|\n| TypeScript/JavaScript | function, class, interface, type, const arrow |\n| Python | def, class |\n| Go | func, type struct/interface |\n| Rust | fn, struct, enum, trait, impl |\n| Markdown | headings (H1-H6) |\n| JSON | top-level keys |\n\n资料来源：[src/extract.ts:extract.ts]()\n\n### Chunking Strategy\n\n- **Code files**: Divided into blocks of ~60 lines, with overlap for context\n- **Markdown files**: Split by headings, with the heading as the chunk title\n- **Token estimation**: Used for both selection and budget accounting\n\n```typescript\nfunction codeChunks(relativePath: string, content: string): ChunkRecord[] {\n  const lines = content.split(/\\r?\\n/);\n  const chunks: ChunkRecord[] = [];\n  // Split into ~60-line blocks with overlap\n  for (let start = 1; start <= lines.length; start += 50) {\n    const end = Math.min(start + 60 - 1, lines.length);\n    const text = lineRange(content, start, end);\n    chunks.push({\n      ref: fileRef(relativePath, start, end),\n      filePath: relativePath,\n      startLine: start,\n      endLine: end,\n      kind: \"file\",\n      title: `${relativePath}:${start}-${end}`,\n      text,\n      tokenEstimate: estimateTokens(text)\n    });\n  }\n  return chunks;\n}\n```\n\n资料来源：[src/extract.ts:extract.ts]()\n\n## Summary Generation\n\nThe `summarizePack` function generates human-readable summaries:\n\n```typescript\nfunction summarizePack(\n  query: string,\n  intent: SearchIntent,\n  hits: SearchHit[],\n  graphPaths: 
GraphPath[],\n  memoryHits: SearchHit[]\n): string {\n  if (hits.length === 0) {\n    return `No indexed evidence matched \"${query}\". Re-index or broaden the query.`;\n  }\n  return `Found ${hits.length} evidence item${hits.length === 1 ? \"\" : \"s\"} ` +\n    `for a ${intent} query, with ${graphPaths.length} graph connection${graphPaths.length === 1 ? \"\" : \"s\"} ` +\n    `and ${memoryHits.length} memory hit${memoryHits.length === 1 ? \"\" : \"s\"}.`;\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Persistence\n\nEvidence packs are saved to the kernel database for audit and retrieval:\n\n```typescript\nsaveEvidencePack(kernel.db, { \n  id: pack.id, \n  query: pack.query, \n  tokenEstimate, \n  json: JSON.stringify(pack) \n});\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Design Principles\n\n1. **Token budget awareness**: Never exceed the requested budget; select the most relevant items first\n2. **Cited evidence**: Every piece of information is traceable to a specific file and line range\n3. **Intent-driven**: Query classification shapes what gets searched and how results are interpreted\n4. **Graph connectivity**: Beyond matching files, show how they connect through imports and dependencies\n5. 
**Memory integration**: Blend indexed content with evidence-backed lessons from prior sessions\n\n---\n\n<a id='memory-ledger'></a>\n\n## Memory Ledger\n\n### 相关页面\n\n相关主题：[Context Packs](#context-packs), [Search Engine](#search-engine)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n</details>\n\n# Memory Ledger\n\nThe Memory Ledger is Contextful's evidence-backed persistent memory system that enables AI agents to retain and recall learned lessons across sessions. Unlike ephemeral context that disappears when a session ends, the Memory Ledger stores structured knowledge annotated with source evidence, allowing agents to build cumulative understanding of a codebase over time.\n\n## Overview\n\nThe Memory Ledger solves a fundamental problem in AI-assisted development: knowledge gained during one session is lost in the next. When an agent discovers how authentication works, identifies a fragile dependency, or learns a non-obvious architectural pattern, that knowledge typically vanishes when the session ends.\n\nContextful's approach requires every stored memory to be anchored to concrete evidence—file references, code symbols, or prior context packs. 
This design prevents hallucinated or unsubstantiated memories from polluting the knowledge base and ensures that recalled lessons can be traced back to their source.\n\nThe system operates entirely locally with no external API calls, embedding services, or cloud dependencies. All memory data remains within the workspace's SQLite database.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Agent Session] -->|write_lesson| B[Memory Ledger]\n    A -->|recall_memory| C[Memory Search]\n    B -->|evidence refs| D[Evidence Pack]\n    C -->|cited memories| A\n    D -->|citations| E[Source Files]\n    F[Workspace DB] -->|stores| B\n    F -->|stores| C\n```\n\n### Core Components\n\n| Component | Role | Source |\n|-----------|------|--------|\n| Memory Storage | SQLite-backed persistent storage for lessons | `src/db.ts` |\n| Memory Search | FTS-enabled retrieval of memories by query | `src/search.ts` |\n| Evidence Validation | Ensures evidence refs are valid before storage | `src/mcp-server.ts` |\n| Confidence Scoring | Assigns credibility scores to stored memories | `src/cli.ts:85` |\n\n## Data Model\n\n### Memory Record Structure\n\nEach memory in the ledger contains the following fields:\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique identifier (prefixed with `memory:`) |\n| `claim` | string | The substantive lesson or observation |\n| `scope` | string | Granularity level: `repo`, `file`, `symbol`, or `session` |\n| `evidenceRefs` | string[] | Validated references to source evidence |\n| `confidence` | number | Credibility score from 0.0 to 1.0 |\n| `status` | string | Current state: `active`, `superseded`, or `stale` |\n| `supersedes` | string? 
| ID of the memory this replaces (if any) |\n\n### Evidence Reference Formats\n\nValid evidence references that can be attached to memories:\n\n| Format | Example | Purpose |\n|--------|---------|---------|\n| File range | `file:src/auth.ts:10-40` | Reference specific lines in a file |\n| Symbol | `symbol:src/auth.ts#AuthService:12` | Point to a specific code symbol |\n| Context pack | `pack:ctx_abc123` | Reference a prior evidence pack |\n\n资料来源：[README.md:54-56]()\n\nEvidence references must come from search results or context packs—arbitrary references are rejected. This prevents storing claims without verifiable backing.\n\n## Memory Scopes\n\nThe scope field determines the durability and applicability of a memory:\n\n| Scope | Description | Persistence |\n|-------|-------------|-------------|\n| `repo` | Project-wide lessons applicable across sessions | Permanent |\n| `file` | File-specific knowledge | Permanent |\n| `symbol` | Symbol-level lessons | Permanent |\n| `session` | Ephemeral session-scoped learnings | Lost on session end |\n\nThe default scope is `repo`, reflecting the assumption that most valuable memories have project-wide relevance.\n\n资料来源：[src/cli.ts:73]()\n\n## Writing Memories\n\n### CLI Usage\n\n```bash\ncxf memory add \\\n  --claim \"AuthService.validateToken() throws on expired tokens without catching\" \\\n  --evidence \"file:src/auth.ts:45-67\" \\\n  --evidence \"file:src/api/middleware.ts:12-20\" \\\n  --confidence 0.85 \\\n  --scope repo\n```\n\n### MCP Tool Usage\n\n```typescript\nawait server.callTool(\"write_lesson\", {\n  claim: \"The payment module requires initialization before use\",\n  evidence_refs: [\"file:src/payment/core.ts:10-30\", \"symbol:src/payment/core.ts#initialize:15\"],\n  scope: \"repo\",\n  confidence: 0.9\n});\n```\n\n资料来源：[src/mcp-server.ts:79-94]()\n\n### Validation Rules\n\nMemories are subject to strict validation:\n\n1. **Evidence required**: At least one valid evidence reference must be provided\n2. 
**Evidence must be fresh**: References must originate from search results or context packs\n3. **Claim must be substantive**: Empty or trivial claims are rejected\n4. **Confidence in valid range**: Must be between 0.0 and 1.0\n\n## Searching Memories\n\n### Intent Classification\n\nContextful automatically classifies queries to determine when to search memories. The query classifier recognizes memory-related intents through keyword detection:\n\n```typescript\nconst memoryPattern = /\\bmemory|memories|remember|remembers|lesson|lessons|learned|session|sessions\\b/;\n```\n\nWhen matched, the classifier returns `intent: \"memory\"` and the search system automatically queries the memories FTS index.\n\n资料来源：[src/search.ts:14-17]()\n\n### Query Expansion\n\nMemory searches benefit from automatic term expansion. When a query mentions relevant concepts, additional search terms are added:\n\n```typescript\nif (/\\bmemory|memories|remember|remembers|lesson|lessons|learned|session|sessions\\b/.test(lower)) {\n  additions.push(\"memory\", \"memories\", \"lesson\", \"lessons\", \"claim\", \"ledger\", \"evidence\");\n}\n```\n\nThis ensures that queries like \"what did we learn about auth\" retrieve memory results even if those exact words don't appear in the stored claims.\n\n资料来源：[src/search.ts:28-30]()\n\n### Search Results\n\nMemory hits in search results include:\n\n| Field | Description |\n|-------|-------------|\n| `ref` | Memory reference in format `memory:<id>` |\n| `kind` | Always `\"memory\"` for memory hits |\n| `title` | Display title including scope |\n| `excerpt` | Redacted claim text (secrets removed) |\n| `evidence` | Original evidence references |\n| `status` | Current memory status |\n| `score` | Relevance score |\n\n## Memory Lifecycle\n\n```mermaid\nstateDiagram-v2\n    [*] --> Active: write_lesson\n    Active --> Superseded: write_lesson with supersedes\n    Active --> Stale: Evidence becomes invalid\n    Superseded --> [*]\n    Stale --> [*]\n    Active 
--> [*]: Deleted\n```\n\n### Status Transitions\n\n**Active** → Default state for newly written memories. Active memories are returned in search results and can supersede other memories.\n\n**Superseded** → When a newer, more accurate memory replaces an older one, the superseded memory retains its ID and evidence but is excluded from search results. The `supersedes` field links to the replaced memory.\n\n**Stale** → Memories become stale when their evidence references point to files or symbols that have changed significantly since the memory was written. The reporting system tracks stale memories for review.\n\n资料来源：[src/report.ts:54-58]()\n\n## Integration with Context Packs\n\nThe Memory Ledger integrates with Contextful's evidence pack system:\n\n1. **Before writing**: Search context or create a context pack to get evidence references\n2. **Writing lessons**: Use those evidence refs to anchor the memory claim\n3. **Recalling**: Later sessions query the ledger, retrieving cited memories\n\n```typescript\n// During a session: create pack, identify lessons\nconst pack = await createContextPack({ query: \"how is auth handled\", budget: 2000 });\n\n// Later session: recall what was learned\nconst result = await recallMemory({ query: \"auth patterns\", scope: \"repo\" });\n```\n\nThis bidirectional relationship means memories enhance future context packs, and context packs provide evidence for future memories.\n\n## Reporting\n\nThe `report` command includes memory statistics:\n\n```bash\ncxf report --workspace . 
--format markdown\n```\n\nOutput includes a \"Stale Memories\" section listing memories whose evidence references may no longer be valid:\n\n```\n## Stale Memories\n- memory_abc123: AuthService.validateToken() behavior changed in v2\n- memory_def456: payment module initialization order is now reversed\n```\n\n资料来源：[src/report.ts:54-58]()\n\n## Configuration Options\n\n| Option | CLI Flag | Default | Description |\n|--------|----------|---------|-------------|\n| Workspace | `--workspace` | `process.cwd()` | Path to workspace with memory database |\n| Claim | `--claim` | required | The memory content |\n| Evidence | `--evidence` | required | One or more evidence refs |\n| Scope | `--scope` | `repo` | Memory scope level |\n| Confidence | `--confidence` | `0.7` | Credibility score |\n\n## Privacy Considerations\n\nThe Memory Ledger is designed with privacy as a core principle:\n\n- **Local only**: No data leaves the workspace\n- **No cloud sync**: Memories remain on the local machine\n- **Evidence-linked**: Claims cannot be stored without verifiable source\n- **Content redaction**: Secrets are automatically redacted from stored claims using pattern matching for emails, API keys, and tokens\n\n资料来源：[src/util.ts:12-18]()\n\n## Related MCP Tools\n\n| Tool | Purpose |\n|------|---------|\n| `recall_memory` | Search the memory ledger |\n| `write_lesson` | Store a new evidence-backed memory |\n| `context_pack` | Generate evidence packs that can feed into memories |\n\n资料来源：[README.md:35-40]()\n\n---\n\n<a id='graph-traversal'></a>\n\n## Graph Traversal and Analysis\n\n### 相关页面\n\n相关主题：[Search Engine](#search-engine), [SQLite Database Schema](#sqlite-database)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/types.ts](https://github.com/Inferensys/contextful/blob/main/src/types.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- 
[src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n</details>\n\n# Graph Traversal and Analysis\n\nGraph Traversal and Analysis is a core feature of Contextful that builds and queries a dependency graph from source code. This system tracks relationships between files, symbols, modules, and configuration nodes, enabling sophisticated impact analysis, change tracing, and dependency exploration.\n\n## Overview\n\nContextful extracts code relationships during indexing and stores them in a SQLite database as a traversable graph. This enables agents to answer questions like:\n\n- \"What depends on this module?\"\n- \"What tests cover this file?\"\n- \"How does this symbol connect to other parts of the codebase?\"\n\n资料来源：[src/extract.ts:68-95]()\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Source Files] --> B[extractEdges]\n    B --> C[GraphEdge Records]\n    C --> D[SQLite Kernel DB]\n    E[CLI/MCP Query] --> F[searchContext]\n    F --> G[traceGraph]\n    G --> H[GraphPath Results]\n    F --> I[impactAnalysis]\n    I --> J[Impact Results]\n    F --> K[whyChanged]\n    K --> L[Git History + Evidence]\n```\n\n### Data Flow\n\n1. **Extraction Phase**: During workspace indexing, `extractEdges()` parses source files to identify relationships 资料来源：[src/extract.ts:52-95]()\n2. **Storage Phase**: Edge data is stored in the `edges` table within the kernel SQLite database 资料来源：[src/search.ts:1-30]()\n3. 
**Query Phase**: CLI commands and MCP tools query the graph using traversal algorithms 资料来源：[src/search.ts:180-220]()\n\n## Graph Data Model\n\n### Core Types\n\n```typescript\ninterface GraphEdge {\n  sourceType: \"file\" | \"symbol\";\n  sourceName: string;\n  targetType: \"file\" | \"symbol\" | \"module\" | \"config\";\n  targetName: string;\n  edgeType: EdgeType;\n  filePath: string;\n  line: number;\n}\n\ninterface GraphPath {\n  edges: Array<{\n    sourceName: string;\n    sourceType: string;\n    edgeType: string;\n    targetName: string;\n    targetType: string;\n  }>;\n  totalHops: number;\n}\n\ninterface GraphNode {\n  name: string;\n  type: \"file\" | \"symbol\" | \"module\" | \"config\";\n  path?: string;\n  kind?: string;\n}\n```\n\n资料来源：[src/types.ts:45-70]()\n\n### Edge Types\n\n| Edge Type | Description | Source Detection |\n|-----------|-------------|------------------|\n| `DEFINES` | File defines a symbol | Function/class declarations |\n| `IMPORTS` | File imports a module | `import`, `require`, `from` statements |\n| `CONFIGURES` | File/config references a key | JSON keys, package.json fields |\n| `TESTS` | Test file tests imports | Auto-generated for test files |\n\n资料来源：[src/extract.ts:75-100]()\n\n### Language-Specific Detection\n\nThe extraction layer supports multiple languages:\n\n| Language | Import Patterns | Symbol Patterns |\n|----------|-----------------|-----------------|\n| TypeScript/JavaScript | `from \"module\"`, `require(\"module\")` | `export function/class/interface` |\n| Python | `from module import` | `def`, `class` |\n| Go | `\"package\"` | `func`, `type struct/interface` |\n| Rust | `use module;`, `mod name;` | `fn`, `struct`, `enum`, `trait` |\n\n资料来源：[src/extract.ts:70-95]()\n\n## Graph Traversal API\n\n### traceGraph\n\nPerforms graph traversal starting from a source node, optionally filtering by edge types and limiting results.\n\n```typescript\nexport async function traceGraph(options: {\n  workspace?: string;\n  from: 
string;\n  to?: string;\n  edgeTypes?: string[];\n  limit?: number;\n}): Promise<GraphPath[]>\n```\n\n#### Parameters\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `workspace` | `string` | No | Workspace path (defaults to CWD) |\n| `from` | `string` | Yes | Starting node name |\n| `to` | `string` | No | Target node for path finding |\n| `edgeTypes` | `string[]` | No | Filter by specific edge types |\n| `limit` | `number` | No | Maximum paths to return (default: 10) |\n\n资料来源：[src/search.ts:180-190]()\n\n### loadGraphPaths\n\nLoads graph paths from the database for a set of file paths.\n\n```typescript\nfunction loadGraphPaths(\n  db: Database,\n  paths: string[],\n  limit: number\n): GraphPath[]\n```\n\n资料来源：[src/search.ts:60-80]()\n\n## Impact Analysis\n\nImpact analysis identifies reverse dependencies—what depends on a given file or symbol—and finds relevant test coverage.\n\n```mermaid\ngraph LR\n    A[Target File/Symbol] --> B[Find All Edges Pointing TO Target]\n    B --> C[Group by Source File]\n    C --> D[Identify Test Files]\n    D --> E[Return Impact Set]\n```\n\n### impactAnalysis Function\n\n```typescript\nexport async function impactAnalysis(options: {\n  workspace?: string;\n  target: string;\n  limit?: number;\n}): Promise<ImpactResult>\n```\n\n#### Impact Result Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `target` | `string` | The analyzed symbol or file |\n| `dependents` | `DependentInfo[]` | Files/symbols that depend on target |\n| `tests` | `SearchHit[]` | Related test files |\n\n```typescript\ninterface DependentInfo {\n  path: string;\n  type: string;\n  imports: string[];\n}\n\ninterface ImpactResult {\n  target: string;\n  dependents: DependentInfo[];\n  tests: SearchHit[];\n}\n```\n\n资料来源：[src/search.ts:130-175]()\n\n### Test Detection Logic\n\nTest files are identified by path patterns and edges with `TESTS` type:\n\n```typescript\nconst testPaths = 
paths.filter(\n  (path) => path.edgeType === \"TESTS\" || \n            /(^|\\/)(tests?|__tests__)\\/|(\\.|-)(test|spec)\\./.test(path.filePath)\n);\n```\n\n资料来源：[src/search.ts:165-170]()\n\n## Change Analysis\n\n### whyChanged\n\nCombines current code evidence with git history to explain why a file or symbol may have changed.\n\n```typescript\nexport async function whyChanged(options: {\n  workspace?: string;\n  target: string;\n  limit?: number\n}): Promise<{\n  target: string;\n  currentEvidence: SearchHit[];\n  commits: Array<{\n    hash: string;\n    subject: string;\n    date?: string;\n    files: string[];\n  }>;\n}>\n```\n\n#### Workflow\n\n```mermaid\ngraph TD\n    A[whyChanged] --> B[searchContext for target]\n    B --> C[Extract file paths from hits]\n    C --> D[readGitHistory with file paths]\n    D --> E[Combine evidence + commits]\n    E --> F[Return structured result]\n```\n\n资料来源：[src/search.ts:200-230]()\n\n### Git History Integration\n\nThe system reads git history for affected files:\n\n```typescript\nfunction readGitHistory(\n  workspace: string,\n  filePaths: string[],\n  limit: number\n): Array<{\n  hash: string;\n  subject: string;\n  date?: string;\n  files: string[];\n}>\n```\n\n资料来源：[src/search.ts:85-100]()\n\n## CLI Commands\n\n### trace Command\n\n```bash\ncxf trace --from <symbol_or_file> [--to <target>] [--edge-types <types>] [--limit <count>]\n```\n\n#### Options\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `--from` | `string` | Required | Starting node |\n| `--to` | `string` | - | Target node |\n| `--edge-types` | `string` | all | Comma-separated edge types |\n| `--limit` | `number` | 10 | Maximum paths |\n| `--workspace` | `string` | CWD | Workspace path |\n\n资料来源：[src/cli.ts:45-60]()\n\n### report Command\n\nGenerates a comprehensive context report including graph statistics:\n\n```bash\ncxf report --workspace <path> --format markdown|json|html\n```\n\n#### Report Includes\n\n- Index 
status with graph node/edge counts\n- Top queries by intent type\n- Stale memory detection\n- Recent evidence packs\n\n资料来源：[src/cli.ts:70-85]()\n\n## MCP Server Tools\n\nContextful exposes graph traversal as MCP tools for integration with AI coding assistants.\n\n### trace_path\n\n```json\n{\n  \"name\": \"trace_path\",\n  \"description\": \"Trace graph relationships between files, symbols, modules, and config nodes.\",\n  \"inputSchema\": {\n    \"from\": \"string\",\n    \"to\": \"string (optional)\",\n    \"edge_types\": \"string[] (optional)\",\n    \"limit\": \"number (optional)\"\n  }\n}\n```\n\n资料来源：[src/mcp-server.ts:45-55]()\n\n### impact_analysis\n\n```json\n{\n  \"name\": \"impact_analysis\",\n  \"description\": \"Find likely dependents and tests for a file, symbol, or module.\",\n  \"inputSchema\": {\n    \"symbol_or_file\": \"string\",\n    \"limit\": \"number (optional)\"\n  }\n}\n```\n\n资料来源：[src/mcp-server.ts:56-65]()\n\n### why_changed\n\n```json\n{\n  \"name\": \"why_changed\",\n  \"description\": \"Explain why a file or symbol may have changed by combining current evidence with git history.\",\n  \"inputSchema\": {\n    \"symbol_or_file\": \"string\",\n    \"limit\": \"number (optional)\"\n  }\n}\n```\n\n资料来源：[src/mcp-server.ts:66-75]()\n\n## Usage Examples\n\n### Direct CLI Usage\n\n```bash\n# Trace dependencies of auth module\ncxf trace --from src/auth.ts --edge-types IMPORTS\n\n# Find what tests cover a file\ncxf impact --target src/parser.ts\n\n# Get change history for a symbol\ncxf why --target AuthService\n```\n\n### MCP Integration\n\n```json\n{\n  \"mcpServers\": {\n    \"contextful\": {\n      \"command\": \"npx\",\n      \"args\": [\"-y\", \"@inferensys/contextful\", \"server\"]\n    }\n  }\n}\n```\n\n```typescript\n// In an MCP client\nconst result = await client.callTool(\"trace_path\", {\n  from: \"src/auth.ts\",\n  to: \"src/database.ts\",\n  edge_types: [\"IMPORTS\", \"DEFINES\"]\n});\n```\n\n## Query Intent Classification\n\nGraph 
queries are automatically classified to route to appropriate traversal strategies:\n\n| Intent | Keywords | Graph Relevance |\n|--------|----------|-----------------|\n| `architectural` | architecture, flow, path, connects, calls | High priority |\n| `impact` | impact, affected, depends, blast radius | Direct edge query |\n| `historical` | why, changed, history, regression | Graph + git history |\n| `exact` | Symbol names, file paths | Symbol-level traversal |\n\n资料来源：[src/search.ts:115-130]()\n\n## Limitations and Design Decisions\n\n### Privacy Guarantees\n\n- All processing is local-only\n- No external embedding APIs used\n- No source code upload\n- No file editing capabilities\n\n资料来源：[README.md:45-50]()\n\n### v1 Scope Boundaries\n\n- Broken JSON during indexing produces warnings but continues processing\n- Syntax diagnostics are intentionally out of scope\n- Git history is read-only\n\n资料来源：[src/extract.ts:120-125]()\n\n## Summary\n\nThe Graph Traversal and Analysis system in Contextful provides:\n\n1. **Automatic Relationship Extraction** - Builds a dependency graph during indexing\n2. **Multiple Query Entry Points** - CLI commands and MCP tools\n3. **Path Finding** - Trace connections between any two nodes\n4. **Impact Analysis** - Identify dependents and test coverage\n5. 
**Change Attribution** - Combine current state with git history\n\nThis enables AI coding assistants to answer sophisticated questions about code relationships without requiring manual documentation or extensive file reading.\n\n---\n\n<a id='sqlite-database'></a>\n\n## SQLite Database Schema\n\n### 相关页面\n\n相关主题：[Workspace Indexing System](#indexing-system), [Search Engine](#search-engine)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/db.ts](https://github.com/Inferensys/contextful/blob/main/src/db.ts)\n- [src/types.ts](https://github.com/Inferensys/contextful/blob/main/src/types.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n</details>\n\n# SQLite Database Schema\n\n## Overview\n\nContextful uses SQLite as its primary storage engine for indexing codebase artifacts. The database schema is designed to support full-text search, symbol indexing, dependency graph traversal, and evidence pack generation for AI-assisted queries. All operations are managed through `better-sqlite3` for synchronous, high-performance access.\n\n资料来源：[src/db.ts:1-50]()\n\n## Schema Tables\n\n### Primary Storage Tables\n\n#### `chunks`\n\nStores indexed code and documentation segments extracted from source files. 
Each chunk represents a logical unit of content bounded by language-specific rules (functions, classes, headings, etc.).\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Unique reference identifier (format: `file:path:start-end`) |\n| `file_path` | TEXT | Relative path to the source file |\n| `start_line` | INTEGER | Starting line number (1-indexed) |\n| `end_line` | INTEGER | Ending line number |\n| `kind` | TEXT | Chunk classification: `code`, `doc`, `file` |\n| `title` | TEXT | Display title for the chunk |\n| `text` | TEXT | Full content of the chunk |\n| `token_estimate` | INTEGER | Estimated token count using GPT tokenizer |\n\n资料来源：[src/db.ts:23-36]()\n\n#### `symbols`\n\nCaptures programming constructs (functions, classes, interfaces, types) extracted from source files.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Unique symbol reference |\n| `name` | TEXT | Symbol name |\n| `kind` | TEXT | Symbol type: `function`, `class`, `interface`, `type`, `struct`, `enum`, `trait`, `impl` |\n| `file_path` | TEXT | Source file path |\n| `line` | INTEGER | Line number where symbol is defined |\n| `signature` | TEXT | First 160 characters of symbol declaration |\n| `exported` | INTEGER | Boolean flag (1 = exported, 0 = local) |\n\n资料来源：[src/db.ts:47-60]()\n\n#### `edges`\n\nRepresents relationships between code entities, including imports, module dependencies, and configuration references.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `source_name` | TEXT | Name of the importing/configuring entity |\n| `target_name` | TEXT | Name or path of the imported/dependency target |\n| `edge_type` | TEXT | Relationship type: `IMPORTS`, `CONFIGURES` |\n| `file_path` | TEXT | File where the relationship is defined |\n| `line` | INTEGER | Line number of the relationship definition |\n\n资料来源：[src/db.ts:38-45]()\n\n### Full-Text Search Index\n\n#### `chunks_fts`\n\nVirtual FTS5 table 
providing fast full-text search across all indexed content. Mirrors core chunk data for BM25-ranked retrieval.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Chunk reference |\n| `path` | TEXT | File path for filtering |\n| `title` | TEXT | Searchable title field |\n| `text` | TEXT | Full searchable content |\n\n资料来源：[src/db.ts:37-42]()\n\nThe FTS table is queried using BM25 ranking in search operations:\n\n```sql\nSELECT ref, path, title, text, bm25(chunks_fts) AS rank \nFROM chunks_fts WHERE chunks_fts MATCH ?\n```\n\n资料来源：[src/search.ts:45-47]()\n\n### Graph and Metadata Tables\n\n#### `nodes`\n\nRepresents graph vertices for dependency analysis and traversal operations.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `id` | INTEGER | Auto-incrementing primary key |\n| `ref` | TEXT | Node reference |\n| `kind` | TEXT | Node classification: `file`, `symbol`, `chunk`, `module`, `config` |\n| `name` | TEXT | Display name |\n| `file_path` | TEXT | Associated file path (nullable) |\n\n资料来源：[src/db.ts:12-22]()\n\n#### `files`\n\nStores metadata about indexed source files.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `absolute_path` | TEXT | Full absolute file path |\n| `language` | TEXT | Detected programming language |\n| `hash` | TEXT | SHA-based content hash for change detection |\n| `size` | TEXT | File size in bytes |\n\n资料来源：[src/db.ts:13-17]()\n\n#### `fingerprints`\n\nStores content fingerprints for deduplication and incremental indexing.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Reference to the content chunk |\n| `kind` | TEXT | Content type |\n| `fingerprint` | TEXT | Hash of the content |\n\n#### `evidence_packs`\n\nPersists generated evidence packs for audit and replay.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `id` | TEXT | Unique pack identifier |\n| `query` | TEXT | Original search query |\n| 
`token_estimate` | INTEGER | Total token count |\n| `json` | TEXT | Serialized pack data |\n\n#### `query_log`\n\nRecords search history for analysis and debugging.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `query` | TEXT | Search query text |\n| `intent` | TEXT | Classified search intent |\n| `timestamp` | TEXT | ISO timestamp |\n\n资料来源：[src/db.ts:1-10]()\n\n## Data Flow Architecture\n\n```mermaid\ngraph TD\n    A[Source Files] --> B[extractSymbols]\n    A --> C[extractEdges]\n    A --> D[extractChunks]\n    \n    B --> E[symbols table]\n    C --> F[edges table]\n    D --> G[chunks table]\n    D --> H[chunks_fts index]\n    \n    G --> I[Full-Text Search]\n    E --> J[Symbol Lookup]\n    F --> K[Graph Traversal]\n    \n    I --> L[searchContext]\n    J --> L\n    K --> L\n    \n    L --> M[Evidence Pack]\n    M --> N[evidence_packs]\n```\n\n资料来源：[src/extract.ts:1-150]()\n\n## Supported Symbol Kinds\n\nThe indexer extracts and classifies symbols based on language-specific patterns:\n\n| Language | Supported Kinds |\n|----------|-----------------|\n| TypeScript/JavaScript | `function`, `class`, `interface`, `type` |\n| Python | `function`, `class` |\n| Go | `function`, `struct`, `interface` |\n| Rust | `function`, `struct`, `enum`, `trait`, `impl` |\n\n资料来源：[src/extract.ts:30-60]()\n\n## Supported Edge Types\n\n| Edge Type | Description | Example |\n|-----------|-------------|---------|\n| `IMPORTS` | Module/dependency import | `import { foo } from './bar'` |\n| `CONFIGURES` | Configuration key reference | `\"dependencies\": { ... 
}` in package.json |\n\nThe `CONFIGURES` edge type is specifically generated for package.json dependency sections and JSON configuration keys.\n\n资料来源：[src/extract.ts:70-120]()\n\n## Query Classification and Intent\n\nThe search system classifies queries into intent categories that influence result ranking:\n\n| Intent | Trigger Keywords | Purpose |\n|--------|-----------------|---------|\n| `symbol` | Class/function names, exact identifiers | Find symbol definitions |\n| `code` | Code-related terms | Locate implementation |\n| `memory` | memory, lessons, session | Search evidence-backed memory |\n| `impact` | depends, affected, blast radius | Reverse dependency analysis |\n| `historical` | why, changed, history, commit | Git history queries |\n| `architectural` | architecture, flow, imports | Dependency tracing |\n| `docs` | docs, documentation, readme | Documentation lookup |\n| `exact` | File paths, line refs, symbols | Precise file/line access |\n| `vague` | Default fallback | Broad search |\n\n资料来源：[src/search.ts:15-30]()\n\n## Token Estimation\n\nToken counts are estimated using a heuristic approximation:\n\n```typescript\nexport function estimateTokens(text: string): number {\n  return Math.ceil(text.length / 4);\n}\n```\n\nThis provides a rough approximation where 1 token ≈ 4 characters, suitable for budget management in evidence pack generation.\n\n资料来源：[src/util.ts:1-10]()\n\n## Key Database Operations\n\n### Chunk Insertion\n\n```typescript\ndb.prepare(`\n  INSERT INTO chunks (ref, file_path, start_line, end_line, kind, title, text, token_estimate)\n  VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n`).run(chunk.ref, chunk.filePath, chunk.startLine, chunk.endLine, chunk.kind, chunk.title, chunk.text, chunk.tokenEstimate);\n```\n\n同步写入 `chunks` 表和 `chunks_fts` FTS 索引。\n\n### Symbol Loading\n\n```typescript\ndb.prepare(`SELECT ref, name, kind, file_path, line, signature, exported \nFROM symbols WHERE file_path IN (${paths.map(() => \"?\").join(\",\")})`)\n  
.all(...paths)\n```\n\n资料来源：[src/db.ts:23-42]()\n资料来源：[src/search.ts:180-195]()\n\n## Schema Version and Metadata\n\nThe database stores schema version and workspace metadata:\n\n| Key | Description |\n|-----|-------------|\n| `schema_version` | Current schema version number |\n| `workspace` | Workspace root path |\n| `indexed_at` | Last indexing timestamp |\n| `parser_backend` | Parser backend description |\n| `warnings` | Last 50 indexing warnings |\n\n资料来源：[src/indexer.ts:80-90]()\n\n## Conclusion\n\nThe SQLite schema in Contextful provides a normalized, queryable representation of source code structure and content. The dual-table approach for chunks (storage + FTS index) enables both efficient storage and fast full-text retrieval. The edges and symbols tables together support graph traversal for dependency analysis, while the evidence pack system enables persistent, ranked context generation for AI queries.\n\n---\n\n<a id='indexing-system'></a>\n\n## Workspace Indexing System\n\n### 相关页面\n\n相关主题：[SQLite Database Schema](#sqlite-database), [Search Engine](#search-engine)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n</details>\n\n# Workspace Indexing System\n\n## Overview\n\nThe Workspace Indexing System is the core indexing engine of Contextful. 
It scans, parses, and stores representations of source code files from a workspace into a local SQLite database, enabling semantic search, dependency graph traversal, and evidence-backed context retrieval.\n\n**Primary responsibilities:**\n\n| Responsibility | Description |\n|----------------|-------------|\n| File Discovery | Recursively traverse workspace directories, filtering by language and ignore rules |\n| Symbol Extraction | Parse and catalog functions, classes, interfaces, types, enums, traits |\n| Edge Extraction | Track import/export relationships between modules and dependencies |\n| Content Chunking | Split large files into manageable, line-numbered chunks for retrieval |\n| Watch Mode | Monitor file system changes and incrementally re-index on modifications |\n\n资料来源：[src/cli.ts:1-20](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Workspace Directory] --> B[File Discovery]\n    B --> C[Language Detection]\n    C --> D[Content Extraction]\n    D --> E[Symbol Extraction]\n    D --> F[Edge Extraction]\n    D --> G[Chunk Generation]\n    E --> H[SQLite DB]\n    F --> H\n    G --> H\n    I[Search/Query] --> H\n    J[Watch Mode] --> B\n```\n\nThe system is built around a SQLite database that stores three core entities: symbols, edges, and chunks. The indexer processes files in a single pass, extracting all three data types simultaneously to minimize I/O overhead.\n\n资料来源：[src/extract.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Supported Languages\n\nThe indexer natively supports symbol and edge extraction for the following languages:\n\n| Language | Symbol Patterns | Import Patterns |\n|----------|----------------|-----------------|\n| TypeScript / JavaScript | `function`, `class`, `interface`, `type`, `const` arrow/function | `import from`, `require()` |\n| Python | `def`, `class` | `from ... 
import`, `import` |\n| Go | `func`, `type struct/interface` | `\"...\"` (quoted imports) |\n| Rust | `fn`, `struct`, `enum`, `trait`, `impl` | `use`, `mod` |\n| Markdown | Headings (`#{1,6}`) | N/A |\n| JSON | Config keys (`\"key\":`) | N/A |\n\n资料来源：[src/extract.ts:15-45](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Indexing Process\n\n### Phase 1: File Discovery\n\nThe indexer recursively scans the workspace directory, applying language-specific filtering and Gitignore-style ignore rules. Binary files are detected and skipped using a simple null-byte heuristic.\n\n```typescript\nexport function isLikelyBinary(buffer: Buffer): boolean {\n  const sample = buffer.subarray(0, Math.min(buffer.length, 4096));\n  return sample.includes(0);\n}\n```\n\n资料来源：[src/util.ts:20-22](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n\n### Phase 2: Symbol Extraction\n\nSymbols are extracted using language-specific regular expression patterns. Each symbol record includes:\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `name` | string | Symbol identifier |\n| `kind` | string | Category: function, class, interface, type, struct, enum, trait, impl |\n| `line` | number | Declaration line number |\n| `signature` | string | First 160 characters of the declaration line |\n| `exported` | boolean | Whether the symbol is exported |\n\n```typescript\nconst push = (name: string, kind: string, exported = false) =>\n  symbols.push({ name, kind, line: lineNumber, signature: excerpt(line, 160), exported });\n```\n\n资料来源：[src/extract.ts:5-7](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\nFor TypeScript and JavaScript, the extractor captures export modifiers:\n\n```typescript\nmatchPush(line, /^\\s*(export\\s+)?(?:async\\s+)?function\\s+([A-Za-z_$][\\w$]*)/, push, \"function\");\nmatchPush(line, /^\\s*(export\\s+)?class\\s+([A-Za-z_$][\\w$]*)/, push, 
\"class\");\n```\n\n资料来源：[src/extract.ts:12-15](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Phase 3: Edge Extraction\n\nEdges represent dependency relationships between modules. The extractor identifies:\n\n- **IMPORTS**: Direct import statements for each language\n- **CONFIGURES**: Dependencies declared in configuration files (package.json, Cargo.toml, etc.)\n\n```typescript\nif (language === \"typescript\" || language === \"javascript\") {\n  for (const match of line.matchAll(/(?:from\\s+|import\\s*)[\"']([^\"']+)[\"']/g))\n    addImport(match[1]);\n  for (const match of line.matchAll(/require\\([\"']([^\"']+)[\"']\\)/g))\n    addImport(match[1]);\n}\n```\n\n资料来源：[src/extract.ts:67-72](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\nFor `package.json`, dependencies and scripts are indexed as CONFIGURES edges:\n\n```typescript\nfor (const section of [\"dependencies\", \"devDependencies\", \"peerDependencies\", \"scripts\"]) {\n  const values = parsed[section];\n  if (!values || typeof values !== \"object\") continue;\n  for (const key of Object.keys(values)) {\n    edges.push({ targetName: `${section}:${key}`, targetType: \"config\", edgeType: \"CONFIGURES\", line: 1 });\n  }\n}\n```\n\n资料来源：[src/extract.ts:105-114](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Phase 4: Chunk Generation\n\nLarge files are split into overlapping chunks to enable granular retrieval. 
The system uses a sliding window approach with overlap between consecutive chunks:\n\n```mermaid\ngraph LR\n    A[File Lines 1-200] --> B[Chunk 1: 1-80]\n    A --> C[Chunk 2: 60-140]\n    A --> D[Chunk 3: 120-200]\n    B --> E[Token Estimate]\n    C --> E\n    D --> E\n```\n\nEach chunk includes:\n\n| Field | Description |\n|-------|-------------|\n| `ref` | Unique reference string (`file:path:start-end`) |\n| `filePath` | Relative path to source file |\n| `startLine` | Starting line number |\n| `endLine` | Ending line number |\n| `kind` | Chunk type: `code`, `doc`, `file` |\n| `title` | Human-readable title |\n| `tokenEstimate` | Estimated token count |\n\n资料来源：[src/extract.ts:145-160](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Phase 5: Markdown Document Chunking\n\nMarkdown files receive special treatment. Instead of fixed-size chunks, the indexer uses headings as natural section boundaries:\n\n```typescript\nlines.forEach((line, index) => {\n  const match = line.match(/^(#{1,6})\\s+(.+)$/);\n  if (match) headings.push({ title: match[2].trim(), line: index + 1 });\n});\nreturn headings.map((heading, index) => {\n  const next = headings[index + 1];\n  const endLine = next ? next.line - 1 : lines.length;\n  // ... 
create chunk for section\n});\n```\n\n资料来源：[src/extract.ts:174-185](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Watch Mode\n\nThe indexer supports continuous monitoring via file system watchers:\n\n```typescript\nexport async function watchWorkspace(workspace: string, onIndex: (result: IndexResult) => void): Promise<void> {\n  const resolved = path.resolve(workspace);\n  onIndex(await indexWorkspace({ workspace: resolved }));\n  let timer: NodeJS.Timeout | undefined;\n  fs.watch(resolved, { recursive: true }, () => {\n    if (timer) clearTimeout(timer);\n    timer = setTimeout(async () => {\n      onIndex(await indexWorkspace({ workspace: resolved }));\n    }, 500);\n  });\n}\n```\n\n资料来源：[src/indexer.ts:80-91](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n\nKey characteristics:\n- Debounces file change events with a 500ms delay to batch rapid successive changes\n- Re-runs full indexing on each trigger\n- Outputs JSON results to stdout for consumption by other processes\n\n## CLI Commands\n\nThe indexing system exposes three primary CLI commands:\n\n| Command | Description |\n|---------|-------------|\n| `cxf index --workspace <path> [--watch]` | Initial or incremental indexing of a workspace |\n| `cxf daemon --workspace <path>` | Run as a long-lived daemon that outputs index results on file changes |\n| `cxf report --workspace <path> --format markdown\\|json\\|html` | Generate an index status report |\n\n```bash\n# Index a workspace\nnpx @inferensys/contextful index --workspace .\n\n# Watch for changes and print results\nnpx @inferensys/contextful daemon --workspace .\n```\n\n资料来源：[src/cli.ts:22-35](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n## Search Integration\n\nThe indexing system powers Contextful's search capabilities. 
After indexing, users can query the database using natural language:\n\n```typescript\nexport async function searchContext(options: SearchOptions): Promise<{ intent: SearchIntent; hits: SearchHit[] }> {\n  const workspace = resolveWorkspace(options.workspace);\n  await ensureIndexed(workspace);\n  const intent = classifyQuery(options.query);\n  // ... perform FTS and semantic search\n}\n```\n\n资料来源：[src/search.ts:45-55](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\nQuery intents are automatically classified to optimize search behavior:\n\n| Intent | Trigger Keywords | Description |\n|--------|-----------------|-------------|\n| `code` | function names, variable names | Code and implementation search |\n| `exact` | Backticks, quotes, `#`, file paths | Literal symbol/identifier lookup |\n| `impact` | impact, affected, depends, blast radius | Dependency and change analysis |\n| `historical` | why, changed, commit, history | Git history and regression tracking |\n| `architectural` | architecture, flow, trace, connects | Dependency graph traversal |\n| `docs` | resource, documentation, guide, how to | Documentation and README search |\n| `memory` | remember, session, lesson, learned | Agent memory recall |\n\n资料来源：[src/search.ts:5-18](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Token Estimation\n\nEvery chunk and evidence pack includes a token estimate for budget management:\n\n```typescript\nexport function packTokenCount(text: string): number {\n  return estimateTokens(text);\n}\n```\n\nThe system uses this estimate to enforce budget limits when building context packs for LLM consumption, ensuring responses stay within token budgets.\n\n资料来源：[src/report.ts:50-52](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n\n## Data Models\n\n### Symbol Record\n\n```typescript\ninterface SymbolRecord {\n  ref: string;\n  name: string;\n  kind: \"function\" | \"class\" | \"interface\" | \"type\" | 
\"struct\" | \"enum\" | \"trait\" | \"impl\";\n  filePath: string;\n  line: number;\n  signature: string;\n  exported: boolean;\n}\n```\n\n### Edge Record\n\n```typescript\ninterface RawEdge {\n  targetName: string;\n  targetType: \"module\" | \"config\" | \"symbol\";\n  edgeType: \"IMPORTS\" | \"CONFIGURES\" | \"DEFINES\";\n  line: number;\n}\n```\n\n### Chunk Record\n\n```typescript\ninterface ChunkRecord {\n  ref: string;\n  filePath: string;\n  startLine: number;\n  endLine: number;\n  kind: \"code\" | \"doc\" | \"file\";\n  title: string;\n  text: string;\n  tokenEstimate: number;\n}\n```\n\n## Extension Points\n\n### Adding New Language Support\n\nTo add support for a new language:\n\n1. Add language detection in the file scanner\n2. Implement symbol extraction patterns in `extractSymbols()`\n3. Implement edge extraction patterns in `extractEdges()`\n4. Update the chunking logic if special handling is needed\n\nExample pattern structure:\n\n```typescript\n} else if (language === \"newlang\") {\n  matchPush(line, /^\\s*(pub\\s+)?fn\\s+([A-Za-z_][\\w]*)/, push, \"function\");\n  const use = line.match(/^\\s*use\\s+([^;]+);/);\n  if (use) addImport(use[1].trim());\n}\n```\n\n资料来源：[src/extract.ts:35-44](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: Inferensys/contextful\n\nSummary: Found 7 potential pitfall items; 0 are high/blocking. Highest priority: configuration - 可能修改宿主 AI 配置.\n\n## 1. configuration · 可能修改宿主 AI 配置\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 项目面向 Claude/Cursor/Codex/Gemini/OpenCode 等宿主，或安装命令涉及用户配置目录。\n- User impact: 安装可能改变本机 AI 工具行为，用户需要知道写入位置和回滚方法。\n- Suggested check: 列出会写入的配置文件、目录和卸载/回滚步骤。\n- Guardrail action: 涉及宿主配置目录时必须给回滚路径，不能只给安装命令。\n- Evidence: capability.host_targets | github_repo:1240001007 | https://github.com/Inferensys/contextful | host_targets=claude, claude_code\n\n## 2. 
capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:1240001007 | https://github.com/Inferensys/contextful | README/documentation is current enough for a first validation pass.\n\n## 3. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | last_activity_observed missing\n\n## 4. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n\n## 5. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n\n## 6. 
maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | issue_or_pr_quality=unknown\n\n## 7. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | release_recency=unknown\n\n<!-- canonical_name: Inferensys/contextful; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "contextful",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "github_repo:1240001007",
          "kind": "repo",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/Inferensys/contextful"
        },
        {
          "evidence_id": "art_9602e7fcea104ce288e652334966e49d",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/Inferensys/contextful#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "contextful 说明书",
      "toc": [
        "https://github.com/Inferensys/contextful 项目说明书",
        "目录",
        "Project Introduction",
        "Purpose and Scope",
        "Architecture Overview",
        "Supported Languages and File Types",
        "Core MCP Tools",
        "CLI Interface",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "59bcd22c94d622c51c504a05740222c2d9ef4ad1",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "package.json",
      "README.md",
      "docs/IMPROVEMENTS.md",
      "src/parser-backend.ts",
      "src/report.ts",
      "src/search.ts",
      "src/util.ts",
      "src/db.ts",
      "src/files.ts",
      "src/index.ts",
      "src/language.ts",
      "src/init.ts",
      "src/types.ts",
      "src/mcp-server.ts",
      "src/extract.ts",
      "src/memory.ts",
      "src/cli.ts",
      "src/indexer.ts"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [
      "community_discussion_evidence_below_public_threshold"
    ],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# @inferensys/contextful - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 @inferensys/contextful 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **正在使用 Claude/Codex/Cursor/Gemini 等宿主 AI 的开发者**：README 或插件配置提到多个宿主 AI。 证据：`README.md` Claim：`clm_0002` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `npx @inferensys/contextful index --workspace .` 证据：`README.md` Claim：`clm_0003` supported 0.86\n- `npx @inferensys/contextful query \"where is user auth handled\" --workspace . 
--budget 2000` 证据：`README.md` Claim：`clm_0004` supported 0.86\n- `npx @inferensys/contextful server` 证据：`README.md` Claim：`clm_0005` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：先做权限沙盒试用\n- **为什么**：项目存在安装命令、宿主配置或本地写入线索，不建议直接进入主力环境，应先在隔离环境试装。\n\n### 30 秒判断\n\n- **现在怎么做**：先做权限沙盒试用\n- **最小安全下一步**：先跑 Prompt Preview；若仍要安装，只在隔离环境试装\n- **先别相信**：工具权限边界不能在安装前相信。\n- **继续会触碰**：命令执行、本地环境或项目文件、宿主 AI 上下文\n\n### 现在可以相信\n\n- **适合人群线索：正在使用 Claude/Codex/Cursor/Gemini 等宿主 AI 的开发者**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n### 现在还不能相信\n\n- **工具权限边界不能在安装前相信。**（unverified）：MCP/tool 类项目通常会触碰文件、网络、浏览器或外部 API，必须真实检查权限和日志。\n- **真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n- **安装命令是否需要网络、权限或全局写入？**（unverified）：这影响企业环境和个人环境的安装风险。 证据：`README.md`\n\n### 继续会触碰什么\n\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：用安装前交互式试用判断工作方式是否匹配，不需要授权或改环境。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- 
**保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0006` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md` Claim：`clm_0007` supported 0.86\n- **待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？。原因：兼容性只能通过实际宿主环境验证。\n- **待确认**：项目输出质量是否满足用户具体任务？。原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？。原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：36\n- 重要文件覆盖：27/36\n- 证据索引条目：27\n- 角色 / Skill 条目：3\n\n### 证据不足时的处理\n\n- **missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 @inferensys/contextful 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 @inferensys/contextful 
当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 @inferensys/contextful 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 3 个角色 / Skill / 项目文档条目。\n\n- **contextful**（project_doc）：! contextful cover image docs/cover.svg 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **Contextful Dogfood Findings**（project_doc）：These came from running Contextful against its own repo and a temp copy of mcp-doctor . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/IMPROVEMENTS.md`\n- **Resource Access**（project_doc）：The resource access flow starts in AuthService.login , loads the user profile, then returns dashboard and billing resources. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`tests/fixtures/sample-repo/docs/resources.md`\n\n## 证据索引\n\n- 共索引 27 条证据。\n\n- **contextful**（documentation）：! 
contextful cover image docs/cover.svg 证据：`README.md`\n- **Package**（package_manifest）：{ \"name\": \"@inferensys/contextful\", \"version\": \"0.1.0\", \"description\": \"Highly efficient context management for agentic AI: search, evidence packs, and memory for Claude Code, Codex, Cursor, Windsurf, and Copilot.\", \"type\": \"module\", \"main\": \"dist/index.js\", \"bin\": { \"cxf\": \"dist/cli.js\", \"contextful\": \"dist/cli.js\" }, \"files\": \"dist\", \"docs\", \"README.md\", \"LICENSE\", \"server.json\", \"tests/fixtures\" , \"scripts\": { \"build\": \"tsc\", \"test\": \"vitest run\", \"check\": \"npm run build && npm run test\", \"prepare\": \"npm run build\", \"prepack\": \"npm run check\" }, \"keywords\": \"mcp\", \"model-context-protocol\", \"context\", \"context-management\", \"token-efficiency\", \"agentic-ai\", \"code-search\", \"evidence-packs\",… 证据：`package.json`\n- **Package**（package_manifest）：{ \"name\": \"sample-context-repo\", \"version\": \"0.1.0\", \"scripts\": { \"test\": \"vitest run\", \"build\": \"tsc\" }, \"dependencies\": { \"zod\": \"^4.0.0\" }, \"devDependencies\": { \"vitest\": \"^4.0.0\" } } 证据：`tests/fixtures/sample-repo/package.json`\n- **License**（source_file）：Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files the \"Software\" , to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 证据：`LICENSE`\n- **Contextful Dogfood Findings**（documentation）：These came from running Contextful against its own repo and a temp copy of mcp-doctor . 证据：`docs/IMPROVEMENTS.md`\n- **Resource Access**（documentation）：The resource access flow starts in AuthService.login , loads the user profile, then returns dashboard and billing resources. 
证据：`tests/fixtures/sample-repo/docs/resources.md`\n- **Server**（structured_config）：{ \"$schema\": \"https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json\", \"name\": \"io.github.Inferensys/contextful\", \"title\": \"Contextful\", \"description\": \"Efficient context management: code search, evidence packs, and memory for coding agents.\", \"version\": \"0.1.0\", \"websiteUrl\": \"https://inferensys.github.io/contextful/\", \"repository\": { \"url\": \"https://github.com/Inferensys/contextful\", \"source\": \"github\" }, \"packages\": { \"registryType\": \"npm\", \"identifier\": \"@inferensys/contextful\", \"version\": \"0.1.0\", \"transport\": { \"type\": \"stdio\" } } } 证据：`server.json`\n- **Tsconfig**（structured_config）：{ \"compilerOptions\": { \"target\": \"ES2022\", \"module\": \"Node16\", \"moduleResolution\": \"Node16\", \"outDir\": \"./dist\", \"rootDir\": \"./src\", \"strict\": true, \"esModuleInterop\": true, \"skipLibCheck\": true, \"forceConsistentCasingInFileNames\": true, \"declaration\": true, \"sourceMap\": true }, \"include\": \"src/ / \" , \"exclude\": \"node modules\", \"dist\", \"tests\" } 证据：`tsconfig.json`\n- **.gitignore**（source_file）：node modules/ dist/ .contextful/ coverage/ .DS Store .tgz 证据：`.gitignore`\n- **Cover**（source_file）： 证据：`docs/cover.svg`\n- **Index**（source_file）：Contextful - Efficient Context Management for Agentic AI :root { color-scheme: light; --ink: 141414; --muted: 5c6470; --line: d9dde3; --paper: ffffff; --soft: f6f7f9; --accent: 0f766e; --accent-2: 92400e; --code: 101418; } 证据：`docs/index.html`\n- **!/usr/bin/env node**（source_file）：import process from \"node:process\"; import { Command } from \"commander\"; import { indexWorkspace, watchWorkspace } from \"./indexer.js\"; import { writeLesson } from \"./memory.js\"; import { generateReport, renderEvidencePackMarkdown, renderReport } from \"./report.js\"; import { createContextPack, searchContext } from \"./search.js\"; import { runMcpServer } 
from \"./mcp-server.js\"; import { type ReportFormat } from \"./types.js\"; 证据：`src/cli.ts`\n- **Db**（source_file）：import Database from \"better-sqlite3\"; import { dbPathFor, ensureDir, hashText, nowIso, stateDirFor } from \"./util.js\"; import { type ChunkRecord, type GraphEdge, type IndexedFile, type MemoryRecord, type SearchIntent, SCHEMA VERSION } from \"./types.js\"; 证据：`src/db.ts`\n- **Extract**（source_file）：import path from \"node:path\"; import { detectLanguage } from \"./language.js\"; import { type ChunkRecord, type ExtractedFacts, type GraphEdge, type SymbolRecord } from \"./types.js\"; import { estimateTokens, excerpt, lineRange } from \"./util.js\"; 证据：`src/extract.ts`\n- **Files**（source_file）：import fs from \"node:fs\"; import path from \"node:path\"; import fg from \"fast-glob\"; import ignore from \"ignore\"; import { type DiscoveredFile } from \"./types.js\"; import { isLikelyBinary } from \"./util.js\"; 证据：`src/files.ts`\n- **Index**（source_file）：export { indexWorkspace, watchWorkspace } from \"./indexer.js\"; export { createContextPack, searchContext, searchCode, traceGraph, tracePath, impactAnalysis, whyChanged, getIndexStatus, classifyQuery } from \"./search.js\"; export { writeLesson, recallMemory } from \"./memory.js\"; export { generateReport, renderReport } from \"./report.js\"; export { runMcpServer } from \"./mcp-server.js\"; export type { ContextReport, EvidencePack, GraphPath, IndexResult, IndexStatus, MemoryRecord, SearchHit, SearchIntent } from \"./types.js\"; 证据：`src/index.ts`\n- **Indexer**（source_file）：import { execFileSync } from \"node:child process\"; import fs from \"node:fs\"; import path from \"node:path\"; import { closeKernelDb, count, getFileHashes, insertChunk, insertEdge, insertFile, insertSymbol, listMemories, openKernelDb, rebuildAdjacencyCache, resetIndex, setMemoryStatus, setMeta } from \"./db.js\"; import { discoverWorkspaceFiles } from \"./files.js\"; import { detectLanguage, isSupportedPrimaryLanguage } from 
\"./language.js\"; import { extractFileFacts } from \"./extract.js\"; import { probeTreeSitterBackend } from \"./parser-backend.js\"; import { type IndexResult, SCHEMA VERSION } from \"./types.js\"; import { hashText, nowIso, resolveWorkspace, stateDirFor } from \"./util.js\"; 证据：`src/indexer.ts`\n- **Language**（source_file）：const EXTENSIONS: Record = { \".ts\": \"typescript\", \".tsx\": \"typescript\", \".mts\": \"typescript\", \".cts\": \"typescript\", \".js\": \"javascript\", \".jsx\": \"javascript\", \".mjs\": \"javascript\", \".cjs\": \"javascript\", \".py\": \"python\", \".go\": \"go\", \".rs\": \"rust\", \".json\": \"json\", \".jsonc\": \"json\", \".md\": \"markdown\", \".mdx\": \"markdown\", \".yml\": \"yaml\", \".yaml\": \"yaml\", \".toml\": \"toml\" }; 证据：`src/language.ts`\n- **Mcp Server**（source_file）：import { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"; import { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"; import { z } from \"zod\"; import { recallMemory, writeLesson } from \"./memory.js\"; import { createContextPack, impactAnalysis, searchCode, tracePath, whyChanged } from \"./search.js\"; 证据：`src/mcp-server.ts`\n- **Memory**（source_file）：import { closeKernelDb, insertMemory, openKernelDb } from \"./db.js\"; import { ensureIndexed, searchContext } from \"./search.js\"; import { type MemoryRecord, type SearchHit } from \"./types.js\"; import { nowIso, resolveWorkspace, shortHash } from \"./util.js\"; 证据：`src/memory.ts`\n- **Parser Backend**（source_file）：import { createRequire } from \"node:module\"; import Parser from \"web-tree-sitter\"; 证据：`src/parser-backend.ts`\n- **Report**（source_file）：import { closeKernelDb, openKernelDb, listMemories } from \"./db.js\"; import { getIndexStatus } from \"./search.js\"; import { type ContextReport, type ReportFormat } from \"./types.js\"; import { estimateTokens, redactText, resolveWorkspace } from \"./util.js\"; 证据：`src/report.ts`\n- **Search**（source_file）：import fs 
from \"node:fs\"; import { execFileSync } from \"node:child process\"; import { addQuery, closeKernelDb, count, getMeta, memoryFromRow, openKernelDb, saveEvidencePack } from \"./db.js\"; import { indexWorkspace } from \"./indexer.js\"; import { type EvidencePack, type GraphPath, type IndexStatus, type SearchHit, type SearchIntent, type SymbolRecord, SCHEMA VERSION } from \"./types.js\"; import { clamp, dbPathFor, estimateTokens, excerpt, nowIso, redactText, resolveWorkspace, shortHash } from \"./util.js\"; 证据：`src/search.ts`\n- **Types**（source_file）：export const SCHEMA VERSION = 1; export const STATE DIR = \".contextful\"; 证据：`src/types.ts`\n- **Util**（source_file）：import crypto from \"node:crypto\"; import fs from \"node:fs\"; import path from \"node:path\"; import { STATE DIR } from \"./types.js\"; 证据：`src/util.ts`\n- **Context Kernel.Test**（source_file）：import { execFileSync } from \"node:child process\"; import fs from \"node:fs\"; import os from \"node:os\"; import path from \"node:path\"; import { fileURLToPath } from \"node:url\"; import { Client } from \"@modelcontextprotocol/sdk/client/index.js\"; import { StdioClientTransport } from \"@modelcontextprotocol/sdk/client/stdio.js\"; import { afterEach, describe, expect, it } from \"vitest\"; import { createContextPack, generateReport, getIndexStatus, indexWorkspace, recallMemory, searchContext, whyChanged, writeLesson } from \"../src/index.js\"; 证据：`tests/context-kernel.test.ts`\n- **Vitest.Config**（source_file）：import { defineConfig } from \"vitest/config\"; 证据：`vitest.config.ts`\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`README.md`, `package.json`, `tests/fixtures/sample-repo/package.json`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`README.md`, `package.json`, `tests/fixtures/sample-repo/package.json`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 
所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\nThe following material strengthens the Repomix/AI Context Pack body. Human Manual is only a reading skeleton; pitfall logs become hard operating constraints for the host AI.\n\n## Human Manual Skeleton\n\nUsage rule: this is only a reading path and salience signal, not factual authority. Concrete facts must still come from repo evidence / Claim Graph.\n\nHard rules for the host AI:\n- Do not treat page titles, order, summaries, or importance as project facts.\n- When explaining the Human Manual skeleton, state that it is only a reading path / salience signal.\n- Capability, installation, compatibility, runtime status, and risk judgments must cite repo evidence, source paths, or Claim Graph.\n\n- **Project Introduction**：importance `high`\n  - source_paths: README.md, package.json, src/index.ts\n- **Quick Start Guide**：importance `high`\n  - source_paths: README.md, server.json\n- **High-Level Architecture**：importance `high`\n  - source_paths: README.md, src/mcp-server.ts, src/indexer.ts, src/cli.ts\n- **Runtime Components**：importance `medium`\n  - source_paths: src/mcp-server.ts, src/indexer.ts, src/cli.ts\n- **Search Engine**：importance `high`\n  - source_paths: src/search.ts, src/types.ts, src/util.ts\n- **Context Packs**：importance `high`\n  - source_paths: src/search.ts, src/types.ts, src/extract.ts\n- **Memory Ledger**：importance `high`\n  - source_paths: src/memory.ts, src/db.ts, src/types.ts\n- **Graph Traversal and Analysis**：importance `medium`\n  - source_paths: src/search.ts, src/db.ts, src/types.ts\n\n## Repo Inspection Evidence\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: `59bcd22c94d622c51c504a05740222c2d9ef4ad1`\n- inspected_files: `package.json`, `README.md`, `docs/IMPROVEMENTS.md`, `src/parser-backend.ts`, `src/report.ts`, `src/search.ts`, `src/util.ts`, 
`src/db.ts`, `src/files.ts`, `src/index.ts`, `src/language.ts`, `src/init.ts`, `src/types.ts`, `src/mcp-server.ts`, `src/extract.ts`, `src/memory.ts`, `src/cli.ts`, `src/indexer.ts`\n\nHard rules for the host AI:\n- Without repo_clone_verified=true, do not claim the source code has been read.\n- Without repo_inspection_verified=true, do not turn README/docs/package observations into facts.\n- Without quick_start_verified=true, do not claim the Quick Start has been successfully run.\n\n## Doramagic Pitfall Constraints\n\nThese rules come from Doramagic discovery, validation, or compilation pitfalls. The host AI must treat them as operating constraints, not general background notes.\n\n### Constraint 1: 可能修改宿主 AI 配置\n\n- Trigger: 项目面向 Claude/Cursor/Codex/Gemini/OpenCode 等宿主，或安装命令涉及用户配置目录。\n- Host AI rule: 列出会写入的配置文件、目录和卸载/回滚步骤。\n- Why it matters: 安装可能改变本机 AI 工具行为，用户需要知道写入位置和回滚方法。\n- Evidence: capability.host_targets | github_repo:1240001007 | https://github.com/Inferensys/contextful | host_targets=claude, claude_code\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 2: 能力判断依赖假设\n\n- Trigger: README/documentation is current enough for a first validation pass.\n- Host AI rule: 将假设转成下游验证清单。\n- Why it matters: 假设不成立时，用户拿不到承诺的能力。\n- Evidence: capability.assumptions | github_repo:1240001007 | https://github.com/Inferensys/contextful | README/documentation is current enough for a first validation pass.\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 3: 维护活跃度未知\n\n- Trigger: 未记录 last_activity_observed。\n- Host AI rule: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Why it matters: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | last_activity_observed missing\n- Hard 
boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 4: 下游验证发现风险项\n\n- Trigger: no_demo\n- Host AI rule: 进入安全/权限治理复核队列。\n- Why it matters: 下游已经要求复核，不能在页面中弱化。\n- Evidence: downstream_validation.risk_items | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 5: 存在评分风险\n\n- Trigger: no_demo\n- Host AI rule: 把风险写入边界卡，并确认是否需要人工复核。\n- Why it matters: 风险会影响是否适合普通用户安装。\n- Evidence: risks.scoring_risks | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 6: issue/PR 响应质量未知\n\n- Trigger: issue_or_pr_quality=unknown。\n- Host AI rule: 抽样最近 issue/PR，判断是否长期无人处理。\n- Why it matters: 用户无法判断遇到问题后是否有人维护。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | issue_or_pr_quality=unknown\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 7: 发布节奏不明确\n\n- Trigger: release_recency=unknown。\n- Host AI rule: 确认最近 release/tag 和 README 安装命令是否一致。\n- Why it matters: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | release_recency=unknown\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n",
      "summary": "Context and operating boundaries for host AI agents.",
      "title": "AI Context Pack"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card\n\nProject: Inferensys/contextful\n\n## Doramagic Trial Decision\n\nCurrent decision: it can enter pre-publication recommendation checks. First use should still start with least privilege, a temporary directory, and reversible configuration.\n\n## What The User Can Do Now\n\n- Read the Human Manual first to understand the project purpose and main workflows.\n- Use Prompt Preview for pre-install exploration; it validates interaction shape, not real execution.\n- Run official Quick Start commands only inside an isolated environment, not a primary setup.\n\n## Do Not Do Yet\n\n- Do not treat Prompt Preview as a real project execution result.\n- Do not treat metadata-only validation as sandbox installation validation.\n- Do not describe unverified capabilities as supported, working, or safe to install.\n- Do not provide production data, private files, real secrets, or primary host configuration on first trial.\n\n## Pre-Install Checklist\n\n- Host AI match: claude, claude_code\n- Official installation entry status: official entry point found\n- Isolated temporary directory, temporary host, or container validation: required\n- Configuration rollback path: required\n- API keys, network access, file access, or host configuration changes: treat as high risk until confirmed\n- Installation command, actual output, and failure logs: must be recorded\n\n## Current Blockers\n\n- review_required: community_discussion_evidence_below_public_threshold\n\n## Project-Specific Pitfalls\n\n- 可能修改宿主 AI 配置 (medium): 安装可能改变本机 AI 工具行为，用户需要知道写入位置和回滚方法。 Suggested check: 列出会写入的配置文件、目录和卸载/回滚步骤。\n- 能力判断依赖假设 (medium): 假设不成立时，用户拿不到承诺的能力。 Suggested check: 将假设转成下游验证清单。\n- 维护活跃度未知 (medium): 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。 Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 下游验证发现风险项 (medium): 下游已经要求复核，不能在页面中弱化。 Suggested check: 进入安全/权限治理复核队列。\n- 存在评分风险 (medium): 风险会影响是否适合普通用户安装。 Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n\n## Risk And Permission 
Notes\n\n- no_demo: medium\n\n## Evidence Gaps\n\n- No structured evidence gaps are currently visible.\n",
      "summary": "Installation, permission, validation, and pre-recommendation risks.",
      "title": "Boundary & Risk Card"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/Inferensys/contextful 项目说明书\n\n生成时间：2026-05-16 06:05:31 UTC\n\n## 目录\n\n- [Project Introduction](#project-introduction)\n- [Quick Start Guide](#quick-start)\n- [High-Level Architecture](#high-level-architecture)\n- [Runtime Components](#runtime-components)\n- [Search Engine](#search-engine)\n- [Context Packs](#context-packs)\n- [Memory Ledger](#memory-ledger)\n- [Graph Traversal and Analysis](#graph-traversal)\n- [SQLite Database Schema](#sqlite-database)\n- [Workspace Indexing System](#indexing-system)\n\n<a id='project-introduction'></a>\n\n## Project Introduction\n\n### 相关页面\n\n相关主题：[High-Level Architecture](#high-level-architecture), [Quick Start Guide](#quick-start)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n- [package.json](https://github.com/Inferensys/contextful/blob/main/package.json)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n</details>\n\n# Project Introduction\n\nContextful is an intelligent code context management system designed to provide AI agents with compact, evidence-backed information for codebase navigation and understanding. 
The project serves as a bridge between large codebases and AI-powered development tools by indexing source code, extracting symbols, tracking dependencies, and generating token-budgeted evidence packs for queries.\n\n## Purpose and Scope\n\nContextful solves the fundamental problem that AI coding assistants face when working with large repositories: excessive context requirements that lead to token waste and degraded performance. Instead of forcing agents to read dozens of random files, Contextful enables targeted, cited, and ranked context retrieval that maximizes the value of each token spent.\n\nThe system operates in three primary modes:\n\n1. **Indexing Mode** - Scans and indexes source code, extracting symbols, dependencies, and semantic chunks\n2. **Query Mode** - Creates evidence packs for natural language queries with token budgets\n3. **Search Mode** - Provides lightweight search across code, docs, symbols, and memory without full evidence compilation\n\n资料来源：[README.md:1-15]()\n\n## Architecture Overview\n\nThe Contextful system consists of several interconnected components that work together to provide context management capabilities.\n\n```mermaid\ngraph TD\n    A[Source Code] --> B[Indexing Engine]\n    B --> C[SQLite Kernel DB]\n    C --> D[Search Module]\n    C --> E[Graph Analysis]\n    C --> F[Memory Ledger]\n    \n    G[CLI / MCP Server] --> D\n    G --> E\n    G --> F\n    \n    D --> H[Evidence Pack]\n    E --> H\n    F --> H\n    \n    H --> I[AI Agent / User]\n```\n\n### Component Responsibilities\n\n| Component | File | Responsibility |\n|-----------|------|----------------|\n| Indexing Engine | `src/extract.ts` | Parse source files, extract symbols and dependencies |\n| Search Module | `src/search.ts` | Full-text search, intent classification, ranking |\n| Graph Analysis | `src/search.ts` | Trace dependencies and code paths |\n| Memory Ledger | `src/memory.ts` | Store evidence-backed lessons across sessions |\n| CLI Interface | `src/cli.ts` 
| Command-line interface for all operations |\n| MCP Server | `src/mcp-server.ts` | Model Context Protocol stdio server |\n\n资料来源：[src/extract.ts:1-50](), [src/search.ts:1-30](), [src/cli.ts:1-40]()\n\n## Supported Languages and File Types\n\nContextful supports multiple programming languages through pattern-based extraction. The indexing engine recognizes language-specific syntax for symbols and dependencies.\n\n### Language Support Matrix\n\n| Language | Functions | Classes | Types | Imports |\n|----------|-----------|---------|-------|---------|\n| TypeScript/JavaScript | ✓ | ✓ | ✓ | ✓ |\n| Python | ✓ | ✓ | - | ✓ |\n| Go | ✓ | ✓ | ✓ | ✓ |\n| Rust | ✓ | ✓ | ✓ | ✓ |\n| Markdown | - | - | Headings | - |\n| JSON | - | - | Config keys | - |\n\n资料来源：[src/extract.ts:15-80]()\n\n## Core MCP Tools\n\nContextful exposes its capabilities through the Model Context Protocol (MCP), providing AI agents with a standardized tool interface. The primary tools are designed to keep the agent surface small while providing maximum utility.\n\n```mermaid\ngraph LR\n    A[Agent] -->|context_pack| B[Evidence Pack Generator]\n    A -->|search_code| C[Code Search]\n    A -->|trace_path| D[Graph Traversal]\n    A -->|impact_analysis| E[Dependency Analyzer]\n    A -->|why_changed| F[Git History]\n    A -->|recall_memory| G[Memory Search]\n    A -->|write_lesson| H[Lesson Writer]\n```\n\n### Tool Descriptions\n\n| Tool | Purpose | Key Parameters |\n|------|---------|----------------|\n| `context_pack` | Returns ranked, cited, token-budgeted context bundles | `query`, `budget`, `scope` |\n| `search_code` | Powerful search across code, docs, symbols, and memory | `query`, `mode`, `filters` |\n| `trace_path` | Graph traversal across files, symbols, modules, and config | `from`, `to`, `edge_types` |\n| `impact_analysis` | Reverse dependencies and likely tests | `symbol_or_file` |\n| `why_changed` | Current evidence plus git history | `symbol_or_file` |\n| `recall_memory` | Search session 
learnings and durable lessons | `query`, `scope` |\n| `write_lesson` | Store evidence-backed lessons | `claim`, `evidence_refs`, `confidence` |\n\n资料来源：[README.md:25-45](), [src/mcp-server.ts:1-80]()\n\n## CLI Interface\n\nContextful provides a command-line interface through the `cxf` binary (with `contextful` as a readable alias). The CLI supports both one-shot operations and daemon mode for continuous indexing.\n\n### Command Reference\n\n| Command | Description | Key Options |\n|---------|-------------|-------------|\n| `index` | Index a workspace | `--workspace`, `--watch` |\n| `daemon` | Run local indexing daemon | `--workspace` |\n| `query` | Create evidence pack for query | `--workspace`, `--budget`, `--json` |\n| `search` | Search without full evidence pack | `--workspace`, `--limit`, `--kind` |\n| `report` | Generate context report | `--workspace`, `--format` |\n| `memory add` | Store evidence-backed lesson | `--claim`, `--evidence`, `--scope`, `--confidence` |\n| `server` | Run MCP stdio server | - |\n\n资料来源：[src/cli.ts:40-120](), [README.md:15-35]()\n\n### Example Usage\n\n```bash\n# Index a workspace\nnpx @inferensys/contextful index --workspace .\n\n# Query with token budget\nnpx @inferensys/contextful query \"where is user auth handled\" --workspace . --budget 2000\n\n# Run as MCP server\nnpx @inferensys/contextful server\n```\n\n资料来源：[README.md:8-15]()\n\n## Data Models\n\n### Evidence Pack Structure\n\nThe `EvidencePack` is the core data structure returned by query operations. 
It contains all necessary context for an agent to answer a query.\n\n```typescript\ninterface EvidencePack {\n  id: string;                    // Unique pack identifier\n  query: string;                 // Original query\n  scope: string;                 // Scope of the context\n  intent: SearchIntent;          // Classified query intent\n  summary: string;               // Human-readable summary\n  citations: SearchHit[];        // Ranked evidence items\n  files: FileContext[];          // Grouped file references\n  symbols: SymbolRecord[];       // Relevant symbols\n  graphPaths: GraphPath[];       // Dependency paths\n  memoryHits: SearchHit[];       // Memory matches\n  confidence: number;            // Confidence score (0.1-0.92)\n  tokenEstimate: number;         // Estimated token count\n  budget: number;                // Token budget\n  createdAt: string;             // ISO timestamp\n}\n```\n\n资料来源：[src/search.ts:200-250]()\n\n### Search Hit Structure\n\nEach search result is represented as a `SearchHit` with relevance ranking and excerpt information.\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `ref` | string | Reference identifier (e.g., `file:src/auth.ts:1-20`) |\n| `path` | string | File path |\n| `title` | string | Display title |\n| `excerpt` | string | Relevant text snippet |\n| `kind` | string | Type: `code`, `doc`, `symbol`, `memory` |\n| `rank` | number | BM25 relevance score |\n\n资料来源：[src/search.ts:50-80]()\n\n## Dependencies and Technology Stack\n\nContextful is built on a carefully selected set of dependencies that enable efficient code indexing and search.\n\n| Dependency | Version | Purpose |\n|------------|---------|---------|\n| `@modelcontextprotocol/sdk` | ^1.29.0 | MCP protocol implementation |\n| `better-sqlite3` | ^12.10.0 | SQLite database for indexing |\n| `commander` | ^14.0.3 | CLI argument parsing |\n| `fast-glob` | ^3.3.3 | File pattern matching |\n| `tree-sitter-wasms` | ^0.1.13 | Syntax parsing |\n| 
`web-tree-sitter` | ^0.20.8 | Tree-sitter bindings |\n| `zod` | ^4.4.3 | Schema validation |\n\n资料来源：[package.json:20-40]()\n\n### System Requirements\n\n- **Node.js**: >= 20\n- **License**: MIT\n- **Repository**: [inferensys/contextful](https://github.com/Inferensys/contextful)\n\n资料来源：[package.json:45-55]()\n\n## Supported IDE Integration\n\nContextful is designed to integrate with a wide range of AI-powered development tools:\n\n| IDE/Extension | Status |\n|---------------|--------|\n| GitHub Copilot | Supported |\n| VS Code | Supported |\n| Cursor | Supported |\n| Windsurf | Supported |\n| Cline | Supported |\n| Roo Code | Supported |\n| Continue | Supported |\n| Zed | Supported |\n\n资料来源：[package.json:10-20]()\n\n## Workflow: From Indexing to Query\n\nThe complete workflow demonstrates how Contextful transforms raw source code into actionable intelligence for AI agents.\n\n```mermaid\nsequenceDiagram\n    participant U as User/Agent\n    participant CLI as CLI/MCP Server\n    participant IDX as Indexer\n    participant DB as SQLite Kernel\n    participant SRCH as Search Engine\n    participant MEM as Memory Ledger\n\n    U->>CLI: index --workspace ./project\n    CLI->>IDX: Extract symbols & dependencies\n    IDX->>DB: Store in chunks_fts, symbols, edges\n    DB-->>CLI: Index complete\n\n    U->>CLI: query \"how is auth handled\"\n    CLI->>SRCH: classifyQuery() intent=exact\n    SRCH->>DB: FTS + BM25 search\n    DB-->>SRCH: Ranked hits\n    SRCH->>MEM: Check memory ledger\n    MEM-->>SRCH: Related lessons\n    CLI-->>U: EvidencePack (token-budgeted)\n\n    U->>CLI: write_lesson --claim \"Auth pattern\" --evidence file:...\n    CLI->>MEM: Store lesson with confidence\n    MEM-->>CLI: Lesson saved\n```\n\n资料来源：[src/search.ts:100-150](), [src/report.ts:80-120]()\n\n## Next Steps\n\nTo continue exploring Contextful:\n\n1. **Installation Guide** - Set up Contextful in your development environment\n2. 
**CLI Reference** - Detailed documentation of all CLI commands\n3. **MCP Tools API** - Complete reference for MCP tool interfaces\n4. **Configuration** - Workspace configuration and tuning options\n5. **Memory System** - Using the evidence-backed lesson system\n\n---\n\n<a id='quick-start'></a>\n\n## Quick Start Guide\n\n### 相关页面\n\n相关主题：[Project Introduction](#project-introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n</details>\n\n# Quick Start Guide\n\n## Overview\n\nContextful is a contextual indexing and search system designed to help AI agents efficiently retrieve relevant code evidence. Instead of forcing agents to perform dozens of random file reads, Contextful returns compact, ranked, and cited evidence packs that fit within a token budget.\n\n资料来源：[README.md:1-10]()\n\n## Installation\n\nInstall Contextful using npm. 
The package provides both the `cxf` binary and the full `contextful` alias.\n\n```bash\nnpm install -g @inferensys/contextful\n```\n\nAlternatively, run commands directly via `npx`:\n\n```bash\nnpx @inferensys/contextful index --workspace .\n```\n\n资料来源：[README.md:11-14]()\n\n## CLI Commands\n\nContextful provides a command-line interface with the following primary commands:\n\n| Command | Description |\n|---------|-------------|\n| `cxf index` | Index a workspace for search |\n| `cxf daemon` | Run a local indexing daemon |\n| `cxf query` | Create an evidence pack for a query |\n| `cxf search` | Search indexed context |\n| `cxf report` | Generate a context report |\n| `cxf memory add` | Store an evidence-backed lesson |\n| `cxf server` | Run the MCP stdio server |\n\n资料来源：[README.md:23-32]()\n\n## Basic Workflow\n\n### Step 1: Index Your Workspace\n\nBefore searching, you must index your codebase. This creates the searchable database:\n\n```bash\ncxf index --workspace .\n```\n\nFor continuous indexing as files change, use the daemon mode:\n\n```bash\ncxf daemon --workspace .\n```\n\n资料来源：[src/cli.ts:1-20]()\n\n### Step 2: Query for Context\n\nOnce indexed, ask questions about your codebase:\n\n```bash\ncxf query \"where is user auth handled\" --workspace . --budget 2000\n```\n\nThe `query` command returns a ranked evidence pack with citations and file references.\n\n#### Query Options\n\n| Option | Description | Default |\n|--------|-------------|---------|\n| `--workspace <path>` | Workspace path | Current directory |\n| `--budget <tokens>` | Approximate token budget | 2000 |\n| `--json` | Output as JSON instead of Markdown | false |\n\n资料来源：[src/cli.ts:22-30]()\n\n### Step 3: Search Without Building Evidence Packs\n\nFor quick lookups without compiling full evidence packs, use `search`:\n\n```bash\ncxf search \"authentication middleware\" --workspace . 
--limit 10 --kind code\n```\n\n#### Search Options\n\n| Option | Description | Default |\n|--------|-------------|---------|\n| `--workspace <path>` | Workspace path | Current directory |\n| `--limit <count>` | Maximum hits | 10 |\n| `--kind` | Filter: `all`, `code`, `docs`, `symbols`, `memory` | `all` |\n\n资料来源：[src/cli.ts:32-42]()\n\n### Step 4: Generate Reports\n\nGenerate comprehensive context reports in various formats:\n\n```bash\ncxf report --workspace . --format markdown\ncxf report --workspace . --format json\ncxf report --workspace . --format html\n```\n\n资料来源：[src/cli.ts:44-48]()\n\n## MCP Server Integration\n\nContextful can run as a Model Context Protocol (MCP) server, providing tools directly to AI agents.\n\n```bash\ncxf server\n```\n\n### Available MCP Tools\n\n| Tool | Purpose |\n|------|---------|\n| `context_pack` | Returns ranked, cited, token-budgeted evidence bundles |\n| `search_code` | Code, docs, symbol, and memory search |\n| `trace_path` | Graph traversal across files, symbols, modules, and config |\n| `impact_analysis` | Reverse dependencies and likely tests |\n| `why_changed` | Current evidence plus git history |\n| `recall_memory` | Search session learnings and durable project lessons |\n| `write_lesson` | Store evidence-backed lessons for future sessions |\n\n资料来源：[README.md:40-48]()\n\n### MCP Tool Parameters\n\n#### context_pack\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | string | Yes | Query to answer from indexed context |\n| `budget` | number | No | Token budget for the response |\n| `scope` | string | No | Search scope |\n\n资料来源：[src/mcp-server.ts:1-25]()\n\n#### search_code\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | string | Yes | Search query |\n| `mode` | string | No | Search mode |\n| `filters` | object | No | Search filters |\n| `workspace` | string | No | Workspace path |\n| `limit` | number | 
No | Maximum results |\n\n资料来源：[src/mcp-server.ts:26-40]()\n\n#### write_lesson\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `claim` | string | Yes | Lesson claim |\n| `evidence_refs` | array | Yes | Evidence references (e.g., `file:src/auth.ts:1-20`) |\n| `scope` | string | No | Memory scope |\n| `confidence` | number | No | Confidence from 0 to 1 |\n| `supersedes` | string | No | Previous lesson ID to supersede |\n\n资料来源：[src/mcp-server.ts:65-80]()\n\n## Memory System\n\nContextful includes an evidence-backed memory system for storing lessons across sessions.\n\n### Adding a Lesson\n\n```bash\ncxf memory add \\\n  --claim \"Always validate tokens in middleware\" \\\n  --evidence \"file:src/auth.ts:1-20\" \\\n  --workspace . \\\n  --confidence 0.8\n```\n\n#### Memory Command Options\n\n| Option | Required | Description |\n|--------|----------|-------------|\n| `--claim <text>` | Yes | The lesson or claim |\n| `--evidence <ref...>` | Yes | Evidence references |\n| `--workspace <path>` | No | Workspace path |\n| `--scope <scope>` | No | Memory scope (default: `repo`) |\n| `--confidence <number>` | No | Confidence from 0 to 1 (default: 0.7) |\n\n资料来源：[src/cli.ts:50-75]()\n\n## Output Formats\n\n### Markdown Output (Default)\n\n```bash\ncxf query \"where is auth handled\" --workspace .\n```\n\nReturns a formatted Markdown document with citations and graph paths.\n\n### JSON Output\n\n```bash\ncxf query \"where is auth handled\" --workspace . 
--json\n```\n\nReturns structured JSON data suitable for programmatic processing.\n\n资料来源：[src/cli.ts:22-30]()\n\n### Report Formats\n\n| Format | Description |\n|--------|-------------|\n| `markdown` | Human-readable Markdown report |\n| `json` | Structured JSON data |\n| `html` | Standalone HTML page |\n\n资料来源：[src/cli.ts:44-48]()\n\n## Architecture Overview\n\n```mermaid\ngraph TD\n    A[CLI / MCP Server] --> B[Workspace Indexer]\n    B --> C[SQLite Kernel DB]\n    C --> D[Full-Text Search]\n    C --> E[Symbol Index]\n    C --> F[Graph Edges]\n    G[Query Request] --> H[Search Context]\n    H --> I[Evidence Pack Builder]\n    I --> D\n    I --> E\n    I --> F\n    I --> J[Memory Ledger]\n    I --> K[Evidence Pack Output]\n    J --> J\n```\n\n## Common Usage Patterns\n\n### Pattern 1: Initial Setup\n\n```bash\n# Index the workspace\ncxf index --workspace /path/to/project --watch\n\n# Generate initial report\ncxf report --workspace /path/to/project --format html > report.html\n```\n\n### Pattern 2: Interactive Exploration\n\n```bash\n# Run as MCP server\ncxf server\n\n# Or use CLI directly\ncxf query \"how does the cache work\" --workspace . 
--budget 3000\n```\n\n### Pattern 3: Agent Memory Persistence\n\n```bash\n# Store learned lessons\ncxf memory add --claim \"Config validation happens in validate.ts\" --evidence \"file:src/config/validate.ts:1-50\"\n\n# Recall past lessons\n# Via MCP: recall_memory(query=\"config validation\")\n```\n\n## Next Steps\n\n- Explore [Architecture Documentation](architecture) for deep dive into indexing and search internals\n- Learn about [Memory System](memory) for evidence-backed knowledge persistence\n- Review [API Reference](api) for programmatic integration\n\n---\n\n<a id='high-level-architecture'></a>\n\n## High-Level Architecture\n\n### 相关页面\n\n相关主题：[Runtime Components](#runtime-components), [Search Engine](#search-engine), [SQLite Database Schema](#sqlite-database)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n</details>\n\n# High-Level Architecture\n\nContextful is a local-only indexing and context management tool designed to help AI coding assistants retrieve compact, evidence-backed context from workspace codebases. The system operates without external embedding APIs, instead relying on SQLite FTS5 full-text search, graph-based dependency tracking, and intent-classified query routing. 
资料来源：[README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n## System Overview\n\nContextful functions as a local daemon that continuously indexes workspace files, extracts code symbols and import relationships, and provides a structured context pack API to agents. The architecture follows a three-layer design:\n\n1. **Indexing Layer** - File parsing, symbol extraction, edge detection\n2. **Storage Layer** - SQLite kernel with FTS5 search and graph tables\n3. **Query Layer** - Intent classification, ranked search, evidence pack assembly\n\n资料来源：[src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n\n## Component Architecture\n\n```mermaid\ngraph TD\n    A[Workspace Files] --> B[Indexer]\n    B --> C[Symbol Extraction]\n    B --> D[Edge Detection]\n    B --> E[Chunk Generation]\n    C --> F[SQLite Kernel DB]\n    D --> F\n    E --> F\n    G[CLI / MCP Server] --> H[Search Module]\n    H --> F\n    H --> I[Context Pack Assembly]\n    I --> J[Evidence Pack Output]\n```\n\n### Core Components\n\n| Component | File | Responsibility |\n|-----------|------|----------------|\n| Indexer | `src/indexer.ts` | Recursively walks workspace, triggers file processing |\n| Extractor | `src/extract.ts` | Parses symbols, edges, and code chunks per file |\n| Search | `src/search.ts` | FTS5 queries, intent classification, ranking |\n| CLI | `src/cli.ts` | Command-line interface and MCP server entry point |\n| Report | `src/report.ts` | Generates workspace context reports |\n\n资料来源：[src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts), [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts), [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Indexing Pipeline\n\nThe indexing pipeline processes workspace files through multiple extraction stages. 
Each source file is read, classified by language, and passed through specialized extractors that produce structured records.\n\n```mermaid\ngraph LR\n    A[File Content] --> B[Language Detection]\n    B --> C[Symbol Extraction]\n    B --> D[Edge Extraction]\n    B --> E[Chunk Extraction]\n    C --> F[symbols table]\n    D --> G[edges table]\n    E --> H[chunks_fts table]\n```\n\n### Symbol Extraction\n\nThe `extractSymbols` function identifies named code entities based on language-specific patterns:\n\n| Language | Supported Symbols |\n|----------|-------------------|\n| TypeScript/JavaScript | functions, classes, interfaces, types, const arrow functions |\n| Python | functions, classes |\n| Go | functions, structs, interfaces |\n| Rust | functions, structs, enums, traits, impl blocks |\n| Markdown | headings |\n| JSON | config keys |\n\n资料来源：[src/extract.ts:1-80](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Edge Detection\n\nImport relationships are tracked as directed edges between modules. The `extractEdges` function processes different import syntaxes per language:\n\n- **TypeScript/JavaScript**: ES6 `import` and `require()` statements\n- **Python**: `from ... import` and `import` statements\n- **Go**: Import strings within double quotes\n- **Rust**: `use` and `mod` declarations\n- **JSON**: Top-level keys in configuration files\n\n资料来源：[src/extract.ts:100-160](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Chunk Generation\n\nCode files are split into semantic chunks for full-text search. 
The `codeChunks` function segments content into logical blocks based on:\n- Empty line boundaries\n- Token budget (target: ~300 tokens per chunk)\n- Language-specific token estimation via `estimateTokens`\n\n资料来源：[src/extract.ts:180-220](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Storage Layer\n\n### SQLite Kernel Schema\n\nThe kernel database uses SQLite with several specialized tables:\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| `files` | Tracked workspace files | `path`, `language`, `hash`, `indexed_at` |\n| `symbols` | Extracted code symbols | `ref`, `name`, `kind`, `file_path`, `line`, `signature`, `exported` |\n| `edges` | Import/dependency graph | `source_file`, `target_name`, `target_type`, `edge_type`, `line` |\n| `chunks_fts` | FTS5 virtual table for full-text search | `ref`, `path`, `title`, `text`, `kind` |\n| `memory` | Evidence-backed lessons | `id`, `claim`, `scope`, `confidence`, `created_at` |\n\n资料来源：[src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts), [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n\n## Query and Search System\n\n### Intent Classification\n\nQueries are classified into intents to optimize search strategy:\n\n| Intent | Trigger Keywords | Search Focus |\n|--------|------------------|--------------|\n| `code` | `function`, `class`, `implementation` | Symbol and code chunks |\n| `memory` | `memory`, `lesson`, `session` | Memory ledger |\n| `impact` | `impact`, `depends on`, `blast radius` | Dependency graph |\n| `historical` | `why`, `changed`, `commit` | Git history |\n| `architectural` | `architecture`, `flow`, `path`, `trace` | Graph traversal |\n| `docs` | `documentation`, `readme`, `guide` | Markdown chunks |\n| `exact` | symbols, paths, line references | Precise symbol matching |\n| `vague` | Default fallback | Broad FTS search 
|\n\n资料来源：[src/search.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n### Context Pack Assembly\n\nThe `createContextPack` function orchestrates the evidence gathering:\n\n1. Classify query intent\n2. Execute FTS5 search across chunks\n3. Apply query expansion with domain-specific term additions\n4. Score and rank hits using BM25 with intent-based bonuses\n5. Select hits within token budget\n6. Load related symbols and graph paths\n7. Assemble and return `EvidencePack`\n\n资料来源：[src/search.ts:200-280](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## CLI and MCP Integration\n\n### Command Structure\n\n| Command | Purpose | Key Options |\n|---------|---------|-------------|\n| `index` | Initial workspace indexing | `--workspace`, `--watch` |\n| `daemon` | Continuous indexing with file watching | `--workspace` |\n| `query` | Generate evidence pack | `--workspace`, `--budget`, `--json` |\n| `search` | Direct search without packing | `--workspace`, `--limit`, `--kind` |\n| `report` | Generate context report | `--workspace`, `--format` |\n| `memory add` | Store evidence-backed lessons | `--claim`, `--evidence`, `--scope` |\n| `server` | Start MCP stdio server | (none) |\n\n资料来源：[src/cli.ts:20-100](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n### MCP Server Tools\n\nThe MCP server exposes standardized tools for agent integration:\n\n- `context_pack(query, budget, scope)` - Primary tool, returning ranked, cited, token-budgeted evidence\n- `search_code(query, mode, filters)` - Code, docs, symbol, and memory search\n- `trace_path(from, to, edge_types)` - Graph traversal across the codebase\n- `impact_analysis(symbol_or_file)` - Reverse dependency analysis\n- `why_changed(symbol_or_file)` - Git history with current evidence\n- `recall_memory(query, scope)` - Search persistent lessons\n- `write_lesson(claim, evidence_refs, scope)` - Store new 
memories\n\n资料来源：[README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n## Report Generation\n\nThe report system aggregates workspace statistics and warnings:\n\n```mermaid\ngraph TD\n    A[generateReport] --> B[Index Status Check]\n    B --> C[File Statistics]\n    B --> D[Symbol Statistics]\n    B --> E[Edge Statistics]\n    B --> F[Warning Collection]\n    C --> G[renderMarkdown / renderHtml]\n    D --> G\n    E --> G\n    F --> G\n```\n\nReports support three output formats:\n- **markdown** - Plain text with markdown headings\n- **json** - Structured JSON with all report fields\n- **html** - Self-contained HTML document with styling\n\n资料来源：[src/report.ts:1-80](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n\n## Privacy and Security\n\nContextful operates entirely locally with no external API calls:\n\n- No embedding API calls for vector search\n- No source code uploads\n- No file editing or auto-fixes\n- No dependency installation in target workspace\n\nEvidence references are validated and stale references are rejected to maintain integrity of the memory system.\n\n资料来源：[README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n## Data Flow Summary\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant CLI as CLI/MCP Server\n    participant Indexer\n    participant Extractor\n    participant Search\n    participant Kernel as SQLite Kernel\n    \n    User->>CLI: index --workspace .\n    CLI->>Indexer: indexWorkspace()\n    Indexer->>Extractor: extractFile()\n    Extractor->>Kernel: Insert symbols, edges, chunks\n    Kernel-->>Indexer: Confirmation\n    \n    User->>CLI: query \"where is auth handled\"\n    CLI->>Search: searchContext()\n    Search->>Kernel: FTS5 query\n    Search->>Kernel: Graph traversal\n    Search->>Kernel: Memory search\n    Kernel-->>Search: Ranked hits\n    Search-->>CLI: EvidencePack\n    CLI-->>User: Compact context output\n```\n\n## Key Design Decisions\n\n| 
Decision | Rationale |\n|----------|-----------|\n| SQLite FTS5 over vector embeddings | Local-only operation, no external API dependencies |\n| Intent-based query routing | Optimizes search strategy based on query semantics |\n| BM25 scoring with bonuses | Balances relevance with domain-specific priorities |\n| Token-budgeted evidence packs | Prevents context overflow in LLM contexts |\n| Evidence refs as first-class citizens | Enables verifiable, traceable AI responses |\n\n资料来源：[src/search.ts:50-150](https://github.com/Inferensys/contextful/blob/main/src/search.ts), [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n\n---\n\n<a id='runtime-components'></a>\n\n## Runtime Components\n\n### 相关页面\n\n相关主题：[High-Level Architecture](#high-level-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n</details>\n\n# Runtime Components\n\n## Overview\n\nThe **Runtime Components** in Contextful encompass the services, daemons, and server processes that enable real-time code indexing, search, and context-aware information retrieval. 
These components operate as the execution layer of the application, providing persistent indexing, live workspace monitoring, and MCP (Model Context Protocol) server capabilities for AI agent integration.\n\nThe runtime layer bridges the gap between static code analysis and dynamic query resolution, allowing users and AI agents to query indexed repositories with token-budgeted evidence packs.\n\n---\n\n## Core Runtime Services\n\n### Indexing Daemon\n\nThe **Indexing Daemon** provides continuous workspace monitoring and automatic re-indexing when file changes are detected.\n\n#### Architecture\n\n```mermaid\ngraph TD\n    A[File System] -->|fs.watch| B[Debounce Timer]\n    B --> C{500ms elapsed?}\n    C -->|Yes| D[indexWorkspace]\n    D --> E[Kernel DB Update]\n    C -->|No| B\n    A -->|Initial| F[First Index]\n    F --> E\n```\n\n#### Key Functions\n\n| Function | Purpose | Location |\n|----------|---------|----------|\n| `watchWorkspace` | Monitors filesystem changes and triggers re-indexing | `src/indexer.ts:1-15` |\n| `indexWorkspace` | Performs full or incremental workspace indexing | `src/indexer.ts` |\n\n#### Implementation Details\n\nThe daemon uses Node.js `fs.watch()` with a 500ms debounce timer to batch rapid file changes into single indexing operations. 
This prevents excessive CPU usage during bulk file operations like git checkouts or build processes.\n\n```typescript\n// src/indexer.ts - Watch implementation pattern\nfs.watch(resolved, { recursive: true }, () => {\n  if (timer) clearTimeout(timer);\n  timer = setTimeout(async () => {\n    onIndex(await indexWorkspace({ workspace: resolved }));\n  }, 500);\n});\n```\n\nThe daemon outputs index results as JSON to stdout, making it suitable for IPC communication with parent processes.\n\n---\n\n### MCP Server (stdio Mode)\n\nThe **MCP Server** exposes Contextful's capabilities through the Model Context Protocol standard, enabling integration with AI coding assistants.\n\n#### Supported MCP Tools\n\n| Tool Name | Purpose | Input Parameters |\n|-----------|---------|------------------|\n| `context_pack` | Returns token-budgeted evidence bundle | `query`, `budget`, `scope` |\n| `search_code` | Code, docs, symbol, and memory search | `query`, `mode`, `filters` |\n| `trace_path` | Graph traversal across codebase | `from`, `to`, `edge_types` |\n| `impact_analysis` | Reverse dependency analysis | `symbol_or_file` |\n| `why_changed` | Git history with current evidence | `symbol_or_file` |\n| `recall_memory` | Search project lessons and sessions | `query`, `scope` |\n| `write_lesson` | Store evidence-backed lessons | `claim`, `evidence_refs`, `scope`, `confidence` |\n\n资料来源：[README.md:1-30](https://github.com/Inferensys/contextful/blob/main/README.md)\n\n#### Server Execution\n\n```bash\n# Run as MCP stdio server\nnpx @inferensys/contextful server\n```\n\nThe server operates in stdio mode, accepting JSON-RPC requests and responding with JSON-RPC results through stdin/stdout streams.\n\n---\n\n## CLI Runtime Commands\n\nThe CLI provides multiple entry points for runtime operations.\n\n### Command Reference\n\n| Command | Description | Key Options |\n|---------|-------------|-------------|\n| `cxf daemon` | Run local indexing daemon | `--workspace <path>` |\n| `cxf query` | Create 
evidence pack for query | `--workspace`, `--budget`, `--json` |\n| `cxf search` | Search without evidence pack | `--workspace`, `--limit`, `--kind` |\n| `cxf report` | Generate context report | `--workspace`, `--format` |\n| `cxf server` | Run MCP stdio server | - |\n| `cxf memory add` | Store evidence-backed lesson | `--claim`, `--evidence`, `--scope`, `--confidence` |\n\n资料来源：[src/cli.ts:1-80](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n### Daemon Mode\n\n```typescript\n// src/cli.ts - Daemon command registration\nprogram\n  .command(\"daemon\")\n  .description(\"Run the local indexing daemon for a workspace.\")\n  .option(\"--workspace <path>\", \"Workspace path.\", process.cwd())\n  .action(async (options: { workspace: string }) => {\n    await watchWorkspace(options.workspace, (result) => {\n      process.stdout.write(`${JSON.stringify(result, null, 2)}\\n`);\n    });\n  });\n```\n\n### Query Command\n\nThe query command compiles an evidence pack based on a natural language query and token budget:\n\n```typescript\n// src/cli.ts - Query command\nprogram\n  .command(\"query\")\n  .description(\"Create an evidence pack for a query.\")\n  .argument(\"<query>\", \"Query to answer from indexed context.\")\n  .option(\"--workspace <path>\", \"Workspace path.\", process.cwd())\n  .option(\"--budget <tokens>\", \"Approximate token budget.\", parseInteger, 2000)\n  .option(\"--json\", \"Print JSON instead of Markdown.\")\n  .action(async (query: string, options) => {\n    const pack = await createContextPack({ workspace: options.workspace, query, budget: options.budget });\n    process.stdout.write(options.json ? 
`${JSON.stringify(pack, null, 2)}\\n` : renderEvidencePackMarkdown(pack));\n  });\n```\n\n---\n\n## Evidence Pack System\n\n### Pack Creation Flow\n\n```mermaid\ngraph LR\n    A[Query Input] --> B[classifyQuery]\n    B --> C[searchContext]\n    C --> D{Results Available?}\n    D -->|Yes| E[Select & Rank Hits]\n    D -->|No| F[Expand Search Terms]\n    F --> C\n    E --> G[Load Symbols & Graph]\n    G --> H[Build EvidencePack]\n    H --> I[Save to Kernel DB]\n    I --> J[Return Pack]\n```\n\n### Pack Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique pack identifier with `ctx_` prefix |\n| `query` | string | Original query text |\n| `intent` | SearchIntent | Classified query intent |\n| `summary` | string | Natural language summary |\n| `citations` | SearchHit[] | Ranked evidence items |\n| `files` | FileInfo[] | Referenced files with reasons |\n| `symbols` | SymbolRecord[] | Matched symbol definitions |\n| `graphPaths` | GraphPath[] | Module/import relationships |\n| `memoryHits` | SearchHit[] | Recallable memory matches |\n| `confidence` | number | 0.1-0.92 confidence score |\n| `tokenEstimate` | number | Estimated token count of the pack |\n| `budget` | number | Maximum token budget |\n| `createdAt` | string | ISO timestamp |\n\n资料来源：[src/search.ts:150-200](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n### Confidence Calculation\n\nThe confidence score is computed using a clamped formula:\n\n```\nconfidence = clamp(0.25 + hits * 0.05 + graphPaths * 0.02 + memoryHits * 0.05, 0.1, 0.92)\n```\n\nThis ensures a minimum confidence of 10% even with poor matches and a maximum of 92% to maintain epistemic humility.\n\n资料来源：[src/search.ts:80-82](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n---\n\n## Workspace Resolution\n\n### Path Resolution Flow\n\n```mermaid\ngraph TD\n    A[CLI Input Path] --> B{Is Absolute?}\n    B -->|No| C[Resolve relative to cwd]\n    B -->|Yes| D[Use as-is]\n  
  C --> E[validateWorkspace]\n    D --> E\n    E --> F{Valid Directory?}\n    F -->|Yes| G[Load Kernel DB]\n    F -->|No| H[Create New Index]\n```\n\nThe `resolveWorkspace()` utility normalizes all workspace paths, while `ensureIndexed()` guarantees the workspace has been indexed before search operations proceed.\n\n资料来源：[src/util.ts:1-20](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n\n---\n\n## Report Generation\n\nThe report system generates comprehensive context reports in multiple formats.\n\n### Supported Formats\n\n| Format | Renderer Function |\n|--------|-------------------|\n| `markdown` | `renderMarkdown()` |\n| `json` | `JSON.stringify()` |\n| `html` | `renderHtml()` |\n\n### Report Contents\n\n- **Summary**: Overview of indexed state\n- **Statistics**: Token counts, file counts, index timestamps\n- **Warnings**: Potential issues (up to 20)\n- **Token Savings**: Estimated efficiency metrics\n\n资料来源：[src/report.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n\n---\n\n## Error Handling\n\n### Workspace Validation\n\nRuntime components validate workspace paths before operations:\n\n```typescript\n// Validation checks include:\n// 1. Directory exists and is readable\n// 2. Kernel DB can be opened or created\n// 3. 
Index state is consistent\n```\n\n### Broken JSON Handling\n\nWhen parsing `package.json` during indexing, broken JSON is handled gracefully:\n\n```typescript\n// src/extract.ts - JSON error handling\ntry {\n  const parsed = JSON.parse(content) as Record<string, unknown>;\n  // Process dependencies, devDependencies, scripts\n} catch {\n  // Broken JSON receives text chunks; syntax diagnostics out of scope\n}\n```\n\n---\n\n## Memory and Lessons\n\n### Lesson Storage\n\nLessons are evidence-backed statements stored for recall during future queries:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `claim` | string | required | The lesson statement |\n| `evidence` | string[] | required | File refs (e.g., `file:src/auth.ts:1-20`) |\n| `scope` | string | \"repo\" | Memory scope (repo, global) |\n| `confidence` | number | 0.7 | Confidence score (0-1) |\n\n资料来源：[src/cli.ts:60-80](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n### Memory Recall\n\nMemory hits are weighted in evidence pack generation, providing higher confidence when prior lessons match the query context.\n\n---\n\n## See Also\n\n- [CLI Reference](../cli.md) - Complete CLI command documentation\n- [Indexing System](../indexing.md) - Code analysis and symbol extraction\n- [Search API](../search-api.md) - Query classification and ranking\n\n---\n\n<a id='search-engine'></a>\n\n## Search Engine\n\n### 相关页面\n\n相关主题：[Context Packs](#context-packs), [SQLite Database Schema](#sqlite-database)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- 
[src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n</details>\n\n# Search Engine\n\n## Overview\n\nThe Search Engine is the core retrieval system in Contextful, designed to provide intelligent, evidence-backed context for agent queries. It combines full-text search (FTS), symbol indexing, dependency graph traversal, and memory recall to deliver ranked, cited results within a configurable token budget.\n\nThe system serves as the foundation for multiple interfaces: CLI commands (`query`, `search`), MCP server tools (`search_code`, `context_pack`), and report generation.\n\n资料来源：[src/search.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Query Input] --> B[Query Classification]\n    B --> C{Intent Type}\n    C -->|code/docs| D[Full-Text Search]\n    C -->|symbols| E[Symbol Lookup]\n    C -->|memory| F[Memory Ledger Search]\n    C -->|impact| G[Graph Traversal]\n    C -->|historical| H[Git History + Search]\n    D --> I[BM25 Ranking]\n    E --> J[Symbol Index]\n    F --> K[Memory DB]\n    G --> L[Edge Database]\n    H --> M[Git Operations]\n    I --> N[Result Scoring]\n    J --> N\n    K --> N\n    L --> N\n    N --> O[Context Pack]\n```\n\n### Core Components\n\n| Component | File | Responsibility |\n|-----------|------|----------------|\n| Search Kernel | `src/search.ts` | Core search logic and ranking |\n| Query Classifier | `src/search.ts` | Intent detection |\n| FTS Engine | `src/search.ts` | Full-text search using SQLite FTS5 |\n| Graph Tracer | `src/search.ts` | Dependency graph traversal |\n| Memory Store | `src/memory.ts` | Evidence-backed memory recall |\n\n资料来源：[src/search.ts:50-120](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Query Classification\n\nThe search engine classifies each query into one of nine intent types to optimize retrieval strategy.\n\n### SearchIntent Types\n\n| Intent | Trigger Keywords | Search Strategy |\n|--------|------------------|-----------------|\n| `code` | `code`, `function`, `class`, `implement`, `module` | FTS + symbol lookup 
|\n| `docs` | `resource`, `docs`, `readme`, `how to` | FTS on markdown/json |\n| `symbols` | `define`, `interface`, `type`, `symbol` | Direct symbol index |\n| `memory` | `memory`, `remember`, `lesson`, `learned`, `session` | Memory ledger query |\n| `impact` | `impact`, `affected`, `depends`, `blast radius` | Reverse dependency graph |\n| `historical` | `why`, `changed`, `commit`, `history` | Git history + current search |\n| `architectural` | `architecture`, `flow`, `trace`, `connects` | Graph path analysis |\n| `exact` | Code patterns, paths, line refs | Direct file/symbol lookup |\n| `vague` | Default | Broad FTS + graph |\n\n```typescript\nfunction classifyQuery(query: string): SearchIntent {\n  const q = query.toLowerCase();\n  if (/\\b(code|function|class|implement|module)\\b/.test(q)) return \"code\";\n  if (/\\b(define|interface|type|symbol)\\b/.test(q)) return \"symbols\";\n  if (/\\b(memory|remember|lesson|learned|sessions?)\\b/.test(q)) return \"memory\";\n  // ... additional classifications\n}\n```\n\n资料来源：[src/search.ts:1-30](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Search Flow\n\n### Main Search Pipeline\n\n```mermaid\nsequenceDiagram\n    participant CLI as CLI/MCP\n    participant Search as searchContext()\n    participant Kernel as Kernel DB\n    participant FTS as FTS5 Engine\n    participant Graph as Graph DB\n    participant Memory as Memory Store\n\n    CLI->>Search: query, workspace, limit\n    Search->>Kernel: ensureIndexed()\n    Search->>Kernel: addQuery()\n    Search->>FTS: ftsQuery(expandedTerms)\n    FTS-->>Search: ranked rows (BM25)\n    Search->>Search: scoreFromRank()\n    Search->>Graph: loadGraphPaths()\n    Search-->>CLI: {intent, hits}\n```\n\n### Full-Text Search Query Builder\n\nThe `ftsQuery` function transforms user queries into FTS5-compatible search strings:\n\n```typescript\nfunction ftsQuery(query: string): string {\n  const terms = expandedTerms(query);\n  return Array.from(new Set(terms.map((term) => term.toLowerCase())))\n    .filter((term) => 
!STOPWORDS.has(term))\n    .slice(0, 14)\n    .map((term) => `${term}*`)\n    .join(\" OR \");\n}\n```\n\nKey behaviors:\n- Expands terms based on query context (e.g., \"tool\" → \"server\", \"tool\", \"callTool\")\n- Filters stopwords: `where`, `what`, `which`, `when`, `how`, `are`, `the`, `for`, `with`, `and`, `or`, `to`\n- Limits to 14 terms maximum\n- Appends wildcard `*` for prefix matching\n\n资料来源：[src/search.ts:200-280]()\n\n## Scoring System\n\n### Rank-to-Score Transformation\n\nThe `scoreFromRank` function converts BM25 ranks into relevance scores (0-10 scale) with domain-specific bonuses:\n\n```typescript\nfunction scoreFromRank(rank: number, query: string, corpus: string): number {\n  const base = 10 / (1 + Math.abs(rank));\n  let bonus = 0;\n  \n  // Domain-specific bonuses\n  if (/\\b(tool|tools|registered|register)\\b/.test(q) && corpus.includes(\"server.tool(\")) {\n    bonus += 9;\n  }\n  if (/\\bmcp\\b/.test(q) && corpus.includes(\"mcp-server\")) {\n    bonus += 4;\n  }\n  \n  return clamp(base + bonus, 0.1, 10);\n}\n```\n\n### Scoring Bonuses Matrix\n\n| Query Pattern | Content Match | Bonus |\n|---------------|---------------|-------|\n| `tool/tools/register` | `server.tool(` | +9 |\n| `mcp` | `mcp-server` | +4 |\n| `where registered` | `function runMcpServer` | +4 |\n| `tool` query | `src/search.ts` | -8 |\n| `memory` query | `src/memory.ts` | +5 |\n| `memory` query | `src/search.ts` | -16 |\n\nThis anti-gaming mechanism penalizes results from the search implementation itself when irrelevant.\n\n资料来源：[src/search.ts:240-320]()\n\n## Term Expansion\n\nThe `expandedTerms` function intelligently expands query terms based on semantic context:\n\n```typescript\nfunction expandedTerms(query: string): string[] {\n  const lower = query.toLowerCase();\n  const additions: string[] = [];\n  \n  if (/\\b(tool|tools|registered|register)\\b/.test(lower)) {\n    additions.push(\"server\", \"tool\", \"tools\", \"callTool\");\n  }\n  if (/\\bmcp\\b/.test(lower)) 
{\n    additions.push(\"mcp\", \"server\", \"stdio\");\n  }\n  if (/\\bmemory|memories|remember|remembers|lesson|lessons|learned|session|sessions\\b/.test(lower)) {\n    additions.push(\"memory\", \"memories\", \"lesson\", \"lessons\", \"claim\", \"ledger\", \"evidence\");\n  }\n  if (/\\bimpact|depends|dependents|uses\\b/.test(lower)) {\n    additions.push(\"imports\", \"tests\", \"edges\");\n  }\n  \n  return [...terms, ...additions];\n}\n```\n\n资料来源：[src/search.ts:320-380]()\n\n## CLI Commands\n\n### Query Command\n\n```bash\ncxf query \"<query>\" --workspace <path> --budget <tokens> --json\n```\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `query` | string | required | Query to answer from indexed context |\n| `--workspace` | path | `cwd()` | Workspace path |\n| `--budget` | number | 2000 | Approximate token budget |\n| `--json` | flag | false | Output JSON instead of Markdown |\n\n### Search Command\n\n```bash\ncxf search \"<query>\" --workspace <path> --limit <count> --kind <kind>\n```\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `query` | string | required | Search query |\n| `--workspace` | path | `cwd()` | Workspace path |\n| `--limit` | number | 10 | Maximum hits |\n| `--kind` | enum | `all` | Search category: `all\\|code\\|docs\\|symbols\\|memory` |\n\n资料来源：[src/cli.ts:40-80]()\n\n## MCP Server Tools\n\nThe search engine exposes the following MCP tools:\n\n### search_code\n\n```typescript\nserver.tool(\"search_code\", \"Search indexed code, docs, symbols, and stored context\", {\n  query: z.string(),\n  mode: z.enum([\"all\", \"code\", \"docs\", \"symbols\", \"memory\"]).optional(),\n  limit: z.number().optional(),\n  filters: z.record(z.string(), z.unknown()).optional()\n});\n```\n\n### trace_path\n\n```typescript\nserver.tool(\"trace_path\", \"Trace graph relationships between files, symbols, modules\", {\n  from: z.string(),\n  to: z.string().optional(),\n  
edge_types: z.array(z.string()).optional(),\n  limit: z.number().optional()\n});\n```\n\n### impact_analysis\n\n```typescript\nserver.tool(\"impact_analysis\", \"Find likely dependents and tests\", {\n  symbol_or_file: z.string(),\n  limit: z.number().optional()\n});\n```\n\n### why_changed\n\n```typescript\nserver.tool(\"why_changed\", \"Explain why a file/symbol may have changed\", {\n  symbol_or_file: z.string(),\n  limit: z.number().optional()\n});\n```\n\n资料来源：[src/mcp-server.ts:1-80]()\n\n## Context Pack\n\nThe `createContextPack` function assembles comprehensive evidence bundles:\n\n```typescript\nexport async function createContextPack(options: {\n  workspace?: string;\n  query: string;\n  budget?: number;\n  scope?: string;\n}): Promise<EvidencePack>\n```\n\n### EvidencePack Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique pack identifier (`ctx_<hash>`) |\n| `query` | string | Original query |\n| `scope` | string | Search scope (default: `repo`) |\n| `intent` | SearchIntent | Classified intent |\n| `summary` | string | Human-readable summary |\n| `citations` | SearchHit[] | Ranked search results |\n| `files` | FileContext[] | Grouped file references |\n| `symbols` | SymbolRecord[] | Relevant symbols (≤20) |\n| `graphPaths` | GraphPath[] | Dependency connections (≤20) |\n| `memoryHits` | SearchHit[] | Memory matches |\n| `confidence` | number | Confidence score (0.1-0.92) |\n| `tokenEstimate` | number | Estimated token count |\n| `budget` | number | Token budget used |\n| `createdAt` | string | ISO timestamp |\n\n### Confidence Calculation\n\n```typescript\nfunction confidenceFor(hits: SearchHit[], graphPaths: GraphPath[], memoryHits: SearchHit[]): number {\n  return clamp(\n    0.25 + \n    hits.length * 0.05 + \n    graphPaths.length * 0.02 + \n    memoryHits.length * 0.05,\n    0.1,\n    0.92\n  );\n}\n```\n\n资料来源：[src/search.ts:400-480]()\n\n## Graph Traversal\n\nThe `traceGraph` function performs 
dependency graph analysis:\n\n```typescript\nexport async function traceGraph(options: {\n  workspace?: string;\n  from: string;\n  to?: string;\n  edgeTypes?: string[];\n  limit?: number;\n}): Promise<GraphPath[]>\n```\n\n### Edge Types\n\n| Edge Type | Direction | Description |\n|-----------|-----------|-------------|\n| `IMPORTS` | File → Module | Import/require statements |\n| `DEFINES` | File → Symbol | Symbol definitions |\n| `CONFIGURES` | File → Config | Configuration keys |\n| `TESTS` | Test → Source | Test file relationships |\n\n### Impact Analysis\n\n```typescript\nexport async function impactAnalysis(options: {\n  workspace?: string;\n  target: string;\n  limit?: number;\n}): Promise<{\n  target: string;\n  forward: string[];\n  reverse: string[];\n  tests: string[];\n}>\n```\n\nReturns forward dependencies, reverse dependents, and likely test files for a given symbol or file.\n\n资料来源：[src/search.ts:480-550]()\n\n## Utility Functions\n\n### lineRange\n\nExtracts a specific line range from text:\n\n```typescript\nexport function lineRange(text: string, startLine: number, endLine: number): string {\n  const lines = text.split(/\\r?\\n/);\n  return lines.slice(Math.max(0, startLine - 1), Math.min(lines.length, endLine)).join(\"\\n\");\n}\n```\n\n### clamp\n\nConstrains values within bounds:\n\n```typescript\nexport function clamp(value: number, min: number, max: number): number {\n  return Math.max(min, Math.min(max, value));\n}\n```\n\n### unique\n\nDeduplicates arrays:\n\n```typescript\nexport function unique<T>(items: T[]): T[] {\n  return Array.from(new Set(items));\n}\n```\n\n### isLikelyBinary\n\nDetects binary files by checking for null bytes:\n\n```typescript\nexport function isLikelyBinary(buffer: Buffer): boolean {\n  const sample = buffer.subarray(0, Math.min(buffer.length, 4096));\n  return sample.includes(0);\n}\n```\n\n资料来源：[src/util.ts:1-50]()\n\n## Data Models\n\n### SearchHit\n\n```typescript\ninterface SearchHit {\n  ref: string;        
// Format: \"file:path:start-end\"\n  path: string;       // File path\n  kind: string;       // \"chunk\", \"symbol\", \"memory\", \"doc\"\n  title: string;      // Display title\n  text: string;       // Content snippet\n  score: number;      // Relevance score\n  line?: number;      // Starting line number\n}\n```\n\n### SymbolRecord\n\n```typescript\ninterface SymbolRecord {\n  ref: string;\n  name: string;\n  kind: string;       // \"function\", \"class\", \"interface\", \"type\", etc.\n  filePath: string;\n  line: number;\n  signature?: string;\n  exported?: boolean;\n}\n```\n\n资料来源：[src/search.ts:100-150]()\n\n## Index Status\n\nThe `getIndexStatus` function returns workspace indexing metadata:\n\n```typescript\nexport async function getIndexStatus(options: { workspace?: string }): Promise<IndexStatus>\n```\n\n### IndexStatus Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `workspace` | string | Workspace path |\n| `languageCounts` | Record<string, number> | File count per language |\n| `warnings` | string[] | Index warnings |\n| `lastIndexed` | string | ISO timestamp of last index |\n| `totalChunks` | number | Total indexed chunks |\n\n资料来源：[src/search.ts:550-600]()\n\n## Summary\n\nThe Search Engine provides Contextful's intelligent retrieval capabilities through:\n\n1. **Intent Classification** - Automatically routes queries to optimal search strategies\n2. **Full-Text Search** - SQLite FTS5 with BM25 ranking and domain-specific scoring\n3. **Symbol Index** - Fast lookup of code definitions across languages\n4. **Graph Traversal** - Dependency analysis and impact tracking\n5. **Memory Integration** - Recall of past lessons and evidence-backed claims\n6. **Token Budgeting** - Constrains output to specified budget limits\n7. 
**Confidence Scoring** - Quantifies result reliability\n\nAll search operations flow through a unified kernel database that combines FTS chunks, symbol records, and edge relationships for comprehensive context retrieval.\n\n---\n\n<a id='context-packs'></a>\n\n## Context Packs\n\n### 相关页面\n\n相关主题：[Search Engine](#search-engine), [Memory Ledger](#memory-ledger)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/types.ts](https://github.com/Inferensys/contextful/blob/main/src/types.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n</details>\n\n# Context Packs\n\nContext Packs are the core output format of Contextful, providing AI agents with compact, ranked, and cited evidence bundles that fit within a specified token budget. Instead of forcing agents to read dozens of arbitrary files, Context Packs deliver precisely the evidence needed to answer a specific query.\n\n## Overview\n\nA Context Pack is a structured evidence package generated by the `context_pack()` MCP tool or the `cxf query` CLI command. 
It contains:\n\n- Ranked code and documentation citations matching the query\n- Related symbols (functions, classes, interfaces) from matching files\n- Graph paths connecting related components\n- Memory hits from evidence-backed lessons\n- A confidence score and token budget accounting\n\nThe pack is designed to be consumed directly by an LLM agent, providing traceable citations and a clear summary of what evidence was found.\n\n## Data Model\n\n### EvidencePack Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | `string` | Unique identifier (format: `ctx_<hash>`) |\n| `query` | `string` | The original search query |\n| `scope` | `string` | Search scope (e.g., \"repo\") |\n| `intent` | `SearchIntent` | Classified query intent |\n| `summary` | `string` | Human-readable summary of findings |\n| `citations` | `SearchHit[]` | Ranked evidence items |\n| `files` | `FileContext[]` | Grouped file references with reasons |\n| `symbols` | `SymbolRecord[]` | Relevant symbols from matched files |\n| `graphPaths` | `GraphPath[]` | Graph traversals between components |\n| `memoryHits` | `SearchHit[]` | Memory/lesson hits |\n| `confidence` | `number` | Estimated confidence (0.1-0.92) |\n| `tokenEstimate` | `number` | Estimated token count of pack |\n| `budget` | `number` | Requested token budget |\n| `createdAt` | `string` | ISO timestamp of creation |\n\n资料来源：[src/search.ts:search.ts]()\n\n### SearchHit Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `ref` | `string` | Reference identifier (e.g., `file:src/auth.ts:1-20`) |\n| `path` | `string` | File path |\n| `title` | `string` | Display title |\n| `kind` | `string` | Hit kind: code, doc, symbol, memory |\n| `excerpt` | `string` | Relevant text excerpt |\n| `score` | `number` | Relevance score |\n| `rank` | `number` | BM25 rank |\n\n### SearchIntent Enum\n\n| Intent | Trigger Keywords |\n|--------|-----------------|\n| `exact` | Code patterns, paths, symbol names 
with special chars |\n| `symbol` | Function names, class names, method calls |\n| `test` | test, spec, mock, fixture, unit |\n| `memory` | memory, lesson, learned, session |\n| `impact` | impact, affected, depends, blast radius |\n| `historical` | why, changed, commit, history, regression |\n| `architectural` | architecture, flow, trace, connects, imports |\n| `docs` | resource, docs, documentation, guide, readme |\n| `vague` | Default for generic queries |\n\n资料来源：[src/search.ts:search.ts]()\n\n## Creation Flow\n\nThe `createContextPack` function orchestrates the entire pack creation process:\n\n```mermaid\ngraph TD\n    A[createContextPack] --> B[searchContext]\n    B --> C[classifyQuery]\n    C --> D[ftsQuery + expandedTerms]\n    D --> E[FTS Search on chunks_fts]\n    E --> F[scoreFromRank]\n    F --> G[Select Hits within Budget]\n    G --> H[loadSymbolsForPaths]\n    G --> I[loadGraphPaths]\n    G --> J[Filter memoryHits]\n    H --> K[Build EvidencePack]\n    I --> K\n    J --> K\n    K --> L[saveEvidencePack]\n    L --> M[Return EvidencePack]\n```\n\n### Step 1: Search Context\n\nThe process begins by classifying the query intent and executing full-text search:\n\n```typescript\nconst search = await searchContext({ workspace, query, limit: budget * 2 });\nconst selected = selectWithinBudget(search.hits, budget);\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 2: Budget-Aware Selection\n\nHits are added greedily, in order, and selection stops as soon as the next hit would bring the running token estimate up to or past the budget:\n\n```typescript\nfunction selectWithinBudget(hits: SearchHit[], budget: number): SearchHit[] {\n  const selected: SearchHit[] = [];\n  let tokenEstimate = 0;\n  for (const hit of hits) {\n    const est = estimateTokens(hit.excerpt || hit.title);\n    if (tokenEstimate + est >= budget) break;\n    selected.push(hit);\n    tokenEstimate += est;\n  }\n  return selected;\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 3: Symbol Loading\n\nFor each selected file, related symbols are loaded (up to 
20 total):\n\n```typescript\nconst symbols = loadSymbolsForPaths(kernel.db, paths).slice(0, 20);\n```\n\nThe symbols query joins against the `symbols` table:\n\n```typescript\nSELECT ref, name, kind, file_path, line, signature, exported \nFROM symbols \nWHERE file_path IN (...)\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 4: Graph Path Loading\n\nGraph paths connect files through import/dependency relationships:\n\n```typescript\nconst graphPaths = loadGraphPaths(kernel.db, paths, 20);\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n### Step 5: Memory Hit Extraction\n\nMemory hits are filtered from selected hits by kind:\n\n```typescript\nconst memoryHits = selected.filter((hit) => hit.kind === \"memory\");\n```\n\n### Step 6: Confidence Calculation\n\nConfidence is calculated using a clamped formula:\n\n```typescript\nfunction confidenceFor(hits, graphPaths, memoryHits): number {\n  return clamp(\n    0.25 + hits.length * 0.05 + graphPaths.length * 0.02 + memoryHits.length * 0.05,\n    0.1,\n    0.92\n  );\n}\n```\n\n- Base: 0.25\n- Each hit: +0.05\n- Each graph path: +0.02\n- Each memory hit: +0.05\n- Clamped to [0.1, 0.92]\n\n资料来源：[src/search.ts:search.ts]()\n\n## Query Classification\n\nThe `classifyQuery` function determines the search intent based on keywords:\n\n```typescript\nfunction classifyQuery(q: string): SearchIntent {\n  const lower = q.toLowerCase();\n  if (/[`\"'#.:/]/.test(q) || /\\b[A-Z][A-Za-z0-9_]{2,}\\b/.test(q)) return \"exact\";\n  if (/\\b(test|spec|mock|fixture)\\b/.test(q)) return \"test\";\n  if (/\\b(memory|lesson|learned|session|sessions)\\b/.test(q)) return \"memory\";\n  if (/\\b(impact|affected|depends|dependents|blast radius)\\b/.test(q)) return \"impact\";\n  if (/\\b(why|changed|commit|history|regression|introduced)\\b/.test(q)) return \"historical\";\n  if (/\\b(architecture|flow|path|trace|connects|calls|imports)\\b/.test(q)) return \"architectural\";\n  if (/\\b(resource|docs|documentation|guide|readme|how 
to|setup)\\b/.test(q)) return \"docs\";\n  return \"vague\";\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Term Expansion\n\nThe `expandedTerms` function adds related terms to improve recall for specific domains:\n\n```typescript\nfunction expandedTerms(query: string): string[] {\n  const additions: string[] = [];\n  if (/\\b(tool|tools|registered|register)\\b/.test(lower)) {\n    additions.push(\"server\", \"tool\", \"tools\", \"callTool\");\n  }\n  if (/\\bmcp\\b/.test(lower)) {\n    additions.push(\"mcp\", \"server\", \"stdio\");\n  }\n  if (/\\bmemory|memories|remember|remembers|lesson|lessons\\b/.test(lower)) {\n    additions.push(\"memory\", \"memories\", \"lesson\", \"lessons\", \"claim\", \"ledger\", \"evidence\");\n  }\n  if (/\\bimpact|depends|dependents|uses\\b/.test(lower)) {\n    additions.push(\"imports\", \"tests\", \"edges\");\n  }\n  return [...terms, ...additions];\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Scoring Algorithm\n\nThe `scoreFromRank` function calculates relevance scores:\n\n```typescript\nfunction scoreFromRank(rank: number, q: string): number {\n  let bonus = 0;\n  const lower = q.toLowerCase();\n  \n  if (/\\bmemory|memories|remember|remembers|lesson|lessons|sessions\\b/.test(q)) {\n    if (lower.includes(\"memory ledger\")) bonus += 7;\n    if (lower.includes(\"src/memory.ts\")) bonus += 5;\n    if (lower.includes(\"readme.md\")) bonus += 4;\n    if (lower.includes(\"src/search.ts\")) bonus -= 16;\n  }\n  if (/\\b(where|how)\\b/.test(q) && lower.includes(\"config-key\")) bonus -= 2;\n  \n  return 10 / (1 + Math.abs(rank)) + bonus;\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## CLI Usage\n\nThe `query` command creates Context Packs via CLI:\n\n```bash\ncxf query \"<query>\" --workspace <path> --budget 2000 --json\n```\n\n### Options\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `--workspace` | `path` | `cwd` | Workspace path |\n| `--budget` | `number` | `2000` | Approximate 
token budget |\n| `--json` | `flag` | `false` | Output as JSON instead of Markdown |\n\n### Example Output\n\n```\n# Context Pack ctx_abc123\n\nQuery: where is user auth handled\nIntent: vague\nConfidence: 59%\nToken estimate: 1850/2000\n\nFound 5 evidence items for a vague query, with 2 graph connections and 1 memory hit.\n\n## Citations\n- file:src/auth.ts:1-50 (auth module)\n  Handles user authentication via JWT tokens...\n- file:src/middleware/auth.ts:1-30 (auth middleware)\n  Express middleware for auth validation...\n\n## Graph Paths\n- src/auth.ts --IMPORTS--> src/utils/jwt.ts (src/auth.ts:5)\n- src/middleware/auth.ts --IMPORTS--> src/auth.ts (src/middleware/auth.ts:3)\n\n## Memory Hits\n- memory:lesson:1: JWT tokens should be validated on every protected route.\n```\n\n资料来源：[src/cli.ts:cli.ts]()\n\n## Rendering\n\nContext Packs can be rendered in multiple formats via `renderEvidencePackMarkdown`:\n\n```typescript\nexport function renderEvidencePackMarkdown(pack: EvidencePack): string {\n  const lines = [\n    `# Context Pack ${pack.id}`,\n    \"\",\n    `Query: ${pack.query}`,\n    `Intent: ${pack.intent}`,\n    `Confidence: ${Math.round(pack.confidence * 100)}%`,\n    `Token estimate: ${pack.tokenEstimate}/${pack.budget}`,\n    \"\",\n    pack.summary,\n    \"\",\n    \"## Citations\"\n  ];\n  // ... 
citations, graph paths, memory hits\n}\n```\n\n资料来源：[src/report.ts:report.ts]()\n\n## Chunk Extraction\n\nContextual chunks are extracted during indexing for searchability:\n\n```mermaid\ngraph LR\n    A[Source File] --> B[Language Detection]\n    B --> C[extractSymbols]\n    B --> D[extractEdges]\n    B --> E[extractChunks]\n    C --> F[Symbol Table]\n    D --> G[Edge Table]\n    E --> H[Chunk Table]\n```\n\n### Supported Languages\n\n| Language | Symbol Patterns |\n|----------|-----------------|\n| TypeScript/JavaScript | function, class, interface, type, const arrow |\n| Python | def, class |\n| Go | func, type struct/interface |\n| Rust | fn, struct, enum, trait, impl |\n| Markdown | headings (H1-H6) |\n| JSON | top-level keys |\n\n资料来源：[src/extract.ts:extract.ts]()\n\n### Chunking Strategy\n\n- **Code files**: Divided into blocks of up to 60 lines that start every 50 lines, so consecutive blocks overlap by 10 lines\n- **Markdown files**: Split by headings, with the heading as the chunk title\n- **Token estimation**: Used for both selection and budget accounting\n\n```typescript\nfunction codeChunks(relativePath: string, content: string): ChunkRecord[] {\n  const lines = content.split(/\\r?\\n/);\n  const chunks: ChunkRecord[] = [];\n  // Split into ~60-line blocks with overlap\n  for (let start = 1; start <= lines.length; start += 50) {\n    const end = Math.min(start + 60 - 1, lines.length);\n    const text = lineRange(content, start, end);\n    chunks.push({\n      ref: fileRef(relativePath, start, end),\n      filePath: relativePath,\n      startLine: start,\n      endLine: end,\n      kind: \"file\",\n      title: `${relativePath}:${start}-${end}`,\n      text,\n      tokenEstimate: estimateTokens(text)\n    });\n  }\n  return chunks;\n}\n```\n\n资料来源：[src/extract.ts:extract.ts]()\n\n## Summary Generation\n\nThe `summarizePack` function generates human-readable summaries:\n\n```typescript\nfunction summarizePack(\n  query: string,\n  intent: SearchIntent,\n  hits: SearchHit[],\n  graphPaths: 
GraphPath[],\n  memoryHits: SearchHit[]\n): string {\n  if (hits.length === 0) {\n    return `No indexed evidence matched \"${query}\". Re-index or broaden the query.`;\n  }\n  return `Found ${hits.length} evidence item${hits.length === 1 ? \"\" : \"s\"} ` +\n    `for a ${intent} query, with ${graphPaths.length} graph connection${graphPaths.length === 1 ? \"\" : \"s\"} ` +\n    `and ${memoryHits.length} memory hit${memoryHits.length === 1 ? \"\" : \"s\"}.`;\n}\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Persistence\n\nEvidence packs are saved to the kernel database for audit and retrieval:\n\n```typescript\nsaveEvidencePack(kernel.db, { \n  id: pack.id, \n  query: pack.query, \n  tokenEstimate, \n  json: JSON.stringify(pack) \n});\n```\n\n资料来源：[src/search.ts:search.ts]()\n\n## Design Principles\n\n1. **Token budget awareness**: Never exceed the requested budget; select the most relevant items first\n2. **Cited evidence**: Every piece of information is traceable to a specific file and line range\n3. **Intent-driven**: Query classification shapes what gets searched and how results are interpreted\n4. **Graph connectivity**: Beyond matching files, show how they connect through imports and dependencies\n5. 
**Memory integration**: Blend indexed content with evidence-backed lessons from prior sessions\n\n---\n\n<a id='memory-ledger'></a>\n\n## Memory Ledger\n\n### 相关页面\n\n相关主题：[Context Packs](#context-packs), [Search Engine](#search-engine)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n- [README.md](https://github.com/Inferensys/contextful/blob/main/README.md)\n</details>\n\n# Memory Ledger\n\nThe Memory Ledger is Contextful's evidence-backed persistent memory system that enables AI agents to retain and recall learned lessons across sessions. Unlike ephemeral context that disappears when a session ends, the Memory Ledger stores structured knowledge annotated with source evidence, allowing agents to build cumulative understanding of a codebase over time.\n\n## Overview\n\nThe Memory Ledger solves a fundamental problem in AI-assisted development: knowledge gained during one session is lost in the next. When an agent discovers how authentication works, identifies a fragile dependency, or learns a non-obvious architectural pattern, that knowledge typically vanishes when the session ends.\n\nContextful's approach requires every stored memory to be anchored to concrete evidence—file references, code symbols, or prior context packs. 
This design prevents hallucinated or unsubstantiated memories from polluting the knowledge base and ensures that recalled lessons can be traced back to their source.\n\nThe system operates entirely locally with no external API calls, embedding services, or cloud dependencies. All memory data remains within the workspace's SQLite database.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Agent Session] -->|write_lesson| B[Memory Ledger]\n    A -->|recall_memory| C[Memory Search]\n    B -->|evidence refs| D[Evidence Pack]\n    C -->|cited memories| A\n    D -->|citations| E[Source Files]\n    F[Workspace DB] -->|stores| B\n    F -->|stores| C\n```\n\n### Core Components\n\n| Component | Role | Source |\n|-----------|------|--------|\n| Memory Storage | SQLite-backed persistent storage for lessons | `src/db.ts` |\n| Memory Search | FTS-enabled retrieval of memories by query | `src/search.ts` |\n| Evidence Validation | Ensures evidence refs are valid before storage | `src/mcp-server.ts` |\n| Confidence Scoring | Assigns credibility scores to stored memories | `src/cli.ts:85` |\n\n## Data Model\n\n### Memory Record Structure\n\nEach memory in the ledger contains the following fields:\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique identifier (prefixed with `memory:`) |\n| `claim` | string | The substantive lesson or observation |\n| `scope` | string | Granularity level: `repo`, `file`, `symbol`, or `session` |\n| `evidenceRefs` | string[] | Validated references to source evidence |\n| `confidence` | number | Credibility score from 0.0 to 1.0 |\n| `status` | string | Current state: `active`, `superseded`, or `stale` |\n| `supersedes` | string? 
| ID of the memory this replaces (if any) |\n\n### Evidence Reference Formats\n\nValid evidence references that can be attached to memories:\n\n| Format | Example | Purpose |\n|--------|---------|---------|\n| File range | `file:src/auth.ts:10-40` | Reference specific lines in a file |\n| Symbol | `symbol:src/auth.ts#AuthService:12` | Point to a specific code symbol |\n| Context pack | `pack:ctx_abc123` | Reference a prior evidence pack |\n\n资料来源：[README.md:54-56]()\n\nEvidence references must come from search results or context packs—arbitrary references are rejected. This prevents storing claims without verifiable backing.\n\n## Memory Scopes\n\nThe scope field determines the durability and applicability of a memory:\n\n| Scope | Description | Persistence |\n|-------|-------------|-------------|\n| `repo` | Project-wide lessons applicable across sessions | Permanent |\n| `file` | File-specific knowledge | Permanent |\n| `symbol` | Symbol-level lessons | Permanent |\n| `session` | Ephemeral session-scoped learnings | Lost on session end |\n\nThe default scope is `repo`, reflecting the assumption that most valuable memories have project-wide relevance.\n\n资料来源：[src/cli.ts:73]()\n\n## Writing Memories\n\n### CLI Usage\n\n```bash\ncxf memory add \\\n  --claim \"AuthService.validateToken() throws on expired tokens without catching\" \\\n  --evidence \"file:src/auth.ts:45-67\" \\\n  --evidence \"file:src/api/middleware.ts:12-20\" \\\n  --confidence 0.85 \\\n  --scope repo\n```\n\n### MCP Tool Usage\n\n```typescript\nawait server.callTool(\"write_lesson\", {\n  claim: \"The payment module requires initialization before use\",\n  evidence_refs: [\"file:src/payment/core.ts:10-30\", \"symbol:src/payment/core.ts#initialize:15\"],\n  scope: \"repo\",\n  confidence: 0.9\n});\n```\n\n资料来源：[src/mcp-server.ts:79-94]()\n\n### Validation Rules\n\nMemories are subject to strict validation:\n\n1. **Evidence required**: At least one valid evidence reference must be provided\n2. 
**Evidence must be fresh**: References must originate from search results or context packs\n3. **Claim must be substantive**: Empty or trivial claims are rejected\n4. **Confidence in valid range**: Must be between 0.0 and 1.0\n\n## Searching Memories\n\n### Intent Classification\n\nContextful automatically classifies queries to determine when to search memories. The query classifier recognizes memory-related intents through keyword detection:\n\n```typescript\nconst memoryPattern = /\\bmemory|memories|remember|remembers|lesson|lessons|learned|session|sessions\\b/;\n```\n\nWhen matched, the classifier returns `intent: \"memory\"` and the search system automatically queries the memories FTS index.\n\n资料来源：[src/search.ts:14-17]()\n\n### Query Expansion\n\nMemory searches benefit from automatic term expansion. When a query mentions relevant concepts, additional search terms are added:\n\n```typescript\nif (/\\bmemory|memories|remember|remembers|lesson|lessons|learned|session|sessions\\b/.test(lower)) {\n  additions.push(\"memory\", \"memories\", \"lesson\", \"lessons\", \"claim\", \"ledger\", \"evidence\");\n}\n```\n\nThis ensures that queries like \"what have we learned about auth\" retrieve memory results even if those exact words don't appear in the stored claims.\n\n资料来源：[src/search.ts:28-30]()\n\n### Search Results\n\nMemory hits in search results include:\n\n| Field | Description |\n|-------|-------------|\n| `ref` | Memory reference in format `memory:<id>` |\n| `kind` | Always `\"memory\"` for memory hits |\n| `title` | Display title including scope |\n| `excerpt` | Redacted claim text (secrets removed) |\n| `evidence` | Original evidence references |\n| `status` | Current memory status |\n| `score` | Relevance score |\n\n## Memory Lifecycle\n\n```mermaid\nstateDiagram-v2\n    [*] --> Active: write_lesson\n    Active --> Superseded: write_lesson with supersedes\n    Active --> Stale: Evidence becomes invalid\n    Superseded --> [*]\n    Stale --> [*]\n    Active 
--> [*]: Deleted\n```\n\n### Status Transitions\n\n**Active** → Default state for newly written memories. Active memories are returned in search results and can supersede other memories.\n\n**Superseded** → When a newer, more accurate memory replaces an older one, the superseded memory retains its ID and evidence but is excluded from search results. The `supersedes` field links to the replaced memory.\n\n**Stale** → Memories become stale when their evidence references point to files or symbols that have changed significantly since the memory was written. The reporting system tracks stale memories for review.\n\n资料来源：[src/report.ts:54-58]()\n\n## Integration with Context Packs\n\nThe Memory Ledger integrates with Contextful's evidence pack system:\n\n1. **Before writing**: Search context or create a context pack to get evidence references\n2. **Writing lessons**: Use those evidence refs to anchor the memory claim\n3. **Recalling**: Later sessions query the ledger, retrieving cited memories\n\n```typescript\n// During a session: create pack, identify lessons\nconst pack = await createContextPack({ query: \"how is auth handled\", budget: 2000 });\n\n// Later session: recall what was learned\nconst result = await recallMemory({ query: \"auth patterns\", scope: \"repo\" });\n```\n\nThis bidirectional relationship means memories enhance future context packs, and context packs provide evidence for future memories.\n\n## Reporting\n\nThe `report` command includes memory statistics:\n\n```bash\ncxf report --workspace . 
--format markdown\n```\n\nOutput includes a \"Stale Memories\" section listing memories whose evidence references may no longer be valid:\n\n```\n## Stale Memories\n- memory_abc123: AuthService.validateToken() behavior changed in v2\n- memory_def456: payment module initialization order is now reversed\n```\n\n资料来源：[src/report.ts:54-58]()\n\n## Configuration Options\n\n| Option | CLI Flag | Default | Description |\n|--------|----------|---------|-------------|\n| Workspace | `--workspace` | `process.cwd()` | Path to workspace with memory database |\n| Claim | `--claim` | required | The memory content |\n| Evidence | `--evidence` | required | One or more evidence refs |\n| Scope | `--scope` | `repo` | Memory scope level |\n| Confidence | `--confidence` | `0.7` | Credibility score |\n\n## Privacy Considerations\n\nThe Memory Ledger is designed with privacy as a core principle:\n\n- **Local only**: No data leaves the workspace\n- **No cloud sync**: Memories remain on the local machine\n- **Evidence-linked**: Claims cannot be stored without verifiable source\n- **Content redaction**: Secrets are automatically redacted from stored claims using pattern matching for emails, API keys, and tokens\n\n资料来源：[src/util.ts:12-18]()\n\n## Related MCP Tools\n\n| Tool | Purpose |\n|------|---------|\n| `recall_memory` | Search the memory ledger |\n| `write_lesson` | Store a new evidence-backed memory |\n| `context_pack` | Generate evidence packs that can feed into memories |\n\n资料来源：[README.md:35-40]()\n\n---\n\n<a id='graph-traversal'></a>\n\n## Graph Traversal and Analysis\n\n### 相关页面\n\n相关主题：[Search Engine](#search-engine), [SQLite Database Schema](#sqlite-database)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/types.ts](https://github.com/Inferensys/contextful/blob/main/src/types.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- 
[src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/mcp-server.ts](https://github.com/Inferensys/contextful/blob/main/src/mcp-server.ts)\n</details>\n\n# Graph Traversal and Analysis\n\nGraph Traversal and Analysis is a core feature of Contextful that builds and queries a dependency graph from source code. This system tracks relationships between files, symbols, modules, and configuration nodes, enabling sophisticated impact analysis, change tracing, and dependency exploration.\n\n## Overview\n\nContextful extracts code relationships during indexing and stores them in a SQLite database as a traversable graph. This enables agents to answer questions like:\n\n- \"What depends on this module?\"\n- \"What tests cover this file?\"\n- \"How does this symbol connect to other parts of the codebase?\"\n\n资料来源：[src/extract.ts:68-95]()\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Source Files] --> B[extractEdges]\n    B --> C[GraphEdge Records]\n    C --> D[SQLite Kernel DB]\n    E[CLI/MCP Query] --> F[searchContext]\n    F --> G[traceGraph]\n    G --> H[GraphPath Results]\n    F --> I[impactAnalysis]\n    I --> J[Impact Results]\n    F --> K[whyChanged]\n    K --> L[Git History + Evidence]\n```\n\n### Data Flow\n\n1. **Extraction Phase**: During workspace indexing, `extractEdges()` parses source files to identify relationships 资料来源：[src/extract.ts:52-95]()\n2. **Storage Phase**: Edge data is stored in the `edges` table within the kernel SQLite database 资料来源：[src/search.ts:1-30]()\n3. 
**Query Phase**: CLI commands and MCP tools query the graph using traversal algorithms 资料来源：[src/search.ts:180-220]()\n\n## Graph Data Model\n\n### Core Types\n\n```typescript\ninterface GraphEdge {\n  sourceType: \"file\" | \"symbol\";\n  sourceName: string;\n  targetType: \"file\" | \"symbol\" | \"module\" | \"config\";\n  targetName: string;\n  edgeType: EdgeType;\n  filePath: string;\n  line: number;\n}\n\ninterface GraphPath {\n  edges: Array<{\n    sourceName: string;\n    sourceType: string;\n    edgeType: string;\n    targetName: string;\n    targetType: string;\n  }>;\n  totalHops: number;\n}\n\ninterface GraphNode {\n  name: string;\n  type: \"file\" | \"symbol\" | \"module\" | \"config\";\n  path?: string;\n  kind?: string;\n}\n```\n\n资料来源：[src/types.ts:45-70]()\n\n### Edge Types\n\n| Edge Type | Description | Source Detection |\n|-----------|-------------|------------------|\n| `DEFINES` | File defines a symbol | Function/class declarations |\n| `IMPORTS` | File imports a module | `import`, `require`, `from` statements |\n| `CONFIGURES` | File/config references a key | JSON keys, package.json fields |\n| `TESTS` | Test file tests imports | Auto-generated for test files |\n\n资料来源：[src/extract.ts:75-100]()\n\n### Language-Specific Detection\n\nThe extraction layer supports multiple languages:\n\n| Language | Import Patterns | Symbol Patterns |\n|----------|-----------------|-----------------|\n| TypeScript/JavaScript | `from \"module\"`, `require(\"module\")` | `export function/class/interface` |\n| Python | `from module import` | `def`, `class` |\n| Go | `\"package\"` | `func`, `type struct/interface` |\n| Rust | `use module;`, `mod name;` | `fn`, `struct`, `enum`, `trait` |\n\n资料来源：[src/extract.ts:70-95]()\n\n## Graph Traversal API\n\n### traceGraph\n\nPerforms graph traversal starting from a source node, optionally filtering by edge types and limiting results.\n\n```typescript\nexport async function traceGraph(options: {\n  workspace?: string;\n  from: 
string;\n  to?: string;\n  edgeTypes?: string[];\n  limit?: number;\n}): Promise<GraphPath[]>\n```\n\n#### Parameters\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `workspace` | `string` | No | Workspace path (defaults to CWD) |\n| `from` | `string` | Yes | Starting node name |\n| `to` | `string` | No | Target node for path finding |\n| `edgeTypes` | `string[]` | No | Filter by specific edge types |\n| `limit` | `number` | No | Maximum paths to return (default: 10) |\n\n资料来源：[src/search.ts:180-190]()\n\n### loadGraphPaths\n\nLoads graph paths from the database for a set of file paths.\n\n```typescript\nfunction loadGraphPaths(\n  db: Database,\n  paths: string[],\n  limit: number\n): GraphPath[]\n```\n\n资料来源：[src/search.ts:60-80]()\n\n## Impact Analysis\n\nImpact analysis identifies reverse dependencies—what depends on a given file or symbol—and finds relevant test coverage.\n\n```mermaid\ngraph LR\n    A[Target File/Symbol] --> B[Find All Edges Pointing TO Target]\n    B --> C[Group by Source File]\n    C --> D[Identify Test Files]\n    D --> E[Return Impact Set]\n```\n\n### impactAnalysis Function\n\n```typescript\nexport async function impactAnalysis(options: {\n  workspace?: string;\n  target: string;\n  limit?: number;\n}): Promise<ImpactResult>\n```\n\n#### Impact Result Structure\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `target` | `string` | The analyzed symbol or file |\n| `dependents` | `DependentInfo[]` | Files/symbols that depend on target |\n| `tests` | `SearchHit[]` | Related test files |\n\n```typescript\ninterface DependentInfo {\n  path: string;\n  type: string;\n  imports: string[];\n}\n\ninterface ImpactResult {\n  target: string;\n  dependents: DependentInfo[];\n  tests: SearchHit[];\n}\n```\n\n资料来源：[src/search.ts:130-175]()\n\n### Test Detection Logic\n\nTest files are identified by path patterns and edges with `TESTS` type:\n\n```typescript\nconst testPaths = 
paths.filter(\n  (path) => path.edgeType === \"TESTS\" || \n            /(^|\\/)(tests?|__tests__)\\/|(\\.|-)(test|spec)\\./.test(path.filePath)\n);\n```\n\n资料来源：[src/search.ts:165-170]()\n\n## Change Analysis\n\n### whyChanged\n\nCombines current code evidence with git history to explain why a file or symbol may have changed.\n\n```typescript\nexport async function whyChanged(options: {\n  workspace?: string;\n  target: string;\n  limit?: number\n}): Promise<{\n  target: string;\n  currentEvidence: SearchHit[];\n  commits: Array<{\n    hash: string;\n    subject: string;\n    date?: string;\n    files: string[];\n  }>;\n}>\n```\n\n#### Workflow\n\n```mermaid\ngraph TD\n    A[whyChanged] --> B[searchContext for target]\n    B --> C[Extract file paths from hits]\n    C --> D[readGitHistory with file paths]\n    D --> E[Combine evidence + commits]\n    E --> F[Return structured result]\n```\n\n资料来源：[src/search.ts:200-230]()\n\n### Git History Integration\n\nThe system reads git history for affected files:\n\n```typescript\nfunction readGitHistory(\n  workspace: string,\n  filePaths: string[],\n  limit: number\n): Array<{\n  hash: string;\n  subject: string;\n  date?: string;\n  files: string[];\n}>\n```\n\n资料来源：[src/search.ts:85-100]()\n\n## CLI Commands\n\n### trace Command\n\n```bash\ncxf trace --from <symbol_or_file> [--to <target>] [--edge-types <types>] [--limit <count>]\n```\n\n#### Options\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `--from` | `string` | Required | Starting node |\n| `--to` | `string` | - | Target node |\n| `--edge-types` | `string` | all | Comma-separated edge types |\n| `--limit` | `number` | 10 | Maximum paths |\n| `--workspace` | `string` | CWD | Workspace path |\n\n资料来源：[src/cli.ts:45-60]()\n\n### report Command\n\nGenerates a comprehensive context report including graph statistics:\n\n```bash\ncxf report --workspace <path> --format markdown|json|html\n```\n\n#### Report Includes\n\n- Index 
status with graph node/edge counts\n- Top queries by intent type\n- Stale memory detection\n- Recent evidence packs\n\n资料来源：[src/cli.ts:70-85]()\n\n## MCP Server Tools\n\nContextful exposes graph traversal as MCP tools for integration with AI coding assistants.\n\n### trace_path\n\n```json\n{\n  \"name\": \"trace_path\",\n  \"description\": \"Trace graph relationships between files, symbols, modules, and config nodes.\",\n  \"inputSchema\": {\n    \"from\": \"string\",\n    \"to\": \"string (optional)\",\n    \"edge_types\": \"string[] (optional)\",\n    \"limit\": \"number (optional)\"\n  }\n}\n```\n\n资料来源：[src/mcp-server.ts:45-55]()\n\n### impact_analysis\n\n```json\n{\n  \"name\": \"impact_analysis\",\n  \"description\": \"Find likely dependents and tests for a file, symbol, or module.\",\n  \"inputSchema\": {\n    \"symbol_or_file\": \"string\",\n    \"limit\": \"number (optional)\"\n  }\n}\n```\n\n资料来源：[src/mcp-server.ts:56-65]()\n\n### why_changed\n\n```json\n{\n  \"name\": \"why_changed\",\n  \"description\": \"Explain why a file or symbol may have changed by combining current evidence with git history.\",\n  \"inputSchema\": {\n    \"symbol_or_file\": \"string\",\n    \"limit\": \"number (optional)\"\n  }\n}\n```\n\n资料来源：[src/mcp-server.ts:66-75]()\n\n## Usage Examples\n\n### Direct CLI Usage\n\n```bash\n# Trace dependencies of auth module\ncxf trace --from src/auth.ts --edge-types IMPORTS\n\n# Find what tests cover a file\ncxf impact --target src/parser.ts\n\n# Get change history for a symbol\ncxf why --target AuthService\n```\n\n### MCP Integration\n\n```json\n{\n  \"mcpServers\": {\n    \"contextful\": {\n      \"command\": \"npx\",\n      \"args\": [\"-y\", \"@inferensys/contextful\", \"server\"]\n    }\n  }\n}\n```\n\n```typescript\n// In an MCP client\nconst result = await client.callTool(\"trace_path\", {\n  from: \"src/auth.ts\",\n  to: \"src/database.ts\",\n  edge_types: [\"IMPORTS\", \"DEFINES\"]\n});\n```\n\n## Query Intent Classification\n\nGraph 
queries are automatically classified to route to appropriate traversal strategies:\n\n| Intent | Keywords | Graph Relevance |\n|--------|----------|-----------------|\n| `architectural` | architecture, flow, path, connects, calls | High priority |\n| `impact` | impact, affected, depends, blast radius | Direct edge query |\n| `historical` | why, changed, history, regression | Graph + git history |\n| `exact` | Symbol names, file paths | Symbol-level traversal |\n\n资料来源：[src/search.ts:115-130]()\n\n## Limitations and Design Decisions\n\n### Privacy Guarantees\n\n- All processing is local-only\n- No external embedding APIs used\n- No source code upload\n- No file editing capabilities\n\n资料来源：[README.md:45-50]()\n\n### v1 Scope Boundaries\n\n- Broken JSON during indexing produces warnings but continues processing\n- Syntax diagnostics are intentionally out of scope\n- Git history is read-only\n\n资料来源：[src/extract.ts:120-125]()\n\n## Summary\n\nThe Graph Traversal and Analysis system in Contextful provides:\n\n1. **Automatic Relationship Extraction** - Builds a dependency graph during indexing\n2. **Multiple Query Entry Points** - CLI commands and MCP tools\n3. **Path Finding** - Trace connections between any two nodes\n4. **Impact Analysis** - Identify dependents and test coverage\n5. 
**Change Attribution** - Combine current state with git history\n\nThis enables AI coding assistants to answer sophisticated questions about code relationships without requiring manual documentation or extensive file reading.\n\n---\n\n<a id='sqlite-database'></a>\n\n## SQLite Database Schema\n\n### 相关页面\n\n相关主题：[Workspace Indexing System](#indexing-system), [Search Engine](#search-engine)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/db.ts](https://github.com/Inferensys/contextful/blob/main/src/db.ts)\n- [src/types.ts](https://github.com/Inferensys/contextful/blob/main/src/types.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n</details>\n\n# SQLite Database Schema\n\n## Overview\n\nContextful uses SQLite as its primary storage engine for indexing codebase artifacts. The database schema is designed to support full-text search, symbol indexing, dependency graph traversal, and evidence pack generation for AI-assisted queries. All operations are managed through `better-sqlite3` for synchronous, high-performance access.\n\n资料来源：[src/db.ts:1-50]()\n\n## Schema Tables\n\n### Primary Storage Tables\n\n#### `chunks`\n\nStores indexed code and documentation segments extracted from source files. 
Each chunk represents a logical unit of content bounded by language-specific rules (functions, classes, headings, etc.).\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Unique reference identifier (format: `file:path:start-end`) |\n| `file_path` | TEXT | Relative path to the source file |\n| `start_line` | INTEGER | Starting line number (1-indexed) |\n| `end_line` | INTEGER | Ending line number |\n| `kind` | TEXT | Chunk classification: `code`, `doc`, `file` |\n| `title` | TEXT | Display title for the chunk |\n| `text` | TEXT | Full content of the chunk |\n| `token_estimate` | INTEGER | Estimated token count using GPT tokenizer |\n\n资料来源：[src/db.ts:23-36]()\n\n#### `symbols`\n\nCaptures programming constructs (functions, classes, interfaces, types) extracted from source files.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Unique symbol reference |\n| `name` | TEXT | Symbol name |\n| `kind` | TEXT | Symbol type: `function`, `class`, `interface`, `type`, `struct`, `enum`, `trait`, `impl` |\n| `file_path` | TEXT | Source file path |\n| `line` | INTEGER | Line number where symbol is defined |\n| `signature` | TEXT | First 160 characters of symbol declaration |\n| `exported` | INTEGER | Boolean flag (1 = exported, 0 = local) |\n\n资料来源：[src/db.ts:47-60]()\n\n#### `edges`\n\nRepresents relationships between code entities, including imports, module dependencies, and configuration references.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `source_name` | TEXT | Name of the importing/configuring entity |\n| `target_name` | TEXT | Name or path of the imported/dependency target |\n| `edge_type` | TEXT | Relationship type: `IMPORTS`, `CONFIGURES` |\n| `file_path` | TEXT | File where the relationship is defined |\n| `line` | INTEGER | Line number of the relationship definition |\n\n资料来源：[src/db.ts:38-45]()\n\n### Full-Text Search Index\n\n#### `chunks_fts`\n\nVirtual FTS5 table 
providing fast full-text search across all indexed content. Mirrors core chunk data for BM25-ranked retrieval.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Chunk reference |\n| `path` | TEXT | File path for filtering |\n| `title` | TEXT | Searchable title field |\n| `text` | TEXT | Full searchable content |\n\n资料来源：[src/db.ts:37-42]()\n\nThe FTS table is queried using BM25 ranking in search operations:\n\n```sql\nSELECT ref, path, title, text, bm25(chunks_fts) AS rank \nFROM chunks_fts WHERE chunks_fts MATCH ?\n```\n\n资料来源：[src/search.ts:45-47]()\n\n### Graph and Metadata Tables\n\n#### `nodes`\n\nRepresents graph vertices for dependency analysis and traversal operations.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `id` | INTEGER | Auto-incrementing primary key |\n| `ref` | TEXT | Node reference |\n| `kind` | TEXT | Node classification: `file`, `symbol`, `chunk`, `module`, `config` |\n| `name` | TEXT | Display name |\n| `file_path` | TEXT | Associated file path (nullable) |\n\n资料来源：[src/db.ts:12-22]()\n\n#### `files`\n\nStores metadata about indexed source files.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `absolute_path` | TEXT | Full absolute file path |\n| `language` | TEXT | Detected programming language |\n| `hash` | TEXT | SHA-based content hash for change detection |\n| `size` | TEXT | File size in bytes |\n\n资料来源：[src/db.ts:13-17]()\n\n#### `fingerprints`\n\nStores content fingerprints for deduplication and incremental indexing.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `ref` | TEXT | Reference to the content chunk |\n| `kind` | TEXT | Content type |\n| `fingerprint` | TEXT | Hash of the content |\n\n#### `evidence_packs`\n\nPersists generated evidence packs for audit and replay.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `id` | TEXT | Unique pack identifier |\n| `query` | TEXT | Original search query |\n| 
`token_estimate` | INTEGER | Total token count |\n| `json` | TEXT | Serialized pack data |\n\n#### `query_log`\n\nRecords search history for analysis and debugging.\n\n| Column | Type | Description |\n|--------|------|-------------|\n| `query` | TEXT | Search query text |\n| `intent` | TEXT | Classified search intent |\n| `timestamp` | TEXT | ISO timestamp |\n\n资料来源：[src/db.ts:1-10]()\n\n## Data Flow Architecture\n\n```mermaid\ngraph TD\n    A[Source Files] --> B[extractSymbols]\n    A --> C[extractEdges]\n    A --> D[extractChunks]\n    \n    B --> E[symbols table]\n    C --> F[edges table]\n    D --> G[chunks table]\n    D --> H[chunks_fts index]\n    \n    G --> I[Full-Text Search]\n    E --> J[Symbol Lookup]\n    F --> K[Graph Traversal]\n    \n    I --> L[searchContext]\n    J --> L\n    K --> L\n    \n    L --> M[Evidence Pack]\n    M --> N[evidence_packs]\n```\n\n资料来源：[src/extract.ts:1-150]()\n\n## Supported Symbol Kinds\n\nThe indexer extracts and classifies symbols based on language-specific patterns:\n\n| Language | Supported Kinds |\n|----------|-----------------|\n| TypeScript/JavaScript | `function`, `class`, `interface`, `type` |\n| Python | `function`, `class` |\n| Go | `function`, `struct`, `interface` |\n| Rust | `function`, `struct`, `enum`, `trait`, `impl` |\n\n资料来源：[src/extract.ts:30-60]()\n\n## Supported Edge Types\n\n| Edge Type | Description | Example |\n|-----------|-------------|---------|\n| `IMPORTS` | Module/dependency import | `import { foo } from './bar'` |\n| `CONFIGURES` | Configuration key reference | `\"dependencies\": { ... 
}` in package.json |\n\nThe `CONFIGURES` edge type is specifically generated for package.json dependency sections and JSON configuration keys.\n\n资料来源：[src/extract.ts:70-120]()\n\n## Query Classification and Intent\n\nThe search system classifies queries into intent categories that influence result ranking:\n\n| Intent | Trigger Keywords | Purpose |\n|--------|-----------------|---------|\n| `symbol` | Class/function names, exact identifiers | Find symbol definitions |\n| `code` | Code-related terms | Locate implementation |\n| `memory` | memory, lessons, session | Search evidence-backed memory |\n| `impact` | depends, affected, blast radius | Reverse dependency analysis |\n| `historical` | why, changed, history, commit | Git history queries |\n| `architectural` | architecture, flow, imports | Dependency tracing |\n| `docs` | docs, documentation, readme | Documentation lookup |\n| `exact` | File paths, line refs, symbols | Precise file/line access |\n| `vague` | Default fallback | Broad search |\n\n资料来源：[src/search.ts:15-30]()\n\n## Token Estimation\n\nToken counts are estimated using a heuristic approximation:\n\n```typescript\nexport function estimateTokens(text: string): number {\n  return Math.ceil(text.length / 4);\n}\n```\n\nThis provides a rough approximation where 1 token ≈ 4 characters, suitable for budget management in evidence pack generation.\n\n资料来源：[src/util.ts:1-10]()\n\n## Key Database Operations\n\n### Chunk Insertion\n\n```typescript\ndb.prepare(`\n  INSERT INTO chunks (ref, file_path, start_line, end_line, kind, title, text, token_estimate)\n  VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n`).run(chunk.ref, chunk.filePath, chunk.startLine, chunk.endLine, chunk.kind, chunk.title, chunk.text, chunk.tokenEstimate);\n```\n\n同步写入 `chunks` 表和 `chunks_fts` FTS 索引。\n\n### Symbol Loading\n\n```typescript\ndb.prepare(`SELECT ref, name, kind, file_path, line, signature, exported \nFROM symbols WHERE file_path IN (${paths.map(() => \"?\").join(\",\")})`)\n  
.all(...paths)\n```\n\n资料来源：[src/db.ts:23-42]()\n资料来源：[src/search.ts:180-195]()\n\n## Schema Version and Metadata\n\nThe database stores schema version and workspace metadata:\n\n| Key | Description |\n|-----|-------------|\n| `schema_version` | Current schema version number |\n| `workspace` | Workspace root path |\n| `indexed_at` | Last indexing timestamp |\n| `parser_backend` | Parser backend description |\n| `warnings` | Last 50 indexing warnings |\n\n资料来源：[src/indexer.ts:80-90]()\n\n## Conclusion\n\nThe SQLite schema in Contextful provides a normalized, queryable representation of source code structure and content. The dual-table approach for chunks (storage + FTS index) enables both efficient storage and fast full-text retrieval. The edges and symbols tables together support graph traversal for dependency analysis, while the evidence pack system enables persistent, ranked context generation for AI queries.\n\n---\n\n<a id='indexing-system'></a>\n\n## Workspace Indexing System\n\n### 相关页面\n\n相关主题：[SQLite Database Schema](#sqlite-database), [Search Engine](#search-engine)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/indexer.ts](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n- [src/extract.ts](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n- [src/cli.ts](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n- [src/search.ts](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n- [src/report.ts](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n- [src/util.ts](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n</details>\n\n# Workspace Indexing System\n\n## Overview\n\nThe Workspace Indexing System is the core indexing engine of Contextful. 
It scans, parses, and stores representations of source code files from a workspace into a local SQLite database, enabling semantic search, dependency graph traversal, and evidence-backed context retrieval.\n\n**Primary responsibilities:**\n\n| Responsibility | Description |\n|----------------|-------------|\n| File Discovery | Recursively traverse workspace directories, filtering by language and ignore rules |\n| Symbol Extraction | Parse and catalog functions, classes, interfaces, types, enums, traits |\n| Edge Extraction | Track import/export relationships between modules and dependencies |\n| Content Chunking | Split large files into manageable, line-numbered chunks for retrieval |\n| Watch Mode | Monitor file system changes and incrementally re-index on modifications |\n\n资料来源：[src/cli.ts:1-20](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Workspace Directory] --> B[File Discovery]\n    B --> C[Language Detection]\n    C --> D[Content Extraction]\n    D --> E[Symbol Extraction]\n    D --> F[Edge Extraction]\n    D --> G[Chunk Generation]\n    E --> H[SQLite DB]\n    F --> H\n    G --> H\n    I[Search/Query] --> H\n    J[Watch Mode] --> B\n```\n\nThe system is built around a SQLite database that stores three core entities: symbols, edges, and chunks. The indexer processes files in a single pass, extracting all three data types simultaneously to minimize I/O overhead.\n\n资料来源：[src/extract.ts:1-50](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Supported Languages\n\nThe indexer natively supports symbol and edge extraction for the following languages:\n\n| Language | Symbol Patterns | Import Patterns |\n|----------|----------------|-----------------|\n| TypeScript / JavaScript | `function`, `class`, `interface`, `type`, `const` arrow/function | `import from`, `require()` |\n| Python | `def`, `class` | `from ... 
import`, `import` |\n| Go | `func`, `type struct/interface` | `\"...\"` (quoted imports) |\n| Rust | `fn`, `struct`, `enum`, `trait`, `impl` | `use`, `mod` |\n| Markdown | Headings (`#{1,6}`) | N/A |\n| JSON | Config keys (`\"key\":`) | N/A |\n\n资料来源：[src/extract.ts:15-45](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Indexing Process\n\n### Phase 1: File Discovery\n\nThe indexer recursively scans the workspace directory, applying language-specific filtering and Gitignore-style ignore rules. Binary files are detected and skipped using a simple null-byte heuristic.\n\n```typescript\nexport function isLikelyBinary(buffer: Buffer): boolean {\n  const sample = buffer.subarray(0, Math.min(buffer.length, 4096));\n  return sample.includes(0);\n}\n```\n\n资料来源：[src/util.ts:20-22](https://github.com/Inferensys/contextful/blob/main/src/util.ts)\n\n### Phase 2: Symbol Extraction\n\nSymbols are extracted using language-specific regular expression patterns. Each symbol record includes:\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `name` | string | Symbol identifier |\n| `kind` | string | Category: function, class, interface, type, struct, enum, trait, impl |\n| `line` | number | Declaration line number |\n| `signature` | string | First 160 characters of the declaration line |\n| `exported` | boolean | Whether the symbol is exported |\n\n```typescript\nconst push = (name: string, kind: string, exported = false) =>\n  symbols.push({ name, kind, line: lineNumber, signature: excerpt(line, 160), exported });\n```\n\n资料来源：[src/extract.ts:5-7](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\nFor TypeScript and JavaScript, the extractor captures export modifiers:\n\n```typescript\nmatchPush(line, /^\\s*(export\\s+)?(?:async\\s+)?function\\s+([A-Za-z_$][\\w$]*)/, push, \"function\");\nmatchPush(line, /^\\s*(export\\s+)?class\\s+([A-Za-z_$][\\w$]*)/, push, 
\"class\");\n```\n\n资料来源：[src/extract.ts:12-15](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Phase 3: Edge Extraction\n\nEdges represent dependency relationships between modules. The extractor identifies:\n\n- **IMPORTS**: Direct import statements for each language\n- **CONFIGURES**: Dependencies declared in configuration files (package.json, Cargo.toml, etc.)\n\n```typescript\nif (language === \"typescript\" || language === \"javascript\") {\n  for (const match of line.matchAll(/(?:from\\s+|import\\s*)[\"']([^\"']+)[\"']/g))\n    addImport(match[1]);\n  for (const match of line.matchAll(/require\\([\"']([^\"']+)[\"']\\)/g))\n    addImport(match[1]);\n}\n```\n\n资料来源：[src/extract.ts:67-72](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\nFor `package.json`, dependencies and scripts are indexed as CONFIGURES edges:\n\n```typescript\nfor (const section of [\"dependencies\", \"devDependencies\", \"peerDependencies\", \"scripts\"]) {\n  const values = parsed[section];\n  if (!values || typeof values !== \"object\") continue;\n  for (const key of Object.keys(values)) {\n    edges.push({ targetName: `${section}:${key}`, targetType: \"config\", edgeType: \"CONFIGURES\", line: 1 });\n  }\n}\n```\n\n资料来源：[src/extract.ts:105-114](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Phase 4: Chunk Generation\n\nLarge files are split into overlapping chunks to enable granular retrieval. 
The system uses a sliding window approach with overlap between consecutive chunks:\n\n```mermaid\ngraph LR\n    A[File Lines 1-200] --> B[Chunk 1: 1-80]\n    A --> C[Chunk 2: 60-140]\n    A --> D[Chunk 3: 120-200]\n    B --> E[Token Estimate]\n    C --> E\n    D --> E\n```\n\nEach chunk includes:\n\n| Field | Description |\n|-------|-------------|\n| `ref` | Unique reference string (`file:path:start-end`) |\n| `filePath` | Relative path to source file |\n| `startLine` | Starting line number |\n| `endLine` | Ending line number |\n| `kind` | Chunk type: `code`, `doc`, `file` |\n| `title` | Human-readable title |\n| `tokenEstimate` | Estimated token count |\n\n资料来源：[src/extract.ts:145-160](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n### Phase 5: Markdown Document Chunking\n\nMarkdown files receive special treatment. Instead of fixed-size chunks, the indexer uses headings as natural section boundaries:\n\n```typescript\nlines.forEach((line, index) => {\n  const match = line.match(/^(#{1,6})\\s+(.+)$/);\n  if (match) headings.push({ title: match[2].trim(), line: index + 1 });\n});\nreturn headings.map((heading, index) => {\n  const next = headings[index + 1];\n  const endLine = next ? next.line - 1 : lines.length;\n  // ... 
create chunk for section\n});\n```\n\n资料来源：[src/extract.ts:174-185](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n## Watch Mode\n\nThe indexer supports continuous monitoring via file system watchers:\n\n```typescript\nexport async function watchWorkspace(workspace: string, onIndex: (result: IndexResult) => void): Promise<void> {\n  const resolved = path.resolve(workspace);\n  onIndex(await indexWorkspace({ workspace: resolved }));\n  let timer: NodeJS.Timeout | undefined;\n  fs.watch(resolved, { recursive: true }, () => {\n    if (timer) clearTimeout(timer);\n    timer = setTimeout(async () => {\n      onIndex(await indexWorkspace({ workspace: resolved }));\n    }, 500);\n  });\n}\n```\n\n资料来源：[src/indexer.ts:80-91](https://github.com/Inferensys/contextful/blob/main/src/indexer.ts)\n\nKey characteristics:\n- Debounces file change events with a 500ms delay to batch rapid successive changes\n- Re-runs full indexing on each trigger\n- Outputs JSON results to stdout for consumption by other processes\n\n## CLI Commands\n\nThe indexing system exposes three primary CLI commands:\n\n| Command | Description |\n|---------|-------------|\n| `cxf index --workspace <path> [--watch]` | Initial or incremental indexing of a workspace |\n| `cxf daemon --workspace <path>` | Run as a long-lived daemon that outputs index results on file changes |\n| `cxf report --workspace <path> --format markdown\\|json\\|html` | Generate an index status report |\n\n```bash\n# Index a workspace\nnpx @inferensys/contextful index --workspace .\n\n# Watch for changes and print results\nnpx @inferensys/contextful daemon --workspace .\n```\n\n资料来源：[src/cli.ts:22-35](https://github.com/Inferensys/contextful/blob/main/src/cli.ts)\n\n## Search Integration\n\nThe indexing system powers Contextful's search capabilities. 
After indexing, users can query the database using natural language:\n\n```typescript\nexport async function searchContext(options: SearchOptions): Promise<{ intent: SearchIntent; hits: SearchHit[] }> {\n  const workspace = resolveWorkspace(options.workspace);\n  await ensureIndexed(workspace);\n  const intent = classifyQuery(options.query);\n  // ... perform FTS and semantic search\n}\n```\n\n资料来源：[src/search.ts:45-55](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\nQuery intents are automatically classified to optimize search behavior:\n\n| Intent | Trigger Keywords | Description |\n|--------|-----------------|-------------|\n| `code` | function names, variable names | Code and implementation search |\n| `exact` | Backticks, quotes, `#`, file paths | Literal symbol/identifier lookup |\n| `impact` | impact, affected, depends, blast radius | Dependency and change analysis |\n| `historical` | why, changed, commit, history | Git history and regression tracking |\n| `architectural` | architecture, flow, trace, connects | Dependency graph traversal |\n| `docs` | resource, documentation, guide, how to | Documentation and README search |\n| `memory` | remember, session, lesson, learned | Agent memory recall |\n\n资料来源：[src/search.ts:5-18](https://github.com/Inferensys/contextful/blob/main/src/search.ts)\n\n## Token Estimation\n\nEvery chunk and evidence pack includes a token estimate for budget management:\n\n```typescript\nexport function packTokenCount(text: string): number {\n  return estimateTokens(text);\n}\n```\n\nThe system uses this estimate to enforce budget limits when building context packs for LLM consumption, ensuring responses stay within token budgets.\n\n资料来源：[src/report.ts:50-52](https://github.com/Inferensys/contextful/blob/main/src/report.ts)\n\n## Data Models\n\n### Symbol Record\n\n```typescript\ninterface SymbolRecord {\n  ref: string;\n  name: string;\n  kind: \"function\" | \"class\" | \"interface\" | \"type\" | 
\"struct\" | \"enum\" | \"trait\" | \"impl\";\n  filePath: string;\n  line: number;\n  signature: string;\n  exported: boolean;\n}\n```\n\n### Edge Record\n\n```typescript\ninterface RawEdge {\n  targetName: string;\n  targetType: \"module\" | \"config\" | \"symbol\";\n  edgeType: \"IMPORTS\" | \"CONFIGURES\" | \"DEFINES\";\n  line: number;\n}\n```\n\n### Chunk Record\n\n```typescript\ninterface ChunkRecord {\n  ref: string;\n  filePath: string;\n  startLine: number;\n  endLine: number;\n  kind: \"code\" | \"doc\" | \"file\";\n  title: string;\n  text: string;\n  tokenEstimate: number;\n}\n```\n\n## Extension Points\n\n### Adding New Language Support\n\nTo add support for a new language:\n\n1. Add language detection in the file scanner\n2. Implement symbol extraction patterns in `extractSymbols()`\n3. Implement edge extraction patterns in `extractEdges()`\n4. Update the chunking logic if special handling is needed\n\nExample pattern structure:\n\n```typescript\n} else if (language === \"newlang\") {\n  matchPush(line, /^\\s*(pub\\s+)?fn\\s+([A-Za-z_][\\w]*)/, push, \"function\");\n  const use = line.match(/^\\s*use\\s+([^;]+);/);\n  if (use) addImport(use[1].trim());\n}\n```\n\n资料来源：[src/extract.ts:35-44](https://github.com/Inferensys/contextful/blob/main/src/extract.ts)\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: Inferensys/contextful\n\nSummary: Found 7 potential pitfall items; 0 are high/blocking. Highest priority: configuration - 可能修改宿主 AI 配置.\n\n## 1. configuration · 可能修改宿主 AI 配置\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 项目面向 Claude/Cursor/Codex/Gemini/OpenCode 等宿主，或安装命令涉及用户配置目录。\n- User impact: 安装可能改变本机 AI 工具行为，用户需要知道写入位置和回滚方法。\n- Suggested check: 列出会写入的配置文件、目录和卸载/回滚步骤。\n- Guardrail action: 涉及宿主配置目录时必须给回滚路径，不能只给安装命令。\n- Evidence: capability.host_targets | github_repo:1240001007 | https://github.com/Inferensys/contextful | host_targets=claude, claude_code\n\n## 2. 
capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:1240001007 | https://github.com/Inferensys/contextful | README/documentation is current enough for a first validation pass.\n\n## 3. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | last_activity_observed missing\n\n## 4. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n\n## 5. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n\n## 6. 
maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | issue_or_pr_quality=unknown\n\n## 7. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | release_recency=unknown\n\n<!-- canonical_name: Inferensys/contextful; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "Human Manual"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log\n\nProject: Inferensys/contextful\n\nSummary: Found 7 potential pitfall items; 0 are high/blocking. Highest priority: configuration - 可能修改宿主 AI 配置.\n\n## 1. configuration · 可能修改宿主 AI 配置\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 项目面向 Claude/Cursor/Codex/Gemini/OpenCode 等宿主，或安装命令涉及用户配置目录。\n- User impact: 安装可能改变本机 AI 工具行为，用户需要知道写入位置和回滚方法。\n- Suggested check: 列出会写入的配置文件、目录和卸载/回滚步骤。\n- Guardrail action: 涉及宿主配置目录时必须给回滚路径，不能只给安装命令。\n- Evidence: capability.host_targets | github_repo:1240001007 | https://github.com/Inferensys/contextful | host_targets=claude, claude_code\n\n## 2. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:1240001007 | https://github.com/Inferensys/contextful | README/documentation is current enough for a first validation pass.\n\n## 3. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | last_activity_observed missing\n\n## 4. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n\n## 5. 
security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:1240001007 | https://github.com/Inferensys/contextful | no_demo; severity=medium\n\n## 6. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | issue_or_pr_quality=unknown\n\n## 7. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:1240001007 | https://github.com/Inferensys/contextful | release_recency=unknown\n",
      "summary": "Identity, installation, configuration, runtime, and safety pitfalls before user trial.",
      "title": "Pitfall Log"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# contextful - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for Inferensys/contextful.\n\nProject:\n- Name: contextful\n- Repository: https://github.com/Inferensys/contextful\n- Summary: Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.\n- Host target: claude, claude_code\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Most-efficient Context Management Layer for Agentic AI. Graph-based knowledge context, SQLite index, advanced FTS5 lexical/BM25 search and cross-session live memory.\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. project-introduction: Project Introduction. 
Produce one small intermediate artifact and wait for confirmation.\n2. quick-start: Quick Start Guide. Produce one small intermediate artifact and wait for confirmation.\n3. high-level-architecture: High-Level Architecture. Produce one small intermediate artifact and wait for confirmation.\n4. search-engine: Search Engine. Produce one small intermediate artifact and wait for confirmation.\n5. context-packs: Context Packs. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/Inferensys/contextful\n- https://github.com/Inferensys/contextful#readme\n- README.md\n- package.json\n- src/index.ts\n- server.json\n- src/mcp-server.ts\n- src/indexer.ts\n- src/cli.ts\n- src/search.ts\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start\n\nProject: Inferensys/contextful\n\n## Official Entry Points\n\n### Node.js / npx · 官方安装入口\n\n```bash\nnpx @inferensys/contextful\n```\n\nSource：https://github.com/Inferensys/contextful#readme\n\n## Sources\n\n- repo: https://github.com/Inferensys/contextful\n- docs: https://github.com/Inferensys/contextful#readme\n",
      "summary": "Entry points extracted from official README or installation documentation.",
      "title": "Quick Start"
    }
  },
  "validation_id": "dval_a57d47da1eb34bfc9406efeb91587234"
}