{
  "canonical_name": "Uranid/mnem",
  "compilation_id": "pack_dc6ebdf51f374259ba7bef03cbb36719",
  "created_at": "2026-05-15T07:48:54.474318+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=mcp_config, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=mcp_config, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `pip install mnem-cli` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "pip install mnem-cli",
      "sandbox_container_image": "python:3.12-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "llm_execute_isolated_install",
      "sandbox_validation_id": "sbx_cb10b3629f4b452aa893956c18dedac7"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_c4f7a13530a6dfab0fd064304a487ea4",
    "canonical_name": "Uranid/mnem",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/Uranid/mnem",
    "slug": "mnem",
    "source_packet_id": "phit_be76522d04b24a649fb250c1288a447d",
    "source_validation_id": "dval_aa4979168e914161b416107790486e05"
  },
  "merchandising": {
    "best_for": "需要信息检索与知识管理能力，并使用 mcp_host的用户",
    "github_forks": 28,
    "github_stars": 83,
    "one_liner_en": "Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline.",
    "one_liner_zh": "Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline.",
    "primary_category": {
      "category_id": "research-knowledge",
      "confidence": "high",
      "name_en": "Research & Knowledge",
      "name_zh": "信息检索与知识管理",
      "reason": "matched_keywords:knowledge, rag, graph"
    },
    "target_user": "使用 mcp_host 等宿主 AI 的用户",
    "title_en": "mnem",
    "title_zh": "mnem 能力包",
    "visible_tags": [
      {
        "label_en": "MCP Tools",
        "label_zh": "MCP 工具",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-mcp-tools",
        "type": "product_domain"
      },
      {
        "label_en": "Knowledge Base Q&A",
        "label_zh": "知识库问答",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-knowledge-base-q-a",
        "type": "user_job"
      },
      {
        "label_en": "Workflow Automation",
        "label_zh": "流程自动化",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-workflow-automation",
        "type": "core_capability"
      },
      {
        "label_en": "Node-based Workflow",
        "label_zh": "节点式流程编排",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-node-based-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Evaluation Suite",
        "label_zh": "评测体系",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-evaluation-suite",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_be76522d04b24a649fb250c1288a447d",
  "page_model": {
    "artifacts": {
      "artifact_slug": "mnem",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "pip install mnem-cli",
          "label": "Python / pip · 官方安装入口",
          "source": "https://github.com/Uranid/mnem#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "MCP 工具",
        "知识库问答",
        "流程自动化",
        "节点式流程编排",
        "评测体系"
      ],
      "eyebrow": "信息检索与知识管理",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要信息检索与知识管理能力，并使用 mcp_host的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline."
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "mcp_host",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "mcp_config, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[feature] hermes support",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_c54919b2b8b340438a9e5aa17291b93a | https://github.com/Uranid/mnem/issues/27 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：[feature] hermes support",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "README/documentation is current enough for a first validation pass.",
            "category": "能力坑",
            "evidence": [
              "capability.assumptions | github_repo:1221867246 | https://github.com/Uranid/mnem | README/documentation is current enough for a first validation pass."
            ],
            "severity": "medium",
            "suggested_check": "将假设转成下游验证清单。",
            "title": "能力判断依赖假设",
            "user_impact": "假设不成立时，用户拿不到承诺的能力。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page",
            "category": "维护坑",
            "evidence": [
              "community_evidence:github | cevd_5c74e7a10f774af6b0460b5da009d1b4 | https://github.com/Uranid/mnem/issues/23 | 来源讨论提到 windows 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "未记录 last_activity_observed。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | last_activity_observed missing"
            ],
            "severity": "medium",
            "suggested_check": "补 GitHub 最近 commit、release、issue/PR 响应信号。",
            "title": "维护活跃度未知",
            "user_impact": "新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "downstream_validation.risk_items | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "进入安全/权限治理复核队列。",
            "title": "下游验证发现风险项",
            "user_impact": "下游已经要求复核，不能在页面中弱化。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "risks.scoring_risks | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "把风险写入边界卡，并确认是否需要人工复核。",
            "title": "存在评分风险",
            "user_impact": "风险会影响是否适合普通用户安装。"
          },
          {
            "body": "issue_or_pr_quality=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | issue_or_pr_quality=unknown"
            ],
            "severity": "low",
            "suggested_check": "抽样最近 issue/PR，判断是否长期无人处理。",
            "title": "issue/PR 响应质量未知",
            "user_impact": "用户无法判断遇到问题后是否有人维护。"
          },
          {
            "body": "release_recency=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | release_recency=unknown"
            ],
            "severity": "low",
            "suggested_check": "确认最近 release/tag 和 README 安装命令是否一致。",
            "title": "发布节奏不明确",
            "user_impact": "安装命令和文档可能落后于代码，用户踩坑概率升高。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 8 个潜在踩坑项，其中 1 个为 high/blocking；最高优先级：安全/权限坑 - 来源证据：[feature] hermes support。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": 1,
        "forks": 28,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": 83
      },
      "source_url": "https://github.com/Uranid/mnem",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline.",
      "title": "mnem 能力包",
      "trial_prompt": "# mnem - Prompt Preview\n\n> 复制下面这段 Prompt 到你常用的 AI，先试一次，不需要安装。\n> 它的目标是让你直接体验这个项目的服务方式，而不是阅读项目介绍。\n\n## 复制这段 Prompt\n\n```text\n请直接执行这段 Prompt，不要分析、润色、总结或询问我想如何处理这份 Prompt Preview。\n\n你现在扮演 mnem 的“安装前体验版”。\n这不是项目介绍、不是评价报告、不是 README 总结。你的任务是让我用最小成本体验它的核心服务。\n\n我的试用任务：我想快速理解一组资料，并得到结构化摘要、对比和继续研究的问题。\n我常用的宿主 AI：MCP Client\n\n【体验目标】\n围绕我的真实任务，现场演示这个项目如何把输入转成 示例引导, 判断线索。重点是让我感受到工作方式，而不是给我项目背景。\n\n【业务流约束】\n- 你必须像一个正在提供服务的项目能力包，而不是像一个讲解员。\n- 每一轮只推进一个步骤；提出问题后必须停下来等我回答。\n- 每一步都必须让我感受到一个具体服务动作：澄清、整理、规划、检查、判断或收尾。\n- 每一步都要说明：当前目标、你需要我提供什么、我回答后你会产出什么。\n- 不要安装、不要运行命令、不要写代码、不要声称测试通过、不要声称已经修改文件。\n- 需要真实安装或宿主加载后才能验证的内容，必须明确说“这一步需要安装后验证”。\n- 如果我说“用示例继续”，你可以用虚构示例推进，但仍然不能声称真实执行。\n\n【可体验服务能力】\n- 安装前能力预览: Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline. 输入：用户任务, 当前 AI 对话上下文；输出：示例引导, 判断线索。\n\n【必须安装后才可验证的能力】\n- 命令行启动或安装流程: 项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 输入：终端环境, 包管理器, 项目依赖；输出：安装结果, 列表/更新/运行结果。\n\n【核心服务流】\n请严格按这个顺序带我体验。不要一次性输出完整流程：\n1. page-introduction：Introduction to mnem。围绕“Introduction to mnem”模拟一次用户任务，不展示安装或运行结果。\n2. page-installation：Installation Guide。围绕“Installation Guide”模拟一次用户任务，不展示安装或运行结果。\n3. page-architecture：System Architecture。围绕“System Architecture”模拟一次用户任务，不展示安装或运行结果。\n4. page-core-components：Core Components。围绕“Core Components”模拟一次用户任务，不展示安装或运行结果。\n5. page-hybrid-retrieval：Hybrid Retrieval System。围绕“Hybrid Retrieval System”模拟一次用户任务，不展示安装或运行结果。\n\n【核心能力体验剧本】\n每一步都必须按“输入 -> 服务动作 -> 中间产物”执行。不要只说流程名：\n1. page-introduction\n输入：用户提供的“Introduction to mnem”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n2. page-installation\n输入：用户提供的“Installation Guide”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n3. page-architecture\n输入：用户提供的“System Architecture”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n4. page-core-components\n输入：用户提供的“Core Components”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n5. 
page-hybrid-retrieval\n输入：用户提供的“Hybrid Retrieval System”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n【项目服务规则】\n这些规则决定你如何服务用户。不要解释规则本身，而要在每一步执行时遵守：\n- 先确认用户任务、输入材料和成功标准，再模拟项目能力。\n- 每一步都必须形成可检查的小产物，并等待用户确认后再继续。\n- 凡是需要安装、调用工具或访问外部服务的能力，都必须标记为安装后验证。\n\n【每一步的服务约束】\n- Step 1 / page-introduction：Step 1 必须围绕“Introduction to mnem”形成一个小中间产物，并等待用户确认。\n- Step 2 / page-installation：Step 2 必须围绕“Installation Guide”形成一个小中间产物，并等待用户确认。\n- Step 3 / page-architecture：Step 3 必须围绕“System Architecture”形成一个小中间产物，并等待用户确认。\n- Step 4 / page-core-components：Step 4 必须围绕“Core Components”形成一个小中间产物，并等待用户确认。\n- Step 5 / page-hybrid-retrieval：Step 5 必须围绕“Hybrid Retrieval System”形成一个小中间产物，并等待用户确认。\n\n【边界与风险】\n- 不要声称已经安装、运行、调用 API、读写本地文件或完成真实任务。\n- 安装前预览只能展示工作方式，不能证明兼容性、性能或输出质量。\n- 涉及安装、插件加载、工具调用或外部服务的能力必须安装后验证。\n\n【可追溯依据】\n这些路径只用于你内部校验或在我追问“依据是什么”时简要引用。不要在首次回复主动展开：\n- https://github.com/Uranid/mnem\n- https://github.com/Uranid/mnem#readme\n- README.md\n- docs/src/introduction.md\n- crates/mnem-core/src/lib.rs\n- docs/src/install.md\n- Cargo.toml\n- Dockerfile\n- docker-compose.yml\n- crates/mnem-core/src/id/cid.rs\n- crates/mnem-core/src/id/multihash.rs\n- crates/mnem-core/src/objects/mod.rs\n\n【首次问题规则】\n- 首次三问必须先确认用户目标、成功标准和边界，不要提前进入工具、安装或实现细节。\n- 如果后续需要技术条件、文件路径或运行环境，必须等用户确认目标后再追问。\n\n首次回复必须只输出下面 4 个部分：\n1. 体验开始：用 1 句话说明你将带我体验 mnem 的核心服务。\n2. 当前步骤：明确进入 Step 1，并说明这一步要解决什么。\n3. 你会如何服务我：说明你会先改变我完成任务的哪个动作。\n4. 只问我 3 个问题，然后停下等待回答。\n\n首次回复禁止输出：后续完整流程、证据清单、安装命令、项目评价、营销文案、已经安装或运行的说法。\n\nStep 1 / brainstorming 的二轮协议：\n- 我回答首次三问后，你仍然停留在 Step 1 / brainstorming，不要进入 Step 2。\n- 第二次回复必须产出 6 个部分：澄清后的任务定义、成功标准、边界条件、\n  2-3 个可选方案、每个方案的权衡、推荐方案。\n- 第二次回复最后必须问我是否确认推荐方案；只有我明确确认后，才能进入下一步。\n- 第二次回复禁止输出 git worktree、代码计划、测试文件、命令或真实执行结果。\n\n后续对话规则：\n- 我回答后，你先完成当前步骤的中间产物并等待确认；只有我确认后，才能进入下一步。\n- 每一步都要生成一个小的中间产物，例如澄清后的目标、计划草案、测试意图、验证清单或继续/停止判断。\n- 所有演示都写成“我会建议/我会引导/这一步会形成”，不要写成已经真实执行。\n- 不要声称已经测试通过、文件已修改、命令已运行或结果已产生。\n- 如果某个能力必须安装后验证，请直接说“这一步需要安装后验证”。\n- 如果证据不足，请明确说“证据不足”，不要补事实。\n```\n",
      "voices": [
        {
          "body": "来源平台：github。github/github_issue: [feature] hermes support（https://github.com/Uranid/mnem/issues/27）；github/github_issue: [bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page（https://github.com/Uranid/mnem/issues/23）。这些是项目级外部声音，不作为单独质量证明。",
          "items": [
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[feature] hermes support",
              "url": "https://github.com/Uranid/mnem/issues/27"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page",
              "url": "https://github.com/Uranid/mnem/issues/23"
            }
          ],
          "status": "已收录 2 条来源",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "信息检索与知识管理",
      "desc": "Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline.",
      "effort": "安装已验证",
      "forks": 28,
      "icon": "search",
      "name": "mnem 能力包",
      "risk": "需复核",
      "slug": "mnem",
      "stars": 83,
      "tags": [
        "MCP 工具",
        "知识库问答",
        "流程自动化",
        "节点式流程编排",
        "评测体系"
      ],
      "thumb": "blue",
      "type": "MCP 配置"
    },
    "manual": {
      "markdown": "# https://github.com/Uranid/mnem 项目说明书\n\n生成时间：2026-05-15 07:30:14 UTC\n\n## 目录\n\n- [Introduction to mnem](#page-introduction)\n- [Installation Guide](#page-installation)\n- [System Architecture](#page-architecture)\n- [Core Components](#page-core-components)\n- [Hybrid Retrieval System](#page-hybrid-retrieval)\n- [Embedding Providers](#page-embed-providers)\n- [Storage Backend](#page-storage-backend)\n- [Ingestion Pipeline](#page-ingestion)\n- [CLI Commands Reference](#page-cli-commands)\n- [GraphRAG Implementation](#page-graphrag)\n\n<a id='page-introduction'></a>\n\n## Introduction to mnem\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [Installation Guide](#page-installation)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-core/src/objects/operation.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/operation.rs)\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n- [crates/mnem-ingest/src/pipeline.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pipeline.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n</details>\n\n# Introduction to mnem\n\nmnem is a Rust-based knowledge management system designed for AI agents. 
It provides a structured approach to storing, retrieving, and managing information using a DAG-based (Directed Acyclic Graph) storage architecture with content-addressed data structures.\n\n## Overview\n\nmnem serves as a personal knowledge graph for AI agents, enabling them to:\n\n- **Store structured information** with nodes, edges, and properties in a version-controlled repository\n- **Ingest various document formats** including Markdown, PDF, plain text, code files, and conversation logs\n- **Retrieve relevant context** using vector search, sparse ranking, and token-budget packing\n- **Track changes** through a commit-based operation log with cryptographic signatures\n- **Support branching** for experimental or temporary state management\n\n资料来源：[crates/mnem-core/src/lib.rs:1-30]()\n\n## Architecture\n\nmnem is organized as a monorepo with multiple Rust crates:\n\n```mermaid\ngraph TD\n    subgraph \"mnem Repository Structure\"\n        CLI[\"mnem-cli<br/>Command Line Interface\"]\n        HTTP[\"mnem-http<br/>HTTP API Server\"]\n        INGEST[\"mnem-ingest<br/>Document Ingestion\"]\n        CORE[\"mnem-core<br/>Core Data Model & Retrieval\"]\n    end\n    \n    CLI --> CORE\n    HTTP --> CORE\n    INGEST --> CORE\n    \n    INGEST --> |\"parse/chunk/extract\"| RAW[(\"Raw Source<br/>.md .pdf .txt .json\")]\n    CORE --> |\"store/retrieve\"| GRAPH[(\"Knowledge<br/>Graph\")]\n```\n\n### Crate Responsibilities\n\n| Crate | Purpose |\n|-------|---------|\n| `mnem-core` | Core data models (`Node`, `Edge`, `Commit`, `Operation`), DAG-CBOR codec, prolly trees, vector/sparse indexing, agent-facing retrieval |\n| `mnem-ingest` | Document parsing, chunking strategies, entity extraction (rule-based, KeyBERT, or LLM) |\n| `mnem-cli` | Terminal interface for all operations |\n| `mnem-http` | REST API for remote agent access |\n\n资料来源：[crates/mnem-core/src/lib.rs:15-25]()\n\n## Core Data Model\n\n### Nodes\n\nNodes are the fundamental unit of information storage. 
Each node contains:\n\n```mermaid\ngraph LR\n    subgraph \"Node Structure\"\n        NTYPE[\"ntype<br/>Node Type Label\"]\n        CTX[\"context_sentence<br/>Positional Cue\"]\n        SUM[\"summary<br/>LLM-facing Text\"]\n        PROPS[\"props<br/>Property Map\"]\n        CONTENT[\"content<br/>Opaque Payload\"]\n    end\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `ntype` | `String` | Semantic label (e.g., `Fact`, `Doc`, `Person`) |\n| `context_sentence` | `Option<String>` | LLM-generated placement cue for contextual retrieval |\n| `summary` | `Option<String>` | Primary text for embedding and retrieval |\n| `props` | `BTreeMap<String, Ipld>` | Structured key-value metadata |\n| `content` | `Option<Bytes>` | Opaque payload (document body, file data) |\n\nThe `context_sentence` field implements Anthropic's 2024 contextual retrieval approach, storing an LLM-generated one-sentence placement cue that captures positional and relational context. 资料来源：[crates/mnem-core/src/objects/node.rs:30-75]()\n\n### Edges\n\nEdges represent relationships between nodes. 
They are typed links with source and target references.\n\n### Operations and Commits\n\n| Concept | Description |\n|---------|-------------|\n| `Operation` | A single atomic change to the repository state |\n| `Commit` | A snapshot referencing a sequence of operations |\n| `View` | Current head state with references and tombstones |\n\nOperations include metadata for provenance:\n\n```rust\npub struct Operation {\n    pub author: String,\n    pub agent_id: Option<String>,\n    pub task_id: Option<String>,\n    pub host: Option<String>,\n    pub time: u64,\n    pub description: String,\n    pub signature: Option<Signature>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/operation.rs:15-35]()\n\n## Document Ingestion Pipeline\n\nThe ingestion system (`mnem-ingest`) handles the transformation of raw documents into chunked, indexed nodes:\n\n```mermaid\nflowchart LR\n    RAW[\"Raw Source<br/>.md .pdf .txt\"] --> PARSE[\"Parse\"]\n    PARSE --> SECTION[\"Sections\"]\n    SECTION --> CHUNK[\"Chunk\"]\n    CHUNK --> EXTRACT[\"Extract Entities<br/>Relations\"]\n    EXTRACT --> NODE[\"Nodes + Edges\"]\n    NODE --> STORE[\"Commit to Store\"]\n```\n\n资料来源：[crates/mnem-ingest/src/lib.rs:20-45]()\n\n### Supported Source Types\n\n| Source | Extensions | Strategy | Default Chunker |\n|--------|------------|----------|-----------------|\n| Markdown | `.md`, `.markdown` | CommonMark + GFM | `Paragraph` |\n| Text | `.txt`, unknown | Plain text | `SentenceRecursive` |\n| PDF | `.pdf` | Text layer extraction | `SentenceRecursive` |\n| Conversation | `.json`, `.jsonl` | Chat export formats | `Session` |\n| Code | `.rs`, `.py`, `.js`, `.ts`, `.go`, `.java`, `.c`, `.cpp`, `.rb`, `.cs` | Tree-sitter parsing | `Structural` |\n\n资料来源：[crates/mnem-ingest/src/pipeline.rs:45-60]()\n\n### Chunker Strategies\n\nFive chunking strategies are available:\n\n| Strategy | Description | Use Case |\n|----------|-------------|----------|\n| `Paragraph` | Splits on double-newlines | Markdown documents 
|\n| `Recursive` | Token-budgeted word-window sliding | Backwards compatibility |\n| `SentenceRecursive` | Sentence-aware token packing using Unicode boundaries | Prose (Text, PDF) |\n| `Session` | Groups messages up to `max_messages` | Conversation logs |\n| `Structural` | One chunk per section | Code (function/class level) |\n\nThe `SentenceRecursive` chunker is the preferred strategy for prose as it prevents cutting mid-sentence and produces more uniform chunk sizes. Token counts are estimated via whitespace split for speed and determinism. 资料来源：[crates/mnem-ingest/src/chunk.rs:1-45]()\n\n### Entity Extraction\n\nmnem supports multiple extraction providers:\n\n| Provider | Method |\n|----------|--------|\n| `rule` (default) | Capitalized phrase heuristic |\n| `keybert` | Statistical keyword extraction (requires feature flag) |\n| `ollama` | LLM-based extraction (requires feature flag) |\n| `none` | Suppress entity extraction |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:25-40]()\n\n## Retrieval System\n\nThe retrieval layer composes multiple ranking strategies to deliver relevant context to agents under a token budget:\n\n```mermaid\ngraph TD\n    QUERY[\"Query\"] --> VEC[\"Vector Search\"]\n    QUERY --> SPARSE[\"Sparse Ranking\"]\n    VEC --> RERANK[\"Rerank\"]\n    SPARSE --> RERANK\n    RERANK --> PACK[\"Token Budget Packing\"]\n    PACK --> RESULT[\"Context for Agent\"]\n```\n\nThe retriever renders nodes in a YAML-like format:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n```\n\n- `ntype` and `id` are always present\n- `context` appears before `summary` (per Anthropic's contextual-retrieval recipe)\n- `summary` is clipped at 8192 chars by default\n- Scalar props are emitted in BTreeMap order; non-scalar props are skipped\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:1-50]()\n\n## CLI Interface\n\nThe `mnem` CLI provides commands for repository management:\n\n| Command | Description 
|\n|---------|-------------|\n| `mnem ingest <path>` | Parse and commit documents to the graph |\n| `mnem tag` | Manage versioned references (create, list, delete) |\n| `mnem branch` | Create and manage branches |\n\n### Ingest Command Options\n\n| Option | Default | Description |\n|--------|---------|-------------|\n| `--chunker` | `auto` | Strategy: `auto`, `paragraph`, `recursive`, `sentence_recursive`, `session`, `structural` |\n| `--max-tokens` | `512` | Target tokens per chunk |\n| `--overlap` | `32` | Overlap tokens between chunks |\n| `--recursive` | false | Walk directory trees |\n| `--extractor` | `none` | Entity extraction provider |\n| `--ner-provider` | `rule` | NER method: `rule`, `none` |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:50-70]()\n\n## HTTP API\n\nThe HTTP server exposes REST endpoints for remote agent access:\n\n| Endpoint | Method | Description |\n|----------|--------|-------------|\n| `/v1/ingest` | POST | Ingest documents with JSON or multipart payload |\n| `/v1/branches` | GET | List all branches |\n| `/v1/branches` | POST | Create a new branch |\n\n### Ingest Request Parameters\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `chunker` | String | Strategy: `auto`, `paragraph`, `recursive`, `session` |\n| `max_tokens` | u32 | Target tokens per chunk |\n| `overlap` | u32 | Overlap tokens between chunks |\n| `author` | String | Required commit author |\n| `message` | String | Optional commit message |\n| `extractor` | String | Extraction provider |\n| `ner_provider` | String | NER method override |\n\n资料来源：[crates/mnem-http/src/handlers_ingest.rs:15-45]()\n\n## Key Design Principles\n\n1. **No unsafe code**: The entire `mnem-core` crate enforces `#![forbid(unsafe_code)]` 资料来源：[crates/mnem-core/src/lib.rs:30]()\n\n2. **Canonical encoding**: Every object type preserves byte-exact round-trip property (`decode(encode(x)) == x`)\n\n3. 
**Deterministic retrieval**: Node props use `BTreeMap` for consistent iteration order\n\n4. **Extensible architecture**: Sidecar support for external tools (docling, unstructured) via feature flags\n\n5. **Branch support**: Tags and branches enable experimental state management without losing history\n\n---\n\n<a id='page-installation'></a>\n\n## Installation Guide\n\n### 相关页面\n\n相关主题：[Introduction to mnem](#page-introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-cli/src/main.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n- [py-packages/mnem-cli/README.md](https://github.com/Uranid/mnem/blob/main/py-packages/mnem-cli/README.md)\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n- [crates/mnem-cli/src/commands/ingest.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n</details>\n\n# Installation Guide\n\n## Overview\n\nThe mnem project is a Git-like version control system designed specifically for AI Agent Knowledge management. It provides versioned storage, retrieval, and synchronization of structured knowledge nodes. This guide covers all supported installation methods, system requirements, and configuration steps to get mnem running on your platform.\n\nThe project is organized as a Rust monorepo with multiple crates and language bindings. Installation options include native binaries via multiple package managers, Python packages, Docker containers, and prebuilt releases. 
资料来源：[crates/mnem-core/src/lib.rs:1-20]()\n\n## System Requirements\n\n### Supported Platforms\n\nmnem supports the following platforms and architectures:\n\n| Platform | Architecture | Notes |\n|----------|--------------|-------|\n| Linux | x86_64, aarch64 | Full support |\n| macOS | arm64 (Apple Silicon), x86_64 | Rosetta 2 compatible |\n| Windows | x86_64 | Full support |\n\n资料来源：[py-packages/mnem-cli/README.md:1-20]()\n\n### Runtime Dependencies\n\n| Component | Requirement | Purpose |\n|-----------|-------------|---------|\n| Rust toolchain | 1.70+ (stable) | Building from source |\n| Python | 3.9+ | Python bindings (`mnem-py`) |\n| Node.js | 18+ | npm package |\n| Docker | 20.10+ | Container deployment |\n\n## Installation Methods\n\n### CLI Installation via pip\n\nThe simplest method to install the mnem CLI is through Python's package manager:\n\n```bash\npip install mnem-cli\nmnem --version\n```\n\nOn first run, `mnem` automatically downloads the correct prebuilt binary for your platform from the GitHub release assets and caches it in `~/.mnem_cli/`. Subsequent calls run the cached binary directly. 资料来源：[py-packages/mnem-cli/README.md:1-15]()\n\n### CLI Installation via Cargo\n\nFor users with the Rust toolchain installed, install from crates.io:\n\n```bash\ncargo install --locked mnem-cli --features bundled-embedder\n```\n\nThe `--features bundled-embedder` flag compiles the embedder dependency into the binary, making it self-contained without external embedding services. 资料来源：[crates/mnem-cli/src/main.rs:1-50]()\n\n### CLI Installation via npm\n\nNode.js users can install globally via npm:\n\n```bash\nnpm install -g mnem-cli\n```\n\n资料来源：[py-packages/mnem-cli/README.md:1-20]()\n\n### Prebuilt Binaries\n\nDownload prebuilt binaries directly from the [GitHub Releases](https://github.com/Uranid/mnem/releases) page. 
Binaries are available for all supported platforms in the release assets.\n\nAfter downloading, make the binary executable:\n\n```bash\nchmod +x mnem-*-x86_64-unknown-linux-gnu\n./mnem-*-x86_64-unknown-linux-gnu --version\n```\n\n### Docker Installation\n\nContainer-based deployment is available via Docker. The project includes both a `Dockerfile` and `docker-compose.yml` for containerized deployments.\n\nTo build the Docker image:\n\n```bash\ndocker build -t mnem:latest .\n```\n\nFor orchestrated deployments using docker-compose:\n\n```bash\ndocker-compose up -d\n```\n\n资料来源：[Dockerfile](https://github.com/Uranid/mnem/blob/main/Dockerfile), [docker-compose.yml](https://github.com/Uranid/mnem/blob/main/docker-compose.yml)\n\n## Python Bindings\n\nFor programmatic access from Python applications, install the Python bindings package:\n\n```bash\npip install mnem-py\n```\n\nThis package provides the `import pymnem` interface for Python applications to interact with mnem repositories. 资料来源：[py-packages/mnem-cli/README.md:1-30]()\n\n## Build from Source\n\n### Prerequisites\n\n- Rust 1.70 or later (stable toolchain)\n- Cargo (included with Rust)\n- Git\n\n### Build Steps\n\n```bash\n# Clone the repository\ngit clone https://github.com/Uranid/mnem.git\ncd mnem\n\n# Build the CLI\ncargo build --release --bin mnem\n\n# Build all crates\ncargo build --release\n```\n\n### Feature Flags\n\nThe project supports several feature flags to customize the build:\n\n| Feature | Description |\n|---------|-------------|\n| `bundled-embedder` | Embedder for local vector storage |\n| `keybert` | Statistical keyphrase extraction |\n| `ollama` | LLM-based extraction via Ollama |\n| `sidecar-docling` | PDF extraction via docling CLI |\n| `sidecar-unstructured` | PDF extraction via unstructured |\n\n资料来源：[crates/mnem-ingest/src/lib.rs:1-60]()\n\n## Initial Configuration\n\n### Repository Initialization\n\nAfter installation, initialize a new mnem repository:\n\n```bash\nmnem 
init\n```\n\nThis creates the `.mnem/` directory with the repository database (`repo.redb`). 资料来源：[crates/mnem-cli/src/main.rs:1-80]()\n\n### Configuration File\n\nThe CLI reads configuration from `.mnem/config.toml` in the repository root. Configuration includes:\n\n```toml\n[user]\nname = \"Your Name\"\nemail = \"your.email@example.com\"\nagent_id = \"agent-identifier\"\n\n[llm]\nprovider = \"ollama\"  # or \"openai\", \"anthropic\"\nmodel = \"llama3.2\"\nbase_url = \"http://localhost:11434\"\ntimeout_secs = 120\n```\n\nThe author string for commits follows the format `name <email>` when both are present. If only one is available, it uses that value alone. When neither is configured, it falls back to the `agent_id` or defaults to `\"mnem-cli\"`. 资料来源：[crates/mnem-cli/src/config.rs:1-50]()\n\n### Repository Path Resolution\n\nThe CLI automatically searches for the `.mnem/` directory by walking up from the current working directory, similar to Git's behavior. You can override this with the `-R` / `--repo` flag:\n\n```bash\nmnem -R ~/notes status\n```\n\n资料来源：[crates/mnem-cli/src/main.rs:1-80]()\n\n## HTTP Server Deployment\n\n### Starting the Server\n\nThe HTTP server provides REST API access to mnem repositories:\n\n```bash\nmnem serve --port 8080\n```\n\n### API Endpoints\n\n| Endpoint | Method | Purpose |\n|----------|--------|---------|\n| `/v1/ingest` | POST | Ingest documents |\n| `/v1/branches` | GET | List branches |\n| `/v1/branches` | POST | Create branch |\n| `/v1/retrieve` | POST | Query knowledge |\n\n### Ingest Configuration\n\nThe ingest endpoint accepts JSON payloads with the following parameters:\n\n| Parameter | Type | Required | Default | Description |\n|-----------|------|----------|---------|-------------|\n| `content` | String | Yes | - | Content to ingest |\n| `chunker` | String | No | `auto` | Chunking strategy |\n| `max_tokens` | u32 | No | 512 | Target tokens per chunk |\n| `overlap` | u32 | No | 32 | Overlap tokens |\n| `author` | String 
| Yes | - | Commit author |\n| `message` | String | No | `\"mnem http ingest\"` | Commit message |\n| `extractor` | String | No | `\"none\"` | Entity extractor |\n| `ner_provider` | String | No | `\"rule\"` | NER provider |\n\n资料来源：[crates/mnem-http/src/handlers_ingest.rs:1-50]()\n\n## Verification\n\n### Verify Installation\n\nAfter installation, verify the CLI is working:\n\n```bash\nmnem --version\nmnem status\n```\n\n### First-Run Wizard\n\nOn first run with no repository present, mnem launches a first-run wizard to help configure the basic settings. Returning users see the `mnem status` output directly. 资料来源：[crates/mnem-cli/src/main.rs:1-80]()\n\n## Alternative Installation Summary\n\n| Method | Command | Notes |\n|--------|---------|-------|\n| pip | `pip install mnem-cli` | Auto-downloads binary |\n| cargo | `cargo install --locked mnem-cli --features bundled-embedder` | Self-contained binary |\n| npm | `npm install -g mnem-cli` | Node.js integration |\n| Docker | `docker-compose up -d` | Containerized deployment |\n| Binary | Download from Releases | Manual installation |\n\n资料来源：[py-packages/mnem-cli/README.md:1-20]()\n\n## Next Steps\n\nAfter installation, consult these related guides:\n\n- **Quick Start** - Create your first repository and add content\n- **Configuration Reference** - Complete configuration options\n- **Ingest Guide** - Document ingestion and chunking strategies\n- **Retrieve Guide** - Query your knowledge base\n\n---\n\n<a id='page-architecture'></a>\n\n## System Architecture\n\n### 相关页面\n\n相关主题：[Core Components](#page-core-components), [Storage Backend](#page-storage-backend)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n- [crates/mnem-core/src/id/link.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/id/link.rs)\n- 
[crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-core/src/retrieve/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/retrieve/mod.rs)\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-ingest/src/pipeline.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pipeline.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n</details>\n\n# System Architecture\n\n## Overview\n\nmnem is a content-addressed, CRDT-based (Conflict-free Replicated Data Types) knowledge management system designed for agentic workflows. The system provides immutable content-addressed storage with a secondary vector index for retrieval-augmented generation (RAG) applications.\n\nThe architecture follows a modular design with distinct crates handling different concerns:\n\n| Crate | Purpose |\n|-------|---------|\n| `mnem-core` | Core data types, CRDT operations, storage, indexing, retrieval |\n| `mnem-ingest` | Source parsing, chunking, and entity extraction |\n| `mnem-cli` | Command-line interface |\n| `mnem-http` | HTTP API server |\n\n资料来源：[crates/mnem-core/src/lib.rs:1-30]()\n\n---\n\n## Core Data Model\n\n### Content Identifiers (CIDs)\n\nContent in mnem is identified by cryptographic hashes using IPLD CIDs (Content Identifiers). 
CIDs are self-describing content addresses that encode the content codec, the hash function, and the resulting digest of the content.\n\nThe system uses DAG-CBOR encoding for canonical binary serialization, ensuring byte-exact round-trip encoding/decoding:\n\n```text\ndecode(encode(x)) == x\nencode(decode(b)) == b\n```\n\n资料来源：[crates/mnem-core/src/lib.rs:35-40]()\n\n### Phantom-Typed Links\n\nmnem introduces `Link<T>` - a phantom-typed CID that encodes the type of the referenced content at the type level. While a bare CID points to \"some content,\" a `Link<T>` points to \"content that is a `T`.\"\n\n```rust\npub struct Link<T: ?Sized> {\n    cid: Cid,\n    _target: PhantomData<fn() -> T>,\n}\n```\n\nThe phantom type prevents reference-mixing bugs at compile time. For example, `fn parents(&self) -> &[Link<Commit>]` will reject a `Link<Node>` at compile time.\n\nOn the wire, `Link<T>` is identical to a `Cid` - same bytes, same CBOR tag. The phantom type exists solely for Rust-level type safety.\n\n资料来源：[crates/mnem-core/src/id/link.rs:1-35]()\n\n### Node Object\n\nThe fundamental unit of content storage is the `Node`:\n\n```rust\npub struct Node {\n    pub id: NodeId,\n    pub ntype: NodeType,\n    pub context_sentence: Option<String>,\n    pub summary: Option<String>,\n    pub props: BTreeMap<String, Ipld>,\n    pub content: Option<Bytes>,\n    pub ext: Option<BTreeMap<String, Ipld>>,\n}\n```\n\n| Field | Purpose |\n|-------|---------|\n| `id` | Unique identifier |\n| `ntype` | Semantic type (e.g., `Chunk`, `Concept`, `Entity`) |\n| `context_sentence` | LLM-generated positional cue for contextual retrieval |\n| `summary` | Token-efficient content representation for LLM consumption |\n| `props` | Structured key-value properties |\n| `content` | Opaque payload (document body, file data) |\n| `ext` | Forward-compatibility extension map |\n\nThe `context_sentence` field implements Anthropic's 2024 Contextual Retrieval technique, storing LLM-generated placement cues alongside nodes. 
This reportedly reduces retrieval failure by 49-67%.\n\n资料来源：[crates/mnem-core/src/objects/node.rs:1-80]()\n\n---\n\n## Repository Structure\n\nmnem uses a CRDT-based repository model with the following object types:\n\n```mermaid\ngraph TD\n    A[View] --> B[Commit]\n    B --> C[Node]\n    B --> D[Edge]\n    C --> E[Node Content]\n    D --> F[\"Link#lt;Node#gt;\"]\n    D --> G[\"Link#lt;Node#gt;\"]\n    \n    B --> H[Operation]\n    H --> I[ChangeId]\n    H --> J[OperationId]\n```\n\n### Core Objects\n\n| Type | Description |\n|------|-------------|\n| `Node` | Atomic content unit with type, summary, and properties |\n| `Edge` | Directed relationship between two nodes |\n| `Commit` | Atomic snapshot of changes with parent references |\n| `Operation` | Individual change record for sync |\n| `View` | Immutable repository head view |\n| `IndexSet` | Secondary index collection |\n\n资料来源：[crates/mnem-core/src/lib.rs:10-20]()\n\n### Blockstore Abstraction\n\nThe system abstracts storage through the `Blockstore` trait, allowing different backends:\n\n```rust\npub trait Blockstore {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>>;\n    fn put(&self, data: &[u8]) -> Result<Cid>;\n}\n```\n\nReference implementations include in-memory stores, with pluggable backends for production use.\n\n---\n\n## Ingest Pipeline\n\nThe ingest pipeline handles parsing, chunking, and extracting content from various source types.\n\n```mermaid\ngraph LR\n    A[Raw Bytes] --> B[Source Detection]\n    B --> C[Parser]\n    C --> D[Chunker]\n    D --> E[Extractor]\n    E --> F[Transaction]\n    F --> G[Commit]\n```\n\n### Source Types\n\nThe system automatically detects source types from file extensions:\n\n| Extension | SourceKind | Chunker Strategy |\n|-----------|------------|------------------|\n| `md`, `markdown` | `Markdown` | Paragraph |\n| `pdf` | `Pdf` | SentenceRecursive (512 tokens, 64 overlap) |\n| `json`, `jsonl` | `Conversation` | Session (10 messages) |\n| `rs` | `Code(Rust)` | Structural 
|\n| `py`, `pyi` | `Code(Python)` | Structural |\n| `js`, `mjs`, `cjs` | `Code(JavaScript)` | Structural |\n| `ts`, `tsx`, `mts`, `cts` | `Code(TypeScript)` | Structural |\n| Other | `Text` | SentenceRecursive (256 tokens, 32 overlap) |\n\n资料来源：[crates/mnem-ingest/src/types.rs:1-50]()\n\n### Chunker Strategies\n\n| Strategy | Configuration | Use Case |\n|----------|---------------|----------|\n| `Paragraph` | - | Markdown documents |\n| `SentenceRecursive` | `max_tokens`, `overlap` | Text, PDFs |\n| `Session` | `max_messages` | Chat/conversation logs |\n| `Structural` | - | Code (tree-sitter based) |\n| `Recursive` | `max_tokens`, `overlap` | Generic recursive splitting |\n\nFor code parsing, mnem uses tree-sitter to extract named language constructs:\n\n| Language | Extracted Items |\n|----------|-----------------|\n| Rust | `function_item`, `struct_item`, `enum_item`, `trait_item` |\n| Python | `function_definition`, `class_definition` |\n| JavaScript/TypeScript | `function_declaration`, `class_declaration`, `method_definition` |\n| Go | `function_declaration`, `type_declaration` |\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:1-50]()\n\n### Auto-Chunker Defaults\n\nThe `auto_chunker` function maps source kinds to default chunking strategies:\n\n```rust\npub fn auto_chunker(kind: SourceKind, heuristics: ChunkerAuto) -> ChunkerKind {\n    match kind {\n        SourceKind::Markdown => ChunkerKind::Paragraph,\n        SourceKind::Text => ChunkerKind::SentenceRecursive { \n            max_tokens: 256, \n            overlap: 32 \n        },\n        SourceKind::Pdf => ChunkerKind::SentenceRecursive { \n            max_tokens: 512, \n            overlap: 64 \n        },\n        SourceKind::Conversation => ChunkerKind::Session { \n            max_messages: 10 \n        },\n        SourceKind::Code(_) => ChunkerKind::Structural,\n    }\n}\n```\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:1-60]()\n\n---\n\n## Retrieval System\n\nThe retrieval system provides agent-facing context 
assembly with token-budget packing.\n\n### Node Rendering\n\nNodes are rendered to a compact, deterministic YAML-like format for LLM consumption:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n```\n\nRendering rules:\n- `ntype` and `id` are always present\n- `context` appears before `summary` (per Anthropic's contextual retrieval recipe)\n- `summary` is clipped at 8192 chars by default (configurable via `MNEM_RENDER_SUMMARY_CAP_CHARS`)\n- Scalar props (`String`, `Integer`, `Float`, `Bool`) are emitted in BTreeMap order\n- Non-scalar props (`Link`, `Map`, `List`, `Bytes`, `Null`) are skipped\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:1-50]()\n\n### Retrieval Pipeline\n\n```mermaid\ngraph TD\n    A[Query] --> B[Filters]\n    B --> C[Vector Ranking]\n    C --> D[Sparse Ranking]\n    D --> E[Token Budget Packing]\n    E --> F[Rendered Context]\n```\n\nThe `Retriever` composes:\n1. **Filters** - Property-based pre-filtering\n2. **Vector Index** - Dense embedding similarity search\n3. **Sparse Ranking** - BM25-style keyword matching\n4. 
**Token Budget Packing** - Context window optimization\n\n---\n\n## Module Organization\n\n### mnem-core\n\nCore library providing:\n\n| Module | Exports |\n|--------|---------|\n| `id` | `Cid`, `NodeId`, `ChangeId`, `OperationId`, `Link<T>` |\n| `codec` | DAG-CBOR encode/decode, DAG-JSON debug export |\n| `objects` | `Node`, `Edge`, `Commit`, `Operation`, `View` |\n| `prolly` | Prolly tree algorithms, chunks, builder |\n| `store` | `Blockstore`, `OpHeadsStore` traits |\n| `repo` | `ReadonlyRepo`, `Transaction` facade |\n| `index` | `Query`, `BruteForceVectorIndex` |\n| `retrieve` | `Retriever` for agent-facing context |\n| `sign` | Ed25519 signing, revocation-list verification |\n\n### mnem-ingest\n\nContent ingestion library with optional LLM features:\n\n| Module | Description |\n|--------|-------------|\n| `chunk` | Text chunking strategies |\n| `code` | Tree-sitter based code parsing |\n| `conversation` | Chat log processing |\n| `extract` | Rule-based entity extraction |\n| `extract_keybert` | Statistical NER (feature-gated) |\n| `extract_llm` | LLM-based extraction (feature-gated) |\n| `md` | Markdown parsing |\n| `pdf` | PDF processing |\n| `sidecar` | External parser integration |\n\n资料来源：[crates/mnem-ingest/src/lib.rs:1-50]()\n\n---\n\n## Configuration\n\n### LLM Providers\n\nmnem supports multiple LLM providers for extraction and synthesis:\n\n| Provider | Config Keys |\n|----------|-------------|\n| OpenAI | `llm.provider=openai`, `llm.model`, `llm.api_key_env`, `llm.base_url` |\n| Ollama | `llm.provider=ollama`, `llm.model`, `llm.base_url` (defaults to `http://localhost:11434`) |\n\n### Retrieval Configuration\n\n| Key | Description | Default |\n|-----|-------------|---------|\n| `retrieve.limit` | Max results to return | - |\n| `retrieve.budget` | Token budget for context | - |\n| `retrieve.vector_cap` | Max vectors per query | - |\n| `retrieve.graph_expand` | Neighbor expansion count | - |\n| `retrieve.graph_depth` | Traversal depth | - |\n| 
`retrieve.rerank_top_k` | Top-k for reranking | - |\n| `retrieve.hyde_max_tokens` | HyDE document tokens | - |\n\n### Rerank Providers\n\nSupported rerankers: `cohere`, `voyage`, `jina`\n\n### NER Providers\n\n| Provider | Description |\n|----------|-------------|\n| `rule` | Capitalized-phrase heuristic (default) |\n| `none` | Disable entity extraction |\n\n---\n\n## Crate Dependencies\n\n```mermaid\ngraph TD\n    CLI[mnem-cli] --> CORE[mnem-core]\n    CLI --> INGEST[mnem-ingest]\n    HTTP[mnem-http] --> CORE\n    HTTP --> INGEST\n    INGEST --> CORE\n```\n\nAll crates maintain `#![forbid(unsafe_code)]` - no unsafe code permitted in the repository.\n\n资料来源：[crates/mnem-core/src/lib.rs:30-45]()\n\n---\n\n## Summary\n\nThe mnem system architecture implements a layered, modular design:\n\n1. **Storage Layer** - Content-addressed immutable storage with CRDT semantics\n2. **Data Model** - Nodes, edges, commits with phantom-typed links for type safety\n3. **Ingest Layer** - Pluggable parsers and chunkers for diverse source types\n4. **Retrieval Layer** - Hybrid vector/sparse search with token-budget aware packing\n5. 
**Interface Layer** - CLI and HTTP APIs for integration with agents and tools\n\nThe architecture prioritizes determinism, type safety, and modularity, making it suitable for building reliable agentic knowledge systems.\n\n---\n\n<a id='page-core-components'></a>\n\n## Core Components\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [Hybrid Retrieval System](#page-hybrid-retrieval)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-core/src/objects/commit.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/commit.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-core/src/objects/edge.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/edge.rs)\n- [crates/mnem-core/src/repo/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/repo/mod.rs)\n- [crates/mnem-core/src/repo/transaction.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/repo/transaction.rs)\n- [crates/mnem-core/src/repo/merge.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/repo/merge.rs)\n- [crates/mnem-core/src/prolly/tree.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/prolly/tree.rs)\n</details>\n\n# Core Components\n\nThe mnem system is built around a set of core components that work together to provide a versioned, graph-based knowledge management system. The core is implemented entirely in Rust with `#![forbid(unsafe_code)]`, ensuring memory safety throughout the codebase. Every object type preserves byte-exact canonical-encoding round-trip properties (`decode(encode(x)) == x` and `encode(decode(b)) == b`). 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## System Architecture Overview\n\nmnem implements a content-addressed graph database with prolly trees for efficient storage and retrieval. 
The architecture separates concerns between data structures (`objects`), storage (`store`), repository management (`repo`), and retrieval (`retrieve`).\n\n```mermaid\ngraph TD\n    subgraph \"mnem-core\"\n        OBJ[objects: Node, Edge, Commit, View]\n        PRO[prolly: TreeChunk, Builder, Cursor]\n        STORE[store: Blockstore, OpHeadsStore]\n        REPO[repo: ReadonlyRepo, Transaction]\n        IDX[index: Query, BruteForceVectorIndex]\n        RET[retrieve: Retriever]\n        CODEC[codec: DAG-CBOR, DAG-JSON]\n    end\n    \n    subgraph \"mnem-ingest\"\n        ING[Ingester Pipeline]\n        CHUNK[Chunking Strategies]\n        PARSE[Parsers: MD, PDF, Code, JSON]\n    end\n    \n    subgraph \"External Interfaces\"\n        CLI[mnem-cli]\n        HTTP[mnem-http]\n        MCP[MCP Server]\n    end\n    \n    ING -->|adds nodes/edges| REPO\n    REPO -->|reads/writes| STORE\n    RET -->|queries| IDX\n    IDX -->|indexes| OBJ\n    CODEC -->|encodes/decodes| OBJ\n    CLI --> REPO\n    HTTP --> REPO\n```\n\n## Data Objects\n\nThe fundamental building blocks of the mnem knowledge graph are the core object types defined in `crates/mnem-core/src/objects/`. Each object is serializable via DAG-CBOR for canonical encoding. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n### Node\n\nThe `Node` is the primary unit of knowledge storage. 
It represents a single fact, entity, or chunk of content within the graph.\n\n```rust\n// Simplified structure from crates/mnem-core/src/objects/node.rs\npub struct Node {\n    pub id: NodeId,                                    // Unique identifier\n    pub ntype: String,                                 // Node type label (e.g., \"Fact\", \"Doc\")\n    pub summary: Option<String>,                       // LLM-facing retrieval text\n    pub props: BTreeMap<String, Ipld>,                 // Property map\n    pub content: Option<Bytes>,                        // Optional opaque payload\n    pub context_sentence: Option<String>,              // Positional chunk prefix\n    pub ext: Option<BTreeMap<String, Ipld>>,           // Forward-compat extension map\n}\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | `NodeId` | Unique content-addressed identifier |\n| `ntype` | `String` | Free-form type label for the node |\n| `summary` | `Option<String>` | Text summary for LLM retrieval under token budget |\n| `props` | `BTreeMap<String, Ipld>` | Structured metadata with any DAG-CBOR value |\n| `content` | `Option<Bytes>` | Opaque payload (document body, file data) |\n| `context_sentence` | `Option<String>` | LLM-generated placement cue per Anthropic's contextual retrieval recipe |\n| `ext` | `Option<BTreeMap>` | Forward-compat extension map preserving unknown fields |\n\nThe `summary` field is designed for LLM consumption—the field agents read when assembling context under a token budget. It is distinct from `props` (structured) and `content` (opaque payload). 资料来源：[crates/mnem-core/src/objects/node.rs]()\n\nThe `context_sentence` implements Anthropic's 2024 Contextual Retrieval paper approach, which reports a 49% to 67% reduction in retrieval failures when present. mnem stores it on the node so the render path can surface it back to the agent for faithful source attribution. 
资料来源：[crates/mnem-core/src/objects/node.rs]()\n\n### Edge\n\nEdges connect nodes and represent relationships between entities.\n\n```rust\n// From crates/mnem-core/src/objects/edge.rs\npub struct Edge {\n    pub src: NodeId,           // Source node ID\n    pub rel: String,           // Relation label (e.g., \"works_at\", \"extracted_from\")\n    pub dst: NodeId,           // Destination node ID\n    pub props: BTreeMap<String, Ipld>,  // Optional edge properties\n}\n```\n\nEdges are used to create graph relationships like `works_at`, `lives_in`, `traveling_with`, `has_preference`, and `extracted_from`. The mnem-cli integration guidelines recommend using the compound `mnem_commit_relation` tool when both endpoints are entities—it resolves or creates both nodes and adds the edge in one call. 资料来源：[crates/mnem-cli/src/integrate.rs]()\n\n### Commit\n\nThe `Commit` object represents a point-in-time snapshot of the repository state.\n\n```rust\n// From crates/mnem-core/src/objects/commit.rs\npub struct Commit {\n    pub message: String,           // Commit message\n    pub author: Author,            // Author information\n    pub timestamp: Timestamp,       // Commit timestamp\n    pub root: NodeId,              // Root of the node tree\n    pub ops: Vec<Operation>,        // Operations applied in this commit\n}\n```\n\n### View\n\nThe `View` contains repository metadata including branch references and commit heads.\n\n```rust\n// Referenced in crates/mnem-http/src/handlers.rs\npub struct View {\n    pub heads: Vec<Cid>,           // Current head commit CIDs\n    pub refs: BTreeMap<String, RefTarget>,  // Named references\n}\n```\n\nThe View exposes branch information via the HTTP API with the schema `mnem.v1.branches`. 资料来源：[crates/mnem-http/src/handlers.rs]()\n\n### Operation\n\nOperations represent individual changes applied to the repository. 
They are collected within commits to provide a complete audit trail.\n\n## Repository Layer\n\nThe repository layer provides the main interface for interacting with the knowledge graph. It is defined across several modules in `crates/mnem-core/src/repo/`. 资料来源：[crates/mnem-core/src/repo/mod.rs]()\n\n### ReadonlyRepo\n\n`ReadonlyRepo` provides a read-only view into the repository state.\n\n```rust\n// Simplified from crates/mnem-core/src/repo/mod.rs\npub trait ReadonlyRepo {\n    fn view(&self) -> &View;\n    fn blockstore(&self) -> &dyn Blockstore;\n}\n```\n\n### Transaction\n\n`Transaction` enables write operations to the repository. All changes are staged until explicitly committed.\n\n```rust\n// From crates/mnem-core/src/repo/transaction.rs\npub struct Transaction {\n    // Internal state managing pending operations\n}\n\nimpl Transaction {\n    pub fn add_node(&mut self, node: Node) -> Result<NodeId, Error>;\n    pub fn add_edge(&mut self, edge: Edge) -> Result<EdgeId, Error>;\n    pub fn commit(self, author: Author, message: String) -> Result<ReadonlyRepo, Error>;\n}\n```\n\nThe `ingest` method on the Ingester pipeline uses Transaction to add nodes and edges:\n\n> Parse, chunk, extract, and write into `tx`. Does **not** commit. `bytes` is the raw source payload; `kind` says how to parse it. Returns an `IngestResult` with counts and elapsed time. The `commit_cid` field is left `None` - callers who want a CID should call `tx.commit(...)` afterwards and stash the returned `ReadonlyRepo`'s head commit CID. 资料来源：[crates/mnem-ingest/src/pipeline.rs]()\n\n### Merge and Conflict Detection\n\nThe merge system handles combining divergent repository states.\n\n```rust\n// From crates/mnem-core/src/repo/merge.rs\npub fn detect_conflicts(\n    repo: &ReadonlyRepo,\n    left: Cid,\n    right: Cid,\n    lca: Option<Cid>,\n) -> Result<MergeConflicts, Error>;\n```\n\nConflict detection supports an explicit `ConflictPolicy` for customizing merge behavior. 
The detector loads tombstone sets via the Views attached to each commit's operation. 资料来源：[crates/mnem-core/src/repo/conflict.rs]()\n\n```mermaid\ngraph LR\n    A[Commit A] -->|diverged| B[Common Ancestor]\n    C[Commit B] -->|diverged| B\n    B --> D[Detect Conflicts]\n    D --> E{MergeConflicts?}\n    E -->|Yes| F[Surface conflicts to caller]\n    E -->|No| G[Auto-merge possible]\n```\n\n## Prolly Trees\n\nmnem uses prolly trees (probabilistic trees) for efficient storage and lookup of the node graph. This is implemented in `crates/mnem-core/src/prolly/`. 资料来源：[crates/mnem-core/src/prolly/tree.rs]()\n\n```mermaid\ngraph TD\n    subgraph \"Prolly Tree Structure\"\n        ROOT[Root Node / TreeChunk] --> LEFT[Left Child TreeChunk]\n        ROOT --> RIGHT[Right Child TreeChunk]\n        LEFT --> LL[Leaf TreeChunk]\n        LEFT --> LR[Leaf TreeChunk]\n        RIGHT --> RL[Leaf TreeChunk]\n        RIGHT --> RR[Leaf TreeChunk]\n    end\n    \n    style ROOT fill:#e1f5fe\n    style LL fill:#f3e5f5\n    style LR fill:#f3e5f5\n    style RL fill:#f3e5f5\n    style RR fill:#f3e5f5\n```\n\nThe prolly tree implementation includes:\n\n| Component | Purpose |\n|-----------|---------|\n| `TreeChunk` | Immutable chunk containing sorted entries |\n| `Builder` | Constructs new trees from operations |\n| `Cursor` | Navigates tree structure for lookups |\n| `diff` | Computes differences between trees |\n| `merge` | Merges divergent tree versions |\n\nProlly trees provide logarithmic-time lookups and efficient diffing for collaborative editing scenarios. 
资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Storage Layer\n\nThe storage layer abstracts over different backend implementations.\n\n### Blockstore\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub trait Blockstore {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>, Error>;\n    fn put(&self, cid: &Cid, data: &[u8]) -> Result<(), Error>;\n}\n```\n\n### OpHeadsStore\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub trait OpHeadsStore {\n    fn get_heads(&self) -> Result<Vec<Cid>, Error>;\n    fn set_heads(&mut self, heads: &[Cid]) -> Result<(), Error>;\n}\n```\n\nThe codebase includes in-memory reference implementations of both traits for testing and development. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Index System\n\nSecondary indexes enable efficient querying of the knowledge graph.\n\n### Query\n\nThe primary query interface for searching nodes and edges.\n\n### BruteForceVectorIndex\n\nA vector index implementation for semantic search capabilities. This works in conjunction with the retrieve module to provide dense + sparse retrieval lanes that capture positional and relational context. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Retrieval System\n\nThe retrieve module provides the agent-facing interface for context assembly.\n\n```rust\n// From crates/mnem-core/src/retrieve/mod.rs\npub struct Retriever { /* ... */ }\n```\n\nThe retriever composes:\n\n1. **Filters** - Pre-filter nodes by type, properties, or time range\n2. **Vector ranking** - Dense embeddings from the configured embedder\n3. **Sparse ranking** - BM25-style keyword matching\n4. 
**Token-budget packing** - Assembles context within LLM token limits\n\n### Node Rendering\n\nNodes are rendered to a compact, deterministic YAML-like format suitable for LLM consumption:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n```\n\n- `ntype` and `id` are always present\n- `context` is emitted if `node.context_sentence` is `Some` (sits BEFORE summary per Anthropic's contextual-retrieval recipe)\n- `summary` is emitted if `node.summary` is `Some`, clipped at `DEFAULT_RENDER_SUMMARY_CAP_CHARS` (8192) chars\n- Scalar props (`String`, `Integer`, `Float`, `Bool`) are emitted in BTreeMap order\n- Non-scalar props (`Link`, `Map`, `List`, `Bytes`, `Null`) are skipped\n- Opaque `content` bytes are never rendered 资料来源：[crates/mnem-core/src/retrieve/mod.rs]()\n\n## Identification System\n\nmnem uses phantom-typed identifiers for type safety:\n\n| Type | Description |\n|------|-------------|\n| `NodeId` | Identifies a node in the graph |\n| `EdgeId` | Identifies an edge |\n| `ChangeId` | Identifies a change operation |\n| `OperationId` | Identifies an operation |\n| `Link<T>` | Phantom-typed link to any type |\n\nAll CIDs are content-addressed, ensuring that the same content always produces the same identifier. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Codec System\n\nThe codec system provides canonical encoding and decoding:\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub mod codec {\n    pub fn encode<T: Encode>(&self, value: &T) -> Vec<u8>;\n    pub fn decode<T: Decode>(&self, bytes: &[u8]) -> Result<T, Error>;\n}\n```\n\n- **DAG-CBOR** - Primary serialization format with canonical encoding guarantees\n- **DAG-JSON** - Debug export format for human inspection\n\nEvery object type preserves the byte-exact canonical-encoding round-trip property. 
资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Signing System\n\nThe `sign` module provides Ed25519 signing and revocation-list verification for trust and integrity:\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub mod sign {\n    // Ed25519 signing operations\n    // Revocation-list verification\n}\n```\n\n## Chunking Integration\n\nWhile chunking is primarily handled by the `mnem-ingest` crate, the core objects are designed to work seamlessly with chunked content:\n\nThe `Chunk` type is used throughout the system:\n\n```rust\n// Referenced from crates/mnem-ingest/src/chunk.rs\npub struct Chunk {\n    pub content: String,\n    pub tokens_estimate: usize,  // Fast whitespace-split estimation\n}\n```\n\nChunks preserve source order: section 0's chunks come before section 1's. Empty sections are skipped silently. 资料来源：[crates/mnem-ingest/src/chunk.rs]()\n\n## Summary\n\nThe mnem core components form a layered architecture:\n\n| Layer | Components | Responsibility |\n|-------|------------|----------------|\n| **Objects** | Node, Edge, Commit, View, Operation | Core data structures |\n| **Storage** | Blockstore, OpHeadsStore | Persistence abstraction |\n| **Trees** | ProllyTree, TreeChunk, Builder | Efficient ordered storage |\n| **Repository** | ReadonlyRepo, Transaction | Access control and mutation |\n| **Index** | Query, VectorIndex | Secondary access paths |\n| **Retrieval** | Retriever | Agent-facing context assembly |\n| **Codec** | DAG-CBOR, DAG-JSON | Canonical serialization |\n| **Crypto** | Ed25519 signing | Integrity and trust |\n\nThis architecture enables mnem to serve as a versioned, collaborative knowledge graph with strong consistency guarantees and efficient retrieval capabilities for LLM integration.\n\n---\n\n<a id='page-hybrid-retrieval'></a>\n\n## Hybrid Retrieval System\n\n### 相关页面\n\n相关主题：[Embedding Providers](#page-embed-providers)\n\n<details>\n<summary>Relevant Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- 
[crates/mnem-core/src/retrieve/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/retrieve/mod.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-http/src/handlers.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-http/src/handlers.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n</details>\n\n# Hybrid Retrieval System\n\n## Overview\n\nThe Hybrid Retrieval System in mnem is an agent-facing retrieval subsystem that composes multiple ranking strategies—vector (dense), sparse, and graph-based expansion—into a unified token-budgeted context assembly pipeline. 
It is designed for LLM consumption, enabling autonomous agents to fetch relevant nodes from the repository under strict token budgets.\n\nThe system lives in `crates/mnem-core/src/retrieve/` and is exposed via HTTP API (`crates/mnem-http/src/handlers.rs`) and CLI (`crates/mnem-cli/`).\n\n资料来源：[crates/mnem-core/src/lib.rs:18-23]()\n\n## Architecture\n\n```mermaid\ngraph TD\n    subgraph \"Retrieval Entry Points\"\n        HTTP[HTTP API: POST /v1/retrieve]\n        CLI[CLI: mnem retrieve]\n    end\n    \n    subgraph \"Hybrid Retrieval Core\"\n        RT[Retriever]\n        HF[Hybrid Fuser]\n        VQ[Vector Query]\n        SQ[Sparse Query]\n        GQ[Graph Expansion]\n        TB[Token Budget Packer]\n    end\n    \n    subgraph \"Indexes\"\n        VI[Vector Index]\n        SI[Sparse Index]\n        GI[Graph Index]\n    end\n    \n    HTTP --> RT\n    CLI --> RT\n    RT --> HF\n    HF --> VQ\n    HF --> SQ\n    HF --> GQ\n    VQ --> VI\n    SQ --> SI\n    GQ --> GI\n    HF --> TB --> Output[LLM Context]\n```\n\n## Core Components\n\n### Retriever\n\nThe `Retriever` struct is the main facade for retrieval operations. 
It orchestrates query planning, index selection, and result fusion.\n\n**Key Responsibilities:**\n- Accept a query string and configuration parameters\n- Dispatch parallel queries to vector, sparse, and graph indexes\n- Fuse ranked results using configurable strategies\n- Pack results into token budgets suitable for LLM context windows\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:1-50]()\n\n### Node Rendering\n\nBefore results reach the LLM, nodes are rendered to a compact, deterministic YAML-like text representation:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n...\n```\n\n**Rendering Rules:**\n| Field | Condition | Notes |\n|-------|-----------|-------|\n| `ntype` | Always | Node type identifier |\n| `id` | Always | UUID |\n| `context` | If `node.context_sentence` is `Some` | Position cue, emitted BEFORE summary |\n| `summary` | If `node.summary` is `Some` | Clipped at 8192 chars by default |\n| Scalar props | Always | Strings, integers, floats, booleans in BTreeMap order |\n| Non-scalar props | Skipped | Links, Maps, Lists, Bytes, Null |\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:60-95]()\n\n### Context Sentence (Anthropic Contextual Retrieval)\n\nmnem implements Anthropic's 2024 Contextual Retrieval recipe. 
Each node may carry an optional `context_sentence` field—an LLM-generated one-sentence placement cue.\n\n> \"This paragraph is from Section 3 of a legal contract between Alice and Bob's employer...\"\n\nThe ingest pipeline prepends this to `summary` before embedding so both dense and sparse lanes capture positional and relational context.\n\n资料来源：[crates/mnem-core/src/objects/node.rs:95-115]()\n\n## Retrieval Configuration\n\n### CLI Configuration Keys\n\n| Key | Type | Default | Description |\n|-----|------|---------|-------------|\n| `retrieve.limit` | `usize` | — | Maximum results to return |\n| `retrieve.budget` | `u32` | — | Token budget for result packing |\n| `retrieve.vector_cap` | `usize` | — | Vector index candidate cap |\n| `retrieve.graph_expand` | `usize` | — | Graph neighbor expansion count |\n| `retrieve.graph_depth` | `usize` | — | Graph traversal depth |\n| `retrieve.graph_decay` | `u32` | — | Decay factor for graph scores |\n| `retrieve.rerank_top_k` | `usize` | — | Top-K for re-ranking |\n| `retrieve.hyde_max_tokens` | `usize` | — | Max tokens for HyDE hypothesis |\n| `rerank.model` | `String` | — | Re-ranker model identifier |\n| `rerank.base_url` | `String` | — | Re-ranker service base URL |\n\n资料来源：[crates/mnem-cli/src/config.rs:1-100]()\n\n### HTTP API Parameters\n\nThe `POST /v1/retrieve` endpoint accepts the following JSON body:\n\n| Field | Type | Default | Description |\n|-------|------|---------|-------------|\n| `query` | `String` | Required | Search query |\n| `limit` | `usize` | 20 | Result limit (clamped to `MAX_RETRIEVE_LIMIT`) |\n| `vector_cap` | `usize` | — | Vector candidate cap (clamped to `MAX_VECTOR_CAP`) |\n| `rerank_top_k` | `usize` | — | Re-rank candidate count (clamped to `MAX_RERANK_TOP_K`) |\n| `hyde` | `bool` | false | Enable HyDE extractive summarization |\n| `summarize` | `bool` | false | Enable centroid + MMR summarization |\n| `summarize_k` | `usize` | 3 | Summary sentences count |\n\n**Clamping Constants:**\n- 
`MAX_RETRIEVE_LIMIT` — Prevents unbounded result sets\n- `MAX_VECTOR_CAP` — Bounds vector search candidates\n- `MAX_RERANK_TOP_K` — Limits re-ranking computation\n\n资料来源：[crates/mnem-http/src/handlers.rs:200-280]()\n\n## Chunking Strategies\n\nThe retrieval system operates on pre-chunked content. The ingest pipeline supports five chunking strategies, selectable per source kind:\n\n| Strategy | Source Kind | Configuration | Behavior |\n|----------|-------------|---------------|----------|\n| `Paragraph` | Markdown | None | Splits on double-newline boundaries |\n| `SentenceRecursive` | Text | `max_tokens`, `overlap` | Sentence-aware token-budgeted packing using Unicode UAX #29 boundaries |\n| `SentenceRecursive` | PDF | `max_tokens=512`, `overlap=64` | Same as above with larger defaults |\n| `Session` | Conversation | `max_messages=10` | Groups messages until role returns to `user` or max reached |\n| `Structural` | Code | None | One chunk per section (function/class body from tree-sitter parser) |\n| `Recursive` | (legacy) | `max_tokens`, `overlap` | Token-budgeted word-window sliding window |\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:1-100]()\n\n### Auto-Chunking\n\nThe `auto_chunker(kind, heuristics)` function selects optimal strategies:\n\n```rust\nmatch kind {\n    SourceKind::Markdown => ChunkerKind::Paragraph,\n    SourceKind::Text => ChunkerKind::SentenceRecursive { max_tokens: 256, overlap: 32 },\n    SourceKind::Pdf => ChunkerKind::SentenceRecursive { max_tokens: 512, overlap: 64 },\n    SourceKind::Conversation => ChunkerKind::Session { max_messages: 10 },\n    SourceKind::Code(_) => ChunkerKind::Structural,\n}\n```\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:40-65]()\n\n## Source Kind Taxonomy\n\n| Kind | Extensions | Parser | Index Type |\n|------|------------|--------|------------|\n| `Markdown` | `.md`, `.markdown` | `parse_markdown` | Hybrid |\n| `Pdf` | `.pdf` | Sidecar (docling/unstructured) | Hybrid |\n| `Conversation` | `.json`, `.jsonl` | Session 
parser | Session |\n| `Text` | Other/unspecified | Raw text | Hybrid |\n| `Code(Rust)` | `.rs` | Tree-sitter | Structural |\n| `Code(Python)` | `.py`, `.pyi` | Tree-sitter | Structural |\n| `Code(JavaScript)` | `.js`, `.mjs`, `.cjs` | Tree-sitter | Structural |\n| `Code(TypeScript)` | `.ts`, `.tsx`, `.mts`, `.cts` | Tree-sitter | Structural |\n| `Code(Go)` | `.go` | Tree-sitter | Structural |\n| `Code(Java)` | `.java` | Tree-sitter | Structural |\n| `Code(C)` | `.c`, `.h` | Tree-sitter | Structural |\n| `Code(Cpp)` | `.cpp`, `.cc`, `.cxx`, `.hpp` | Tree-sitter | Structural |\n| `Code(Ruby)` | `.rb`, `.gemspec`, `.rake`, `.erb` | Tree-sitter | Structural |\n| `Code(CSharp)` | `.cs`, `.csx` | Tree-sitter | Structural |\n\n资料来源：[crates/mnem-ingest/src/types.rs:1-80]()\n\n## Retrieval Flow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant Retriever\n    participant VectorIndex\n    participant SparseIndex\n    participant GraphIndex\n    participant Fuser\n    participant TokenBudgetPacker\n    participant LLM\n\n    Client->>Retriever: query + config\n    Retriever->>VectorIndex: vector_search(query)\n    Retriever->>SparseIndex: sparse_search(query)\n    Retriever->>GraphIndex: graph_expand(seed_nodes)\n    VectorIndex-->>Fuser: ranked_candidates\n    SparseIndex-->>Fuser: ranked_candidates\n    GraphIndex-->>Fuser: ranked_candidates\n    Fuser->>Fuser: reciprocal_rank_fusion\n    Fuser->>TokenBudgetPacker: fused_results\n    alt summarize=true\n        TokenBudgetPacker->>TokenBudgetPacker: centroid_MMR_extraction\n    end\n    TokenBudgetPacker-->>LLM: token_budgeted_context\n```\n\n## HyDE (Hypothetical Document Embeddings)\n\nWhen `hyde=true`, the system generates extractive summaries of top-M candidate nodes before final ranking. This follows the HyDE (Hypothetical Document Embeddings) pattern where:\n\n1. Initial candidates are retrieved\n2. Extractive summarization produces hypotheses\n3. Hypotheses are re-embedded and ranked\n4. 
Final top-K are packed into the context budget\n\n资料来源：[crates/mnem-http/src/handlers.rs:250-270]()\n\n## Branch Name Validation\n\nThe HTTP API validates branch names before creating commit references during ingest operations:\n\n```\nInvalid characters: space, tab, newline, null, ~, ^, :, ?, *, [, \\, @{, .., //\nInvalid patterns: leading /, trailing /, trailing ., trailing .lock\n```\n\n资料来源：[crates/mnem-http/src/handlers.rs:180-210]()\n\n## Extractor Integration\n\nThe retrieval system works in conjunction with the entity extraction pipeline. Extractors produce entity spans and relation spans that populate the graph index:\n\n| Extractor | Provider | Features |\n|-----------|----------|----------|\n| `RuleExtractor` | Default (NER) | Capitalized phrase heuristic, verb-window regex relations |\n| `KeyBertAdapter` | Statistical | Requires `keybert` feature flag |\n| `LLM` | Ollama | Requires `ollama` feature flag |\n\n资料来源：[crates/mnem-ingest/src/extract.rs:1-100]()\n\n## Configuration Example\n\n```toml\n[retrieve]\nlimit = 20\nbudget = 4096\nvector_cap = 100\ngraph_expand = 5\ngraph_depth = 2\ngraph_decay = 80\nrerank_top_k = 10\nhyde_max_tokens = 256\n\n[rerank]\nmodel = \"cross-encoder/ms-marco-MiniLM-L-6-v2\"\nbase_url = \"http://localhost:8080\"\n\n[ner]\nprovider = \"rule\"  # or \"none\"\n```\n\n资料来源：[crates/mnem-cli/src/config.rs:50-120]()\n\n## See Also\n\n- [Node Data Model](objects/node.md) — Node structure with summary and context_sentence\n- [Chunking Strategies](ingest/chunk.md) — Five chunker implementations\n- [HTTP API Reference](http/api.md) — REST endpoint documentation\n- [Entity Extraction](ingest/extract.md) — NER and relation extraction\n\n---\n\n<a id='page-embed-providers'></a>\n\n## Embedding Providers\n\n### 相关页面\n\n相关主题：[Hybrid Retrieval System](#page-hybrid-retrieval)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- 
[crates/mnem-cli/src/commands/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/mod.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n- [crates/mnem-http/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-http/src/lib.rs)\n- [crates/mnem-mcp/src/tools/embed.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-mcp/src/tools/embed.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-embed-providers/src/http.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-embed-providers/src/http.rs)\n</details>\n\n# Embedding Providers\n\nEmbedding Providers is a pluggable subsystem in the mnem monorepo that abstracts the generation of vector embeddings for text content. It lives in the `crates/mnem-embed-providers` crate and is consumed by `mnem-cli`, `mnem-http`, and `mnem-mcp` to support dense vector indexing and semantic retrieval.\n\n## Architecture Overview\n\nThe provider system follows a **strategy pattern** with runtime-configurable backends. 
Each provider implements the same `Embedder` trait, returning `Vec<f32>` vectors regardless of the underlying implementation (HTTP API, local model, ONNX runtime).\n\n```mermaid\ngraph TD\n    A[\"mnem-cli / mnem-http / mnem-mcp\"] --> B[\"mnem-embed-providers\"]\n    B --> C[\"ProviderConfig\"]\n    C --> D[\"OpenAI Provider\"]\n    C --> E[\"Ollama Provider\"]\n    C --> F[\"ONNX Provider\"]\n    D --> G[\"REST API / OpenAI Compatible\"]\n    E --> H[\"Local Ollama Server\"]\n    F --> I[\"Local ONNX Runtime\"]\n    \n    J[\"config.toml / ENV vars\"] --> B\n```\n\n资料来源：[crates/mnem-cli/src/commands/mod.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/mod.rs)\n\n## Supported Providers\n\n| Provider | Backend Type | Model Selection | Configuration |\n|----------|-------------|-----------------|---------------|\n| **OpenAI** | Remote REST API | Via `model` field | `base_url`, `api_key`, `timeout_secs` |\n| **Ollama** | Local REST API | Via `model` field | `base_url` (default: `http://localhost:11434`), `timeout_secs` |\n| **ONNX** | Local ONNX Runtime | Bundled `all-MiniLM-L6-v2` | No network required |\n\n资料来源：[crates/mnem-cli/src/config.rs:1-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n\n### OpenAI Provider\n\nSends text to OpenAI's embedding API or any OpenAI-compatible endpoint. Requires:\n- `base_url`: API endpoint (default: `https://api.openai.com/v1`)\n- `api_key`: Authentication token\n- `model`: Embedding model identifier\n\n### Ollama Provider\n\nConnects to a local Ollama server for running open-source embedding models. Default endpoint is `http://localhost:11434`. The provider sets a 120-second timeout by default.\n\n### ONNX Provider\n\nRuns inference entirely offline using the ONNX Runtime with the `all-MiniLM-L6-v2` model. 
This is the **bundled default** when mnem is compiled with the `bundled-embedder` feature, providing zero-configuration embeddings for single-machine deployments.\n\n资料来源：[crates/mnem-mcp/src/tools/embed.rs:1-60](https://github.com/Uranid/mnem/blob/main/crates/mnem-mcp/src/tools/embed.rs)\n\n## Configuration Resolution\n\nEmbedding providers are configured through a **precedence chain** that varies slightly between consumer applications.\n\n### mnem-cli Precedence\n\n| Priority | Source | Fields |\n|----------|--------|--------|\n| 1 | Environment variables | `MNEM_EMBED_PROVIDER`, `MNEM_EMBED_MODEL`, `MNEM_EMBED_API_KEY_ENV`, `MNEM_EMBED_BASE_URL`, `MNEM_EMBED_DIM` |\n| 2 | `~/.mnem/config.toml` | `[embed]` section |\n| 3 | `<repo>/config.toml` | `[embed]` section |\n| 4 | Bundled ONNX fallback | When compiled with `bundled-embedder` feature |\n\n资料来源：[crates/mnem-cli/src/config.rs:80-120](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n\n### mnem-http Precedence\n\n| Priority | Source | Behavior |\n|----------|--------|----------|\n| 1 | `POST /v1/embed` request body | Per-request model override |\n| 2 | `<data_dir>/config.toml` | Server-wide `[embed]` section |\n\nThe HTTP server loads embed configuration lazily at startup. 
A malformed `[embed]` section logs a warning but does not prevent server startup—auto-embed simply remains disabled.\n\n```rust\nfn load_embed_config(data_dir: &Path) -> Option<mnem_embed_providers::ProviderConfig> {\n    #[derive(serde::Deserialize)]\n    struct MiniCfg {\n        embed: Option<mnem_embed_providers::ProviderConfig>,\n    }\n    let path = data_dir.join(\"config.toml\");\n    let s = std::fs::read_to_string(&path).ok()?;\n    match toml::from_str::<MiniCfg>(&s) {\n        Ok(parsed) => parsed.embed,\n        Err(e) => {\n            tracing::warn!(path = %path.display(), error = %e,\n                \"config.toml [embed] parse failed; auto-embed disabled\"\n            );\n            None\n        }\n    }\n}\n```\n\n资料来源：[crates/mnem-http/src/lib.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-http/src/lib.rs)\n\n### mnem-mcp Precedence\n\nThe MCP server uses a simplified three-tier chain without the global `~/.mnem/config.toml` lookup (design point: per-repo isolation):\n\n1. `MNEM_EMBED_*` environment variables\n2. `<repo>/config.toml` `[embed]` section\n3. 
Bundled ONNX fallback (only when `bundled-embedder` feature is compiled)\n\n资料来源：[crates/mnem-mcp/src/tools/embed.rs:20-40](https://github.com/Uranid/mnem/blob/main/crates/mnem-mcp/src/tools/embed.rs)\n\n## ProviderConfig Schema\n\nThe configuration is parsed from TOML into a discriminated union:\n\n```rust\npub enum ProviderConfig {\n    Openai(OpenaiConfig),\n    Ollama(OllamaConfig),\n    Onnx(OnnxConfig),\n}\n```\n\nEach variant carries only the parameters relevant to that provider, keeping the configuration minimal.\n\n## Error Handling\n\nAll embedding operations return `EmbedError`, which is mapped from transport failures into actionable diagnostics:\n\n```mermaid\ngraph LR\n    A[\"ureq::Error\"] --> B{\"EmbedError\"}\n    B --> C[\"RateLimited\"]\n    B --> D[\"BadRequest<br/>status + body\"]\n    B --> E[\"Server<br/>status + body\"]\n    B --> F[\"Network<br/>transport message\"]\n    B --> G[\"Decode<br/>JSON parse failure\"]\n```\n\n资料来源：[crates/mnem-embed-providers/src/http.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-embed-providers/src/http.rs)\n\n### Error Display for Users\n\nWhen embedding fails, mnem-cli formats the error into a short, actionable one-liner suitable for `eprintln!`:\n\n| Provider | Common Cause | Suggestion |\n|----------|--------------|------------|\n| OpenAI | Invalid API key | Check `MNEM_EMBED_API_KEY_ENV` |\n| Ollama | Server not running | Verify `ollama serve` is active |\n| ONNX | Missing model file | Ensure `all-MiniLM-L6-v2` is bundled |\n\nThe `format_embed_failure` function accepts a `context` parameter (`\"embedding\"` for writes, `\"query embedding\"` for retrieval) to tailor suggestions.\n\n资料来源：[crates/mnem-cli/src/commands/mod.rs:50-100](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/mod.rs)\n\n## Integration with Node Storage\n\nEmbedding vectors are stored on `Node` objects for use during semantic retrieval:\n\n```rust\npub struct Node {\n    pub id: NodeId,\n    pub 
label: String,\n    pub summary: Option<String>,           // LLM-facing retrieval text\n    pub content: Option<Bytes>,           // Opaque payload\n    pub context_sentence: Option<String>,  // Anthropic contextual retrieval prefix\n    pub props: BTreeMap<String, Ipld>,    // Structured properties\n}\n```\n\nThe `summary` field is the primary text indexed by the dense embedder. The `context_sentence` (per Anthropic's 2024 Contextual Retrieval paper) is prepended to `summary` before embedding to capture positional context, reducing retrieval failure by 49-67%.\n\n资料来源：[crates/mnem-core/src/objects/node.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n\n## Bundled Embedder Feature\n\nThe `bundled-embedder` Cargo feature compiles in an ONNX provider with `all-MiniLM-L6-v2`. When enabled:\n\n- `mnem embed` works out-of-the-box without external services\n- The MCP `mnem_retrieve` tool has a tier-3 fallback when no explicit vector provider is configured\n- Ideal for air-gapped environments or local-first workflows\n\nWhen not enabled, missing embedder configuration results in a warning during ingest; nodes are created without vectors, and a recovery path via `mnem reindex` is promoted.\n\n## Summary\n\nEmbedding Providers abstracts vector generation behind a common interface, supporting three backends with distinct deployment profiles:\n\n- **OpenAI**: Cloud-hosted, highest quality, requires API credentials\n- **Ollama**: Self-hosted, flexible model selection, local compute\n- **ONNX**: Offline-capable, bundled model, zero-configuration\n\nConfiguration flows from environment variables through TOML files, with graceful fallback behavior that never prevents core operations from functioning.\n\n---\n\n<a id='page-storage-backend'></a>\n\n## Storage Backend\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- 
[crates/mnem-core/src/store/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/store/mod.rs)\n- [crates/mnem-core/src/store/blockstore.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/store/blockstore.rs)\n- [crates/mnem-core/src/store/op_heads.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/store/op_heads.rs)\n- [crates/mnem-backend-redb/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-backend-redb/src/lib.rs)\n- [crates/mnem-backend-redb/src/blockstore.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-backend-redb/src/blockstore.rs)\n- [crates/mnem-backend-redb/src/knn_edges_store.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-backend-redb/src/knn_edges_store.rs)\n</details>\n\n# Storage Backend\n\nThe storage backend is a critical subsystem in mnem that provides persistent storage for the content-addressable object graph. It abstracts storage operations behind well-defined traits, enabling pluggable storage implementations while maintaining a consistent API for the core data layer.\n\n## Architecture Overview\n\nThe storage backend follows a trait-based abstraction pattern where `mnem-core` defines the storage interfaces and concrete implementations are provided by backend crates. 
This separation allows the core logic to remain independent of specific storage technologies.\n\n```mermaid\ngraph TD\n    subgraph \"Application Layer\"\n        CLI[mnem-cli]\n        HTTP[mnem-http]\n    end\n    \n    subgraph \"mnem-core\"\n        Repo[Repository]\n        Transaction[Transaction]\n        Objects[Node / Edge / Commit]\n    end\n    \n    subgraph \"Storage Traits\"\n        Blockstore[Blockstore Trait]\n        OpHeadsStore[OpHeadsStore Trait]\n        KnnEdgesStore[KnnEdgesStore Trait]\n    end\n    \n    subgraph \"Backend Implementations\"\n        RedbBackend[mnem-backend-redb]\n    end\n    \n    CLI --> Repo\n    HTTP --> Repo\n    Repo --> Transaction\n    Transaction --> Blockstore\n    Transaction --> OpHeadsStore\n    Repo --> KnnEdgesStore\n    Blockstore --> RedbBackend\n    OpHeadsStore --> RedbBackend\n    KnnEdgesStore --> RedbBackend\n```\n\n## Core Storage Traits\n\nThe storage layer is built on three fundamental traits that define the contract between the core library and storage implementations.\n\n### Blockstore Trait\n\nThe `Blockstore` trait provides low-level operations for storing and retrieving binary data blocks identified by Content Identifiers (CIDs).\n\n```rust\n// crates/mnem-core/src/store/blockstore.rs\npub trait Blockstore: Send + Sync {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>>;\n    fn put(&self, block: &[u8]) -> Result<Cid>;\n    fn put_many<I>(&self, blocks: I) -> Result<Vec<Cid>>\n    where\n        I: IntoIterator<Item = Vec<u8>>,\n        I::IntoIter: Send + Sync;\n}\n```\n\n| Method | Purpose | Return Type |\n|--------|---------|-------------|\n| `get(cid)` | Retrieve a block by its CID | `Result<Option<Vec<u8>>>` |\n| `put(block)` | Store a single block, returning its CID | `Result<Cid>` |\n| `put_many(blocks)` | Batch insert multiple blocks | `Result<Vec<Cid>>` |\n\nThe trait implements the CAR (Content Addressable Archive) storage pattern where data integrity is verified through 
content hashing. 资料来源：[crates/mnem-core/src/store/blockstore.rs:1-20]()\n\n### OpHeadsStore Trait\n\nThe `OpHeadsStore` trait manages operation heads—references to the latest operations in the operational transform system. It supports both single-head and multi-head scenarios with conflict detection.\n\n```rust\n// crates/mnem-core/src/store/op_heads.rs\npub trait OpHeadsStore: Send + Sync {\n    fn get_heads(&self) -> Result<Vec<Cid>>;\n    fn put_head(&self, op: &Op) -> Result<()>;\n    fn put_heads(&self, ops: &[Op]) -> Result<()>;\n    fn merge_heads(&self, merged: Vec<Cid>) -> Result<()>;\n}\n```\n\n| Method | Purpose |\n|--------|---------|\n| `get_heads()` | Retrieve all current operation head CIDs |\n| `put_head(op)` | Atomically update the single head |\n| `put_heads(ops)` | Set multiple operation heads |\n| `merge_heads(merged)` | Replace heads with merged result after conflict resolution |\n\n资料来源：[crates/mnem-core/src/store/op_heads.rs:1-50]()\n\n### KnnEdgesStore Trait\n\nThe `KnnEdgesStore` trait provides specialized storage for k-nearest-neighbor graph edges, enabling efficient vector similarity searches.\n\n```rust\n// Backend interface for KNN edge storage\npub trait KnnEdgesStore: Send + Sync {\n    fn insert(&self, source_id: NodeId, embedding: &[f32]) -> Result<()>;\n    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(NodeId, f32)>>;\n}\n```\n\n资料来源：[crates/mnem-backend-redb/src/knn_edges_store.rs:1-30]()\n\n## Redb Backend Implementation\n\nThe `mnem-backend-redb` crate provides the reference implementation using the Redb embedded database, a fast, lightweight key-value store written in Rust.\n\n### Module Structure\n\n```\nmnem-backend-redb/\n├── src/\n│   ├── lib.rs           # Main entry point and configuration\n│   ├── blockstore.rs    # BlockStore implementation\n│   └── knn_edges_store.rs # KNN edge storage with HNSW\n```\n\n### Initialization\n\nThe backend initializes by opening or creating a Redb database file:\n\n```rust\n// 
crates/mnem-backend-redb/src/lib.rs\npub struct Backend {\n    db: redb::Database,\n    path: PathBuf,\n}\n\nimpl Backend {\n    pub fn open(path: &Path) -> Result<Self> {\n        let db = redb::Database::create(path)?;\n        Ok(Self { db, path: path.to_path_buf() })\n    }\n}\n```\n\n资料来源：[crates/mnem-backend-redb/src/lib.rs:1-50]()\n\n### BlockStore Implementation\n\nThe Redb blockstore implementation wraps the database with CAR-compatible semantics:\n\n```rust\n// crates/mnem-backend-redb/src/blockstore.rs\nimpl Blockstore for RedbBlockstore {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>> {\n        let key = cid.to_bytes();\n        let guard = self.db.begin()?;\n        let table = guard.open_table(BLOCKS_TABLE)?;\n        Ok(table.get(key)?.map(|v| v.value().as_bytes().to_vec()))\n    }\n    \n    fn put(&self, block: &[u8]) -> Result<Cid> {\n        let hash = multihash::Sha256::digest(block);\n        let cid = Cid::new_v1(DAG_CBOR, hash);\n        // Store with CID bytes as key\n    }\n}\n```\n\n资料来源：[crates/mnem-backend-redb/src/blockstore.rs:1-100]()\n\n### Storage Format\n\nThe redb backend organizes data into multiple tables:\n\n| Table Name | Key Type | Value Type | Purpose |\n|------------|----------|------------|---------|\n| `blocks` | CID bytes | Raw block data | Content-addressed storage |\n| `op_heads` | Fixed key | CID bytes | Operation head references |\n| `knn_edges` | NodeId | Serialized edges | Vector similarity graph |\n\n## Transaction Model\n\nmnem implements a transactional write model through the `Transaction` type in the repository layer. 
Transactions provide ACID-like semantics for graph modifications.\n\n```mermaid\ngraph LR\n    A[Begin Transaction] --> B[Add Nodes]\n    B --> C[Add Edges]\n    C --> D[Commit]\n    D --> E[Update OpHeads]\n    E --> F[Success]\n    \n    D --> G[Abort]\n    G --> H[Rollback]\n```\n\n### Write Operations\n\nTransactions support atomic batch operations:\n\n```rust\n// Conceptual transaction interface\nimpl Transaction {\n    pub fn add_node(&mut self, node: Node) -> Result<NodeId>;\n    pub fn add_edge(&mut self, source: NodeId, target: NodeId, relation: &str) -> Result<()>;\n    pub fn commit(self) -> Result<Commit>;\n}\n```\n\n资料来源：[crates/mnem-core/src/repo/mod.rs:1-100]()\n\n### Commit Structure\n\nEach commit creates an immutable snapshot of the repository state:\n\n```rust\n// crates/mnem-core/src/objects/commit.rs\npub struct Commit {\n    pub operation: Operation,\n    pub parent: Option<Cid>,\n    pub author: Author,\n    pub message: String,\n    pub timestamp: DateTime<Utc>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/commit.rs:1-50]()\n\n## Data Persistence Flow\n\n```mermaid\nsequenceDiagram\n    participant App as Application\n    participant Tx as Transaction\n    participant BS as BlockStore\n    participant OHS as OpHeadsStore\n    participant Redb as Redb DB\n\n    App->>Tx: begin()\n    Tx->>Tx: add_node(node)\n    Tx->>BS: put(block)\n    BS->>Redb: write(cid, data)\n    Tx->>Tx: add_edge(src, dst)\n    Tx->>BS: put(block)\n    Tx->>Tx: commit()\n    Tx->>BS: put(commit_block)\n    Tx->>OHS: put_head(new_op)\n    OHS->>Redb: update_heads()\n    Redb-->>Tx: success\n    Tx-->>App: commit_cid\n```\n\n## Configuration\n\nStorage backend behavior is configured through the `Config` structure:\n\n```rust\n// crates/mnem-cli/src/config.rs\npub struct Config {\n    pub store: Option<StoreConfig>,\n    pub data_dir: PathBuf,\n    // ...\n}\n\npub struct StoreConfig {\n    pub path: PathBuf,\n    pub flush_interval_ms: 
Option<u64>,\n}\n```\n\n资料来源：[crates/mnem-cli/src/config.rs:1-100]()\n\n### Configuration Options\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `path` | `PathBuf` | `data/` | Base directory for storage files |\n| `flush_interval_ms` | `u64` | `1000` | Periodic flush interval in milliseconds |\n\n## Object Types and Serialization\n\n### Node Storage\n\nNodes are serialized using DAG-CBOR and stored as blocks:\n\n```rust\n// crates/mnem-core/src/objects/node.rs\npub struct Node {\n    pub id: NodeId,\n    pub ntype: NodeType,\n    pub summary: Option<String>,\n    pub props: BTreeMap<String, Ipld>,\n    pub content: Option<Bytes>,\n    pub context_sentence: Option<String>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/node.rs:1-100]()\n\n### Edge Storage\n\nEdges link nodes with labeled relationships:\n\n```rust\n// crates/mnem-core/src/objects/edge.rs\npub struct Edge {\n    pub source: NodeId,\n    pub target: NodeId,\n    pub relation: String,\n    pub confidence: Option<f32>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/edge.rs:1-50]()\n\n## Error Handling\n\nStorage operations return the `Error` type defined in the core crate:\n\n```rust\n// crates/mnem-core/src/store/mod.rs\npub enum Error {\n    #[error(\"block not found: {0}\")]\n    BlockNotFound(Cid),\n    #[error(\"serialization failed: {0}\")]\n    SerializationFailed(String),\n    #[error(\"database error: {0}\")]\n    DatabaseError(String),\n}\n```\n\n资料来源：[crates/mnem-core/src/store/mod.rs:1-100]()\n\n### Error Recovery\n\n| Error Type | Recovery Strategy |\n|------------|-------------------|\n| `BlockNotFound` | Indicates data corruption; repository repair required |\n| `SerializationFailed` | Check data integrity; may indicate schema mismatch |\n| `DatabaseError` | Retry operation; check disk space and permissions |\n\n## Indexes and Secondary Storage\n\n### Vector Index\n\nThe KNN edges store maintains a vector index for similarity search 
operations:\n\n```mermaid\ngraph TD\n    A[Query Vector] --> B[KnnEdgesStore]\n    B --> C[HNSW Index]\n    C --> D[Approximate KNN Search]\n    D --> E[Top-K Results]\n```\n\nThe implementation uses HNSW (Hierarchical Navigable Small World) algorithm for efficient approximate nearest neighbor search. 资料来源：[crates/mnem-backend-redb/src/knn_edges_store.rs:1-100]()\n\n### Query Interface\n\nThe retrieve module composes vector search with graph traversal:\n\n```rust\n// crates/mnem-core/src/retrieve/mod.rs\npub struct Retriever {\n    blockstore: Arc<dyn Blockstore>,\n    knn_edges: Arc<dyn KnnEdgesStore>,\n    // ...\n}\n```\n\n## Related Documentation\n\n- [Repository Model](../core-concepts/repository-model) - Transaction and commit management\n- [Object Schema](../core-concepts/object-schema) - Node, Edge, and Commit structures\n- [Retrieval System](../features/retrieval) - Query and retrieval workflows\n- [Configuration Guide](../getting-started/configuration) - Storage configuration options\n\n---\n\n<a id='page-ingestion'></a>\n\n## Ingestion Pipeline\n\n### 相关页面\n\n相关主题：[Core Components](#page-core-components)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-ingest/src/pipeline.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pipeline.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n- [crates/mnem-ingest/src/pdf.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pdf.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n- [crates/mnem-cli/src/commands/ingest.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n- 
[crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n</details>\n\n# Ingestion Pipeline\n\nThe Ingestion Pipeline is the core system in mnem responsible for transforming external source documents (Markdown, PDFs, code files, conversations) into structured graph nodes within the repository. It handles parsing, chunking, entity extraction, and writing to the graph transaction—all without committing, allowing callers to control transaction boundaries.\n\n## Overview\n\nThe pipeline orchestrates a multi-stage process:\n\n1. **Detection** — Determine source kind from file extension or explicit configuration\n2. **Parsing** — Convert raw bytes into a list of `Section` objects\n3. **Chunking** — Split sections into semantically meaningful `Chunk` objects\n4. **Extraction** — Optionally identify entities and relations via rule-based or LLM providers\n5. **Writing** — Add nodes and edges to a borrowed `Transaction`\n\n```mermaid\ngraph TD\n    A[Raw Bytes] --> B[SourceKind Detection]\n    B --> C[Parser Selection]\n    C --> D[Parse to Sections]\n    D --> E[Chunker Strategy]\n    E --> F[Extract Entities & Relations]\n    F --> G[Transaction Write]\n    G --> H[IngestResult]\n    \n    C -->|md| C1[Markdown Parser]\n    C -->|pdf| C2[PDF Parser]\n    C -->|code| C3[Tree-sitter Parser]\n    C -->|json/jsonl| C4[Conversation Parser]\n    C -->|text| C5[Plain Text]\n```\n\n## Source Kind Detection\n\nThe `Ingester` automatically detects the source kind based on file extension. 
This determines both the parser and the default chunking strategy.\n\n| Extension(s) | SourceKind | Default Chunker |\n|--------------|------------|-----------------|\n| `.md`, `.markdown` | `Markdown` | `Paragraph` |\n| `.txt` | `Text` | `SentenceRecursive` (256 tokens, 32 overlap) |\n| `.pdf` | `Pdf` | `SentenceRecursive` (512 tokens, 64 overlap) |\n| `.json`, `.jsonl` | `Conversation` | `Session` (max 10 messages) |\n| `.rs` | `Code(Rust)` | `Structural` |\n| `.py`, `.pyi` | `Code(Python)` | `Structural` |\n| `.js`, `.mjs`, `.cjs` | `Code(JavaScript)` | `Structural` |\n| `.ts`, `.tsx`, `.mts`, `.cts` | `Code(TypeScript)` | `Structural` |\n| `.go` | `Code(Go)` | `Structural` |\n| `.java` | `Code(Java)` | `Structural` |\n| `.c`, `.h` | `Code(C)` | `Structural` |\n| `.cpp`, `.cc`, `.cxx`, `.hpp` | `Code(Cpp)` | `Structural` |\n| `.rb`, `.gemspec`, `.rake` | `Code(Ruby)` | `Structural` |\n| `.cs`, `.csx` | `Code(CSharp)` | `Structural` |\n| Unknown/ext none | `Text` | `SentenceRecursive` |\n\n资料来源：[pipeline.rs:source_kind_from_ext()]() [types.rs:SourceKind]() [types.rs:CodeLanguage::from_extension()]()\n\n## Supported File Formats\n\n### Markdown (`.md`, `.markdown`)\n\nParsed using CommonMark + GitHub Flavored Markdown (GFM) support. The parser extracts headings with depth information, creating section boundaries that respect document structure. Each heading becomes a section boundary.\n\n### PDF (`.pdf`)\n\nPure-Rust text-layer extraction using `pdf-extract`. One section per page is created, with heading set to `\"Page {n}\"` at depth 1. PDFs with fewer than 100 text characters per page are flagged as potentially scanned. Malformed PDFs return `Error::ParseFailed`. 资料来源：[pdf.rs:MIN_TEXT_PER_PAGE]() [pdf.rs:parse_pdf()]()\n\n### Code Files\n\nParsed using tree-sitter for supported languages (Rust, Python, JavaScript, TypeScript, Go, Java, C, Cpp, Ruby, CSharp). The parser extracts function and class bodies as sections, preserving structural boundaries. 
资料来源：[code.rs]()\n\n### Conversations (`.json`, `.jsonl`)\n\nSupports chat exports from ChatGPT, Claude, and generic conversation formats. Messages are extracted with role (user/assistant/system), content, and timestamps when available. 资料来源：[conversation.rs]()\n\n### Plain Text (`.txt` and others)\n\nFalls back to plain text parsing for unknown extensions and for files without an extension, such as `README`.\n\n## Chunker Strategies\n\nThe `ChunkerKind` enum defines five chunking strategies. Callers can override the auto-selected strategy via CLI or API.\n\n### Strategy Selection\n\n```mermaid\ngraph TD\n    A[SourceKind] --> B[auto_chunker]\n    B -->|Markdown| C[Paragraph]\n    B -->|Text| D[SentenceRecursive<br/>256 tokens, 32 overlap]\n    B -->|Pdf| E[SentenceRecursive<br/>512 tokens, 64 overlap]\n    B -->|Conversation| F[Session<br/>max 10 messages]\n    B -->|Code| G[Structural]\n```\n\n### Paragraph Chunker\n\nSplits each section's body on double-newline boundaries. Fast and deterministic, ideal for Markdown where authoring structure already matches desired chunk boundaries. 资料来源：[chunk.rs:ChunkerKind::Paragraph]()\n\n### Recursive Chunker\n\nToken-budgeted sliding window over words with configurable overlap. Kept for backwards compatibility. 资料来源：[chunk.rs:ChunkerKind::Recursive]()\n\n### SentenceRecursive Chunker\n\nSentence-aware token-budgeted packing using Unicode sentence boundaries (UAX #29). Preferred for prose:\n\n- Chunks never cut mid-sentence\n- Overlap measured at sentence granularity\n- Average chunk size is more uniform\n\nDefault for `Text` (256 tokens, 32 overlap) and `Pdf` (512 tokens, 64 overlap) source kinds. 资料来源：[chunk.rs:ChunkerKind::SentenceRecursive]() [chunk.rs:auto_chunker()]()\n\n### Session Chunker\n\nGroups contiguous conversation messages into session chunks. Boundaries fire on:\n- Role returning to `user`, OR\n- Reaching `max_messages` (default: 10)\n\nPreserves turn ordering. Default for `Conversation` source kind. 
资料来源：[chunk.rs:ChunkerKind::Session]()\n\n### Structural Chunker\n\nOne chunk per section. Used for code sources where each section is already a function or class body extracted by the tree-sitter parser. 资料来源：[chunk.rs:ChunkerKind::Structural]()\n\n## Entity Extraction\n\nThe pipeline optionally extracts entities and relations using configured extractors.\n\n### RuleExtractor (Default)\n\nDelegates entity detection to a `NerProvider` (default: capitalized-phrase heuristic) and proximity-based relation detection via verb-window regex. Supported relation patterns include: `joined`, `founded`, `acquired`, `owns`, `hired`, etc. 资料来源：[extract.rs:RuleExtractor]() [extract.rs:verb_window]()\n\n### Optional: OllamaExtractor\n\nSchema-constrained NER via a local Ollama server (gated behind `ollama` feature). Hallucinated spans are verified against section text and rejected. Failures degrade gracefully to empty results, keeping the rule-based baseline as the load-bearing path. 资料来源：[lib.rs:extract_llm]()\n\n### Optional: KeyBertAdapter\n\nStatistical entity extraction adapter driven by the server's configured embedder (gated behind `keybert` feature). 资料来源：[lib.rs:extract_keybert]()\n\n## Pipeline API\n\n### Ingester Configuration\n\n```rust\npub struct IngestConfig {\n    pub chunker: ChunkerKind,\n    pub extractor: ExtractorKind,\n    pub ner_provider: Option<NerProviderKind>,\n    pub include_text: bool,\n}\n```\n\n资料来源：[pipeline.rs:IngestConfig]() [lib.rs:IngestConfig]()\n\n### Core Method\n\n```rust\npub fn ingest(\n    &self,\n    tx: &mut Transaction,\n    bytes: &[u8],\n    kind: SourceKind,\n) -> Result<IngestResult, Error>\n```\n\n**Returns** an `IngestResult` with counts and elapsed time. 
The `commit_cid` field is left `None`—callers who want a CID should call `tx.commit(...)` afterwards.\n\n**Errors:**\n- `Error::ParseFailed` — parser rejects the input\n- `Error::UnsupportedSource` — source kind not covered\n- `Error::Commit` — upstream codec/blockstore failures from `Transaction::add_node`/`add_edge`\n\n资料来源：[pipeline.rs:Ingester::ingest()]()\n\n## CLI Integration\n\nThe `mnem ingest` command provides CLI access to the pipeline.\n\n```bash\nmnem ingest notes.md\nmnem ingest --text \"The quick brown fox\"\nmnem ingest --chunker recursive --max-tokens 1024 book.pdf\nmnem ingest --recursive docs/\n```\n\n### CLI Options\n\n| Flag | Description | Default |\n|------|-------------|---------|\n| `--chunker` | Strategy selection | `auto` |\n| `--max-tokens` | Target tokens per chunk | 512 |\n| `--overlap` | Overlap tokens (recursive) | 32 |\n| `--recursive` | Walk directory trees | false |\n| `--ntype` | Root Doc node label | `Doc` |\n| `-m`, `--message` | Commit message | Auto-generated |\n\n资料来源：[commands/ingest.rs:Args]()\n\n## Output Nodes\n\nThe pipeline writes three node types and one edge type to the graph:\n\n1. **Doc node** — Root node representing the ingested document\n2. **Chunk nodes** — Smaller content pieces with `summary`, `content`, `context_sentence`, and `props` fields\n3. **Entity nodes** — Extracted entities with span information\n4. **Relation edges** — Connections between entities based on relation extraction\n\n### Contextual Retrieval\n\nEach chunk optionally stores a `context_sentence`—an LLM-generated one-sentence placement cue (e.g., \"This paragraph is from Section 3 of a legal contract...\"). This is prepended to the summary before embedding, following Anthropic's 2024 Contextual Retrieval recipe, which reports a 49% to 67% reduction in retrieval failures. 
资料来源：[node.rs:Node.context_sentence]()\n\n## Sidecar Support\n\nFor PDFs with poor text-layer extraction, the pipeline supports escalation to external tools:\n\n- **docling** (gated behind `sidecar-docling` feature)\n- **unstructured-ingest** (gated behind `sidecar-unstructured` feature)\n\nSidecars are invoked when built-in PDF extraction quality is insufficient. 资料来源：[lib.rs:sidecar]()\n\n## Token Estimation\n\nToken counts are estimated via whitespace split (`tokens_estimate` field on `Chunk`). This is intentionally fast and deterministic. Cl100k accuracy is a documented future improvement. 资料来源：[chunk.rs:token estimation comment]()\n\n## Error Handling\n\n| Error Type | Cause | Recovery |\n|------------|-------|----------|\n| `ParseFailed` | Malformed input, encryption | Return error, don't create nodes |\n| `UnsupportedSource` | Unknown source kind | Return error |\n| `Commit` | Blockstore failure | Return error |\n| Sidecar errors | Missing binary, CLI failure | Return `Error::Sidecar` |\n| LLM extraction failure | Timeout, schema mismatch | Degrade to empty Vec |\n\n资料来源：[pipeline.rs:ingest errors]() [lib.rs:Error types]()\n\n---\n\n<a id='page-cli-commands'></a>\n\n## CLI Commands Reference\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-cli/src/main.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n- [crates/mnem-cli/src/commands/ingest.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n- [crates/mnem-cli/src/commands/tag.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/tag.rs)\n- [crates/mnem-cli/src/commands/refs.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/refs.rs)\n- [crates/mnem-cli/src/integrate.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/integrate.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n</details>\n\n# CLI Commands 
Reference\n\nThis page documents all command-line interface commands available in `mnem-cli`, the primary user-facing tool for interacting with mnem repositories.\n\n## Overview\n\nThe mnem CLI provides a unified interface for managing a local knowledge graph repository. It supports operations including repository initialization, content ingestion, node/edge manipulation, branching, tagging, retrieval, and third-party tool integration.\n\n```mermaid\ngraph TD\n    A[mnem CLI] --> B[Repository Operations]\n    A --> C[Content Ingestion]\n    A --> D[Graph Manipulation]\n    A --> E[Version Control]\n    A --> F[Retrieval]\n    A --> G[Integration]\n    \n    B --> B1[init]\n    B --> B2[status]\n    \n    C --> C1[ingest]\n    \n    D --> D1[add node]\n    D --> D2[add edge]\n    \n    E --> E1[log]\n    E --> E2[show]\n    E --> E3[refs]\n    E --> E4[tag]\n    E --> E5[branches]\n    \n    F --> F1[retrieve]\n    \n    G --> G1[integrate]\n```\n\n资料来源：[crates/mnem-cli/src/main.rs:40-90](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n\n## Global Options\n\nThe following options are available for all commands:\n\n| Option | Short | Description |\n|--------|-------|-------------|\n| `--repo <PATH>` | `-R` | Path to the repository directory (`.mnem/`). Defaults to walking up from the current directory, like `git` does. 
|\n\n资料来源：[crates/mnem-cli/src/main.rs:33-36](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n\n## Repository Operations\n\n### init\n\nInitializes a new mnem repository.\n\n```bash\nmnem init [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--path <PATH>` | Custom repository path |\n| `--name <NAME>` | Repository name |\n| `--author <NAME>` | Default author name |\n| `--email <EMAIL>` | Default author email |\n\n### status\n\nPrints current op-head, head commit, ref summary, and label counts.\n\n```bash\nmnem status [OPTIONS]\n```\n\n```bash\n# Examples:\nmnem status                    # current op + head commit + ref count\nmnem -R ~/notes status         # explicit repo path\n```\n\n## Content Ingestion\n\n### ingest\n\nParses external source files into the graph, creating Doc + Chunk + Entity nodes.\n\n```bash\nmnem ingest <PATH> [OPTIONS]\n```\n\n#### Supported Source Types\n\n| Extension | Source Kind | Chunker Strategy |\n|-----------|-------------|------------------|\n| `.md`, `.markdown` | Markdown | Paragraph |\n| `.txt` | Plain Text | SentenceRecursive (256 tokens, 32 overlap) |\n| `.pdf` | PDF | SentenceRecursive (512 tokens, 64 overlap) |\n| `.json`, `.jsonl` | Conversation | Session (10 messages max) |\n| `.rs` | Rust Code | Structural |\n| `.py`, `.pyi` | Python Code | Structural |\n| `.js`, `.mjs`, `.cjs` | JavaScript Code | Structural |\n| `.ts`, `.tsx`, `.mts`, `.cts` | TypeScript Code | Structural |\n| `.go` | Go Code | Structural |\n| `.java` | Java Code | Structural |\n| `.c`, `.h` | C Code | Structural |\n| `.cpp`, `.cc`, `.cxx`, `.hpp`, `.hxx` | C++ Code | Structural |\n| `.rb`, `.gemspec`, `.rake`, `.erb` | Ruby Code | Structural |\n| `.cs`, `.csx` | C# Code | Structural |\n| Other | Text | SentenceRecursive |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n\n#### ingest Options\n\n| Option | Description | 
Default |\n|--------|-------------|---------|\n| `<PATH>` | File or directory to ingest | Required (unless `--text`) |\n| `--text` | Inline text to ingest | - |\n| `--ntype <LABEL>` | Root Doc node label | `Doc` |\n| `--chunker <STRATEGY>` | Chunker strategy | `auto` |\n| `--max-tokens <N>` | Target tokens per chunk | `512` |\n| `--overlap <N>` | Overlap tokens between chunks | `32` |\n| `--recursive` | Walk directory trees | `false` |\n| `-m`, `--message <MSG>` | Commit message | Auto-generated |\n\n#### Chunker Strategies\n\n| Strategy | Description |\n|----------|-------------|\n| `auto` | Picks strategy based on source kind |\n| `paragraph` | Splits on double-newline (Markdown) |\n| `recursive` | Token-budgeted sliding window |\n| `sentence_recursive` | Sentence-aware token packing |\n| `session` | Groups conversation messages |\n| `structural` | One chunk per section (code) |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:60-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n\n## Graph Manipulation\n\n### add node\n\nCreates a new node in the graph.\n\n```bash\nmnem add node [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `-s`, `--summary <TEXT>` | Node summary |\n| `--label <LABEL>` | Node type label |\n| `--prop <KEY=VALUE>` | Property (can be repeated) |\n| `--context-sentence <TEXT>` | Positional context for retrieval |\n\n```bash\n# Examples:\nmnem add node -s \"Alice lives in Berlin\"\nmnem add node --label Person --prop name=Alice --prop city=Berlin -s \"Alice is a climber\"\n```\n\n### add edge\n\nCreates a directed edge between two nodes.\n\n```bash\nmnem add edge [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--from <UUID>` | Source node UUID |\n| `--to <UUID>` | Target node UUID |\n| `--label <LABEL>` | Edge type label |\n| `--prop <KEY=VALUE>` | Property (can be repeated) |\n\n```bash\n# Examples:\nmnem add edge --from <src-uuid> --to <dst-uuid> --label 
knows\n```\n\n资料来源：[crates/mnem-cli/src/main.rs:65-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n\n## Version Control\n\n### log\n\nWalks the op-log backwards from the current head.\n\n```bash\nmnem log [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--limit <N>` | Maximum number of operations to show |\n| `--format <FORMAT>` | Output format (`short`, `full`, `json`) |\n\n### show\n\nShows the full detail of one operation.\n\n```bash\nmnem show <OPERATION_ID>\n```\n\n```bash\n# Examples:\nmnem show 01HZ...\n```\n\n### refs\n\nManages symbolic references to commits.\n\n```bash\nmnem refs <SUBCOMMAND>\n```\n\n#### refs Subcommands\n\n| Subcommand | Description |\n|------------|-------------|\n| `list` | List every ref in the current view |\n| `set <name> <target>` | Set ref to point at a target CID |\n| `delete <name>` | Delete a ref |\n\n```bash\n# Examples:\nmnem refs list\nmnem refs set feature_branch 01HXYZ...\nmnem refs delete old_branch\n```\n\n资料来源：[crates/mnem-cli/src/commands/refs.rs:1-45](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/refs.rs)\n\n### tag\n\nManages named tags that point to commits.\n\n```bash\nmnem tag <SUBCOMMAND>\n```\n\n#### tag Subcommands\n\n| Subcommand | Description |\n|------------|-------------|\n| `list` | List every `refs/tags/<name>` ref with their target CIDs |\n| `create <name>` | Create a new tag |\n| `delete <name>` | Delete a tag |\n\n#### tag create Options\n\n| Option | Description |\n|--------|-------------|\n| `<name>` | Tag name (stored as `refs/tags/<name>`) |\n| `target` | Optional commit CID, ref name, branch shortname, or `HEAD` |\n| `--from <CID>` | Commit CID / ref / branch to point the tag at |\n\n```bash\n# Examples:\nmnem tag list\nmnem tag create v0.9\nmnem tag create release-2024 --from 01HZ...\nmnem tag delete 
v0.9\n```\n\n资料来源：[crates/mnem-cli/src/commands/tag.rs:1-60](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/tag.rs)\n\n### branches\n\nManages named branches in the repository.\n\n```bash\nmnem branches [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--list` | List all branches |\n| `--create <NAME>` | Create a new branch |\n| `--delete <NAME>` | Delete a branch |\n| `--switch <NAME>` | Switch to a branch |\n\n#### Branch Output Format\n\n```json\n{\n  \"schema\": \"mnem.v1.branches\",\n  \"branches\": [\n    {\"name\": \"main\", \"head\": \"<commit-cid>\", \"is_current\": true},\n    ...\n  ]\n}\n```\n\n## Retrieval\n\n### retrieve\n\nSearches the graph for nodes matching a query.\n\n```bash\nmnem retrieve [OPTIONS] <QUERY>\n```\n\n| Option | Description | Default |\n|--------|-------------|---------|\n| `--top-k <N>` | Number of results to return | `10` |\n| `--max-tokens <N>` | Maximum tokens in response | `4096` |\n| `--include <FIELD>` | Fields to include (summary, context, props) | All |\n| `--format <FORMAT>` | Output format (`text`, `json`) | `text` |\n\n```bash\n# Examples:\nmnem retrieve \"query\"\nmnem retrieve --top-k 5 --max-tokens 2048 \"machine learning\"\n```\n\n## Integration\n\n### integrate\n\nIntegrates mnem system prompts with third-party AI tools.\n\n```bash\nmnem integrate <HOST> [OPTIONS]\n```\n\n#### Supported Hosts\n\n| Host | System Prompt Path |\n|------|-------------------|\n| `claude-code` | `~/.claude/CLAUDE.md` |\n| `gemini-cli` | `~/.gemini/GEMINI.md` |\n| `cursor` | `~/.cursor/rules/mnem.mdc` |\n| `continue` | `~/.continue/config.json` |\n| `zed` | `~/.config/zed/settings.json` (Linux) or `~/Library/Application Support/Zed/settings.json` (macOS) |\n\n#### integrate Options\n\n| Option | Description |\n|--------|-------------|\n| `--install` | Install system prompt to host |\n| `--uninstall` | Remove system prompt from host |\n| `--status` | Show integration status 
|\n\n```mermaid\ngraph LR\n    A[mnem integrate] --> B{Host Selection}\n    B --> C[Claude Code]\n    B --> D[Cursor]\n    B --> E[Continue]\n    B --> F[Zed]\n    B --> G[Gemini CLI]\n    \n    C --> H[Markdown Marker]\n    D --> H\n    E --> I[JSON Field: systemMessage]\n    F --> J[JSON Field: assistant.system_prompt]\n    G --> H\n```\n\n资料来源：[crates/mnem-cli/src/integrate.rs:1-60](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/integrate.rs)\n\n## Configuration\n\nThe CLI loads configuration from `~/.config/mnem/config.toml` or `.mnem/config.toml` in the repository root.\n\n| Setting | Description |\n|---------|-------------|\n| `user.name` | Author name for commits |\n| `user.email` | Author email for commits |\n| `user.agent_id` | Agent identifier fallback |\n| `llm.provider` | LLM provider (`ollama`, `openai`, `anthropic`) |\n| `llm.model` | Model name |\n| `llm.base_url` | API base URL (default: `http://localhost:11434`) |\n| `llm.timeout_secs` | Request timeout (default: `120`) |\n\n#### Author String Format\n\nThe author string for commits is resolved in this order of precedence:\n1. `name <email>` if both are present\n2. `name` if only the name is present\n3. `email` if only the email is present\n4. `agent_id` if only the agent ID is present\n5. 
`mnem-cli` as fallback\n\n资料来源：[crates/mnem-cli/src/config.rs:1-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n\n## Command Pipeline\n\nThe following diagram shows how commands interact with the repository:\n\n```mermaid\ngraph TD\n    subgraph \"CLI Layer\"\n        A[mnem CLI] --> B[Commands]\n        B --> C[Ingest]\n        B --> D[Add]\n        B --> E[Retrieve]\n        B --> F[Refs/Tags]\n    end\n    \n    subgraph \"Core Layer\"\n        C --> G[Ingester Pipeline]\n        G --> H[Parser]\n        H --> I[Chunker]\n        I --> J[Extractor]\n        J --> K[Transaction]\n        \n        D --> K\n        F --> K\n        E --> L[Retriever]\n    end\n    \n    subgraph \"Storage Layer\"\n        K --> M[Transaction]\n        M --> N[Blockstore]\n        M --> O[OpHeadsStore]\n        L --> P[VectorIndex]\n        P --> N\n    end\n```\n\n## Exit Codes\n\n| Code | Meaning |\n|------|---------|\n| `0` | Success |\n| `1` | General error |\n| `2` | Invalid arguments |\n| `3` | Repository not found |\n| `4` | Object not found |\n| `5` | Conflict detected |\n\n---\n\n<a id='page-graphrag'></a>\n\n## GraphRAG Implementation\n\n### 相关页面\n\n相关主题：[Hybrid Retrieval System](#page-hybrid-retrieval), [Core Components](#page-core-components)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-graphrag/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/lib.rs)\n- [crates/mnem-graphrag/src/community.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/community.rs)\n- [crates/mnem-graphrag/src/calibration.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/calibration.rs)\n- [crates/mnem-graphrag/src/confidence.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/confidence.rs)\n- [crates/mnem-graphrag/src/summarize.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/summarize.rs)\n- 
[crates/mnem-core/src/retrieve/community_filter.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/retrieve/community_filter.rs)\n</details>\n\n# GraphRAG Implementation\n\nGraphRAG (Graph-based Retrieval Augmented Generation) is a hybrid retrieval approach that combines vector similarity search with graph-structured knowledge representation. In mnem, the GraphRAG implementation provides community-based entity extraction, confidence scoring, and intelligent graph traversal for enhanced context retrieval.\n\n## Overview\n\nThe mnem GraphRAG system operates as a layered architecture that:\n\n1. Extracts entities and relationships from ingested documents\n2. Builds a knowledge graph with typed edges and communities\n3. Enables community-aware retrieval that goes beyond simple vector similarity\n4. Provides confidence-calibrated results suitable for agentic workflows\n\nThe implementation lives in `crates/mnem-graphrag/` and integrates with the core retrieval pipeline in `crates/mnem-core/src/retrieve/`.\n\n## Core Components\n\n### Module Structure\n\n| Module | Purpose |\n|--------|---------|\n| `lib.rs` | Main entry point and public API exports |\n| `community.rs` | Community detection and hierarchy management |\n| `calibration.rs` | Confidence score calibration utilities |\n| `confidence.rs` | Confidence scoring algorithms |\n| `summarize.rs` | Community and entity summarization |\n\n### Community Detection (`community.rs`)\n\nCommunity detection partitions the knowledge graph into semantically coherent clusters. 
The implementation supports hierarchical community structures where:\n\n- **Leaf communities** contain tightly interconnected entities\n- **Parent communities** aggregate related sub-communities\n- **Cross-community edges** connect related concepts across boundaries\n\nCommunities are used during retrieval to:\n- Expand candidate sets by including related entities within the same community\n- Filter results to the most relevant community cluster\n- Enable \"zoom-in\" and \"zoom-out\" traversal patterns\n\n资料来源：[crates/mnem-graphrag/src/community.rs]()\n\n### Confidence Scoring (`confidence.rs`)\n\nEvery extracted entity and relationship receives a confidence score based on:\n\n- **Extraction evidence**: Frequency and clarity of mentions in source documents\n- **Graph connectivity**: Number and strength of edges connecting to other entities\n- **Source reliability**: Document-level trust signals from the ingest pipeline\n\nConfidence scores are normalized to a `[0.0, 1.0]` range and drive downstream filtering decisions.\n\n资料来源：[crates/mnem-graphrag/src/confidence.rs]()\n\n### Calibration (`calibration.rs`)\n\nCalibration ensures that confidence scores accurately reflect true extraction quality. 
The module provides:\n\n- **Score distribution analysis**: Histogram-based validation of score distributions\n- **Threshold tuning**: Per-use-case threshold adjustment for precision/recall tradeoffs\n- **Calibration curves**: Tools for evaluating score reliability\n\n资料来源：[crates/mnem-graphrag/src/calibration.rs]()\n\n### Summarization (`summarize.rs`)\n\nThe summarization module generates concise descriptions for:\n\n- **Individual entities**: One-sentence summaries capturing core identity\n- **Relationships**: Edge labels and descriptions explaining connections\n- **Communities**: Multi-sentence overviews of community purpose and membership\n\nSummaries are stored as `context_sentence` on Node objects, enabling contextual retrieval patterns described in the Anthropic Contextual Retrieval paper.\n\n资料来源：[crates/mnem-graphrag/src/summarize.rs]()\n\n## Retrieval Integration\n\n### Community Filter (`community_filter.rs`)\n\nThe retrieval pipeline integrates GraphRAG through the community filter stage. When enabled, the retriever:\n\n1. Identifies the community containing the top-scoring candidate\n2. Expands the candidate set to include other high-confidence entities in that community\n3. 
Re-ranks the expanded set using the configured reranker\n\n```mermaid\ngraph TD\n    A[Query Embedding] --> B[Vector Search]\n    B --> C[Initial Candidates]\n    C --> D[Community Detection]\n    D --> E[Community Expansion]\n    E --> F[Reranker]\n    F --> G[Final Results]\n```\n\n资料来源：[crates/mnem-core/src/retrieve/community_filter.rs]()\n\n### Retriever Configuration\n\nThe `Retriever` struct in `crates/mnem-core/src/retrieve/retriever.rs` exposes GraphRAG-related options:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `graph_expand` | `Option<usize>` | `None` | Expansion radius for community-based retrieval |\n| `graph_decay` | `Option<f32>` | `None` | Decay factor for graph traversal weights |\n| `graph_depth` | `Option<usize>` | `None` | Maximum traversal depth |\n| `community_filter_enabled` | `bool` | `false` | Enable community-based filtering |\n| `ppr_size_gate` | `Option<usize>` | `None` | PPR personalization size threshold |\n\n### PPR-Based Expansion\n\nFor larger graphs, mnem supports Personalized PageRank (PPR) based expansion using the adjacency index:\n\n```rust\nadjacency_index: Option<Arc<dyn AdjacencyIndex + Send + Sync>>\n```\n\nWhen the adjacency index is available, PPR mode provides:\n- Personalized scoring based on seed nodes\n- Cohesive community member inclusion\n\nWhen the index is unavailable, expansion falls back to the historical decay walk.\n\n资料来源：[crates/mnem-core/src/retrieve/retriever.rs:10-50]()\n\n## Data Flow\n\n### Ingest Pipeline to GraphRAG\n\n```mermaid\ngraph LR\n    A[Source File] --> B[Parser]\n    B --> C[Chunker]\n    C --> D[Entity Extractor]\n    D --> E[Graph Builder]\n    E --> F[Community Detection]\n    F --> G[Confidence Scoring]\n    G --> H[Committed Nodes/Edges]\n```\n\nThe ingest pipeline in `crates/mnem-ingest/src/pipeline.rs` coordinates:\n\n1. **Parsing**: Detect source type and extract raw content\n2. 
**Chunking**: Split into manageable units using auto-selected chunker\n3. **Extraction**: Rule-based or LLM-powered entity extraction\n4. **Graph building**: Create nodes and edges in the transaction\n5. **Commit**: Persist to the IPLD-based object store\n\n资料来源：[crates/mnem-ingest/src/pipeline.rs]()\n\n### Chunk Strategy by Source Type\n\n| Source Kind | Chunker | Tokens | Overlap |\n|-------------|---------|--------|---------|\n| Markdown | Paragraph | - | - |\n| Text | SentenceRecursive | 256 | 32 |\n| PDF | SentenceRecursive | 512 | 64 |\n| Conversation | Session | 10 messages | - |\n| Code | Structural | - | - |\n\n资料来源：[crates/mnem-ingest/src/chunk.rs]()\n\n## Configuration\n\n### TOML Configuration\n\n```toml\n[retrieve]\nlimit = 20              # Maximum results\nbudget = 8192           # Token budget\nvector_cap = 10         # Vector search candidates\ngraph_expand = 5        # Community expansion size\ngraph_depth = 3         # Traversal depth\nrerank_top_k = 5        # Final reranking pool\n\n[community]\nenabled = true          # Enable community filtering\nmin_community_size = 3  # Minimum entities per community\n```\n\n### CLI Configuration\n\n```bash\n# Ingest with community extraction\nmnem ingest --extractor keybert docs/\n\n# Retrieve with community expansion\nmnem retrieve \"query\" --graph-expand 10\n\n# Configure via config command\nmnem config set retrieve.graph_expand 5\n```\n\n资料来源：[crates/mnem-cli/src/config.rs]()\n\n## API Reference\n\n### Core Types\n\n#### `Community`\n\n```rust\npub struct Community {\n    pub id: CommunityId,\n    pub parent: Option<CommunityId>,\n    pub members: Vec<EntityId>,\n    pub summary: Option<String>,\n    pub depth: u32,\n}\n```\n\n#### `Entity`\n\n```rust\npub struct Entity {\n    pub id: EntityId,\n    pub ntype: String,\n    pub summary: Option<String>,\n    pub context_sentence: Option<String>,\n    pub confidence: f32,\n    pub community: Option<CommunityId>,\n}\n```\n\n### Public API (`lib.rs`)\n\n| 
Function | Signature | Description |\n|----------|-----------|-------------|\n| `detect_communities` | `(graph: &Graph) -> Vec<Community>` | Run community detection |\n| `score_entity` | `(entity: &Entity, graph: &Graph) -> f32` | Calculate confidence |\n| `calibrate_scores` | `(scores: Vec<f32>) -> Vec<f32>` | Apply calibration |\n| `summarize_community` | `(community: &Community) -> String` | Generate summary |\n| `expand_from_seed` | `(seed: &[NodeId], depth: usize) -> Vec<NodeId>` | Graph expansion |\n\n## Architecture Diagram\n\n```mermaid\ngraph TD\n    subgraph \"Ingest Layer\"\n        I1[Markdown]\n        I2[PDF]\n        I3[Code]\n        I4[Conversation]\n    end\n    \n    subgraph \"Extract Layer\"\n        E1[Rule Extractor]\n        E2[LLM Extractor]\n        E3[KeyBERT Adapter]\n    end\n    \n    subgraph \"Graph Layer\"\n        G1[Node Builder]\n        G2[Edge Builder]\n        G3[Community Detector]\n    end\n    \n    subgraph \"Score Layer\"\n        S1[Confidence Scorer]\n        S2[Calibrator]\n    end\n    \n    subgraph \"Retrieve Layer\"\n        R1[Vector Index]\n        R2[Community Filter]\n        R3[Reranker]\n    end\n    \n    I1 --> E1\n    I2 --> E2\n    I3 --> E1\n    I4 --> E3\n    \n    E1 --> G1\n    E2 --> G1\n    E3 --> G1\n    \n    G1 --> G2\n    G2 --> G3\n    \n    G3 --> S1\n    S1 --> S2\n    \n    S2 --> R1\n    R1 --> R2\n    R2 --> R3\n```\n\n## Experimental Features\n\n### E1: Community Expander\n\nExperiment E1 enables community expansion during retrieval:\n\n- When `cfg.enabled` is `false` (default): Stage is a no-op\n- When enabled: Top-N seeds' communities pull in additional cohesive members\n- **Additive only**: Never drops existing candidates\n- Matrix v4 showed a 29pp R@10 regression with the old drop-filter semantics\n\n### E2: PPR Graph Expansion\n\nExperiment E2 introduces Personalized PageRank for graph expansion:\n\n- Uses optional `AdjacencyIndex` for efficient neighborhood queries\n- Falls back to 
historical decay walk when index unavailable\n- Maintains byte-identical retrieval for default configuration\n\n资料来源：[crates/mnem-core/src/retrieve/retriever.rs]()\n\n## Warnings and Diagnostics\n\nThe retrieval system emits warnings when GraphRAG features encounter issues:\n\n| Warning Code | Feature | Description |\n|--------------|---------|-------------|\n| `community_filter` | Community Filter | No-op community filter triggered |\n| `graph_mode` | PPR | PPR ran without substrate graph |\n| `graph_expand` | Expansion | Authored adjacency list was empty |\n| `min_confidence` | Confidence | Results fell below confidence floor |\n| `warnings_truncated` | Diagnostics | Warning list was truncated |\n\n资料来源：[crates/mnem-core/src/retrieve/warnings.rs]()\n\n## Best Practices\n\n1. **Enable community filtering** for queries requiring holistic context\n2. **Tune `graph_expand`** based on graph density—larger graphs need smaller expansion radii\n3. **Calibrate confidence thresholds** per use case using the calibration module\n4. **Use structural chunking** for codebases to capture function-level entity granularity\n5. **Set `context_sentence`** on high-value nodes to improve contextual retrieval\n\n## See Also\n\n- [Retrieval System](../core/retrieve.md)\n- [Ingest Pipeline](../ingest/pipeline.md)\n- [Configuration Guide](../cli/config.md)\n- [Contextual Retrieval](../core/contextual_retrieval.md)\n\n---\n\n---\n\n## Doramagic 踩坑日志\n\n项目：Uranid/mnem\n\n摘要：发现 8 个潜在踩坑项，其中 1 个为 high/blocking；最高优先级：安全/权限坑 - 来源证据：[feature] hermes support。\n\n## 1. 安全/权限坑 · 来源证据：[feature] hermes support\n\n- 严重度：high\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[feature] hermes support\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_c54919b2b8b340438a9e5aa17291b93a | https://github.com/Uranid/mnem/issues/27 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 2. 
能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | github_repo:1221867246 | https://github.com/Uranid/mnem | README/documentation is current enough for a first validation pass.\n\n## 3. 维护坑 · 来源证据：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_5c74e7a10f774af6b0460b5da009d1b4 | https://github.com/Uranid/mnem/issues/23 | 来源讨论提到 windows 相关条件，需在安装/试用前复核。\n\n## 4. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | last_activity_observed missing\n\n## 5. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n\n## 6. 安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n\n## 7. 
维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | issue_or_pr_quality=unknown\n\n## 8. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | release_recency=unknown\n\n<!-- canonical_name: Uranid/mnem; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "mnem",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "github_repo:1221867246",
          "kind": "repo",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/Uranid/mnem"
        },
        {
          "evidence_id": "art_019fe669d31c4feeba30b494eebe6096",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/Uranid/mnem#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki 完整输出，末尾追加 Discovery Agent 踩坑日志。",
      "title": "mnem 说明书",
      "toc": [
        "https://github.com/Uranid/mnem 项目说明书",
        "目录",
        "Introduction to mnem",
        "Overview",
        "Architecture",
        "Core Data Model",
        "Document Ingestion Pipeline",
        "Retrieval System",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "1dfa0d9e928a5da11c03eb0389b6d5ee7c8e7388",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "Dockerfile",
      "README.md",
      "docker-compose.yml",
      "docs/system-prompt.md",
      "docs/SPEC.md",
      "docs/book.toml",
      "docs/README.md",
      "docs/ROADMAP.md",
      "docs/src/SUMMARY.md",
      "docs/src/cli.md",
      "docs/src/quickstart.md",
      "docs/src/configuration.md",
      "docs/src/install.md",
      "docs/src/introduction.md",
      "docs/src/core-concepts.md",
      "docs/src/mcp.md",
      "docs/features/content-addressing.md",
      "docs/features/integrations.md",
      "docs/features/single-binary.md",
      "docs/features/token-transparency.md",
      "docs/features/hybrid-retrieval.md",
      "docs/features/versioned-memory.md",
      "docs/features/deterministic-ingest.md",
      "docs/features/providers.md",
      "docs/features/skills-graph.md",
      "docs/features/rich-ingest.md",
      "docs/features/benchmarks.md",
      "docs/features/wasm-edge.md",
      "docs/src/migrations/v0.x-to-v0.1.0.md",
      "docs/src/guides/ingest.md",
      "docs/src/guides/embed-providers.md",
      "docs/src/comparisons/graphify.md",
      "docs/src/comparisons/mem0.md",
      "docs/src/comparisons/letta.md",
      "docs/src/comparisons/mempalace.md",
      "docs/src/comparisons/supermemory.md",
      "docs/src/comparisons/README.md",
      "docs/src/comparisons/cognee.md",
      "docs/src/benchmarks/methodology.md",
      "docs/src/benchmarks/results.md"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [
      "community_discussion_evidence_below_public_threshold"
    ],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# mnem-cli - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 mnem-cli 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **正在使用 Claude/Codex/Cursor/Gemini 等宿主 AI 的开发者**：README 或插件配置提到多个宿主 AI。 证据：`README.md` Claim：`clm_0002` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md`, `docs/src/install.md`, `docs/src/quickstart.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `pip install mnem-cli         # gives you the` 证据：`README.md` Claim：`clm_0003` supported 0.86\n- `pip install mnem-py           # Python library — import with` 证据：`README.md` Claim：`clm_0004` supported 0.86\n- `npm install -g mnem-cli` 证据：`README.md` Claim：`clm_0005` supported 0.86\n- `npx mnem-cli --version` 证据：`README.md` Claim：`clm_0006` supported 0.86\n- `pip install mnem-cli` 证据：`README.md` Claim：`clm_0003` supported 0.86, `clm_0007` supported 0.86\n- `git clone https://github.com/Uranid/mnem` 证据：`README.md` Claim：`clm_0008` supported 0.86\n- `pip install mnem-py                 # package name on PyPI; import name is pymnem` 证据：`README.md` Claim：`clm_0009` supported 0.86\n- `pip install sentence-transformers   # brings ~200 MB of deps (torch, transformers) — one-time download` 证据：`README.md` Claim：`clm_0010` supported 0.86\n- `curl -L https://github.com/Uranid/mnem/releases/latest/download/mnem-linux-x86_64.tar.gz | tar xz` 
证据：`docs/src/install.md` Claim：`clm_0011` supported 0.86\n- `curl http://127.0.0.1:9876/v1/retrieve -d '{\"text\": \"what does this do\"}'` 证据：`docs/src/quickstart.md` Claim：`clm_0012` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：需要管理员/安全审批\n- **为什么**：继续前可能涉及密钥、账号、外部服务或敏感上下文，建议先经过管理员或安全审批。\n\n### 30 秒判断\n\n- **现在怎么做**：需要管理员/安全审批\n- **最小安全下一步**：先跑 Prompt Preview；若涉及凭证或企业环境，先审批再试装\n- **先别相信**：角色质量和任务匹配不能直接相信。\n- **继续会触碰**：角色选择偏差、命令执行、本地环境或项目文件\n\n### 现在可以相信\n\n- **适合人群线索：正在使用 Claude/Codex/Cursor/Gemini 等宿主 AI 的开发者**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md`, `docs/src/install.md`, `docs/src/quickstart.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n### 现在还不能相信\n\n- **角色质量和任务匹配不能直接相信。**（unverified）：角色库证明有很多角色，不证明每个角色都适合你的具体任务，也不证明角色能产生高质量结果。\n- **不能把角色文案当成真实执行能力。**（unverified）：安装前只能判断角色描述和任务画像是否匹配，不能证明它能在宿主 AI 里完成任务。\n- **真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n\n### 继续会触碰什么\n\n- **角色选择偏差**：用户对任务应该由哪个专家角色处理的判断。 原因：选错角色会让 AI 从错误专业视角回答，浪费时间或误导决策。\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`, `docs/src/install.md`, `docs/src/quickstart.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`, `docs/src/install.md`, `docs/src/quickstart.md`\n- **环境变量 / API Key**：项目入口文档明确出现 API key、token、secret 或账号凭证配置。 原因：如果真实安装需要凭证，应先使用测试凭证并经过权限/合规判断。 证据：`README.es.md`, 
`README.md`, `README.zh-CN.md`\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：先用交互式试用验证任务画像和角色匹配，不要先导入整套角色库。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **不要使用真实生产凭证**：环境变量/API key 一旦进入宿主或工具链，可能产生账号和合规风险。（适用：出现 API、TOKEN、KEY、SECRET 等环境线索时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- **保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **保留原始角色选择记录**：如果输出偏题，可以回到任务画像阶段重新选择角色，而不是继续沿着错误角色推进。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- **准备撤销测试 API key 或 token**：测试凭证泄露或误用时，可以快速止损。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0013` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md`, `docs/src/install.md`, `docs/src/quickstart.md` Claim：`clm_0014` supported 0.86\n- **待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？原因：兼容性只能通过实际宿主环境验证。\n- **待确认**：项目输出质量是否满足用户具体任务？原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md`, `docs/src/install.md`, `docs/src/quickstart.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：579\n- 重要文件覆盖：40/579\n- 证据索引条目：80\n- 角色 / Skill 条目：77\n\n### 证据不足时的处理\n\n- 
**missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 mnem-cli 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 mnem-cli 当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 mnem-cli 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 77 个角色 / Skill / 项目文档条目。\n\n- **mnem documentation**（project_doc）：- Source 
: src/ ./src/SUMMARY.md - Build : mdbook build docs/ output to docs/book/ - Online : rendered site link added post-launch 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/README.md`\n- **Comparisons**（project_doc）：How mnem stacks up against other agent-memory and knowledge-graph systems. Each comparison is honest: where they win, where mnem wins, when to pick which. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/README.md`\n- **Install**（project_doc）：mnem ships a single mnem binary plus optional Python and HTTP daemons. Pick the source that matches your platform. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/install.md`\n- **The Problem**（project_doc）：! License: Apache-2.0 https://img.shields.io/badge/license-Apache--2.0-blue?style=for-the-badge LICENSE ! CI https://img.shields.io/github/actions/workflow/status/Uranid/mnem/ci.yml?style=for-the-badge&label=CI https://github.com/Uranid/mnem/actions/workflows/ci.yml ! crates.io https://img.shields.io/crates/v/mnem-cli?style=for-the-badge https://crates.io/crates/mnem-cli ! PyPI https://img.shields.io/pypi/v/mnem-cli… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **Benchmarks**（project_doc）：Reproducible head-to-head numbers for mnem. Every number ships with the harness, the dataset, and the raw artifacts. If you can't reproduce a number, that's a bug. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`benchmarks/README.md`\n- **mnem fuzz harness**（project_doc）：Coverage-guided fuzz targets for mnem's highest-value parsers. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`fuzz/README.md`\n- **mnem brand assets**（project_doc）：Minimal, deliberately simple. Two colors, one geometric mark, one wordmark. Everything else is composition. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`assets/logo/README.md`\n- **mnem-ann**（project_doc）：Approximate-nearest-neighbour HNSW vector index for mnem. Feature-gated alternative to the built-in brute-force index in mnem-core . 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-ann/README.md`\n- **mnem-backend-redb**（project_doc）：Production embedded-KV backend for mnem - redb-backed Blockstore and OpHeadsStore . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-backend-redb/README.md`\n- **mnem-bench**（project_doc）：Benchmark harness for mnem. Ships the runner, dataset cache, LongMemEval / LoCoMo / ConvoMem / MemBench / hybrid-v4 scorers, the cpu-local mnem adapter, the bundled ONNX MiniLM-L6-v2 embedder, and the mnem bench interactive TUI. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-bench/README.md`\n- **mnem-cli**（project_doc）：Command-line interface for mnem - Git for AI Agent Knowledge. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-cli/README.md`\n- **mnem-core-testutils**（project_doc）：Shared test fixtures for the mnem workspace. Internal use only; never published to crates.io publish = false in Cargo.toml . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-core-testutils/README.md`\n- **mnem-core**（project_doc）：Content-addressed versioned substrate for AI agent memory - the core of mnem. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-core/README.md`\n- **mnem-embed-providers**（project_doc）：Embedding-provider adapters for mnem OpenAI, Ollama . Sync, TLS-via-rustls, tokio-free. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-embed-providers/README.md`\n- **mnem-extract**（project_doc）：Extraction strategies for mnem ingest pipelines. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-extract/README.md`\n- **mnem-graphrag**（project_doc）：Extractive community summarization: Centroid + MMR . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-graphrag/README.md`\n- **mnem http**（project_doc）：HTTP JSON API for mnem - REST surface over the core repo operations. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-http/README.md`\n- **mnem-ingest**（project_doc）：Ingest pipeline for mnem : converts external source artifacts Markdown, plain text, and - in later sub-waves - PDFs and chat conversations into content-addressed memory graphs. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-ingest/README.md`\n- **mnem-llm-providers**（project_doc）：Text-generation adapters for mnem OpenAI chat, Ollama chat for HyDE, multi-query, and future LLM-in-the-loop features. Sync, TLS-via-rustls, tokio-free. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-llm-providers/README.md`\n- **mnem mcp**（project_doc）：Model Context Protocol server for mnem - the AI-native, local-first memory substrate for agents. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-mcp/README.md`\n- **mnem-ner-providers**（project_doc）：NER provider adapters for mnem. Ships RuleNer heuristic, zero-dependency and NullNer . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-ner-providers/README.md`\n- **mnem Python**（project_doc）：Python bindings for mnem https://github.com/Uranid/mnem - Git for AI Agent Knowledge. A persistent, versioned knowledge layer for AI agents. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-py/README.md`\n- **mnem-rerank-providers**（project_doc）：Cross-encoder reranker adapters for mnem Cohere, Voyage, Jina . Sync, TLS-via-rustls, tokio-free. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-rerank-providers/README.md`\n- **mnem-sparse-providers**（project_doc）：Learned-sparse encoder adapters for mnem SPLADE, BGE-M3-sparse, opensearch-doc-v3-distill . Sync, TLS-via-rustls, tokio-free. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-sparse-providers/README.md`\n- **mnem-transport**（project_doc）：Offline transport for mnem: CAR v1 export/import of content-addressed subtrees, plus the frozen shapes of the remote wire protocol. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`crates/mnem-transport/README.md`\n- **mnem-cli**（project_doc）：Git for AI Agent Knowledge. A persistent, versioned knowledge layer for AI agents. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`npm-packages/mnem-cli/README.md`\n- **mnem-cli pip**（project_doc）：On first run mnem downloads the correct prebuilt binary for your platform from the GitHub release https://github.com/Uranid/mnem/releases and caches it in ~/.mnem cli/ . Subsequent calls run the cached binary directly. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`py-packages/mnem-cli/README.md`\n- **Contributing to mnem**（project_doc）：Thanks for your interest. mnem is an early-stage project, and the format spec is deliberately locked down - the whole value proposition is that two independent implementations produce byte-identical objects. This guide exists to keep contributions aligned with that bar. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CONTRIBUTING.md`\n- **mnem Roadmap**（project_doc）：Planned work is tracked in GitHub Issues https://github.com/Uranid/mnem/issues and labelled by phase. The rough arc: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/ROADMAP.md`\n- **mnem Format Specification**（project_doc）：Version: 0.1 Status: canonical - implementations MUST conform. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/SPEC.md`\n- **mnem system prompt**（project_doc）：This is the recommended system prompt to add to your agent host Claude Desktop, Claude Code, Cursor, Continue, Zed, Gemini CLI, ... so the LLM uses mnem transparently on every turn - without the user ever having to mention mnem. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/system-prompt.md`\n- **Benchmarks**（project_doc）：mnem is measured head-to-head against mem0 and MemPalace on six public datasets. All numbers are reproducible with the shipped harness. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/benchmarks.md`\n- **Content Addressing**（project_doc）：Every object in mnem - nodes, edges, commits, tree chunks - has an address derived from its content. The same bytes always produce the same address CID , on any machine. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/content-addressing.md`\n- **Deterministic Ingest**（project_doc）：mnem ingests documents without an LLM. The same bytes always produce the same nodes, on any machine. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/deterministic-ingest.md`\n- **Hybrid Retrieval**（project_doc）：mnem retrieves across three lanes simultaneously and fuses the results: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/hybrid-retrieval.md`\n- **Integrations**（project_doc）：mnem exposes the same retrieval engine through four interfaces. Pick whichever fits your stack - they all hit the same graph. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/integrations.md`\n- **Provider Configuration**（project_doc）：mnem ships with a bundled ONNX MiniLM-L6-v2 embedder that runs in-process. No Ollama, no API keys, no configuration needed to get started. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/providers.md`\n- **Rich Ingest Pipeline**（project_doc）：mnem produces semantically coherent chunks from any source file - not arbitrary text windows. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/rich-ingest.md`\n- **Single Binary**（project_doc）：The mnem binary is ~40 MB and self-contained. There is no daemon to start, no cloud account to create, no database server to manage. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/single-binary.md`\n- **Skills Graph**（project_doc）：Agent conventions today live in flat files - .cursorrules , AGENTS.md , CLAUDE.md . They're useful, but they have no structure: you can't query them, diff them against a teammate's, branch them for an experiment, or merge two versions together. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/skills-graph.md`\n- **Token Transparency**（project_doc）：Every mnem retrieve response includes three counters that no other agent-memory system exposes: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/token-transparency.md`\n- **Versioned Memory**（project_doc）：mnem treats every write as a commit - the same way git treats every save as a snapshot. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/versioned-memory.md`\n- **WASM and Edge Deployment**（project_doc）：mnem-core - the retrieval and storage engine - has no tokio, no filesystem calls, and no network dependencies. The same code that runs on your laptop compiles unchanged to wasm32 . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/features/wasm-edge.md`\n- **v0.3 to v0.4 migration guide**（project_doc）：Dense embedding vectors are no longer stored inline on Node bytes. They now live in a per-commit Prolly sidecar tree Commit.embeddings , keyed by NodeCid . This makes NodeCid byte-identical across machines regardless of embedder thread counts or floating-point reduction order. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/guide/migrations/0.3-to-0.4.md`\n- **v0.4 to v0.5 migration guide**（project_doc）：Sparse embedding vectors SPLADE, BGE-M3-sparse, etc. are no longer stored inline on Node bytes. They now live in a per-commit Prolly sidecar tree Commit.sparse , keyed by NodeCid . This mirrors the G16 change that moved dense embeddings to Commit.embeddings . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/guide/migrations/0.4-to-0.5.md`\n- **Summary**（project_doc）：- Install ./install.md - Quickstart ./quickstart.md 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/SUMMARY.md`\n- **Methodology**（project_doc）：Every published number ships with the harness, the dataset hash, and the raw artifacts. If you cannot reproduce a number, that is a bug. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/benchmarks/methodology.md`\n- **Reproduce**（project_doc）：End-to-end recipe to regenerate the 0.1.0 benchmark numbers locally. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/benchmarks/reproduce.md`\n- **Results**（project_doc）：mnem vs MemPalace published numbers. Dense retrieval vector + top-k . No LLM rerank. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/benchmarks/results.md`\n- **Run benchmarks locally with mnem bench**（project_doc）：Run benchmarks locally with mnem bench 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/benchmarks/run-locally.md`\n- **CLI reference**（project_doc）：mnem is the single entry point. Subcommands wrap repo operations. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/cli.md`\n- **mnem vs Cognee**（project_doc）：Cognee: \"Knowledge Engine for AI Agent Memory in 6 lines of code\" repo description, topoteretes/cognee mnem: a content-addressed, versioned graph substrate that ingests without an LLM. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/cognee.md`\n- **mnem vs graphify**（project_doc）：graphify: \"AI coding assistant skill Claude Code, Codex, OpenCode, ... . Turn any folder of code, docs, papers, images, or videos into a queryable knowledge graph.\" repo description, safishamsi/graphify mnem: a content-addressed graph substrate. graphify builds a graph from a folder; mnem is the graph. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/graphify.md`\n- **mnem vs Letta**（project_doc）：Letta: \"Letta is the platform for building stateful agents: AI with advanced memory that can learn and self-improve over time.\" repo description, letta-ai/letta mnem: a content-addressed graph substrate that stores the memory an agent uses, without assuming the agent. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/letta.md`\n- **mnem vs mem0**（project_doc）：mem0: \"Universal memory layer for AI Agents\" repo description, mem0ai/mem0 mnem: a content-addressed, versioned graph substrate underneath the memory layer. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/mem0.md`\n- **mnem vs MemPalace**（project_doc）：MemPalace: \"The best-benchmarked open-source AI memory system. 
And it's free.\" repo description, MemPalace/mempalace mnem: a content-addressed, versioned graph substrate that shares MemPalace's no-LLM-on-write philosophy and pushes further on identity and history. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/mempalace.md`\n- **mnem vs Supermemory**（project_doc）：Supermemory: \"Memory engine and app that is extremely fast, scalable. The Memory API for the AI era.\" repo description, supermemoryai/supermemory mnem: an open-source, embedded, content-addressed knowledge-graph substrate. Self-host or nothing. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/comparisons/supermemory.md`\n- **Configuration**（project_doc）：mnem reads config from three sources, in priority order: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/configuration.md`\n- **Core concepts**（project_doc）：Three primitives. Everything else is composed from these. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/core-concepts.md`\n- **Embedding providers**（project_doc）：mnem decouples embedder from store. Switch providers without re-ingesting. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/guides/embed-providers.md`\n- **Ingest pipeline**（project_doc）：mnem ingest is the only path content takes into the graph. The pipeline: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/guides/ingest.md`\n- **Introduction**（project_doc）：mnem is a knowledge-graph substrate. It stores nodes as content-addressed objects, retrieves them with vector + sparse + graph signals, and exposes the result over CLI, HTTP, and MCP surfaces. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/introduction.md`\n- **MCP server**（project_doc）：mnem implements the Model Context Protocol https://modelcontextprotocol.io over stdio. Drop it into any MCP client Claude Desktop, Cursor, Zed, custom . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/mcp.md`\n- **Migrating to 0.1.0**（project_doc）：0.1.0 is the first public release. There is no prior public version; this document exists for completeness and to fix the upgrade pathway forward. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/migrations/v0.x-to-v0.1.0.md`\n- **Quickstart**（project_doc）：Five minutes from zero to retrieve. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/src/quickstart.md`\n- **What this PR does**（project_doc）：- Bug fix non-breaking - New feature non-breaking - Breaking change API or wire-format - Documentation only - CI / tooling only - Refactor no behavior change 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/PULL_REQUEST_TEMPLATE.md`\n- **Changelog**（project_doc）：Initial public release. Versioned, mergeable, content-addressed knowledge graph for AI agent memory. Local-first, Apache-2.0. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CHANGELOG.md`\n- **Contributor Covenant Code of Conduct**（project_doc）：Contributor Covenant Code of Conduct 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CODE_OF_CONDUCT.md`\n- **El problema**（project_doc）：! License: Apache-2.0 https://img.shields.io/badge/license-Apache--2.0-blue?style=for-the-badge LICENSE ! CI https://img.shields.io/github/actions/workflow/status/Uranid/mnem/ci.yml?style=for-the-badge&label=CI https://github.com/Uranid/mnem/actions/workflows/ci.yml ! crates.io https://img.shields.io/crates/v/mnem-cli?style=for-the-badge https://crates.io/crates/mnem-cli ! PyPI https://img.shields.io/pypi/v/mnem-cli… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.es.md`\n- **问题所在**（project_doc）：! License: Apache-2.0 https://img.shields.io/badge/license-Apache--2.0-blue?style=for-the-badge LICENSE ! CI https://img.shields.io/github/actions/workflow/status/Uranid/mnem/ci.yml?style=for-the-badge&label=CI https://github.com/Uranid/mnem/actions/workflows/ci.yml ! crates.io https://img.shields.io/crates/v/mnem-cli?style=for-the-badge https://crates.io/crates/mnem-cli ! PyPI https://img.shields.io/pypi/v/mnem-cli… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.zh-CN.md`\n- **Security policy**（project_doc）：Pre-launch baseline. Only 0.1.x on main receives fixes; older in-development versions are unsupported. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`SECURITY.md`\n- **Fuzz crash-regression policy**（project_doc）：When a fuzz run hits a crash (locally or in nightly CI), the input must land as a permanent regression seed so the same shape can never re-crash silently. The flow is short and mechanical. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`fuzz/REGRESSIONS.md`\n- **Question**（project_doc）：<!-- Before opening: please check docs/SPEC.md the format spec is normative - your question is often answered there docs/ROADMAP.md if you're asking \"when will X happen\" 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/ISSUE_TEMPLATE/question.md`\n- **ConvoMem**（project_doc）：5 categories × 50 items = 250 items total. Salesforce/ConvoMem dataset. Substring-match recall (gold evidence text in retrieved item text). 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`benchmarks/results/analysis/convomem.md`\n- **FinanceBench**（project_doc）：150 questions, 168 unique evidence passages from financebench_open_source.jsonl (Patronus AI 2024 open-source split, SEC 10-K/10-Q filings). 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`benchmarks/results/analysis/financebench.md`\n- **LoCoMo**（project_doc）：1986 questions across 10 conversations on locomo10.json (snap-research/LoCoMo). Session-level granularity: score session id of evidence vs retrieved turn session id. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`benchmarks/results/analysis/locomo.md`\n- **LongMemEval**（project_doc）：500 questions on longmemeval_s_cleaned.json (xiaowu0162/longmemeval-cleaned). Session-level retrieval: aggregate turn hits to session via MAX score. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`benchmarks/results/analysis/longmemeval.md`\n\n## 证据索引\n\n- 共索引 80 条证据。\n\n- **mnem documentation**（documentation）：- Source : src/ ./src/SUMMARY.md - Build : mdbook build docs/ output to docs/book/ - Online : rendered site link added post-launch 证据：`docs/README.md`\n- **Comparisons**（documentation）：How mnem stacks up against other agent-memory and knowledge-graph systems. 
Each comparison is honest: where they win, where mnem wins, when to pick which. 证据：`docs/src/comparisons/README.md`\n- **Install**（documentation）：mnem ships a single mnem binary plus optional Python and HTTP daemons. Pick the source that matches your platform. 证据：`docs/src/install.md`\n- **The Problem**（documentation）：! License: Apache-2.0 https://img.shields.io/badge/license-Apache--2.0-blue?style=for-the-badge LICENSE ! CI https://img.shields.io/github/actions/workflow/status/Uranid/mnem/ci.yml?style=for-the-badge&label=CI https://github.com/Uranid/mnem/actions/workflows/ci.yml ! crates.io https://img.shields.io/crates/v/mnem-cli?style=for-the-badge https://crates.io/crates/mnem-cli ! PyPI https://img.shields.io/pypi/v/mnem-cli?style=for-the-badge https://pypi.org/project/mnem-cli/ ! npm https://img.shields.io/npm/v/mnem-cli?style=for-the-badge https://www.npmjs.com/package/mnem-cli ! MSRV 1.95 https://img.shields.io/badge/MSRV-1.95-orange?style=for-the-badge rust-toolchain.toml ! Runs on Linux macOS W… 证据：`README.md`\n- **Benchmarks**（documentation）：Reproducible head-to-head numbers for mnem. Every number ships with the harness, the dataset, and the raw artifacts. If you can't reproduce a number, that's a bug. 证据：`benchmarks/README.md`\n- **mnem fuzz harness**（documentation）：Coverage-guided fuzz targets for mnem's highest-value parsers. 证据：`fuzz/README.md`\n- **mnem brand assets**（documentation）：Minimal, deliberately simple. Two colors, one geometric mark, one wordmark. Everything else is composition. 证据：`assets/logo/README.md`\n- **mnem-ann**（documentation）：Approximate-nearest-neighbour HNSW vector index for mnem. Feature-gated alternative to the built-in brute-force index in mnem-core . 证据：`crates/mnem-ann/README.md`\n- **mnem-backend-redb**（documentation）：Production embedded-KV backend for mnem - redb-backed Blockstore and OpHeadsStore . 证据：`crates/mnem-backend-redb/README.md`\n- **mnem-bench**（documentation）：Benchmark harness for mnem. 
Ships the runner, dataset cache, LongMemEval / LoCoMo / ConvoMem / MemBench / hybrid-v4 scorers, the cpu-local mnem adapter, the bundled ONNX MiniLM-L6-v2 embedder, and the mnem bench interactive TUI. 证据：`crates/mnem-bench/README.md`\n- **mnem-cli**（documentation）：Command-line interface for mnem - Git for AI Agent Knowledge. 证据：`crates/mnem-cli/README.md`\n- **mnem-core-testutils**（documentation）：Shared test fixtures for the mnem workspace. Internal use only; never published to crates.io (publish = false in Cargo.toml). 证据：`crates/mnem-core-testutils/README.md`\n- **mnem-core**（documentation）：Content-addressed versioned substrate for AI agent memory - the core of mnem. 证据：`crates/mnem-core/README.md`\n- **mnem-embed-providers**（documentation）：Embedding-provider adapters for mnem (OpenAI, Ollama). Sync, TLS-via-rustls, tokio-free. 证据：`crates/mnem-embed-providers/README.md`\n- **mnem-extract**（documentation）：Extraction strategies for mnem ingest pipelines. 证据：`crates/mnem-extract/README.md`\n- **mnem-graphrag**（documentation）：Extractive community summarization: Centroid + MMR. 证据：`crates/mnem-graphrag/README.md`\n- **mnem http**（documentation）：HTTP JSON API for mnem - REST surface over the core repo operations. 证据：`crates/mnem-http/README.md`\n- **mnem-ingest**（documentation）：Ingest pipeline for mnem: converts external source artifacts (Markdown, plain text, and - in later sub-waves - PDFs and chat conversations) into content-addressed memory graphs. 证据：`crates/mnem-ingest/README.md`\n- **mnem-llm-providers**（documentation）：Text-generation adapters for mnem (OpenAI chat, Ollama chat) for HyDE, multi-query, and future LLM-in-the-loop features. Sync, TLS-via-rustls, tokio-free. 证据：`crates/mnem-llm-providers/README.md`\n- **mnem mcp**（documentation）：Model Context Protocol server for mnem - the AI-native, local-first memory substrate for agents. 证据：`crates/mnem-mcp/README.md`\n- **mnem-ner-providers**（documentation）：NER provider adapters for mnem. 
Ships RuleNer (heuristic, zero-dependency) and NullNer. 证据：`crates/mnem-ner-providers/README.md`\n- **mnem Python**（documentation）：Python bindings for mnem (https://github.com/Uranid/mnem) - Git for AI Agent Knowledge. A persistent, versioned knowledge layer for AI agents. 证据：`crates/mnem-py/README.md`\n- **mnem-rerank-providers**（documentation）：Cross-encoder reranker adapters for mnem (Cohere, Voyage, Jina). Sync, TLS-via-rustls, tokio-free. 证据：`crates/mnem-rerank-providers/README.md`\n- **mnem-sparse-providers**（documentation）：Learned-sparse encoder adapters for mnem (SPLADE, BGE-M3-sparse, opensearch-doc-v3-distill). Sync, TLS-via-rustls, tokio-free. 证据：`crates/mnem-sparse-providers/README.md`\n- **mnem-transport**（documentation）：Offline transport for mnem: CAR v1 export/import of content-addressed subtrees, plus the frozen shapes of the remote wire protocol. 证据：`crates/mnem-transport/README.md`\n- **mnem-cli**（documentation）：Git for AI Agent Knowledge. A persistent, versioned knowledge layer for AI agents. 证据：`npm-packages/mnem-cli/README.md`\n- **mnem-cli pip**（documentation）：On first run mnem downloads the correct prebuilt binary for your platform from the GitHub release (https://github.com/Uranid/mnem/releases) and caches it in ~/.mnem_cli/. Subsequent calls run the cached binary directly. 证据：`py-packages/mnem-cli/README.md`\n- **Contributing to mnem**（documentation）：Thanks for your interest. mnem is an early-stage project, and the format spec is deliberately locked down - the whole value proposition is that two independent implementations produce byte-identical objects. This guide exists to keep contributions aligned with that bar. 
证据：`CONTRIBUTING.md`\n- **Package**（package_manifest）：{ \"name\": \"mnem-cli\", \"version\": \"0.1.6\", \"description\": \"Git for knowledge graphs - versioned, content-addressed, embeddable agent memory\", \"author\": \"mnem contributors\", \"license\": \"Apache-2.0\", \"homepage\": \"https://github.com/Uranid/mnem readme\", \"repository\": { \"type\": \"git\", \"url\": \"git+https://github.com/Uranid/mnem.git\" }, \"keywords\": \"knowledge-graph\", \"memory\", \"agent\", \"rag\", \"embeddings\", \"mcp\", \"ai\" , \"bin\": { \"mnem\": \"bin/mnem.js\" }, \"os\": \"darwin\", \"linux\", \"win32\" , \"cpu\": \"x64\", \"arm64\" , \"publishConfig\": { \"access\": \"public\", \"registry\": \"https://registry.npmjs.org/\" }, \"files\": \"bin/mnem.js\", \"scripts/\", \"README.md\", \"LICENSE\" , \"scripts\": { \"postinstall\": \"node scripts/po… 证据：`npm-packages/mnem-cli/package.json`\n- **License**（source_file）：Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ 证据：`LICENSE`\n- **License**（source_file）：Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ 证据：`npm-packages/mnem-cli/LICENSE`\n- **mnem Roadmap**（documentation）：Planned work is tracked in GitHub Issues https://github.com/Uranid/mnem/issues and labelled by phase. The rough arc: 证据：`docs/ROADMAP.md`\n- **mnem Format Specification**（documentation）：Version: 0.1 Status: canonical - implementations MUST conform. 证据：`docs/SPEC.md`\n- **mnem system prompt**（documentation）：This is the recommended system prompt to add to your agent host Claude Desktop, Claude Code, Cursor, Continue, Zed, Gemini CLI, ... so the LLM uses mnem transparently on every turn - without the user ever having to mention mnem. 证据：`docs/system-prompt.md`\n- **Benchmarks**（documentation）：mnem is measured head-to-head against mem0 and MemPalace on six public datasets. All numbers are reproducible with the shipped harness. 
证据：`docs/features/benchmarks.md`\n- **Content Addressing**（documentation）：Every object in mnem - nodes, edges, commits, tree chunks - has an address derived from its content. The same bytes always produce the same address CID , on any machine. 证据：`docs/features/content-addressing.md`\n- **Deterministic Ingest**（documentation）：mnem ingests documents without an LLM. The same bytes always produce the same nodes, on any machine. 证据：`docs/features/deterministic-ingest.md`\n- **Hybrid Retrieval**（documentation）：mnem retrieves across three lanes simultaneously and fuses the results: 证据：`docs/features/hybrid-retrieval.md`\n- **Integrations**（documentation）：mnem exposes the same retrieval engine through four interfaces. Pick whichever fits your stack - they all hit the same graph. 证据：`docs/features/integrations.md`\n- **Provider Configuration**（documentation）：mnem ships with a bundled ONNX MiniLM-L6-v2 embedder that runs in-process. No Ollama, no API keys, no configuration needed to get started. 证据：`docs/features/providers.md`\n- **Rich Ingest Pipeline**（documentation）：mnem produces semantically coherent chunks from any source file - not arbitrary text windows. 证据：`docs/features/rich-ingest.md`\n- **Single Binary**（documentation）：The mnem binary is ~40 MB and self-contained. There is no daemon to start, no cloud account to create, no database server to manage. 证据：`docs/features/single-binary.md`\n- **Skills Graph**（documentation）：Agent conventions today live in flat files - .cursorrules , AGENTS.md , CLAUDE.md . They're useful, but they have no structure: you can't query them, diff them against a teammate's, branch them for an experiment, or merge two versions together. 
证据：`docs/features/skills-graph.md`\n- **Token Transparency**（documentation）：Every mnem retrieve response includes three counters that no other agent-memory system exposes: 证据：`docs/features/token-transparency.md`\n- **Versioned Memory**（documentation）：mnem treats every write as a commit - the same way git treats every save as a snapshot. 证据：`docs/features/versioned-memory.md`\n- **WASM and Edge Deployment**（documentation）：mnem-core - the retrieval and storage engine - has no tokio, no filesystem calls, and no network dependencies. The same code that runs on your laptop compiles unchanged to wasm32 . 证据：`docs/features/wasm-edge.md`\n- **v0.3 to v0.4 migration guide**（documentation）：Dense embedding vectors are no longer stored inline on Node bytes. They now live in a per-commit Prolly sidecar tree Commit.embeddings , keyed by NodeCid . This makes NodeCid byte-identical across machines regardless of embedder thread counts or floating-point reduction order. 证据：`docs/guide/migrations/0.3-to-0.4.md`\n- **v0.4 to v0.5 migration guide**（documentation）：Sparse embedding vectors SPLADE, BGE-M3-sparse, etc. are no longer stored inline on Node bytes. They now live in a per-commit Prolly sidecar tree Commit.sparse , keyed by NodeCid . This mirrors the G16 change that moved dense embeddings to Commit.embeddings . 证据：`docs/guide/migrations/0.4-to-0.5.md`\n- **Summary**（documentation）：- Install ./install.md - Quickstart ./quickstart.md 证据：`docs/src/SUMMARY.md`\n- **Methodology**（documentation）：Every published number ships with the harness, the dataset hash, and the raw artifacts. If you cannot reproduce a number, that is a bug. 证据：`docs/src/benchmarks/methodology.md`\n- **Reproduce**（documentation）：End-to-end recipe to regenerate the 0.1.0 benchmark numbers locally. 证据：`docs/src/benchmarks/reproduce.md`\n- **Results**（documentation）：mnem vs MemPalace published numbers. Dense retrieval vector + top-k . No LLM rerank. 
证据：`docs/src/benchmarks/results.md`\n- **Run benchmarks locally with mnem bench**（documentation）：Run benchmarks locally with mnem bench 证据：`docs/src/benchmarks/run-locally.md`\n- **CLI reference**（documentation）：mnem is the single entry point. Subcommands wrap repo operations. 证据：`docs/src/cli.md`\n- **mnem vs Cognee**（documentation）：Cognee: \"Knowledge Engine for AI Agent Memory in 6 lines of code\" repo description, topoteretes/cognee mnem: a content-addressed, versioned graph substrate that ingests without an LLM. 证据：`docs/src/comparisons/cognee.md`\n- **mnem vs graphify**（documentation）：graphify: \"AI coding assistant skill Claude Code, Codex, OpenCode, ... . Turn any folder of code, docs, papers, images, or videos into a queryable knowledge graph.\" repo description, safishamsi/graphify mnem: a content-addressed graph substrate. graphify builds a graph from a folder; mnem is the graph. 证据：`docs/src/comparisons/graphify.md`\n- **mnem vs Letta**（documentation）：Letta: \"Letta is the platform for building stateful agents: AI with advanced memory that can learn and self-improve over time.\" repo description, letta-ai/letta mnem: a content-addressed graph substrate that stores the memory an agent uses, without assuming the agent. 证据：`docs/src/comparisons/letta.md`\n- **mnem vs mem0**（documentation）：mem0: \"Universal memory layer for AI Agents\" repo description, mem0ai/mem0 mnem: a content-addressed, versioned graph substrate underneath the memory layer. 证据：`docs/src/comparisons/mem0.md`\n- **mnem vs MemPalace**（documentation）：MemPalace: \"The best-benchmarked open-source AI memory system. And it's free.\" repo description, MemPalace/mempalace mnem: a content-addressed, versioned graph substrate that shares MemPalace's no-LLM-on-write philosophy and pushes further on identity and history. 证据：`docs/src/comparisons/mempalace.md`\n- **mnem vs Supermemory**（documentation）：Supermemory: \"Memory engine and app that is extremely fast, scalable. 
The Memory API for the AI era.\" repo description, supermemoryai/supermemory mnem: an open-source, embedded, content-addressed knowledge-graph substrate. Self-host or nothing. 证据：`docs/src/comparisons/supermemory.md`\n- 其余 20 条证据见 `AI_CONTEXT_PACK.json` 或 `EVIDENCE_INDEX.json`。\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`docs/README.md`, `docs/src/comparisons/README.md`, `docs/src/install.md`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`docs/README.md`, `docs/src/comparisons/README.md`, `docs/src/install.md`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\n下面内容用于强化 Repomix/AI Context Pack 主体。Human Manual 只提供阅读骨架；踩坑日志会被转成宿主 AI 必须遵守的工作约束。\n\n## Human Manual 骨架\n\n使用规则：这里只是项目阅读路线和显著性信号，不是事实权威。具体事实仍必须回到 repo evidence / Claim Graph。\n\n宿主 AI 硬性规则：\n- 不得把页标题、章节顺序、摘要或 importance 当作项目事实证据。\n- 解释 Human Manual 骨架时，必须明确说它只是阅读路线/显著性信号。\n- 能力、安装、兼容性、运行状态和风险判断必须引用 repo evidence、source path 或 Claim Graph。\n\n- **Introduction to mnem**：importance `high`\n  - source_paths: README.md, docs/src/introduction.md, crates/mnem-core/src/lib.rs\n- **Installation Guide**：importance `high`\n  - source_paths: docs/src/install.md, Cargo.toml, Dockerfile, docker-compose.yml\n- **System Architecture**：importance `high`\n  - source_paths: crates/mnem-core/src/id/cid.rs, crates/mnem-core/src/id/multihash.rs, crates/mnem-core/src/objects/mod.rs, crates/mnem-core/src/lib.rs\n- **Core Components**：importance `high`\n  - source_paths: crates/mnem-core/src/objects/commit.rs, crates/mnem-core/src/objects/node.rs, crates/mnem-core/src/objects/edge.rs, crates/mnem-core/src/repo/mod.rs, crates/mnem-core/src/repo/transaction.rs\n- **Hybrid Retrieval System**：importance `high`\n  - source_paths: 
crates/mnem-core/src/retrieve/mod.rs, crates/mnem-core/src/retrieve/retriever.rs, crates/mnem-core/src/retrieve/fusion.rs, crates/mnem-core/src/retrieve/types.rs, crates/mnem-core/src/index/hybrid.rs\n- **Embedding Providers**：importance `high`\n  - source_paths: crates/mnem-embed-providers/src/lib.rs, crates/mnem-embed-providers/src/embedder.rs, crates/mnem-embed-providers/src/openai.rs, crates/mnem-embed-providers/src/ollama.rs, crates/mnem-embed-providers/src/onnx.rs\n- **Storage Backend**：importance `high`\n  - source_paths: crates/mnem-core/src/store/mod.rs, crates/mnem-core/src/store/blockstore.rs, crates/mnem-core/src/store/op_heads.rs, crates/mnem-backend-redb/src/lib.rs, crates/mnem-backend-redb/src/blockstore.rs\n- **Ingestion Pipeline**：importance `high`\n  - source_paths: crates/mnem-ingest/src/lib.rs, crates/mnem-ingest/src/pipeline.rs, crates/mnem-ingest/src/chunk.rs, crates/mnem-ingest/src/pdf.rs, crates/mnem-ingest/src/md.rs\n\n## Repo Inspection Evidence / 源码检查证据\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: `1dfa0d9e928a5da11c03eb0389b6d5ee7c8e7388`\n- inspected_files: `Dockerfile`, `README.md`, `docker-compose.yml`, `docs/system-prompt.md`, `docs/SPEC.md`, `docs/book.toml`, `docs/README.md`, `docs/ROADMAP.md`, `docs/src/SUMMARY.md`, `docs/src/cli.md`, `docs/src/quickstart.md`, `docs/src/configuration.md`, `docs/src/install.md`, `docs/src/introduction.md`, `docs/src/core-concepts.md`, `docs/src/mcp.md`, `docs/features/content-addressing.md`, `docs/features/integrations.md`, `docs/features/single-binary.md`, `docs/features/token-transparency.md`\n\n宿主 AI 硬性规则：\n- 没有 repo_clone_verified=true 时，不得声称已经读过源码。\n- 没有 repo_inspection_verified=true 时，不得把 README/docs/package 文件判断写成事实。\n- 没有 quick_start_verified=true 时，不得声称 Quick Start 已跑通。\n\n## Doramagic Pitfall Constraints / 踩坑约束\n\n这些规则来自 Doramagic 发现、验证或编译过程中的项目专属坑点。宿主 AI 必须把它们当作工作约束，而不是普通说明文字。\n\n### Constraint 1: 来源证据：[feature] hermes support\n\n- Trigger: GitHub 
社区证据显示该项目存在一个安全/权限相关的待验证问题：[feature] hermes support\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能影响授权、密钥配置或安全边界。\n- Evidence: community_evidence:github | cevd_c54919b2b8b340438a9e5aa17291b93a | https://github.com/Uranid/mnem/issues/27 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 2: 能力判断依赖假设\n\n- Trigger: README/documentation is current enough for a first validation pass.\n- Host AI rule: 将假设转成下游验证清单。\n- Why it matters: 假设不成立时，用户拿不到承诺的能力。\n- Evidence: capability.assumptions | github_repo:1221867246 | https://github.com/Uranid/mnem | README/documentation is current enough for a first validation pass.\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 3: 来源证据：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n\n- Trigger: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_5c74e7a10f774af6b0460b5da009d1b4 | https://github.com/Uranid/mnem/issues/23 | 来源讨论提到 windows 相关条件，需在安装/试用前复核。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 4: 维护活跃度未知\n\n- Trigger: 未记录 last_activity_observed。\n- Host AI rule: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Why it matters: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Evidence: evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | last_activity_observed missing\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 5: 下游验证发现风险项\n\n- Trigger: no_demo\n- Host AI rule: 进入安全/权限治理复核队列。\n- Why it matters: 下游已经要求复核，不能在页面中弱化。\n- Evidence: downstream_validation.risk_items | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 6: 存在评分风险\n\n- Trigger: 
no_demo\n- Host AI rule: 把风险写入边界卡，并确认是否需要人工复核。\n- Why it matters: 风险会影响是否适合普通用户安装。\n- Evidence: risks.scoring_risks | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 7: issue/PR 响应质量未知\n\n- Trigger: issue_or_pr_quality=unknown。\n- Host AI rule: 抽样最近 issue/PR，判断是否长期无人处理。\n- Why it matters: 用户无法判断遇到问题后是否有人维护。\n- Evidence: evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | issue_or_pr_quality=unknown\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 8: 发布节奏不明确\n\n- Trigger: release_recency=unknown。\n- Host AI rule: 确认最近 release/tag 和 README 安装命令是否一致。\n- Why it matters: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Evidence: evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | release_recency=unknown\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n",
      "summary": "给宿主 AI 的上下文和工作边界。",
      "title": "AI Context Pack / 带给我的 AI"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card / 安装前决策卡\n\n项目：Uranid/mnem\n\n## Doramagic 试用结论\n\n当前结论：可以进入发布前推荐检查；首次使用仍应从最小权限、临时目录和可回滚配置开始。\n\n## 用户现在可以做\n\n- 可以先阅读 Human Manual，理解项目目的和主要工作流。\n- 可以复制 Prompt Preview 做安装前体验；这只验证交互感，不代表真实运行。\n- 可以把官方 Quick Start 命令放到隔离环境中验证，不要直接进主力环境。\n\n## 现在不要做\n\n- 不要把 Prompt Preview 当成项目实际运行结果。\n- 不要把 metadata-only validation 当成沙箱安装验证。\n- 不要把未验证能力写成“已支持、已跑通、可放心安装”。\n- 不要在首次试用时交出生产数据、私人文件、真实密钥或主力配置目录。\n\n## 安装前检查\n\n- 宿主 AI 是否匹配：mcp_host\n- 官方安装入口状态：已发现官方入口\n- 是否在临时目录、临时宿主或容器中验证：必须是\n- 是否能回滚配置改动：必须能\n- 是否需要 API Key、网络访问、读写文件或修改宿主配置：未确认前按高风险处理\n- 是否记录了安装命令、实际输出和失败日志：必须记录\n\n## 当前阻塞项\n\n- review_required: community_discussion_evidence_below_public_threshold\n\n## 项目专属踩坑\n\n- 来源证据：[feature] hermes support（high）：可能影响授权、密钥配置或安全边界。 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 能力判断依赖假设（medium）：假设不成立时，用户拿不到承诺的能力。 建议检查：将假设转成下游验证清单。\n- 来源证据：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page（medium）：可能增加新用户试用和生产接入成本。 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 维护活跃度未知（medium）：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 下游验证发现风险项（medium）：下游已经要求复核，不能在页面中弱化。 建议检查：进入安全/权限治理复核队列。\n\n## 风险与权限提示\n\n- no_demo: medium\n\n## 证据缺口\n\n- 暂未发现结构化证据缺口。\n",
      "summary": "安装、权限、验证和推荐前风险。",
      "title": "Boundary & Risk Card / 边界与风险卡"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/Uranid/mnem 项目说明书\n\n生成时间：2026-05-15 07:30:14 UTC\n\n## 目录\n\n- [Introduction to mnem](#page-introduction)\n- [Installation Guide](#page-installation)\n- [System Architecture](#page-architecture)\n- [Core Components](#page-core-components)\n- [Hybrid Retrieval System](#page-hybrid-retrieval)\n- [Embedding Providers](#page-embed-providers)\n- [Storage Backend](#page-storage-backend)\n- [Ingestion Pipeline](#page-ingestion)\n- [CLI Commands Reference](#page-cli-commands)\n- [GraphRAG Implementation](#page-graphrag)\n\n<a id='page-introduction'></a>\n\n## Introduction to mnem\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [Installation Guide](#page-installation)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-core/src/objects/operation.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/operation.rs)\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n- [crates/mnem-ingest/src/pipeline.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pipeline.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n</details>\n\n# Introduction to mnem\n\nmnem is a Rust-based knowledge management system designed for AI agents. 
It provides a structured approach to storing, retrieving, and managing information using a DAG-based (Directed Acyclic Graph) storage architecture with content-addressed data structures.\n\n## Overview\n\nmnem serves as a personal knowledge graph for AI agents, enabling them to:\n\n- **Store structured information** with nodes, edges, and properties in a version-controlled repository\n- **Ingest various document formats** including Markdown, PDF, plain text, code files, and conversation logs\n- **Retrieve relevant context** using vector search, sparse ranking, and token-budget packing\n- **Track changes** through a commit-based operation log with cryptographic signatures\n- **Support branching** for experimental or temporary state management\n\n资料来源：[crates/mnem-core/src/lib.rs:1-30]()\n\n## Architecture\n\nmnem is organized as a monorepo with multiple Rust crates:\n\n```mermaid\ngraph TD\n    subgraph \"mnem Repository Structure\"\n        CLI[\"mnem-cli<br/>Command Line Interface\"]\n        HTTP[\"mnem-http<br/>HTTP API Server\"]\n        INGEST[\"mnem-ingest<br/>Document Ingestion\"]\n        CORE[\"mnem-core<br/>Core Data Model & Retrieval\"]\n    end\n    \n    CLI --> CORE\n    HTTP --> CORE\n    INGEST --> CORE\n    \n    INGEST --> |\"parse/chunk/extract\"| RAW[(\"Raw Source<br/>.md .pdf .txt .json\")]\n    CORE --> |\"store/retrieve\"| GRAPH[(\"Knowledge<br/>Graph\")]\n```\n\n### Crate Responsibilities\n\n| Crate | Purpose |\n|-------|---------|\n| `mnem-core` | Core data models (`Node`, `Edge`, `Commit`, `Operation`), DAG-CBOR codec, prolly trees, vector/sparse indexing, agent-facing retrieval |\n| `mnem-ingest` | Document parsing, chunking strategies, entity extraction (rule-based, KeyBERT, or LLM) |\n| `mnem-cli` | Terminal interface for all operations |\n| `mnem-http` | REST API for remote agent access |\n\n资料来源：[crates/mnem-core/src/lib.rs:15-25]()\n\n## Core Data Model\n\n### Nodes\n\nNodes are the fundamental unit of information storage. 
Each node contains:\n\n```mermaid\ngraph LR\n    subgraph \"Node Structure\"\n        NTYPE[\"ntype<br/>Node Type Label\"]\n        CTX[\"context_sentence<br/>Positional Cue\"]\n        SUM[\"summary<br/>LLM-facing Text\"]\n        PROPS[\"props<br/>Property Map\"]\n        CONTENT[\"content<br/>Opaque Payload\"]\n    end\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `ntype` | `String` | Semantic label (e.g., `Fact`, `Doc`, `Person`) |\n| `context_sentence` | `Option<String>` | LLM-generated placement cue for contextual retrieval |\n| `summary` | `Option<String>` | Primary text for embedding and retrieval |\n| `props` | `BTreeMap<String, Ipld>` | Structured key-value metadata |\n| `content` | `Option<Bytes>` | Opaque payload (document body, file data) |\n\nThe `context_sentence` field implements Anthropic's 2024 contextual retrieval approach, storing an LLM-generated one-sentence placement cue that captures positional and relational context. 资料来源：[crates/mnem-core/src/objects/node.rs:30-75]()\n\n### Edges\n\nEdges represent relationships between nodes. 
They are typed links with source and target references.\n\n### Operations and Commits\n\n| Concept | Description |\n|---------|-------------|\n| `Operation` | A single atomic change to the repository state |\n| `Commit` | A snapshot referencing a sequence of operations |\n| `View` | Current head state with references and tombstones |\n\nOperations include metadata for provenance:\n\n```rust\npub struct Operation {\n    pub author: String,\n    pub agent_id: Option<String>,\n    pub task_id: Option<String>,\n    pub host: Option<String>,\n    pub time: u64,\n    pub description: String,\n    pub signature: Option<Signature>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/operation.rs:15-35]()\n\n## Document Ingestion Pipeline\n\nThe ingestion system (`mnem-ingest`) handles the transformation of raw documents into chunked, indexed nodes:\n\n```mermaid\nflowchart LR\n    RAW[\"Raw Source<br/>.md .pdf .txt\"] --> PARSE[\"Parse\"]\n    PARSE --> SECTION[\"Sections\"]\n    SECTION --> CHUNK[\"Chunk\"]\n    CHUNK --> EXTRACT[\"Extract Entities<br/>Relations\"]\n    EXTRACT --> NODE[\"Nodes + Edges\"]\n    NODE --> STORE[\"Commit to Store\"]\n```\n\n资料来源：[crates/mnem-ingest/src/lib.rs:20-45]()\n\n### Supported Source Types\n\n| Source | Extensions | Strategy | Default Chunker |\n|--------|------------|----------|-----------------|\n| Markdown | `.md`, `.markdown` | CommonMark + GFM | `Paragraph` |\n| Text | `.txt`, unknown | Plain text | `SentenceRecursive` |\n| PDF | `.pdf` | Text layer extraction | `SentenceRecursive` |\n| Conversation | `.json`, `.jsonl` | Chat export formats | `Session` |\n| Code | `.rs`, `.py`, `.js`, `.ts`, `.go`, `.java`, `.c`, `.cpp`, `.rb`, `.cs` | Tree-sitter parsing | `Structural` |\n\n资料来源：[crates/mnem-ingest/src/pipeline.rs:45-60]()\n\n### Chunker Strategies\n\nFive chunking strategies are available:\n\n| Strategy | Description | Use Case |\n|----------|-------------|----------|\n| `Paragraph` | Splits on double-newlines | Markdown documents 
|\n| `Recursive` | Token-budgeted word-window sliding | Backwards compatibility |\n| `SentenceRecursive` | Sentence-aware token packing using Unicode boundaries | Prose (Text, PDF) |\n| `Session` | Groups messages up to `max_messages` | Conversation logs |\n| `Structural` | One chunk per section | Code (function/class level) |\n\nThe `SentenceRecursive` chunker is the preferred strategy for prose as it prevents cutting mid-sentence and produces more uniform chunk sizes. Token counts are estimated via whitespace split for speed and determinism. 资料来源：[crates/mnem-ingest/src/chunk.rs:1-45]()\n\n### Entity Extraction\n\nmnem supports multiple extraction providers:\n\n| Provider | Method |\n|----------|--------|\n| `rule` (default) | Capitalized phrase heuristic |\n| `keybert` | Statistical keyword extraction (requires feature flag) |\n| `ollama` | LLM-based extraction (requires feature flag) |\n| `none` | Suppress entity extraction |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:25-40]()\n\n## Retrieval System\n\nThe retrieval layer composes multiple ranking strategies to deliver relevant context to agents under a token budget:\n\n```mermaid\ngraph TD\n    QUERY[\"Query\"] --> VEC[\"Vector Search\"]\n    QUERY --> SPARSE[\"Sparse Ranking\"]\n    VEC --> RERANK[\"Rerank\"]\n    SPARSE --> RERANK\n    RERANK --> PACK[\"Token Budget Packing\"]\n    PACK --> RESULT[\"Context for Agent\"]\n```\n\nThe retriever renders nodes in a YAML-like format:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n```\n\n- `ntype` and `id` are always present\n- `context` appears before `summary` (per Anthropic's contextual-retrieval recipe)\n- `summary` is clipped at 8192 chars by default\n- Scalar props are emitted in BTreeMap order; non-scalar props are skipped\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:1-50]()\n\n## CLI Interface\n\nThe `mnem` CLI provides commands for repository management:\n\n| Command | Description 
|\n|---------|-------------|\n| `mnem ingest <path>` | Parse and commit documents to the graph |\n| `mnem tag` | Manage versioned references (create, list, delete) |\n| `mnem branch` | Create and manage branches |\n\n### Ingest Command Options\n\n| Option | Default | Description |\n|--------|---------|-------------|\n| `--chunker` | `auto` | Strategy: `auto`, `paragraph`, `recursive`, `sentence_recursive`, `session`, `structural` |\n| `--max-tokens` | `512` | Target tokens per chunk |\n| `--overlap` | `32` | Overlap tokens between chunks |\n| `--recursive` | false | Walk directory trees |\n| `--extractor` | `none` | Entity extraction provider |\n| `--ner-provider` | `rule` | NER method: `rule`, `none` |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:50-70]()\n\n## HTTP API\n\nThe HTTP server exposes REST endpoints for remote agent access:\n\n| Endpoint | Method | Description |\n|----------|--------|-------------|\n| `/v1/ingest` | POST | Ingest documents with JSON or multipart payload |\n| `/v1/branches` | GET | List all branches |\n| `/v1/branches` | POST | Create a new branch |\n\n### Ingest Request Parameters\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `chunker` | String | Strategy: `auto`, `paragraph`, `recursive`, `session` |\n| `max_tokens` | u32 | Target tokens per chunk |\n| `overlap` | u32 | Overlap tokens between chunks |\n| `author` | String | Required commit author |\n| `message` | String | Optional commit message |\n| `extractor` | String | Extraction provider |\n| `ner_provider` | String | NER method override |\n\n资料来源：[crates/mnem-http/src/handlers_ingest.rs:15-45]()\n\n## Key Design Principles\n\n1. **No unsafe code**: The entire `mnem-core` crate enforces `#![forbid(unsafe_code)]` 资料来源：[crates/mnem-core/src/lib.rs:30]()\n\n2. **Canonical encoding**: Every object type preserves byte-exact round-trip property (`decode(encode(x)) == x`)\n\n3. 
**Deterministic retrieval**: Node props use `BTreeMap` for consistent iteration order\n\n4. **Extensible architecture**: Sidecar support for external tools (docling, unstructured) via feature flags\n\n5. **Branch support**: Tags and branches enable experimental state management without losing history\n\n---\n\n<a id='page-installation'></a>\n\n## Installation Guide\n\n### 相关页面\n\n相关主题：[Introduction to mnem](#page-introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-cli/src/main.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n- [py-packages/mnem-cli/README.md](https://github.com/Uranid/mnem/blob/main/py-packages/mnem-cli/README.md)\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n- [crates/mnem-cli/src/commands/ingest.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n</details>\n\n# Installation Guide\n\n## Overview\n\nThe mnem project is a Git-like version control system designed specifically for AI Agent Knowledge management. It provides versioned storage, retrieval, and synchronization of structured knowledge nodes. This guide covers all supported installation methods, system requirements, and configuration steps to get mnem running on your platform.\n\nThe project is organized as a Rust monorepo with multiple crates and language bindings. Installation options include native binaries via multiple package managers, Python packages, Docker containers, and prebuilt releases. 
资料来源：[crates/mnem-core/src/lib.rs:1-20]()\n\n## System Requirements\n\n### Supported Platforms\n\nmnem supports the following platforms and architectures:\n\n| Platform | Architecture | Notes |\n|----------|--------------|-------|\n| Linux | x86_64, aarch64 | Full support |\n| macOS | arm64 (Apple Silicon), x86_64 | Rosetta 2 compatible |\n| Windows | x86_64 | Full support |\n\n资料来源：[py-packages/mnem-cli/README.md:1-20]()\n\n### Runtime Dependencies\n\n| Component | Requirement | Purpose |\n|-----------|-------------|---------|\n| Rust toolchain | 1.70+ (stable) | Building from source |\n| Python | 3.9+ | Python bindings (`mnem-py`) |\n| Node.js | 18+ | npm package |\n| Docker | 20.10+ | Container deployment |\n\n## Installation Methods\n\n### CLI Installation via pip\n\nThe simplest method to install the mnem CLI is through Python's package manager:\n\n```bash\npip install mnem-cli\nmnem --version\n```\n\nOn first run, `mnem` automatically downloads the correct prebuilt binary for your platform from the GitHub release assets and caches it in `~/.mnem_cli/`. Subsequent calls run the cached binary directly. 资料来源：[py-packages/mnem-cli/README.md:1-15]()\n\n### CLI Installation via Cargo\n\nFor users with the Rust toolchain installed, install from crates.io:\n\n```bash\ncargo install --locked mnem-cli --features bundled-embedder\n```\n\nThe `--features bundled-embedder` flag compiles the embedder dependency into the binary, making it self-contained without external embedding services. 资料来源：[crates/mnem-cli/src/main.rs:1-50]()\n\n### CLI Installation via npm\n\nNode.js users can install globally via npm:\n\n```bash\nnpm install -g mnem-cli\n```\n\n资料来源：[py-packages/mnem-cli/README.md:1-20]()\n\n### Prebuilt Binaries\n\nDownload prebuilt binaries directly from the [GitHub Releases](https://github.com/Uranid/mnem/releases) page. 
Binaries are available for all supported platforms in the release assets.\n\nAfter downloading, make the binary executable:\n\n```bash\nchmod +x mnem-*-x86_64-unknown-linux-gnu\n./mnem-*-x86_64-unknown-linux-gnu --version\n```\n\n### Docker Installation\n\nContainer-based deployment is available via Docker. The project includes both a `Dockerfile` and `docker-compose.yml` for containerized deployments.\n\nTo build the Docker image:\n\n```bash\ndocker build -t mnem:latest .\n```\n\nFor orchestrated deployments using docker-compose:\n\n```bash\ndocker-compose up -d\n```\n\n资料来源：[Dockerfile](https://github.com/Uranid/mnem/blob/main/Dockerfile), [docker-compose.yml](https://github.com/Uranid/mnem/blob/main/docker-compose.yml)\n\n## Python Bindings\n\nFor programmatic access from Python applications, install the Python bindings package:\n\n```bash\npip install mnem-py\n```\n\nThis package provides the `import pymnem` interface for Python applications to interact with mnem repositories. 资料来源：[py-packages/mnem-cli/README.md:1-30]()\n\n## Build from Source\n\n### Prerequisites\n\n- Rust 1.70 or later (stable toolchain)\n- Cargo (included with Rust)\n- Git\n\n### Build Steps\n\n```bash\n# Clone the repository\ngit clone https://github.com/Uranid/mnem.git\ncd mnem\n\n# Build the CLI\ncargo build --release --bin mnem\n\n# Build all crates\ncargo build --release\n```\n\n### Feature Flags\n\nThe project supports several feature flags to customize the build:\n\n| Feature | Description |\n|---------|-------------|\n| `bundled-embedder` | Embedder for local vector storage |\n| `keybert` | Statistical keyphrase extraction |\n| `ollama` | LLM-based extraction via Ollama |\n| `sidecar-docling` | PDF extraction via docling CLI |\n| `sidecar-unstructured` | PDF extraction via unstructured |\n\n资料来源：[crates/mnem-ingest/src/lib.rs:1-60]()\n\n## Initial Configuration\n\n### Repository Initialization\n\nAfter installation, initialize a new mnem repository:\n\n```bash\nmnem 
init\n```\n\nThis creates the `.mnem/` directory with the repository database (`repo.redb`). 资料来源：[crates/mnem-cli/src/main.rs:1-80]()\n\n### Configuration File\n\nThe CLI reads configuration from `.mnem/config.toml` in the repository root. Configuration includes:\n\n```toml\n[user]\nname = \"Your Name\"\nemail = \"your.email@example.com\"\nagent_id = \"agent-identifier\"\n\n[llm]\nprovider = \"ollama\"  # or \"openai\", \"anthropic\"\nmodel = \"llama3.2\"\nbase_url = \"http://localhost:11434\"\ntimeout_secs = 120\n```\n\nThe author string for commits follows the format `name <email>` when both are present. If only one is available, it uses that value alone. When neither is configured, it falls back to the `agent_id` or defaults to `\"mnem-cli\"`. 资料来源：[crates/mnem-cli/src/config.rs:1-50]()\n\n### Repository Path Resolution\n\nThe CLI automatically searches for the `.mnem/` directory by walking up from the current working directory, similar to Git's behavior. You can override this with the `-R` / `--repo` flag:\n\n```bash\nmnem -R ~/notes status\n```\n\n资料来源：[crates/mnem-cli/src/main.rs:1-80]()\n\n## HTTP Server Deployment\n\n### Starting the Server\n\nThe HTTP server provides REST API access to mnem repositories:\n\n```bash\nmnem serve --port 8080\n```\n\n### API Endpoints\n\n| Endpoint | Method | Purpose |\n|----------|--------|---------|\n| `/v1/ingest` | POST | Ingest documents |\n| `/v1/branches` | GET | List branches |\n| `/v1/branches` | POST | Create branch |\n| `/v1/retrieve` | POST | Query knowledge |\n\n### Ingest Configuration\n\nThe ingest endpoint accepts JSON payloads with the following parameters:\n\n| Parameter | Type | Required | Default | Description |\n|-----------|------|----------|---------|-------------|\n| `content` | String | Yes | - | Content to ingest |\n| `chunker` | String | No | `auto` | Chunking strategy |\n| `max_tokens` | u32 | No | 512 | Target tokens per chunk |\n| `overlap` | u32 | No | 32 | Overlap tokens |\n| `author` | String 
| Yes | - | Commit author |\n| `message` | String | No | `\"mnem http ingest\"` | Commit message |\n| `extractor` | String | No | `\"none\"` | Entity extractor |\n| `ner_provider` | String | No | `\"rule\"` | NER provider |\n\n资料来源：[crates/mnem-http/src/handlers_ingest.rs:1-50]()\n\n## Verification\n\n### Verify Installation\n\nAfter installation, verify the CLI is working:\n\n```bash\nmnem --version\nmnem status\n```\n\n### First-Run Wizard\n\nOn first run with no repository present, mnem launches a first-run wizard to help configure the basic settings. Returning users see the `mnem status` output directly. 资料来源：[crates/mnem-cli/src/main.rs:1-80]()\n\n## Alternative Installation Summary\n\n| Method | Command | Notes |\n|--------|---------|-------|\n| pip | `pip install mnem-cli` | Auto-downloads binary |\n| cargo | `cargo install --locked mnem-cli --features bundled-embedder` | Self-contained binary |\n| npm | `npm install -g mnem-cli` | Node.js integration |\n| Docker | `docker-compose up -d` | Containerized deployment |\n| Binary | Download from Releases | Manual installation |\n\n资料来源：[py-packages/mnem-cli/README.md:1-20]()\n\n## Next Steps\n\nAfter installation, consult these related guides:\n\n- **Quick Start** - Create your first repository and add content\n- **Configuration Reference** - Complete configuration options\n- **Ingest Guide** - Document ingestion and chunking strategies\n- **Retrieve Guide** - Query your knowledge base\n\n---\n\n<a id='page-architecture'></a>\n\n## System Architecture\n\n### 相关页面\n\n相关主题：[Core Components](#page-core-components), [Storage Backend](#page-storage-backend)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n- [crates/mnem-core/src/id/link.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/id/link.rs)\n- 
[crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-core/src/retrieve/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/retrieve/mod.rs)\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-ingest/src/pipeline.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pipeline.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n</details>\n\n# System Architecture\n\n## Overview\n\nmnem is a content-addressed, CRDT-based (Conflict-free Replicated Data Types) knowledge management system designed for agentic workflows. The system provides immutable content-addressed storage with a secondary vector index for retrieval-augmented generation (RAG) applications.\n\nThe architecture follows a modular design with distinct crates handling different concerns:\n\n| Crate | Purpose |\n|-------|---------|\n| `mnem-core` | Core data types, CRDT operations, storage, indexing, retrieval |\n| `mnem-ingest` | Source parsing, chunking, and entity extraction |\n| `mnem-cli` | Command-line interface |\n| `mnem-http` | HTTP API server |\n\n资料来源：[crates/mnem-core/src/lib.rs:1-30]()\n\n---\n\n## Core Data Model\n\n### Content Identifiers (CIDs)\n\nContent in mnem is identified by cryptographic hashes using IPLD CIDs (Content Identifiers). 
CIDs are self-describing content addresses that encode the content codec, the hash function, and the resulting hash digest of the content.\n\nThe system uses DAG-CBOR encoding for canonical binary serialization, ensuring byte-exact round-trip encoding/decoding:\n\n```text\ndecode(encode(x)) == x\nencode(decode(b)) == b\n```\n\n资料来源：[crates/mnem-core/src/lib.rs:35-40]()\n\n### Phantom-Typed Links\n\nmnem introduces `Link<T>` - a phantom-typed CID that encodes the type of the referenced content at the type level. While a bare CID points to \"some content,\" a `Link<T>` points to \"content that is a `T`.\"\n\n```rust\npub struct Link<T: ?Sized> {\n    cid: Cid,\n    _target: PhantomData<fn() -> T>,\n}\n```\n\nThe phantom type prevents reference-mixing bugs at compile time. For example, `fn parents(&self) -> &[Link<Commit>]` will reject a `Link<Node>` at compile time.\n\nOn the wire, `Link<T>` is identical to a `Cid` - same bytes, same CBOR tag. The phantom type exists solely for Rust-level type safety.\n\n资料来源：[crates/mnem-core/src/id/link.rs:1-35]()\n\n### Node Object\n\nThe fundamental unit of content storage is the `Node`:\n\n```rust\npub struct Node {\n    pub id: NodeId,\n    pub ntype: NodeType,\n    pub context_sentence: Option<String>,\n    pub summary: Option<String>,\n    pub props: BTreeMap<String, Ipld>,\n    pub content: Option<Bytes>,\n    pub ext: Option<BTreeMap<String, Ipld>>,\n}\n```\n\n| Field | Purpose |\n|-------|---------|\n| `id` | Unique identifier |\n| `ntype` | Semantic type (e.g., `Chunk`, `Concept`, `Entity`) |\n| `context_sentence` | LLM-generated positional cue for contextual retrieval |\n| `summary` | Token-efficient content representation for LLM consumption |\n| `props` | Structured key-value properties |\n| `content` | Opaque payload (document body, file data) |\n| `ext` | Forward-compatibility extension map |\n\nThe `context_sentence` field implements Anthropic's 2024 Contextual Retrieval technique, storing LLM-generated placement cues alongside nodes. 
This reportedly reduces retrieval failure by 49-67%.\n\n资料来源：[crates/mnem-core/src/objects/node.rs:1-80]()\n\n---\n\n## Repository Structure\n\nmnem uses a CRDT-based repository model with the following object types:\n\n```mermaid\ngraph TD\n    A[View] --> B[Commit]\n    B --> C[Node]\n    B --> D[Edge]\n    C --> E[Node Content]\n    D --> F[Link<Node>]\n    D --> G[Link<Node>]\n    \n    B --> H[Operation]\n    H --> I[ChangeId]\n    H --> J[OperationId]\n```\n\n### Core Objects\n\n| Type | Description |\n|------|-------------|\n| `Node` | Atomic content unit with type, summary, and properties |\n| `Edge` | Directed relationship between two nodes |\n| `Commit` | Atomic snapshot of changes with parent references |\n| `Operation` | Individual change record for sync |\n| `View` | Immutable repository head view |\n| `IndexSet` | Secondary index collection |\n\n资料来源：[crates/mnem-core/src/lib.rs:10-20]()\n\n### Blockstore Abstraction\n\nThe system abstracts storage through the `Blockstore` trait, allowing different backends:\n\n```rust\npub trait Blockstore {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>>;\n    fn put(&self, data: &[u8]) -> Result<Cid>;\n}\n```\n\nReference implementations include in-memory stores, with pluggable backends for production use.\n\n---\n\n## Ingest Pipeline\n\nThe ingest pipeline handles parsing, chunking, and extracting content from various source types.\n\n```mermaid\ngraph LR\n    A[Raw Bytes] --> B[Source Detection]\n    B --> C[Parser]\n    C --> D[Chunker]\n    D --> E[Extractor]\n    E --> F[Transaction]\n    F --> G[Commit]\n```\n\n### Source Types\n\nThe system automatically detects source types from file extensions:\n\n| Extension | SourceKind | Chunker Strategy |\n|-----------|------------|------------------|\n| `md`, `markdown` | `Markdown` | Paragraph |\n| `pdf` | `Pdf` | SentenceRecursive (512 tokens, 64 overlap) |\n| `json`, `jsonl` | `Conversation` | Session (10 messages) |\n| `rs` | `Code(Rust)` | Structural 
|\n| `py`, `pyi` | `Code(Python)` | Structural |\n| `js`, `mjs`, `cjs` | `Code(JavaScript)` | Structural |\n| `ts`, `tsx`, `mts`, `cts` | `Code(TypeScript)` | Structural |\n| Other | `Text` | SentenceRecursive (256 tokens, 32 overlap) |\n\n资料来源：[crates/mnem-ingest/src/types.rs:1-50]()\n\n### Chunker Strategies\n\n| Strategy | Configuration | Use Case |\n|----------|---------------|----------|\n| `Paragraph` | - | Markdown documents |\n| `SentenceRecursive` | `max_tokens`, `overlap` | Text, PDFs |\n| `Session` | `max_messages` | Chat/conversation logs |\n| `Structural` | - | Code (tree-sitter based) |\n| `Recursive` | `max_tokens`, `overlap` | Generic recursive splitting |\n\nFor code parsing, mnem uses tree-sitter to extract named language constructs:\n\n| Language | Extracted Items |\n|----------|-----------------|\n| Rust | `function_item`, `struct_item`, `enum_item`, `trait_item` |\n| Python | `function_definition`, `class_definition` |\n| JavaScript/TypeScript | `function_declaration`, `class_declaration`, `method_definition` |\n| Go | `function_declaration`, `type_declaration` |\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:1-50]()\n\n### Auto-Chunker Defaults\n\nThe `auto_chunker` function maps source kinds to default chunking strategies:\n\n```rust\npub fn auto_chunker(kind: SourceKind, heuristics: ChunkerAuto) -> ChunkerKind {\n    match kind {\n        SourceKind::Markdown => ChunkerKind::Paragraph,\n        SourceKind::Text => ChunkerKind::SentenceRecursive { \n            max_tokens: 256, \n            overlap: 32 \n        },\n        SourceKind::Pdf => ChunkerKind::SentenceRecursive { \n            max_tokens: 512, \n            overlap: 64 \n        },\n        SourceKind::Conversation => ChunkerKind::Session { \n            max_messages: 10 \n        },\n        SourceKind::Code(_) => ChunkerKind::Structural,\n    }\n}\n```\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:1-60]()\n\n---\n\n## Retrieval System\n\nThe retrieval system provides agent-facing context 
assembly with token-budget packing.\n\n### Node Rendering\n\nNodes are rendered to a compact, deterministic YAML-like format for LLM consumption:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n```\n\nRendering rules:\n- `ntype` and `id` are always present\n- `context` appears before `summary` (per Anthropic's contextual retrieval recipe)\n- `summary` is clipped at 8192 chars by default (configurable via `MNEM_RENDER_SUMMARY_CAP_CHARS`)\n- Scalar props (`String`, `Integer`, `Float`, `Bool`) are emitted in BTreeMap order\n- Non-scalar props (`Link`, `Map`, `List`, `Bytes`, `Null`) are skipped\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:1-50]()\n\n### Retrieval Pipeline\n\n```mermaid\ngraph TD\n    A[Query] --> B[Filters]\n    B --> C[Vector Ranking]\n    C --> D[Sparse Ranking]\n    D --> E[Token Budget Packing]\n    E --> F[Rendered Context]\n```\n\nThe `Retriever` composes:\n1. **Filters** - Property-based pre-filtering\n2. **Vector Index** - Dense embedding similarity search\n3. **Sparse Ranking** - BM25-style keyword matching\n4. 
**Token Budget Packing** - Context window optimization\n\n---\n\n## Module Organization\n\n### mnem-core\n\nCore library providing:\n\n| Module | Exports |\n|--------|---------|\n| `id` | `Cid`, `NodeId`, `ChangeId`, `OperationId`, `Link<T>` |\n| `codec` | DAG-CBOR encode/decode, DAG-JSON debug export |\n| `objects` | `Node`, `Edge`, `Commit`, `Operation`, `View` |\n| `prolly` | Prolly tree algorithms, chunks, builder |\n| `store` | `Blockstore`, `OpHeadsStore` traits |\n| `repo` | `ReadonlyRepo`, `Transaction` facade |\n| `index` | `Query`, `BruteForceVectorIndex` |\n| `retrieve` | `Retriever` for agent-facing context |\n| `sign` | Ed25519 signing, revocation-list verification |\n\n### mnem-ingest\n\nContent ingestion library with optional LLM features:\n\n| Module | Description |\n|--------|-------------|\n| `chunk` | Text chunking strategies |\n| `code` | Tree-sitter based code parsing |\n| `conversation` | Chat log processing |\n| `extract` | Rule-based entity extraction |\n| `extract_keybert` | Statistical NER (feature-gated) |\n| `extract_llm` | LLM-based extraction (feature-gated) |\n| `md` | Markdown parsing |\n| `pdf` | PDF processing |\n| `sidecar` | External parser integration |\n\n资料来源：[crates/mnem-ingest/src/lib.rs:1-50]()\n\n---\n\n## Configuration\n\n### LLM Providers\n\nmnem supports multiple LLM providers for extraction and synthesis:\n\n| Provider | Config Keys |\n|----------|-------------|\n| OpenAI | `llm.provider=openai`, `llm.model`, `llm.api_key_env`, `llm.base_url` |\n| Ollama | `llm.provider=ollama`, `llm.model`, `llm.base_url` (defaults to `http://localhost:11434`) |\n\n### Retrieval Configuration\n\n| Key | Description | Default |\n|-----|-------------|---------|\n| `retrieve.limit` | Max results to return | - |\n| `retrieve.budget` | Token budget for context | - |\n| `retrieve.vector_cap` | Max vectors per query | - |\n| `retrieve.graph_expand` | Neighbor expansion count | - |\n| `retrieve.graph_depth` | Traversal depth | - |\n| 
`retrieve.rerank_top_k` | Top-k for reranking | - |\n| `retrieve.hyde_max_tokens` | HyDE document tokens | - |\n\n### Rerank Providers\n\nSupported rerankers: `cohere`, `voyage`, `jina`\n\n### NER Providers\n\n| Provider | Description |\n|----------|-------------|\n| `rule` | Capitalized-phrase heuristic (default) |\n| `none` | Disable entity extraction |\n\n---\n\n## Crate Dependencies\n\n```mermaid\ngraph TD\n    CLI[mnem-cli] --> CORE[mnem-core]\n    CLI --> INGEST[mnem-ingest]\n    HTTP[mnem-http] --> CORE\n    HTTP --> INGEST\n    INGEST --> CORE\n```\n\nAll crates maintain `#![forbid(unsafe_code)]` - no unsafe code permitted in the repository.\n\n资料来源：[crates/mnem-core/src/lib.rs:30-45]()\n\n---\n\n## Summary\n\nThe mnem system architecture implements a layered, modular design:\n\n1. **Storage Layer** - Content-addressed immutable storage with CRDT semantics\n2. **Data Model** - Nodes, edges, commits with phantom-typed links for type safety\n3. **Ingest Layer** - Pluggable parsers and chunkers for diverse source types\n4. **Retrieval Layer** - Hybrid vector/sparse search with token-budget aware packing\n5. 
**Interface Layer** - CLI and HTTP APIs for integration with agents and tools\n\nThe architecture prioritizes determinism, type safety, and modularity, making it suitable for building reliable agentic knowledge systems.\n\n---\n\n<a id='page-core-components'></a>\n\n## Core Components\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [Hybrid Retrieval System](#page-hybrid-retrieval)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-core/src/objects/commit.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/commit.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-core/src/objects/edge.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/edge.rs)\n- [crates/mnem-core/src/repo/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/repo/mod.rs)\n- [crates/mnem-core/src/repo/transaction.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/repo/transaction.rs)\n- [crates/mnem-core/src/repo/merge.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/repo/merge.rs)\n- [crates/mnem-core/src/prolly/tree.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/prolly/tree.rs)\n</details>\n\n# Core Components\n\nThe mnem system is built around a set of core components that work together to provide a versioned, graph-based knowledge management system. The core is implemented entirely in Rust with `#![forbid(unsafe_code)]`, ensuring memory safety throughout the codebase. Every object type preserves byte-exact canonical-encoding round-trip properties (`decode(encode(x)) == x` and `encode(decode(b)) == b`). 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## System Architecture Overview\n\nmnem implements a content-addressed graph database with prolly trees for efficient storage and retrieval. 
The architecture separates concerns between data structures (`objects`), storage (`store`), repository management (`repo`), and retrieval (`retrieve`).\n\n```mermaid\ngraph TD\n    subgraph \"mnem-core\"\n        OBJ[objects: Node, Edge, Commit, View]\n        PRO[prolly: TreeChunk, Builder, Cursor]\n        STORE[store: Blockstore, OpHeadsStore]\n        REPO[repo: ReadonlyRepo, Transaction]\n        IDX[index: Query, BruteForceVectorIndex]\n        RET[retrieve: Retriever]\n        CODEC[codec: DAG-CBOR, DAG-JSON]\n    end\n    \n    subgraph \"mnem-ingest\"\n        ING[Ingester Pipeline]\n        CHUNK[Chunking Strategies]\n        PARSE[Parsers: MD, PDF, Code, JSON]\n    end\n    \n    subgraph \"External Interfaces\"\n        CLI[mnem-cli]\n        HTTP[mnem-http]\n        MCP[MCP Server]\n    end\n    \n    ING -->|adds nodes/edges| REPO\n    REPO -->|reads/writes| STORE\n    RET -->|queries| IDX\n    IDX -->|indexes| OBJ\n    CODEC -->|encodes/decodes| OBJ\n    CLI --> REPO\n    HTTP --> REPO\n```\n\n## Data Objects\n\nThe fundamental building blocks of the mnem knowledge graph are the core object types defined in `crates/mnem-core/src/objects/`. Each object is serializable via DAG-CBOR for canonical encoding. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n### Node\n\nThe `Node` is the primary unit of knowledge storage. 
It represents a single fact, entity, or chunk of content within the graph.\n\n```rust\n// Simplified structure from crates/mnem-core/src/objects/node.rs\npub struct Node {\n    pub id: NodeId,                                    // Unique identifier\n    pub ntype: String,                                 // Node type label (e.g., \"Fact\", \"Doc\")\n    pub summary: Option<String>,                       // LLM-facing retrieval text\n    pub props: BTreeMap<String, Ipld>,                 // Property map\n    pub content: Option<Bytes>,                        // Optional opaque payload\n    pub context_sentence: Option<String>,              // Positional chunk prefix\n    pub ext: Option<BTreeMap<String, Ipld>>,           // Forward-compat extension map\n}\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | `NodeId` | Unique content-addressed identifier |\n| `ntype` | `String` | Free-form type label for the node |\n| `summary` | `Option<String>` | Text summary for LLM retrieval under token budget |\n| `props` | `BTreeMap<String, Ipld>` | Structured metadata with any DAG-CBOR value |\n| `content` | `Option<Bytes>` | Opaque payload (document body, file data) |\n| `context_sentence` | `Option<String>` | LLM-generated placement cue per Anthropic's contextual retrieval recipe |\n| `ext` | `Option<BTreeMap>` | Forward-compat extension map preserving unknown fields |\n\nThe `summary` field is designed for LLM consumption—the field agents read when assembling context under a token budget. It is distinct from `props` (structured) and `content` (opaque payload). 资料来源：[crates/mnem-core/src/objects/node.rs]()\n\nThe `context_sentence` implements Anthropic's 2024 Contextual Retrieval paper approach, which reports a 49% to 67% reduction in retrieval failures when the context sentence is present. mnem stores it on the node so the render path can surface it back to the agent for faithful source attribution. 
资料来源：[crates/mnem-core/src/objects/node.rs]()\n\n### Edge\n\nEdges connect nodes and represent relationships between entities.\n\n```rust\n// From crates/mnem-core/src/objects/edge.rs\npub struct Edge {\n    pub src: NodeId,           // Source node ID\n    pub rel: String,           // Relation label (e.g., \"works_at\", \"extracted_from\")\n    pub dst: NodeId,           // Destination node ID\n    pub props: BTreeMap<String, Ipld>,  // Optional edge properties\n}\n```\n\nEdges are used to create graph relationships like `works_at`, `lives_in`, `traveling_with`, `has_preference`, and `extracted_from`. The mnem-cli integration guidelines recommend using the compound `mnem_commit_relation` tool when both endpoints are entities—it resolve-or-creates both nodes and adds the edge in one call. 资料来源：[crates/mnem-cli/src/integrate.rs]()\n\n### Commit\n\nThe `Commit` object represents a point-in-time snapshot of the repository state.\n\n```rust\n// From crates/mnem-core/src/objects/commit.rs\npub struct Commit {\n    pub message: String,           // Commit message\n    pub author: Author,            // Author information\n    pub timestamp: Timestamp,       // Commit timestamp\n    pub root: NodeId,              // Root of the node tree\n    pub ops: Vec<Operation>,        // Operations applied in this commit\n}\n```\n\n### View\n\nThe `View` contains repository metadata including branch references and commit heads.\n\n```rust\n// Referenced in crates/mnem-http/src/handlers.rs\npub struct View {\n    pub heads: Vec<Cid>,           // Current head commit CIDs\n    pub refs: BTreeMap<String, RefTarget>,  // Named references\n}\n```\n\nThe View exposes branch information via the HTTP API with the schema `mnem.v1.branches`. 资料来源：[crates/mnem-http/src/handlers.rs]()\n\n### Operation\n\nOperations represent individual changes applied to the repository. 
They are collected within commits to provide a complete audit trail.\n\n## Repository Layer\n\nThe repository layer provides the main interface for interacting with the knowledge graph. It is defined across several modules in `crates/mnem-core/src/repo/`. 资料来源：[crates/mnem-core/src/repo/mod.rs]()\n\n### ReadonlyRepo\n\n`ReadonlyRepo` provides a read-only view into the repository state.\n\n```rust\n// Simplified from crates/mnem-core/src/repo/mod.rs\npub trait ReadonlyRepo {\n    fn view(&self) -> &View;\n    fn blockstore(&self) -> &dyn Blockstore;\n}\n```\n\n### Transaction\n\n`Transaction` enables write operations to the repository. All changes are staged until explicitly committed.\n\n```rust\n// From crates/mnem-core/src/repo/transaction.rs\npub struct Transaction {\n    // Internal state managing pending operations\n}\n\nimpl Transaction {\n    pub fn add_node(&mut self, node: Node) -> Result<NodeId, Error>;\n    pub fn add_edge(&mut self, edge: Edge) -> Result<EdgeId, Error>;\n    pub fn commit(self, author: Author, message: String) -> Result<ReadonlyRepo, Error>;\n}\n```\n\nThe `ingest` method on the Ingester pipeline uses Transaction to add nodes and edges:\n\n> Parse, chunk, extract, and write into `tx`. Does **not** commit. `bytes` is the raw source payload; `kind` says how to parse it. Returns an `IngestResult` with counts and elapsed time. The `commit_cid` field is left `None` - callers who want a CID should call `tx.commit(...)` afterwards and stash the returned `ReadonlyRepo`'s head commit CID. 资料来源：[crates/mnem-ingest/src/pipeline.rs]()\n\n### Merge and Conflict Detection\n\nThe merge system handles combining divergent repository states.\n\n```rust\n// From crates/mnem-core/src/repo/merge.rs\npub fn detect_conflicts(\n    repo: &ReadonlyRepo,\n    left: Cid,\n    right: Cid,\n    lca: Option<Cid>,\n) -> Result<MergeConflicts, Error>;\n```\n\nConflict detection supports an explicit `ConflictPolicy` for customizing merge behavior. 
The detector loads tombstone sets via the Views attached to each commit's operation. 资料来源：[crates/mnem-core/src/repo/conflict.rs]()\n\n```mermaid\ngraph LR\n    A[Commit A] -->|diverged| B[Common Ancestor]\n    C[Commit B] -->|diverged| B\n    B --> D[Detect Conflicts]\n    D --> E{MergeConflicts?}\n    E -->|Yes| F[Surface conflicts to caller]\n    E -->|No| G[Auto-merge possible]\n```\n\n## Prolly Trees\n\nmnem uses prolly trees (probabilistic B-trees) for efficient storage and lookup of the node graph. This is implemented in `crates/mnem-core/src/prolly/`. 资料来源：[crates/mnem-core/src/prolly/tree.rs]()\n\n```mermaid\ngraph TD\n    subgraph \"Prolly Tree Structure\"\n        ROOT[Root Node / TreeChunk] --> LEFT[Left Child TreeChunk]\n        ROOT --> RIGHT[Right Child TreeChunk]\n        LEFT --> LL[Leaf TreeChunk]\n        LEFT --> LR[Leaf TreeChunk]\n        RIGHT --> RL[Leaf TreeChunk]\n        RIGHT --> RR[Leaf TreeChunk]\n    end\n    \n    style ROOT fill:#e1f5fe\n    style LL fill:#f3e5f5\n    style LR fill:#f3e5f5\n    style RL fill:#f3e5f5\n    style RR fill:#f3e5f5\n```\n\nThe prolly tree implementation includes:\n\n| Component | Purpose |\n|-----------|---------|\n| `TreeChunk` | Immutable chunk containing sorted entries |\n| `Builder` | Constructs new trees from operations |\n| `Cursor` | Navigates tree structure for lookups |\n| `diff` | Computes differences between trees |\n| `merge` | Merges divergent tree versions |\n\nProlly trees provide logarithmic-time lookups and efficient diffing for collaborative editing scenarios. 
资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Storage Layer\n\nThe storage layer abstracts over different backend implementations.\n\n### Blockstore\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub trait Blockstore {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>, Error>;\n    fn put(&self, cid: &Cid, data: &[u8]) -> Result<(), Error>;\n}\n```\n\n### OpHeadsStore\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub trait OpHeadsStore {\n    fn get_heads(&self) -> Result<Vec<Cid>, Error>;\n    fn set_heads(&mut self, heads: &[Cid]) -> Result<(), Error>;\n}\n```\n\nThe codebase includes in-memory reference implementations of both traits for testing and development. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Index System\n\nSecondary indexes enable efficient querying of the knowledge graph.\n\n### Query\n\nThe primary query interface for searching nodes and edges.\n\n### BruteForceVectorIndex\n\nA vector index implementation for semantic search capabilities. This works in conjunction with the retrieve module to provide dense + sparse retrieval lanes that capture positional and relational context. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Retrieval System\n\nThe retrieve module provides the agent-facing interface for context assembly.\n\n```rust\n// From crates/mnem-core/src/retrieve/mod.rs\npub struct Retriever { /* ... */ }\n```\n\nThe retriever composes:\n\n1. **Filters** - Pre-filter nodes by type, properties, or time range\n2. **Vector ranking** - Dense embeddings from the configured embedder\n3. **Sparse ranking** - BM25-style keyword matching\n4. 
**Token-budget packing** - Assembles context within LLM token limits\n\n### Node Rendering\n\nNodes are rendered to a compact, deterministic YAML-like format suitable for LLM consumption:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n```\n\n- `ntype` and `id` are always present\n- `context` is emitted if `node.context_sentence` is `Some` (sits BEFORE summary per Anthropic's contextual-retrieval recipe)\n- `summary` is emitted if `node.summary` is `Some`, clipped at `DEFAULT_RENDER_SUMMARY_CAP_CHARS` (8192) chars\n- Scalar props (`String`, `Integer`, `Float`, `Bool`) are emitted in BTreeMap order\n- Non-scalar props (`Link`, `Map`, `List`, `Bytes`, `Null`) are skipped\n- Opaque `content` bytes are never rendered 资料来源：[crates/mnem-core/src/retrieve/mod.rs]()\n\n## Identification System\n\nmnem uses phantom-typed identifiers for type safety:\n\n| Type | Description |\n|------|-------------|\n| `NodeId` | Identifies a node in the graph |\n| `EdgeId` | Identifies an edge |\n| `ChangeId` | Identifies a change operation |\n| `OperationId` | Identifies an operation |\n| `Link<T>` | Phantom-typed link to any type |\n\nAll CIDs are content-addressed, ensuring that the same content always produces the same identifier. 资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Codec System\n\nThe codec system provides canonical encoding and decoding:\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub mod codec {\n    pub fn encode<T: Encode>(&self, value: &T) -> Vec<u8>;\n    pub fn decode<T: Decode>(&self, bytes: &[u8]) -> Result<T, Error>;\n}\n```\n\n- **DAG-CBOR** - Primary serialization format with canonical encoding guarantees\n- **DAG-JSON** - Debug export format for human inspection\n\nEvery object type preserves the byte-exact canonical-encoding round-trip property. 
资料来源：[crates/mnem-core/src/lib.rs]()\n\n## Signing System\n\nThe `sign` module provides Ed25519 signing and revocation-list verification for trust and integrity:\n\n```rust\n// From crates/mnem-core/src/lib.rs\npub mod sign {\n    // Ed25519 signing operations\n    // Revocation-list verification\n}\n```\n\n## Chunking Integration\n\nWhile chunking is primarily handled by the `mnem-ingest` crate, the core objects are designed to work seamlessly with chunked content:\n\nThe `Chunk` type is used throughout the system:\n\n```rust\n// Referenced from crates/mnem-ingest/src/chunk.rs\npub struct Chunk {\n    pub content: String,\n    pub tokens_estimate: usize,  // Fast whitespace-split estimation\n}\n```\n\nChunks preserve source order: section 0's chunks come before section 1's. Empty sections are skipped silently. 资料来源：[crates/mnem-ingest/src/chunk.rs]()\n\n## Summary\n\nThe mnem core components form a layered architecture:\n\n| Layer | Components | Responsibility |\n|-------|------------|----------------|\n| **Objects** | Node, Edge, Commit, View, Operation | Core data structures |\n| **Storage** | Blockstore, OpHeadsStore | Persistence abstraction |\n| **Trees** | ProllyTree, TreeChunk, Builder | Efficient ordered storage |\n| **Repository** | ReadonlyRepo, Transaction | Access control and mutation |\n| **Index** | Query, VectorIndex | Secondary access paths |\n| **Retrieval** | Retriever | Agent-facing context assembly |\n| **Codec** | DAG-CBOR, DAG-JSON | Canonical serialization |\n| **Crypto** | Ed25519 signing | Integrity and trust |\n\nThis architecture enables mnem to serve as a versioned, collaborative knowledge graph with strong consistency guarantees and efficient retrieval capabilities for LLM integration.\n\n---\n\n<a id='page-hybrid-retrieval'></a>\n\n## Hybrid Retrieval System\n\n### 相关页面\n\n相关主题：[Embedding Providers](#page-embed-providers)\n\n<details>\n<summary>Relevant Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- 
[crates/mnem-core/src/retrieve/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/retrieve/mod.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-http/src/handlers.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-http/src/handlers.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n- [crates/mnem-core/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/lib.rs)\n</details>\n\n# Hybrid Retrieval System\n\n## Overview\n\nThe Hybrid Retrieval System in mnem is an agent-facing retrieval subsystem that composes multiple ranking strategies—vector (dense), sparse, and graph-based expansion—into a unified token-budgeted context assembly pipeline. 
It is designed for LLM consumption, enabling autonomous agents to fetch relevant nodes from the repository under strict token budgets.\n\nThe system lives in `crates/mnem-core/src/retrieve/` and is exposed via HTTP API (`crates/mnem-http/src/handlers.rs`) and CLI (`crates/mnem-cli/`).\n\n资料来源：[crates/mnem-core/src/lib.rs:18-23]()\n\n## Architecture\n\n```mermaid\ngraph TD\n    subgraph \"Retrieval Entry Points\"\n        HTTP[HTTP API: POST /v1/retrieve]\n        CLI[CLI: mnem retrieve]\n    end\n    \n    subgraph \"Hybrid Retrieval Core\"\n        RT[Retriever]\n        HF[Hybrid Fuser]\n        VQ[Vector Query]\n        SQ[Sparse Query]\n        GQ[Graph Expansion]\n        TB[Token Budget Packer]\n    end\n    \n    subgraph \"Indexes\"\n        VI[Vector Index]\n        SI[Sparse Index]\n        GI[Graph Index]\n    end\n    \n    HTTP --> RT\n    CLI --> RT\n    RT --> HF\n    HF --> VQ\n    HF --> SQ\n    HF --> GQ\n    VQ --> VI\n    SQ --> SI\n    GQ --> GI\n    HF --> TB --> Output[LLM Context]\n```\n\n## Core Components\n\n### Retriever\n\nThe `Retriever` struct is the main facade for retrieval operations. 
It orchestrates query planning, index selection, and result fusion.\n\n**Key Responsibilities:**\n- Accept a query string and configuration parameters\n- Dispatch parallel queries to vector, sparse, and graph indexes\n- Fuse ranked results using configurable strategies\n- Pack results into token budgets suitable for LLM context windows\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:1-50]()\n\n### Node Rendering\n\nBefore results reach the LLM, nodes are rendered to a compact, deterministic YAML-like text representation:\n\n```text\nntype: <ntype>\nid: <uuid>\ncontext: <context_sentence>\nsummary: <summary>\n<prop_key>: <prop_value>\n...\n```\n\n**Rendering Rules:**\n| Field | Condition | Notes |\n|-------|-----------|-------|\n| `ntype` | Always | Node type identifier |\n| `id` | Always | UUID |\n| `context` | If `node.context_sentence` is `Some` | Position cue, emitted BEFORE summary |\n| `summary` | If `node.summary` is `Some` | Clipped at 8192 chars by default |\n| Scalar props | Always | Strings, integers, floats, booleans in BTreeMap order |\n| Non-scalar props | Skipped | Links, Maps, Lists, Bytes, Null |\n\n资料来源：[crates/mnem-core/src/retrieve/mod.rs:60-95]()\n\n### Context Sentence (Anthropic Contextual Retrieval)\n\nmnem implements Anthropic's 2024 Contextual Retrieval recipe. 
Each node may carry an optional `context_sentence` field—an LLM-generated one-sentence placement cue.\n\n> \"This paragraph is from Section 3 of a legal contract between Alice and Bob's employer...\"\n\nThe ingest pipeline prepends this to `summary` before embedding so both dense and sparse lanes capture positional and relational context.\n\n资料来源：[crates/mnem-core/src/objects/node.rs:95-115]()\n\n## Retrieval Configuration\n\n### CLI Configuration Keys\n\n| Key | Type | Default | Description |\n|-----|------|---------|-------------|\n| `retrieve.limit` | `usize` | — | Maximum results to return |\n| `retrieve.budget` | `u32` | — | Token budget for result packing |\n| `retrieve.vector_cap` | `usize` | — | Vector index candidate cap |\n| `retrieve.graph_expand` | `usize` | — | Graph neighbor expansion count |\n| `retrieve.graph_depth` | `usize` | — | Graph traversal depth |\n| `retrieve.graph_decay` | `u32` | — | Decay factor for graph scores |\n| `retrieve.rerank_top_k` | `usize` | — | Top-K for re-ranking |\n| `retrieve.hyde_max_tokens` | `usize` | — | Max tokens for HyDE hypothesis |\n| `rerank.model` | `String` | — | Re-ranker model identifier |\n| `rerank.base_url` | `String` | — | Re-ranker service base URL |\n\n资料来源：[crates/mnem-cli/src/config.rs:1-100]()\n\n### HTTP API Parameters\n\nThe `POST /v1/retrieve` endpoint accepts the following JSON body:\n\n| Field | Type | Default | Description |\n|-------|------|---------|-------------|\n| `query` | `String` | Required | Search query |\n| `limit` | `usize` | 20 | Result limit (clamped to `MAX_RETRIEVE_LIMIT`) |\n| `vector_cap` | `usize` | — | Vector candidate cap (clamped to `MAX_VECTOR_CAP`) |\n| `rerank_top_k` | `usize` | — | Re-rank candidate count (clamped to `MAX_RERANK_TOP_K`) |\n| `hyde` | `bool` | false | Enable HyDE extractive summarization |\n| `summarize` | `bool` | false | Enable centroid + MMR summarization |\n| `summarize_k` | `usize` | 3 | Summary sentences count |\n\n**Clamping Constants:**\n- 
`MAX_RETRIEVE_LIMIT` — Prevents unbounded result sets\n- `MAX_VECTOR_CAP` — Bounds vector search candidates\n- `MAX_RERANK_TOP_K` — Limits re-ranking computation\n\n资料来源：[crates/mnem-http/src/handlers.rs:200-280]()\n\n## Chunking Strategies\n\nThe retrieval system operates on pre-chunked content. The ingest pipeline supports five chunking strategies, selectable per source kind:\n\n| Strategy | Source Kind | Configuration | Behavior |\n|----------|-------------|---------------|----------|\n| `Paragraph` | Markdown | None | Splits on double-newline boundaries |\n| `SentenceRecursive` | Text | `max_tokens`, `overlap` | Sentence-aware token-budgeted packing using Unicode UAX #29 boundaries |\n| `SentenceRecursive` | PDF | `max_tokens=512`, `overlap=64` | Same as above with larger defaults |\n| `Session` | Conversation | `max_messages=10` | Groups messages until role returns to `user` or max reached |\n| `Structural` | Code | None | One chunk per section (function/class body from tree-sitter parser) |\n| `Recursive` | (legacy) | `max_tokens`, `overlap` | Token-budgeted word-window sliding window |\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:1-100]()\n\n### Auto-Chunking\n\nThe `auto_chunker(kind, heuristics)` function selects optimal strategies:\n\n```rust\nmatch kind {\n    SourceKind::Markdown => ChunkerKind::Paragraph,\n    SourceKind::Text => ChunkerKind::SentenceRecursive { max_tokens: 256, overlap: 32 },\n    SourceKind::Pdf => ChunkerKind::SentenceRecursive { max_tokens: 512, overlap: 64 },\n    SourceKind::Conversation => ChunkerKind::Session { max_messages: 10 },\n    SourceKind::Code(_) => ChunkerKind::Structural,\n}\n```\n\n资料来源：[crates/mnem-ingest/src/chunk.rs:40-65]()\n\n## Source Kind Taxonomy\n\n| Kind | Extensions | Parser | Index Type |\n|------|------------|--------|------------|\n| `Markdown` | `.md`, `.markdown` | `parse_markdown` | Hybrid |\n| `Pdf` | `.pdf` | Sidecar (docling/unstructured) | Hybrid |\n| `Conversation` | `.json`, `.jsonl` | Session 
parser | Session |\n| `Text` | Other/unspecified | Raw text | Hybrid |\n| `Code(Rust)` | `.rs` | Tree-sitter | Structural |\n| `Code(Python)` | `.py`, `.pyi` | Tree-sitter | Structural |\n| `Code(JavaScript)` | `.js`, `.mjs`, `.cjs` | Tree-sitter | Structural |\n| `Code(TypeScript)` | `.ts`, `.tsx`, `.mts`, `.cts` | Tree-sitter | Structural |\n| `Code(Go)` | `.go` | Tree-sitter | Structural |\n| `Code(Java)` | `.java` | Tree-sitter | Structural |\n| `Code(C)` | `.c`, `.h` | Tree-sitter | Structural |\n| `Code(Cpp)` | `.cpp`, `.cc`, `.cxx`, `.hpp` | Tree-sitter | Structural |\n| `Code(Ruby)` | `.rb`, `.gemspec`, `.rake`, `.erb` | Tree-sitter | Structural |\n| `Code(CSharp)` | `.cs`, `.csx` | Tree-sitter | Structural |\n\n资料来源：[crates/mnem-ingest/src/types.rs:1-80]()\n\n## Retrieval Flow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant Retriever\n    participant VectorIndex\n    participant SparseIndex\n    participant GraphIndex\n    participant Fuser\n    participant TokenBudgetPacker\n    participant LLM\n\n    Client->>Retriever: query + config\n    Retriever->>VectorIndex: vector_search(query)\n    Retriever->>SparseIndex: sparse_search(query)\n    Retriever->>GraphIndex: graph_expand(seed_nodes)\n    VectorIndex-->>Fuser: ranked_candidates\n    SparseIndex-->>Fuser: ranked_candidates\n    GraphIndex-->>Fuser: ranked_candidates\n    Fuser->>Fuser: reciprocal_rank_fusion\n    Fuser->>TokenBudgetPacker: fused_results\n    alt summarize=true\n        TokenBudgetPacker->>TokenBudgetPacker: centroid_MMR_extraction\n    end\n    TokenBudgetPacker-->>LLM: token_budgeted_context\n```\n\n## HyDE (Hypothetical Document Embeddings)\n\nWhen `hyde=true`, the system generates extractive summaries of top-M candidate nodes before final ranking. This follows the HyDE (Hypothetical Document Embeddings) pattern where:\n\n1. Initial candidates are retrieved\n2. Extractive summarization produces hypotheses\n3. Hypotheses are re-embedded and ranked\n4. 
Final top-K are packed into the context budget\n\n资料来源：[crates/mnem-http/src/handlers.rs:250-270]()\n\n## Branch Name Validation\n\nThe HTTP API validates branch names before creating commit references during ingest operations:\n\n```\nInvalid characters: space, tab, newline, null, ~, ^, :, ?, *, [, \\, @{, .., //\nInvalid patterns: leading /, trailing /, trailing ., trailing .lock\n```\n\n资料来源：[crates/mnem-http/src/handlers.rs:180-210]()\n\n## Extractor Integration\n\nThe retrieval system works in conjunction with the entity extraction pipeline. Extractors produce entity spans and relation spans that populate the graph index:\n\n| Extractor | Provider | Features |\n|-----------|----------|----------|\n| `RuleExtractor` | Default (NER) | Capitalized phrase heuristic, verb-window regex relations |\n| `KeyBertAdapter` | Statistical | Requires `keybert` feature flag |\n| `LLM` | Ollama | Requires `ollama` feature flag |\n\n资料来源：[crates/mnem-ingest/src/extract.rs:1-100]()\n\n## Configuration Example\n\n```toml\n[retrieve]\nlimit = 20\nbudget = 4096\nvector_cap = 100\ngraph_expand = 5\ngraph_depth = 2\ngraph_decay = 80\nrerank_top_k = 10\nhyde_max_tokens = 256\n\n[rerank]\nmodel = \"cross-encoder/ms-marco-MiniLM-L-6-v2\"\nbase_url = \"http://localhost:8080\"\n\n[ner]\nprovider = \"rule\"  # or \"none\"\n```\n\n资料来源：[crates/mnem-cli/src/config.rs:50-120]()\n\n## See Also\n\n- [Node Data Model](objects/node.md) — Node structure with summary and context_sentence\n- [Chunking Strategies](ingest/chunk.md) — Five chunker implementations\n- [HTTP API Reference](http/api.md) — REST endpoint documentation\n- [Entity Extraction](ingest/extract.md) — NER and relation extraction\n\n---\n\n<a id='page-embed-providers'></a>\n\n## Embedding Providers\n\n### 相关页面\n\n相关主题：[Hybrid Retrieval System](#page-hybrid-retrieval)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- 
[crates/mnem-cli/src/commands/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/mod.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n- [crates/mnem-http/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-http/src/lib.rs)\n- [crates/mnem-mcp/src/tools/embed.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-mcp/src/tools/embed.rs)\n- [crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n- [crates/mnem-embed-providers/src/http.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-embed-providers/src/http.rs)\n</details>\n\n# Embedding Providers\n\nEmbedding Providers is a pluggable subsystem in the mnem monorepo that abstracts the generation of vector embeddings for text content. It lives in the `crates/mnem-embed-providers` crate and is consumed by `mnem-cli`, `mnem-http`, and `mnem-mcp` to support dense vector indexing and semantic retrieval.\n\n## Architecture Overview\n\nThe provider system follows a **strategy pattern** with runtime-configurable backends. 
Each provider implements the same `Embedder` trait, returning `Vec<f32>` vectors regardless of the underlying implementation (HTTP API, local model, ONNX runtime).\n\n```mermaid\ngraph TD\n    A[\"mnem-cli / mnem-http / mnem-mcp\"] --> B[\"mnem-embed-providers\"]\n    B --> C[\"ProviderConfig\"]\n    C --> D[\"OpenAI Provider\"]\n    C --> E[\"Ollama Provider\"]\n    C --> F[\"ONNX Provider\"]\n    D --> G[\"REST API / OpenAI Compatible\"]\n    E --> H[\"Local Ollama Server\"]\n    F --> I[\"Local ONNX Runtime\"]\n    \n    J[\"config.toml / ENV vars\"] --> B\n```\n\n资料来源：[crates/mnem-cli/src/commands/mod.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/mod.rs)\n\n## Supported Providers\n\n| Provider | Backend Type | Model Selection | Configuration |\n|----------|-------------|-----------------|---------------|\n| **OpenAI** | Remote REST API | Via `model` field | `base_url`, `api_key`, `timeout_secs` |\n| **Ollama** | Local REST API | Via `model` field | `base_url` (default: `http://localhost:11434`), `timeout_secs` |\n| **ONNX** | Local ONNX Runtime | Bundled `all-MiniLM-L6-v2` | No network required |\n\n资料来源：[crates/mnem-cli/src/config.rs:1-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n\n### OpenAI Provider\n\nSends text to OpenAI's embedding API or any OpenAI-compatible endpoint. Requires:\n- `base_url`: API endpoint (default: `https://api.openai.com/v1`)\n- `api_key`: Authentication token\n- `model`: Embedding model identifier\n\n### Ollama Provider\n\nConnects to a local Ollama server for running open-source embedding models. Default endpoint is `http://localhost:11434`. The provider sets a 120-second timeout by default.\n\n### ONNX Provider\n\nRuns inference entirely offline using the ONNX Runtime with the `all-MiniLM-L6-v2` model. 
This is the **bundled default** when mnem is compiled with the `bundled-embedder` feature, providing zero-configuration embeddings for single-machine deployments.\n\n资料来源：[crates/mnem-mcp/src/tools/embed.rs:1-60](https://github.com/Uranid/mnem/blob/main/crates/mnem-mcp/src/tools/embed.rs)\n\n## Configuration Resolution\n\nEmbedding providers are configured through a **precedence chain** that varies slightly between consumer applications.\n\n### mnem-cli Precedence\n\n| Priority | Source | Fields |\n|----------|--------|--------|\n| 1 | Environment variables | `MNEM_EMBED_PROVIDER`, `MNEM_EMBED_MODEL`, `MNEM_EMBED_API_KEY_ENV`, `MNEM_EMBED_BASE_URL`, `MNEM_EMBED_DIM` |\n| 2 | `~/.mnem/config.toml` | `[embed]` section |\n| 3 | `<repo>/config.toml` | `[embed]` section |\n| 4 | Bundled ONNX fallback | When compiled with `bundled-embedder` feature |\n\n资料来源：[crates/mnem-cli/src/config.rs:80-120](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n\n### mnem-http Precedence\n\n| Priority | Source | Behavior |\n|----------|--------|----------|\n| 1 | `POST /v1/embed` request body | Per-request model override |\n| 2 | `<data_dir>/config.toml` | Server-wide `[embed]` section |\n\nThe HTTP server loads embed configuration at startup. 
A malformed `[embed]` section logs a warning but does not prevent server startup—auto-embed simply remains disabled.\n\n```rust\nfn load_embed_config(data_dir: &Path) -> Option<mnem_embed_providers::ProviderConfig> {\n    #[derive(serde::Deserialize)]\n    struct MiniCfg {\n        embed: Option<mnem_embed_providers::ProviderConfig>,\n    }\n    let path = data_dir.join(\"config.toml\");\n    let s = std::fs::read_to_string(&path).ok()?;\n    match toml::from_str::<MiniCfg>(&s) {\n        Ok(parsed) => parsed.embed,\n        Err(e) => {\n            tracing::warn!(path = %path.display(), error = %e,\n                \"config.toml [embed] parse failed; auto-embed disabled\"\n            );\n            None\n        }\n    }\n}\n```\n\n资料来源：[crates/mnem-http/src/lib.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-http/src/lib.rs)\n\n### mnem-mcp Precedence\n\nThe MCP server uses a simplified three-tier chain without the global `~/.mnem/config.toml` lookup (design point: per-repo isolation):\n\n1. `MNEM_EMBED_*` environment variables\n2. `<repo>/config.toml` `[embed]` section\n3. 
Bundled ONNX fallback (only when `bundled-embedder` feature is compiled)\n\n资料来源：[crates/mnem-mcp/src/tools/embed.rs:20-40](https://github.com/Uranid/mnem/blob/main/crates/mnem-mcp/src/tools/embed.rs)\n\n## ProviderConfig Schema\n\nThe configuration is parsed from TOML into a discriminated union:\n\n```rust\npub enum ProviderConfig {\n    Openai(OpenaiConfig),\n    Ollama(OllamaConfig),\n    Onnx(OnnxConfig),\n}\n```\n\nEach variant carries only the parameters relevant to that provider, keeping the configuration minimal.\n\n## Error Handling\n\nAll embedding operations return `EmbedError`, which is mapped from transport failures into actionable diagnostics:\n\n```mermaid\ngraph LR\n    A[\"ureq::Error\"] --> B{\"EmbedError\"}\n    B --> C[\"RateLimited\"]\n    B --> D[\"BadRequest<br/>status + body\"]\n    B --> E[\"Server<br/>status + body\"]\n    B --> F[\"Network<br/>transport message\"]\n    B --> G[\"Decode<br/>JSON parse failure\"]\n```\n\n资料来源：[crates/mnem-embed-providers/src/http.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-embed-providers/src/http.rs)\n\n### Error Display for Users\n\nWhen embedding fails, mnem-cli formats the error into a short, actionable one-liner suitable for `eprintln!`:\n\n| Provider | Common Cause | Suggestion |\n|----------|--------------|------------|\n| OpenAI | Invalid API key | Check `MNEM_EMBED_API_KEY_ENV` |\n| Ollama | Server not running | Verify `ollama serve` is active |\n| ONNX | Missing model file | Ensure `all-MiniLM-L6-v2` is bundled |\n\nThe `format_embed_failure` function accepts a `context` parameter (`\"embedding\"` for writes, `\"query embedding\"` for retrieval) to tailor suggestions.\n\n资料来源：[crates/mnem-cli/src/commands/mod.rs:50-100](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/mod.rs)\n\n## Integration with Node Storage\n\nEmbedding vectors are stored on `Node` objects for use during semantic retrieval:\n\n```rust\npub struct Node {\n    pub id: NodeId,\n    pub 
label: String,\n    pub summary: Option<String>,           // LLM-facing retrieval text\n    pub content: Option<Bytes>,           // Opaque payload\n    pub context_sentence: Option<String>,  // Anthropic contextual retrieval prefix\n    pub props: BTreeMap<String, Ipld>,    // Structured properties\n}\n```\n\nThe `summary` field is the primary text indexed by the dense embedder. The `context_sentence` (per Anthropic's 2024 Contextual Retrieval paper) is prepended to `summary` before embedding to capture positional context, a technique Anthropic reports reduces retrieval failures by 49% (67% when combined with reranking).\n\n资料来源：[crates/mnem-core/src/objects/node.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n\n## Bundled Embedder Feature\n\nThe `bundled-embedder` Cargo feature compiles in an ONNX provider with `all-MiniLM-L6-v2`. When enabled:\n\n- `mnem embed` works out-of-the-box without external services\n- The MCP `mnem_retrieve` tool has a tier-3 fallback when no explicit vector provider is configured\n- Ideal for air-gapped environments or local-first workflows\n\nWhen not enabled, missing embedder configuration results in a warning during ingest; nodes are created without vectors, and a recovery path via `mnem reindex` is suggested.\n\n## Summary\n\nEmbedding Providers abstracts vector generation behind a common interface, supporting three backends with distinct deployment profiles:\n\n- **OpenAI**: Cloud-hosted, highest quality, requires API credentials\n- **Ollama**: Self-hosted, flexible model selection, local compute\n- **ONNX**: Offline-capable, bundled model, zero-configuration\n\nConfiguration flows from environment variables through TOML files, with graceful fallback behavior that never prevents core operations from functioning.\n\n---\n\n<a id='page-storage-backend'></a>\n\n## Storage Backend\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- 
[crates/mnem-core/src/store/mod.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/store/mod.rs)\n- [crates/mnem-core/src/store/blockstore.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/store/blockstore.rs)\n- [crates/mnem-core/src/store/op_heads.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/store/op_heads.rs)\n- [crates/mnem-backend-redb/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-backend-redb/src/lib.rs)\n- [crates/mnem-backend-redb/src/blockstore.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-backend-redb/src/blockstore.rs)\n- [crates/mnem-backend-redb/src/knn_edges_store.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-backend-redb/src/knn_edges_store.rs)\n</details>\n\n# Storage Backend\n\nThe storage backend is a critical subsystem in mnem that provides persistent storage for the content-addressable object graph. It abstracts storage operations behind well-defined traits, enabling pluggable storage implementations while maintaining a consistent API for the core data layer.\n\n## Architecture Overview\n\nThe storage backend follows a trait-based abstraction pattern where `mnem-core` defines the storage interfaces and concrete implementations are provided by backend crates. 
This separation allows the core logic to remain independent of specific storage technologies.\n\n```mermaid\ngraph TD\n    subgraph \"Application Layer\"\n        CLI[mnem-cli]\n        HTTP[mnem-http]\n    end\n    \n    subgraph \"mnem-core\"\n        Repo[Repository]\n        Transaction[Transaction]\n        Objects[Node / Edge / Commit]\n    end\n    \n    subgraph \"Storage Traits\"\n        Blockstore[BlockStore Trait]\n        OpHeadsStore[OpHeadsStore Trait]\n        KnnEdgesStore[KnnEdgesStore Trait]\n    end\n    \n    subgraph \"Backend Implementations\"\n        RedbBackend[mnem-backend-redb]\n    end\n    \n    CLI --> Repo\n    HTTP --> Repo\n    Repo --> Transaction\n    Transaction --> Blockstore\n    Transaction --> OpHeadsStore\n    Repo --> KnnEdgesStore\n    Blockstore --> RedbBackend\n    OpHeadsStore --> RedbBackend\n    KnnEdgesStore --> RedbBackend\n```\n\n## Core Storage Traits\n\nThe storage layer is built on three fundamental traits that define the contract between the core library and storage implementations.\n\n### BlockStore Trait\n\nThe `BlockStore` trait provides low-level operations for storing and retrieving binary data blocks identified by Content Identifiers (CIDs).\n\n```rust\n// crates/mnem-core/src/store/blockstore.rs\npub trait Blockstore: Send + Sync {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>>;\n    fn put(&self, block: &[u8]) -> Result<Cid>;\n    fn put_many<I>(&self, blocks: I) -> Result<Vec<Cid>>\n    where\n        I: IntoIterator<Item = Vec<u8>>,\n        I::IntoIter: Send + Sync;\n}\n```\n\n| Method | Purpose | Return Type |\n|--------|---------|-------------|\n| `get(cid)` | Retrieve a block by its CID | `Result<Option<Vec<u8>>>` |\n| `put(block)` | Store a single block, returning its CID | `Result<Cid>` |\n| `put_many(blocks)` | Batch insert multiple blocks | `Result<Vec<Cid>>` |\n\nThe trait implements the CAR (Content Addressable Archive) storage pattern where data integrity is verified through 
content hashing. 资料来源：[crates/mnem-core/src/store/blockstore.rs:1-20]()\n\n### OpHeadsStore Trait\n\nThe `OpHeadsStore` trait manages operation heads—references to the latest operations in the operational transform system. It supports both single-head and multi-head scenarios with conflict detection.\n\n```rust\n// crates/mnem-core/src/store/op_heads.rs\npub trait OpHeadsStore: Send + Sync {\n    fn get_heads(&self) -> Result<Vec<Cid>>;\n    fn put_head(&self, op: &Op) -> Result<()>;\n    fn put_heads(&self, ops: &[Op]) -> Result<()>;\n    fn merge_heads(&self, merged: Vec<Cid>) -> Result<()>;\n}\n```\n\n| Method | Purpose |\n|--------|---------|\n| `get_heads()` | Retrieve all current operation head CIDs |\n| `put_head(op)` | Atomically update the single head |\n| `put_heads(ops)` | Set multiple operation heads |\n| `merge_heads(merged)` | Replace heads with merged result after conflict resolution |\n\n资料来源：[crates/mnem-core/src/store/op_heads.rs:1-50]()\n\n### KnnEdgesStore Trait\n\nThe `KnnEdgesStore` trait provides specialized storage for k-nearest-neighbor graph edges, enabling efficient vector similarity searches.\n\n```rust\n// Backend interface for KNN edge storage\npub trait KnnEdgesStore: Send + Sync {\n    fn insert(&self, source_id: NodeId, embedding: &[f32]) -> Result<()>;\n    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(NodeId, f32)>>;\n}\n```\n\n资料来源：[crates/mnem-backend-redb/src/knn_edges_store.rs:1-30]()\n\n## Redb Backend Implementation\n\nThe `mnem-backend-redb` crate provides the reference implementation using the Redb embedded database, a fast, lightweight key-value store written in Rust.\n\n### Module Structure\n\n```\nmnem-backend-redb/\n├── src/\n│   ├── lib.rs           # Main entry point and configuration\n│   ├── blockstore.rs    # BlockStore implementation\n│   └── knn_edges_store.rs # KNN edge storage with HNSW\n```\n\n### Initialization\n\nThe backend initializes by opening or creating a Redb database file:\n\n```rust\n// 
crates/mnem-backend-redb/src/lib.rs\npub struct Backend {\n    db: redb::Database,\n    path: PathBuf,\n}\n\nimpl Backend {\n    pub fn open(path: &Path) -> Result<Self> {\n        let db = redb::Database::create(path)?;\n        Ok(Self { db, path: path.to_path_buf() })\n    }\n}\n```\n\n资料来源：[crates/mnem-backend-redb/src/lib.rs:1-50]()\n\n### BlockStore Implementation\n\nThe Redb blockstore implementation wraps the database with CAR-compatible semantics:\n\n```rust\n// crates/mnem-backend-redb/src/blockstore.rs\nimpl Blockstore for RedbBlockstore {\n    fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>> {\n        let key = cid.to_bytes();\n        let guard = self.db.begin()?;\n        let table = guard.open_table(BLOCKS_TABLE)?;\n        Ok(table.get(key)?.map(|v| v.value().as_bytes().to_vec()))\n    }\n    \n    fn put(&self, block: &[u8]) -> Result<Cid> {\n        let hash = multihash::Sha256::digest(block);\n        let cid = Cid::new_v1(DAG_CBOR, hash);\n        // Store with CID bytes as key\n    }\n}\n```\n\n资料来源：[crates/mnem-backend-redb/src/blockstore.rs:1-100]()\n\n### Storage Format\n\nThe redb backend organizes data into multiple tables:\n\n| Table Name | Key Type | Value Type | Purpose |\n|------------|----------|------------|---------|\n| `blocks` | CID bytes | Raw block data | Content-addressed storage |\n| `op_heads` | Fixed key | CID bytes | Operation head references |\n| `knn_edges` | NodeId | Serialized edges | Vector similarity graph |\n\n## Transaction Model\n\nmnem implements a transactional write model through the `Transaction` type in the repository layer. 
Transactions provide ACID-like semantics for graph modifications.\n\n```mermaid\ngraph LR\n    A[Begin Transaction] --> B[Add Nodes]\n    B --> C[Add Edges]\n    C --> D[Commit]\n    D --> E[Update OpHeads]\n    E --> F[Success]\n    \n    D --> G[Abort]\n    G --> H[Rollback]\n```\n\n### Write Operations\n\nTransactions support atomic batch operations:\n\n```rust\n// Conceptual transaction interface\nimpl Transaction {\n    pub fn add_node(&mut self, node: Node) -> Result<NodeId>;\n    pub fn add_edge(&mut self, source: NodeId, target: NodeId, relation: &str) -> Result<()>;\n    pub fn commit(self) -> Result<Commit>;\n}\n```\n\n资料来源：[crates/mnem-core/src/repo/mod.rs:1-100]()\n\n### Commit Structure\n\nEach commit creates an immutable snapshot of the repository state:\n\n```rust\n// crates/mnem-core/src/objects/commit.rs\npub struct Commit {\n    pub operation: Operation,\n    pub parent: Option<Cid>,\n    pub author: Author,\n    pub message: String,\n    pub timestamp: DateTime<Utc>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/commit.rs:1-50]()\n\n## Data Persistence Flow\n\n```mermaid\nsequenceDiagram\n    participant App as Application\n    participant Tx as Transaction\n    participant BS as BlockStore\n    participant OHS as OpHeadsStore\n    participant Redb as Redb DB\n\n    App->>Tx: begin()\n    Tx->>Tx: add_node(node)\n    Tx->>BS: put(block)\n    BS->>Redb: write(cid, data)\n    Tx->>Tx: add_edge(src, dst)\n    Tx->>BS: put(block)\n    Tx->>Tx: commit()\n    Tx->>BS: put(commit_block)\n    Tx->>OHS: put_head(new_op)\n    OHS->>Redb: update_heads()\n    Redb-->>Tx: success\n    Tx-->>App: commit_cid\n```\n\n## Configuration\n\nStorage backend behavior is configured through the `Config` structure:\n\n```rust\n// crates/mnem-cli/src/config.rs\npub struct Config {\n    pub store: Option<StoreConfig>,\n    pub data_dir: PathBuf,\n    // ...\n}\n\npub struct StoreConfig {\n    pub path: PathBuf,\n    pub flush_interval_ms: 
Option<u64>,\n}\n```\n\n资料来源：[crates/mnem-cli/src/config.rs:1-100]()\n\n### Configuration Options\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `path` | `PathBuf` | `data/` | Base directory for storage files |\n| `flush_interval_ms` | `u64` | `1000` | Periodic flush interval in milliseconds |\n\n## Object Types and Serialization\n\n### Node Storage\n\nNodes are serialized using DAG-CBOR and stored as blocks:\n\n```rust\n// crates/mnem-core/src/objects/node.rs\npub struct Node {\n    pub id: NodeId,\n    pub ntype: NodeType,\n    pub summary: Option<String>,\n    pub props: BTreeMap<String, Ipld>,\n    pub content: Option<Bytes>,\n    pub context_sentence: Option<String>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/node.rs:1-100]()\n\n### Edge Storage\n\nEdges link nodes with labeled relationships:\n\n```rust\n// crates/mnem-core/src/objects/edge.rs\npub struct Edge {\n    pub source: NodeId,\n    pub target: NodeId,\n    pub relation: String,\n    pub confidence: Option<f32>,\n}\n```\n\n资料来源：[crates/mnem-core/src/objects/edge.rs:1-50]()\n\n## Error Handling\n\nStorage operations return the `Error` type defined in the core crate:\n\n```rust\n// crates/mnem-core/src/store/mod.rs\npub enum Error {\n    #[error(\"block not found: {0}\")]\n    BlockNotFound(Cid),\n    #[error(\"serialization failed: {0}\")]\n    SerializationFailed(String),\n    #[error(\"database error: {0}\")]\n    DatabaseError(String),\n}\n```\n\n资料来源：[crates/mnem-core/src/store/mod.rs:1-100]()\n\n### Error Recovery\n\n| Error Type | Recovery Strategy |\n|------------|-------------------|\n| `BlockNotFound` | Indicates data corruption; repository repair required |\n| `SerializationFailed` | Check data integrity; may indicate schema mismatch |\n| `DatabaseError` | Retry operation; check disk space and permissions |\n\n## Indexes and Secondary Storage\n\n### Vector Index\n\nThe KNN edges store maintains a vector index for similarity search 
operations:\n\n```mermaid\ngraph TD\n    A[Query Vector] --> B[KnnEdgesStore]\n    B --> C[HNSW Index]\n    C --> D[Approximate KNN Search]\n    D --> E[Top-K Results]\n```\n\nThe implementation uses HNSW (Hierarchical Navigable Small World) algorithm for efficient approximate nearest neighbor search. 资料来源：[crates/mnem-backend-redb/src/knn_edges_store.rs:1-100]()\n\n### Query Interface\n\nThe retrieve module composes vector search with graph traversal:\n\n```rust\n// crates/mnem-core/src/retrieve/mod.rs\npub struct Retriever {\n    blockstore: Arc<dyn Blockstore>,\n    knn_edges: Arc<dyn KnnEdgesStore>,\n    // ...\n}\n```\n\n## Related Documentation\n\n- [Repository Model](../core-concepts/repository-model) - Transaction and commit management\n- [Object Schema](../core-concepts/object-schema) - Node, Edge, and Commit structures\n- [Retrieval System](../features/retrieval) - Query and retrieval workflows\n- [Configuration Guide](../getting-started/configuration) - Storage configuration options\n\n---\n\n<a id='page-ingestion'></a>\n\n## Ingestion Pipeline\n\n### 相关页面\n\n相关主题：[Core Components](#page-core-components)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-ingest/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/lib.rs)\n- [crates/mnem-ingest/src/pipeline.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pipeline.rs)\n- [crates/mnem-ingest/src/chunk.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/chunk.rs)\n- [crates/mnem-ingest/src/pdf.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/pdf.rs)\n- [crates/mnem-ingest/src/types.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-ingest/src/types.rs)\n- [crates/mnem-cli/src/commands/ingest.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n- 
[crates/mnem-core/src/objects/node.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/objects/node.rs)\n</details>\n\n# Ingestion Pipeline\n\nThe Ingestion Pipeline is the core system in mnem responsible for transforming external source documents (Markdown, PDFs, code files, conversations) into structured graph nodes within the repository. It handles parsing, chunking, entity extraction, and writing to the graph transaction—all without committing, allowing callers to control transaction boundaries.\n\n## Overview\n\nThe pipeline orchestrates a multi-stage process:\n\n1. **Detection** — Determine source kind from file extension or explicit configuration\n2. **Parsing** — Convert raw bytes into a list of `Section` objects\n3. **Chunking** — Split sections into semantically meaningful `Chunk` objects\n4. **Extraction** — Optionally identify entities and relations via rule-based or LLM providers\n5. **Writing** — Add nodes and edges to a borrowed `Transaction`\n\n```mermaid\ngraph TD\n    A[Raw Bytes] --> B[SourceKind Detection]\n    B --> C[Parser Selection]\n    C --> D[Parse to Sections]\n    D --> E[Chunker Strategy]\n    E --> F[Extract Entities & Relations]\n    F --> G[Transaction Write]\n    G --> H[IngestResult]\n    \n    C -->|md| C1[Markdown Parser]\n    C -->|pdf| C2[PDF Parser]\n    C -->|code| C3[Tree-sitter Parser]\n    C -->|json/jsonl| C4[Conversation Parser]\n    C -->|text| C5[Plain Text]\n```\n\n## Source Kind Detection\n\nThe `Ingester` automatically detects the source kind based on file extension. 
This determines both the parser and the default chunking strategy.\n\n| Extension(s) | SourceKind | Default Chunker |\n|--------------|------------|-----------------|\n| `.md`, `.markdown` | `Markdown` | `Paragraph` |\n| `.txt` | `Text` | `SentenceRecursive` (256 tokens, 32 overlap) |\n| `.pdf` | `Pdf` | `SentenceRecursive` (512 tokens, 64 overlap) |\n| `.json`, `.jsonl` | `Conversation` | `Session` (max 10 messages) |\n| `.rs` | `Code(Rust)` | `Structural` |\n| `.py`, `.pyi` | `Code(Python)` | `Structural` |\n| `.js`, `.mjs`, `.cjs` | `Code(JavaScript)` | `Structural` |\n| `.ts`, `.tsx`, `.mts`, `.cts` | `Code(TypeScript)` | `Structural` |\n| `.go` | `Code(Go)` | `Structural` |\n| `.java` | `Code(Java)` | `Structural` |\n| `.c`, `.h` | `Code(C)` | `Structural` |\n| `.cpp`, `.cc`, `.cxx`, `.hpp` | `Code(Cpp)` | `Structural` |\n| `.rb`, `.gemspec`, `.rake` | `Code(Ruby)` | `Structural` |\n| `.cs`, `.csx` | `Code(CSharp)` | `Structural` |\n| Unknown/ext none | `Text` | `SentenceRecursive` |\n\n资料来源：[pipeline.rs:source_kind_from_ext()]() [types.rs:SourceKind]() [types.rs:CodeLanguage::from_extension()]()\n\n## Supported File Formats\n\n### Markdown (`.md`, `.markdown`)\n\nParsed using CommonMark + GitHub Flavored Markdown (GFM) support. The parser extracts headings with depth information, creating section boundaries that respect document structure. Each heading becomes a section boundary.\n\n### PDF (`.pdf`)\n\nPure-Rust text-layer extraction using `pdf-extract`. One section per page is created, with heading set to `\"Page {n}\"` at depth 1. PDFs with fewer than 100 text characters per page are flagged as potentially scanned. Malformed PDFs return `Error::ParseFailed`. 资料来源：[pdf.rs:MIN_TEXT_PER_PAGE]() [pdf.rs:parse_pdf()]()\n\n### Code Files\n\nParsed using tree-sitter for supported languages (Rust, Python, JavaScript, TypeScript, Go, Java, C, Cpp, Ruby, CSharp). The parser extracts function and class bodies as sections, preserving structural boundaries. 
资料来源：[code.rs]()\n\n### Conversations (`.json`, `.jsonl`)\n\nSupports chat exports from ChatGPT, Claude, and generic conversation formats. Messages are extracted with role (user/assistant/system), content, and timestamps when available. 资料来源：[conversation.rs]()\n\n### Plain Text (`.txt` and others)\n\nFalls back to plain text parsing for unknown extensions, including files without extensions like `README`.\n\n## Chunker Strategies\n\nThe `ChunkerKind` enum defines five chunking strategies. Callers can override the auto-selected strategy via CLI or API.\n\n### Strategy Selection\n\n```mermaid\ngraph TD\n    A[SourceKind] --> B[auto_chunker]\n    B -->|Markdown| C[Paragraph]\n    B -->|Text| D[SentenceRecursive<br/>256 tokens, 32 overlap]\n    B -->|Pdf| E[SentenceRecursive<br/>512 tokens, 64 overlap]\n    B -->|Conversation| F[Session<br/>max 10 messages]\n    B -->|Code| G[Structural]\n```\n\n### Paragraph Chunker\n\nSplits each section's body on double-newline boundaries. Fast and deterministic, ideal for Markdown where authoring structure already matches desired chunk boundaries. 资料来源：[chunk.rs:ChunkerKind::Paragraph]()\n\n### Recursive Chunker\n\nToken-budgeted sliding word window with configurable overlap. Kept for backwards compatibility. 资料来源：[chunk.rs:ChunkerKind::Recursive]()\n\n### SentenceRecursive Chunker\n\nSentence-aware token-budgeted packing using Unicode sentence boundaries (UAX #29). Preferred for prose:\n\n- Chunks never cut mid-sentence\n- Overlap measured at sentence granularity\n- Average chunk size is more uniform\n\nDefault for `Text` (256 tokens, 32 overlap) and `Pdf` (512 tokens, 64 overlap) source kinds. 资料来源：[chunk.rs:ChunkerKind::SentenceRecursive]() [chunk.rs:auto_chunker()]()\n\n### Session Chunker\n\nGroups contiguous conversation messages into session chunks. Boundaries fire on:\n- Role returning to `user`, OR\n- Reaching `max_messages` (default: 10)\n\nPreserves turn ordering. Default for `Conversation` source kind. 
资料来源：[chunk.rs:ChunkerKind::Session]()\n\n### Structural Chunker\n\nOne chunk per section. Used for code sources where each section is already a function or class body extracted by the tree-sitter parser. 资料来源：[chunk.rs:ChunkerKind::Structural]()\n\n## Entity Extraction\n\nThe pipeline optionally extracts entities and relations using configured extractors.\n\n### RuleExtractor (Default)\n\nDelegates entity detection to a `NerProvider` (default: capitalized-phrase heuristic) and proximity-based relation detection via verb-window regex. Supported relation patterns include: `joined`, `founded`, `acquired`, `owns`, `hired`, etc. 资料来源：[extract.rs:RuleExtractor]() [extract.rs:verb_window]()\n\n### Optional: OllamaExtractor\n\nSchema-constrained NER via a local Ollama server (gated behind `ollama` feature). Hallucinated spans are verified against section text and rejected. Failures degrade gracefully to empty results, keeping the rule-based baseline as the load-bearing path. 资料来源：[lib.rs:extract_llm]()\n\n### Optional: KeyBertAdapter\n\nStatistical entity extraction adapter driven by the server's configured embedder (gated behind `keybert` feature). 资料来源：[lib.rs:extract_keybert]()\n\n## Pipeline API\n\n### Ingester Configuration\n\n```rust\npub struct IngestConfig {\n    pub chunker: ChunkerKind,\n    pub extractor: ExtractorKind,\n    pub ner_provider: Option<NerProviderKind>,\n    pub include_text: bool,\n}\n```\n\n资料来源：[pipeline.rs:IngestConfig]() [lib.rs:IngestConfig]()\n\n### Core Method\n\n```rust\npub fn ingest(\n    &self,\n    tx: &mut Transaction,\n    bytes: &[u8],\n    kind: SourceKind,\n) -> Result<IngestResult, Error>\n```\n\n**Returns** an `IngestResult` with counts and elapsed time. 
The `commit_cid` field is left `None`—callers who want a CID should call `tx.commit(...)` afterwards.\n\n**Errors:**\n- `Error::ParseFailed` — parser rejects the input\n- `Error::UnsupportedSource` — source kind not covered\n- `Error::Commit` — upstream codec/blockstore failures from `Transaction::add_node`/`add_edge`\n\n资料来源：[pipeline.rs:Ingester::ingest()]()\n\n## CLI Integration\n\nThe `mnem ingest` command provides CLI access to the pipeline.\n\n```bash\nmnem ingest notes.md\nmnem ingest --text \"The quick brown fox\"\nmnem ingest --chunker recursive --max-tokens 1024 book.pdf\nmnem ingest --recursive docs/\n```\n\n### CLI Options\n\n| Flag | Description | Default |\n|------|-------------|---------|\n| `--chunker` | Strategy selection | `auto` |\n| `--max-tokens` | Target tokens per chunk | 512 |\n| `--overlap` | Overlap tokens (recursive) | 32 |\n| `--recursive` | Walk directory trees | false |\n| `--ntype` | Root Doc node label | `Doc` |\n| `-m`, `--message` | Commit message | Auto-generated |\n\n资料来源：[commands/ingest.rs:Args]()\n\n## Output Nodes\n\nThe pipeline writes three node types and one edge type to the graph:\n\n1. **Doc node** — Root node representing the ingested document\n2. **Chunk nodes** — Smaller content pieces with `summary`, `content`, `context_sentence`, and `props` fields\n3. **Entity nodes** — Extracted entities with span information\n4. **Relation edges** — Connections between entities based on relation extraction\n\n### Contextual Retrieval\n\nEach chunk optionally stores a `context_sentence`—an LLM-generated one-sentence placement cue (e.g., \"This paragraph is from Section 3 of a legal contract...\"). This is prepended to the summary before embedding, following Anthropic's 2024 Contextual Retrieval recipe, which reports a 49% to 67% reduction in retrieval failures. 
资料来源：[node.rs:Node.context_sentence]()\n\n## Sidecar Support\n\nFor PDFs with poor text-layer extraction, the pipeline supports escalation to external tools:\n\n- **docling** (gated behind `sidecar-docling` feature)\n- **unstructured-ingest** (gated behind `sidecar-unstructured` feature)\n\nSidecars are invoked when built-in PDF extraction quality is insufficient. 资料来源：[lib.rs:sidecar]()\n\n## Token Estimation\n\nToken counts are estimated via whitespace split (`tokens_estimate` field on `Chunk`). This is intentionally fast and deterministic. Cl100k accuracy is a documented future improvement. 资料来源：[chunk.rs:token estimation comment]()\n\n## Error Handling\n\n| Error Type | Cause | Recovery |\n|------------|-------|----------|\n| `ParseFailed` | Malformed input, encryption | Return error, don't create nodes |\n| `UnsupportedSource` | Unknown source kind | Return error |\n| `Commit` | Blockstore failure | Return error |\n| Sidecar errors | Missing binary, CLI failure | Return `Error::Sidecar` |\n| LLM extraction failure | Timeout, schema mismatch | Degrade to empty Vec |\n\n资料来源：[pipeline.rs:ingest errors]() [lib.rs:Error types]()\n\n---\n\n<a id='page-cli-commands'></a>\n\n## CLI Commands Reference\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-cli/src/main.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n- [crates/mnem-cli/src/commands/ingest.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n- [crates/mnem-cli/src/commands/tag.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/tag.rs)\n- [crates/mnem-cli/src/commands/refs.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/refs.rs)\n- [crates/mnem-cli/src/integrate.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/integrate.rs)\n- [crates/mnem-cli/src/config.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n</details>\n\n# CLI Commands 
Reference\n\nThis page documents all command-line interface commands available in `mnem-cli`, the primary user-facing tool for interacting with mnem repositories.\n\n## Overview\n\nThe mnem CLI provides a unified interface for managing a local knowledge graph repository. It supports operations including repository initialization, content ingestion, node/edge manipulation, branching, tagging, retrieval, and third-party tool integration.\n\n```mermaid\ngraph TD\n    A[mnem CLI] --> B[Repository Operations]\n    A --> C[Content Ingestion]\n    A --> D[Graph Manipulation]\n    A --> E[Version Control]\n    A --> F[Retrieval]\n    A --> G[Integration]\n    \n    B --> B1[init]\n    B --> B2[status]\n    \n    C --> C1[ingest]\n    \n    D --> D1[add node]\n    D --> D2[add edge]\n    \n    E --> E1[log]\n    E --> E2[show]\n    E --> E3[refs]\n    E --> E4[tag]\n    E --> E5[branches]\n    \n    F --> F1[retrieve]\n    \n    G --> G1[integrate]\n```\n\n资料来源：[crates/mnem-cli/src/main.rs:40-90](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n\n## Global Options\n\nThe following options are available for all commands:\n\n| Option | Short | Description |\n|--------|-------|-------------|\n| `--repo <PATH>` | `-R` | Path to the repository directory (`.mnem/`). Defaults to walking up from the current directory, like `git` does. 
|\n\n资料来源：[crates/mnem-cli/src/main.rs:33-36](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n\n## Repository Operations\n\n### init\n\nInitializes a new mnem repository.\n\n```bash\nmnem init [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--path <PATH>` | Custom repository path |\n| `--name <NAME>` | Repository name |\n| `--author <NAME>` | Default author name |\n| `--email <EMAIL>` | Default author email |\n\n### status\n\nPrints current op-head, head commit, ref summary, and label counts.\n\n```bash\nmnem status [OPTIONS]\n```\n\n```bash\n# Examples:\nmnem status                    # current op + head commit + ref count\nmnem -R ~/notes status         # explicit repo path\n```\n\n## Content Ingestion\n\n### ingest\n\nParses external source files into the graph, creating Doc + Chunk + Entity nodes.\n\n```bash\nmnem ingest <PATH> [OPTIONS]\n```\n\n#### Supported Source Types\n\n| Extension | Source Kind | Chunker Strategy |\n|-----------|-------------|------------------|\n| `.md`, `.markdown` | Markdown | Paragraph |\n| `.txt` | Plain Text | SentenceRecursive (256 tokens, 32 overlap) |\n| `.pdf` | PDF | SentenceRecursive (512 tokens, 64 overlap) |\n| `.json`, `.jsonl` | Conversation | Session (10 messages max) |\n| `.rs` | Rust Code | Structural |\n| `.py`, `.pyi` | Python Code | Structural |\n| `.js`, `.mjs`, `.cjs` | JavaScript Code | Structural |\n| `.ts`, `.tsx`, `.mts`, `.cts` | TypeScript Code | Structural |\n| `.go` | Go Code | Structural |\n| `.java` | Java Code | Structural |\n| `.c`, `.h` | C Code | Structural |\n| `.cpp`, `.cc`, `.cxx`, `.hpp`, `.hxx` | C++ Code | Structural |\n| `.rb`, `.gemspec`, `.rake`, `.erb` | Ruby Code | Structural |\n| `.cs`, `.csx` | C# Code | Structural |\n| Other | Text | SentenceRecursive |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:1-50](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n\n#### ingest Options\n\n| Option | Description | 
Default |\n|--------|-------------|---------|\n| `<PATH>` | File or directory to ingest | Required (unless `--text`) |\n| `--text` | Inline text to ingest | - |\n| `--ntype <LABEL>` | Root Doc node label | `Doc` |\n| `--chunker <STRATEGY>` | Chunker strategy | `auto` |\n| `--max-tokens <N>` | Target tokens per chunk | `512` |\n| `--overlap <N>` | Overlap tokens between chunks | `32` |\n| `--recursive` | Walk directory trees | `false` |\n| `-m`, `--message <MSG>` | Commit message | Auto-generated |\n\n#### Chunker Strategies\n\n| Strategy | Description |\n|----------|-------------|\n| `auto` | Picks strategy based on source kind |\n| `paragraph` | Splits on double-newline (Markdown) |\n| `recursive` | Token-budgeted sliding window |\n| `sentence_recursive` | Sentence-aware token packing |\n| `session` | Groups conversation messages |\n| `structural` | One chunk per section (code) |\n\n资料来源：[crates/mnem-cli/src/commands/ingest.rs:60-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/ingest.rs)\n\n## Graph Manipulation\n\n### add node\n\nCreates a new node in the graph.\n\n```bash\nmnem add node [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `-s`, `--summary <TEXT>` | Node summary |\n| `--label <LABEL>` | Node type label |\n| `--prop <KEY=VALUE>` | Property (can be repeated) |\n| `--context-sentence <TEXT>` | Positional context for retrieval |\n\n```bash\n# Examples:\nmnem add node -s \"Alice lives in Berlin\"\nmnem add node --label Person --prop name=Alice --prop city=Berlin -s \"Alice is a climber\"\n```\n\n### add edge\n\nCreates a directed edge between two nodes.\n\n```bash\nmnem add edge [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--from <UUID>` | Source node UUID |\n| `--to <UUID>` | Target node UUID |\n| `--label <LABEL>` | Edge type label |\n| `--prop <KEY=VALUE>` | Property (can be repeated) |\n\n```bash\n# Examples:\nmnem add edge --from <src-uuid> --to <dst-uuid> --label 
knows\n```\n\n资料来源：[crates/mnem-cli/src/main.rs:65-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/main.rs)\n\n## Version Control\n\n### log\n\nWalks the op-log backwards from the current head.\n\n```bash\nmnem log [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--limit <N>` | Maximum number of operations to show |\n| `--format <FORMAT>` | Output format (`short`, `full`, `json`) |\n\n### show\n\nShows the full detail of one operation.\n\n```bash\nmnem show <OPERATION_ID>\n```\n\n```bash\n# Examples:\nmnem show 01HZ...\n```\n\n### refs\n\nManages symbolic references to commits.\n\n```bash\nmnem refs <SUBCOMMAND>\n```\n\n#### refs Subcommands\n\n| Subcommand | Description |\n|------------|-------------|\n| `list` | List every ref in the current view |\n| `set <name> <target>` | Set ref to point at a target CID |\n| `delete <name>` | Delete a ref |\n\n```bash\n# Examples:\nmnem refs list\nmnem refs set feature_branch 01HXYZ...\nmnem refs delete old_branch\n```\n\n资料来源：[crates/mnem-cli/src/commands/refs.rs:1-45](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/refs.rs)\n\n### tag\n\nManages named tags that point to commits.\n\n```bash\nmnem tag <SUBCOMMAND>\n```\n\n#### tag Subcommands\n\n| Subcommand | Description |\n|------------|-------------|\n| `list` | List every `refs/tags/<name>` ref with their target CIDs |\n| `create <name>` | Create a new tag |\n| `delete <name>` | Delete a tag |\n\n#### tag create Options\n\n| Option | Description |\n|--------|-------------|\n| `<name>` | Tag name (stored as `refs/tags/<name>`) |\n| `target` | Optional commit CID, ref name, branch shortname, or `HEAD` |\n| `--from <CID>` | Commit CID / ref / branch to point the tag at |\n\n```bash\n# Examples:\nmnem tag list\nmnem tag create v0.9\nmnem tag create release-2024 --from 01HZ...\nmnem tag delete 
v0.9\n```\n\n资料来源：[crates/mnem-cli/src/commands/tag.rs:1-60](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/commands/tag.rs)\n\n### branches\n\nManages named branches in the repository.\n\n```bash\nmnem branches [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `--list` | List all branches |\n| `--create <NAME>` | Create a new branch |\n| `--delete <NAME>` | Delete a branch |\n| `--switch <NAME>` | Switch to a branch |\n\n#### Branch Output Format\n\n```json\n{\n  \"schema\": \"mnem.v1.branches\",\n  \"branches\": [\n    {\"name\": \"main\", \"head\": \"<commit-cid>\", \"is_current\": true},\n    ...\n  ]\n}\n```\n\n## Retrieval\n\n### retrieve\n\nSearches the graph for nodes matching a query.\n\n```bash\nmnem retrieve [OPTIONS] <QUERY>\n```\n\n| Option | Description | Default |\n|--------|-------------|---------|\n| `--top-k <N>` | Number of results to return | `10` |\n| `--max-tokens <N>` | Maximum tokens in response | `4096` |\n| `--include <FIELD>` | Fields to include (summary, context, props) | All |\n| `--format <FORMAT>` | Output format (`text`, `json`) | `text` |\n\n```bash\n# Examples:\nmnem retrieve \"query\"\nmnem retrieve --top-k 5 --max-tokens 2048 \"machine learning\"\n```\n\n## Integration\n\n### integrate\n\nIntegrates mnem system prompts with third-party AI tools.\n\n```bash\nmnem integrate <HOST> [OPTIONS]\n```\n\n#### Supported Hosts\n\n| Host | System Prompt Path |\n|------|-------------------|\n| `claude-code` | `~/.claude/CLAUDE.md` |\n| `gemini-cli` | `~/.gemini/GEMINI.md` |\n| `cursor` | `~/.cursor/rules/mnem.mdc` |\n| `continue` | `~/.continue/config.json` |\n| `zed` | `~/.config/zed/settings.json` (Linux) or `~/Library/Application Support/Zed/settings.json` (macOS) |\n\n#### integrate Options\n\n| Option | Description |\n|--------|-------------|\n| `--install` | Install system prompt to host |\n| `--uninstall` | Remove system prompt from host |\n| `--status` | Show integration status 
|\n\n```mermaid\ngraph LR\n    A[mnem integrate] --> B{Host Selection}\n    B --> C[Claude Code]\n    B --> D[Cursor]\n    B --> E[Continue]\n    B --> F[Zed]\n    B --> G[Gemini CLI]\n    \n    C --> H[Markdown Marker]\n    D --> H\n    E --> I[JSON Field: systemMessage]\n    F --> J[JSON Field: assistant.system_prompt]\n    G --> H\n```\n\n资料来源：[crates/mnem-cli/src/integrate.rs:1-60](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/integrate.rs)\n\n## Configuration\n\nThe CLI loads configuration from `~/.config/mnem/config.toml` or `.mnem/config.toml` in the repository root.\n\n| Setting | Description |\n|---------|-------------|\n| `user.name` | Author name for commits |\n| `user.email` | Author email for commits |\n| `user.agent_id` | Agent identifier fallback |\n| `llm.provider` | LLM provider (`ollama`, `openai`, `anthropic`) |\n| `llm.model` | Model name |\n| `llm.base_url` | API base URL (default: `http://localhost:11434`) |\n| `llm.timeout_secs` | Request timeout (default: `120`) |\n\n#### Author String Format\n\nThe author string for commits follows this precedence:\n1. `name <email>` if both present\n2. `name` if only name present\n3. `email` if only email present\n4. `agent_id` if only that present\n5. 
`mnem-cli` as fallback\n\n资料来源：[crates/mnem-cli/src/config.rs:1-80](https://github.com/Uranid/mnem/blob/main/crates/mnem-cli/src/config.rs)\n\n## Command Pipeline\n\nThe following diagram shows how commands interact with the repository:\n\n```mermaid\ngraph TD\n    subgraph \"CLI Layer\"\n        A[mnem CLI] --> B[Commands]\n        B --> C[Ingest]\n        B --> D[Add]\n        B --> E[Retrieve]\n        B --> F[Refs/Tags]\n    end\n    \n    subgraph \"Core Layer\"\n        C --> G[Ingester Pipeline]\n        G --> H[Parser]\n        H --> I[Chunker]\n        I --> J[Extractor]\n        J --> K[Transaction]\n        \n        D --> K\n        F --> K\n        E --> L[Retriever]\n    end\n    \n    subgraph \"Storage Layer\"\n        K --> M[Transaction]\n        M --> N[Blockstore]\n        M --> O[OpHeadsStore]\n        L --> P[VectorIndex]\n        P --> N\n    end\n```\n\n## Exit Codes\n\n| Code | Meaning |\n|------|---------|\n| `0` | Success |\n| `1` | General error |\n| `2` | Invalid arguments |\n| `3` | Repository not found |\n| `4` | Object not found |\n| `5` | Conflict detected |\n\n---\n\n<a id='page-graphrag'></a>\n\n## GraphRAG Implementation\n\n### 相关页面\n\n相关主题：[Hybrid Retrieval System](#page-hybrid-retrieval), [Core Components](#page-core-components)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [crates/mnem-graphrag/src/lib.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/lib.rs)\n- [crates/mnem-graphrag/src/community.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/community.rs)\n- [crates/mnem-graphrag/src/calibration.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/calibration.rs)\n- [crates/mnem-graphrag/src/confidence.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/confidence.rs)\n- [crates/mnem-graphrag/src/summarize.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/summarize.rs)\n- 
[crates/mnem-core/src/retrieve/community_filter.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-core/src/retrieve/community_filter.rs)\n</details>\n\n# GraphRAG Implementation\n\nGraphRAG (Graph-based Retrieval Augmented Generation) is a hybrid retrieval approach that combines vector similarity search with graph-structured knowledge representation. In mnem, the GraphRAG implementation provides community-based entity extraction, confidence scoring, and intelligent graph traversal for enhanced context retrieval.\n\n## Overview\n\nThe mnem GraphRAG system operates as a layered architecture that:\n\n1. Extracts entities and relationships from ingested documents\n2. Builds a knowledge graph with typed edges and communities\n3. Enables community-aware retrieval that goes beyond simple vector similarity\n4. Provides confidence-calibrated results suitable for agentic workflows\n\nThe implementation lives in `crates/mnem-graphrag/` and integrates with the core retrieval pipeline in `crates/mnem-core/src/retrieve/`.\n\n## Core Components\n\n### Module Structure\n\n| Module | Purpose |\n|--------|---------|\n| `lib.rs` | Main entry point and public API exports |\n| `community.rs` | Community detection and hierarchy management |\n| `calibration.rs` | Confidence score calibration utilities |\n| `confidence.rs` | Confidence scoring algorithms |\n| `summarize.rs` | Community and entity summarization |\n\n### Community Detection (`community.rs`)\n\nCommunity detection partitions the knowledge graph into semantically coherent clusters. 
The implementation supports hierarchical community structures where:\n\n- **Leaf communities** contain tightly interconnected entities\n- **Parent communities** aggregate related sub-communities\n- **Cross-community edges** connect related concepts across boundaries\n\nCommunities are used during retrieval to:\n- Expand candidate sets by including related entities within the same community\n- Filter results to the most relevant community cluster\n- Enable \"zoom-in\" and \"zoom-out\" traversal patterns\n\n资料来源：[crates/mnem-graphrag/src/community.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/community.rs)\n\n### Confidence Scoring (`confidence.rs`)\n\nEvery extracted entity and relationship receives a confidence score based on:\n\n- **Extraction evidence**: Frequency and clarity of mentions in source documents\n- **Graph connectivity**: Number and strength of edges connecting to other entities\n- **Source reliability**: Document-level trust signals from the ingest pipeline\n\nConfidence scores are normalized to a `[0.0, 1.0]` range and drive downstream filtering decisions.\n\n资料来源：[crates/mnem-graphrag/src/confidence.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/confidence.rs)\n\n### Calibration (`calibration.rs`)\n\nCalibration ensures that confidence scores accurately reflect true extraction quality. 
The module provides:\n\n- **Score distribution analysis**: Histogram-based validation of score distributions\n- **Threshold tuning**: Per-use-case threshold adjustment for precision/recall tradeoffs\n- **Calibration curves**: Tools for evaluating score reliability\n\n资料来源：[crates/mnem-graphrag/src/calibration.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/calibration.rs)\n\n### Summarization (`summarize.rs`)\n\nThe summarization module generates concise descriptions for:\n\n- **Individual entities**: One-sentence summaries capturing core identity\n- **Relationships**: Edge labels and descriptions explaining connections\n- **Communities**: Multi-sentence overviews of community purpose and membership\n\nSummaries are stored as `context_sentence` on Node objects, enabling contextual retrieval patterns described in the Anthropic Contextual Retrieval paper.\n\n资料来源：[crates/mnem-graphrag/src/summarize.rs](https://github.com/Uranid/mnem/blob/main/crates/mnem-graphrag/src/summarize.rs)\n\n## Retrieval Integration\n\n### Community Filter (`community_filter.rs`)\n\nThe retrieval pipeline integrates GraphRAG through the community filter stage. When enabled, the retriever:\n\n1. Identifies the community containing the top-scoring candidate\n2. Expands the candidate set to include other high-confidence entities in that community\n3. 
Re-ranks the expanded set using the configured reranker\n\n```mermaid\ngraph TD\n    A[Query Embedding] --> B[Vector Search]\n    B --> C[Initial Candidates]\n    C --> D[Community Detection]\n    D --> E[Community Expansion]\n    E --> F[Reranker]\n    F --> G[Final Results]\n```\n\n资料来源：[crates/mnem-core/src/retrieve/community_filter.rs]()\n\n### Retriever Configuration\n\nThe `Retriever` struct in `crates/mnem-core/src/retrieve/retriever.rs` exposes GraphRAG-related options:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `graph_expand` | `Option<usize>` | `None` | Expansion radius for community-based retrieval |\n| `graph_decay` | `Option<f32>` | `None` | Decay factor for graph traversal weights |\n| `graph_depth` | `Option<usize>` | `None` | Maximum traversal depth |\n| `community_filter_enabled` | `bool` | `false` | Enable community-based filtering |\n| `ppr_size_gate` | `Option<usize>` | `None` | PPR personalization size threshold |\n\n### PPR-Based Expansion\n\nFor larger graphs, mnem supports Personalized PageRank (PPR) based expansion using the adjacency index:\n\n```rust\nadjacency_index: Option<Arc<dyn AdjacencyIndex + Send + Sync>>\n```\n\nWhen the adjacency index is available, PPR mode provides:\n- Personalized scoring based on seed nodes\n- Cohesive community member inclusion\n- Falls back to historical decay walk when index is unavailable\n\n资料来源：[crates/mnem-core/src/retrieve/retriever.rs:10-50]()\n\n## Data Flow\n\n### Ingest Pipeline to GraphRAG\n\n```mermaid\ngraph LR\n    A[Source File] --> B[Parser]\n    B --> C[Chunker]\n    C --> D[Entity Extractor]\n    D --> E[Graph Builder]\n    E --> F[Community Detection]\n    F --> G[Confidence Scoring]\n    G --> H[Committed Nodes/Edges]\n```\n\nThe ingest pipeline in `crates/mnem-ingest/src/pipeline.rs` coordinates:\n\n1. **Parsing**: Detect source type and extract raw content\n2. 
**Chunking**: Split into manageable units using auto-selected chunker\n3. **Extraction**: Rule-based or LLM-powered entity extraction\n4. **Graph building**: Create nodes and edges in the transaction\n5. **Commit**: Persist to the IPLD-based object store\n\n资料来源：[crates/mnem-ingest/src/pipeline.rs]()\n\n### Chunk Strategy by Source Type\n\n| Source Kind | Chunker | Tokens | Overlap |\n|-------------|---------|--------|---------|\n| Markdown | Paragraph | - | - |\n| Text | SentenceRecursive | 256 | 32 |\n| PDF | SentenceRecursive | 512 | 64 |\n| Conversation | Session | 10 messages | - |\n| Code | Structural | - | - |\n\n资料来源：[crates/mnem-ingest/src/chunk.rs]()\n\n## Configuration\n\n### TOML Configuration\n\n```toml\n[retrieve]\nlimit = 20              # Maximum results\nbudget = 8192           # Token budget\nvector_cap = 10         # Vector search candidates\ngraph_expand = 5        # Community expansion size\ngraph_depth = 3         # Traversal depth\nrerank_top_k = 5        # Final reranking pool\n\n[community]\nenabled = true          # Enable community filtering\nmin_community_size = 3  # Minimum entities per community\n```\n\n### CLI Configuration\n\n```bash\n# Ingest with community extraction\nmnem ingest --extractor keybert docs/\n\n# Retrieve with community expansion\nmnem retrieve \"query\" --graph-expand 10\n\n# Configure via config command\nmnem config set retrieve.graph_expand 5\n```\n\n资料来源：[crates/mnem-cli/src/config.rs]()\n\n## API Reference\n\n### Core Types\n\n#### `Community`\n\n```rust\npub struct Community {\n    pub id: CommunityId,\n    pub parent: Option<CommunityId>,\n    pub members: Vec<EntityId>,\n    pub summary: Option<String>,\n    pub depth: u32,\n}\n```\n\n#### `Entity`\n\n```rust\npub struct Entity {\n    pub id: EntityId,\n    pub ntype: String,\n    pub summary: Option<String>,\n    pub context_sentence: Option<String>,\n    pub confidence: f32,\n    pub community: Option<CommunityId>,\n}\n```\n\n### Public API (`lib.rs`)\n\n| 
Function | Signature | Description |\n|----------|-----------|-------------|\n| `detect_communities` | `(graph: &Graph) -> Vec<Community>` | Run community detection |\n| `score_entity` | `(entity: &Entity, graph: &Graph) -> f32` | Calculate confidence |\n| `calibrate_scores` | `(scores: Vec<f32>) -> Vec<f32>` | Apply calibration |\n| `summarize_community` | `(community: &Community) -> String` | Generate summary |\n| `expand_from_seed` | `(seed: &[NodeId], depth: usize) -> Vec<NodeId>` | Graph expansion |\n\n## Architecture Diagram\n\n```mermaid\ngraph TD\n    subgraph \"Ingest Layer\"\n        I1[Markdown]\n        I2[PDF]\n        I3[Code]\n        I4[Conversation]\n    end\n    \n    subgraph \"Extract Layer\"\n        E1[Rule Extractor]\n        E2[LLM Extractor]\n        E3[KeyBERT Adapter]\n    end\n    \n    subgraph \"Graph Layer\"\n        G1[Node Builder]\n        G2[Edge Builder]\n        G3[Community Detector]\n    end\n    \n    subgraph \"Score Layer\"\n        S1[Confidence Scorer]\n        S2[Calibrator]\n    end\n    \n    subgraph \"Retrieve Layer\"\n        R1[Vector Index]\n        R2[Community Filter]\n        R3[Reranker]\n    end\n    \n    I1 --> E1\n    I2 --> E2\n    I3 --> E1\n    I4 --> E3\n    \n    E1 --> G1\n    E2 --> G1\n    E3 --> G1\n    \n    G1 --> G2\n    G2 --> G3\n    \n    G3 --> S1\n    S1 --> S2\n    \n    S2 --> R1\n    R1 --> R2\n    R2 --> R3\n```\n\n## Experimental Features\n\n### E1: Community Expander\n\nExperiment E1 enables community-expansion during retrieval:\n\n- When `cfg.enabled` is `false` (default): Stage is a no-op\n- When enabled: Top-N seeds' communities pull in additional cohesive members\n- **Additive only**: Never drops existing candidates\n- Matrix v4 showed -29pp R@10 regression with the old drop-filter semantic\n\n### E2: PPR Graph Expansion\n\nExperiment E2 introduces Personalized PageRank for graph expansion:\n\n- Uses optional `AdjacencyIndex` for efficient neighborhood queries\n- Falls back to 
historical decay walk when index unavailable\n- Maintains byte-identical retrieval for default configuration\n\n资料来源：[crates/mnem-core/src/retrieve/retriever.rs]()\n\n## Warnings and Diagnostics\n\nThe retrieval system emits warnings when GraphRAG features encounter issues:\n\n| Warning Code | Feature | Description |\n|--------------|---------|-------------|\n| `community_filter` | Community Filter | No-op community filter triggered |\n| `graph_mode` | PPR | PPR ran without substrate graph |\n| `graph_expand` | Expansion | Authored adjacency list was empty |\n| `min_confidence` | Confidence | Results fell below confidence floor |\n| `warnings_truncated` | Diagnostics | Warning list was truncated |\n\n资料来源：[crates/mnem-core/src/retrieve/warnings.rs]()\n\n## Best Practices\n\n1. **Enable community filtering** for queries requiring holistic context\n2. **Tune `graph_expand`** based on graph density—larger graphs need smaller expansion radii\n3. **Calibrate confidence thresholds** per use case using the calibration module\n4. **Use structural chunking** for codebases to capture function-level entity granularity\n5. **Set `context_sentence`** on high-value nodes to improve contextual retrieval\n\n## See Also\n\n- [Retrieval System](../core/retrieve.md)\n- [Ingest Pipeline](../ingest/pipeline.md)\n- [Configuration Guide](../cli/config.md)\n- [Contextual Retrieval](../core/contextual_retrieval.md)\n\n---\n\n---\n\n## Doramagic 踩坑日志\n\n项目：Uranid/mnem\n\n摘要：发现 8 个潜在踩坑项，其中 1 个为 high/blocking；最高优先级：安全/权限坑 - 来源证据：[feature] hermes support。\n\n## 1. 安全/权限坑 · 来源证据：[feature] hermes support\n\n- 严重度：high\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[feature] hermes support\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_c54919b2b8b340438a9e5aa17291b93a | https://github.com/Uranid/mnem/issues/27 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 2. 
能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | github_repo:1221867246 | https://github.com/Uranid/mnem | README/documentation is current enough for a first validation pass.\n\n## 3. 维护坑 · 来源证据：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_5c74e7a10f774af6b0460b5da009d1b4 | https://github.com/Uranid/mnem/issues/23 | 来源讨论提到 windows 相关条件，需在安装/试用前复核。\n\n## 4. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | last_activity_observed missing\n\n## 5. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n\n## 6. 安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n\n## 7. 
维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | issue_or_pr_quality=unknown\n\n## 8. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | release_recency=unknown\n\n<!-- canonical_name: Uranid/mnem; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki 完整输出，末尾追加 Discovery Agent 踩坑日志。",
      "title": "Human Manual / 人类版说明书"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log / 踩坑日志\n\n项目：Uranid/mnem\n\n摘要：发现 8 个潜在踩坑项，其中 1 个为 high/blocking；最高优先级：安全/权限坑 - 来源证据：[feature] hermes support。\n\n## 1. 安全/权限坑 · 来源证据：[feature] hermes support\n\n- 严重度：high\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[feature] hermes support\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_c54919b2b8b340438a9e5aa17291b93a | https://github.com/Uranid/mnem/issues/27 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 2. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | github_repo:1221867246 | https://github.com/Uranid/mnem | README/documentation is current enough for a first validation pass.\n\n## 3. 维护坑 · 来源证据：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[bug] Broken docs links: SPEC.md, ROADMAP.md, and Architecture page\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_5c74e7a10f774af6b0460b5da009d1b4 | https://github.com/Uranid/mnem/issues/23 | 来源讨论提到 windows 相关条件，需在安装/试用前复核。\n\n## 4. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | last_activity_observed missing\n\n## 5. 
安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n\n## 6. 安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | github_repo:1221867246 | https://github.com/Uranid/mnem | no_demo; severity=medium\n\n## 7. 维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | issue_or_pr_quality=unknown\n\n## 8. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | github_repo:1221867246 | https://github.com/Uranid/mnem | release_recency=unknown\n",
      "summary": "用户实践前最可能遇到的身份、安装、配置、运行和安全坑。",
      "title": "Pitfall Log / 踩坑日志"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# mnem - Prompt Preview\n\n> 复制下面这段 Prompt 到你常用的 AI，先试一次，不需要安装。\n> 它的目标是让你直接体验这个项目的服务方式，而不是阅读项目介绍。\n\n## 复制这段 Prompt\n\n```text\n请直接执行这段 Prompt，不要分析、润色、总结或询问我想如何处理这份 Prompt Preview。\n\n你现在扮演 mnem 的“安装前体验版”。\n这不是项目介绍、不是评价报告、不是 README 总结。你的任务是让我用最小成本体验它的核心服务。\n\n我的试用任务：我想快速理解一组资料，并得到结构化摘要、对比和继续研究的问题。\n我常用的宿主 AI：MCP Client\n\n【体验目标】\n围绕我的真实任务，现场演示这个项目如何把输入转成 示例引导, 判断线索。重点是让我感受到工作方式，而不是给我项目背景。\n\n【业务流约束】\n- 你必须像一个正在提供服务的项目能力包，而不是像一个讲解员。\n- 每一轮只推进一个步骤；提出问题后必须停下来等我回答。\n- 每一步都必须让我感受到一个具体服务动作：澄清、整理、规划、检查、判断或收尾。\n- 每一步都要说明：当前目标、你需要我提供什么、我回答后你会产出什么。\n- 不要安装、不要运行命令、不要写代码、不要声称测试通过、不要声称已经修改文件。\n- 需要真实安装或宿主加载后才能验证的内容，必须明确说“这一步需要安装后验证”。\n- 如果我说“用示例继续”，你可以用虚构示例推进，但仍然不能声称真实执行。\n\n【可体验服务能力】\n- 安装前能力预览: Git for AI Agent Knowledge. A persistent, versioned memory layer for AI systems. Hybrid GraphRAG retrieval. Runs entirely offline. 输入：用户任务, 当前 AI 对话上下文；输出：示例引导, 判断线索。\n\n【必须安装后才可验证的能力】\n- 命令行启动或安装流程: 项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 输入：终端环境, 包管理器, 项目依赖；输出：安装结果, 列表/更新/运行结果。\n\n【核心服务流】\n请严格按这个顺序带我体验。不要一次性输出完整流程：\n1. page-introduction：Introduction to mnem。围绕“Introduction to mnem”模拟一次用户任务，不展示安装或运行结果。\n2. page-installation：Installation Guide。围绕“Installation Guide”模拟一次用户任务，不展示安装或运行结果。\n3. page-architecture：System Architecture。围绕“System Architecture”模拟一次用户任务，不展示安装或运行结果。\n4. page-core-components：Core Components。围绕“Core Components”模拟一次用户任务，不展示安装或运行结果。\n5. page-hybrid-retrieval：Hybrid Retrieval System。围绕“Hybrid Retrieval System”模拟一次用户任务，不展示安装或运行结果。\n\n【核心能力体验剧本】\n每一步都必须按“输入 -> 服务动作 -> 中间产物”执行。不要只说流程名：\n1. page-introduction\n输入：用户提供的“Introduction to mnem”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n2. page-installation\n输入：用户提供的“Installation Guide”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n3. page-architecture\n输入：用户提供的“System Architecture”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n4. page-core-components\n输入：用户提供的“Core Components”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n5. 
page-hybrid-retrieval\n输入：用户提供的“Hybrid Retrieval System”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n【项目服务规则】\n这些规则决定你如何服务用户。不要解释规则本身，而要在每一步执行时遵守：\n- 先确认用户任务、输入材料和成功标准，再模拟项目能力。\n- 每一步都必须形成可检查的小产物，并等待用户确认后再继续。\n- 凡是需要安装、调用工具或访问外部服务的能力，都必须标记为安装后验证。\n\n【每一步的服务约束】\n- Step 1 / page-introduction：Step 1 必须围绕“Introduction to mnem”形成一个小中间产物，并等待用户确认。\n- Step 2 / page-installation：Step 2 必须围绕“Installation Guide”形成一个小中间产物，并等待用户确认。\n- Step 3 / page-architecture：Step 3 必须围绕“System Architecture”形成一个小中间产物，并等待用户确认。\n- Step 4 / page-core-components：Step 4 必须围绕“Core Components”形成一个小中间产物，并等待用户确认。\n- Step 5 / page-hybrid-retrieval：Step 5 必须围绕“Hybrid Retrieval System”形成一个小中间产物，并等待用户确认。\n\n【边界与风险】\n- 不要声称已经安装、运行、调用 API、读写本地文件或完成真实任务。\n- 安装前预览只能展示工作方式，不能证明兼容性、性能或输出质量。\n- 涉及安装、插件加载、工具调用或外部服务的能力必须安装后验证。\n\n【可追溯依据】\n这些路径只用于你内部校验或在我追问“依据是什么”时简要引用。不要在首次回复主动展开：\n- https://github.com/Uranid/mnem\n- https://github.com/Uranid/mnem#readme\n- README.md\n- docs/src/introduction.md\n- crates/mnem-core/src/lib.rs\n- docs/src/install.md\n- Cargo.toml\n- Dockerfile\n- docker-compose.yml\n- crates/mnem-core/src/id/cid.rs\n- crates/mnem-core/src/id/multihash.rs\n- crates/mnem-core/src/objects/mod.rs\n\n【首次问题规则】\n- 首次三问必须先确认用户目标、成功标准和边界，不要提前进入工具、安装或实现细节。\n- 如果后续需要技术条件、文件路径或运行环境，必须等用户确认目标后再追问。\n\n首次回复必须只输出下面 4 个部分：\n1. 体验开始：用 1 句话说明你将带我体验 mnem 的核心服务。\n2. 当前步骤：明确进入 Step 1，并说明这一步要解决什么。\n3. 你会如何服务我：说明你会先改变我完成任务的哪个动作。\n4. 只问我 3 个问题，然后停下等待回答。\n\n首次回复禁止输出：后续完整流程、证据清单、安装命令、项目评价、营销文案、已经安装或运行的说法。\n\nStep 1 / page-introduction 的二轮协议：\n- 我回答首次三问后，你仍然停留在 Step 1 / page-introduction，不要进入 Step 2。\n- 第二次回复必须产出 6 个部分：澄清后的任务定义、成功标准、边界条件、2-3 个可选方案、每个方案的权衡、推荐方案。\n- 第二次回复最后必须问我是否确认推荐方案；只有我明确确认后，才能进入下一步。\n- 第二次回复禁止输出 git worktree、代码计划、测试文件、命令或真实执行结果。\n\n后续对话规则：\n- 我回答后，你先完成当前步骤的中间产物并等待确认；只有我确认后，才能进入下一步。\n- 每一步都要生成一个小的中间产物，例如澄清后的目标、计划草案、测试意图、验证清单或继续/停止判断。\n- 所有演示都写成“我会建议/我会引导/这一步会形成”，不要写成已经真实执行。\n- 不要声称已经测试通过、文件已修改、命令已运行或结果已产生。\n- 如果某个能力必须安装后验证，请直接说“这一步需要安装后验证”。\n- 如果证据不足，请明确说“证据不足”，不要补事实。\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start / 官方入口\n\n项目：Uranid/mnem\n\n## 官方安装入口\n\n### Python / pip · 官方安装入口\n\n```bash\npip install mnem-cli\n```\n\n来源：https://github.com/Uranid/mnem#readme\n\n## 来源\n\n- repo: https://github.com/Uranid/mnem\n- docs: https://github.com/Uranid/mnem#readme\n",
      "summary": "从项目官方 README 或安装文档提取的开工入口。",
      "title": "Quick Start / 官方入口"
    }
  },
  "validation_id": "dval_aa4979168e914161b416107790486e05"
}
