{
  "canonical_name": "antoinezambelli/forge",
  "compilation_id": "pack_254bace1d8744ded990aae0a53f42e4d",
  "created_at": "2026-05-19T20:06:23.487875+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=mcp_config, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=mcp_config, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `pip install forge-guardrails` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "pip install forge-guardrails",
      "sandbox_container_image": "python:3.12-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "llm_execute_isolated_install",
      "sandbox_validation_id": "sbx_d7f86e82b70843fd84ac111fe8f1523a"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_bfaa280ff5fdd17d67aef16011b25542",
    "canonical_name": "antoinezambelli/forge",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/antoinezambelli/forge",
    "slug": "forge",
    "source_packet_id": "phit_a0567a8456844c358ea5c25a82960af7",
    "source_validation_id": "dval_7a58244d8bf74e91982ffb1c5a2b2cc8"
  },
  "merchandising": {
    "best_for": "需要软件开发与交付能力，并使用 chatgpt的用户",
    "github_forks": null,
    "github_stars": null,
    "one_liner_en": "[![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/)",
    "one_liner_zh": "[![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/)",
    "primary_category": {
      "category_id": "software-development",
      "confidence": "medium",
      "name_en": "Software Development",
      "name_zh": "软件开发与交付",
      "reason": "matched_keywords:git"
    },
    "target_user": "使用 chatgpt 等宿主 AI 的用户",
    "title_en": "forge",
    "title_zh": "forge 能力包",
    "visible_tags": [
      {
        "label_en": "AI Agent Framework",
        "label_zh": "AI Agent 框架",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-ai-agent-framework",
        "type": "product_domain"
      },
      {
        "label_en": "Open Source Capability Building",
        "label_zh": "开源能力构建",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-open-source-capability-building",
        "type": "user_job"
      },
      {
        "label_en": "Natural-language Web Actions",
        "label_zh": "自然语言网页操作",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-natural-language-web-actions",
        "type": "core_capability"
      },
      {
        "label_en": "Multi-role Workflow",
        "label_zh": "多角色协作流程",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-multi-role-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Evaluation Suite",
        "label_zh": "评测体系",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-evaluation-suite",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_a0567a8456844c358ea5c25a82960af7",
  "page_model": {
    "artifacts": {
      "artifact_slug": "forge",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "pip install forge-guardrails",
          "label": "Python / pip · 官方安装入口",
          "source": "https://github.com/antoinezambelli/forge#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "AI Agent 框架",
        "开源能力构建",
        "自然语言网页操作",
        "多角色协作流程",
        "评测体系"
      ],
      "eyebrow": "软件开发与交付",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要软件开发与交付能力，并使用 chatgpt的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "[![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/)"
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "chatgpt",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "mcp_config, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_148dff87195e42549d0ffb88b99e9cbf | https://github.com/antoinezambelli/forge/issues/58 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Investigate: integration paths with Hermes Agent",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_e3cbd2d1c9a84a1887887bf24b036865 | https://github.com/antoinezambelli/forge/issues/51 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：Investigate: integration paths with Hermes Agent",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Per-model recommended sampling defaults (map keyed by HF model cards)",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_057ca2af912e4a608259ffb2a3654d4f | https://github.com/antoinezambelli/forge/issues/59 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：Per-model recommended sampling defaults (map keyed by HF model cards)",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Rescue-parse ChatGPT-style XML tool calls",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_471c674c8d73451da75d6b8c9349aabf | https://github.com/antoinezambelli/forge/issues/55 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：Rescue-parse ChatGPT-style XML tool calls",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个配置相关的待验证问题：Proxy external mode hardcodes native FC — no prompt-injection fallback",
            "category": "配置坑",
            "evidence": [
              "community_evidence:github | cevd_f3a85ec8447a4838b3bc4c846cd9e7a0 | https://github.com/antoinezambelli/forge/issues/53 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：Proxy external mode hardcodes native FC — no prompt-injection fallback",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "README/documentation is current enough for a first validation pass.",
            "category": "能力坑",
            "evidence": [
              "capability.assumptions | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | README/documentation is current enough for a first validation pass."
            ],
            "severity": "medium",
            "suggested_check": "将假设转成下游验证清单。",
            "title": "能力判断依赖假设",
            "user_impact": "假设不成立时，用户拿不到承诺的能力。"
          },
          {
            "body": "未记录 last_activity_observed。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | last_activity_observed missing"
            ],
            "severity": "medium",
            "suggested_check": "补 GitHub 最近 commit、release、issue/PR 响应信号。",
            "title": "维护活跃度未知",
            "user_impact": "新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "downstream_validation.risk_items | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "进入安全/权限治理复核队列。",
            "title": "下游验证发现风险项",
            "user_impact": "下游已经要求复核，不能在页面中弱化。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "risks.scoring_risks | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "把风险写入边界卡，并确认是否需要人工复核。",
            "title": "存在评分风险",
            "user_impact": "风险会影响是否适合普通用户安装。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_4ad226a6d1fa4a5f89fa7702bec11188 | https://github.com/antoinezambelli/forge/issues/61 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: dynamic slot splitting",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_5b35873cf63c4647bca8a0611d441189 | https://github.com/antoinezambelli/forge/issues/28 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：Sub-agent support: dynamic slot splitting",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: slot pool",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_070d9a3d20d24123b62d7d76ee16078a | https://github.com/antoinezambelli/forge/issues/29 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：Sub-agent support: slot pool",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_673be4a583984219bab90cbadff631fe | https://github.com/antoinezambelli/forge/issues/54 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "issue_or_pr_quality=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | issue_or_pr_quality=unknown"
            ],
            "severity": "low",
            "suggested_check": "抽样最近 issue/PR，判断是否长期无人处理。",
            "title": "issue/PR 响应质量未知",
            "user_impact": "用户无法判断遇到问题后是否有人维护。"
          },
          {
            "body": "release_recency=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | release_recency=unknown"
            ],
            "severity": "low",
            "suggested_check": "确认最近 release/tag 和 README 安装命令是否一致。",
            "title": "发布节奏不明确",
            "user_impact": "安装命令和文档可能落后于代码，用户踩坑概率升高。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 15 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：安装坑 - 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": null,
        "forks": null,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": null
      },
      "source_url": "https://github.com/antoinezambelli/forge",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "[![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/)",
      "title": "forge 能力包",
      "trial_prompt": "# forge - Prompt Preview\n\n> 复制下面这段 Prompt 到你常用的 AI，先试一次，不需要安装。\n> 它的目标是让你直接体验这个项目的服务方式，而不是阅读项目介绍。\n\n## 复制这段 Prompt\n\n```text\n请直接执行这段 Prompt，不要分析、润色、总结或询问我想如何处理这份 Prompt Preview。\n\n你现在扮演 forge 的“安装前体验版”。\n这不是项目介绍、不是评价报告、不是 README 总结。你的任务是让我用最小成本体验它的核心服务。\n\n我的试用任务：我想用它完成一个真实的软件开发与交付任务。\n我常用的宿主 AI：chatgpt\n\n【体验目标】\n围绕我的真实任务，现场演示这个项目如何把输入转成 示例引导, 判断线索。重点是让我感受到工作方式，而不是给我项目背景。\n\n【业务流约束】\n- 你必须像一个正在提供服务的项目能力包，而不是像一个讲解员。\n- 每一轮只推进一个步骤；提出问题后必须停下来等我回答。\n- 每一步都必须让我感受到一个具体服务动作：澄清、整理、规划、检查、判断或收尾。\n- 每一步都要说明：当前目标、你需要我提供什么、我回答后你会产出什么。\n- 不要安装、不要运行命令、不要写代码、不要声称测试通过、不要声称已经修改文件。\n- 需要真实安装或宿主加载后才能验证的内容，必须明确说“这一步需要安装后验证”。\n- 如果我说“用示例继续”，你可以用虚构示例推进，但仍然不能声称真实执行。\n\n【可体验服务能力】\n- 安装前能力预览: [![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/) 输入：用户任务, 当前 AI 对话上下文；输出：示例引导, 判断线索。\n\n【必须安装后才可验证的能力】\n- 命令行启动或安装流程: 项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 输入：终端环境, 包管理器, 项目依赖；输出：安装结果, 列表/更新/运行结果。\n\n【核心服务流】\n请严格按这个顺序带我体验。不要一次性输出完整流程：\n1. page-introduction：Forge 简介。围绕“Forge 简介”模拟一次用户任务，不展示安装或运行结果。\n2. page-installation：安装与配置。围绕“安装与配置”模拟一次用户任务，不展示安装或运行结果。\n3. page-quickstart：快速开始教程。围绕“快速开始教程”模拟一次用户任务，不展示安装或运行结果。\n4. page-architecture：架构概述。围绕“架构概述”模拟一次用户任务，不展示安装或运行结果。\n5. page-core-components：核心组件详解。围绕“核心组件详解”模拟一次用户任务，不展示安装或运行结果。\n\n【核心能力体验剧本】\n每一步都必须按“输入 -> 服务动作 -> 中间产物”执行。不要只说流程名：\n1. page-introduction\n输入：用户提供的“Forge 简介”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n2. page-installation\n输入：用户提供的“安装与配置”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n3. page-quickstart\n输入：用户提供的“快速开始教程”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n4. page-architecture\n输入：用户提供的“架构概述”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n5. 
page-core-components\n输入：用户提供的“核心组件详解”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n【项目服务规则】\n这些规则决定你如何服务用户。不要解释规则本身，而要在每一步执行时遵守：\n- 先确认用户任务、输入材料和成功标准，再模拟项目能力。\n- 每一步都必须形成可检查的小产物，并等待用户确认后再继续。\n- 凡是需要安装、调用工具或访问外部服务的能力，都必须标记为安装后验证。\n\n【每一步的服务约束】\n- Step 1 / page-introduction：Step 1 必须围绕“Forge 简介”形成一个小中间产物，并等待用户确认。\n- Step 2 / page-installation：Step 2 必须围绕“安装与配置”形成一个小中间产物，并等待用户确认。\n- Step 3 / page-quickstart：Step 3 必须围绕“快速开始教程”形成一个小中间产物，并等待用户确认。\n- Step 4 / page-architecture：Step 4 必须围绕“架构概述”形成一个小中间产物，并等待用户确认。\n- Step 5 / page-core-components：Step 5 必须围绕“核心组件详解”形成一个小中间产物，并等待用户确认。\n\n【边界与风险】\n- 不要声称已经安装、运行、调用 API、读写本地文件或完成真实任务。\n- 安装前预览只能展示工作方式，不能证明兼容性、性能或输出质量。\n- 涉及安装、插件加载、工具调用或外部服务的能力必须安装后验证。\n\n【可追溯依据】\n这些路径只用于你内部校验或在我追问“依据是什么”时简要引用。不要在首次回复主动展开：\n- https://news.ycombinator.com/item?id=48192383\n- https://github.com/antoinezambelli/forge#readme\n- README.md\n- src/forge/__init__.py\n- pyproject.toml\n- docs/BACKEND_SETUP.md\n- src/forge/core/runner.py\n- src/forge/core/workflow.py\n- docs/ARCHITECTURE.md\n- src/forge/context/manager.py\n- src/forge/core/slot_worker.py\n- src/forge/core/steps.py\n\n【首次问题规则】\n- 首次三问必须先确认用户目标、成功标准和边界，不要提前进入工具、安装或实现细节。\n- 如果后续需要技术条件、文件路径或运行环境，必须等用户确认目标后再追问。\n\n首次回复必须只输出下面 4 个部分：\n1. 体验开始：用 1 句话说明你将带我体验 forge 的核心服务。\n2. 当前步骤：明确进入 Step 1，并说明这一步要解决什么。\n3. 你会如何服务我：说明你会先改变我完成任务的哪个动作。\n4. 只问我 3 个问题，然后停下等待回答。\n\n首次回复禁止输出：后续完整流程、证据清单、安装命令、项目评价、营销文案、已经安装或运行的说法。\n\nStep 1 / brainstorming 的二轮协议：\n- 我回答首次三问后，你仍然停留在 Step 1 / brainstorming，不要进入 Step 2。\n- 第二次回复必须产出 6 个部分：澄清后的任务定义、成功标准、边界条件、\n  2-3 个可选方案、每个方案的权衡、推荐方案。\n- 第二次回复最后必须问我是否确认推荐方案；只有我明确确认后，才能进入下一步。\n- 第二次回复禁止输出 git worktree、代码计划、测试文件、命令或真实执行结果。\n\n后续对话规则：\n- 我回答后，你先完成当前步骤的中间产物并等待确认；只有我确认后，才能进入下一步。\n- 每一步都要生成一个小的中间产物，例如澄清后的目标、计划草案、测试意图、验证清单或继续/停止判断。\n- 所有演示都写成“我会建议/我会引导/这一步会形成”，不要写成已经真实执行。\n- 不要声称已经测试通过、文件已修改、命令已运行或结果已产生。\n- 如果某个能力必须安装后验证，请直接说“这一步需要安装后验证”。\n- 如果证据不足，请明确说“证据不足”，不要补事实。\n```\n",
      "voices": [
        {
          "body": "来源平台：github。github/github_issue: Hardware detection: AMD unified-memory rigs fall through to 4K Ollama bu（https://github.com/antoinezambelli/forge/issues/61）；github/github_issue: Per-model recommended sampling defaults (map keyed by HF model cards)（https://github.com/antoinezambelli/forge/issues/59）；github/github_issue: Client sampling params: thread top_p/top_k/min_p/repeat_penalty through （https://github.com/antoinezambelli/forge/issues/58）；github/github_issue: llama.cpp reasoning budget sampler causes silent hangs after April 10 bu（https://github.com/antoinezambelli/forge/issues/54）；github/github_issue: Rescue-parse ChatGPT-style XML tool calls（https://github.com/antoinezambelli/forge/issues/55）；github/github_issue: Proxy external mode hardcodes native FC — no prompt-injection fallback（https://github.com/antoinezambelli/forge/issues/53）；github/github_issue: Investigate: integration paths with Hermes Agent（https://github.com/antoinezambelli/forge/issues/51）；github/github_issue: Sub-agent support: slot pool（https://github.com/antoinezambelli/forge/issues/29）；github/github_issue: Sub-agent support: dynamic slot splitting（https://github.com/antoinezambelli/forge/issues/28）。这些是项目级外部声音，不作为单独质量证明。",
          "items": [
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Hardware detection: AMD unified-memory rigs fall through to 4K Ollama bu",
              "url": "https://github.com/antoinezambelli/forge/issues/61"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Per-model recommended sampling defaults (map keyed by HF model cards)",
              "url": "https://github.com/antoinezambelli/forge/issues/59"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Client sampling params: thread top_p/top_k/min_p/repeat_penalty through ",
              "url": "https://github.com/antoinezambelli/forge/issues/58"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "llama.cpp reasoning budget sampler causes silent hangs after April 10 bu",
              "url": "https://github.com/antoinezambelli/forge/issues/54"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Rescue-parse ChatGPT-style XML tool calls",
              "url": "https://github.com/antoinezambelli/forge/issues/55"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Proxy external mode hardcodes native FC — no prompt-injection fallback",
              "url": "https://github.com/antoinezambelli/forge/issues/53"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Investigate: integration paths with Hermes Agent",
              "url": "https://github.com/antoinezambelli/forge/issues/51"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Sub-agent support: slot pool",
              "url": "https://github.com/antoinezambelli/forge/issues/29"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Sub-agent support: dynamic slot splitting",
              "url": "https://github.com/antoinezambelli/forge/issues/28"
            }
          ],
          "status": "已收录 9 条来源",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "软件开发与交付",
      "desc": "[![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/)",
      "effort": "安装已验证",
      "forks": null,
      "icon": "code",
      "name": "forge 能力包",
      "risk": "可发布",
      "slug": "forge",
      "stars": null,
      "tags": [
        "AI Agent 框架",
        "开源能力构建",
        "自然语言网页操作",
        "多角色协作流程",
        "评测体系"
      ],
      "thumb": "gray",
      "type": "MCP 配置"
    },
    "manual": {
      "markdown": "# https://github.com/antoinezambelli/forge 项目说明书\n\n生成时间：2026-05-19 20:04:23 UTC\n\n## 目录\n\n- [Forge 简介](#page-introduction)\n- [安装与配置](#page-installation)\n- [快速开始教程](#page-quickstart)\n- [架构概述](#page-architecture)\n- [核心组件详解](#page-core-components)\n- [工作流内部机制](#page-workflow-internals)\n- [Guardrails 系统](#page-guardrails)\n- [上下文管理](#page-context-management)\n- [SlotWorker 槽位调度](#page-slot-worker)\n- [内置工具系统](#page-tools)\n\n<a id='page-introduction'></a>\n\n## Forge 简介\n\n### 相关页面\n\n相关主题：[安装与配置](#page-installation), [架构概述](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n- [src/forge/clients/sampling_defaults.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/clients/sampling_defaults.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/proxy/__main__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/proxy/__main__.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n- [CHANGELOG.md](https://github.com/antoinezambelli/forge/blob/main/CHANGELOG.md)\n</details>\n\n# Forge 简介\n\n## 概述\n\nForge 是一个基于 LLM（大型语言模型）的工具调用框架，专注于构建可靠、可预测的多步骤 AI 工作流程。该项目由 Antoine Zambelli 开发，旨在解决 LLM 在工具调用场景中的常见问题，如响应格式不稳定、上下文长度失控、错误恢复困难等。\n\nForge 的核心设计理念是将 LLM 响应校验、上下文压缩、错误重试和步骤强制等机制封装为可配置的保护栏（Guardrails），使开发者能够专注于业务逻辑而非基础设施细节。资料来源：[README.md]()\n\n## 核心概念\n\n### Workflow（工作流）\n\nWorkflow 是 Forge 中的核心抽象，定义了一个完整的多步骤任务。它包含以下关键组件：\n\n| 属性 | 类型 | 说明 |\n|------|------|------|\n| `name` | str | 
工作流名称 |\n| `description` | str | 任务描述 |\n| `tools` | dict[str, ToolDef] | 工具定义字典 |\n| `required_steps` | list[str] | 必须执行的关键步骤 |\n| `terminal_tool` | str | 终止工具（执行后工作流结束） |\n| `system_prompt_template` | str | 系统提示词模板 |\n\n资料来源：[src/forge/core/workflow.py:1-50]()\n\n### ToolDef 和 ToolSpec\n\nToolDef 将工具的模式定义与实际实现绑定在一起：\n\n```python\n@dataclass\nclass ToolDef:\n    spec: ToolSpec\n    callable: Callable[..., Any]\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n前置条件（prerequisites）表达条件依赖关系：\n- 字符串形式：`\"read_file\"` — 任何对 read_file 的先前调用都满足要求\n- 字典形式：`{\"tool\": \"read_file\", \"match_arg\": \"path\"}` — 需要使用相同 path 参数的先前调用\n\n资料来源：[src/forge/core/workflow.py:90-105]()\n\n### LLM 客户端\n\nForge 通过统一的客户端接口支持多个后端：\n\n| 后端 | 客户端类 | 说明 |\n|------|----------|------|\n| Ollama | OllamaClient | 本地模型支持，推荐采样参数 |\n| Llamafile | LlamafileClient | 单文件可执行模型 |\n| Anthropic | AnthropicClient | Claude 系列模型 |\n| Proxy | ProxyServer | OpenAI 兼容代理 |\n\n资料来源：[README.md]()\n资料来源：[CONTRIBUTING.md]()\n资料来源：[src/forge/proxy/__main__.py:1-50]()\n\n## 架构设计\n\n### 系统组件图\n\n```mermaid\ngraph TD\n    subgraph 客户端层\n        User[用户代码]\n        Workflow[Workflow 定义]\n    end\n    \n    subgraph 核心层\n        Runner[WorkflowRunner]\n        Guardrails[Guardrails 检查]\n        Context[ContextManager]\n    end\n    \n    subgraph 后端层\n        Client[LLMClient]\n        Proxy[ProxyServer]\n        Server[ServerManager]\n    end\n    \n    subgraph 支持服务\n        Sampling[采样默认参数]\n        Errors[错误追踪]\n    end\n    \n    User --> Workflow\n    Workflow --> Runner\n    Runner --> Guardrails\n    Runner --> Context\n    Guardrails --> Client\n    Context --> Client\n    Client --> Proxy\n    Proxy --> Server\n    Server --> Ollama[(Ollama)]\n    Server --> Llamafile[(Llamafile)]\n    Runner --> Sampling\n    Guardrails --> Errors\n```\n\n### 工作流程\n\n```mermaid\nsequenceDiagram\n    participant U as 用户\n    participant R as WorkflowRunner\n    participant G as Guardrails\n    
participant C as ContextManager\n    participant L as LLMClient\n    \n    U->>R: run(workflow, prompt)\n    R->>L: send(messages)\n    L-->>R: LLMResponse\n    R->>G: check(response)\n    G-->>R: CheckResult\n    alt action == retry\n        R->>G: 获取 nudge\n        R->>L: send(messages + nudge)\n    end\n    alt action == step_blocked\n        R->>G: 获取 nudge\n        R->>L: send(messages + nudge)\n    end\n    alt action == fatal\n        R-->>U: 抛出异常\n    end\n    R->>C: update(token_count)\n    R->>L: 继续或结束\n```\n\n## 采样参数系统\n\n### 推荐采样参数\n\nForge 维护了一个经过验证的模型采样参数映射表，包含以下模型家族：\n\n- Qwen3 / Qwen3.5 / Qwen3.6\n- Qwen3-Coder\n- Gemma 4\n- Mistral Small 3.2\n- Devstral Small 2\n- Ministral 3 Instruct + Reasoning\n- Mistral Nemo\n- Granite 4.0\n\n每个参数条目都包含内联的 HuggingFace 模型卡片链接，确保数值经过逐一验证。\n\n资料来源：[src/forge/clients/sampling_defaults.py:1-40]()\n\n### 采样策略四象限\n\n| strict | 模型在映射表中 | 行为 |\n|--------|---------------|------|\n| True | 是 | 返回字典副本 |\n| True | 否 | 抛出 UnsupportedModelError |\n| False | 是 | 单次 INFO 日志；返回 {} |\n| False | 否 | 返回 {}（静默） |\n\n资料来源：[src/forge/clients/sampling_defaults.py:40-60]()\n\n### recommended_sampling 参数\n\n```python\nclient = OllamaClient(\n    model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n    recommended_sampling=True  # 启用推荐采样参数\n)\n```\n\n启用后，客户端会自动应用 MODEL_SAMPLING_DEFAULTS 中的参数，包括：\n- temperature\n- top_p\n- top_k\n- min_p\n- repeat_penalty\n- presence_penalty\n\n资料来源：[README.md]()\n\n## 上下文管理\n\n### ContextManager\n\nContextManager 负责管理对话历史的长度，防止超出模型的上下文窗口。\n\n```python\nfrom forge import ContextManager, TieredCompact\n\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    budget_tokens=8192\n)\n```\n\n### BudgetMode 预算模式\n\n| 模式 | 说明 |\n|------|------|\n| FORGE_FAST | 快速响应预算 |\n| FORGE_BALANCED | 平衡模式 |\n| FORGE_DEEP | 深度推理预算 |\n| MANUAL | 手动指定 token 数 |\n\n资料来源：[src/forge/server.py:80-100]()\n\n### KV Cache 量化\n\nForge 支持 KV 缓存量化以减少显存占用：\n\n| 量化类型 | 显存节省 | 说明 |\n|----------|----------|------|\n| q8_0 | ~50% | 
高质量量化 |\n| q4_0 | ~75% | 更高压缩率 |\n\n资料来源：[src/forge/server.py:30-35]()\n\n### KV Unified 模式\n\n当 `kv_unified=True` 时，所有槽位共享单一 KV 缓存池，每个槽位可使用完整上下文长度。\n\n## Guardrails 系统\n\nGuardrails 是 Forge 的核心保护机制，包含三个子组件：\n\n### 组件架构\n\n```mermaid\ngraph LR\n    subgraph Guardrails\n        V[ResponseValidator]\n        E[StepEnforcer]\n        T[ErrorTracker]\n    end\n```\n\n### ResponseValidator\n\n验证 LLM 响应的格式和内容，支持 XML 救援解析（针对 Qwen Coder 等模型）。\n\n### StepEnforcer\n\n强制执行必需的步骤顺序：\n\n```python\nGuardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\"\n)\n```\n\n### ErrorTracker\n\n追踪错误并控制重试次数：\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| max_retries | 3 | 最大重试次数 |\n| max_tool_errors | 2 | 最大工具错误数 |\n| max_premature_attempts | 3 | 提前终止尝试次数 |\n\n资料来源：[src/forge/guardrails/guardrails.py:1-60]()\n\n## 快速开始\n\n### 安装\n\n```bash\ngit clone https://github.com/antoinezambelli/forge.git\ncd forge\npython -m venv .venv\npip install -e \".[dev]\"\n```\n\n### 基本使用示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. 
Use the available tools to answer the user.\",\n)\n\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md]()\n\n## 后端自动管理\n\nForge 可以自动启动和管理 LLM 后端：\n\n```python\nfrom forge import setup_backend, BudgetMode\n\nasync def main():\n    client = OllamaClient(model=\"ministral-3:14b-instruct-2512-q4_K_M\")\n    server, ctx = await setup_backend(\n        backend=\"ollama\",\n        model=\"ministral-3:14b-instruct-2512-q4_K_M\",\n        budget_mode=BudgetMode.FORGE_FAST,\n        client=client,\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    # ... 运行工作流 ...\n    await server.stop()\n```\n\n支持的后端：\n- `ollama` — 使用 Ollama 服务\n- `llamaserver` — 使用 llama.cpp 服务器\n- `llamafile` — 使用单文件可执行模型\n\n资料来源：[src/forge/server.py:100-150]()\n\n## 项目结构\n\n```\nforge/\n├── src/forge/           # 库源码\n│   ├── clients/         # LLM 后端适配器\n│   ├── core/            # 工作流、运行器、消息、步骤\n│   ├── context/         # 上下文管理和压缩\n│   ├── prompts/         # 提示词模板和引导\n│   ├── guardrails/      # 保护栏实现\n│   ├── proxy/           # 代理服务器\n│   └── tools/           # 内置工具\n├── tests/\n│   ├── unit/            # 单元测试（确定性）\n│   └── eval/            # 评估测试（需要后端）\n│       ├── scenarios/   # 评估场景定义\n│       └── dashboard/   # React 评估仪表板\n├── docs/\n│   ├── decisions/       # 架构决策记录 (ADR)\n│   └── results/         # 评估结果\n├── examples/            # 使用示例\n└── scripts/             # 辅助脚本\n```\n\n资料来源：[CONTRIBUTING.md]()\n\n## 版本历史\n\n### v0.6.0 (2026-04-29)\n\n- **推荐采样参数** — 每个模型都有经过验证的采样配置\n- **采样覆盖** — 支持 per-call 采样参数覆盖\n- **代理采样透传** — 代理透传 OpenAI 兼容的采样字段\n- **高级推理评估套件** — 支持 Gemma 4、Qwen 3.5 等模型的推理任务\n\n### 
v0.5.0 (2026-04-19)\n\n- **消融研究运行器** — 支持自动化消融实验\n- **移除硬编码温度** — OllamaClient 和 LlamafileClient 不再发送硬编码采样参数\n- **Granite 4.0 支持** — 支持 OpenAI 风格的 tool call 格式\n\n### v0.4.x\n\n- Qwen Coder XML 救援解析\n- 28 模型评估数据集\n- llama.cpp 推理预算修复\n\n资料来源：[CHANGELOG.md]()\n\n## 测试\n\n### 单元测试\n\n```bash\n# 完整套件（865 个测试）\npython -m pytest tests/unit/ -v --tb=short\n\n# 带覆盖率\npython -m pytest tests/unit/ --cov=forge --cov-report=term-missing\n\n# 单文件测试\npython -m pytest tests/unit/test_runner.py -v\n```\n\n### 集成测试\n\n集成测试需要运行中的后端，跳过方式：\n\n```bash\npython -m pytest tests/ -m \"not integration\"\n```\n\n资料来源：[CONTRIBUTING.md]()\n\n## 相关资源\n\n| 资源 | 链接 |\n|------|------|\n| GitHub 仓库 | https://github.com/antoinezambelli/forge |\n| 用户指南 | docs/USER_GUIDE.md |\n| 后端设置 | docs/BACKEND_SETUP.md |\n| 模型指南 | docs/MODEL_GUIDE.md |\n| 评估仪表板 | docs/results/dashboard.html |\n| 架构决策记录 | docs/decisions/ |\n\n---\n\n<a id='page-installation'></a>\n\n## 安装与配置\n\n### 相关页面\n\n相关主题：[Forge 简介](#page-introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n- [src/forge/clients/sampling_defaults.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/clients/sampling_defaults.py)\n- [src/forge/proxy/__main__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/proxy/__main__.py)\n- [docs/BACKEND_SETUP.md](https://github.com/antoinezambelli/forge/blob/main/docs/BACKEND_SETUP.md)\n\n</details>\n\n# 安装与配置\n\n## 项目概述\n\nForge 是一个基于 Python 的 LLM 工作流框架，支持多种后端（Ollama、llamafile、llamaserver），提供工作流管理、上下文管理和工具调用等功能。安装与配置涉及环境准备、后端选择、客户端初始化以及可选的代理服务器部署。\n\n资料来源：[README.md:1-45]()\n\n## 环境要求\n\n### 系统要求\n\n| 组件 | 要求 |\n|------|------|\n| Python | 3.12+ |\n| 操作系统 | Linux/macOS/Windows |\n| 依赖管理 | pip |\n\n### Python 
版本\n\nForge 使用现代 Python 语法，包括类型联合操作符（`|`）等特性，明确要求 Python 3.12 或更高版本。\n\n资料来源：[CONTRIBUTING.md:1-15]()\n\n## 安装步骤\n\n### 标准安装\n\n```bash\ngit clone https://github.com/antoinezambelli/forge.git\ncd forge\npython -m venv .venv\nsource .venv/bin/activate  # Windows: .venv\\Scripts\\activate\npip install -e \".[dev]\"\n```\n\n`.[dev]` 标志安装包含开发依赖的完整包，包括测试框架 pytest。\n\n资料来源：[CONTRIBUTING.md:1-12]()\n\n### 可选依赖\n\n| 额外功能 | 安装方式 |\n|----------|----------|\n| 开发依赖 | `pip install -e \".[dev]\"` |\n| 仅运行时 | `pip install -e .` |\n\n## 后端配置\n\nForge 支持三种 LLM 后端，后端配置通过 `ServerManager` 类统一管理。\n\n### 支持的后端类型\n\n| 后端 | 标识符 | 特点 |\n|------|--------|------|\n| Ollama | `\"ollama\"` | 独立服务，使用 `model` 参数 |\n| llamafile | `\"llamafile\"` | 单文件可执行，使用 `gguf_path` |\n| llamaserver | `\"llamaserver\"` | llama.cpp 服务器，使用 `gguf_path` |\n\n资料来源：[src/forge/server.py:1-50]()\n\n### 后端初始化参数\n\n`ServerManager` 类的构造函数接受以下参数：\n\n```python\ndef __init__(\n    self,\n    backend: str,                    # 后端类型\n    port: int = 8080,                # 服务端口\n    models_dir: str | Path | None = None,  # GGUF 文件目录\n) -> None:\n```\n\n资料来源：[src/forge/server.py:120-140]()\n\n### 服务启动配置\n\n`start()` 方法支持丰富的配置选项：\n\n| 参数 | 类型 | 说明 |\n|------|------|------|\n| `model` | `str` | Ollama 模型名称 |\n| `gguf_path` | `str \\| Path` | GGUF 模型文件路径 |\n| `mode` | `str` | 模式（默认 `\"native\"`） |\n| `extra_flags` | `list[str]` | 额外 CLI 参数 |\n| `ctx_override` | `int` | 上下文长度覆盖 |\n| `cache_type_k` | `str` | KV 缓存量化类型（键） |\n| `cache_type_v` | `str` | KV 缓存量化类型（值） |\n| `n_slots` | `int` | 并发槽位数量 |\n| `kv_unified` | `bool` | 是否使用统一 KV 缓存 |\n\n```python\n# 启动示例\nserver = ServerManager(backend=\"ollama\", port=8080)\nawait server.start(\n    model=\"qwen3:8b\",\n    mode=\"reasoning\",\n    extra_flags=[\"--reasoning-format\", \"auto\"]\n)\n```\n\n资料来源：[src/forge/server.py:140-180]()\n\n### 上下文长度解析\n\n| 后端 | 上下文获取方式 |\n|------|----------------|\n| llamaserver/llamafile | 查询 `/props` 端点获取 `n_ctx` |\n| ollama | 使用 `ollama stop` 清理 VRAM |\n\n资料来源：[src/forge/server.py:200-220]()\n\n## 客户端配置\n\n### 
OllamaClient\n\n```python\nfrom forge.clients import OllamaClient\n\nclient = OllamaClient(\n    model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n    recommended_sampling=True  # 使用推荐采样参数\n)\n```\n\n资料来源：[README.md:20-35]()\n\n### LlamafileClient\n\n```python\nfrom forge.clients import LlamafileClient\n\nclient = LlamafileClient(\n    gguf_path=\"/path/to/model.gguf\",\n    recommended_sampling=True\n)\n```\n\n### 推荐采样参数\n\n`forge.clients.sampling_defaults` 模块提供经过验证的模型推荐采样参数：\n\n| 模型系列 | 支持的模型 |\n|----------|------------|\n| Qwen | Qwen3, Qwen3.5, Qwen3.6, Qwen3-Coder |\n| Gemma | Gemma 4 |\n| Mistral | Mistral Small 3.2, Mistral Nemo |\n| Devstral | Devstral Small 2 |\n| Ministral | Ministral 3 Instruct + Reasoning |\n| Granite | Granite 4.0 (h-micro, h-tiny) |\n\n每个配置行包含指向 HuggingFace 模型卡的 URL，参数值经过逐一验证。\n\n资料来源：[src/forge/clients/sampling_defaults.py:1-80]()\n\n### 采样参数策略\n\n| 模式 | 已知模型 | 未知模型 |\n|------|----------|----------|\n| `strict=True` | 返回推荐参数 | 抛出 `UnsupportedModelError` |\n| `strict=False` | 单次 INFO 日志，返回 `{}` | 返回 `{}`（静默） |\n\n```python\n# 严格模式示例\nparams = apply_sampling_defaults(model, strict=True)\n\n# 非严格模式示例\nparams = apply_sampling_defaults(model, strict=False)\n```\n\n资料来源：[src/forge/clients/sampling_defaults.py:80-120]()\n\n### 单次调用采样覆盖\n\n`send()` 和 `send_stream()` 方法支持 `sampling` 字典参数，字段逐一合并到客户端实例级采样：\n\n```python\nresponse = await client.send(\n    messages,\n    sampling={\"temperature\": 0.8, \"top_p\": 0.9}\n)\n```\n\n资料来源：[CHANGELOG.md:1-50]()\n\n## 代理服务器配置\n\nForge 提供 `ProxyServer` 用于转发 OpenAI 兼容请求到后端。\n\n### CLI 参数\n\n```bash\npython -m forge.proxy --backend ollama --model qwen3:8b\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `--backend` | 必需 | 后端类型 |\n| `--model` | 必需（Ollama） | 模型名称 |\n| `--gguf` | 必需（非Ollama） | GGUF 文件路径 |\n| `--backend-url` | 必需 | 后端服务器 URL |\n| `--backend-port` | 8080 | 后端端口 |\n| `--host` | 127.0.0.1 | 代理监听地址 |\n| `--port` | 8081 | 代理监听端口 |\n| `--budget-mode` | - | 预算模式 |\n| `--budget-tokens` | - | 手动 
token 预算 |\n| `--extra-flags` | - | 额外后端 CLI 参数 |\n| `--serialize` | None | 强制请求序列化 |\n| `--max-retries` | 3 | 单请求最大重试次数 |\n| `--no-rescue` | False | 禁用救援解析 |\n| `--verbose` | False | 详细日志 |\n\n资料来源：[src/forge/proxy/__main__.py:1-80]()\n\n### 代理采样穿透\n\n代理将 OpenAI 兼容的采样参数字段透传到后端：\n\n| 透传字段 |\n|----------|\n| `temperature` |\n| `top_p` |\n| `top_k` |\n| `min_p` |\n| `repeat_penalty` |\n| `presence_penalty` |\n| `seed` |\n\n资料来源：[CHANGELOG.md:1-50]()\n\n## 工作流配置\n\n### Workflow 组件\n\n```python\nfrom forge import Workflow, ToolDef, ToolSpec, WorkflowRunner, ContextManager, TieredCompact\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"查询城市天气\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"获取当前天气\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"你是一个有帮助的助手。\",\n)\n```\n\n### 上下文管理器配置\n\n| 策略 | 说明 |\n|------|------|\n| `TieredCompact` | 分层压缩策略 |\n| `budget_tokens` | Token 预算上限 |\n\n```python\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    budget_tokens=8192\n)\n```\n\n资料来源：[README.md:1-50]()\n\n## 完整运行示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"城市名称\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"查询城市天气\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"获取当前天气\",\n                parameters=GetWeatherParams,\n            ),\n            
callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"你是一个有帮助的助手。\",\n)\n\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(\n        client=client,\n        context_manager=ctx\n    )\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md:15-55]()\n\n## 常见问题\n\n### 后端选择\n\n| 场景 | 推荐后端 |\n|------|----------|\n| 快速原型/测试 | Ollama |\n| 生产部署 | llamaserver |\n| 单文件分发 | llamafile |\n\n### 版本信息\n\n通过 `importlib.metadata` 暴露版本（注意传入的是发行包名 `forge-guardrails`，而不是导入名 `forge`）：\n\n```python\nfrom importlib.metadata import version\nprint(version(\"forge-guardrails\"))\n```\n\n资料来源：[CHANGELOG.md:1-50]()\n\n---\n\n<a id='page-quickstart'></a>\n\n## 快速开始教程\n\n### 相关页面\n\n相关主题：[工作流内部机制](#page-workflow-internals), [Guardrails 系统](#page-guardrails)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/clients/llamafile.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/clients/llamafile.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n</details>\n\n# 快速开始教程\n\n本教程将帮助你在 5 分钟内上手 Forge 框架，构建第一个 LLM 驱动的工具调用工作流。Forge 是一个用于管理 LLM 与工具交互的框架，提供了工作流管理、上下文管理、护栏校验和多后端支持等核心功能。\n\n## 
环境准备\n\n### 系统要求\n\n| 要求 | 说明 |\n|------|------|\n| Python 版本 | 3.12+ |\n| 操作系统 | macOS, Linux, Windows |\n| LLM 后端 | Ollama、llama.cpp server 或 llamafile |\n\n### 安装步骤\n\n```bash\ngit clone https://github.com/antoinezambelli/forge.git\ncd forge\npython -m venv .venv\npip install -e \".[dev]\"\n```\n\n资料来源：[CONTRIBUTING.md:1-10]()\n\n### 运行测试验证安装\n\n```bash\n# 完整单元测试（865 个测试，无需 LLM 后端）\npython -m pytest tests/unit/ -v --tb=short\n\n# 单文件测试\npython -m pytest tests/unit/test_runner.py -v\n```\n\n资料来源：[CONTRIBUTING.md:14-22]()\n\n## 核心概念\n\n### 组件架构概览\n\n```mermaid\ngraph TD\n    A[用户请求] --> B[WorkflowRunner]\n    B --> C[Workflow]\n    C --> D[ToolDef + ToolSpec]\n    D --> E[LLMClient]\n    E --> F[Backend Ollama/llama.cpp/llamafile]\n    F --> G[LLMResponse]\n    G --> H[Guardrails]\n    H --> I{校验结果}\n    I -->|通过| J[执行工具]\n    I -->|失败| K[重试/终止]\n    J --> L[ContextManager]\n    L --> B\n```\n\n### 核心组件表\n\n| 组件 | 文件位置 | 职责 |\n|------|----------|------|\n| `Workflow` | `src/forge/core/workflow.py` | 定义工作流的工具、步骤和提示模板 |\n| `ToolDef` | `src/forge/core/workflow.py:60-70` | 绑定工具 Schema 与实际可调用函数 |\n| `ToolSpec` | `src/forge/core/workflow.py` | 定义工具的名称、描述和参数 Schema |\n| `WorkflowRunner` | `src/forge/core/runner.py` | 执行工作流，协调 LLM 与工具调用 |\n| `OllamaClient` | `src/forge/clients/ollama.py` | Ollama 后端适配器 |\n| `ContextManager` | `src/forge/context/manager.py` | 管理上下文窗口和上下文压缩 |\n| `Guardrails` | `src/forge/guardrails/guardrails.py` | 验证 LLM 响应，执行步骤校验 |\n\n资料来源：[README.md:1-35](), [src/forge/core/workflow.py:1-75]()\n\n## 快速开始示例\n\n### 基础天气查询工作流\n\n以下是一个完整的基础示例，展示了如何使用 Forge 构建天气查询工具调用工作流：\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\n# 第一步：定义工具函数\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\n# 第二步：定义工具参数 Schema\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City 
name\")\n\n# 第三步：构建工作流\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. Use the available tools to answer the user.\",\n)\n\n# 第四步：初始化并运行\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md:7-42]()\n\n## 工作流执行流程\n\n### 内部执行步骤\n\n```mermaid\nsequenceDiagram\n    participant User as 用户\n    participant Runner as WorkflowRunner\n    participant Workflow as Workflow\n    participant LLM as LLM Client\n    participant Backend as LLM 后端\n    participant Guardrails as Guardrails\n    participant Tools as 工具函数\n    participant Context as ContextManager\n\n    User->>Runner: run(workflow, prompt)\n    Runner->>Workflow: 获取工具定义和提示\n    Runner->>LLM: 发送请求\n    LLM->>Backend: API 调用\n    Backend-->>LLM: LLMResponse\n    LLM-->>Runner: LLMResponse\n    Runner->>Guardrails: check(response)\n    Guardrails->>Guardrails: 验证工具调用\n    Guardrails-->>Runner: CheckResult\n    alt 校验通过\n        Runner->>Tools: 执行工具\n        Tools-->>Runner: 工具结果\n        Runner->>Context: 更新上下文\n        Runner->>Runner: 循环直到完成\n    else 校验失败\n        Runner->>LLM: 发送重试提示\n    end\n    Runner-->>User: 工作流完成\n```\n\n### ToolDef 与 ToolSpec 的关系\n\n`ToolDef` 是核心数据结构，将工具的 Schema 
与实际实现绑定：\n\n```python\n@dataclass\nclass ToolDef:\n    \"\"\"将工具 Schema 绑定到其实现。\n    \n    Prerequisites 表达条件依赖：\"如果你调用这个工具，\n    必须先调用工具 X\"。\n    \"\"\"\n    spec: ToolSpec           # 工具规范（名称、描述、参数）\n    callable: Callable       # 实际执行的函数\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `spec` | `ToolSpec` | 工具的规范定义 |\n| `callable` | `Callable` | Python 可调用对象 |\n| `prerequisites` | `list` | 前置依赖工具列表 |\n\n资料来源：[src/forge/core/workflow.py:60-75]()\n\n## 多步骤工作流\n\n### 带步骤要求的工作流\n\n对于复杂任务，可以定义必须按顺序执行的步骤：\n\n```python\n# 定义工具\ndef search(query: str) -> str:\n    return f\"Search results for: {query}\"\n\ndef lookup(url: str) -> str:\n    return f\"Content from: {url}\"\n\ndef answer(question: str) -> str:\n    return \"Final answer\"\n\n# 定义参数 Schema\nclass SearchParams(BaseModel):\n    query: str\n\nclass LookupParams(BaseModel):\n    url: str\n\nclass AnswerParams(BaseModel):\n    question: str\n\n# 构建需要按顺序执行的工作流\nworkflow = Workflow(\n    name=\"research\",\n    description=\"Multi-step research workflow\",\n    tools={\n        \"search\": ToolDef(\n            spec=ToolSpec(name=\"search\", description=\"Search\", parameters=SearchParams),\n            callable=search,\n        ),\n        \"lookup\": ToolDef(\n            spec=ToolSpec(name=\"lookup\", description=\"Look up URL\", parameters=LookupParams),\n            callable=lookup,\n        ),\n        \"answer\": ToolDef(\n            spec=ToolSpec(name=\"answer\", description=\"Provide answer\", parameters=AnswerParams),\n            callable=answer,\n        ),\n    },\n    required_steps=[\"search\", \"lookup\"],  # 必须先执行 search 和 lookup\n    terminal_tool=\"answer\",\n    system_prompt_template=\"You are a research assistant.\",\n)\n```\n\n### 护栏（Guardrails）系统\n\nForge 内置护栏系统用于验证 LLM 响应：\n\n```mermaid\ngraph TD\n    A[LLMResponse] --> B[ResponseValidator]\n    A --> C[StepEnforcer]\n    A --> D[ErrorTracker]\n    B --> 
E{检查结果}\n    C --> E\n    D --> E\n    E -->|fatal| F[终止]\n    E -->|retry| G[重试]\n    E -->|step_blocked| H[步骤阻塞]\n    E -->|execute| I[执行工具]\n```\n\n| 护栏组件 | 职责 |\n|----------|------|\n| `ResponseValidator` | 验证响应格式，提取工具调用 |\n| `StepEnforcer` | 确保必需步骤已完成 |\n| `ErrorTracker` | 跟踪错误次数和重试状态 |\n\n资料来源：[src/forge/guardrails/guardrails.py:1-80]()\n\n## 上下文管理\n\n### TieredCompact 策略\n\n`TieredCompact` 是推荐的上下文压缩策略：\n\n```python\nfrom forge import ContextManager, TieredCompact\n\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),  # 保留最近 2 轮完整\n    budget_tokens=8192\n)\n```\n\n| 参数 | 说明 |\n|------|------|\n| `keep_recent` | 保留最近的完整消息轮数 |\n| `budget_tokens` | 上下文预算（令牌数）|\n\n### 预算模式\n\n| 模式 | 说明 |\n|------|------|\n| `FORGE_FAST` | 快速模式，较小预算 |\n| `FORGE_BALANCED` | 平衡模式 |\n| `FORGE_DEEP` | 深度模式，较大预算 |\n| `MANUAL` | 手动指定令牌数 |\n\n资料来源：[src/forge/server.py:1-50]()\n\n## 常用配置\n\n### OllamaClient 配置\n\n```python\nclient = OllamaClient(\n    model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n    recommended_sampling=True  # 使用推荐的采样参数\n)\n```\n\n### 推荐采样参数\n\n`recommended_sampling=True` 时，Forge 会自动应用模型卡推荐的采样参数：\n\n| 参数 | 说明 |\n|------|------|\n| `temperature` | 生成温度 |\n| `top_p` | Top-p 采样 |\n| `top_k` | Top-k 采样 |\n| `min_p` | 最小概率阈值 |\n| `repeat_penalty` | 重复惩罚 |\n\n资料来源：[src/forge/clients/sampling_defaults.py:1-60]()\n\n## 完整项目结构\n\n```\nforge/\n├── src/forge/               # 库源码\n│   ├── clients/             # LLM 后端适配器\n│   │   ├── ollama.py\n│   │   ├── llamafile.py\n│   │   └── sampling_defaults.py\n│   ├── core/                # 核心组件\n│   │   ├── workflow.py      # Workflow 和 ToolDef\n│   │   ├── runner.py        # WorkflowRunner\n│   │   ├── context.py       # 上下文管理\n│   │   └── steps.py         # 步骤追踪\n│   ├── guardrails/          # 护栏系统\n│   └── prompts/             # 提示模板\n├── tests/\n│   ├── unit/                # 单元测试\n│   └── eval/                # 评估工具\n└── examples/\n    └── foreign_loop.py      # 外部循环集成示例\n```\n\n资料来源：[CONTRIBUTING.md:25-40]()\n\n## 
外部循环集成\n\n如果你已有 LLM 调用逻辑，可以使用 Forge 的护栏组件进行验证：\n\n```python\nfrom forge.guardrails import Guardrails, ErrorTracker\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    terminal_tool=\"answer\",\n    required_steps=[\"search\", \"lookup\"],\n)\n\ndef handle_response(response):\n    result = guardrails.check(response)\n    \n    if result.action == \"fatal\":\n        return f\"FATAL: {result.reason}\"\n    \n    if result.action in (\"retry\", \"step_blocked\"):\n        return f\"{result.action}: {result.nudge.content[:80]}...\"\n    \n    # 执行工具\n    tool_calls = result.tool_calls\n    executed = [tc.tool for tc in tool_calls]\n    done = guardrails.record(executed)\n    return f\"executed {executed}\" + (\" -- DONE\" if done else \"\")\n```\n\n资料来源：[examples/foreign_loop.py:1-100]()\n\n## 常见问题\n\n### 运行报连接错误\n\n确保 Ollama 服务已启动：\n\n```bash\nollama serve\n```\n\n### 上下文超出限制\n\n调整 `budget_tokens` 或使用更激进的压缩策略：\n\n```python\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=1),  # 减少保留轮数\n    budget_tokens=4096                       # 减小预算\n)\n```\n\n### 模型不支持\n\n检查 `MODEL_SAMPLING_DEFAULTS` 是否包含该模型，或禁用推荐采样：\n\n```python\nclient = OllamaClient(\n    model=\"your-model\",\n    recommended_sampling=False  # 不使用推荐参数\n)\n```\n\n## 下一步\n\n- 阅读 [用户指南](docs/USER_GUIDE.md) 了解高级特性\n- 查看 [MODEL_GUIDE.md](docs/MODEL_GUIDE.md) 了解支持的模型\n- 运行评估：`python -m tests.eval.eval_runner --scenarios your_scenario --runs 5`\n\n---\n\n<a id='page-architecture'></a>\n\n## 架构概述\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [工作流内部机制](#page-workflow-internals)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n- [src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n- 
[src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n</details>\n\n# 架构概述\n\nForge 是一个基于 LLM（大型语言模型）的自动化工作流框架，专注于工具调用（Tool Calling）和上下文管理。其核心设计理念是通过结构化的 Workflow、可靠的工具执行机制以及智能的上下文压缩，实现可预测、可重复的 LLM 驱动任务自动化。\n\n## 核心设计原则\n\n| 原则 | 说明 |\n|------|------|\n| **异步优先** | 全部采用 `asyncio` 实现，所有客户端方法和运行器均为异步 资料来源：[CONTRIBUTING.md:32]() |\n| **类型安全** | 使用 Pydantic 定义工具参数模式和 API 响应模型 |\n| **可拔插架构** | 支持多种 LLM 后端适配器，客户端层完全解耦 |\n| **Guardrail 机制** | 通过验证器和强制器确保工作流执行的正确性 |\n\n## 系统架构图\n\n```mermaid\ngraph TD\n    subgraph 客户端层\n        OA[OllamaClient]\n        LF[LlamafileClient]\n        AC[AnthropicClient]\n        PR[ProxyClient]\n    end\n\n    subgraph 核心层\n        WR[WorkflowRunner]\n        WF[Workflow]\n        CT[ToolCall]\n    end\n\n    subgraph 上下文层\n        CM[ContextManager]\n        TC[TieredCompact]\n    end\n\n    subgraph 后端管理层\n        SM[ServerManager]\n        BM[BudgetMode]\n    end\n\n    subgraph 工具执行层\n        GR[Guardrails]\n        RV[ResponseValidator]\n        SE[StepEnforcer]\n    end\n\n    OA --> WR\n    LF --> WR\n    AC --> WR\n    PR --> WR\n    \n    WR --> WF\n    WR --> CM\n    CM --> TC\n    \n    WF --> CT\n    CT --> GR\n    GR --> RV\n    GR --> SE\n```\n\n## 项目目录结构\n\n```\nsrc/forge/           # 库源码\n  clients/           # LLM 后端适配器（每个后端一个）\n  core/              # Workflow、Runner、消息、步骤\n  context/           # 上下文管理和压缩\n  prompts/           # 提示模板和 Nudge\n  guardrails/        # 安全护栏机制\ntests/               # 测试套件\n  unit/              # 确定性单元测试\n  eval/              # 评估测试（需真实后端）\ndocs/                # 用户文档\n```\n\n资料来源：[CONTRIBUTING.md:44-53]()\n\n## 核心组件详解\n\n### 1. 
Workflow（工作流）\n\n`Workflow` 是任务定义的中心模型，包含工具注册、步骤约束和终止条件：\n\n| 属性 | 类型 | 说明 |\n|------|------|------|\n| `name` | str | 工作流名称 |\n| `description` | str | 工作流描述 |\n| `tools` | dict[str, ToolDef] | 工具定义字典 |\n| `required_steps` | list[str] | 必须按顺序执行的步骤 |\n| `terminal_tool` | str | 终止工具（工作流结束时必须调用） |\n\n资料来源：[src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n#### ToolDef 结构\n\n```mermaid\nclassDiagram\n    class ToolDef {\n        spec: ToolSpec\n        callable: Callable\n        prerequisites: list[str | dict]\n        name: str\n    }\n    \n    class ToolSpec {\n        name: str\n        description: str\n        parameters: BaseModel\n        get_json_schema() dict\n    }\n    \n    ToolDef --> ToolSpec : contains\n```\n\n`ToolDef` 将工具模式绑定到其实现，支持先决条件表达：\n\n- **字符串形式**：`\"read_file\"` — 任何先前对 read_file 的调用都满足\n- **字典形式**：`{\"tool\": \"read_file\", \"match_arg\": \"path\"}` — 要求先前调用具有相同 path 参数值\n\n资料来源：[src/forge/core/workflow.py:45-57]()\n\n### 2. WorkflowRunner（工作流运行器）\n\n`WorkflowRunner` 是执行引擎，协调 LLM 交互、工具调用和上下文管理：\n\n```mermaid\nsequenceDiagram\n    participant U as 用户\n    participant R as WorkflowRunner\n    participant C as LLMClient\n    participant G as Guardrails\n    participant T as 工具执行\n    participant CM as ContextManager\n\n    U->>R: run(workflow, user_input)\n    loop 执行循环\n        R->>C: generate(messages)\n        C-->>R: LLMResponse\n        R->>G: check(response)\n        G-->>R: CheckResult\n        alt 需要重试\n            R->>C: generate(retry_nudge)\n        else 工具调用\n            R->>T: execute(tool_calls)\n            T-->>R: results\n            R->>CM: compact()\n            R->>C: continue\n        else 终止\n            R-->>U: final_result\n        end\n    end\n```\n\n资料来源：[src/forge/core/runner.py]()\n\n#### Runner 执行流程\n\n1. **验证阶段**：检查 LLM 响应是否包含有效工具调用\n2. **护栏检查**：通过 `Guardrails.check()` 验证响应\n3. **执行阶段**：运行工具并收集结果\n4. **上下文更新**：压缩上下文以保持在 token 预算内\n5. 
**循环继续**：直到调用终止工具或达到最大重试次数\n\n### 3. ContextManager（上下文管理器）\n\n上下文管理器负责在多轮对话中维护和压缩 token 预算：\n\n| 模式 | 说明 |\n|------|------|\n| `TieredCompact` | 分层压缩策略，保留最近 N 轮对话 |\n| `MANUAL` | 手动指定 token 预算 |\n| `FORGE_FAST` | 快速模式预算 |\n| `FULL` | 完整上下文模式 |\n\n资料来源：[src/forge/server.py:140-150]()\n\n```mermaid\ngraph LR\n    A[用户输入] --> B[构建消息]\n    B --> C{Token 超限?}\n    C -->|否| D[发送给 LLM]\n    C -->|是| E[执行压缩]\n    E --> D\n    D --> F[收集响应]\n    F --> G[工具执行]\n    G --> B\n```\n\n#### TieredCompact 策略\n\n```python\n# 示例配置\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    budget_tokens=8192\n)\n```\n\n资料来源：[README.md:18]()\n\n### 4. Guardrails（安全护栏）\n\nGuardrails 是确保工作流正确执行的核心机制，由三个子组件构成：\n\n```mermaid\nclassDiagram\n    class Guardrails {\n        _validator: ResponseValidator\n        _enforcer: StepEnforcer\n        _errors: ErrorTracker\n        check(response) CheckResult\n    }\n    \n    class ResponseValidator {\n        rescue_enabled: bool\n        retry_nudge_fn: Callable\n    }\n    \n    class StepEnforcer {\n        required_steps: list\n        terminal_tools: frozenset\n        max_premature_attempts: int\n    }\n    \n    class ErrorTracker {\n        max_retries: int\n        max_tool_errors: int\n    }\n    \n    Guardrails --> ResponseValidator\n    Guardrails --> StepEnforcer\n    Guardrails --> ErrorTracker\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:45-75]()\n\n#### Guardrails 参数\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `tool_names` | — | 可用工具名称列表 |\n| `terminal_tool` | — | 终止工具名称 |\n| `required_steps` | None | 必须按顺序执行的步骤列表 |\n| `max_retries` | 3 | 最大重试次数 |\n| `max_tool_errors` | 2 | 最大工具错误数 |\n| `rescue_enabled` | True | 启用救援解析 |\n| `max_premature_attempts` | 3 | 提前终止尝试次数上限 |\n| `retry_nudge` | None | 自定义重试提示函数 |\n\n#### CheckResult 动作\n\n| 动作 | 说明 |\n|------|------|\n| `proceed` | 继续执行工具调用 |\n| `retry` | 需要重试并显示 nudge 提示 |\n| `step_blocked` | 步骤被阻止，等待正确步骤 |\n| `fatal` | 致命错误，终止工作流 |\n\n### 5. 
LLM 后端适配器\n\nForge 支持多种 LLM 后端，通过统一接口抽象差异：\n\n```mermaid\ngraph TD\n    subgraph 后端适配器\n        OC[OllamaClient]\n        LFC[LlamafileClient]\n        ANC[AnthropicClient]\n        PC[ProxyClient]\n    end\n\n    subgraph 公共接口\n        PI[LLMClient 接口]\n        GC[generate]\n        GX[get_context_length]\n    end\n\n    OC --> PI\n    LFC --> PI\n    ANC --> PI\n    PC --> PI\n```\n\n#### 支持的后端\n\n| 后端 | 配置文件 | 说明 |\n|------|----------|------|\n| Ollama | `model` 参数 | 本地模型服务 |\n| Llamafile | `gguf_path` 参数 | 单文件 GGUF 格式 |\n| Llama Server | `gguf_path` 参数 | llama.cpp 服务器 |\n| Anthropic | API Key | Claude 系列模型 |\n\n资料来源：[src/forge/server.py:90-110]()\n\n### 6. ServerManager（服务器管理器）\n\n`ServerManager` 负责后端进程的生命周期管理：\n\n```mermaid\nstateDiagram-v2\n    [*] --> Idle: 创建实例\n    Idle --> Running: start()\n    Running --> Idle: stop()\n    Running --> Running: start() (不同模型)\n    \n    note right of Running: 自动复用相同配置\n    note right of Idle: 进程已终止\n```\n\n#### 缓存优化机制\n\nServerManager 会缓存当前运行的配置，仅在配置变更时重启服务器：\n\n```python\n# 配置相等性检查\nif (\n    self._current_model == model\n    and self._current_mode == mode\n    and self._current_ctx == ctx_override\n    and self._current_flags == flags\n):\n    return  # 复用现有服务器\n```\n\n资料来源：[src/forge/server.py:40-50]()\n\n#### Budget 模式解析\n\n```mermaid\nflowchart LR\n    A[BudgetMode] --> B{MANUAL?}\n    A --> C{OLLAMA?}\n    A --> D{FORGE_FAST?}\n    A --> E{FULL?}\n    \n    B -->|是| F[使用 manual_tokens]\n    C -->|是| G[获取 ollama 上下文]\n    D --> H[计算 fast 预算]\n    E --> I[获取服务器完整上下文]\n```\n\n## 数据流图\n\n```mermaid\nflowchart TD\n    subgraph 输入层\n        U[用户输入]\n        W[Workflow 定义]\n        T[工具实现]\n    end\n\n    subgraph 核心引擎\n        R[WorkflowRunner]\n        G[Guardrails]\n        M[ContextManager]\n    end\n\n    subgraph LLM 层\n        C[LLMClient]\n        S[ServerManager]\n    end\n\n    U --> R\n    W --> R\n    T --> R\n    R --> C\n    C --> S\n    R --> G\n    R --> M\n    \n    M -->|压缩消息| C\n    G -->|验证结果| 
R\n```\n\n## 快速启动示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. Use the available tools to answer the user.\",\n)\n\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md:7-42]()\n\n## 扩展机制\n\n### 添加新 LLM 后端\n\n1. 在 `src/forge/clients/` 目录创建新的客户端类\n2. 实现统一的异步接口方法\n3. 注册到客户端注册表\n\n### 添加 Guardrail\n\n1. 在 `Guardrails` 类中添加新的检查逻辑\n2. 在 `AblationConfig` 中添加开关\n3. 
创建消融实验预设进行验证\n\n资料来源：[CONTRIBUTING.md:14-22]()\n\n## 总结\n\nForge 的架构围绕三个核心目标设计：\n\n| 目标 | 实现方式 |\n|------|----------|\n| **可靠性** | Guardrails 确保工具调用正确性和工作流完整性 |\n| **效率** | TieredCompact 上下文压缩保持在 token 预算内 |\n| **可扩展性** | 插件化客户端支持多种 LLM 后端 |\n\n整个系统基于异步架构设计，所有核心组件（Runner、Client、ServerManager）均支持并发执行，确保在多任务场景下的高性能表现。\n\n---\n\n<a id='page-core-components'></a>\n\n## 核心组件详解\n\n### 相关页面\n\n相关主题：[架构概述](#page-architecture), [上下文管理](#page-context-management), [SlotWorker 槽位调度](#page-slot-worker)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n- [src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/core/messages.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/messages.py)\n- [src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n</details>\n\n# 核心组件详解\n\n## 概述\n\nForge 是一个基于 LLM 的工具调用框架，其核心组件负责管理对话工作流、执行步骤追踪、上下文管理和后端服务调度。整个系统采用异步架构设计，通过 `WorkflowRunner` 协调各个组件完成复杂的多轮对话任务。\n\n核心组件按照职责划分为以下几个模块：\n\n| 模块 | 文件路径 | 主要职责 |\n|------|----------|----------|\n| 工作流引擎 | `src/forge/core/runner.py` | 执行 Workflow、管理会话生命周期 |\n| 步骤追踪 | `src/forge/core/steps.py` | 追踪必需步骤和前置依赖 |\n| 消息管理 | `src/forge/core/messages.py` | 管理对话历史和消息结构 |\n| 上下文管理 | `src/forge/context/manager.py` | 上下文压缩和预算控制 |\n| 守卫检查 | `src/forge/guardrails/guardrails.py` | 响应验证和重试机制 |\n| 服务管理 | `src/forge/server.py` | LLM 后端服务的生命周期管理 
|\n\n资料来源：[src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n---\n\n## 工作流引擎 (WorkflowRunner)\n\n### 架构概述\n\n`WorkflowRunner` 是 Forge 框架的核心执行引擎，负责协调整个对话工作流的生命周期。它接收用户输入、与 LLM 后端交互、执行工具调用，并管理上下文预算。\n\n```mermaid\ngraph TD\n    A[用户输入] --> B[WorkflowRunner.run]\n    B --> C[初始化 StepTracker]\n    C --> D[LLM 推理]\n    D --> E{Guardrails 检查}\n    E -->|通过| F{工具调用?}\n    E -->|失败| G[重试或终止]\n    F -->|是| H[执行工具]\n    F -->|否| I[返回结果]\n    H --> J[上下文压缩]\n    J --> D\n    I --> K[返回最终响应]\n```\n\n资料来源：[src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n\n### 核心数据结构\n\n#### Workflow\n\n`Workflow` 是工作流的定义容器，包含工具配置、必需步骤和终端条件：\n\n```python\n@dataclass\nclass Workflow:\n    name: str                          # 工作流名称\n    description: str                   # 描述\n    tools: dict[str, ToolDef]          # 工具定义字典\n    required_steps: list[str]          # 必需执行步骤列表\n    terminal_tool: str | frozenset[str] # 终端工具名称\n    system_prompt_template: str        # 系统提示模板\n```\n\n资料来源：[src/forge/core/workflow.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n#### ToolDef\n\n`ToolDef` 将工具模式绑定到具体实现：\n\n```python\n@dataclass\nclass ToolDef:\n    \"\"\"绑定工具模式到其实现\"\"\"\n    spec: ToolSpec                     # 工具规格\n    callable: Callable[..., Any]       # 可调用实现\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n前置条件 (`prerequisites`) 支持两种形式：\n- **字符串形式**：名称匹配（`\"read_file\"` — 任何对 `read_file` 的调用都满足）\n- **字典形式**：参数匹配（`{\"tool\": \"read_file\", \"match_arg\": \"path\"}` — 需要相同 `path` 参数的调用）\n\n资料来源：[src/forge/core/workflow.py:85-100](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n#### ToolSpec\n\n`ToolSpec` 定义工具的外部接口规范：\n\n```python\nclass ToolSpec(BaseModel):\n    \"\"\"LLM 看到的工具规格\"\"\"\n    name: str\n    description: str\n    parameters: type[BaseModel]  # Pydantic 
模型\n```\n\n资料来源：[src/forge/core/workflow.py:20-30](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n---\n\n## 步骤追踪系统 (StepTracker)\n\n### 功能概述\n\n`StepTracker` 负责追踪必需步骤的完成状态和工具执行历史，用于强制执行前置条件约束。该组件独立于消息历史存在，上下文压缩不会影响步骤完成状态。\n\n```mermaid\ngraph LR\n    A[ToolCall] --> B{检查前置条件}\n    B -->|满足| C[记录执行]\n    B -->|不满足| D[返回错误]\n    C --> E[更新 completed_steps]\n    C --> F[更新 executed_tools]\n```\n\n资料来源：[src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n\n### 核心数据结构\n\n| 类/数据结构 | 职责 |\n|------------|------|\n| `StepTracker` | 追踪已完成步骤和已执行工具 |\n| `PrerequisiteCheck` | 前置条件检查结果 |\n\n#### StepTracker\n\n```python\n@dataclass\nclass StepTracker:\n    \"\"\"追踪必需步骤完成状态和工具执行历史\"\"\"\n    required_steps: list[str]\n    completed_steps: dict[str, None] = field(default_factory=dict)\n    executed_tools: dict[str, list[dict[str, Any]]] = field(default_factory=dict)\n\n    def record(self, tool_name: str, args: dict[str, Any] | None = None) -> None:\n        \"\"\"记录成功的工具执行\"\"\"\n        self.completed_steps[tool_name] = None\n        self.executed_tools.setdefault(tool_name, []).append(args or {})\n\n    def is_satisfied(self) -> bool:\n        \"\"\"所有必需步骤是否都已调用\"\"\"\n        return all(s in self.completed_steps for s in self.required_steps)\n```\n\n#### PrerequisiteCheck\n\n```python\n@dataclass\nclass PrerequisiteCheck:\n    \"\"\"工具调用的前置条件检查结果\"\"\"\n    satisfied: bool\n    missing: list[str]  # 未满足的前置工具名列表\n```\n\n资料来源：[src/forge/core/steps.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n\n---\n\n## 上下文管理器 (ContextManager)\n\n### 功能概述\n\n`ContextManager` 负责管理对话上下文的大小和 token 预算。通过上下文压缩策略，确保长对话不会超出模型的上下文窗口限制。\n\n### 压缩策略\n\n框架支持多种上下文压缩策略，主要包括：\n\n| 策略 | 描述 |\n|------|------|\n| `TieredCompact` | 分层压缩，保留最近 N 条消息 |\n| `KeepRecentStrategy` | 仅保留最近的固定数量消息 |\n\n### 预算模式\n\n| 模式 | 说明 |\n|------|------|\n| `FORGE_FAST` | 快速模式，使用较小的上下文预算 |\n| `FORGE_BALANCED` | 
平衡模式 |\n| `MANUAL` | 手动指定 token 数量 |\n\n资料来源：[src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n\n---\n\n## 守卫系统 (Guardrails)\n\n### 架构概述\n\n守卫系统通过多层检查确保 LLM 响应符合预期，包括响应验证、前置步骤强制和错误追踪。\n\n```mermaid\ngraph TD\n    A[LLM Response] --> B[ResponseValidator]\n    B --> C{格式正确?}\n    C -->|是| D[StepEnforcer]\n    C -->|否| E[Rescue Parser]\n    E --> F{解析成功?}\n    F -->|是| D\n    F -->|否| G[Retry Nudge]\n    D --> H{步骤满足?}\n    H -->|是| I[Tool Execution]\n    H -->|否| J[ErrorTracker]\n    G --> K[重新发送 LLM]\n```\n\n资料来源：[src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n\n### 核心组件\n\n| 组件 | 职责 |\n|------|------|\n| `ResponseValidator` | 验证响应格式和工具调用提取 |\n| `StepEnforcer` | 强制执行必需步骤 |\n| `ErrorTracker` | 追踪重试次数和错误类型 |\n\n### Guardrails 配置参数\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `tool_names` | `list[str]` | 必需 | 可用工具名称列表 |\n| `terminal_tool` | `str \\| frozenset[str]` | 必需 | 终端工具名称 |\n| `required_steps` | `list[str] \\| None` | `None` | 必需步骤列表 |\n| `max_retries` | `int` | `3` | 最大重试次数 |\n| `max_tool_errors` | `int` | `2` | 最大工具错误次数 |\n| `rescue_enabled` | `bool` | `True` | 启用救援解析 |\n| `max_premature_attempts` | `int` | `3` | 过早终止尝试次数 |\n| `retry_nudge` | `Callable[[str], str] \\| None` | `None` | 自定义重试提示 |\n\n资料来源：[src/forge/guardrails/guardrails.py:50-80](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n\n### CheckResult 返回值\n\n`Guardrails.check()` 方法返回 `CheckResult`，包含：\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `status` | `str` | `\"pass\"`, `\"retry\"`, `\"fatal\"` |\n| `message` | `str \\| None` | 状态消息 |\n| `nudge` | `str \\| None` | 重试时发送给 LLM 的提示 |\n\n---\n\n## 服务管理器 (ServerManager)\n\n### 功能概述\n\n`ServerManager` 负责管理 LLM 后端服务的生命周期，包括启动、停止和配置。\n\n### 支持的后端\n\n| 后端 | 配置要求 | 说明 |\n|------|----------|------|\n| `ollama` | `model` 参数 | 使用 Ollama 模型 |\n| `llamaserver` | 
`gguf_path` 参数 | 本地 GGUF 文件 |\n| `llamafile` | `gguf_path` 参数 | Llamafile 可执行文件 |\n\n### 核心功能\n\n```python\nclass ServerManager:\n    async def start(\n        self,\n        model: str,\n        gguf_path: str | Path,\n        mode: str = \"native\",\n        extra_flags: list[str] | None = None,\n        ctx_override: int | None = None,\n        cache_type_k: str | None = None,\n        cache_type_v: str | None = None,\n        n_slots: int | None = None,\n        kv_unified: bool = False,\n    ) -> None:\n        \"\"\"启动后端服务\"\"\"\n```\n\n### 配置参数\n\n| 参数 | 类型 | 说明 |\n|------|------|------|\n| `model` | `str` | 模型标识符 |\n| `gguf_path` | `str \\| Path` | GGUF 文件路径 |\n| `mode` | `str` | 运行模式 (`native`, `reasoning` 等) |\n| `extra_flags` | `list[str] \\| None` | 额外的 CLI 参数 |\n| `ctx_override` | `int \\| None` | 上下文长度覆盖 |\n| `cache_type_k` | `str \\| None` | KV 缓存键量化类型 |\n| `cache_type_v` | `str \\| None` | KV 缓存值量化类型 |\n| `n_slots` | `int \\| None` | 并发槽位数 |\n| `kv_unified` | `bool` | 统一 KV 缓存 |\n\n资料来源：[src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 消息系统 (Messages)\n\n### 消息类型\n\n框架定义了标准化的消息类型用于通信：\n\n| 消息类型 | 描述 |\n|----------|------|\n| `UserMessage` | 用户输入消息 |\n| `AssistantMessage` | 助手响应消息 |\n| `ToolMessage` | 工具执行结果消息 |\n| `SystemMessage` | 系统级消息 |\n\n### ToolCall 结构\n\n`ToolCall` 是 LLM 返回的标准化工具调用表示：\n\n```python\nclass ToolCall(BaseModel):\n    \"\"\"LLM 返回的已验证工具调用\"\"\"\n    tool: str\n```\n\n资料来源：[src/forge/core/workflow.py:110-115](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n---\n\n## 组件协作流程\n\n### 完整执行流程\n\n```mermaid\nsequenceDiagram\n    participant User as 用户\n    participant Runner as WorkflowRunner\n    participant LLM as LLM Client\n    participant Step as StepTracker\n    participant Context as ContextManager\n    participant Guard as Guardrails\n    participant Tool as 工具执行\n\n    User->>Runner: run(workflow, user_input)\n    Runner->>Step: 初始化 
StepTracker\n    Runner->>Context: 初始化 ContextManager\n    Runner->>LLM: 发送推理请求\n    LLM-->>Runner: LLMResponse\n    Runner->>Guard: check(response)\n    Guard->>Step: 检查前置条件\n    Step-->>Guard: PrerequisiteCheck\n    alt 响应通过\n        Guard-->>Runner: CheckResult.pass\n        Runner->>Tool: 执行工具\n        Tool-->>Runner: 工具结果\n        Runner->>Context: 压缩上下文\n        Runner->>LLM: 发送下一轮\n    else 需要重试\n        Guard-->>Runner: CheckResult.retry + nudge\n        Runner->>LLM: 重试\n    else 致命错误\n        Guard-->>Runner: CheckResult.fatal\n        Runner-->>User: 返回错误\n    end\n```\n\n### 错误恢复机制\n\n| 错误类型 | 处理策略 | 最大重试 |\n|----------|----------|----------|\n| 格式错误 | 触发救援解析 | 由 `max_retries` 控制 |\n| 工具执行失败 | 记录错误，重试 | 由 `max_tool_errors` 控制 |\n| 过早终止 | 发送提示重试 | 由 `max_premature_attempts` 控制 |\n| 超时 | 记录超时错误 | 300s 固定超时 |\n\n---\n\n## 快速开始示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"城市名称\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"查询城市天气\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"获取当前天气\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"你是一个有帮助的助手。\",\n)\n\nasync def main():\n    client = OllamaClient(model=\"ministral-3:8b-instruct-2512-q4_K_M\", recommended_sampling=True)\n    ctx = ContextManager(strategy=TieredCompact(keep_recent=2), budget_tokens=8192)\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, 
\"巴黎天气怎么样？\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n\n---\n\n## 总结\n\nForge 框架的核心组件通过清晰的职责划分和异步架构，提供了强大的 LLM 工具调用能力：\n\n- **WorkflowRunner** 作为中央协调器，管理整个执行生命周期\n- **StepTracker** 确保必需步骤的执行顺序和前置条件满足\n- **ContextManager** 处理长对话的上下文压缩\n- **Guardrails** 提供多层次的响应验证和错误恢复\n- **ServerManager** 抽象了不同 LLM 后端的差异\n\n这些组件协同工作，使得 Forge 能够可靠地执行复杂的多轮对话任务。\n\n---\n\n<a id='page-workflow-internals'></a>\n\n## 工作流内部机制\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [内置工具系统](#page-tools)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/prompts/nudges.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/prompts/nudges.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n</details>\n\n# 工作流内部机制\n\n## 概述\n\nForge 的工作流系统是一个异步优先的 LLM 工具调用编排框架，核心设计目标是将复杂的 LLM 交互与工具执行解耦。工作流由 `Workflow` 类定义，封装了工具定义、步骤要求、终端工具等元信息，通过 `WorkflowRunner` 驱动执行。\n\n工作流机制包含三个核心子系统：\n\n1. **工具定义与绑定系统** — 定义可用工具及其参数模式\n2. **步骤追踪与前置条件系统** — 确保工具按正确顺序调用\n3. 
**Guardrails 保护系统** — 验证响应、处理重试、阻止过早终止\n\n资料来源：[src/forge/core/workflow.py:1-50]()\n\n## 核心数据模型\n\n### Workflow 类\n\n`Workflow` 是工作流的核心抽象，作为 dataclass 存储所有元信息。\n\n```python\n@dataclass\nclass Workflow:\n    name: str\n    description: str\n    tools: dict[str, ToolDef]\n    required_steps: list[str]\n    terminal_tool: str\n    system_prompt_template: str\n```\n\n| 属性 | 类型 | 说明 |\n|------|------|------|\n| `name` | `str` | 工作流唯一标识名 |\n| `description` | `str` | 工作流功能描述 |\n| `tools` | `dict[str, ToolDef]` | 工具名称到定义的映射 |\n| `required_steps` | `list[str]` | 必须按顺序调用的工具列表 |\n| `terminal_tool` | `str` | 结束工作流的工具名 |\n| `system_prompt_template` | `str` | 系统提示模板 |\n\n资料来源：[src/forge/core/workflow.py:60-80]()\n\n### ToolDef 与 ToolSpec\n\n`ToolDef` 将工具模式绑定到实际可执行函数，`ToolSpec` 定义工具的 JSON Schema 参数规范。\n\n```python\n@dataclass\nclass ToolDef:\n    spec: ToolSpec\n    callable: Callable[..., Any]\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n`prerequisites` 字段支持两种前置条件表达方式：\n\n| 类型 | 示例 | 含义 |\n|------|------|------|\n| `str` | `\"read_file\"` | 调用过任意参数的 read_file 即可 |\n| `dict` | `{\"tool\": \"read_file\", \"match_arg\": \"path\"}` | 必须调用过 path 参数相同的 read_file |\n\n资料来源：[src/forge/core/workflow.py:85-110]()\n\n### ToolCall 数据模型\n\n`ToolCall` 是 LLM 返回的工具调用验证结果。\n\n```python\nclass ToolCall(BaseModel):\n    tool: str\n    args: dict[str, Any] = {}\n    reasoning: str | None = None\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `tool` | `str` | 工具名称 |\n| `args` | `dict[str, Any]` | 工具参数 |\n| `reasoning` | `str \\| None` | 推理过程（用于带 thinking 的模型） |\n\n资料来源：[src/forge/core/workflow.py:120-130]()\n\n## 步骤追踪机制\n\n### StepTracker\n\n`StepTracker` 由工作流运行器维护，独立于消息历史，其状态不受上下文压缩影响。\n\n```python\n@dataclass\nclass StepTracker:\n    required_steps: list[str]\n    completed_steps: dict[str, None] = field(default_factory=dict)\n    executed_tools: dict[str, list[dict[str, Any]]] = field(default_factory=dict)\n```\n\n| 方法 | 功能 |\n|------|------|\n| 
`record(tool_name, args)` | 记录工具已执行及参数 |\n| `is_satisfied()` | 检查所有必需步骤是否完成 |\n| `pending()` | 返回未完成步骤列表 |\n\n资料来源：[src/forge/core/steps.py:30-55]()\n\n### PrerequisiteCheck\n\n前置条件检查结果用于判断工具调用是否满足依赖要求。\n\n```python\n@dataclass\nclass PrerequisiteCheck:\n    satisfied: bool\n    missing: list[str]\n```\n\n资料来源：[src/forge/core/steps.py:15-25]()\n\n## Guardrails 系统\n\nGuardrails 是 Forge 的保护中间件，将三个独立检查器组合成统一的两阶段 API：\n\n```mermaid\ngraph LR\n    A[LLM Response] --> B[Guardrails.check]\n    B --> C{ResponseValidator}\n    B --> D{StepEnforcer}\n    B --> E{ErrorTracker}\n    C --> F[CheckResult]\n    D --> F\n    E --> F\n```\n\n### CheckResult 结果类型\n\n```python\naction: Literal[\"execute\", \"retry\", \"step_blocked\", \"fatal\"]\ntool_calls: list[ToolCall] | None\nnudge: Nudge | None\nreason: str | None\n```\n\n| Action | 含义 | 后续处理 |\n|--------|------|----------|\n| `execute` | 响应有效，可执行工具 | 执行 tool_calls |\n| `retry` | 响应无效需重试 | 注入 nudge 后重新请求 |\n| `step_blocked` | 违反步骤顺序 | 注入 nudge 后重新请求 |\n| `fatal` | 达到最大重试次数 | 终止工作流 |\n\n资料来源：[src/forge/guardrails/guardrails.py:140-160]()\n\n### ResponseValidator\n\n验证 LLM 响应是否包含有效工具调用，支持救援解析。\n\n```python\nclass ResponseValidator:\n    def __init__(\n        self,\n        tool_names: list[str],\n        rescue_enabled: bool = True,\n        retry_nudge_fn: Callable[[str], str] | None = None,\n    )\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `tool_names` | — | 有效工具名列表 |\n| `rescue_enabled` | `True` | 是否启用救援解析 |\n| `retry_nudge_fn` | `None` | 自定义重试提示生成器 |\n\n救援解析支持以下格式：\n- OpenAI 格式：`{\"name\": ..., \"arguments\": ...}`\n- Qwen Coder XML 格式：`<function=name><parameter=key>value</parameter></function>`\n\n资料来源：[src/forge/guardrails/guardrails.py:40-80]()\n\n### StepEnforcer\n\n强制执行步骤顺序，防止过早调用终端工具。\n\n```python\nclass StepEnforcer:\n    def __init__(\n        self,\n        required_steps: list[str],\n        terminal_tools: frozenset[str],\n        max_premature_attempts: int = 3,\n    )\n```\n\n超过 
`max_premature_attempts` 次过早终止尝试后返回 `fatal`。\n\n资料来源：[src/forge/guardrails/guardrails.py:80-110]()\n\n### ErrorTracker\n\n追踪连续错误和工具执行失败。\n\n```python\nclass ErrorTracker:\n    def __init__(\n        self,\n        max_retries: int = 3,\n        max_tool_errors: int = 2,\n    )\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `max_retries` | `3` | 连续错误响应后终止 |\n| `max_tool_errors` | `2` | 连续工具执行失败后终止 |\n\n资料来源：[src/forge/guardrails/guardrails.py:110-140]()\n\n## 提示模板与 Nudge\n\n### 步骤阻止提示\n\n当模型尝试在完成必需步骤前调用终端工具时，系统注入阻止提示：\n\n```python\ndef step_blocked_nudge(terminal_tool, pending_steps, tier=1):\n    tier = max(1, min(3, tier))\n    steps = \", \".join(pending_steps)\n    if tier == 1:\n        return f\"You cannot call {terminal_tool} yet. You must first complete these required steps: {steps}.\"\n```\n\n| 层级 | 语气强度 | 使用场景 |\n|------|----------|----------|\n| 1 | 礼貌提示 | 首次违规 |\n| 2 | 直接命令 | 第二次违规 |\n| 3 | 强烈警告 | 第三次违规 |\n\n资料来源：[src/forge/prompts/nudges.py:1-30]()\n\n### 前置条件提示\n\n当工具调用缺少前置依赖时触发：\n\n```python\ndef prerequisite_nudge(tool_name, missing_prereqs):\n    prereqs = \", \".join(missing_prereqs)\n    return f\"You cannot call {tool_name} yet. 
You must first call: {prereqs}.\"\n```\n\n资料来源：[src/forge/prompts/nudges.py:35-55]()\n\n## 工作流执行流程\n\n### 完整执行状态机\n\n```mermaid\ngraph TD\n    A[初始化 WorkflowRunner] --> B[构建系统提示]\n    B --> C[发送初始请求到 LLM]\n    C --> D[接收 LLM 响应]\n    D --> E{Guardrails.check}\n    E -->|execute| F[执行工具]\n    E -->|retry| G[注入 nudge]\n    E -->|step_blocked| G\n    E -->|fatal| H[终止工作流]\n    F --> I{工具为终端工具?}\n    I -->|是| J[返回最终结果]\n    I -->|否| K[记录步骤]\n    K --> C\n    G --> C\n```\n\n### 分阶段 API 用法\n\n对于外部编排循环，Forge 提供两种使用方式：\n\n**简化 API（全部集成）**\n```python\nfrom forge.guardrails import Guardrails\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n\nresult = guardrails.check(response)\nif result.action == \"execute\":\n    executed = [tc.tool for tc in result.tool_calls]\n    done = guardrails.record(executed)\n```\n\n**分阶段 API（细粒度控制）**\n```python\nfrom forge.guardrails import ErrorTracker, ResponseValidator, StepEnforcer\n\nvalidator = ResponseValidator(tool_names=[...], rescue_enabled=True)\nenforcer = StepEnforcer(required_steps=[...], terminal_tool=\"answer\")\nerrors = ErrorTracker(max_retries=3, max_tool_errors=2)\n\n# 每阶段可自定义处理逻辑\n```\n\n资料来源：[examples/foreign_loop.py:1-80]()\n\n## 快速开始示例\n\n```python\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n          
  callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant.\",\n)\n\nasync def main():\n    client = OllamaClient(model=\"ministral-3:8b-instruct-2512-q4_K_M\")\n    ctx = ContextManager(strategy=TieredCompact(keep_recent=2), budget_tokens=8192)\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n```\n\n资料来源：[README.md:1-50]()\n\n## 架构设计要点\n\n### 异步优先设计\n\n所有客户端方法和运行器均为 `async` 实现，支持高并发工具调用场景：\n\n```python\nasync def run(self, workflow: Workflow, user_input: str) -> WorkflowResult:\n    ...\n```\n\n资料来源：[src/forge/core/workflow.py:1-30]()\n\n### 上下文管理隔离\n\n`ContextManager` 独立于步骤追踪，允许上下文压缩不影响步骤完成状态：\n\n```mermaid\ngraph LR\n    subgraph WorkflowRunner\n        A[StepTracker] --- B[ContextManager]\n    end\n    B --- C[消息历史]\n    A --- D[completed_steps]\n```\n\n### 工具参数模式\n\n使用 Pydantic 模型定义工具参数，确保类型安全：\n\n```python\nparameters: type[BaseModel]  # 必须是 Pydantic 模型\n```\n\n可通过 `get_json_schema()` 方法导出 JSON Schema：\n\n```python\ndef get_json_schema(self) -> dict[str, Any]:\n    return self.parameters.model_json_schema()\n```\n\n资料来源：[src/forge/core/workflow.py:50-65]()\n\n## 总结\n\nForge 的工作流机制通过清晰的职责分离实现了可靠的 LLM 工具调用编排：\n\n| 组件 | 职责 |\n|------|------|\n| `Workflow` | 定义工作流元信息和工具集合 |\n| `StepTracker` | 追踪步骤完成状态和前置条件 |\n| `Guardrails` | 统一验证、重试、阻止逻辑 |\n| `WorkflowRunner` | 协调执行流程和上下文管理 |\n\n该设计支持从简单的单工具调用到复杂的多步骤工作流，同时为外部编排系统提供了灵活的中间件接口。\n\n---\n\n<a id='page-guardrails'></a>\n\n## Guardrails 系统\n\n### 相关页面\n\n相关主题：[工作流内部机制](#page-workflow-internals), [核心组件详解](#page-core-components)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/guardrails/__init__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/__init__.py)\n- 
[src/forge/guardrails/response_validator.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/response_validator.py)\n- [src/forge/guardrails/step_enforcer.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/step_enforcer.py)\n- [src/forge/guardrails/error_tracker.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/error_tracker.py)\n- [src/forge/guardrails/nudge.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/nudge.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/prompts/nudges.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/prompts/nudges.py)\n</details>\n\n# Guardrails 系统\n\n## 概述\n\nGuardrails 系统是 Forge 框架的核心安全与流程控制中间件，设计用于在外部编排循环中确保 LLM 响应符合预期的工作流约束。该系统通过三重验证机制（响应验证、步骤强制、错误追踪）保护工具调用流程的完整性和可靠性。 资料来源：[src/forge/guardrails/guardrails.py:46-52]()\n\n## 系统架构\n\nGuardrails 系统由四个核心组件构成，它们协同工作以实现完整的中间件功能：\n\n```mermaid\ngraph TB\n    subgraph Guardrails系统\n        RV[ResponseValidator<br/>响应验证器]\n        SE[StepEnforcer<br/>步骤强制器]\n        ET[ErrorTracker<br/>错误追踪器]\n        NR[Nudge模块<br/>提示生成]\n    end\n    \n    LLM[LLM响应] --> RV\n    RV --> SE\n    SE --> ET\n    NR --> SE\n    \n    subgraph CheckResult\n        AC[action]\n        TC[tool_calls]\n        ND[nudge]\n        RS[reason]\n    end\n    \n    ET --> CheckResult\n```\n\n| 组件 | 职责 | 文件位置 |\n|------|------|----------|\n| ResponseValidator | 解析 LLM 响应，提取工具调用，处理文本救援 | `response_validator.py` |\n| StepEnforcer | 强制执行必需步骤顺序，阻止提前终止 | `step_enforcer.py` |\n| ErrorTracker | 追踪连续重试和工具错误次数 | `error_tracker.py` |\n| Nudge | 生成用户友好的引导提示消息 | `nudge.py` / `prompts/nudges.py` |\n\n资料来源：[src/forge/guardrails/guardrails.py:24-45]()\n\n## CheckResult 数据模型\n\n`check()` 方法返回 `CheckResult` 对象，包含四个字段：\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `action` | `Literal[\"execute\", \"retry\", \"step_blocked\", 
\"fatal\"]` | 下一步操作指令 |\n| `tool_calls` | `list[ToolCall] \\| None` | 提取的工具调用列表 |\n| `nudge` | `Nudge \\| None` | 注入的提示消息（retry/step_blocked 时设置） |\n| `reason` | `str \\| None` | 人类可读的解释（仅 fatal 时设置） |\n\n```python\nclass CheckResult(BaseModel):\n    action: Literal[\"execute\", \"retry\", \"step_blocked\", \"fatal\"]\n    tool_calls: list[ToolCall] | None = None\n    nudge: Nudge | None = None\n    reason: str | None = None\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:67-74]()\n\n## Guardrails 核心 API\n\n### 构造函数参数\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `tool_names` | `list[str]` | 必需 | 有效工具名称列表 |\n| `terminal_tool` | `str \\| frozenset[str]` | 必需 | 可结束工作流的工具 |\n| `required_steps` | `list[str] \\| None` | `None` | 终端工具前必须调用的步骤 |\n| `max_retries` | `int` | `3` | 连续错误响应后返回 fatal 的阈值 |\n| `max_tool_errors` | `int` | `2` | 工具执行失败后耗尽的阈值 |\n| `rescue_enabled` | `bool` | `True` | 是否启用文本救援解析 |\n| `max_premature_attempts` | `int` | `3` | 提前调用终端工具的次数上限 |\n| `retry_nudge` | `Callable[[str], str] \\| None` | `None` | 自定义重试提示生成器 |\n\n资料来源：[src/forge/guardrails/guardrails.py:76-91]()\n\n### 主要方法\n\n#### check() 方法\n\n在每次 LLM 响应后、执行任何工具前调用此方法。它依次通过三个验证组件：\n\n```mermaid\nsequenceDiagram\n    participant App as 调用方\n    participant G as Guardrails\n    participant RV as ResponseValidator\n    participant SE as StepEnforcer\n    participant ET as ErrorTracker\n    \n    App->>G: check(response)\n    G->>RV: validate(response)\n    RV-->>G: parsed_tool_calls\n    G->>SE: enforce(tool_calls)\n    SE-->>G: step_result\n    G->>ET: track_errors()\n    ET-->>G: error_status\n    G-->>App: CheckResult\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:93-126]()\n\n#### record() 方法\n\n在工具执行完成后调用，用于更新错误追踪器状态：\n\n```python\ndef record(self, executed: list[str]) -> bool:\n    \"\"\"Record executed tools and check if workflow is done.\n    \n    Returns:\n        True if the terminal tool was reached (workflow complete).\n    \"\"\"\n    self._errors.tool_succeeded()\n 
   return self._enforcer.is_terminal_reached(executed)\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:128-140]()\n\n## 组件详解\n\n### ResponseValidator（响应验证器）\n\n负责解析 LLM 返回的原始响应，提取结构化工具调用，并处理救援解析场景：\n\n- **工具调用提取**：从 JSON 格式或特定文本格式中提取工具名和参数\n- **救援解析**：当模型输出纯文本而非工具调用时，可配置地尝试恢复\n- **自定义重试提示**：支持通过 `retry_nudge_fn` 生成动态重试消息\n\n资料来源：[src/forge/guardrails/response_validator.py]()\n\n### StepEnforcer（步骤强制器）\n\n确保工作流按正确的步骤顺序执行：\n\n```mermaid\ngraph LR\n    A[search] --> B[lookup]\n    B --> C[answer]\n    \n    style A fill:#90EE90\n    style B fill:#90EE90\n    style C fill:#FFD700\n```\n\n**关键特性**：\n\n- 验证 `required_steps` 列表中的所有工具已被调用\n- 检测提前调用 `terminal_tool` 的情况（`max_premature_attempts` 控制）\n- 支持通过 `is_terminal_reached()` 判断工作流是否完成\n\n资料来源：[src/forge/guardrails/step_enforcer.py]()\n\n### ErrorTracker（错误追踪器）\n\n维护错误状态机，防止无限重试循环：\n\n| 错误类型 | 计数器 | 达到上限后果 |\n|----------|--------|--------------|\n| 连续重试 | `_consecutive_retries` | `check()` 返回 `\"fatal\"` |\n| 工具错误 | `_consecutive_tool_errors` | 工作流标记为\"耗尽\" |\n\n当工具成功执行时，两个计数器都会重置。\n\n资料来源：[src/forge/guardrails/error_tracker.py]()\n\n## Nudge 提示系统\n\nNudge 模块负责生成用户友好的引导消息，帮助模型理解并修正其行为。\n\n### step_nudge() 函数\n\n当模型试图跳过必需步骤直接调用终端工具时触发：\n\n| tier 值 | 语气强度 | 示例消息 |\n|---------|----------|----------|\n| 1 | 礼貌 | \"You cannot call answer yet. You must first complete these required steps: search, lookup.\" |\n| 2 | 直接 | \"You must call one of these tools now: search, lookup. Pick one.\" |\n| 3 | 强制 | \"STOP. You MUST call one of: search, lookup. 
Do NOT call answer.\" |\n\n```python\ndef step_nudge(\n    terminal_tool: str,\n    pending_steps: list[str],\n    tier: int = 1,\n) -> str:\n    tier = max(1, min(3, tier))  # 限制在 1-3 范围内\n```\n\n资料来源：[src/forge/prompts/nudges.py:18-40]()\n\n### prerequisite_nudge() 函数\n\n当模型调用带有前置依赖的工具但未满足前置条件时触发：\n\n```python\ndef prerequisite_nudge(tool_name: str, missing_prereqs: list[str]) -> str:\n    prereqs = \", \".join(missing_prereqs)\n    return (\n        f\"You cannot call {tool_name} yet. \"\n        f\"You must first call: {prereqs}. \"\n        \"Call the prerequisite tool now.\"\n    )\n```\n\n资料来源：[src/forge/prompts/nudges.py:42-56]()\n\n## 使用模式\n\n### 简化 API（推荐）\n\n使用 `Guardrails` 类进行一站式验证：\n\n```python\nfrom forge.guardrails import Guardrails\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n\ndef handle_response(response):\n    result = guardrails.check(response)\n    \n    if result.action == \"fatal\":\n        return f\"FATAL: {result.reason}\"\n    \n    if result.action in (\"retry\", \"step_blocked\"):\n        # 将 nudge.content 注入对话历史\n        return f\"{result.action}: {result.nudge.content}\"\n    \n    # 执行工具\n    executed = [tc.tool for tc in result.tool_calls]\n    done = guardrails.record(executed)\n    return f\"executed {executed}\" + (\" -- DONE\" if done else \"\")\n```\n\n资料来源：[examples/foreign_loop.py:19-40]()\n\n### 粒度 API（高级）\n\n直接访问各个验证组件，实现自定义行为：\n\n```python\nfrom forge.guardrails import ErrorTracker, ResponseValidator, StepEnforcer\n\nvalidator = ResponseValidator(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    rescue_enabled=True,\n)\nenforcer = StepEnforcer(\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\nerrors = ErrorTracker(max_retries=3, max_tool_errors=2)\n```\n\n适用于需要日志记录、指标收集或条件性救援的场景。\n\n资料来源：[examples/foreign_loop.py:52-66]()\n\n### 与 respond 工具集成\n\nForge 支持通过 
`respond()` 工具让模型直接返回文本响应：\n\n```python\nfrom forge.tools import RESPOND_TOOL_NAME, respond_spec\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\", RESPOND_TOOL_NAME],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n\ndef handle_response(response):\n    result = guardrails.check(response)\n    # ...\n    for tc in result.tool_calls:\n        if tc.tool == RESPOND_TOOL_NAME:\n            message = tc.args.get(\"message\", \"\")\n            return f\"MODEL SAYS: {message}\"\n```\n\n资料来源：[examples/foreign_loop.py:95-115]()\n\n## Guardrails 工作流状态图\n\n```mermaid\nstateDiagram-v2\n    [*] --> 等待响应\n    等待响应 --> 验证响应: LLM 返回\n    验证响应 --> 响应有效: 工具调用已提取\n    验证响应 --> 响应无效: 解析失败\n    响应无效 --> 检查重试次数: 递增 retry\n    检查重试次数 --> 等待响应: retry_nudge\n    检查重试次数 --> 致命错误: 达到上限\n    响应有效 --> 检查步骤顺序\n    检查步骤顺序 --> 步骤通过: 验证通过\n    检查步骤顺序 --> 步骤受阻: 缺少步骤\n    步骤受阻 --> 重置提前尝试: step_nudge\n    步骤通过 --> 执行工具\n    执行工具 --> 记录执行: record()\n    记录执行 --> 工作流完成?: terminal reached\n    记录执行 --> 工具错误?: 工具执行失败\n    工作流完成? --> [*]: 是\n    工作流完成? --> 等待响应: 否\n    工具错误? --> 等待响应: 重试\n    工具错误? --> [*]: 达到上限\n```\n\n## 配置与消融研究\n\nGuardrails 的各个组件可通过 `AblationConfig` 独立切换，用于评估每个防护措施的性能影响：\n\n```python\n# tests/eval/ablation.py 中的配置示例\n@dataclass\nclass AblationConfig:\n    enable_rescue: bool = True\n    enable_step_enforcement: bool = True\n    enable_error_tracker: bool = True\n```\n\n在 CONTRIBUTING.md 中提到，每个 guardrail 可以独立消融：\n\n> Guardrails live in the runner (`src/forge/core/runner.py`) and nudge templates (`src/forge/prompts/nudges.py`). 
Each guardrail can be independently toggled via ablation presets in `tests/eval/ablation.py`.\n\n资料来源：[CONTRIBUTING.md:3-8]()\n\n## 总结\n\nGuardrails 系统通过模块化的验证、强制和追踪机制，为 Forge 工作流提供了可靠的安全防护：\n\n| 特性 | 描述 |\n|------|------|\n| **模块化设计** | ResponseValidator、StepEnforcer、ErrorTracker 可独立使用 |\n| **灵活配置** | 支持自定义重试次数、错误阈值、救援行为 |\n| **可观测性** | 通过 `nudge` 机制提供清晰的错误引导 |\n| **状态管理** | 内置错误计数和步骤追踪，自动防止循环 |\n| **消融支持** | 完整的独立切换能力，便于性能分析 |\n\n---\n\n<a id='page-context-management'></a>\n\n## 上下文管理\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [架构概述](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n- [src/forge/context/strategies.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/strategies.py)\n- [src/forge/context/hardware.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/hardware.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n</details>\n\n# 上下文管理\n\n## 概述\n\n上下文管理（Context Management）是 forge 框架中负责管理对话历史和令牌预算的核心系统。在与大语言模型（LLM）进行多轮交互时，上下文窗口的大小直接影响能保留多少历史信息和工具调用记录。forge 的上下文管理系统通过预算模式（Budget Mode）和压缩策略（Compaction Strategy）的组合，确保在有限的令牌预算内最大化有效信息的保留。\n\n该系统位于 `src/forge/context/` 模块下，包含三个核心子模块：\n\n| 模块 | 职责 |\n|------|------|\n| `manager.py` | 上下文管理的主入口，负责预算解析和消息压缩协调 |\n| `strategies.py` | 定义压缩策略，如 `TieredCompact`（分层压缩） |\n| `hardware.py` | 硬件感知配置，根据设备资源调整上下文参数 |\n\n资料来源：[CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n\n---\n\n## 核心概念\n\n### 令牌预算（Budget Tokens）\n\n令牌预算是上下文管理的基础参数，定义了允许保留在上下文窗口中的最大令牌数量。在初始化 `ContextManager` 时通过 `budget_tokens` 参数指定：\n\n```python\nfrom forge import ContextManager, TieredCompact\n\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    
budget_tokens=8192\n)\n```\n\n资料来源：[README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n\n### 预算模式（Budget Mode）\n\n预算模式决定了如何确定令牌预算的来源。forge 支持多种预算解析策略：\n\n| 模式 | 说明 | 适用场景 |\n|------|------|----------|\n| `FORGE_FAST` | 快速模式，使用较小的上下文预算 | 简单查询、快速响应 |\n| `FORGE_BALANCED` | 平衡模式，在速度和深度间取得平衡 | 常规工作流 |\n| `FORGE_DEEP` | 深度模式，使用更大的上下文预算 | 复杂推理任务 |\n| `MANUAL` | 手动指定预算值 | 精确控制令牌使用 |\n\n```python\nfrom forge.server import BudgetMode\n\nbudget = await server.start_with_budget(\n    model=identity,\n    gguf_path=gguf_path or \"\",\n    mode=mode,\n    budget_mode=BudgetMode.FORGE_FAST,\n    client=client,\n)\n```\n\n资料来源：[src/forge/server.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 架构设计\n\n### 系统架构图\n\n```mermaid\ngraph TD\n    A[WorkflowRunner] --> B[ContextManager]\n    B --> C[压缩策略 Strategy]\n    B --> D[预算解析 Budget Resolution]\n    \n    C --> E[TieredCompact]\n    C --> F[其他策略...]\n    \n    D --> G[ServerManager]\n    G --> H[服务端上下文查询]\n    G --> I[手动预算指定]\n    \n    J[硬件配置 hardware.py] --> B\n```\n\n### 预算解析流程\n\n当使用服务端管理时，预算的解析遵循以下逻辑：\n\n```mermaid\ngraph TD\n    A[resolve_budget 调用] --> B{预算模式}\n    B -->|MANUAL| C[使用 manual_tokens]\n    B -->|OLLAMA| D{模式判断}\n    B -->|LLAMASERVER| E[查询服务端上下文]\n    B -->|LLAMAFILE| E\n    \n    D -->|MANUAL| C\n    D -->|其他| F[查询服务端上下文]\n    \n    C --> G[返回预算令牌数]\n    E --> G\n    F --> G\n```\n\n资料来源：[src/forge/server.py:200-280](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 分层压缩策略（TieredCompact）\n\n### 核心思想\n\n`TieredCompact` 是一种分层保留策略，其核心思想是保留最近的消息完整不变，同时对较早的消息进行摘要或丢弃。这种策略在保持对话连贯性的同时，最大化地利用有限的令牌预算。\n\n### 参数配置\n\n| 参数 | 类型 | 说明 | 备注 |\n|------|------|------|------|\n| `keep_recent` | `int` | 保留最近 N 轮完整对话 | 默认值：2 |\n| `budget_tokens` | `int` | 总令牌预算 | 继承自 ContextManager |\n\n```python\n# 保留最近 2 轮完整消息，剩余预算用于早期消息摘要\nstrategy = 
TieredCompact(keep_recent=2)\n```\n\n资料来源：[README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n\n### 工作流程\n\n```mermaid\ngraph LR\n    A[消息列表] --> B[识别最近 N 轮]\n    B --> C[保留完整]\n    D[早期消息] --> E{是否可摘要}\n    E -->|是| F[生成摘要]\n    E -->|否| G[丢弃]\n    F --> H[放入上下文]\n    C --> H\n    G --> H\n```\n\n---\n\n## 硬件感知配置\n\n### 硬件适配\n\n`hardware.py` 模块负责根据运行环境的硬件资源（主要是 GPU 显存）调整上下文配置。这对于在消费级硬件上运行大模型尤为重要。\n\n### 主要考量因素\n\n| 因素 | 影响 |\n|------|------|\n| GPU 显存大小 | 决定最大并发槽数（n_slots） |\n| KV Cache 类型 | 影响上下文长度和质量 |\n| 上下文分区策略 | 共享 vs 分区 KV Cache |\n\n```python\n# 多槽位配置示例\nserver = ServerManager(...)\nawait server.start_with_budget(\n    n_slots=4,           # 4 个并发槽位\n    kv_unified=True,    # 共享 KV Cache\n)\n```\n\n资料来源：[src/forge/server.py:30-40](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 服务端上下文管理\n\n### 上下文长度查询\n\n服务端通过 `/props` 端点报告其配置的上下文长度：\n\n```python\nasync def get_server_context(self) -> int:\n    \"\"\"获取服务端报告的上下文长度\"\"\"\n    try:\n        props = await self.query_props()\n    except (httpx.HTTPError, BackendError) as exc:\n        raise BudgetResolutionError(cause=exc) from exc\n    \n    ctx = props.get(\"default_generation_settings\", {}).get(\"n_ctx\")\n    if ctx is None:\n        raise BudgetResolutionError()\n    return ctx\n```\n\n资料来源：[src/forge/server.py:180-195](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n### KV Cache 配置\n\n对于非 Ollama 后端（llamaserver / llamafile），支持细粒度的 KV Cache 量化配置：\n\n| 参数 | 说明 | 示例值 |\n|------|------|--------|\n| `cache_type_k` | Key 缓存量化类型 | `\"q8_0\"`, `\"q4_0\"` |\n| `cache_type_v` | Value 缓存量化类型 | `\"q8_0\"`, `\"q4_0\"` |\n\n```python\nserver = ServerManager(backend=\"llamaserver\", port=8080)\nawait server.start_with_budget(\n    cache_type_k=\"q4_0\",\n    cache_type_v=\"q8_0\",\n)\n```\n\n资料来源：[src/forge/server.py:25-35](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 使用指南\n\n### 
基础用法\n\n```python\nimport asyncio\nfrom forge import (\n    WorkflowRunner,\n    OllamaClient,\n    ContextManager,\n    TieredCompact,\n)\n\nasync def main():\n    # 创建上下文管理器\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    \n    # 创建客户端和运行器\n    client = OllamaClient(model=\"ministral-3:8b-q4_K_M\")\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    \n    # 运行工作流\n    # await runner.run(workflow, user_message)\n\nasyncio.run(main())\n```\n\n### 与服务端集成\n\n```python\nfrom forge.server import ServerManager, BudgetMode\n\nasync def main():\n    # 启动服务端并自动解析预算\n    server, ctx = await ServerManager.start_with_budget(\n        backend=\"llamaserver\",\n        gguf_path=\"/path/to/model.gguf\",\n        budget_mode=BudgetMode.FORGE_BALANCED,\n    )\n    \n    # ... 使用 server 和 ctx ...\n    \n    await server.stop()\n\nasyncio.run(main())\n```\n\n---\n\n## 与守卫系统的协同\n\n上下文管理与守卫系统（Guardrails）紧密配合，共同确保工作流的正确执行：\n\n```mermaid\ngraph LR\n    A[LLM 响应] --> B[Guardrails.check]\n    B --> C{响应类型}\n    C -->|工具调用| D[执行工具]\n    C -->|文本响应| E[上下文压缩]\n    C -->|错误| F[重试/提示]\n    \n    D --> G[ContextManager 压缩]\n    E --> G\n```\n\n守卫系统会拦截 LLM 的响应，进行验证后交由上下文管理器决定是否需要压缩历史记录：\n\n```python\nclass Guardrails:\n    def __init__(\n        self,\n        tool_names: list[str],\n        terminal_tool: str | frozenset[str],\n        required_steps: list[str] | None = None,\n        max_retries: int = 3,\n        max_tool_errors: int = 2,\n        rescue_enabled: bool = True,\n        max_premature_attempts: int = 3,\n        retry_nudge: Callable[[str], str] | None = None,\n    ) -> None:\n        # ...\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:40-60](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n\n---\n\n## 错误处理\n\n### BudgetResolutionError\n\n当无法解析预算时会抛出此异常，通常发生在服务端不可达或未返回有效上下文长度时：\n\n```python\ntry:\n    ctx_length = await 
server.get_server_context()\nexcept BudgetResolutionError as e:\n    # 回退到手动指定\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=4096  # 手动指定\n    )\n```\n\n资料来源：[src/forge/server.py:170-180](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 最佳实践\n\n1. **选择合适的预算模式**：简单查询使用 `FORGE_FAST`，复杂推理任务使用 `FORGE_DEEP`\n2. **合理设置 keep_recent**：通常 2-3 轮即可保留对话上下文\n3. **监控令牌使用**：定期检查实际使用的令牌数，避免溢出\n4. **硬件感知配置**：在资源受限环境使用较小的 n_slots 和共享 KV Cache\n5. **与服务端配合使用**：利用服务端自动解析上下文长度，减少手动配置\n\n---\n\n<a id='page-slot-worker'></a>\n\n## SlotWorker 槽位调度\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [架构概述](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py) — ServerManager 槽位启动参数定义\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py) — WorkflowRunner 槽位上下文管理引用\n- [src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py) — 步骤追踪与工具执行记录\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py) — 响应验证与错误处理\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md) — 槽位调度相关架构说明\n</details>\n\n# SlotWorker 槽位调度\n\n## 概述\n\nSlotWorker 是 forge 框架中用于管理 **llama-server / llamafile 后端并发槽位** 的核心机制。它通过 `ServerManager` 类实现，支持在同一进程中启动具有多个独立 KV Cache 槽位的推理服务器，从而支持多智能体（multi-agent）架构的并发执行。\n\n槽位调度的核心目的是：**在单个推理服务器实例上并行运行多个独立的 agent 工作流，每个槽位拥有独立的上下文状态，彼此互不干扰。**\n\n资料来源：[src/forge/server.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n## 架构设计\n\n### 槽位模型\n\nforge 的槽位调度采用 **硬分区 + 可选统一缓存** 两种模式：\n\n| 模式 | `kv_unified` | 行为描述 | 适用场景 |\n|------|-------------|----------|----------|\n| 独立槽位 | `False`（默认） | 每个槽位拥有独立的 KV Cache 切片，上下文长度在槽位创建时硬性划分 | 资源受限环境、需要严格隔离的并发任务 |\n| 统一缓存 | `True` | 所有槽位共享单一 KV Cache 池，每个槽位可使用完整上下文容量 | 需要灵活分配上下文的高并发场景 
|\n\n资料来源：[src/forge/server.py:35-42]()\n\n### 组件关系图\n\n```mermaid\ngraph TD\n    A[WorkflowRunner] --> B[ContextManager]\n    A --> C[ServerManager]\n    C --> D[llama-server / llamafile]\n    D --> E[Slot 0: KV Cache]\n    D --> F[Slot 1: KV Cache]\n    D --> G[Slot N: KV Cache]\n    E --> H[独立上下文]\n    F --> H\n    G --> H\n```\n\n## ServerManager 槽位管理\n\n### 初始化参数\n\n`ServerManager` 类负责启动和管理带有槽位配置的推理服务器。其构造函数接受以下参数：\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `backend` | `str` | — | 后端类型：`\"ollama\"`、`\"llamaserver\"` 或 `\"llamafile\"` |\n| `port` | `int` | `8080` | 服务端口（llama-server / llamafile 专用） |\n| `models_dir` | `str \\| Path` | `None` | GGUF 模型文件目录 |\n\n资料来源：[src/forge/server.py:120-133]()\n\n### 槽位状态追踪\n\n`ServerManager` 内部维护以下状态字段用于槽位调度决策：\n\n```python\n_current_model: str | None       # 当前加载的模型标识\n_current_mode: str | None         # 运行模式 (native/prompt/reforged)\n_current_ctx: int | None          # 上下文长度配置\n_current_flags: tuple[str, ...]   # 额外 CLI 参数\n_current_cache_type_k: str | None # KV Cache Key 量化类型\n_current_cache_type_v: str | None # KV Cache Value 量化类型\n_current_n_slots: int | None      # 并发槽位数量\n_current_kv_unified: bool         # 是否启用统一 KV Cache\n```\n\n资料来源：[src/forge/server.py:135-146]()\n\n### 启动方法签名\n\n```python\nasync def start(\n    self,\n    model: str,\n    gguf_path: str | Path,\n    mode: str = \"native\",\n    extra_flags: list[str] | None = None,\n    ctx_override: int | None = None,\n    cache_type_k: str | None = None,  # e.g. \"q8_0\", \"q4_0\"\n    cache_type_v: str | None = None,  # e.g. 
\"q8_0\", \"q4_0\"\n    n_slots: int | None = None,       # 并发槽位数量\n    kv_unified: bool = False,         # 统一 KV Cache 模式\n) -> None:\n```\n\n资料来源：[src/forge/server.py:60-77]()\n\n## 槽位配置与缓存量化\n\n### KV Cache 量化类型\n\nforge 支持对 Key 和 Value 缓存分别进行量化，以节省显存：\n\n| 量化类型 | 说明 | 显存节省 | 精度损失 |\n|----------|------|----------|----------|\n| `q8_0` | 8-bit 量化 | 中等 | 较低 |\n| `q4_0` | 4-bit 量化 | 高 | 中等 |\n| `q4_K_M` | 混合 4-bit（中等块大小） | 高 | 较低-中等 |\n\n### 槽位复用机制\n\n`ServerManager` 实现了**智能复用逻辑**：当新请求的配置与当前运行配置完全一致时，跳过重启直接复用现有槽位：\n\n```python\nif (\n    self._current_model == model\n    and self._current_mode == mode\n    and self._current_ctx == ctx_override\n    and self._current_flags == flags\n    and self._current_cache_type_k == cache_type_k\n    and self._current_cache_type_v == cache_type_v\n    and self._current_n_slots == n_slots\n    and self._current_kv_unified == kv_unified\n):\n    return  # 复用现有槽位\n```\n\n资料来源：[src/forge/server.py:69-81]()\n\n## 工作流程集成\n\n### 槽位与 WorkflowRunner 的协作\n\n`WorkflowRunner` 在执行工作流时通过 `ContextManager` 管理上下文，并通过 `ServerManager` 与后端交互：\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant WorkflowRunner\n    participant ContextManager\n    participant ServerManager\n    participant LlamaServer\n    \n    User->>WorkflowRunner: run(workflow, input)\n    WorkflowRunner->>ContextManager: 请求上下文预算\n    ContextManager->>ServerManager: resolve_budget(mode)\n    ServerManager->>LlamaServer: 查询 /props 获取 n_ctx\n    LlamaServer-->>ServerManager: 返回上下文长度\n    ServerManager-->>ContextManager: 返回预算\n    WorkflowRunner->>LlamaServer: 发送推理请求（槽位分配）\n```\n\n### 步骤追踪与槽位隔离\n\n每个槽位在执行过程中会独立维护步骤状态：\n\n```python\n@dataclass\nclass StepTracker:\n    \"\"\"追踪必需步骤完成情况和工具执行记录\"\"\"\n    required_steps: list[str]\n    completed_steps: dict[str, None] = field(default_factory=dict)\n    executed_tools: dict[str, list[dict[str, Any]]] = field(default_factory=dict)\n```\n\n资料来源：[src/forge/core/steps.py:22-30]()\n\n## 槽位调度模式\n\n### 模式对比\n\n| 模式 | 上下文分配 | 
资源利用率 | 隔离性 | 配置参数 |\n|------|-----------|-----------|--------|----------|\n| **独立槽位** | 硬性分区，每个槽位固定容量 | 中等 | 强 | `n_slots=N, kv_unified=False` |\n| **统一缓存** | 动态共享，任意槽位可用全部容量 | 高 | 中等 | `n_slots=N, kv_unified=True` |\n\n资料来源：[src/forge/server.py:38-42]()\n\n### 配置示例\n\n```python\nfrom forge import ServerManager, BudgetMode\n\n# 创建支持 4 个并发槽位的服务器管理器\nserver = ServerManager(backend=\"llamaserver\", port=8080)\n\n# 启动服务器，配置统一 KV Cache\nawait server.start(\n    model=\"qwen3:14b-q4_K_M\",\n    gguf_path=\"/models/qwen3-14b-q4_k_m.gguf\",\n    mode=\"reforged\",\n    n_slots=4,\n    kv_unified=True,\n    cache_type_k=\"q8_0\",\n    cache_type_v=\"q4_0\",\n)\n```\n\n## 错误处理与超时\n\n### 错误追踪机制\n\n`Guardrails` 模块提供槽位级别的错误追踪：\n\n```python\n@dataclass\nclass ErrorTracker:\n    \"\"\"追踪重试次数和工具错误\"\"\"\n    max_retries: int\n    max_tool_errors: int\n    _consecutive_retries: int = 0\n    _tool_errors: int = 0\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:45-52]()\n\n### 超时配置\n\n批量评估时，每个场景有 **300 秒墙钟超时**：\n\n> 批量评估时，每个场景有 300s 墙钟超时；超时时运行记录为 `completeness=False, error_type='Timeout'`，批次继续执行。\n\n资料来源：[CHANGELOG.md:30-32]()\n\n## 最佳实践\n\n### 槽位数量选择\n\n| 模型大小 | 推荐槽位数 | 说明 |\n|----------|-----------|------|\n| ≤ 8B | 2-4 | 显存充裕时可增加并发 |\n| 14B-32B | 1-2 | 需更多显存用于单槽 |\n| > 32B | 1 | 大模型建议独占槽位 |\n\n### 缓存量化建议\n\n| 量化类型 | 推荐场景 | 显存占用 |\n|----------|----------|----------|\n| `q8_0/q8_0` | 高精度需求 | 较高 |\n| `q4_K_M/q4_K_M` | 平衡场景 | 中等 |\n| `q4_0/q8_0` | 内存受限 | 较低 |\n\n## 限制与已知问题\n\n### llama.cpp reasoning budget 挂起问题\n\n> **已知问题**：2026 年 4 月 10 日之后的构建版本中，Gemma 4、Qwen 3.5 和 Ministral Reasoning 模型使用无界 reasoning budget sampler 会导致静默挂起。\n\n**临时解决方案**：使用 `--reasoning-budget 0` 参数禁用。\n\n资料来源：[CHANGELOG.md:24-28]()\n\n### 后端兼容性\n\n| 后端 | 槽位支持 | 说明 |\n|------|---------|------|\n| Ollama | ❌ 不支持 | 使用原生模型管理 |\n| llama-server | ✅ 完全支持 | 推荐用于多槽位场景 |\n| llamafile | ✅ 完全支持 | 与 llama-server 行为一致 |\n\n## 总结\n\nSlotWorker 槽位调度是 forge 框架支持多智能体并发执行的核心基础设施。通过 `ServerManager` 类，开发者可以：\n\n1. 
**配置并发槽位数量** (`n_slots`) 以支持多 agent 并行\n2. **选择 KV Cache 模式** (`kv_unified`) 平衡隔离性与资源利用\n3. **优化显存使用** 通过 Key/Value 缓存量化\n4. **实现智能复用** 避免不必要的服务器重启\n\n该机制使 forge 能够在单个推理服务器实例上高效运行多个独立工作流，特别适合需要并行评估或异步执行多个 agent 任务的场景。\n\n---\n\n<a id='page-tools'></a>\n\n## 内置工具系统\n\n### 相关页面\n\n相关主题：[工作流内部机制](#page-workflow-internals)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/tools/__init__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/tools/__init__.py)\n- [src/forge/tools/respond.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/tools/respond.py)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/guardrails/response_validator.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/response_validator.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n</details>\n\n# 内置工具系统\n\n## 概述\n\nForge 的内置工具系统是一套用于定义、注册和执行 LLM 工具调用的核心机制。该系统通过 `ToolSpec`、`ToolDef` 和 `ToolCall` 三个核心类实现工具的 schema 定义与实际执行逻辑的绑定，并为工作流引擎提供验证、执行和依赖管理能力。\n\n工具系统在整个架构中扮演关键角色：\n\n- **工具发现**：工作流通过 `ToolDef` 字典声明可用工具\n- **LLM 接口**：工具规格自动转换为 JSON Schema，供 LLM 理解调用格式\n- **执行验证**：GuardRails 组件验证工具调用合法性\n- **依赖管理**：通过前置条件（Prerequisites）表达工具间依赖关系\n\n资料来源：[src/forge/core/workflow.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n## 核心数据模型\n\n### ToolSpec\n\n`ToolSpec` 是工具的静态声明，使用 Pydantic 模型定义参数 schema：\n\n```python\n@dataclass\nclass ToolSpec:\n    \"\"\"Tool specification with name, description, and parameters.\"\"\"\n    name: str\n    description: str\n    parameters: type[BaseModel]\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `name` | `str` | 工具唯一标识名 |\n| `description` | `str` | 工具功能描述，供 LLM 理解用途 |\n| `parameters` | `type[BaseModel]` | Pydantic 模型类，定义参数结构 |\n\n`parameters` 字段接收一个 Pydantic `BaseModel` 子类，Forge 
自动从中提取 JSON Schema：\n\n```python\ndef get_json_schema(self) -> dict[str, Any]:\n    \"\"\"Return JSON Schema dict for this tool's parameters.\"\"\"\n    return self.parameters.model_json_schema()\n```\n\n资料来源：[src/forge/core/workflow.py:100-140]()\n\n### ToolDef\n\n`ToolDef` 将工具规格与实现绑定，并声明执行依赖：\n\n```python\n@dataclass\nclass ToolDef:\n    \"\"\"Binds a tool schema to its implementation.\"\"\"\n    spec: ToolSpec\n    callable: Callable[..., Any]\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `spec` | `ToolSpec` | 工具规格（名称、描述、参数schema） |\n| `callable` | `Callable` | 实际执行的 Python 函数 |\n| `prerequisites` | `list` | 前置依赖条件列表 |\n\n`prerequisites` 支持两种表达方式：\n\n1. **字符串形式**（名称匹配）：\n   ```python\n   prerequisites=[\"read_file\"]\n   ```\n   表示调用此工具前必须曾调用过 `read_file`\n\n2. **字典形式**（参数匹配）：\n   ```python\n   prerequisites=[{\"tool\": \"read_file\", \"match_arg\": \"path\"}]\n   ```\n   表示调用此工具前必须调用过 `read_file` 且 `path` 参数值相同\n\n资料来源：[src/forge/core/workflow.py:143-175]()\n\n### ToolCall\n\n`ToolCall` 是经 LLM 返回并验证后的工具调用对象：\n\n```python\nclass ToolCall(BaseModel):\n    \"\"\"Validated tool invocation returned by an LLMClient.\"\"\"\n    tool: str\n```\n\n由 `ResponseValidator` 验证通过后生成，传递给执行层。\n\n资料来源：[src/forge/core/workflow.py:177-179]()\n\n## 工具注册与使用\n\n### 工作流中的工具声明\n\n在创建工作流时，通过 `tools` 字典注册所有可用工具：\n\n```python\nfrom forge import Workflow, ToolDef, ToolSpec\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. 
Use the available tools to answer the user.\",\n)\n```\n\n工作流内部从 `tools` 字典派生出：\n- **工具规格列表**：供 LLM 理解可调用工具\n- **可调用对象映射**：执行时的函数查找表\n\n资料来源：[README.md:1-40]()\n\n### 参数模型定义\n\n使用 Pydantic 定义工具参数，每个参数可带描述供 LLM 理解：\n\n```python\nfrom pydantic import BaseModel, Field\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n```\n\nPydantic 自动处理类型验证和类型转换。\n\n资料来源：[README.md:5-15]()\n\n## 前置依赖机制\n\n### 依赖声明语法\n\n工具的前置依赖通过 `prerequisites` 字段声明，支持细粒度控制：\n\n```python\ndef answer(question: str) -> str:\n    \"\"\"Answer a question.\"\"\"\n    return \"The answer is 42.\"\n\nworkflow_tools = {\n    \"search\": ToolDef(\n        spec=ToolSpec(name=\"search\", description=\"Search the web\", parameters=SearchParams),\n        callable=search,\n    ),\n    \"lookup\": ToolDef(\n        spec=ToolSpec(name=\"lookup\", description=\"Look up details\", parameters=LookupParams),\n        callable=lookup,\n    ),\n    \"answer\": ToolDef(\n        spec=ToolSpec(name=\"answer\", description=\"Answer the question\", parameters=AnswerParams),\n        callable=answer,\n        prerequisites=[\"search\", \"lookup\"],  # 简单依赖\n    ),\n}\n```\n\n### 依赖执行流程\n\n```mermaid\ngraph TD\n    A[LLM 返回工具调用] --> B[StepEnforcer 检查依赖]\n    B --> C{依赖满足?}\n    C -->|是| D[执行工具]\n    C -->|否| E[返回 Nudge 提示]\n    D --> F[记录执行结果]\n    F --> G{所有必需步骤完成?}\n    G -->|是| H[允许终止工具]\n    G -->|否| A\n```\n\n`StepEnforcer` 组件负责验证依赖是否满足：\n\n```python\nenforcer = StepEnforcer(\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n```\n\n资料来源：[examples/foreign_loop.py:1-80]()\n\n## 工具验证流程\n\n### ResponseValidator\n\n`ResponseValidator` 负责验证 LLM 返回的工具调用：\n\n1. **工具名称验证**：检查是否在允许列表中\n2. **参数验证**：通过 Pydantic 模型验证参数合法性\n3. 
**未知工具检测**：识别并提示未知工具名\n\n```python\nvalidator = ResponseValidator(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    rescue_enabled=True,\n)\n```\n\n资料来源：[src/forge/guardrails/response_validator.py:1-60]()\n\n### 验证结果处理\n\n验证结果通过 `ValidationResult` 返回：\n\n| 结果 | 说明 |\n|------|------|\n| `tool_calls` | 验证通过的工具调用列表 |\n| `nudge` | 需要重试时的提示信息 |\n| `needs_retry` | 是否需要 LLM 重试 |\n\n```python\nif result.action == \"fatal\":\n    return f\"FATAL: {result.reason}\"\n\nif result.action in (\"retry\", \"step_blocked\"):\n    # 注入 nudge 到对话历史\n    return f\"{result.action}: {result.nudge.content[:80]}...\"\n\n# result.action == \"execute\"\n# 执行工具并通知 forge\ntool_calls = result.tool_calls\nexecuted = [tc.tool for tc in tool_calls]\ndone = guardrails.record(executed)\n```\n\n资料来源：[examples/foreign_loop.py:30-50]()\n\n## GuardRails 集成\n\n`GuardRails` 是工具系统的顶层编排组件，整合验证、依赖检查和错误追踪：\n\n```python\n@dataclass\nclass GuardRails:\n    \"\"\"Check LLM responses against tool-calling guardrails.\"\"\"\n\n    def __init__(\n        self,\n        tool_names: list[str],\n        terminal_tool: str | frozenset[str],\n        required_steps: list[str] | None = None,\n        max_retries: int = 3,\n        max_tool_errors: int = 2,\n        rescue_enabled: bool = True,\n        max_premature_attempts: int = 3,\n        retry_nudge: Callable[[str], str] | None = None,\n    ) -> None:\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `tool_names` | - | 允许的工具名称列表 |\n| `terminal_tool` | - | 终止工具名称 |\n| `required_steps` | `None` | 必需的执行步骤 |\n| `max_retries` | `3` | 最大重试次数 |\n| `max_tool_errors` | `2` | 最大工具错误数 |\n| `rescue_enabled` | `True` | 是否启用救援解析 |\n| `max_premature_attempts` | `3` | 提前终止最大次数 |\n| `retry_nudge` | `None` | 自定义重试提示函数 |\n\n资料来源：[src/forge/guardrails/guardrails.py:1-50]()\n\n## 架构图\n\n### 工具系统完整架构\n\n```mermaid\ngraph TB\n    subgraph \"工具定义层\"\n        PS[ToolSpec<br/>工具规格]\n        PD[ToolDef<br/>工具定义]\n        PC[Callable<br/>可调用对象]\n        
PR[Prerequisites<br/>前置依赖]\n    end\n    \n    subgraph \"工作流层\"\n        WF[Workflow<br/>工作流]\n        SE[StepEnforcer<br/>步骤执行器]\n        CM[ContextManager<br/>上下文管理]\n    end\n    \n    subgraph \"验证层\"\n        RV[ResponseValidator<br/>响应验证器]\n        ET[ErrorTracker<br/>错误追踪]\n        GR[GuardRails<br/>防护栏]\n    end\n    \n    subgraph \"LLM 层\"\n        LL[LLM Client<br/>LLM 客户端]\n        TC[ToolCall<br/>工具调用]\n    end\n    \n    PS --> PD\n    PC --> PD\n    PR --> PD\n    PD --> WF\n    WF --> SE\n    WF --> CM\n    LL --> TC\n    TC --> RV\n    RV --> GR\n    SE --> GR\n    ET --> GR\n```\n\n## 最佳实践\n\n### 工具命名规范\n\n- 使用小写字母和下划线：`get_weather`, `read_file`\n- 动词开头描述操作：`search`, `lookup`, `answer`\n- 名词描述数据：`user_info`, `document_content`\n\n### 参数设计\n\n```python\nclass SearchParams(BaseModel):\n    query: str = Field(description=\"Search query string\")\n    limit: int = Field(default=10, description=\"Maximum results to return\")\n```\n\n### 依赖声明\n\n对于多步骤工作流，清晰声明依赖关系：\n\n```python\nToolDef(\n    spec=...,\n    callable=answer,\n    prerequisites=[\n        \"search\",                           # 必须执行过 search\n        {\"tool\": \"lookup\", \"match_arg\": \"id\"}  # 必须用相同 id 执行过 lookup\n    ],\n)\n```\n\n## 总结\n\nForge 的内置工具系统通过类型安全的 Pydantic 模型、清晰的前置依赖声明和完善的验证机制，为构建可靠的 LLM 工具调用工作流提供了坚实基础。系统设计强调：\n\n1. **类型安全**：参数schema与执行逻辑强绑定\n2. **可验证性**：每个工具调用都经过多层验证\n3. **可追踪性**：完整记录工具执行历史和依赖满足状态\n4. **可扩展性**：通过 `ToolDef` 轻松注册新工具\n\n---\n\n---\n\n## Doramagic 踩坑日志\n\n项目：antoinezambelli/forge\n\n摘要：发现 15 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：安装坑 - 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body。\n\n## 1. 
安装坑 · 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_148dff87195e42549d0ffb88b99e9cbf | https://github.com/antoinezambelli/forge/issues/58 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 2. 安装坑 · 来源证据：Investigate: integration paths with Hermes Agent\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Investigate: integration paths with Hermes Agent\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_e3cbd2d1c9a84a1887887bf24b036865 | https://github.com/antoinezambelli/forge/issues/51 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 3. 安装坑 · 来源证据：Per-model recommended sampling defaults (map keyed by HF model cards)\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Per-model recommended sampling defaults (map keyed by HF model cards)\n- 对用户的影响：可能阻塞安装或首次运行。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_057ca2af912e4a608259ffb2a3654d4f | https://github.com/antoinezambelli/forge/issues/59 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 4. 安装坑 · 来源证据：Rescue-parse ChatGPT-style XML tool calls\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Rescue-parse ChatGPT-style XML tool calls\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_471c674c8d73451da75d6b8c9349aabf | https://github.com/antoinezambelli/forge/issues/55 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 5. 
配置坑 · 来源证据：Proxy external mode hardcodes native FC — no prompt-injection fallback\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个配置相关的待验证问题：Proxy external mode hardcodes native FC — no prompt-injection fallback\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_f3a85ec8447a4838b3bc4c846cd9e7a0 | https://github.com/antoinezambelli/forge/issues/53 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 6. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | README/documentation is current enough for a first validation pass.\n\n## 7. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | last_activity_observed missing\n\n## 8. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n\n## 9. 安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n\n## 10. 
安全/权限坑 · 来源证据：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_4ad226a6d1fa4a5f89fa7702bec11188 | https://github.com/antoinezambelli/forge/issues/61 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 11. 安全/权限坑 · 来源证据：Sub-agent support: dynamic slot splitting\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: dynamic slot splitting\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_5b35873cf63c4647bca8a0611d441189 | https://github.com/antoinezambelli/forge/issues/28 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 12. 安全/权限坑 · 来源证据：Sub-agent support: slot pool\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: slot pool\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_070d9a3d20d24123b62d7d76ee16078a | https://github.com/antoinezambelli/forge/issues/29 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 13. 安全/权限坑 · 来源证据：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds\n- 对用户的影响：可能阻塞安装或首次运行。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_673be4a583984219bab90cbadff631fe | https://github.com/antoinezambelli/forge/issues/54 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 14. 
维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | issue_or_pr_quality=unknown\n\n## 15. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | release_recency=unknown\n\n<!-- canonical_name: antoinezambelli/forge; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "forge",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "hn_item:48192383",
          "kind": "hn",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://news.ycombinator.com/item?id=48192383"
        },
        {
          "evidence_id": "art_9416e509908347e09477e1870e1e192a",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/antoinezambelli/forge#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki 完整输出，末尾追加 Discovery Agent 踩坑日志。",
      "title": "forge 说明书",
      "toc": [
        "https://github.com/antoinezambelli/forge 项目说明书",
        "目录",
        "Forge 简介",
        "概述",
        "核心概念",
        "架构设计",
        "采样参数系统",
        "上下文管理",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "f1b87b05b863c7d12927f3dbdbd716af2dc3ace1",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "pyproject.toml",
      "README.md",
      "docs/USER_GUIDE.md",
      "docs/ARCHITECTURE.md",
      "docs/EVAL_GUIDE.md",
      "docs/MODEL_GUIDE.md",
      "docs/BACKEND_SETUP.md",
      "docs/WORKFLOW.md",
      "docs/results/index.md",
      "docs/decisions/006-tool-prerequisites.md",
      "docs/decisions/002-anthropic-baseline.md",
      "docs/decisions/MULTI_MODEL_ROUTING.md",
      "docs/decisions/009-bfcl-integration.md",
      "docs/decisions/010-tool-resolution-error.md",
      "docs/decisions/013-text-response-intent.md",
      "docs/decisions/003-thinking-label-ux.md",
      "docs/decisions/004-async-on-chunk.md",
      "docs/decisions/012-openai-proxy.md",
      "docs/decisions/001-ablation-framework.md",
      "docs/decisions/005-parallel-tool-calls.md",
      "docs/decisions/014-recommended-sampling-opt-in.md",
      "docs/decisions/007-report-views.md",
      "docs/decisions/011-guardrail-middleware.md",
      "docs/decisions/008-stateful-eval-scenarios.md",
      "docs/results/raw/reforged-vs-bare.md",
      "docs/results/raw/native-vs-prompt.md",
      "docs/results/raw/reforged/by-backend.md",
      "docs/results/raw/reforged/all.md",
      "docs/results/raw/reforged/by-family.md",
      "examples/foreign_loop.py",
      "src/forge/server.py",
      "src/forge/errors.py",
      "src/forge/__init__.py",
      "src/forge/context/hardware.py",
      "src/forge/context/manager.py",
      "src/forge/context/strategies.py",
      "src/forge/context/__init__.py",
      "src/forge/guardrails/guardrails.py",
      "src/forge/guardrails/step_enforcer.py",
      "src/forge/guardrails/nudge.py"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# dashboard - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 dashboard 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **AI 研究者或研究型 Agent 构建者**：README 明确围绕研究、实验或论文工作流展开。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **正在使用 Claude/Codex/Cursor/Gemini 等宿主 AI 的开发者**：README 或插件配置提到多个宿主 AI。 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `pip install forge-guardrails                # core only` 证据：`README.md` Claim：`clm_0004` supported 0.86\n- `pip install \"forge-guardrails[anthropic]\"   # + Anthropic client` 证据：`README.md` Claim：`clm_0005` supported 0.86\n- `git clone https://github.com/antoinezambelli/forge.git` 证据：`README.md` Claim：`clm_0006` supported 0.86\n- `pip install -e \".[dev]\"` 证据：`README.md` Claim：`clm_0007` supported 0.86\n- `pip install -e \".[anthropic]\"` 证据：`README.md` Claim：`clm_0008` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：需要管理员/安全审批\n- **为什么**：继续前可能涉及密钥、账号、外部服务或敏感上下文，建议先经过管理员或安全审批。\n\n### 30 秒判断\n\n- **现在怎么做**：需要管理员/安全审批\n- **最小安全下一步**：先跑 Prompt Preview；若涉及凭证或企业环境，先审批再试装\n- **先别相信**：角色质量和任务匹配不能直接相信。\n- **继续会触碰**：角色选择偏差、命令执行、本地环境或项目文件\n\n### 现在可以相信\n\n- **适合人群线索：AI 研究者或研究型 Agent 构建者**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- 
**适合人群线索：正在使用 Claude/Codex/Cursor/Gemini 等宿主 AI 的开发者**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0003` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0004` supported 0.86\n\n### 现在还不能相信\n\n- **角色质量和任务匹配不能直接相信。**（unverified）：角色库证明有很多角色，不证明每个角色都适合你的具体任务，也不证明角色能产生高质量结果。\n- **不能把角色文案当成真实执行能力。**（unverified）：安装前只能判断角色描述和任务画像是否匹配，不能证明它能在宿主 AI 里完成任务。\n- **真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n\n### 继续会触碰什么\n\n- **角色选择偏差**：用户对任务应该由哪个专家角色处理的判断。 原因：选错角色会让 AI 从错误专业视角回答，浪费时间或误导决策。\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`\n- **环境变量 / API Key**：项目入口文档明确出现 API key、token、secret 或账号凭证配置。 原因：如果真实安装需要凭证，应先使用测试凭证并经过权限/合规判断。 证据：`README.md`, `docs/EVAL_GUIDE.md`\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：先用交互式试用验证任务画像和角色匹配，不要先导入整套角色库。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **不要使用真实生产凭证**：环境变量/API key 一旦进入宿主或工具链，可能产生账号和合规风险。（适用：出现 API、TOKEN、KEY、SECRET 等环境线索时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- **保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **保留原始角色选择记录**：如果输出偏题，可以回到任务画像阶段重新选择角色，而不是继续沿着错误角色推进。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- 
**准备撤销测试 API key 或 token**：测试凭证泄露或误用时，可以快速止损。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0009` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md` Claim：`clm_0010` supported 0.86\n- **待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？ 原因：兼容性只能通过实际宿主环境验证。\n- **待确认**：项目输出质量是否满足用户具体任务？ 原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？ 原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：150\n- 重要文件覆盖：40/150\n- 证据索引条目：45\n- 角色 / Skill 条目：30\n\n### 证据不足时的处理\n\n- **missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 dashboard 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 dashboard 当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 
给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 dashboard 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 30 个角色 / Skill / 项目文档条目。\n\n- **forge**（project_doc）：! PyPI https://img.shields.io/pypi/v/forge-guardrails.svg https://pypi.org/project/forge-guardrails/ ! Tests https://github.com/antoinezambelli/forge/actions/workflows/tests.yml/badge.svg https://github.com/antoinezambelli/forge/actions/workflows/tests.yml ! codecov https://codecov.io/gh/antoinezambelli/forge/branch/main/graph/badge.svg https://codecov.io/gh/antoinezambelli/forge ! Python 3.12+ https://img.shields.i… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **Contributing to forge**（project_doc）：Thanks for your interest in contributing. This guide covers how to get set up, run tests, and where to look when adding new functionality. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CONTRIBUTING.md`\n- **Architecture: Agentic Tool-Calling Library**（project_doc）：Architecture: Agentic Tool-Calling Library 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/ARCHITECTURE.md`\n- **Backend Setup Guide**（project_doc）：How to install and run each LLM backend for forge eval and development. All instructions assume Windows 11 with an NVIDIA GPU 16GB VRAM . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/BACKEND_SETUP.md`\n- **Eval Guide**（project_doc）：Internal tooling for measuring how reliably a model + backend combo navigates multi-step tool-calling workflows. Not a test suite — run manually against a live backend. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/EVAL_GUIDE.md`\n- **Model Guide**（project_doc）：Which model and backend to use with forge, based on your hardware and goals. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/MODEL_GUIDE.md`\n- **User Guide**（project_doc）：Practical usage patterns for forge — from single-turn tool calling to multi-turn conversations. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/USER_GUIDE.md`\n- **Workflow**（project_doc）：Visual guide to the forge agentic tool-calling loop. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/WORKFLOW.md`\n- **ADR-001: Ablation Framework**（project_doc）：Status: Implemented az/ablation branch, Feb 2026 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/001-ablation-framework.md`\n- **ADR-002: Anthropic Baseline Client**（project_doc）：Status: Implemented az/ablation branch, Feb 2026 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/002-anthropic-baseline.md`\n- **ADR-003: thinking Label UX — Reasoning Capture Gating**（project_doc）：ADR-003: thinking Label UX — Reasoning Capture Gating 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/003-thinking-label-ux.md`\n- **ADR-004: Async on chunk Callback**（project_doc）：Status: Done implemented on az/async think branch 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/004-async-on-chunk.md`\n- **ADR-005: Parallel Tool Calling**（project_doc）：Status: Done branch az/parallel tools , commit cd2bd69 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/005-parallel-tool-calls.md`\n- **ADR-006: Tool Prerequisites**（project_doc）：Status: Accepted and implemented March 2026 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/006-tool-prerequisites.md`\n- **ADR-007: Report Views**（project_doc）：Status: Planned README roadmap item 5 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/007-report-views.md`\n- **ADR-008: Stateful Eval Scenarios**（project_doc）：All 11 eval scenarios use argument-blind tool callables. get info query=\"rome\" returns the Paris canned string. check supplier supplier=\"anything\" routes through a fuzzy match but fundamentally returns a static blob. The only exception is error recovery , which validates a 4-digit format — a type check, not stateful behavior. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/008-stateful-eval-scenarios.md`\n- **ADR-009: BFCL Integration**（project_doc）：Status: Implemented az/bfcl eval branch, Feb 2026 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/009-bfcl-integration.md`\n- **ADR-010: ToolResolutionError**（project_doc）：Status: Implemented az/tre branch, Mar 2026 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/010-tool-resolution-error.md`\n- **ADR-011: Guardrail Middleware — Composable Reliability Without Loop Ownership**（project_doc）：ADR-011: Guardrail Middleware — Composable Reliability Without Loop Ownership 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/011-guardrail-middleware.md`\n- **ADR-012: OpenAI-Compatible Proxy Server**（project_doc）：ADR-012: OpenAI-Compatible Proxy Server 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/012-openai-proxy.md`\n- **ADR-013: Text Response Intent -- When the Model Chooses Not to Call Tools**（project_doc）：ADR-013: Text Response Intent -- When the Model Chooses Not to Call Tools 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/013-text-response-intent.md`\n- **ADR-014: Recommended sampling — opt-in flag and proxy pass-through**（project_doc）：ADR-014: Recommended sampling — opt-in flag and proxy pass-through 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/014-recommended-sampling-opt-in.md`\n- **Multi-Model Routing — Concept Doc**（project_doc）：Allow forge to manage multiple model backends simultaneously and expose them as named clients to the consumer. Forge handles the pool lifecycle, health, budgets . The consumer handles orchestration which workflow uses which model, when to swap, event dispatch . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/decisions/MULTI_MODEL_ROUTING.md`\n- **Forge Eval Reports**（project_doc）：For model and backend recommendations, see Model Guide ../MODEL GUIDE.md . 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/results/index.md`\n- **Forge Eval — Native vs Prompt llama-server**（project_doc）：Forge Eval — Native vs Prompt llama-server 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/results/raw/native-vs-prompt.md`\n- **Forge Eval — Reforged vs Bare**（project_doc）：claude-haiku-4-5-20251001 anthropic/native 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/results/raw/reforged-vs-bare.md`\n- **Forge Eval — Reforged Leaderboard**（project_doc）：Scr=score correct/total , Acc=accuracy correct/total, excl validate errors , Cmp=completeness completed/total , Eff=efficiency ideal/actual calls , Wst=avg wasted calls, Spd=avg time excl compaction rel=relevance detection, arg=argument fidelity, tsl=tool selection, b2s=basic 2step, s3s=sequential 3step, crt=conditional routing, srn=sequential reasoning, err=error recovery, dgr=data gap recovery, dge=data gap recove… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/results/raw/reforged/all.md`\n- **Forge Eval — Reforged by Backend**（project_doc）：Scr=score correct/total , Acc=accuracy correct/total, excl validate errors , Cmp=completeness completed/total , Eff=efficiency ideal/actual calls , Wst=avg wasted calls, Spd=avg time excl compaction rel=relevance detection, arg=argument fidelity, tsl=tool selection, b2s=basic 2step, s3s=sequential 3step, crt=conditional routing, srn=sequential reasoning, err=error recovery, dgr=data gap recovery, dge=data gap recove… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/results/raw/reforged/by-backend.md`\n- **Forge Eval — Reforged by Model Family**（project_doc）：Forge Eval — Reforged by Model Family 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/results/raw/reforged/by-family.md`\n- **Changelog**（project_doc）：All notable changes to forge are documented here. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CHANGELOG.md`\n\n## 证据索引\n\n- 共索引 45 条证据。\n\n- **forge**（documentation）：! PyPI https://img.shields.io/pypi/v/forge-guardrails.svg https://pypi.org/project/forge-guardrails/ ! 
Tests https://github.com/antoinezambelli/forge/actions/workflows/tests.yml/badge.svg https://github.com/antoinezambelli/forge/actions/workflows/tests.yml ! codecov https://codecov.io/gh/antoinezambelli/forge/branch/main/graph/badge.svg https://codecov.io/gh/antoinezambelli/forge ! Python 3.12+ https://img.shields.io/badge/python-3.12%2B-blue.svg https://www.python.org/downloads/ ! License: MIT https://img.shields.io/badge/license-MIT-green.svg LICENSE 证据：`README.md`\n- **Contributing to forge**（documentation）：Thanks for your interest in contributing. This guide covers how to get set up, run tests, and where to look when adding new functionality. 证据：`CONTRIBUTING.md`\n- **Package**（package_manifest）：{ \"name\": \"dashboard\", \"private\": true, \"version\": \"0.0.0\", \"type\": \"module\", \"scripts\": { \"dev\": \"vite\", \"build\": \"tsc -b && vite build\", \"lint\": \"eslint .\", \"preview\": \"vite preview\" }, \"dependencies\": { \"react\": \"^19.2.0\", \"react-dom\": \"^19.2.0\" }, \"devDependencies\": { \"@eslint/js\": \"^9.39.1\", \"@tailwindcss/vite\": \"^4.2.1\", \"@types/node\": \"^24.10.1\", \"@types/react\": \"^19.2.7\", \"@types/react-dom\": \"^19.2.3\", \"@vitejs/plugin-react\": \"^5.1.1\", \"eslint\": \"^9.39.1\", \"eslint-plugin-react-hooks\": \"^7.0.1\", \"eslint-plugin-react-refresh\": \"^0.4.24\", \"globals\": \"^16.5.0\", \"tailwindcss\": \"^4.2.1\", \"typescript\": \"~5.9.3\", \"typescript-eslint\": \"^8.48.0\", \"vite\": \"^7.3.1\", \"vite-plugin-singlefile\": \"^… 证据：`tests/eval/dashboard/package.json`\n- **License**（source_file）：Copyright c 2025-2026 Antoine Zambelli 证据：`LICENSE`\n- **Architecture: Agentic Tool-Calling Library**（documentation）：Architecture: Agentic Tool-Calling Library 证据：`docs/ARCHITECTURE.md`\n- **Backend Setup Guide**（documentation）：How to install and run each LLM backend for forge eval and development. All instructions assume Windows 11 with an NVIDIA GPU 16GB VRAM . 
证据：`docs/BACKEND_SETUP.md`\n- **Eval Guide**（documentation）：Internal tooling for measuring how reliably a model + backend combo navigates multi-step tool-calling workflows. Not a test suite — run manually against a live backend. 证据：`docs/EVAL_GUIDE.md`\n- **Model Guide**（documentation）：Which model and backend to use with forge, based on your hardware and goals. 证据：`docs/MODEL_GUIDE.md`\n- **User Guide**（documentation）：Practical usage patterns for forge — from single-turn tool calling to multi-turn conversations. 证据：`docs/USER_GUIDE.md`\n- **Workflow**（documentation）：Visual guide to the forge agentic tool-calling loop. 证据：`docs/WORKFLOW.md`\n- **ADR-001: Ablation Framework**（documentation）：Status: Implemented az/ablation branch, Feb 2026 证据：`docs/decisions/001-ablation-framework.md`\n- **ADR-002: Anthropic Baseline Client**（documentation）：Status: Implemented az/ablation branch, Feb 2026 证据：`docs/decisions/002-anthropic-baseline.md`\n- **ADR-003: thinking Label UX — Reasoning Capture Gating**（documentation）：ADR-003: thinking Label UX — Reasoning Capture Gating 证据：`docs/decisions/003-thinking-label-ux.md`\n- **ADR-004: Async on chunk Callback**（documentation）：Status: Done implemented on az/async think branch 证据：`docs/decisions/004-async-on-chunk.md`\n- **ADR-005: Parallel Tool Calling**（documentation）：Status: Done branch az/parallel tools , commit cd2bd69 证据：`docs/decisions/005-parallel-tool-calls.md`\n- **ADR-006: Tool Prerequisites**（documentation）：Status: Accepted and implemented March 2026 证据：`docs/decisions/006-tool-prerequisites.md`\n- **ADR-007: Report Views**（documentation）：Status: Planned README roadmap item 5 证据：`docs/decisions/007-report-views.md`\n- **ADR-008: Stateful Eval Scenarios**（documentation）：All 11 eval scenarios use argument-blind tool callables. get info query=\"rome\" returns the Paris canned string. check supplier supplier=\"anything\" routes through a fuzzy match but fundamentally returns a static blob. 
The only exception is error recovery , which validates a 4-digit format — a type check, not stateful behavior. 证据：`docs/decisions/008-stateful-eval-scenarios.md`\n- **ADR-009: BFCL Integration**（documentation）：Status: Implemented az/bfcl eval branch, Feb 2026 证据：`docs/decisions/009-bfcl-integration.md`\n- **ADR-010: ToolResolutionError**（documentation）：Status: Implemented az/tre branch, Mar 2026 证据：`docs/decisions/010-tool-resolution-error.md`\n- **ADR-011: Guardrail Middleware — Composable Reliability Without Loop Ownership**（documentation）：ADR-011: Guardrail Middleware — Composable Reliability Without Loop Ownership 证据：`docs/decisions/011-guardrail-middleware.md`\n- **ADR-012: OpenAI-Compatible Proxy Server**（documentation）：ADR-012: OpenAI-Compatible Proxy Server 证据：`docs/decisions/012-openai-proxy.md`\n- **ADR-013: Text Response Intent -- When the Model Chooses Not to Call Tools**（documentation）：ADR-013: Text Response Intent -- When the Model Chooses Not to Call Tools 证据：`docs/decisions/013-text-response-intent.md`\n- **ADR-014: Recommended sampling — opt-in flag and proxy pass-through**（documentation）：ADR-014: Recommended sampling — opt-in flag and proxy pass-through 证据：`docs/decisions/014-recommended-sampling-opt-in.md`\n- **Multi-Model Routing — Concept Doc**（documentation）：Allow forge to manage multiple model backends simultaneously and expose them as named clients to the consumer. Forge handles the pool lifecycle, health, budgets . The consumer handles orchestration which workflow uses which model, when to swap, event dispatch . 证据：`docs/decisions/MULTI_MODEL_ROUTING.md`\n- **Forge Eval Reports**（documentation）：For model and backend recommendations, see Model Guide ../MODEL GUIDE.md . 
证据：`docs/results/index.md`\n- **Forge Eval — Native vs Prompt llama-server**（documentation）：Forge Eval — Native vs Prompt llama-server 证据：`docs/results/raw/native-vs-prompt.md`\n- **Forge Eval — Reforged vs Bare**（documentation）：claude-haiku-4-5-20251001 anthropic/native 证据：`docs/results/raw/reforged-vs-bare.md`\n- **Forge Eval — Reforged Leaderboard**（documentation）：Scr=score correct/total , Acc=accuracy correct/total, excl validate errors , Cmp=completeness completed/total , Eff=efficiency ideal/actual calls , Wst=avg wasted calls, Spd=avg time excl compaction rel=relevance detection, arg=argument fidelity, tsl=tool selection, b2s=basic 2step, s3s=sequential 3step, crt=conditional routing, srn=sequential reasoning, err=error recovery, dgr=data gap recovery, dge=data gap recovery extended, art=argument transformation, grs=grounded synthesis, iar=inconsistent api recovery, rel s=relevance detection stateful, arg s=argument fidelity stateful, tsl s=tool selection stateful, b2s s=basic 2step stateful, s3s s=sequential 3step stateful, crt s=conditional rou… 证据：`docs/results/raw/reforged/all.md`\n- **Forge Eval — Reforged by Backend**（documentation）：Scr=score correct/total , Acc=accuracy correct/total, excl validate errors , Cmp=completeness completed/total , Eff=efficiency ideal/actual calls , Wst=avg wasted calls, Spd=avg time excl compaction rel=relevance detection, arg=argument fidelity, tsl=tool selection, b2s=basic 2step, s3s=sequential 3step, crt=conditional routing, srn=sequential reasoning, err=error recovery, dgr=data gap recovery, dge=data gap recovery extended, art=argument transformation, grs=grounded synthesis, iar=inconsistent api recovery, rel s=relevance detection stateful, arg s=argument fidelity stateful, tsl s=tool selection stateful, b2s s=basic 2step stateful, s3s s=sequential 3step stateful, crt s=conditional rou… 证据：`docs/results/raw/reforged/by-backend.md`\n- **Forge Eval — Reforged by Model Family**（documentation）：Forge Eval — Reforged by 
Model Family 证据：`docs/results/raw/reforged/by-family.md`\n- **Changelog**（documentation）：All notable changes to forge are documented here. 证据：`CHANGELOG.md`\n- **Eval Rigs**（structured_config）：{ \"rig-00\": {\"gpu\": \"RTX 5070\", \"platform\": \"windows\"}, \"rig-01\": {\"gpu\": \"RTX 5070 Ti\", \"platform\": \"linux/ubuntu24.04\"}, \"rig-02\": {\"gpu\": \"2x RTX 5070 Ti\", \"platform\": \"linux/ubuntu24.04\"}, \"rig-03\": {\"gpu\": \"AMD Strix Halo APU 128GB unified \", \"platform\": \"linux/fedora43\"} } 证据：`eval_rigs.json`\n- **Tsconfig.App**（structured_config）：{ \"compilerOptions\": { \"tsBuildInfoFile\": \"./node modules/.tmp/tsconfig.app.tsbuildinfo\", \"target\": \"ES2022\", \"useDefineForClassFields\": true, \"lib\": \"ES2022\", \"DOM\", \"DOM.Iterable\" , \"module\": \"ESNext\", \"types\": \"vite/client\" , \"skipLibCheck\": true, 证据：`tests/eval/dashboard/tsconfig.app.json`\n- **Tsconfig**（structured_config）：{ \"files\": , \"references\": { \"path\": \"./tsconfig.app.json\" }, { \"path\": \"./tsconfig.node.json\" } } 证据：`tests/eval/dashboard/tsconfig.json`\n- **Tsconfig.Node**（structured_config）：{ \"compilerOptions\": { \"tsBuildInfoFile\": \"./node modules/.tmp/tsconfig.node.tsbuildinfo\", \"target\": \"ES2023\", \"lib\": \"ES2023\" , \"module\": \"ESNext\", \"types\": \"node\" , \"skipLibCheck\": true, 证据：`tests/eval/dashboard/tsconfig.node.json`\n- **Normalize line endings**（source_file）：Normalize line endings text=auto .py text eol=lf .md text eol=lf .toml text eol=lf .yml text eol=lf .yaml text eol=lf eval results.jsonl filter=lfs diff=lfs merge=lfs -text eval results rig .jsonl filter=lfs diff=lfs merge=lfs -text 证据：`.gitattributes`\n- **Python**（source_file）：Python pycache / .py cod .egg-info/ dist/ build/ .egg 证据：`.gitignore`\n- **Codecov**（source_file）：comment: false 证据：`codecov.yml`\n- **Eval Results**（source_file）：version https://git-lfs.github.com/spec/v1 oid sha256:b4393d257ba3e22c5bac7b4cf7ab9431f85bdd2aafe662f1ffb118a73203bc2c size 67078449 
证据：`eval_results.jsonl`\n- **=====================================================================**（source_file）：\"\"\"Using forge's guardrail middleware in your own agentic loop. 证据：`examples/foreign_loop.py`\n- **Integration-only: lifecycle orchestration requiring real backends/threads.**（source_file）：build-system requires = \"hatchling\" build-backend = \"hatchling.build\" 证据：`pyproject.toml`\n- **Pinned-in-time translation tables. These are the GGUF MAP and LLAMAFILE MAP**（source_file）：\"\"\"One-shot migration: rewrite llamaserver/llamafile rows to GGUF-stem identity. 证据：`scripts/migrate_eval_jsonl_gguf_identity.py`\n- **Rig-00 plan for the model-params re-run: all Qwen3 variants × all backends,**（source_file）：\"\"\"Unattended ablation study runner. 证据：`scripts/run_ablation.py`\n- **Read request**（source_file）：\"\"\"Smoke test for the proxy — starts proxy in external mode against a mock backend, sends one request, verifies the response. 证据：`scripts/smoke_test_proxy.py`\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`README.md`, `CONTRIBUTING.md`, `tests/eval/dashboard/package.json`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`README.md`, `CONTRIBUTING.md`, `tests/eval/dashboard/package.json`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\n下面内容用于强化 Repomix/AI Context Pack 主体。Human Manual 只提供阅读骨架；踩坑日志会被转成宿主 AI 必须遵守的工作约束。\n\n## Human Manual 骨架\n\n使用规则：这里只是项目阅读路线和显著性信号，不是事实权威。具体事实仍必须回到 repo evidence / Claim Graph。\n\n宿主 AI 硬性规则：\n- 不得把页标题、章节顺序、摘要或 importance 当作项目事实证据。\n- 解释 Human Manual 骨架时，必须明确说它只是阅读路线/显著性信号。\n- 能力、安装、兼容性、运行状态和风险判断必须引用 repo evidence、source path 或 Claim Graph。\n\n- **Forge 简介**：importance `high`\n  - source_paths: README.md, src/forge/__init__.py\n- 
**安装与配置**：importance `high`\n  - source_paths: README.md, pyproject.toml, docs/BACKEND_SETUP.md\n- **快速开始教程**：importance `high`\n  - source_paths: README.md, src/forge/core/runner.py, src/forge/core/workflow.py\n- **架构概述**：importance `high`\n  - source_paths: docs/ARCHITECTURE.md, src/forge/__init__.py, src/forge/core/runner.py, src/forge/context/manager.py\n- **核心组件详解**：importance `high`\n  - source_paths: src/forge/core/runner.py, src/forge/core/slot_worker.py, src/forge/core/steps.py, src/forge/core/messages.py, src/forge/context/manager.py\n- **工作流内部机制**：importance `medium`\n  - source_paths: src/forge/core/workflow.py, src/forge/core/runner.py, src/forge/core/inference.py, docs/WORKFLOW.md\n- **Guardrails 系统**：importance `high`\n  - source_paths: src/forge/guardrails/__init__.py, src/forge/guardrails/response_validator.py, src/forge/guardrails/step_enforcer.py, src/forge/guardrails/error_tracker.py, src/forge/guardrails/nudge.py\n- **上下文管理**：importance `high`\n  - source_paths: src/forge/context/manager.py, src/forge/context/strategies.py, src/forge/context/hardware.py, src/forge/server.py\n\n## Repo Inspection Evidence / 源码检查证据\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: `f1b87b05b863c7d12927f3dbdbd716af2dc3ace1`\n- inspected_files: `pyproject.toml`, `README.md`, `docs/USER_GUIDE.md`, `docs/ARCHITECTURE.md`, `docs/EVAL_GUIDE.md`, `docs/MODEL_GUIDE.md`, `docs/BACKEND_SETUP.md`, `docs/WORKFLOW.md`, `docs/results/index.md`, `docs/decisions/006-tool-prerequisites.md`, `docs/decisions/002-anthropic-baseline.md`, `docs/decisions/MULTI_MODEL_ROUTING.md`, `docs/decisions/009-bfcl-integration.md`, `docs/decisions/010-tool-resolution-error.md`, `docs/decisions/013-text-response-intent.md`, `docs/decisions/003-thinking-label-ux.md`, `docs/decisions/004-async-on-chunk.md`, `docs/decisions/012-openai-proxy.md`, `docs/decisions/001-ablation-framework.md`, `docs/decisions/005-parallel-tool-calls.md`\n\n宿主 AI 硬性规则：\n- 没有 
repo_clone_verified=true 时，不得声称已经读过源码。\n- 没有 repo_inspection_verified=true 时，不得把 README/docs/package 文件判断写成事实。\n- 没有 quick_start_verified=true 时，不得声称 Quick Start 已跑通。\n\n## Doramagic Pitfall Constraints / 踩坑约束\n\n这些规则来自 Doramagic 发现、验证或编译过程中的项目专属坑点。宿主 AI 必须把它们当作工作约束，而不是普通说明文字。\n\n### Constraint 1: 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_148dff87195e42549d0ffb88b99e9cbf | https://github.com/antoinezambelli/forge/issues/58 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 2: 来源证据：Investigate: integration paths with Hermes Agent\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Investigate: integration paths with Hermes Agent\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_e3cbd2d1c9a84a1887887bf24b036865 | https://github.com/antoinezambelli/forge/issues/51 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 3: 来源证据：Per-model recommended sampling defaults (map keyed by HF model cards)\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Per-model recommended sampling defaults (map keyed by HF model cards)\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能阻塞安装或首次运行。\n- Evidence: community_evidence:github | cevd_057ca2af912e4a608259ffb2a3654d4f | https://github.com/antoinezambelli/forge/issues/59 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 4: 来源证据：Rescue-parse ChatGPT-style XML tool calls\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Rescue-parse ChatGPT-style XML tool calls\n- Host AI 
rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_471c674c8d73451da75d6b8c9349aabf | https://github.com/antoinezambelli/forge/issues/55 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 5: 来源证据：Proxy external mode hardcodes native FC — no prompt-injection fallback\n\n- Trigger: GitHub 社区证据显示该项目存在一个配置相关的待验证问题：Proxy external mode hardcodes native FC — no prompt-injection fallback\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_f3a85ec8447a4838b3bc4c846cd9e7a0 | https://github.com/antoinezambelli/forge/issues/53 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 6: 能力判断依赖假设\n\n- Trigger: README/documentation is current enough for a first validation pass.\n- Host AI rule: 将假设转成下游验证清单。\n- Why it matters: 假设不成立时，用户拿不到承诺的能力。\n- Evidence: capability.assumptions | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | README/documentation is current enough for a first validation pass.\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 7: 维护活跃度未知\n\n- Trigger: 未记录 last_activity_observed。\n- Host AI rule: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Why it matters: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Evidence: evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | last_activity_observed missing\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 8: 下游验证发现风险项\n\n- Trigger: no_demo\n- Host AI rule: 进入安全/权限治理复核队列。\n- Why it matters: 下游已经要求复核，不能在页面中弱化。\n- Evidence: downstream_validation.risk_items | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 9: 存在评分风险\n\n- Trigger: no_demo\n- Host AI 
rule: 把风险写入边界卡，并确认是否需要人工复核。\n- Why it matters: 风险会影响是否适合普通用户安装。\n- Evidence: risks.scoring_risks | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 10: 来源证据：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n\n- Trigger: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能影响授权、密钥配置或安全边界。\n- Evidence: community_evidence:github | cevd_4ad226a6d1fa4a5f89fa7702bec11188 | https://github.com/antoinezambelli/forge/issues/61 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n",
      "summary": "给宿主 AI 的上下文和工作边界。",
      "title": "AI Context Pack / 带给我的 AI"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card / 安装前决策卡\n\n项目：antoinezambelli/forge\n\n## Doramagic 试用结论\n\n当前结论：可以进入发布前推荐检查；首次使用仍应从最小权限、临时目录和可回滚配置开始。\n\n## 用户现在可以做\n\n- 可以先阅读 Human Manual，理解项目目的和主要工作流。\n- 可以复制 Prompt Preview 做安装前体验；这只验证交互感，不代表真实运行。\n- 可以把官方 Quick Start 命令放到隔离环境中验证，不要直接进主力环境。\n\n## 现在不要做\n\n- 不要把 Prompt Preview 当成项目实际运行结果。\n- 不要把 metadata-only validation 当成沙箱安装验证。\n- 不要把未验证能力写成“已支持、已跑通、可放心安装”。\n- 不要在首次试用时交出生产数据、私人文件、真实密钥或主力配置目录。\n\n## 安装前检查\n\n- 宿主 AI 是否匹配：chatgpt\n- 官方安装入口状态：已发现官方入口\n- 是否在临时目录、临时宿主或容器中验证：必须是\n- 是否能回滚配置改动：必须能\n- 是否需要 API Key、网络访问、读写文件或修改宿主配置：未确认前按高风险处理\n- 是否记录了安装命令、实际输出和失败日志：必须记录\n\n## 当前阻塞项\n\n- 无阻塞项。\n\n## 项目专属踩坑\n\n- 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body（medium）：可能增加新用户试用和生产接入成本。 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 来源证据：Investigate: integration paths with Hermes Agent（medium）：可能增加新用户试用和生产接入成本。 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：Per-model recommended sampling defaults (map keyed by HF model cards)（medium）：可能阻塞安装或首次运行。 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 来源证据：Rescue-parse ChatGPT-style XML tool calls（medium）：可能增加新用户试用和生产接入成本。 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 来源证据：Proxy external mode hardcodes native FC — no prompt-injection fallback（medium）：可能增加新用户试用和生产接入成本。 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n\n## 风险与权限提示\n\n- no_demo: medium\n\n## 证据缺口\n\n- 暂未发现结构化证据缺口。\n",
      "summary": "安装、权限、验证和推荐前风险。",
      "title": "Boundary & Risk Card / 边界与风险卡"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/antoinezambelli/forge 项目说明书\n\n生成时间：2026-05-19 20:04:23 UTC\n\n## 目录\n\n- [Forge 简介](#page-introduction)\n- [安装与配置](#page-installation)\n- [快速开始教程](#page-quickstart)\n- [架构概述](#page-architecture)\n- [核心组件详解](#page-core-components)\n- [工作流内部机制](#page-workflow-internals)\n- [Guardrails 系统](#page-guardrails)\n- [上下文管理](#page-context-management)\n- [SlotWorker 槽位调度](#page-slot-worker)\n- [内置工具系统](#page-tools)\n\n<a id='page-introduction'></a>\n\n## Forge 简介\n\n### 相关页面\n\n相关主题：[安装与配置](#page-installation), [架构概述](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n- [src/forge/clients/sampling_defaults.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/clients/sampling_defaults.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/proxy/__main__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/proxy/__main__.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n- [CHANGELOG.md](https://github.com/antoinezambelli/forge/blob/main/CHANGELOG.md)\n</details>\n\n# Forge 简介\n\n## 概述\n\nForge 是一个基于 LLM（大型语言模型）的工具调用框架，专注于构建可靠、可预测的多步骤 AI 工作流程。该项目由 Antoine Zambelli 开发，旨在解决 LLM 在工具调用场景中的常见问题，如响应格式不稳定、上下文长度失控、错误恢复困难等。\n\nForge 的核心设计理念是将 LLM 响应校验、上下文压缩、错误重试和步骤强制等机制封装为可配置的保护栏（Guardrails），使开发者能够专注于业务逻辑而非基础设施细节。资料来源：[README.md]()\n\n## 核心概念\n\n### Workflow（工作流）\n\nWorkflow 是 Forge 中的核心抽象，定义了一个完整的多步骤任务。它包含以下关键组件：\n\n| 属性 | 类型 | 说明 |\n|------|------|------|\n| `name` | str | 
工作流名称 |\n| `description` | str | 任务描述 |\n| `tools` | dict[str, ToolDef] | 工具定义字典 |\n| `required_steps` | list[str] | 必须执行的关键步骤 |\n| `terminal_tool` | str | 终止工具（执行后工作流结束） |\n| `system_prompt_template` | str | 系统提示词模板 |\n\n资料来源：[src/forge/core/workflow.py:1-50]()\n\n### ToolDef 和 ToolSpec\n\nToolDef 将工具的模式定义与实际实现绑定在一起：\n\n```python\n@dataclass\nclass ToolDef:\n    spec: ToolSpec\n    callable: Callable[..., Any]\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n前置条件（prerequisites）表达条件依赖关系：\n- 字符串形式：`\"read_file\"` — 任何对 read_file 的先前调用都满足要求\n- 字典形式：`{\"tool\": \"read_file\", \"match_arg\": \"path\"}` — 需要使用相同 path 参数的先前调用\n\n资料来源：[src/forge/core/workflow.py:90-105]()\n\n### LLM 客户端\n\nForge 通过统一的客户端接口支持多个后端：\n\n| 后端 | 客户端类 | 说明 |\n|------|----------|------|\n| Ollama | OllamaClient | 本地模型支持，推荐采样参数 |\n| Llamafile | LlamafileClient | 单文件可执行模型 |\n| Anthropic | AnthropicClient | Claude 系列模型 |\n| Proxy | ProxyServer | OpenAI 兼容代理 |\n\n资料来源：[README.md]()\n资料来源：[CONTRIBUTING.md]()\n资料来源：[src/forge/proxy/__main__.py:1-50]()\n\n## 架构设计\n\n### 系统组件图\n\n```mermaid\ngraph TD\n    subgraph 客户端层\n        User[用户代码]\n        Workflow[Workflow 定义]\n    end\n    \n    subgraph 核心层\n        Runner[WorkflowRunner]\n        Guardrails[Guardrails 检查]\n        Context[ContextManager]\n    end\n    \n    subgraph 后端层\n        Client[LLMClient]\n        Proxy[ProxyServer]\n        Server[ServerManager]\n    end\n    \n    subgraph 支持服务\n        Sampling[采样默认参数]\n        Errors[错误追踪]\n    end\n    \n    User --> Workflow\n    Workflow --> Runner\n    Runner --> Guardrails\n    Runner --> Context\n    Guardrails --> Client\n    Context --> Client\n    Client --> Proxy\n    Proxy --> Server\n    Server --> Ollama[(Ollama)]\n    Server --> Llamafile[(Llamafile)]\n    Runner --> Sampling\n    Guardrails --> Errors\n```\n\n### 工作流程\n\n```mermaid\nsequenceDiagram\n    participant U as 用户\n    participant R as WorkflowRunner\n    participant G as Guardrails\n    
participant C as ContextManager\n    participant L as LLMClient\n    \n    U->>R: run(workflow, prompt)\n    R->>L: send(messages)\n    L-->>R: LLMResponse\n    R->>G: check(response)\n    G-->>R: CheckResult\n    alt action == retry\n        R->>G: 获取 nudge\n        R->>L: send(messages + nudge)\n    end\n    alt action == step_blocked\n        R->>G: 获取 nudge\n        R->>L: send(messages + nudge)\n    end\n    alt action == fatal\n        R-->>U: 抛出异常\n    end\n    R->>C: update(token_count)\n    R->>L: 继续或结束\n```\n\n## 采样参数系统\n\n### 推荐采样参数\n\nForge 维护了一个经过验证的模型采样参数映射表，包含以下模型家族：\n\n- Qwen3 / Qwen3.5 / Qwen3.6\n- Qwen3-Coder\n- Gemma 4\n- Mistral Small 3.2\n- Devstral Small 2\n- Ministral 3 Instruct + Reasoning\n- Mistral Nemo\n- Granite 4.0\n\n每个参数条目都包含内联的 HuggingFace 模型卡片链接，确保数值经过逐一验证。\n\n资料来源：[src/forge/clients/sampling_defaults.py:1-40]()\n\n### 采样策略四象限\n\n| strict | 模型在映射表中 | 行为 |\n|--------|---------------|------|\n| True | 是 | 返回字典副本 |\n| True | 否 | 抛出 UnsupportedModelError |\n| False | 是 | 单次 INFO 日志；返回 {} |\n| False | 否 | 返回 {}（静默） |\n\n资料来源：[src/forge/clients/sampling_defaults.py:40-60]()\n\n### recommended_sampling 参数\n\n```python\nclient = OllamaClient(\n    model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n    recommended_sampling=True  # 启用推荐采样参数\n)\n```\n\n启用后，客户端会自动应用 MODEL_SAMPLING_DEFAULTS 中的参数，包括：\n- temperature\n- top_p\n- top_k\n- min_p\n- repeat_penalty\n- presence_penalty\n\n资料来源：[README.md]()\n\n## 上下文管理\n\n### ContextManager\n\nContextManager 负责管理对话历史的长度，防止超出模型的上下文窗口。\n\n```python\nfrom forge import ContextManager, TieredCompact\n\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    budget_tokens=8192\n)\n```\n\n### BudgetMode 预算模式\n\n| 模式 | 说明 |\n|------|------|\n| FORGE_FAST | 快速响应预算 |\n| FORGE_BALANCED | 平衡模式 |\n| FORGE_DEEP | 深度推理预算 |\n| MANUAL | 手动指定 token 数 |\n\n资料来源：[src/forge/server.py:80-100]()\n\n### KV Cache 量化\n\nForge 支持 KV 缓存量化以减少显存占用：\n\n| 量化类型 | 显存节省 | 说明 |\n|----------|----------|------|\n| q8_0 | ~50% | 
高质量量化 |\n| q4_0 | ~75% | 更高压缩率 |\n\n资料来源：[src/forge/server.py:30-35]()\n\n### KV Unified 模式\n\n当 `kv_unified=True` 时，所有槽位共享单一 KV 缓存池，每个槽位可使用完整上下文长度。\n\n## Guardrails 系统\n\nGuardrails 是 Forge 的核心保护机制，包含三个子组件：\n\n### 组件架构\n\n```mermaid\ngraph LR\n    subgraph Guardrails\n        V[ResponseValidator]\n        E[StepEnforcer]\n        T[ErrorTracker]\n    end\n```\n\n### ResponseValidator\n\n验证 LLM 响应的格式和内容，支持 XML 救援解析（针对 Qwen Coder 等模型）。\n\n### StepEnforcer\n\n强制执行必需的步骤顺序：\n\n```python\nGuardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\"\n)\n```\n\n### ErrorTracker\n\n追踪错误并控制重试次数：\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| max_retries | 3 | 最大重试次数 |\n| max_tool_errors | 2 | 最大工具错误数 |\n| max_premature_attempts | 3 | 提前终止尝试次数 |\n\n资料来源：[src/forge/guardrails/guardrails.py:1-60]()\n\n## 快速开始\n\n### 安装\n\n```bash\ngit clone https://github.com/antoinezambelli/forge.git\ncd forge\npython -m venv .venv\npip install -e \".[dev]\"\n```\n\n### 基本使用示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. 
Use the available tools to answer the user.\",\n)\n\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md]()\n\n## 后端自动管理\n\nForge 可以自动启动和管理 LLM 后端：\n\n```python\nfrom forge import setup_backend, BudgetMode\n\nasync def main():\n    client = OllamaClient(model=\"ministral-3:14b-instruct-2512-q4_K_M\")\n    server, ctx = await setup_backend(\n        backend=\"ollama\",\n        model=\"ministral-3:14b-instruct-2512-q4_K_M\",\n        budget_mode=BudgetMode.FORGE_FAST,\n        client=client,\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    # ... 运行工作流 ...\n    await server.stop()\n```\n\n支持的后端：\n- `ollama` — 使用 Ollama 服务\n- `llamaserver` — 使用 llama.cpp 服务器\n- `llamafile` — 使用单文件可执行模型\n\n资料来源：[src/forge/server.py:100-150]()\n\n## 项目结构\n\n```\nforge/\n├── src/forge/           # 库源码\n│   ├── clients/         # LLM 后端适配器\n│   ├── core/            # 工作流、运行器、消息、步骤\n│   ├── context/         # 上下文管理和压缩\n│   ├── prompts/         # 提示词模板和引导\n│   ├── guardrails/      # 保护栏实现\n│   ├── proxy/           # 代理服务器\n│   └── tools/           # 内置工具\n├── tests/\n│   ├── unit/            # 单元测试（确定性）\n│   └── eval/            # 评估测试（需要后端）\n│       ├── scenarios/   # 评估场景定义\n│       └── dashboard/   # React 评估仪表板\n├── docs/\n│   ├── decisions/       # 架构决策记录 (ADR)\n│   └── results/         # 评估结果\n├── examples/            # 使用示例\n└── scripts/             # 辅助脚本\n```\n\n资料来源：[CONTRIBUTING.md]()\n\n## 版本历史\n\n### v0.6.0 (2026-04-29)\n\n- **推荐采样参数** — 每个模型都有经过验证的采样配置\n- **采样覆盖** — 支持 per-call 采样参数覆盖\n- **代理采样透传** — 代理透传 OpenAI 兼容的采样字段\n- **高级推理评估套件** — 支持 Gemma 4、Qwen 3.5 等模型的推理任务\n\n### 
v0.5.0 (2026-04-19)\n\n- **消融研究运行器** — 支持自动化消融实验\n- **移除硬编码温度** — OllamaClient 和 LlamafileClient 不再发送硬编码采样参数\n- **Granite 4.0 支持** — 支持 OpenAI 风格的 tool call 格式\n\n### v0.4.x\n\n- Qwen Coder XML 救援解析\n- 28 模型评估数据集\n- llama.cpp 推理预算修复\n\n资料来源：[CHANGELOG.md]()\n\n## 测试\n\n### 单元测试\n\n```bash\n# 完整套件（865 个测试）\npython -m pytest tests/unit/ -v --tb=short\n\n# 带覆盖率\npython -m pytest tests/unit/ --cov=forge --cov-report=term-missing\n\n# 单文件测试\npython -m pytest tests/unit/test_runner.py -v\n```\n\n### 集成测试\n\n集成测试需要运行中的后端，跳过方式：\n\n```bash\npython -m pytest tests/ -m \"not integration\"\n```\n\n资料来源：[CONTRIBUTING.md]()\n\n## 相关资源\n\n| 资源 | 链接 |\n|------|------|\n| GitHub 仓库 | https://github.com/antoinezambelli/forge |\n| 用户指南 | docs/USER_GUIDE.md |\n| 后端设置 | docs/BACKEND_SETUP.md |\n| 模型指南 | docs/MODEL_GUIDE.md |\n| 评估仪表板 | docs/results/dashboard.html |\n| 架构决策记录 | docs/decisions/ |\n\n---\n\n<a id='page-installation'></a>\n\n## 安装与配置\n\n### 相关页面\n\n相关主题：[Forge 简介](#page-introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n- [src/forge/clients/sampling_defaults.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/clients/sampling_defaults.py)\n- [src/forge/proxy/__main__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/proxy/__main__.py)\n- [docs/BACKEND_SETUP.md](https://github.com/antoinezambelli/forge/blob/main/docs/BACKEND_SETUP.md)\n\n</details>\n\n# 安装与配置\n\n## 项目概述\n\nForge 是一个基于 Python 的 LLM 工作流框架，支持多种后端（Ollama、llamafile、llamaserver），提供工作流管理、上下文管理和工具调用等功能。安装与配置涉及环境准备、后端选择、客户端初始化以及可选的代理服务器部署。\n\n资料来源：[README.md:1-45]()\n\n## 环境要求\n\n### 系统要求\n\n| 组件 | 要求 |\n|------|------|\n| Python | 3.12+ |\n| 操作系统 | Linux/macOS/Windows |\n| 依赖管理 | pip |\n\n### Python 
版本\n\nForge 使用现代 Python 语法，包括类型联合操作符（`|`）等特性，明确要求 Python 3.12 或更高版本。\n\n资料来源：[CONTRIBUTING.md:1-15]()\n\n## 安装步骤\n\n### 标准安装\n\n```bash\ngit clone https://github.com/antoinezambelli/forge.git\ncd forge\npython -m venv .venv\npip install -e \".[dev]\"\n```\n\n`.[dev]` 标志安装包含开发依赖的完整包，包括测试框架 pytest。\n\n资料来源：[CONTRIBUTING.md:1-12]()\n\n### 可选依赖\n\n| 额外功能 | 安装方式 |\n|----------|----------|\n| 开发依赖 | `pip install -e \".[dev]\"` |\n| 仅运行时 | `pip install -e .` |\n\n## 后端配置\n\nForge 支持三种 LLM 后端，后端配置通过 `ServerManager` 类统一管理。\n\n### 支持的后端类型\n\n| 后端 | 标识符 | 特点 |\n|------|--------|------|\n| Ollama | `\"ollama\"` | 独立服务，使用 `model` 参数 |\n| llamafile | `\"llamafile\"` | 单文件可执行，使用 `gguf_path` |\n| llamaserver | `\"llamaserver\"` | llama.cpp 服务器，使用 `gguf_path` |\n\n资料来源：[src/forge/server.py:1-50]()\n\n### 后端初始化参数\n\n`ServerManager` 类的构造函数接受以下参数：\n\n```python\ndef __init__(\n    self,\n    backend: str,                    # 后端类型\n    port: int = 8080,                # 服务端口\n    models_dir: str | Path | None = None,  # GGUF 文件目录\n) -> None:\n```\n\n资料来源：[src/forge/server.py:120-140]()\n\n### 服务启动配置\n\n`start()` 方法支持丰富的配置选项：\n\n| 参数 | 类型 | 说明 |\n|------|------|------|\n| `model` | `str` | Ollama 模型名称 |\n| `gguf_path` | `str \\| Path` | GGUF 模型文件路径 |\n| `mode` | `str` | 模式（默认 `\"native\"`） |\n| `extra_flags` | `list[str]` | 额外 CLI 参数 |\n| `ctx_override` | `int` | 上下文长度覆盖 |\n| `cache_type_k` | `str` | KV 缓存量化类型（键） |\n| `cache_type_v` | `str` | KV 缓存量化类型（值） |\n| `n_slots` | `int` | 并发槽位数量 |\n| `kv_unified` | `bool` | 是否使用统一 KV 缓存 |\n\n```python\n# 启动示例\nserver = ServerManager(backend=\"ollama\", port=8080)\nawait server.start(\n    model=\"qwen3:8b\",\n    mode=\"reasoning\",\n    extra_flags=[\"--reasoning-format\", \"auto\"]\n)\n```\n\n资料来源：[src/forge/server.py:140-180]()\n\n### 上下文长度解析\n\n| 后端 | 上下文获取方式 |\n|------|----------------|\n| llamaserver/llamafile | 查询 `/props` 端点获取 `n_ctx` |\n| ollama | 使用 `ollama stop` 清理 VRAM |\n\n资料来源：[src/forge/server.py:200-220]()\n\n## 客户端配置\n\n### 
OllamaClient\n\n```python\nfrom forge.clients import OllamaClient\n\nclient = OllamaClient(\n    model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n    recommended_sampling=True  # 使用推荐采样参数\n)\n```\n\n资料来源：[README.md:20-35]()\n\n### LlamafileClient\n\n```python\nfrom forge.clients import LlamafileClient\n\nclient = LlamafileClient(\n    gguf_path=\"/path/to/model.gguf\",\n    recommended_sampling=True\n)\n```\n\n### 推荐采样参数\n\n`forge.clients.sampling_defaults` 模块提供经过验证的模型推荐采样参数：\n\n| 模型系列 | 支持的模型 |\n|----------|------------|\n| Qwen | Qwen3, Qwen3.5, Qwen3.6, Qwen3-Coder |\n| Gemma | Gemma 4 |\n| Mistral | Mistral Small 3.2, Mistral Nemo |\n| Devstral | Devstral Small 2 |\n| Ministral | Ministral 3 Instruct + Reasoning |\n| Granite | Granite 4.0 (h-micro, h-tiny) |\n\n每个配置行包含指向 HuggingFace 模型卡的 URL，参数值经过逐一验证。\n\n资料来源：[src/forge/clients/sampling_defaults.py:1-80]()\n\n### 采样参数策略\n\n| 模式 | 已知模型 | 未知模型 |\n|------|----------|----------|\n| `strict=True` | 返回推荐参数 | 抛出 `UnsupportedModelError` |\n| `strict=False` | 单次 INFO 日志，返回 `{}` | 返回 `{}`（静默） |\n\n```python\n# 严格模式示例\nparams = apply_sampling_defaults(model, strict=True)\n\n# 非严格模式示例\nparams = apply_sampling_defaults(model, strict=False)\n```\n\n资料来源：[src/forge/clients/sampling_defaults.py:80-120]()\n\n### 单次调用采样覆盖\n\n`send()` 和 `send_stream()` 方法支持 `sampling` 字典参数，字段逐一合并到客户端实例级采样：\n\n```python\nresponse = await client.send(\n    messages,\n    sampling={\"temperature\": 0.8, \"top_p\": 0.9}\n)\n```\n\n资料来源：[CHANGELOG.md:1-50]()\n\n## 代理服务器配置\n\nForge 提供 `ProxyServer` 用于转发 OpenAI 兼容请求到后端。\n\n### CLI 参数\n\n```bash\npython -m forge.proxy --backend ollama --model qwen3:8b\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `--backend` | 必需 | 后端类型 |\n| `--model` | 必需（Ollama） | 模型名称 |\n| `--gguf` | 必需（非Ollama） | GGUF 文件路径 |\n| `--backend-url` | 必需 | 后端服务器 URL |\n| `--backend-port` | 8080 | 后端端口 |\n| `--host` | 127.0.0.1 | 代理监听地址 |\n| `--port` | 8081 | 代理监听端口 |\n| `--budget-mode` | - | 预算模式 |\n| `--budget-tokens` | - | 手动 
token 预算 |\n| `--extra-flags` | - | 额外后端 CLI 参数 |\n| `--serialize` | None | 强制请求序列化 |\n| `--max-retries` | 3 | 单请求最大重试次数 |\n| `--no-rescue` | False | 禁用救援解析 |\n| `--verbose` | False | 详细日志 |\n\n资料来源：[src/forge/proxy/__main__.py:1-80]()\n\n### 代理采样穿透\n\n代理将 OpenAI 兼容的采样参数字段透传到后端：\n\n| 透传字段 |\n|----------|\n| `temperature` |\n| `top_p` |\n| `top_k` |\n| `min_p` |\n| `repeat_penalty` |\n| `presence_penalty` |\n| `seed` |\n\n资料来源：[CHANGELOG.md:1-50]()\n\n## 工作流配置\n\n### Workflow 组件\n\n```python\nfrom forge import Workflow, ToolDef, ToolSpec, WorkflowRunner, ContextManager, TieredCompact\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"查询城市天气\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"获取当前天气\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"你是一个有帮助的助手。\",\n)\n```\n\n### 上下文管理器配置\n\n| 策略 | 说明 |\n|------|------|\n| `TieredCompact` | 分层压缩策略 |\n| `budget_tokens` | Token 预算上限 |\n\n```python\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    budget_tokens=8192\n)\n```\n\n资料来源：[README.md:1-50]()\n\n## 完整运行示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"城市名称\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"查询城市天气\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"获取当前天气\",\n                parameters=GetWeatherParams,\n            ),\n            
callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"你是一个有帮助的助手。\",\n)\n\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(\n        client=client,\n        context_manager=ctx\n    )\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md:15-55]()\n\n## 常见问题\n\n### 后端选择\n\n| 场景 | 推荐后端 |\n|------|----------|\n| 快速原型/测试 | Ollama |\n| 生产部署 | llamaserver |\n| 单文件分发 | llamafile |\n\n### 版本信息\n\n通过 `importlib.metadata` 暴露版本：\n\n```python\nfrom importlib.metadata import version\nprint(version(\"forge-guardrails\"))\n```\n\n资料来源：[CHANGELOG.md:1-50]()\n\n---\n\n<a id='page-quickstart'></a>\n\n## 快速开始教程\n\n### 相关页面\n\n相关主题：[工作流内部机制](#page-workflow-internals), [Guardrails 系统](#page-guardrails)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/clients/llamafile.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/clients/llamafile.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n</details>\n\n# 快速开始教程\n\n本教程将帮助你在 5 分钟内上手 Forge 框架，构建第一个 LLM 驱动的工具调用工作流。Forge 是一个用于管理 LLM 与工具交互的框架，提供了工作流管理、上下文管理、护栏校验和多后端支持等核心功能。\n\n## 
环境准备\n\n### 系统要求\n\n| 要求 | 说明 |\n|------|------|\n| Python 版本 | 3.12+ |\n| 操作系统 | macOS, Linux, Windows |\n| LLM 后端 | Ollama、llama.cpp server 或 llamafile |\n\n### 安装步骤\n\n```bash\ngit clone https://github.com/antoinezambelli/forge.git\ncd forge\npython -m venv .venv\npip install -e \".[dev]\"\n```\n\n资料来源：[CONTRIBUTING.md:1-10]()\n\n### 运行测试验证安装\n\n```bash\n# 完整单元测试（865 个测试，无需 LLM 后端）\npython -m pytest tests/unit/ -v --tb=short\n\n# 单文件测试\npython -m pytest tests/unit/test_runner.py -v\n```\n\n资料来源：[CONTRIBUTING.md:14-22]()\n\n## 核心概念\n\n### 组件架构概览\n\n```mermaid\ngraph TD\n    A[用户请求] --> B[WorkflowRunner]\n    B --> C[Workflow]\n    C --> D[ToolDef + ToolSpec]\n    D --> E[LLMClient]\n    E --> F[Backend Ollama/llama.cpp/llamafile]\n    F --> G[LLMResponse]\n    G --> H[Guardrails]\n    H --> I{校验结果}\n    I -->|通过| J[执行工具]\n    I -->|失败| K[重试/终止]\n    J --> L[ContextManager]\n    L --> B\n```\n\n### 核心组件表\n\n| 组件 | 文件位置 | 职责 |\n|------|----------|------|\n| `Workflow` | `src/forge/core/workflow.py` | 定义工作流的工具、步骤和提示模板 |\n| `ToolDef` | `src/forge/core/workflow.py:60-70` | 绑定工具 Schema 与实际可调用函数 |\n| `ToolSpec` | `src/forge/core/workflow.py` | 定义工具的名称、描述和参数 Schema |\n| `WorkflowRunner` | `src/forge/core/runner.py` | 执行工作流，协调 LLM 与工具调用 |\n| `OllamaClient` | `src/forge/clients/ollama.py` | Ollama 后端适配器 |\n| `ContextManager` | `src/forge/context/manager.py` | 管理上下文窗口和上下文压缩 |\n| `Guardrails` | `src/forge/guardrails/guardrails.py` | 验证 LLM 响应，执行步骤校验 |\n\n资料来源：[README.md:1-35](), [src/forge/core/workflow.py:1-75]()\n\n## 快速开始示例\n\n### 基础天气查询工作流\n\n以下是一个完整的基础示例，展示了如何使用 Forge 构建天气查询工具调用工作流：\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\n# 第一步：定义工具函数\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\n# 第二步：定义工具参数 Schema\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City 
name\")\n\n# 第三步：构建工作流\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. Use the available tools to answer the user.\",\n)\n\n# 第四步：初始化并运行\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md:7-42]()\n\n## 工作流执行流程\n\n### 内部执行步骤\n\n```mermaid\nsequenceDiagram\n    participant User as 用户\n    participant Runner as WorkflowRunner\n    participant Workflow as Workflow\n    participant LLM as LLM Client\n    participant Backend as LLM 后端\n    participant Guardrails as Guardrails\n    participant Tools as 工具函数\n    participant Context as ContextManager\n\n    User->>Runner: run(workflow, prompt)\n    Runner->>Workflow: 获取工具定义和提示\n    Runner->>LLM: 发送请求\n    LLM->>Backend: API 调用\n    Backend-->>LLM: LLMResponse\n    LLM-->>Runner: LLMResponse\n    Runner->>Guardrails: check(response)\n    Guardrails->>Guardrails: 验证工具调用\n    Guardrails-->>Runner: CheckResult\n    alt 校验通过\n        Runner->>Tools: 执行工具\n        Tools-->>Runner: 工具结果\n        Runner->>Context: 更新上下文\n        Runner->>Runner: 循环直到完成\n    else 校验失败\n        Runner->>LLM: 发送重试提示\n    end\n    Runner-->>User: 工作流完成\n```\n\n### ToolDef 与 ToolSpec 的关系\n\n`ToolDef` 是核心数据结构，将工具的 Schema 
与实际实现绑定：\n\n```python\n@dataclass\nclass ToolDef:\n    \"\"\"将工具 Schema 绑定到其实现。\n    \n    Prerequisites 表达条件依赖：\"如果你调用这个工具，\n    必须先调用工具 X\"。\n    \"\"\"\n    spec: ToolSpec           # 工具规范（名称、描述、参数）\n    callable: Callable       # 实际执行的函数\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `spec` | `ToolSpec` | 工具的规范定义 |\n| `callable` | `Callable` | Python 可调用对象 |\n| `prerequisites` | `list` | 前置依赖工具列表 |\n\n资料来源：[src/forge/core/workflow.py:60-75]()\n\n## 多步骤工作流\n\n### 带步骤要求的工作流\n\n对于复杂任务，可以定义必须按顺序执行的步骤：\n\n```python\n# 定义工具\ndef search(query: str) -> str:\n    return f\"Search results for: {query}\"\n\ndef lookup(url: str) -> str:\n    return f\"Content from: {url}\"\n\ndef answer(question: str) -> str:\n    return \"Final answer\"\n\n# 定义参数 Schema\nclass SearchParams(BaseModel):\n    query: str\n\nclass LookupParams(BaseModel):\n    url: str\n\nclass AnswerParams(BaseModel):\n    question: str\n\n# 构建需要按顺序执行的工作流\nworkflow = Workflow(\n    name=\"research\",\n    description=\"Multi-step research workflow\",\n    tools={\n        \"search\": ToolDef(\n            spec=ToolSpec(name=\"search\", description=\"Search\", parameters=SearchParams),\n            callable=search,\n        ),\n        \"lookup\": ToolDef(\n            spec=ToolSpec(name=\"lookup\", description=\"Look up URL\", parameters=LookupParams),\n            callable=lookup,\n        ),\n        \"answer\": ToolDef(\n            spec=ToolSpec(name=\"answer\", description=\"Provide answer\", parameters=AnswerParams),\n            callable=answer,\n        ),\n    },\n    required_steps=[\"search\", \"lookup\"],  # 必须先执行 search 和 lookup\n    terminal_tool=\"answer\",\n    system_prompt_template=\"You are a research assistant.\",\n)\n```\n\n### 护栏（Guardrails）系统\n\nForge 内置护栏系统用于验证 LLM 响应：\n\n```mermaid\ngraph TD\n    A[LLMResponse] --> B[ResponseValidator]\n    A --> C[StepEnforcer]\n    A --> D[ErrorTracker]\n    B --> 
E{检查结果}\n    C --> E\n    D --> E\n    E -->|fatal| F[终止]\n    E -->|retry| G[重试]\n    E -->|step_blocked| H[步骤阻塞]\n    E -->|execute| I[执行工具]\n```\n\n| 护栏组件 | 职责 |\n|----------|------|\n| `ResponseValidator` | 验证响应格式，提取工具调用 |\n| `StepEnforcer` | 确保必需步骤已完成 |\n| `ErrorTracker` | 跟踪错误次数和重试状态 |\n\n资料来源：[src/forge/guardrails/guardrails.py:1-80]()\n\n## 上下文管理\n\n### TieredCompact 策略\n\n`TieredCompact` 是推荐的上下文压缩策略：\n\n```python\nfrom forge import ContextManager, TieredCompact\n\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),  # 保留最近 2 轮完整\n    budget_tokens=8192\n)\n```\n\n| 参数 | 说明 |\n|------|------|\n| `keep_recent` | 保留最近的完整消息轮数 |\n| `budget_tokens` | 上下文预算（令牌数）|\n\n### 预算模式\n\n| 模式 | 说明 |\n|------|------|\n| `FORGE_FAST` | 快速模式，较小预算 |\n| `FORGE_BALANCED` | 平衡模式 |\n| `FORGE_DEEP` | 深度模式，较大预算 |\n| `MANUAL` | 手动指定令牌数 |\n\n资料来源：[src/forge/server.py:1-50]()\n\n## 常用配置\n\n### OllamaClient 配置\n\n```python\nclient = OllamaClient(\n    model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n    recommended_sampling=True  # 使用推荐的采样参数\n)\n```\n\n### 推荐采样参数\n\n`recommended_sampling=True` 时，Forge 会自动应用模型卡推荐的采样参数：\n\n| 参数 | 说明 |\n|------|------|\n| `temperature` | 生成温度 |\n| `top_p` | Top-p 采样 |\n| `top_k` | Top-k 采样 |\n| `min_p` | 最小概率阈值 |\n| `repeat_penalty` | 重复惩罚 |\n\n资料来源：[src/forge/clients/sampling_defaults.py:1-60]()\n\n## 完整项目结构\n\n```\nforge/\n├── src/forge/               # 库源码\n│   ├── clients/             # LLM 后端适配器\n│   │   ├── ollama.py\n│   │   ├── llamafile.py\n│   │   └── sampling_defaults.py\n│   ├── core/                # 核心组件\n│   │   ├── workflow.py      # Workflow 和 ToolDef\n│   │   ├── runner.py        # WorkflowRunner\n│   │   ├── context.py       # 上下文管理\n│   │   └── steps.py         # 步骤追踪\n│   ├── guardrails/          # 护栏系统\n│   └── prompts/             # 提示模板\n├── tests/\n│   ├── unit/                # 单元测试\n│   └── eval/                # 评估工具\n└── examples/\n    └── foreign_loop.py      # 外部循环集成示例\n```\n\n资料来源：[CONTRIBUTING.md:25-40]()\n\n## 
外部循环集成\n\n如果你已有 LLM 调用逻辑，可以使用 Forge 的护栏组件进行验证：\n\n```python\nfrom forge.guardrails import Guardrails, ErrorTracker\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    terminal_tool=\"answer\",\n    required_steps=[\"search\", \"lookup\"],\n)\n\ndef handle_response(response):\n    result = guardrails.check(response)\n    \n    if result.action == \"fatal\":\n        return f\"FATAL: {result.reason}\"\n    \n    if result.action in (\"retry\", \"step_blocked\"):\n        return f\"{result.action}: {result.nudge.content[:80]}...\"\n    \n    # 执行工具\n    tool_calls = result.tool_calls\n    executed = [tc.tool for tc in tool_calls]\n    done = guardrails.record(executed)\n    return f\"executed {executed}\" + (\" -- DONE\" if done else \"\")\n```\n\n资料来源：[examples/foreign_loop.py:1-100]()\n\n## 常见问题\n\n### 运行报连接错误\n\n确保 Ollama 服务已启动：\n\n```bash\nollama serve\n```\n\n### 上下文超出限制\n\n调整 `budget_tokens` 或使用更激进的压缩策略：\n\n```python\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=1),  # 减少保留轮数\n    budget_tokens=4096                       # 减小预算\n)\n```\n\n### 模型不支持\n\n检查 `MODEL_SAMPLING_DEFAULTS` 是否包含该模型，或禁用推荐采样：\n\n```python\nclient = OllamaClient(\n    model=\"your-model\",\n    recommended_sampling=False  # 不使用推荐参数\n)\n```\n\n## 下一步\n\n- 阅读 [用户指南](docs/USER_GUIDE.md) 了解高级特性\n- 查看 [MODEL_GUIDE.md](docs/MODEL_GUIDE.md) 了解支持的模型\n- 运行评估：`python -m tests.eval.eval_runner --scenarios your_scenario --runs 5`\n\n---\n\n<a id='page-architecture'></a>\n\n## 架构概述\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [工作流内部机制](#page-workflow-internals)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n- [src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n- 
[src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n- [README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n</details>\n\n# 架构概述\n\nForge 是一个基于 LLM（大型语言模型）的自动化工作流框架，专注于工具调用（Tool Calling）和上下文管理。其核心设计理念是通过结构化的 Workflow、可靠的工具执行机制以及智能的上下文压缩，实现可预测、可重复的 LLM 驱动任务自动化。\n\n## 核心设计原则\n\n| 原则 | 说明 |\n|------|------|\n| **异步优先** | 全部采用 `asyncio` 实现，所有客户端方法和运行器均为异步 资料来源：[CONTRIBUTING.md:32]() |\n| **类型安全** | 使用 Pydantic 定义工具参数模式和 API 响应模型 |\n| **可拔插架构** | 支持多种 LLM 后端适配器，客户端层完全解耦 |\n| **Guardrail 机制** | 通过验证器和强制器确保工作流执行的正确性 |\n\n## 系统架构图\n\n```mermaid\ngraph TD\n    subgraph 客户端层\n        OA[OllamaClient]\n        LF[LlamafileClient]\n        AC[AnthropicClient]\n        PR[ProxyClient]\n    end\n\n    subgraph 核心层\n        WR[WorkflowRunner]\n        WF[Workflow]\n        CT[ToolCall]\n    end\n\n    subgraph 上下文层\n        CM[ContextManager]\n        TC[TieredCompact]\n    end\n\n    subgraph 后端管理层\n        SM[ServerManager]\n        BM[BudgetMode]\n    end\n\n    subgraph 工具执行层\n        GR[Guardrails]\n        RV[ResponseValidator]\n        SE[StepEnforcer]\n    end\n\n    OA --> WR\n    LF --> WR\n    AC --> WR\n    PR --> WR\n    \n    WR --> WF\n    WR --> CM\n    CM --> TC\n    \n    WF --> CT\n    CT --> GR\n    GR --> RV\n    GR --> SE\n```\n\n## 项目目录结构\n\n```\nsrc/forge/           # 库源码\n  clients/           # LLM 后端适配器（每个后端一个）\n  core/              # Workflow、Runner、消息、步骤\n  context/           # 上下文管理和压缩\n  prompts/           # 提示模板和 Nudge\n  guardrails/        # 安全护栏机制\ntests/               # 测试套件\n  unit/              # 确定性单元测试\n  eval/              # 评估测试（需真实后端）\ndocs/                # 用户文档\n```\n\n资料来源：[CONTRIBUTING.md:44-53]()\n\n## 核心组件详解\n\n### 1. 
Workflow（工作流）\n\n`Workflow` 是任务定义的中心模型，包含工具注册、步骤约束和终止条件：\n\n| 属性 | 类型 | 说明 |\n|------|------|------|\n| `name` | str | 工作流名称 |\n| `description` | str | 工作流描述 |\n| `tools` | dict[str, ToolDef] | 工具定义字典 |\n| `required_steps` | list[str] | 必须按顺序执行的步骤 |\n| `terminal_tool` | str | 终止工具（工作流结束时必须调用） |\n\n资料来源：[src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n#### ToolDef 结构\n\n```mermaid\nclassDiagram\n    class ToolDef {\n        spec: ToolSpec\n        callable: Callable\n        prerequisites: list[str | dict]\n        name: str\n    }\n    \n    class ToolSpec {\n        name: str\n        description: str\n        parameters: BaseModel\n        get_json_schema() dict\n    }\n    \n    ToolDef --> ToolSpec : contains\n```\n\n`ToolDef` 将工具模式绑定到其实现，支持先决条件表达：\n\n- **字符串形式**：`\"read_file\"` — 任何先前对 read_file 的调用都满足\n- **字典形式**：`{\"tool\": \"read_file\", \"match_arg\": \"path\"}` — 要求先前调用具有相同 path 参数值\n\n资料来源：[src/forge/core/workflow.py:45-57]()\n\n### 2. WorkflowRunner（工作流运行器）\n\n`WorkflowRunner` 是执行引擎，协调 LLM 交互、工具调用和上下文管理：\n\n```mermaid\nsequenceDiagram\n    participant U as 用户\n    participant R as WorkflowRunner\n    participant C as LLMClient\n    participant G as Guardrails\n    participant T as 工具执行\n    participant CM as ContextManager\n\n    U->>R: run(workflow, user_input)\n    loop 执行循环\n        R->>C: generate(messages)\n        C-->>R: LLMResponse\n        R->>G: check(response)\n        G-->>R: CheckResult\n        alt 需要重试\n            R->>C: generate(retry_nudge)\n        else 工具调用\n            R->>T: execute(tool_calls)\n            T-->>R: results\n            R->>CM: compact()\n            R->>C: continue\n        else 终止\n            R-->>U: final_result\n        end\n    end\n```\n\n资料来源：[src/forge/core/runner.py]()\n\n#### Runner 执行流程\n\n1. **验证阶段**：检查 LLM 响应是否包含有效工具调用\n2. **护栏检查**：通过 `Guardrails.check()` 验证响应\n3. **执行阶段**：运行工具并收集结果\n4. **上下文更新**：压缩上下文以保持在 token 预算内\n5. 
**循环继续**：直到调用终止工具或达到最大重试次数\n\n### 3. ContextManager（上下文管理器）\n\n上下文管理器负责在多轮对话中维护和压缩 token 预算：\n\n| 模式 | 说明 |\n|------|------|\n| `TieredCompact` | 分层压缩策略，保留最近 N 轮对话 |\n| `MANUAL` | 手动指定 token 预算 |\n| `FORGE_FAST` | 快速模式预算 |\n| `FULL` | 完整上下文模式 |\n\n资料来源：[src/forge/server.py:140-150]()\n\n```mermaid\ngraph LR\n    A[用户输入] --> B[构建消息]\n    B --> C{Token 超限?}\n    C -->|否| D[发送给 LLM]\n    C -->|是| E[执行压缩]\n    E --> D\n    D --> F[收集响应]\n    F --> G[工具执行]\n    G --> B\n```\n\n#### TieredCompact 策略\n\n```python\n# 示例配置\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    budget_tokens=8192\n)\n```\n\n资料来源：[README.md:18]()\n\n### 4. Guardrails（安全护栏）\n\nGuardrails 是确保工作流正确执行的核心机制，由三个子组件构成：\n\n```mermaid\nclassDiagram\n    class Guardrails {\n        _validator: ResponseValidator\n        _enforcer: StepEnforcer\n        _errors: ErrorTracker\n        check(response) CheckResult\n    }\n    \n    class ResponseValidator {\n        rescue_enabled: bool\n        retry_nudge_fn: Callable\n    }\n    \n    class StepEnforcer {\n        required_steps: list\n        terminal_tools: frozenset\n        max_premature_attempts: int\n    }\n    \n    class ErrorTracker {\n        max_retries: int\n        max_tool_errors: int\n    }\n    \n    Guardrails --> ResponseValidator\n    Guardrails --> StepEnforcer\n    Guardrails --> ErrorTracker\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:45-75]()\n\n#### Guardrails 参数\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `tool_names` | — | 可用工具名称列表 |\n| `terminal_tool` | — | 终止工具名称 |\n| `required_steps` | None | 必须按顺序执行的步骤列表 |\n| `max_retries` | 3 | 最大重试次数 |\n| `max_tool_errors` | 2 | 最大工具错误数 |\n| `rescue_enabled` | True | 启用救援解析 |\n| `max_premature_attempts` | 3 | 提前终止尝试次数上限 |\n| `retry_nudge` | None | 自定义重试提示函数 |\n\n#### CheckResult 动作\n\n| 动作 | 说明 |\n|------|------|\n| `proceed` | 继续执行工具调用 |\n| `retry` | 需要重试并显示 nudge 提示 |\n| `step_blocked` | 步骤被阻止，等待正确步骤 |\n| `fatal` | 致命错误，终止工作流 |\n\n### 5. 
LLM 后端适配器\n\nForge 支持多种 LLM 后端，通过统一接口抽象差异：\n\n```mermaid\ngraph TD\n    subgraph 后端适配器\n        OC[OllamaClient]\n        LFC[LlamafileClient]\n        ANC[AnthropicClient]\n        PC[ProxyClient]\n    end\n\n    subgraph 公共接口\n        PI[LLMClient 接口]\n        GC[generate]\n        GX[get_context_length]\n    end\n\n    OC --> PI\n    LFC --> PI\n    ANC --> PI\n    PC --> PI\n```\n\n#### 支持的后端\n\n| 后端 | 配置文件 | 说明 |\n|------|----------|------|\n| Ollama | `model` 参数 | 本地模型服务 |\n| Llamafile | `gguf_path` 参数 | 单文件 GGUF 格式 |\n| Llama Server | `gguf_path` 参数 | llama.cpp 服务器 |\n| Anthropic | API Key | Claude 系列模型 |\n\n资料来源：[src/forge/server.py:90-110]()\n\n### 6. ServerManager（服务器管理器）\n\n`ServerManager` 负责后端进程的生命周期管理：\n\n```mermaid\nstateDiagram-v2\n    [*] --> Idle: 创建实例\n    Idle --> Running: start()\n    Running --> Idle: stop()\n    Running --> Running: start() (不同模型)\n    \n    note right of Running: 自动复用相同配置\n    note right of Idle: 进程已终止\n```\n\n#### 缓存优化机制\n\nServerManager 会缓存当前运行的配置，仅在配置变更时重启服务器：\n\n```python\n# 配置相等性检查\nif (\n    self._current_model == model\n    and self._current_mode == mode\n    and self._current_ctx == ctx_override\n    and self._current_flags == flags\n):\n    return  # 复用现有服务器\n```\n\n资料来源：[src/forge/server.py:40-50]()\n\n#### Budget 模式解析\n\n```mermaid\nflowchart LR\n    A[BudgetMode] --> B{MANUAL?}\n    A --> C{OLLAMA?}\n    A --> D{FORGE_FAST?}\n    A --> E{FULL?}\n    \n    B -->|是| F[使用 manual_tokens]\n    C -->|是| G[获取 ollama 上下文]\n    D --> H[计算 fast 预算]\n    E --> I[获取服务器完整上下文]\n```\n\n## 数据流图\n\n```mermaid\nflowchart TD\n    subgraph 输入层\n        U[用户输入]\n        W[Workflow 定义]\n        T[工具实现]\n    end\n\n    subgraph 核心引擎\n        R[WorkflowRunner]\n        G[Guardrails]\n        M[ContextManager]\n    end\n\n    subgraph LLM 层\n        C[LLMClient]\n        S[ServerManager]\n    end\n\n    U --> R\n    W --> R\n    T --> R\n    R --> C\n    C --> S\n    R --> G\n    R --> M\n    \n    M -->|压缩消息| C\n    G -->|验证结果| 
R\n```\n\n## 快速启动示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. Use the available tools to answer the user.\",\n)\n\nasync def main():\n    client = OllamaClient(\n        model=\"ministral-3:8b-instruct-2512-q4_K_M\",\n        recommended_sampling=True\n    )\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md:7-42]()\n\n## 扩展机制\n\n### 添加新 LLM 后端\n\n1. 在 `src/forge/clients/` 目录创建新的客户端类\n2. 实现统一的异步接口方法\n3. 注册到客户端注册表\n\n### 添加 Guardrail\n\n1. 在 `Guardrails` 类中添加新的检查逻辑\n2. 在 `AblationConfig` 中添加开关\n3. 
创建消融实验预设进行验证\n\n资料来源：[CONTRIBUTING.md:14-22]()\n\n## 总结\n\nForge 的架构围绕三个核心目标设计：\n\n| 目标 | 实现方式 |\n|------|----------|\n| **可靠性** | Guardrails 确保工具调用正确性和工作流完整性 |\n| **效率** | TieredCompact 上下文压缩保持在 token 预算内 |\n| **可扩展性** | 插件化客户端支持多种 LLM 后端 |\n\n整个系统基于异步架构设计，所有核心组件（Runner、Client、ServerManager）均支持并发执行，确保在多任务场景下的高性能表现。\n\n---\n\n<a id='page-core-components'></a>\n\n## 核心组件详解\n\n### 相关页面\n\n相关主题：[架构概述](#page-architecture), [上下文管理](#page-context-management), [SlotWorker 槽位调度](#page-slot-worker)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n- [src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/core/messages.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/messages.py)\n- [src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n</details>\n\n# 核心组件详解\n\n## 概述\n\nForge 是一个基于 LLM 的工具调用框架，其核心组件负责管理对话工作流、执行步骤追踪、上下文管理和后端服务调度。整个系统采用异步架构设计，通过 `WorkflowRunner` 协调各个组件完成复杂的多轮对话任务。\n\n核心组件按照职责划分为以下几个模块：\n\n| 模块 | 文件路径 | 主要职责 |\n|------|----------|----------|\n| 工作流引擎 | `src/forge/core/runner.py` | 执行 Workflow、管理会话生命周期 |\n| 步骤追踪 | `src/forge/core/steps.py` | 追踪必需步骤和前置依赖 |\n| 消息管理 | `src/forge/core/messages.py` | 管理对话历史和消息结构 |\n| 上下文管理 | `src/forge/context/manager.py` | 上下文压缩和预算控制 |\n| 守卫检查 | `src/forge/guardrails/guardrails.py` | 响应验证和重试机制 |\n| 服务管理 | `src/forge/server.py` | LLM 后端服务的生命周期管理 
|\n\n资料来源：[src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n---\n\n## 工作流引擎 (WorkflowRunner)\n\n### 架构概述\n\n`WorkflowRunner` 是 Forge 框架的核心执行引擎，负责协调整个对话工作流的生命周期。它接收用户输入、与 LLM 后端交互、执行工具调用，并管理上下文预算。\n\n```mermaid\ngraph TD\n    A[用户输入] --> B[WorkflowRunner.run]\n    B --> C[初始化 StepTracker]\n    C --> D[LLM 推理]\n    D --> E{Guardrails 检查}\n    E -->|通过| F{工具调用?}\n    E -->|失败| G[重试或终止]\n    F -->|是| H[执行工具]\n    F -->|否| I[返回结果]\n    H --> J[上下文压缩]\n    J --> D\n    I --> K[返回最终响应]\n```\n\n资料来源：[src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py)\n\n### 核心数据结构\n\n#### Workflow\n\n`Workflow` 是工作流的定义容器，包含工具配置、必需步骤和终端条件：\n\n```python\n@dataclass\nclass Workflow:\n    name: str                          # 工作流名称\n    description: str                   # 描述\n    tools: dict[str, ToolDef]          # 工具定义字典\n    required_steps: list[str]          # 必需执行步骤列表\n    terminal_tool: str | frozenset[str] # 终端工具名称\n    system_prompt_template: str        # 系统提示模板\n```\n\n资料来源：[src/forge/core/workflow.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n#### ToolDef\n\n`ToolDef` 将工具模式绑定到具体实现：\n\n```python\n@dataclass\nclass ToolDef:\n    \"\"\"绑定工具模式到其实现\"\"\"\n    spec: ToolSpec                     # 工具规格\n    callable: Callable[..., Any]       # 可调用实现\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n前置条件 (`prerequisites`) 支持两种形式：\n- **字符串形式**：名称匹配（`\"read_file\"` — 任何对 `read_file` 的调用都满足）\n- **字典形式**：参数匹配（`{\"tool\": \"read_file\", \"match_arg\": \"path\"}` — 需要相同 `path` 参数的调用）\n\n资料来源：[src/forge/core/workflow.py:85-100](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n#### ToolSpec\n\n`ToolSpec` 定义工具的外部接口规范：\n\n```python\nclass ToolSpec(BaseModel):\n    \"\"\"LLM 看到的工具规格\"\"\"\n    name: str\n    description: str\n    parameters: type[BaseModel]  # Pydantic 
模型\n```\n\n资料来源：[src/forge/core/workflow.py:20-30](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n---\n\n## 步骤追踪系统 (StepTracker)\n\n### 功能概述\n\n`StepTracker` 负责追踪必需步骤的完成状态和工具执行历史，用于强制执行前置条件约束。该组件独立于消息历史存在，上下文压缩不会影响步骤完成状态。\n\n```mermaid\ngraph LR\n    A[ToolCall] --> B{检查前置条件}\n    B -->|满足| C[记录执行]\n    B -->|不满足| D[返回错误]\n    C --> E[更新 completed_steps]\n    C --> F[更新 executed_tools]\n```\n\n资料来源：[src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n\n### 核心数据结构\n\n| 类/数据结构 | 职责 |\n|------------|------|\n| `StepTracker` | 追踪已完成步骤和已执行工具 |\n| `PrerequisiteCheck` | 前置条件检查结果 |\n\n#### StepTracker\n\n```python\n@dataclass\nclass StepTracker:\n    \"\"\"追踪必需步骤完成状态和工具执行历史\"\"\"\n    required_steps: list[str]\n    completed_steps: dict[str, None] = field(default_factory=dict)\n    executed_tools: dict[str, list[dict[str, Any]]] = field(default_factory=dict)\n\n    def record(self, tool_name: str, args: dict[str, Any] | None = None) -> None:\n        \"\"\"记录成功的工具执行\"\"\"\n        self.completed_steps[tool_name] = None\n        self.executed_tools.setdefault(tool_name, []).append(args or {})\n\n    def is_satisfied(self) -> bool:\n        \"\"\"所有必需步骤是否都已调用\"\"\"\n        return all(s in self.completed_steps for s in self.required_steps)\n```\n\n#### PrerequisiteCheck\n\n```python\n@dataclass\nclass PrerequisiteCheck:\n    \"\"\"工具调用的前置条件检查结果\"\"\"\n    satisfied: bool\n    missing: list[str]  # 未满足的前置工具名列表\n```\n\n资料来源：[src/forge/core/steps.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n\n---\n\n## 上下文管理器 (ContextManager)\n\n### 功能概述\n\n`ContextManager` 负责管理对话上下文的大小和 token 预算。通过上下文压缩策略，确保长对话不会超出模型的上下文窗口限制。\n\n### 压缩策略\n\n框架支持多种上下文压缩策略，主要包括：\n\n| 策略 | 描述 |\n|------|------|\n| `TieredCompact` | 分层压缩，保留最近 N 条消息 |\n| `KeepRecentStrategy` | 仅保留最近的固定数量消息 |\n\n### 预算模式\n\n| 模式 | 说明 |\n|------|------|\n| `FORGE_FAST` | 快速模式，使用较小的上下文预算 |\n| `FORGE_BALANCED` | 
平衡模式 |\n| `MANUAL` | 手动指定 token 数量 |\n\n资料来源：[src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n\n---\n\n## 守卫系统 (Guardrails)\n\n### 架构概述\n\n守卫系统通过多层检查确保 LLM 响应符合预期，包括响应验证、前置步骤强制和错误追踪。\n\n```mermaid\ngraph TD\n    A[LLM Response] --> B[ResponseValidator]\n    B --> C{格式正确?}\n    C -->|是| D[StepEnforcer]\n    C -->|否| E[Rescue Parser]\n    E --> F{解析成功?}\n    F -->|是| D\n    F -->|否| G[Retry Nudge]\n    D --> H{步骤满足?}\n    H -->|是| I[Tool Execution]\n    H -->|否| J[ErrorTracker]\n    G --> K[重新发送 LLM]\n```\n\n资料来源：[src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n\n### 核心组件\n\n| 组件 | 职责 |\n|------|------|\n| `ResponseValidator` | 验证响应格式和工具调用提取 |\n| `StepEnforcer` | 强制执行必需步骤 |\n| `ErrorTracker` | 追踪重试次数和错误类型 |\n\n### Guardrails 配置参数\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `tool_names` | `list[str]` | 必需 | 可用工具名称列表 |\n| `terminal_tool` | `str \\| frozenset[str]` | 必需 | 终端工具名称 |\n| `required_steps` | `list[str] \\| None` | `None` | 必需步骤列表 |\n| `max_retries` | `int` | `3` | 最大重试次数 |\n| `max_tool_errors` | `int` | `2` | 最大工具错误次数 |\n| `rescue_enabled` | `bool` | `True` | 启用救援解析 |\n| `max_premature_attempts` | `int` | `3` | 过早终止尝试次数 |\n| `retry_nudge` | `Callable[[str], str] \\| None` | `None` | 自定义重试提示 |\n\n资料来源：[src/forge/guardrails/guardrails.py:50-80](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n\n### CheckResult 返回值\n\n`Guardrails.check()` 方法返回 `CheckResult`，包含：\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `status` | `str` | `\"pass\"`, `\"retry\"`, `\"fatal\"` |\n| `message` | `str \\| None` | 状态消息 |\n| `nudge` | `str \\| None` | 重试时发送给 LLM 的提示 |\n\n---\n\n## 服务管理器 (ServerManager)\n\n### 功能概述\n\n`ServerManager` 负责管理 LLM 后端服务的生命周期，包括启动、停止和配置。\n\n### 支持的后端\n\n| 后端 | 配置要求 | 说明 |\n|------|----------|------|\n| `ollama` | `model` 参数 | 使用 Ollama 模型 |\n| `llamaserver` | 
`gguf_path` 参数 | 本地 GGUF 文件 |\n| `llamafile` | `gguf_path` 参数 | Llamafile 可执行文件 |\n\n### 核心功能\n\n```python\nclass ServerManager:\n    async def start(\n        self,\n        model: str,\n        gguf_path: str | Path,\n        mode: str = \"native\",\n        extra_flags: list[str] | None = None,\n        ctx_override: int | None = None,\n        cache_type_k: str | None = None,\n        cache_type_v: str | None = None,\n        n_slots: int | None = None,\n        kv_unified: bool = False,\n    ) -> None:\n        \"\"\"启动后端服务\"\"\"\n```\n\n### 配置参数\n\n| 参数 | 类型 | 说明 |\n|------|------|------|\n| `model` | `str` | 模型标识符 |\n| `gguf_path` | `str \\| Path` | GGUF 文件路径 |\n| `mode` | `str` | 运行模式 (`native`, `reasoning` 等) |\n| `extra_flags` | `list[str] \\| None` | 额外的 CLI 参数 |\n| `ctx_override` | `int \\| None` | 上下文长度覆盖 |\n| `cache_type_k` | `str \\| None` | KV 缓存键量化类型 |\n| `cache_type_v` | `str \\| None` | KV 缓存值量化类型 |\n| `n_slots` | `int \\| None` | 并发槽位数 |\n| `kv_unified` | `bool` | 统一 KV 缓存 |\n\n资料来源：[src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 消息系统 (Messages)\n\n### 消息类型\n\n框架定义了标准化的消息类型用于通信：\n\n| 消息类型 | 描述 |\n|----------|------|\n| `UserMessage` | 用户输入消息 |\n| `AssistantMessage` | 助手响应消息 |\n| `ToolMessage` | 工具执行结果消息 |\n| `SystemMessage` | 系统级消息 |\n\n### ToolCall 结构\n\n`ToolCall` 是 LLM 返回的标准化工具调用表示：\n\n```python\nclass ToolCall(BaseModel):\n    \"\"\"LLM 返回的已验证工具调用\"\"\"\n    tool: str\n```\n\n资料来源：[src/forge/core/workflow.py:110-115](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n\n---\n\n## 组件协作流程\n\n### 完整执行流程\n\n```mermaid\nsequenceDiagram\n    participant User as 用户\n    participant Runner as WorkflowRunner\n    participant LLM as LLM Client\n    participant Step as StepTracker\n    participant Context as ContextManager\n    participant Guard as Guardrails\n    participant Tool as 工具执行\n\n    User->>Runner: run(workflow, user_input)\n    Runner->>Step: 初始化 
StepTracker\n    Runner->>Context: 初始化 ContextManager\n    Runner->>LLM: 发送推理请求\n    LLM-->>Runner: LLMResponse\n    Runner->>Guard: check(response)\n    Guard->>Step: 检查前置条件\n    Step-->>Guard: PrerequisiteCheck\n    alt 响应通过\n        Guard-->>Runner: CheckResult.pass\n        Runner->>Tool: 执行工具\n        Tool-->>Runner: 工具结果\n        Runner->>Context: 压缩上下文\n        Runner->>LLM: 发送下一轮\n    else 需要重试\n        Guard-->>Runner: CheckResult.retry + nudge\n        Runner->>LLM: 重试\n    else 致命错误\n        Guard-->>Runner: CheckResult.fatal\n        Runner-->>User: 返回错误\n    end\n```\n\n### 错误恢复机制\n\n| 错误类型 | 处理策略 | 最大重试 |\n|----------|----------|----------|\n| 格式错误 | 触发救援解析 | 由 `max_retries` 控制 |\n| 工具执行失败 | 记录错误，重试 | 由 `max_tool_errors` 控制 |\n| 过早终止 | 发送提示重试 | 由 `max_premature_attempts` 控制 |\n| 超时 | 记录超时错误 | 300s 固定超时 |\n\n---\n\n## 快速开始示例\n\n```python\nimport asyncio\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"城市名称\")\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"查询城市天气\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"获取当前天气\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"你是一个有帮助的助手。\",\n)\n\nasync def main():\n    client = OllamaClient(model=\"ministral-3:8b-instruct-2512-q4_K_M\", recommended_sampling=True)\n    ctx = ContextManager(strategy=TieredCompact(keep_recent=2), budget_tokens=8192)\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, 
\"巴黎天气怎么样？\")\n\nasyncio.run(main())\n```\n\n资料来源：[README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n\n---\n\n## 总结\n\nForge 框架的核心组件通过清晰的职责划分和异步架构，提供了强大的 LLM 工具调用能力：\n\n- **WorkflowRunner** 作为中央协调器，管理整个执行生命周期\n- **StepTracker** 确保必需步骤的执行顺序和前置条件满足\n- **ContextManager** 处理长对话的上下文压缩\n- **Guardrails** 提供多层次的响应验证和错误恢复\n- **ServerManager** 抽象了不同 LLM 后端的差异\n\n这些组件协同工作，使得 Forge 能够可靠地执行复杂的多轮对话任务。\n\n---\n\n<a id='page-workflow-internals'></a>\n\n## 工作流内部机制\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [内置工具系统](#page-tools)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/prompts/nudges.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/prompts/nudges.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n</details>\n\n# 工作流内部机制\n\n## 概述\n\nForge 的工作流系统是一个异步优先的 LLM 工具调用编排框架，核心设计目标是将复杂的 LLM 交互与工具执行解耦。工作流由 `Workflow` 类定义，封装了工具定义、步骤要求、终端工具等元信息，通过 `WorkflowRunner` 驱动执行。\n\n工作流机制包含三个核心子系统：\n\n1. **工具定义与绑定系统** — 定义可用工具及其参数模式\n2. **步骤追踪与前置条件系统** — 确保工具按正确顺序调用\n3. 
**Guardrails 保护系统** — 验证响应、处理重试、阻止过早终止\n\n资料来源：[src/forge/core/workflow.py:1-50]()\n\n## 核心数据模型\n\n### Workflow 类\n\n`Workflow` 是工作流的核心抽象，作为 dataclass 存储所有元信息。\n\n```python\n@dataclass\nclass Workflow:\n    name: str\n    description: str\n    tools: dict[str, ToolDef]\n    required_steps: list[str]\n    terminal_tool: str\n    system_prompt_template: str\n```\n\n| 属性 | 类型 | 说明 |\n|------|------|------|\n| `name` | `str` | 工作流唯一标识名 |\n| `description` | `str` | 工作流功能描述 |\n| `tools` | `dict[str, ToolDef]` | 工具名称到定义的映射 |\n| `required_steps` | `list[str]` | 必须按顺序调用的工具列表 |\n| `terminal_tool` | `str` | 结束工作流的工具名 |\n| `system_prompt_template` | `str` | 系统提示模板 |\n\n资料来源：[src/forge/core/workflow.py:60-80]()\n\n### ToolDef 与 ToolSpec\n\n`ToolDef` 将工具模式绑定到实际可执行函数，`ToolSpec` 定义工具的 JSON Schema 参数规范。\n\n```python\n@dataclass\nclass ToolDef:\n    spec: ToolSpec\n    callable: Callable[..., Any]\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n`prerequisites` 字段支持两种前置条件表达方式：\n\n| 类型 | 示例 | 含义 |\n|------|------|------|\n| `str` | `\"read_file\"` | 调用过任意参数的 read_file 即可 |\n| `dict` | `{\"tool\": \"read_file\", \"match_arg\": \"path\"}` | 必须调用过 path 参数相同的 read_file |\n\n资料来源：[src/forge/core/workflow.py:85-110]()\n\n### ToolCall 数据模型\n\n`ToolCall` 是 LLM 返回的工具调用验证结果。\n\n```python\nclass ToolCall(BaseModel):\n    tool: str\n    args: dict[str, Any] = {}\n    reasoning: str | None = None\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `tool` | `str` | 工具名称 |\n| `args` | `dict[str, Any]` | 工具参数 |\n| `reasoning` | `str \\| None` | 推理过程（用于带 thinking 的模型） |\n\n资料来源：[src/forge/core/workflow.py:120-130]()\n\n## 步骤追踪机制\n\n### StepTracker\n\n`StepTracker` 由工作流运行器持有，独立于消息历史，因此不受上下文压缩的影响。\n\n```python\n@dataclass\nclass StepTracker:\n    required_steps: list[str]\n    completed_steps: dict[str, None] = field(default_factory=dict)\n    executed_tools: dict[str, list[dict[str, Any]]] = field(default_factory=dict)\n```\n\n| 方法 | 功能 |\n|------|------|\n| 
`record(tool_name, args)` | 记录工具已执行及参数 |\n| `is_satisfied()` | 检查所有必需步骤是否完成 |\n| `pending()` | 返回未完成步骤列表 |\n\n资料来源：[src/forge/core/steps.py:30-55]()\n\n### PrerequisiteCheck\n\n前置条件检查结果用于判断工具调用是否满足依赖要求。\n\n```python\n@dataclass\nclass PrerequisiteCheck:\n    satisfied: bool\n    missing: list[str]\n```\n\n资料来源：[src/forge/core/steps.py:15-25]()\n\n## Guardrails 系统\n\nGuardrails 是 Forge 的保护中间件，将三个独立检查器组合成统一的两阶段 API：\n\n```mermaid\ngraph LR\n    A[LLM Response] --> B[Guardrails.check]\n    B --> C{ResponseValidator}\n    B --> D{StepEnforcer}\n    B --> E{ErrorTracker}\n    C --> F[CheckResult]\n    D --> F\n    E --> F\n```\n\n### CheckResult 结果类型\n\n```python\naction: Literal[\"execute\", \"retry\", \"step_blocked\", \"fatal\"]\ntool_calls: list[ToolCall] | None\nnudge: Nudge | None\nreason: str | None\n```\n\n| Action | 含义 | 后续处理 |\n|--------|------|----------|\n| `execute` | 响应有效，可执行工具 | 执行 tool_calls |\n| `retry` | 响应无效需重试 | 注入 nudge 后重新请求 |\n| `step_blocked` | 违反步骤顺序 | 注入 nudge 后重新请求 |\n| `fatal` | 达到最大重试次数 | 终止工作流 |\n\n资料来源：[src/forge/guardrails/guardrails.py:140-160]()\n\n### ResponseValidator\n\n验证 LLM 响应是否包含有效工具调用，支持救援解析。\n\n```python\nclass ResponseValidator:\n    def __init__(\n        self,\n        tool_names: list[str],\n        rescue_enabled: bool = True,\n        retry_nudge_fn: Callable[[str], str] | None = None,\n    )\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `tool_names` | — | 有效工具名列表 |\n| `rescue_enabled` | `True` | 是否启用救援解析 |\n| `retry_nudge_fn` | `None` | 自定义重试提示生成器 |\n\n救援解析支持以下格式：\n- OpenAI 格式：`{\"name\": ..., \"arguments\": ...}`\n- Qwen Coder XML 格式：`<function=name><parameter=key>value</parameter></function>`\n\n资料来源：[src/forge/guardrails/guardrails.py:40-80]()\n\n### StepEnforcer\n\n强制执行步骤顺序，防止过早调用终端工具。\n\n```python\nclass StepEnforcer:\n    def __init__(\n        self,\n        required_steps: list[str],\n        terminal_tools: frozenset[str],\n        max_premature_attempts: int = 3,\n    )\n```\n\n超过 
`max_premature_attempts` 次过早终止尝试后返回 `fatal`。\n\n资料来源：[src/forge/guardrails/guardrails.py:80-110]()\n\n### ErrorTracker\n\n追踪连续错误和工具执行失败。\n\n```python\nclass ErrorTracker:\n    def __init__(\n        self,\n        max_retries: int = 3,\n        max_tool_errors: int = 2,\n    )\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `max_retries` | `3` | 连续错误响应后终止 |\n| `max_tool_errors` | `2` | 连续工具执行失败后终止 |\n\n资料来源：[src/forge/guardrails/guardrails.py:110-140]()\n\n## 提示模板与 Nudge\n\n### 步骤阻止提示\n\n当模型尝试在完成必需步骤前调用终端工具时，系统注入阻止提示：\n\n```python\ndef step_blocked_nudge(terminal_tool, pending_steps, tier=1):\n    tier = max(1, min(3, tier))\n    steps = \", \".join(pending_steps)\n    if tier == 1:\n        return f\"You cannot call {terminal_tool} yet. You must first complete these required steps: {steps}.\"\n```\n\n| 层级 | 语气强度 | 使用场景 |\n|------|----------|----------|\n| 1 | 礼貌提示 | 首次违规 |\n| 2 | 直接命令 | 第二次违规 |\n| 3 | 强烈警告 | 第三次违规 |\n\n资料来源：[src/forge/prompts/nudges.py:1-30]()\n\n### 前置条件提示\n\n当工具调用缺少前置依赖时触发：\n\n```python\ndef prerequisite_nudge(tool_name, missing_prereqs):\n    prereqs = \", \".join(missing_prereqs)\n    return f\"You cannot call {tool_name} yet. 
You must first call: {prereqs}.\"\n```\n\n资料来源：[src/forge/prompts/nudges.py:35-55]()\n\n## 工作流执行流程\n\n### 完整执行状态机\n\n```mermaid\ngraph TD\n    A[初始化 WorkflowRunner] --> B[构建系统提示]\n    B --> C[发送初始请求到 LLM]\n    C --> D[接收 LLM 响应]\n    D --> E{Guardrails.check}\n    E -->|execute| F[执行工具]\n    E -->|retry| G[注入 nudge]\n    E -->|step_blocked| G\n    E -->|fatal| H[终止工作流]\n    F --> I{工具为终端工具?}\n    I -->|是| J[返回最终结果]\n    I -->|否| K[记录步骤]\n    K --> C\n    G --> C\n```\n\n### 分阶段 API 用法\n\n对于外部编排循环，Forge 提供两种使用方式：\n\n**简化 API（全部集成）**\n```python\nfrom forge.guardrails import Guardrails\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n\nresult = guardrails.check(response)\nif result.action == \"execute\":\n    executed = [tc.tool for tc in result.tool_calls]\n    done = guardrails.record(executed)\n```\n\n**分阶段 API（细粒度控制）**\n```python\nfrom forge.guardrails import ErrorTracker, ResponseValidator, StepEnforcer\n\nvalidator = ResponseValidator(tool_names=[...], rescue_enabled=True)\nenforcer = StepEnforcer(required_steps=[...], terminal_tool=\"answer\")\nerrors = ErrorTracker(max_retries=3, max_tool_errors=2)\n\n# 每阶段可自定义处理逻辑\n```\n\n资料来源：[examples/foreign_loop.py:1-80]()\n\n## 快速开始示例\n\n```python\nfrom pydantic import BaseModel, Field\nfrom forge import (\n    Workflow, ToolDef, ToolSpec,\n    WorkflowRunner, OllamaClient,\n    ContextManager, TieredCompact,\n)\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n\ndef get_weather(city: str) -> str:\n    return f\"72°F and sunny in {city}\"\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n          
  callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant.\",\n)\n\nasync def main():\n    client = OllamaClient(model=\"ministral-3:8b-instruct-2512-q4_K_M\")\n    ctx = ContextManager(strategy=TieredCompact(keep_recent=2), budget_tokens=8192)\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    await runner.run(workflow, \"What's the weather in Paris?\")\n```\n\n资料来源：[README.md:1-50]()\n\n## 架构设计要点\n\n### 异步优先设计\n\n所有客户端方法和运行器均为 `async` 实现，支持高并发工具调用场景：\n\n```python\nasync def run(self, workflow: Workflow, user_input: str) -> WorkflowResult:\n    ...\n```\n\n资料来源：[src/forge/core/workflow.py:1-30]()\n\n### 上下文管理隔离\n\n`ContextManager` 独立于步骤追踪，允许上下文压缩不影响步骤完成状态：\n\n```mermaid\ngraph LR\n    subgraph WorkflowRunner\n        A[StepTracker] --- B[ContextManager]\n    end\n    B --- C[消息历史]\n    A --- D[completed_steps]\n```\n\n### 工具参数模式\n\n使用 Pydantic 模型定义工具参数，确保类型安全：\n\n```python\nparameters: type[BaseModel]  # 必须是 Pydantic 模型\n```\n\n可通过 `get_json_schema()` 方法导出 JSON Schema：\n\n```python\ndef get_json_schema(self) -> dict[str, Any]:\n    return self.parameters.model_json_schema()\n```\n\n资料来源：[src/forge/core/workflow.py:50-65]()\n\n## 总结\n\nForge 的工作流机制通过清晰的职责分离实现了可靠的 LLM 工具调用编排：\n\n| 组件 | 职责 |\n|------|------|\n| `Workflow` | 定义工作流元信息和工具集合 |\n| `StepTracker` | 追踪步骤完成状态和前置条件 |\n| `Guardrails` | 统一验证、重试、阻止逻辑 |\n| `WorkflowRunner` | 协调执行流程和上下文管理 |\n\n该设计支持从简单的单工具调用到复杂的多步骤工作流，同时为外部编排系统提供了灵活的中间件接口。\n\n---\n\n<a id='page-guardrails'></a>\n\n## Guardrails 系统\n\n### 相关页面\n\n相关主题：[工作流内部机制](#page-workflow-internals), [核心组件详解](#page-core-components)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/guardrails/__init__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/__init__.py)\n- 
[src/forge/guardrails/response_validator.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/response_validator.py)\n- [src/forge/guardrails/step_enforcer.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/step_enforcer.py)\n- [src/forge/guardrails/error_tracker.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/error_tracker.py)\n- [src/forge/guardrails/nudge.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/nudge.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/prompts/nudges.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/prompts/nudges.py)\n</details>\n\n# Guardrails 系统\n\n## 概述\n\nGuardrails 系统是 Forge 框架的核心安全与流程控制中间件，设计用于在外部编排循环中确保 LLM 响应符合预期的工作流约束。该系统通过三重验证机制（响应验证、步骤强制、错误追踪）保护工具调用流程的完整性和可靠性。 资料来源：[src/forge/guardrails/guardrails.py:46-52]()\n\n## 系统架构\n\nGuardrails 系统由四个核心组件构成，它们协同工作以实现完整的中间件功能：\n\n```mermaid\ngraph TB\n    subgraph Guardrails系统\n        RV[ResponseValidator<br/>响应验证器]\n        SE[StepEnforcer<br/>步骤强制器]\n        ET[ErrorTracker<br/>错误追踪器]\n        NR[Nudge模块<br/>提示生成]\n    end\n    \n    LLM[LLM响应] --> RV\n    RV --> SE\n    SE --> ET\n    NR --> SE\n    \n    subgraph CheckResult\n        AC[action]\n        TC[tool_calls]\n        ND[nudge]\n        RS[reason]\n    end\n    \n    ET --> CheckResult\n```\n\n| 组件 | 职责 | 文件位置 |\n|------|------|----------|\n| ResponseValidator | 解析 LLM 响应，提取工具调用，处理文本救援 | `response_validator.py` |\n| StepEnforcer | 强制执行必需步骤顺序，阻止提前终止 | `step_enforcer.py` |\n| ErrorTracker | 追踪连续重试和工具错误次数 | `error_tracker.py` |\n| Nudge | 生成用户友好的引导提示消息 | `nudge.py` / `prompts/nudges.py` |\n\n资料来源：[src/forge/guardrails/guardrails.py:24-45]()\n\n## CheckResult 数据模型\n\n`check()` 方法返回 `CheckResult` 对象，包含四个字段：\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `action` | `Literal[\"execute\", \"retry\", \"step_blocked\", 
\"fatal\"]` | 下一步操作指令 |\n| `tool_calls` | `list[ToolCall] \\| None` | 提取的工具调用列表 |\n| `nudge` | `Nudge \\| None` | 注入的提示消息（retry/step_blocked 时设置） |\n| `reason` | `str \\| None` | 人类可读的解释（仅 fatal 时设置） |\n\n```python\nclass CheckResult(BaseModel):\n    action: Literal[\"execute\", \"retry\", \"step_blocked\", \"fatal\"]\n    tool_calls: list[ToolCall] | None = None\n    nudge: Nudge | None = None\n    reason: str | None = None\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:67-74]()\n\n## Guardrails 核心 API\n\n### 构造函数参数\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `tool_names` | `list[str]` | 必需 | 有效工具名称列表 |\n| `terminal_tool` | `str \\| frozenset[str]` | 必需 | 可结束工作流的工具 |\n| `required_steps` | `list[str] \\| None` | `None` | 终端工具前必须调用的步骤 |\n| `max_retries` | `int` | `3` | 连续错误响应后返回 fatal 的阈值 |\n| `max_tool_errors` | `int` | `2` | 工具执行失败后耗尽的阈值 |\n| `rescue_enabled` | `bool` | `True` | 是否启用文本救援解析 |\n| `max_premature_attempts` | `int` | `3` | 提前调用终端工具的次数上限 |\n| `retry_nudge` | `Callable[[str], str] \\| None` | `None` | 自定义重试提示生成器 |\n\n资料来源：[src/forge/guardrails/guardrails.py:76-91]()\n\n### 主要方法\n\n#### check() 方法\n\n在每次 LLM 响应后、执行任何工具前调用此方法。它依次通过三个验证组件：\n\n```mermaid\nsequenceDiagram\n    participant App as 调用方\n    participant G as Guardrails\n    participant RV as ResponseValidator\n    participant SE as StepEnforcer\n    participant ET as ErrorTracker\n    \n    App->>G: check(response)\n    G->>RV: validate(response)\n    RV-->>G: parsed_tool_calls\n    G->>SE: enforce(tool_calls)\n    SE-->>G: step_result\n    G->>ET: track_errors()\n    ET-->>G: error_status\n    G-->>App: CheckResult\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:93-126]()\n\n#### record() 方法\n\n在工具执行完成后调用，用于更新错误追踪器状态：\n\n```python\ndef record(self, executed: list[str]) -> bool:\n    \"\"\"Record executed tools and check if workflow is done.\n    \n    Returns:\n        True if the terminal tool was reached (workflow complete).\n    \"\"\"\n    self._errors.tool_succeeded()\n 
   return self._enforcer.is_terminal_reached(executed)\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:128-140]()\n\n## 组件详解\n\n### ResponseValidator（响应验证器）\n\n负责解析 LLM 返回的原始响应，提取结构化工具调用，并处理救援解析场景：\n\n- **工具调用提取**：从 JSON 格式或特定文本格式中提取工具名和参数\n- **救援解析**：当模型输出纯文本而非工具调用时，可配置地尝试恢复\n- **自定义重试提示**：支持通过 `retry_nudge_fn` 生成动态重试消息\n\n资料来源：[src/forge/guardrails/response_validator.py]()\n\n### StepEnforcer（步骤强制器）\n\n确保工作流按正确的步骤顺序执行：\n\n```mermaid\ngraph LR\n    A[search] --> B[lookup]\n    B --> C[answer]\n    \n    style A fill:#90EE90\n    style B fill:#90EE90\n    style C fill:#FFD700\n```\n\n**关键特性**：\n\n- 验证 `required_steps` 列表中的所有工具已被调用\n- 检测提前调用 `terminal_tool` 的情况（`max_premature_attempts` 控制）\n- 支持通过 `is_terminal_reached()` 判断工作流是否完成\n\n资料来源：[src/forge/guardrails/step_enforcer.py]()\n\n### ErrorTracker（错误追踪器）\n\n维护错误状态机，防止无限重试循环：\n\n| 错误类型 | 计数器 | 达到上限后果 |\n|----------|--------|--------------|\n| 连续重试 | `_consecutive_retries` | `check()` 返回 `\"fatal\"` |\n| 工具错误 | `_consecutive_tool_errors` | 工作流标记为\"耗尽\" |\n\n当工具成功执行时，两个计数器都会重置。\n\n资料来源：[src/forge/guardrails/error_tracker.py]()\n\n## Nudge 提示系统\n\nNudge 模块负责生成用户友好的引导消息，帮助模型理解并修正其行为。\n\n### step_nudge() 函数\n\n当模型试图跳过必需步骤直接调用终端工具时触发：\n\n| tier 值 | 语气强度 | 示例消息 |\n|---------|----------|----------|\n| 1 | 礼貌 | \"You cannot call answer yet. You must first complete these required steps: search, lookup.\" |\n| 2 | 直接 | \"You must call one of these tools now: search, lookup. Pick one.\" |\n| 3 | 强制 | \"STOP. You MUST call one of: search, lookup. 
Do NOT call answer.\" |\n\n```python\ndef step_nudge(\n    terminal_tool: str,\n    pending_steps: list[str],\n    tier: int = 1,\n) -> str:\n    tier = max(1, min(3, tier))  # 限制在 1-3 范围内\n```\n\n资料来源：[src/forge/prompts/nudges.py:18-40]()\n\n### prerequisite_nudge() 函数\n\n当模型调用带有前置依赖的工具但未满足前置条件时触发：\n\n```python\ndef prerequisite_nudge(tool_name: str, missing_prereqs: list[str]) -> str:\n    prereqs = \", \".join(missing_prereqs)\n    return (\n        f\"You cannot call {tool_name} yet. \"\n        f\"You must first call: {prereqs}. \"\n        \"Call the prerequisite tool now.\"\n    )\n```\n\n资料来源：[src/forge/prompts/nudges.py:42-56]()\n\n## 使用模式\n\n### 简化 API（推荐）\n\n使用 `Guardrails` 类进行一站式验证：\n\n```python\nfrom forge.guardrails import Guardrails\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n\ndef handle_response(response):\n    result = guardrails.check(response)\n    \n    if result.action == \"fatal\":\n        return f\"FATAL: {result.reason}\"\n    \n    if result.action in (\"retry\", \"step_blocked\"):\n        # 将 nudge.content 注入对话历史\n        return f\"{result.action}: {result.nudge.content}\"\n    \n    # 执行工具\n    executed = [tc.tool for tc in result.tool_calls]\n    done = guardrails.record(executed)\n    return f\"executed {executed}\" + (\" -- DONE\" if done else \"\")\n```\n\n资料来源：[examples/foreign_loop.py:19-40]()\n\n### 粒度 API（高级）\n\n直接访问各个验证组件，实现自定义行为：\n\n```python\nfrom forge.guardrails import ErrorTracker, ResponseValidator, StepEnforcer\n\nvalidator = ResponseValidator(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    rescue_enabled=True,\n)\nenforcer = StepEnforcer(\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\nerrors = ErrorTracker(max_retries=3, max_tool_errors=2)\n```\n\n适用于需要日志记录、指标收集或条件性救援的场景。\n\n资料来源：[examples/foreign_loop.py:52-66]()\n\n### 与 respond 工具集成\n\nForge 支持通过 
`respond()` 工具让模型直接返回文本响应：\n\n```python\nfrom forge.tools import RESPOND_TOOL_NAME, respond_spec\n\nguardrails = Guardrails(\n    tool_names=[\"search\", \"lookup\", \"answer\", RESPOND_TOOL_NAME],\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n\ndef handle_response(response):\n    result = guardrails.check(response)\n    # ...\n    for tc in result.tool_calls:\n        if tc.tool == RESPOND_TOOL_NAME:\n            message = tc.args.get(\"message\", \"\")\n            return f\"MODEL SAYS: {message}\"\n```\n\n资料来源：[examples/foreign_loop.py:95-115]()\n\n## Guardrails 工作流状态图\n\n```mermaid\nstateDiagram-v2\n    [*] --> 等待响应\n    等待响应 --> 验证响应: LLM 返回\n    验证响应 --> 响应有效: 工具调用已提取\n    验证响应 --> 响应无效: 解析失败\n    响应无效 --> 检查重试次数: 递增 retry\n    检查重试次数 --> 等待响应: retry_nudge\n    检查重试次数 --> 致命错误: 达到上限\n    响应有效 --> 检查步骤顺序\n    检查步骤顺序 --> 步骤通过: 验证通过\n    检查步骤顺序 --> 步骤受阻: 缺少步骤\n    步骤受阻 --> 重置提前尝试: step_nudge\n    步骤通过 --> 执行工具\n    执行工具 --> 记录执行: record()\n    记录执行 --> 工作流完成?: terminal reached\n    记录执行 --> 工具错误?: 工具执行失败\n    工作流完成? --> [*]: 是\n    工作流完成? --> 等待响应: 否\n    工具错误? --> 等待响应: 重试\n    工具错误? --> [*]: 达到上限\n```\n\n## 配置与消融研究\n\nGuardrails 的各个组件可通过 `AblationConfig` 独立切换，用于评估每个防护措施的性能影响：\n\n```python\n# tests/eval/ablation.py 中的配置示例\n@dataclass\nclass AblationConfig:\n    enable_rescue: bool = True\n    enable_step_enforcement: bool = True\n    enable_error_tracker: bool = True\n```\n\n在 CONTRIBUTING.md 中提到，每个 guardrail 可以独立消融：\n\n> Guardrails live in the runner (`src/forge/core/runner.py`) and nudge templates (`src/forge/prompts/nudges.py`). 
Each guardrail can be independently toggled via ablation presets in `tests/eval/ablation.py`.\n\n资料来源：[CONTRIBUTING.md:3-8]()\n\n## 总结\n\nGuardrails 系统通过模块化的验证、强制和追踪机制，为 Forge 工作流提供了可靠的安全防护：\n\n| 特性 | 描述 |\n|------|------|\n| **模块化设计** | ResponseValidator、StepEnforcer、ErrorTracker 可独立使用 |\n| **灵活配置** | 支持自定义重试次数、错误阈值、救援行为 |\n| **可观测性** | 通过 `nudge` 机制提供清晰的错误引导 |\n| **状态管理** | 内置错误计数和步骤追踪，自动防止循环 |\n| **消融支持** | 完整的独立切换能力，便于性能分析 |\n\n---\n\n<a id='page-context-management'></a>\n\n## 上下文管理\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [架构概述](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/context/manager.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/manager.py)\n- [src/forge/context/strategies.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/strategies.py)\n- [src/forge/context/hardware.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/context/hardware.py)\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n</details>\n\n# 上下文管理\n\n## 概述\n\n上下文管理（Context Management）是 forge 框架中负责管理对话历史和令牌预算的核心系统。在与大语言模型（LLM）进行多轮交互时，上下文窗口的大小直接影响能保留多少历史信息和工具调用记录。forge 的上下文管理系统通过预算模式（Budget Mode）和压缩策略（Compaction Strategy）的组合，确保在有限的令牌预算内最大化有效信息的保留。\n\n该系统位于 `src/forge/context/` 模块下，包含三个核心子模块：\n\n| 模块 | 职责 |\n|------|------|\n| `manager.py` | 上下文管理的主入口，负责预算解析和消息压缩协调 |\n| `strategies.py` | 定义压缩策略，如 `TieredCompact`（分层压缩） |\n| `hardware.py` | 硬件感知配置，根据设备资源调整上下文参数 |\n\n资料来源：[CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md)\n\n---\n\n## 核心概念\n\n### 令牌预算（Budget Tokens）\n\n令牌预算是上下文管理的基础参数，定义了允许保留在上下文窗口中的最大令牌数量。在初始化 `ContextManager` 时通过 `budget_tokens` 参数指定：\n\n```python\nfrom forge import ContextManager, TieredCompact\n\nctx = ContextManager(\n    strategy=TieredCompact(keep_recent=2),\n    
budget_tokens=8192\n)\n```\n\n资料来源：[README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n\n### 预算模式（Budget Mode）\n\n预算模式决定了如何确定令牌预算的来源。forge 支持多种预算解析策略：\n\n| 模式 | 说明 | 适用场景 |\n|------|------|----------|\n| `FORGE_FAST` | 快速模式，使用较小的上下文预算 | 简单查询、快速响应 |\n| `FORGE_BALANCED` | 平衡模式，在速度和深度间取得平衡 | 常规工作流 |\n| `FORGE_DEEP` | 深度模式，使用更大的上下文预算 | 复杂推理任务 |\n| `MANUAL` | 手动指定预算值 | 精确控制令牌使用 |\n\n```python\nfrom forge.server import BudgetMode\n\nbudget = await server.start_with_budget(\n    model=identity,\n    gguf_path=gguf_path or \"\",\n    mode=mode,\n    budget_mode=BudgetMode.FORGE_FAST,\n    client=client,\n)\n```\n\n资料来源：[src/forge/server.py:1-50](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 架构设计\n\n### 系统架构图\n\n```mermaid\ngraph TD\n    A[WorkflowRunner] --> B[ContextManager]\n    B --> C[压缩策略 Strategy]\n    B --> D[预算解析 Budget Resolution]\n    \n    C --> E[TieredCompact]\n    C --> F[其他策略...]\n    \n    D --> G[ServerManager]\n    G --> H[服务端上下文查询]\n    G --> I[手动预算指定]\n    \n    J[硬件配置 hardware.py] --> B\n```\n\n### 预算解析流程\n\n当使用服务端管理时，预算的解析遵循以下逻辑：\n\n```mermaid\ngraph TD\n    A[resolve_budget 调用] --> B{预算模式}\n    B -->|MANUAL| C[使用 manual_tokens]\n    B -->|OLLAMA| D{模式判断}\n    B -->|LLAMASERVER| E[查询服务端上下文]\n    B -->|LLAMAFILE| E\n    \n    D -->|MANUAL| C\n    D -->|其他| F[查询服务端上下文]\n    \n    C --> G[返回预算令牌数]\n    E --> G\n    F --> G\n```\n\n资料来源：[src/forge/server.py:200-280](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 分层压缩策略（TieredCompact）\n\n### 核心思想\n\n`TieredCompact` 是一种分层保留策略，其核心思想是保留最近的消息完整不变，同时对较早的消息进行摘要或丢弃。这种策略在保持对话连贯性的同时，最大化地利用有限的令牌预算。\n\n### 参数配置\n\n| 参数 | 类型 | 说明 | 备注 |\n|------|------|------|------|\n| `keep_recent` | `int` | 保留最近 N 轮完整对话 | 默认值：2 |\n| `budget_tokens` | `int` | 总令牌预算 | 继承自 ContextManager |\n\n```python\n# 保留最近 2 轮完整消息，剩余预算用于早期消息摘要\nstrategy = 
TieredCompact(keep_recent=2)\n```\n\n资料来源：[README.md](https://github.com/antoinezambelli/forge/blob/main/README.md)\n\n### 工作流程\n\n```mermaid\ngraph LR\n    A[消息列表] --> B[识别最近 N 轮]\n    B --> C[保留完整]\n    D[早期消息] --> E{是否可摘要}\n    E -->|是| F[生成摘要]\n    E -->|否| G[丢弃]\n    F --> H[放入上下文]\n    C --> H\n    G --> H\n```\n\n---\n\n## 硬件感知配置\n\n### 硬件适配\n\n`hardware.py` 模块负责根据运行环境的硬件资源（主要是 GPU 显存）调整上下文配置。这对于在消费级硬件上运行大模型尤为重要。\n\n### 主要考量因素\n\n| 因素 | 影响 |\n|------|------|\n| GPU 显存大小 | 决定最大并发槽数（n_slots） |\n| KV Cache 类型 | 影响上下文长度和质量 |\n| 上下文分区策略 | 共享 vs 分区 KV Cache |\n\n```python\n# 多槽位配置示例\nserver = ServerManager(...)\nawait server.start_with_budget(\n    n_slots=4,           # 4 个并发槽位\n    kv_unified=True,    # 共享 KV Cache\n)\n```\n\n资料来源：[src/forge/server.py:30-40](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 服务端上下文管理\n\n### 上下文长度查询\n\n服务端通过 `/props` 端点报告其配置的上下文长度：\n\n```python\nasync def get_server_context(self) -> int:\n    \"\"\"获取服务端报告的上下文长度\"\"\"\n    try:\n        props = await self.query_props()\n    except (httpx.HTTPError, BackendError) as exc:\n        raise BudgetResolutionError(cause=exc) from exc\n    \n    ctx = props.get(\"default_generation_settings\", {}).get(\"n_ctx\")\n    if ctx is None:\n        raise BudgetResolutionError()\n    return ctx\n```\n\n资料来源：[src/forge/server.py:180-195](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n### KV Cache 配置\n\n对于非 Ollama 后端（llamaserver / llamafile），支持细粒度的 KV Cache 量化配置：\n\n| 参数 | 说明 | 示例值 |\n|------|------|--------|\n| `cache_type_k` | Key 缓存量化类型 | `\"q8_0\"`, `\"q4_0\"` |\n| `cache_type_v` | Value 缓存量化类型 | `\"q8_0\"`, `\"q4_0\"` |\n\n```python\nserver = ServerManager(backend=\"llamaserver\", port=8080)\nawait server.start_with_budget(\n    cache_type_k=\"q4_0\",\n    cache_type_v=\"q8_0\",\n)\n```\n\n资料来源：[src/forge/server.py:25-35](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 使用指南\n\n### 
基础用法\n\n```python\nimport asyncio\nfrom forge import (\n    WorkflowRunner,\n    OllamaClient,\n    ContextManager,\n    TieredCompact,\n)\n\nasync def main():\n    # 创建上下文管理器\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=8192\n    )\n    \n    # 创建客户端和运行器\n    client = OllamaClient(model=\"ministral-3:8b-q4_K_M\")\n    runner = WorkflowRunner(client=client, context_manager=ctx)\n    \n    # 运行工作流\n    # await runner.run(workflow, user_message)\n\nasyncio.run(main())\n```\n\n### 与服务端集成\n\n```python\nfrom forge.server import ServerManager, BudgetMode\n\nasync def main():\n    # 启动服务端并自动解析预算\n    server, ctx = await ServerManager.start_with_budget(\n        backend=\"llamaserver\",\n        gguf_path=\"/path/to/model.gguf\",\n        budget_mode=BudgetMode.FORGE_BALANCED,\n    )\n    \n    # ... 使用 server 和 ctx ...\n    \n    await server.stop()\n\nasyncio.run(main())\n```\n\n---\n\n## 与守卫系统的协同\n\n上下文管理与守卫系统（Guardrails）紧密配合，共同确保工作流的正确执行：\n\n```mermaid\ngraph LR\n    A[LLM 响应] --> B[Guardrails.check]\n    B --> C{响应类型}\n    C -->|工具调用| D[执行工具]\n    C -->|文本响应| E[上下文压缩]\n    C -->|错误| F[重试/提示]\n    \n    D --> G[ContextManager 压缩]\n    E --> G\n```\n\n守卫系统会拦截 LLM 的响应，进行验证后交由上下文管理器决定是否需要压缩历史记录：\n\n```python\nclass Guardrails:\n    def __init__(\n        self,\n        tool_names: list[str],\n        terminal_tool: str | frozenset[str],\n        required_steps: list[str] | None = None,\n        max_retries: int = 3,\n        max_tool_errors: int = 2,\n        rescue_enabled: bool = True,\n        max_premature_attempts: int = 3,\n        retry_nudge: Callable[[str], str] | None = None,\n    ) -> None:\n        # ...\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:40-60](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n\n---\n\n## 错误处理\n\n### BudgetResolutionError\n\n当无法解析预算时会抛出此异常，通常发生在服务端不可达或未返回有效上下文长度时：\n\n```python\ntry:\n    ctx_length = await 
server.get_server_context()\nexcept BudgetResolutionError as e:\n    # 回退到手动指定\n    ctx = ContextManager(\n        strategy=TieredCompact(keep_recent=2),\n        budget_tokens=4096  # 手动指定\n    )\n```\n\n资料来源：[src/forge/server.py:170-180](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py)\n\n---\n\n## 最佳实践\n\n1. **选择合适的预算模式**：简单查询使用 `FORGE_FAST`，复杂推理任务使用 `FORGE_DEEP`\n2. **合理设置 keep_recent**：通常 2-3 轮即可保留对话上下文\n3. **监控令牌使用**：定期检查实际使用的令牌数，避免溢出\n4. **硬件感知配置**：在资源受限环境使用较小的 n_slots 和共享 KV Cache\n5. **与服务端配合使用**：利用服务端自动解析上下文长度，减少手动配置\n\n---\n\n<a id='page-slot-worker'></a>\n\n## SlotWorker 槽位调度\n\n### 相关页面\n\n相关主题：[核心组件详解](#page-core-components), [架构概述](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/server.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/server.py) — ServerManager 槽位启动参数定义\n- [src/forge/core/runner.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/runner.py) — WorkflowRunner 槽位上下文管理引用\n- [src/forge/core/steps.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/steps.py) — 步骤追踪与工具执行记录\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py) — 响应验证与错误处理\n- [CONTRIBUTING.md](https://github.com/antoinezambelli/forge/blob/main/CONTRIBUTING.md) — 槽位调度相关架构说明\n</details>\n\n# SlotWorker 槽位调度\n\n## 概述\n\nSlotWorker 是 forge 框架中用于管理 **llama-server / llamafile 后端并发槽位** 的核心机制。它通过 `ServerManager` 类实现，支持在同一进程中启动具有多个独立 KV Cache 槽位的推理服务器，从而支持多智能体（multi-agent）架构的并发执行。\n\n槽位调度的核心目的是：**在单个推理服务器实例上并行运行多个独立的 agent 工作流，每个槽位拥有独立的上下文状态，彼此互不干扰。**\n\n资料来源：[src/forge/server.py:1-50]()\n\n## 架构设计\n\n### 槽位模型\n\nforge 的槽位调度采用 **硬分区 + 可选统一缓存** 两种模式：\n\n| 模式 | `kv_unified` | 行为描述 | 适用场景 |\n|------|-------------|----------|----------|\n| 独立槽位 | `False`（默认） | 每个槽位拥有独立的 KV Cache 切片，上下文长度在槽位创建时硬性划分 | 资源受限环境、需要严格隔离的并发任务 |\n| 统一缓存 | `True` | 所有槽位共享单一 KV Cache 池，每个槽位可使用完整上下文容量 | 需要灵活分配上下文的高并发场景 
|\n\n资料来源：[src/forge/server.py:35-42]()\n\n### 组件关系图\n\n```mermaid\ngraph TD\n    A[WorkflowRunner] --> B[ContextManager]\n    A --> C[ServerManager]\n    C --> D[llama-server / llamafile]\n    D --> E[Slot 0: KV Cache]\n    D --> F[Slot 1: KV Cache]\n    D --> G[Slot N: KV Cache]\n    E --> H[独立上下文]\n    F --> H\n    G --> H\n```\n\n## ServerManager 槽位管理\n\n### 初始化参数\n\n`ServerManager` 类负责启动和管理带有槽位配置的推理服务器。其构造函数接受以下参数：\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `backend` | `str` | — | 后端类型：`\"ollama\"`、`\"llamaserver\"` 或 `\"llamafile\"` |\n| `port` | `int` | `8080` | 服务端口（llama-server / llamafile 专用） |\n| `models_dir` | `str \\| Path` | `None` | GGUF 模型文件目录 |\n\n资料来源：[src/forge/server.py:120-133]()\n\n### 槽位状态追踪\n\n`ServerManager` 内部维护以下状态字段用于槽位调度决策：\n\n```python\n_current_model: str | None       # 当前加载的模型标识\n_current_mode: str | None         # 运行模式 (native/prompt/reforged)\n_current_ctx: int | None          # 上下文长度配置\n_current_flags: tuple[str, ...]   # 额外 CLI 参数\n_current_cache_type_k: str | None # KV Cache Key 量化类型\n_current_cache_type_v: str | None # KV Cache Value 量化类型\n_current_n_slots: int | None      # 并发槽位数量\n_current_kv_unified: bool         # 是否启用统一 KV Cache\n```\n\n资料来源：[src/forge/server.py:135-146]()\n\n### 启动方法签名\n\n```python\nasync def start(\n    self,\n    model: str,\n    gguf_path: str | Path,\n    mode: str = \"native\",\n    extra_flags: list[str] | None = None,\n    ctx_override: int | None = None,\n    cache_type_k: str | None = None,  # e.g. \"q8_0\", \"q4_0\"\n    cache_type_v: str | None = None,  # e.g. 
\"q8_0\", \"q4_0\"\n    n_slots: int | None = None,       # 并发槽位数量\n    kv_unified: bool = False,         # 统一 KV Cache 模式\n) -> None:\n```\n\n资料来源：[src/forge/server.py:60-77]()\n\n## 槽位配置与缓存量化\n\n### KV Cache 量化类型\n\nforge 支持对 Key 和 Value 缓存分别进行量化，以节省显存：\n\n| 量化类型 | 说明 | 显存节省 | 精度损失 |\n|----------|------|----------|----------|\n| `q8_0` | 8-bit 量化 | 中等 | 较低 |\n| `q4_0` | 4-bit 量化 | 高 | 中等 |\n| `q4_K_M` | 混合 4-bit（中等块大小） | 高 | 较低-中等 |\n\n### 槽位复用机制\n\n`ServerManager` 实现了**智能复用逻辑**：当新请求的配置与当前运行配置完全一致时，跳过重启直接复用现有槽位：\n\n```python\nif (\n    self._current_model == model\n    and self._current_mode == mode\n    and self._current_ctx == ctx_override\n    and self._current_flags == flags\n    and self._current_cache_type_k == cache_type_k\n    and self._current_cache_type_v == cache_type_v\n    and self._current_n_slots == n_slots\n    and self._current_kv_unified == kv_unified\n):\n    return  # 复用现有槽位\n```\n\n资料来源：[src/forge/server.py:69-81]()\n\n## 工作流程集成\n\n### 槽位与 WorkflowRunner 的协作\n\n`WorkflowRunner` 在执行工作流时通过 `ContextManager` 管理上下文，并通过 `ServerManager` 与后端交互：\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant WorkflowRunner\n    participant ContextManager\n    participant ServerManager\n    participant LlamaServer\n    \n    User->>WorkflowRunner: run(workflow, input)\n    WorkflowRunner->>ContextManager: 请求上下文预算\n    ContextManager->>ServerManager: resolve_budget(mode)\n    ServerManager->>LlamaServer: 查询 /props 获取 n_ctx\n    LlamaServer-->>ServerManager: 返回上下文长度\n    ServerManager-->>ContextManager: 返回预算\n    WorkflowRunner->>LlamaServer: 发送推理请求（槽位分配）\n```\n\n### 步骤追踪与槽位隔离\n\n每个槽位在执行过程中会独立维护步骤状态：\n\n```python\n@dataclass\nclass StepTracker:\n    \"\"\"追踪必需步骤完成情况和工具执行记录\"\"\"\n    required_steps: list[str]\n    completed_steps: dict[str, None] = field(default_factory=dict)\n    executed_tools: dict[str, list[dict[str, Any]]] = field(default_factory=dict)\n```\n\n资料来源：[src/forge/core/steps.py:22-30]()\n\n## 槽位调度模式\n\n### 模式对比\n\n| 模式 | 上下文分配 | 
资源利用率 | 隔离性 | 配置参数 |\n|------|-----------|-----------|--------|----------|\n| **独立槽位** | 硬性分区，每个槽位固定容量 | 中等 | 强 | `n_slots=N, kv_unified=False` |\n| **统一缓存** | 动态共享，任意槽位可用全部容量 | 高 | 中等 | `n_slots=N, kv_unified=True` |\n\n资料来源：[src/forge/server.py:38-42]()\n\n### 配置示例\n\n```python\nfrom forge import ServerManager, BudgetMode\n\n# 创建支持 4 个并发槽位的服务器管理器\nserver = ServerManager(backend=\"llamaserver\", port=8080)\n\n# 启动服务器，配置统一 KV Cache\nawait server.start(\n    model=\"qwen3:14b-q4_K_M\",\n    gguf_path=\"/models/qwen3-14b-q4_k_m.gguf\",\n    mode=\"reforged\",\n    n_slots=4,\n    kv_unified=True,\n    cache_type_k=\"q8_0\",\n    cache_type_v=\"q4_0\",\n)\n```\n\n## 错误处理与超时\n\n### 错误追踪机制\n\n`Guardrails` 模块提供槽位级别的错误追踪：\n\n```python\n@dataclass\nclass ErrorTracker:\n    \"\"\"追踪重试次数和工具错误\"\"\"\n    max_retries: int\n    max_tool_errors: int\n    _consecutive_retries: int = 0\n    _tool_errors: int = 0\n```\n\n资料来源：[src/forge/guardrails/guardrails.py:45-52]()\n\n### 超时配置\n\n批量评估时，每个场景有 **300 秒墙钟超时**：\n\n> 批量评估时，每个场景有 300s 墙钟超时；超时时运行记录为 `completeness=False, error_type='Timeout'`，批次继续执行。\n\n资料来源：[CHANGELOG.md:30-32]()\n\n## 最佳实践\n\n### 槽位数量选择\n\n| 模型大小 | 推荐槽位数 | 说明 |\n|----------|-----------|------|\n| ≤ 8B | 2-4 | 显存充裕时可增加并发 |\n| 14B-32B | 1-2 | 需更多显存用于单槽 |\n| > 32B | 1 | 大模型建议独占槽位 |\n\n### 缓存量化建议\n\n| 量化类型 | 推荐场景 | 显存占用 |\n|----------|----------|----------|\n| `q8_0/q8_0` | 高精度需求 | 较高 |\n| `q4_K_M/q4_K_M` | 平衡场景 | 中等 |\n| `q4_0/q8_0` | 内存受限 | 较低 |\n\n## 限制与已知问题\n\n### llama.cpp reasoning budget 挂起问题\n\n> **已知问题**：2026 年 4 月 10 日之后的构建版本中，Gemma 4、Qwen 3.5 和 Ministral Reasoning 模型使用无界 reasoning budget sampler 会导致静默挂起。\n\n**临时解决方案**：使用 `--reasoning-budget 0` 参数禁用。\n\n资料来源：[CHANGELOG.md:24-28]()\n\n### 后端兼容性\n\n| 后端 | 槽位支持 | 说明 |\n|------|---------|------|\n| Ollama | ❌ 不支持 | 使用原生模型管理 |\n| llama-server | ✅ 完全支持 | 推荐用于多槽位场景 |\n| llamafile | ✅ 完全支持 | 与 llama-server 行为一致 |\n\n## 总结\n\nSlotWorker 槽位调度是 forge 框架支持多智能体并发执行的核心基础设施。通过 `ServerManager` 类，开发者可以：\n\n1. 
**配置并发槽位数量** (`n_slots`) 以支持多 agent 并行\n2. **选择 KV Cache 模式** (`kv_unified`) 平衡隔离性与资源利用\n3. **优化显存使用** 通过 Key/Value 缓存量化\n4. **实现智能复用** 避免不必要的服务器重启\n\n该机制使 forge 能够在单个推理服务器实例上高效运行多个独立工作流，特别适合需要并行评估或异步执行多个 agent 任务的场景。\n\n---\n\n<a id='page-tools'></a>\n\n## 内置工具系统\n\n### 相关页面\n\n相关主题：[工作流内部机制](#page-workflow-internals)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [src/forge/tools/__init__.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/tools/__init__.py)\n- [src/forge/tools/respond.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/tools/respond.py)\n- [src/forge/core/workflow.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/core/workflow.py)\n- [src/forge/guardrails/guardrails.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/guardrails.py)\n- [src/forge/guardrails/response_validator.py](https://github.com/antoinezambelli/forge/blob/main/src/forge/guardrails/response_validator.py)\n- [examples/foreign_loop.py](https://github.com/antoinezambelli/forge/blob/main/examples/foreign_loop.py)\n</details>\n\n# 内置工具系统\n\n## 概述\n\nForge 的内置工具系统是一套用于定义、注册和执行 LLM 工具调用的核心机制。该系统通过 `ToolSpec`、`ToolDef` 和 `ToolCall` 三个核心类实现工具的schema定义与实际执行逻辑的绑定，并为工作流引擎提供验证、执行和依赖管理能力。\n\n工具系统在整个架构中扮演关键角色：\n\n- **工具发现**：工作流通过 `ToolDef` 字典声明可用工具\n- **LLM 接口**：工具规格自动转换为 JSON Schema，供 LLM 理解调用格式\n- **执行验证**：GuardRails 组件验证工具调用合法性\n- **依赖管理**：通过前置条件（Prerequisites）表达工具间依赖关系\n\n资料来源：[src/forge/core/workflow.py:1-50]()\n\n## 核心数据模型\n\n### ToolSpec\n\n`ToolSpec` 是工具的静态声明，使用 Pydantic 模型定义参数schema：\n\n```python\n@dataclass\nclass ToolSpec:\n    \"\"\"Tool specification with name, description, and parameters.\"\"\"\n    name: str\n    description: str\n    parameters: type[BaseModel]\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `name` | `str` | 工具唯一标识名 |\n| `description` | `str` | 工具功能描述，供 LLM 理解用途 |\n| `parameters` | `type[BaseModel]` | Pydantic 模型类，定义参数结构 |\n\n`parameters` 字段接收一个 Pydantic `BaseModel` 子类，Forge 
自动从中提取 JSON Schema：\n\n```python\ndef get_json_schema(self) -> dict[str, Any]:\n    \"\"\"Return JSON Schema dict for this tool's parameters.\"\"\"\n    return self.parameters.model_json_schema()\n```\n\n资料来源：[src/forge/core/workflow.py:100-140]()\n\n### ToolDef\n\n`ToolDef` 将工具规格与实现绑定，并声明执行依赖：\n\n```python\n@dataclass\nclass ToolDef:\n    \"\"\"Binds a tool schema to its implementation.\"\"\"\n    spec: ToolSpec\n    callable: Callable[..., Any]\n    prerequisites: list[str | dict[str, str]] = field(default_factory=list)\n```\n\n| 字段 | 类型 | 说明 |\n|------|------|------|\n| `spec` | `ToolSpec` | 工具规格（名称、描述、参数schema） |\n| `callable` | `Callable` | 实际执行的 Python 函数 |\n| `prerequisites` | `list` | 前置依赖条件列表 |\n\n`prerequisites` 支持两种表达方式：\n\n1. **字符串形式**（名称匹配）：\n   ```python\n   prerequisites=[\"read_file\"]\n   ```\n   表示调用此工具前必须曾调用过 `read_file`\n\n2. **字典形式**（参数匹配）：\n   ```python\n   prerequisites=[{\"tool\": \"read_file\", \"match_arg\": \"path\"}]\n   ```\n   表示调用此工具前必须调用过 `read_file` 且 `path` 参数值相同\n\n资料来源：[src/forge/core/workflow.py:143-175]()\n\n### ToolCall\n\n`ToolCall` 是经 LLM 返回并验证后的工具调用对象：\n\n```python\nclass ToolCall(BaseModel):\n    \"\"\"Validated tool invocation returned by an LLMClient.\"\"\"\n    tool: str\n```\n\n由 `ResponseValidator` 验证通过后生成，传递给执行层。\n\n资料来源：[src/forge/core/workflow.py:177-179]()\n\n## 工具注册与使用\n\n### 工作流中的工具声明\n\n在创建工作流时，通过 `tools` 字典注册所有可用工具：\n\n```python\nfrom forge import Workflow, ToolDef, ToolSpec\n\nworkflow = Workflow(\n    name=\"weather\",\n    description=\"Look up weather for a city.\",\n    tools={\n        \"get_weather\": ToolDef(\n            spec=ToolSpec(\n                name=\"get_weather\",\n                description=\"Get current weather\",\n                parameters=GetWeatherParams,\n            ),\n            callable=get_weather,\n        ),\n    },\n    required_steps=[],\n    terminal_tool=\"get_weather\",\n    system_prompt_template=\"You are a helpful assistant. 
Use the available tools to answer the user.\",\n)\n```\n\n工作流内部从 `tools` 字典派生出：\n- **工具规格列表**：供 LLM 理解可调用工具\n- **可调用对象映射**：执行时的函数查找表\n\n资料来源：[README.md:1-40]()\n\n### 参数模型定义\n\n使用 Pydantic 定义工具参数，每个参数可带描述供 LLM 理解：\n\n```python\nfrom pydantic import BaseModel, Field\n\nclass GetWeatherParams(BaseModel):\n    city: str = Field(description=\"City name\")\n```\n\nPydantic 自动处理类型验证和类型转换。\n\n资料来源：[README.md:5-15]()\n\n## 前置依赖机制\n\n### 依赖声明语法\n\n工具的前置依赖通过 `prerequisites` 字段声明，支持细粒度控制：\n\n```python\ndef answer(question: str) -> str:\n    \"\"\"Answer a question.\"\"\"\n    return \"The answer is 42.\"\n\nworkflow_tools = {\n    \"search\": ToolDef(\n        spec=ToolSpec(name=\"search\", description=\"Search the web\", parameters=SearchParams),\n        callable=search,\n    ),\n    \"lookup\": ToolDef(\n        spec=ToolSpec(name=\"lookup\", description=\"Look up details\", parameters=LookupParams),\n        callable=lookup,\n    ),\n    \"answer\": ToolDef(\n        spec=ToolSpec(name=\"answer\", description=\"Answer the question\", parameters=AnswerParams),\n        callable=answer,\n        prerequisites=[\"search\", \"lookup\"],  # 简单依赖\n    ),\n}\n```\n\n### 依赖执行流程\n\n```mermaid\ngraph TD\n    A[LLM 返回工具调用] --> B[StepEnforcer 检查依赖]\n    B --> C{依赖满足?}\n    C -->|是| D[执行工具]\n    C -->|否| E[返回 Nudge 提示]\n    D --> F[记录执行结果]\n    F --> G{所有必需步骤完成?}\n    G -->|是| H[允许终止工具]\n    G -->|否| A\n```\n\n`StepEnforcer` 组件负责验证依赖是否满足：\n\n```python\nenforcer = StepEnforcer(\n    required_steps=[\"search\", \"lookup\"],\n    terminal_tool=\"answer\",\n)\n```\n\n资料来源：[examples/foreign_loop.py:1-80]()\n\n## 工具验证流程\n\n### ResponseValidator\n\n`ResponseValidator` 负责验证 LLM 返回的工具调用：\n\n1. **工具名称验证**：检查是否在允许列表中\n2. **参数验证**：通过 Pydantic 模型验证参数合法性\n3. 
**未知工具检测**：识别并提示未知工具名\n\n```python\nvalidator = ResponseValidator(\n    tool_names=[\"search\", \"lookup\", \"answer\"],\n    rescue_enabled=True,\n)\n```\n\n资料来源：[src/forge/guardrails/response_validator.py:1-60]()\n\n### 验证结果处理\n\n验证结果通过 `ValidationResult` 返回：\n\n| 结果 | 说明 |\n|------|------|\n| `tool_calls` | 验证通过的工具调用列表 |\n| `nudge` | 需要重试时的提示信息 |\n| `needs_retry` | 是否需要 LLM 重试 |\n\n```python\nif result.action == \"fatal\":\n    return f\"FATAL: {result.reason}\"\n\nif result.action in (\"retry\", \"step_blocked\"):\n    # 注入 nudge 到对话历史\n    return f\"{result.action}: {result.nudge.content[:80]}...\"\n\n# result.action == \"execute\"\n# 执行工具并通知 forge\ntool_calls = result.tool_calls\nexecuted = [tc.tool for tc in tool_calls]\ndone = guardrails.record(executed)\n```\n\n资料来源：[examples/foreign_loop.py:30-50]()\n\n## Guardrails 集成\n\n`Guardrails` 是工具系统的顶层编排组件，整合验证、依赖检查和错误追踪：\n\n```python\n@dataclass\nclass Guardrails:\n    \"\"\"Check LLM responses against tool-calling guardrails.\"\"\"\n\n    def __init__(\n        self,\n        tool_names: list[str],\n        terminal_tool: str | frozenset[str],\n        required_steps: list[str] | None = None,\n        max_retries: int = 3,\n        max_tool_errors: int = 2,\n        rescue_enabled: bool = True,\n        max_premature_attempts: int = 3,\n        retry_nudge: Callable[[str], str] | None = None,\n    ) -> None:\n```\n\n| 参数 | 默认值 | 说明 |\n|------|--------|------|\n| `tool_names` | - | 允许的工具名称列表 |\n| `terminal_tool` | - | 终止工具名称 |\n| `required_steps` | `None` | 必需的执行步骤 |\n| `max_retries` | `3` | 最大重试次数 |\n| `max_tool_errors` | `2` | 最大工具错误数 |\n| `rescue_enabled` | `True` | 是否启用救援解析 |\n| `max_premature_attempts` | `3` | 提前终止最大次数 |\n| `retry_nudge` | `None` | 自定义重试提示函数 |\n\n资料来源：[src/forge/guardrails/guardrails.py:1-50]()\n\n## 架构图\n\n### 工具系统完整架构\n\n```mermaid\ngraph TB\n    subgraph \"工具定义层\"\n        PS[ToolSpec<br/>工具规格]\n        PD[ToolDef<br/>工具定义]\n        PC[Callable<br/>可调用对象]\n        
PR[Prerequisites<br/>前置依赖]\n    end\n    \n    subgraph \"工作流层\"\n        WF[Workflow<br/>工作流]\n        SE[StepEnforcer<br/>步骤强制器]\n        CM[ContextManager<br/>上下文管理]\n    end\n    \n    subgraph \"验证层\"\n        RV[ResponseValidator<br/>响应验证器]\n        ET[ErrorTracker<br/>错误追踪]\n        GR[Guardrails<br/>防护栏]\n    end\n    \n    subgraph \"LLM 层\"\n        LL[LLM Client<br/>LLM 客户端]\n        TC[ToolCall<br/>工具调用]\n    end\n    \n    PS --> PD\n    PC --> PD\n    PR --> PD\n    PD --> WF\n    WF --> SE\n    WF --> CM\n    LL --> TC\n    TC --> RV\n    RV --> GR\n    SE --> GR\n    ET --> GR\n```\n\n## 最佳实践\n\n### 工具命名规范\n\n- 使用小写字母和下划线：`get_weather`, `read_file`\n- 动词开头描述操作：`search`, `lookup`, `answer`\n- 名词描述数据：`user_info`, `document_content`\n\n### 参数设计\n\n```python\nclass SearchParams(BaseModel):\n    query: str = Field(description=\"Search query string\")\n    limit: int = Field(default=10, description=\"Maximum results to return\")\n```\n\n### 依赖声明\n\n对于多步骤工作流，清晰声明依赖关系：\n\n```python\nToolDef(\n    spec=...,\n    callable=answer,\n    prerequisites=[\n        \"search\",                           # 必须执行过 search\n        {\"tool\": \"lookup\", \"match_arg\": \"id\"}  # 必须用相同 id 执行过 lookup\n    ],\n)\n```\n\n## 总结\n\nForge 的内置工具系统通过类型安全的 Pydantic 模型、清晰的前置依赖声明和完善的验证机制，为构建可靠的 LLM 工具调用工作流提供了坚实基础。系统设计强调：\n\n1. **类型安全**：参数 schema 与执行逻辑强绑定\n2. **可验证性**：每个工具调用都经过多层验证\n3. **可追踪性**：完整记录工具执行历史和依赖满足状态\n4. **可扩展性**：通过 `ToolDef` 轻松注册新工具\n\n---\n\n---\n\n## Doramagic 踩坑日志\n\n项目：antoinezambelli/forge\n\n摘要：发现 15 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：安装坑 - 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body。\n\n## 1. 
安装坑 · 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_148dff87195e42549d0ffb88b99e9cbf | https://github.com/antoinezambelli/forge/issues/58 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 2. 安装坑 · 来源证据：Investigate: integration paths with Hermes Agent\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Investigate: integration paths with Hermes Agent\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_e3cbd2d1c9a84a1887887bf24b036865 | https://github.com/antoinezambelli/forge/issues/51 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 3. 安装坑 · 来源证据：Per-model recommended sampling defaults (map keyed by HF model cards)\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Per-model recommended sampling defaults (map keyed by HF model cards)\n- 对用户的影响：可能阻塞安装或首次运行。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_057ca2af912e4a608259ffb2a3654d4f | https://github.com/antoinezambelli/forge/issues/59 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 4. 安装坑 · 来源证据：Rescue-parse ChatGPT-style XML tool calls\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Rescue-parse ChatGPT-style XML tool calls\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_471c674c8d73451da75d6b8c9349aabf | https://github.com/antoinezambelli/forge/issues/55 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 5. 
配置坑 · 来源证据：Proxy external mode hardcodes native FC — no prompt-injection fallback\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个配置相关的待验证问题：Proxy external mode hardcodes native FC — no prompt-injection fallback\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_f3a85ec8447a4838b3bc4c846cd9e7a0 | https://github.com/antoinezambelli/forge/issues/53 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 6. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | README/documentation is current enough for a first validation pass.\n\n## 7. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | last_activity_observed missing\n\n## 8. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n\n## 9. 安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n\n## 10. 
安全/权限坑 · 来源证据：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_4ad226a6d1fa4a5f89fa7702bec11188 | https://github.com/antoinezambelli/forge/issues/61 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 11. 安全/权限坑 · 来源证据：Sub-agent support: dynamic slot splitting\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: dynamic slot splitting\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_5b35873cf63c4647bca8a0611d441189 | https://github.com/antoinezambelli/forge/issues/28 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 12. 安全/权限坑 · 来源证据：Sub-agent support: slot pool\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: slot pool\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_070d9a3d20d24123b62d7d76ee16078a | https://github.com/antoinezambelli/forge/issues/29 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 13. 安全/权限坑 · 来源证据：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds\n- 对用户的影响：可能阻塞安装或首次运行。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_673be4a583984219bab90cbadff631fe | https://github.com/antoinezambelli/forge/issues/54 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 14. 
维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | issue_or_pr_quality=unknown\n\n## 15. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | release_recency=unknown\n\n<!-- canonical_name: antoinezambelli/forge; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki 完整输出，末尾追加 Discovery Agent 踩坑日志。",
      "title": "Human Manual / 人类版说明书"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log / 踩坑日志\n\n项目：antoinezambelli/forge\n\n摘要：发现 15 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：安装坑 - 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body。\n\n## 1. 安装坑 · 来源证据：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Client sampling params: thread top_p/top_k/min_p/repeat_penalty through request body\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_148dff87195e42549d0ffb88b99e9cbf | https://github.com/antoinezambelli/forge/issues/58 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 2. 安装坑 · 来源证据：Investigate: integration paths with Hermes Agent\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Investigate: integration paths with Hermes Agent\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_e3cbd2d1c9a84a1887887bf24b036865 | https://github.com/antoinezambelli/forge/issues/51 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 3. 安装坑 · 来源证据：Per-model recommended sampling defaults (map keyed by HF model cards)\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Per-model recommended sampling defaults (map keyed by HF model cards)\n- 对用户的影响：可能阻塞安装或首次运行。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_057ca2af912e4a608259ffb2a3654d4f | https://github.com/antoinezambelli/forge/issues/59 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 4. 
安装坑 · 来源证据：Rescue-parse ChatGPT-style XML tool calls\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Rescue-parse ChatGPT-style XML tool calls\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_471c674c8d73451da75d6b8c9349aabf | https://github.com/antoinezambelli/forge/issues/55 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 5. 配置坑 · 来源证据：Proxy external mode hardcodes native FC — no prompt-injection fallback\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个配置相关的待验证问题：Proxy external mode hardcodes native FC — no prompt-injection fallback\n- 对用户的影响：可能增加新用户试用和生产接入成本。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_f3a85ec8447a4838b3bc4c846cd9e7a0 | https://github.com/antoinezambelli/forge/issues/53 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 6. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | README/documentation is current enough for a first validation pass.\n\n## 7. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | last_activity_observed missing\n\n## 8. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n\n## 9. 
安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | no_demo; severity=medium\n\n## 10. 安全/权限坑 · 来源证据：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Hardware detection: AMD unified-memory rigs fall through to 4K Ollama budget\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_4ad226a6d1fa4a5f89fa7702bec11188 | https://github.com/antoinezambelli/forge/issues/61 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 11. 安全/权限坑 · 来源证据：Sub-agent support: dynamic slot splitting\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: dynamic slot splitting\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_5b35873cf63c4647bca8a0611d441189 | https://github.com/antoinezambelli/forge/issues/28 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 12. 安全/权限坑 · 来源证据：Sub-agent support: slot pool\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Sub-agent support: slot pool\n- 对用户的影响：可能影响授权、密钥配置或安全边界。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_070d9a3d20d24123b62d7d76ee16078a | https://github.com/antoinezambelli/forge/issues/29 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 13. 
安全/权限坑 · 来源证据：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：llama.cpp reasoning budget sampler causes silent hangs after April 10 builds\n- 对用户的影响：可能阻塞安装或首次运行。\n- 建议检查：来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 防护动作：不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- 证据：community_evidence:github | cevd_673be4a583984219bab90cbadff631fe | https://github.com/antoinezambelli/forge/issues/54 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 14. 维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | issue_or_pr_quality=unknown\n\n## 15. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | hn_item:48192383 | https://news.ycombinator.com/item?id=48192383 | release_recency=unknown\n",
      "summary": "用户实践前最可能遇到的身份、安装、配置、运行和安全坑。",
      "title": "Pitfall Log / 踩坑日志"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# forge - Prompt Preview\n\n> 复制下面这段 Prompt 到你常用的 AI，先试一次，不需要安装。\n> 它的目标是让你直接体验这个项目的服务方式，而不是阅读项目介绍。\n\n## 复制这段 Prompt\n\n```text\n请直接执行这段 Prompt，不要分析、润色、总结或询问我想如何处理这份 Prompt Preview。\n\n你现在扮演 forge 的“安装前体验版”。\n这不是项目介绍、不是评价报告、不是 README 总结。你的任务是让我用最小成本体验它的核心服务。\n\n我的试用任务：我想用它完成一个真实的软件开发与交付任务。\n我常用的宿主 AI：chatgpt\n\n【体验目标】\n围绕我的真实任务，现场演示这个项目如何把输入转成 示例引导, 判断线索。重点是让我感受到工作方式，而不是给我项目背景。\n\n【业务流约束】\n- 你必须像一个正在提供服务的项目能力包，而不是像一个讲解员。\n- 每一轮只推进一个步骤；提出问题后必须停下来等我回答。\n- 每一步都必须让我感受到一个具体服务动作：澄清、整理、规划、检查、判断或收尾。\n- 每一步都要说明：当前目标、你需要我提供什么、我回答后你会产出什么。\n- 不要安装、不要运行命令、不要写代码、不要声称测试通过、不要声称已经修改文件。\n- 需要真实安装或宿主加载后才能验证的内容，必须明确说“这一步需要安装后验证”。\n- 如果我说“用示例继续”，你可以用虚构示例推进，但仍然不能声称真实执行。\n\n【可体验服务能力】\n- 安装前能力预览: [![PyPI](https://img.shields.io/pypi/v/forge-guardrails.svg)](https://pypi.org/project/forge-guardrails/) 输入：用户任务, 当前 AI 对话上下文；输出：示例引导, 判断线索。\n\n【必须安装后才可验证的能力】\n- 命令行启动或安装流程: 项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 输入：终端环境, 包管理器, 项目依赖；输出：安装结果, 列表/更新/运行结果。\n\n【核心服务流】\n请严格按这个顺序带我体验。不要一次性输出完整流程：\n1. page-introduction：Forge 简介。围绕“Forge 简介”模拟一次用户任务，不展示安装或运行结果。\n2. page-installation：安装与配置。围绕“安装与配置”模拟一次用户任务，不展示安装或运行结果。\n3. page-quickstart：快速开始教程。围绕“快速开始教程”模拟一次用户任务，不展示安装或运行结果。\n4. page-architecture：架构概述。围绕“架构概述”模拟一次用户任务，不展示安装或运行结果。\n5. page-core-components：核心组件详解。围绕“核心组件详解”模拟一次用户任务，不展示安装或运行结果。\n\n【核心能力体验剧本】\n每一步都必须按“输入 -> 服务动作 -> 中间产物”执行。不要只说流程名：\n1. page-introduction\n输入：用户提供的“Forge 简介”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n2. page-installation\n输入：用户提供的“安装与配置”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n3. page-quickstart\n输入：用户提供的“快速开始教程”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n4. page-architecture\n输入：用户提供的“架构概述”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n5. 
page-core-components\n输入：用户提供的“核心组件详解”相关信息。\n服务动作：模拟项目在这一步的核心判断和整理方式。\n中间产物：一个可检查的小结果。\n\n【项目服务规则】\n这些规则决定你如何服务用户。不要解释规则本身，而要在每一步执行时遵守：\n- 先确认用户任务、输入材料和成功标准，再模拟项目能力。\n- 每一步都必须形成可检查的小产物，并等待用户确认后再继续。\n- 凡是需要安装、调用工具或访问外部服务的能力，都必须标记为安装后验证。\n\n【每一步的服务约束】\n- Step 1 / page-introduction：Step 1 必须围绕“Forge 简介”形成一个小中间产物，并等待用户确认。\n- Step 2 / page-installation：Step 2 必须围绕“安装与配置”形成一个小中间产物，并等待用户确认。\n- Step 3 / page-quickstart：Step 3 必须围绕“快速开始教程”形成一个小中间产物，并等待用户确认。\n- Step 4 / page-architecture：Step 4 必须围绕“架构概述”形成一个小中间产物，并等待用户确认。\n- Step 5 / page-core-components：Step 5 必须围绕“核心组件详解”形成一个小中间产物，并等待用户确认。\n\n【边界与风险】\n- 不要声称已经安装、运行、调用 API、读写本地文件或完成真实任务。\n- 安装前预览只能展示工作方式，不能证明兼容性、性能或输出质量。\n- 涉及安装、插件加载、工具调用或外部服务的能力必须安装后验证。\n\n【可追溯依据】\n这些路径只用于你内部校验或在我追问“依据是什么”时简要引用。不要在首次回复主动展开：\n- https://news.ycombinator.com/item?id=48192383\n- https://github.com/antoinezambelli/forge#readme\n- README.md\n- src/forge/__init__.py\n- pyproject.toml\n- docs/BACKEND_SETUP.md\n- src/forge/core/runner.py\n- src/forge/core/workflow.py\n- docs/ARCHITECTURE.md\n- src/forge/context/manager.py\n- src/forge/core/slot_worker.py\n- src/forge/core/steps.py\n\n【首次问题规则】\n- 首次三问必须先确认用户目标、成功标准和边界，不要提前进入工具、安装或实现细节。\n- 如果后续需要技术条件、文件路径或运行环境，必须等用户确认目标后再追问。\n\n首次回复必须只输出下面 4 个部分：\n1. 体验开始：用 1 句话说明你将带我体验 forge 的核心服务。\n2. 当前步骤：明确进入 Step 1，并说明这一步要解决什么。\n3. 你会如何服务我：说明你会先改变我完成任务的哪个动作。\n4. 只问我 3 个问题，然后停下等待回答。\n\n首次回复禁止输出：后续完整流程、证据清单、安装命令、项目评价、营销文案、已经安装或运行的说法。\n\nStep 1 / page-introduction 的二轮协议：\n- 我回答首次三问后，你仍然停留在 Step 1 / page-introduction，不要进入 Step 2。\n- 第二次回复必须产出 6 个部分：澄清后的任务定义、成功标准、边界条件、\n  2-3 个可选方案、每个方案的权衡、推荐方案。\n- 第二次回复最后必须问我是否确认推荐方案；只有我明确确认后，才能进入下一步。\n- 第二次回复禁止输出 git worktree、代码计划、测试文件、命令或真实执行结果。\n\n后续对话规则：\n- 我回答后，你先完成当前步骤的中间产物并等待确认；只有我确认后，才能进入下一步。\n- 每一步都要生成一个小的中间产物，例如澄清后的目标、计划草案、测试意图、验证清单或继续/停止判断。\n- 所有演示都写成“我会建议/我会引导/这一步会形成”，不要写成已经真实执行。\n- 不要声称已经测试通过、文件已修改、命令已运行或结果已产生。\n- 如果某个能力必须安装后验证，请直接说“这一步需要安装后验证”。\n- 如果证据不足，请明确说“证据不足”，不要补事实。\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start / 官方入口\n\n项目：antoinezambelli/forge\n\n## 官方安装入口\n\n### Python / pip · 官方安装入口\n\n```bash\npip install forge-guardrails\n```\n\n来源：https://github.com/antoinezambelli/forge#readme\n\n## 来源\n\n- hn: https://news.ycombinator.com/item?id=48192383\n- docs: https://github.com/antoinezambelli/forge#readme\n",
      "summary": "从项目官方 README 或安装文档提取的开工入口。",
      "title": "Quick Start / 官方入口"
    }
  },
  "validation_id": "dval_7a58244d8bf74e91982ffb1c5a2b2cc8"
}