{
  "canonical_name": "deepset-ai/haystack",
  "compilation_id": "pack_26d2ef92121c4038befbe9df60f8b1ca",
  "created_at": "2026-05-22T04:11:46.476636+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=skill, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=skill, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `pip install haystack-ai` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "pip install haystack-ai",
      "sandbox_container_image": "python:3.12-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "deterministic_isolated_install",
      "sandbox_validation_id": "sbx_d336b800b998467f9068e37341398e57"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_d505f37aaa8c7179e18d6cbd98d466e2",
    "canonical_name": "deepset-ai/haystack",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/deepset-ai/haystack",
    "slug": "haystack",
    "source_packet_id": "phit_bff911166c5e4405a736f9977aba6bd4",
    "source_validation_id": "dval_2f942674156344b28c4ea6516b388287"
  },
  "merchandising": {
    "best_for": "需要信息检索与知识管理能力，并使用 local_cli 的用户",
    "github_forks": 2776,
    "github_stars": 25142,
    "one_liner_en": "Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.",
    "one_liner_zh": "开源 AI 编排框架，用于构建基于上下文工程、可投入生产的 LLM 应用。可设计模块化的流水线与智能体工作流，并对检索、路由、记忆和生成进行显式控制。适用于可扩展的智能体、RAG、多模态应用、语义搜索和对话系统。",
    "primary_category": {
      "category_id": "research-knowledge",
      "confidence": "high",
      "name_en": "Research & Knowledge",
      "name_zh": "信息检索与知识管理",
      "reason": "matched_keywords:rag, retrieval, search"
    },
    "target_user": "使用 local_cli 等宿主 AI 的用户",
    "title_en": "haystack",
    "title_zh": "haystack 能力包",
    "visible_tags": [
      {
        "label_en": "Security & Permissions",
        "label_zh": "安全审查与权限治理",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-security-permissions",
        "type": "product_domain"
      },
      {
        "label_en": "Knowledge Base Q&A",
        "label_zh": "知识库问答",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-knowledge-base-q-a",
        "type": "user_job"
      },
      {
        "label_en": "Structured Data Extraction",
        "label_zh": "结构化数据提取",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-structured-data-extraction",
        "type": "core_capability"
      },
      {
        "label_en": "Multi-role Workflow",
        "label_zh": "多角色协作流程",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-multi-role-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Open Source Tool",
        "label_zh": "开源工具",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-open-source-tool",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_bff911166c5e4405a736f9977aba6bd4",
  "page_model": {
    "artifacts": {
      "artifact_slug": "haystack",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "pip install haystack-ai",
          "label": "Python / pip · 官方安装入口",
          "source": "https://github.com/deepset-ai/haystack#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "安全审查与权限治理",
        "知识库问答",
        "结构化数据提取",
        "多角色协作流程",
        "开源工具"
      ],
      "eyebrow": "信息检索与知识管理",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要信息检索与知识管理能力，并使用 local_cli 的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems."
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "local_cli",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "skill, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：RFC: Signed receipts for Haystack pipeline component calls",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_192c840953e54837869723f54ccfdd1a | https://github.com/deepset-ai/haystack/issues/11039 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：RFC: Signed receipts for Haystack pipeline component calls",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_4b8f3323f54c4fd6b8de4e2d466cfe8b | https://github.com/deepset-ai/haystack/issues/11358 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: add INTERSECTION join mode to DocumentJoiner",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_00757f9859234e9cab8f8d4ce4f3e771 | https://github.com/deepset-ai/haystack/issues/11365 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：feat: add INTERSECTION join mode to DocumentJoiner",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：docs: Update Ragas docs",
            "category": "维护坑",
            "evidence": [
              "community_evidence:github | cevd_3204fffa09664d9f8553be2a3008f270 | https://github.com/deepset-ai/haystack/issues/11178 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：docs: Update Ragas docs",
            "user_impact": "可能影响升级、迁移或版本选择。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_8f72793700a1416891c2eedddc379129 | https://github.com/deepset-ai/haystack/issues/11366 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)",
            "user_impact": "可能影响升级、迁移或版本选择。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_4f0868673100472fb74d831b5a04735f | https://github.com/deepset-ai/haystack/issues/11311 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：feat: support token-based budget in LostInTheMiddleRanker",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_7ad00787309c442eb497b10879fb3b28 | https://github.com/deepset-ai/haystack/issues/11351 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：feat: support token-based budget in LostInTheMiddleRanker",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: Proposal: Transaction Protocol for idempotent, auditable agent pipelines",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_58038e9b6373edf9376049b42d4b7bb4 | https://github.com/deepset-ai/haystack/issues/11266 | Proposal: Transaction Protocol for idempotent, auditable agent pipelines"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: Proposal: Transaction Protocol for idempotent, auditable agent pipelines. Context: Observed when using python",
            "title": "失败模式：installation: Proposal: Transaction Protocol for idempotent, auditable agent pipelines",
            "user_impact": "Developers may fail before the first successful local run: Proposal: Transaction Protocol for idempotent, auditable agent pipelines"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: RFC: Signed receipts for Haystack pipeline component calls",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_ce0b9c65d21126dcf11ede12120e154f | https://github.com/deepset-ai/haystack/issues/11039 | RFC: Signed receipts for Haystack pipeline component calls"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: RFC: Signed receipts for Haystack pipeline component calls. Context: Observed when using node, python",
            "title": "失败模式：installation: RFC: Signed receipts for Haystack pipeline component calls",
            "user_impact": "Developers may fail before the first successful local run: RFC: Signed receipts for Haystack pipeline component calls"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_4d3276b6b9938595cb2dbb864a5509da | https://github.com/deepset-ai/haystack/issues/11311 | Security: OWASP Agent Memory Guard for pipeline memory poisoning defense"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense. Context: Observed when using python",
            "title": "失败模式：installation: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense",
            "user_impact": "Developers may fail before the first successful local run: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: [FEATURE] Support for code syntax-aware Document Splitters",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_997b84068ae32409b1d8d55daaddd984 | https://github.com/deepset-ai/haystack/issues/11354 | [FEATURE] Support for code syntax-aware Document Splitters"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: [FEATURE] Support for code syntax-aware Document Splitters. Context: Observed when using python",
            "title": "失败模式：installation: [FEATURE] Support for code syntax-aware Document Splitters",
            "user_impact": "Developers may fail before the first successful local run: [FEATURE] Support for code syntax-aware Document Splitters"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：MCP Server for Haystack docs",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_398390cf2fcd41d589dd5614a3bc646d | https://github.com/deepset-ai/haystack/issues/11346 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：MCP Server for Haystack docs",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[FEATURE] Support for code syntax-aware Document Splitters",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_76b3b1b8eae94593a2cd248d0ec55e2a | https://github.com/deepset-ai/haystack/issues/11354 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：[FEATURE] Support for code syntax-aware Document Splitters",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.25.2",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_55d8aef5d1c3417ba9bdf05c0f5a3053 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | 来源类型 github_release 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：v2.25.2",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.26.0",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_d73f121017b64b04a8ad885da241fc6f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：v2.26.0",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.28.0",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_d9746a9178f0445d853c95cbb4a5241b | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：v2.28.0",
            "user_impact": "可能影响升级、迁移或版本选择。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 38 个潜在踩坑项，其中 7 个为 high/blocking；最高优先级：安装坑 - 来源证据：RFC: Signed receipts for Haystack pipeline component calls。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": 354,
        "forks": 2776,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": 25142
      },
      "source_url": "https://github.com/deepset-ai/haystack",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.",
      "title": "haystack 能力包",
      "trial_prompt": "# haystack - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for deepset-ai/haystack.\n\nProject:\n- Name: haystack\n- Repository: https://github.com/deepset-ai/haystack\n- Summary: Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.\n- Host target: local_cli\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. 
Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. introduction: Introduction to Haystack. Produce one small intermediate artifact and wait for confirmation.\n2. pipeline-architecture: Pipeline Architecture. Produce one small intermediate artifact and wait for confirmation.\n3. core-concepts: Core Concepts. Produce one small intermediate artifact and wait for confirmation.\n4. component-types: Pipeline Component Types. Produce one small intermediate artifact and wait for confirmation.\n5. llm-integrations: LLM and Embedder Integrations. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/deepset-ai/haystack\n- https://github.com/deepset-ai/haystack#readme\n- README.md\n- AGENTS.md\n- VERSION.txt\n- docs-website/docs/concepts/pipelines.mdx\n- docs-website/docs/concepts/pipelines/asyncpipeline.mdx\n- docs-website/docs/concepts/pipelines/serialization.mdx\n- docs-website/docs/concepts/pipelines/debugging-pipelines.mdx\n- docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. 
Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "voices": [
        {
          "body": "来源平台：github。github/github_issue: EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-r（https://github.com/deepset-ai/haystack/issues/11366）；github/github_issue: feat: add INTERSECTION join mode to DocumentJoiner（https://github.com/deepset-ai/haystack/issues/11365）；github/github_issue: DocumentJoiner concatenate mode incorrectly drops documents with score=0（https://github.com/deepset-ai/haystack/issues/11352）；github/github_issue: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryText（https://github.com/deepset-ai/haystack/issues/11358）；github/github_issue: MCP Server for Haystack docs（https://github.com/deepset-ai/haystack/issues/11346）；github/github_issue: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryText（https://github.com/deepset-ai/haystack/issues/11358）；github/github_issue: RFC: Signed receipts for Haystack pipeline component calls（https://github.com/deepset-ai/haystack/issues/11039）；github/github_issue: [FEATURE] Support for code syntax-aware Document Splitters（https://github.com/deepset-ai/haystack/issues/11354）；github/github_issue: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense（https://github.com/deepset-ai/haystack/issues/11311）；github/github_issue: DocumentJoiner concatenate mode incorrectly drops documents with score=0（https://github.com/deepset-ai/haystack/issues/11352）；github/github_issue: feat: support token-based budget in LostInTheMiddleRanker（https://github.com/deepset-ai/haystack/issues/11351）；github/github_issue: MCP Server for Haystack docs（https://github.com/deepset-ai/haystack/issues/11346）。这些是项目级外部声音，不作为单独质量证明。",
          "items": [
            {
              "kind": "github_issue",
              "source": "github",
              "title": "EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)",
              "url": "https://github.com/deepset-ai/haystack/issues/11366"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "feat: add INTERSECTION join mode to DocumentJoiner",
              "url": "https://github.com/deepset-ai/haystack/issues/11365"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "DocumentJoiner concatenate mode incorrectly drops documents with score=0",
              "url": "https://github.com/deepset-ai/haystack/issues/11352"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`",
              "url": "https://github.com/deepset-ai/haystack/issues/11358"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "MCP Server for Haystack docs",
              "url": "https://github.com/deepset-ai/haystack/issues/11346"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`",
              "url": "https://github.com/deepset-ai/haystack/issues/11358"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "RFC: Signed receipts for Haystack pipeline component calls",
              "url": "https://github.com/deepset-ai/haystack/issues/11039"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[FEATURE] Support for code syntax-aware Document Splitters",
              "url": "https://github.com/deepset-ai/haystack/issues/11354"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Security: OWASP Agent Memory Guard for pipeline memory poisoning defense",
              "url": "https://github.com/deepset-ai/haystack/issues/11311"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "DocumentJoiner concatenate mode incorrectly drops documents with score=0",
              "url": "https://github.com/deepset-ai/haystack/issues/11352"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "feat: support token-based budget in LostInTheMiddleRanker",
              "url": "https://github.com/deepset-ai/haystack/issues/11351"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "MCP Server for Haystack docs",
              "url": "https://github.com/deepset-ai/haystack/issues/11346"
            }
          ],
          "status": "已收录 12 条来源",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "信息检索与知识管理",
      "desc": "Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.",
      "effort": "安装已验证",
      "forks": 2776,
      "icon": "search",
      "name": "haystack 能力包",
      "risk": "可发布",
      "slug": "haystack",
      "stars": 25142,
      "tags": [
        "安全审查与权限治理",
        "知识库问答",
        "结构化数据提取",
        "多角色协作流程",
        "开源工具"
      ],
      "thumb": "blue",
      "type": "Skill Pack"
    },
    "manual": {
      "markdown": "# https://github.com/deepset-ai/haystack 项目说明书\n\n生成时间：2026-05-15 20:17:22 UTC\n\n## 目录\n\n- [Introduction to Haystack](#introduction)\n- [Pipeline Architecture](#pipeline-architecture)\n- [Core Concepts](#core-concepts)\n- [Pipeline Component Types](#component-types)\n- [Data Processing Components](#data-processing)\n- [LLM and Embedder Integrations](#llm-integrations)\n- [Document Stores and Retrievers](#document-stores)\n- [Agent Systems](#agents)\n- [Development Guide](#development-guide)\n- [Deployment and Infrastructure](#deployment)\n\n<a id='introduction'></a>\n\n## Introduction to Haystack\n\n### 相关页面\n\n相关主题：[Pipeline Architecture](#pipeline-architecture), [Core Concepts](#core-concepts)\n\n<details>\n<summary>Relevant Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n- [docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- [pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n- [examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n</details>\n\n# Introduction to Haystack\n\nHaystack is an end-to-end LLM framework that enables developers to build applications powered by Large Language Models (LLMs), Transformer models, vector search, and more. 
The framework provides a flexible architecture for orchestrating state-of-the-art embedding models and LLMs into pipelines to solve real-world NLP use cases.\n\n## What is Haystack?\n\nHaystack is designed to facilitate the development of production-ready AI applications with a focus on **context engineering**—giving developers explicit control over how information is retrieved, ranked, filtered, combined, structured, and routed before it reaches the language model.\n\n资料来源：[README.md:1]()()\n\n### Core Capabilities\n\n| Capability | Description |\n|------------|-------------|\n| **Retrieval-Augmented Generation (RAG)** | Combine vector search with LLMs for accurate, context-grounded responses |\n| **Document Search** | Full-featured document indexing and semantic search |\n| **Question Answering** | Extract answers from large document collections |\n| **Pipeline Orchestration** | Build complex workflows with customizable components |\n| **Agent Integration** | Deploy autonomous agents with tool-use capabilities |\n\n资料来源：[docker/README.md:4-6]()()\n\n## Architecture Overview\n\nHaystack follows a component-based architecture where pipelines serve as the foundational building blocks. Pipelines connect various components including document stores, retrievers, readers, generators, and custom tools.\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Pipeline]\n    B --> C[Retrievers]\n    B --> D[Document Stores]\n    C --> E[Rankers]\n    E --> F[LLM / Generator]\n    F --> G[Response]\n    \n    H[Documents] --> D\n    \n    style F fill:#e1f5fe\n    style D fill:#fff3e0\n    style C fill:#e8f5e9\n```\n\n### Pipeline Components\n\nPipelines in Haystack are composed of interconnected nodes that process data sequentially or in parallel. 
Each component handles a specific stage of the document processing or inference workflow.\n\n| Component Type | Function |\n|----------------|----------|\n| **DocumentStore** | Stores and indexes documents for retrieval |\n| **Retriever** | Finds relevant documents from the store |\n| **Ranker** | Reorders retrieved documents by relevance |\n| **Reader/Generator** | Extracts answers or generates responses |\n| **Preprocessor** | Cleans and splits documents before indexing |\n| **Custom Nodes** | User-defined processing logic |\n\n资料来源：[README.md:54-58](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Key Features\n\n### Built for Context Engineering\n\nHaystack provides fine-grained control over the entire retrieval and generation pipeline. Developers can:\n\n- Define custom retrieval strategies\n- Implement multi-stage ranking pipelines\n- Route queries to specialized processing branches\n- Control how context is assembled before reaching the LLM\n\n### Flexible Pipeline Design\n\nThe framework supports both declarative and programmatic pipeline construction, allowing developers to define workflows through configuration files or Python code.\n\n```mermaid\ngraph LR\n    A[Query Input] --> B[Retriever Node]\n    B --> C[Ranker Node]\n    C --> D[LLM Node]\n    D --> E[Output]\n    \n    F[Documents] --> G[Document Store]\n    G --> B\n```\n\n### Production-Ready Architecture\n\nHaystack includes enterprise features such as:\n\n- **Telemetry**: Anonymous usage statistics collection for component initialization tracking (opt-out available)\n- **Container Support**: Docker images for consistent deployment environments\n- **CI/CD Integration**: Automated testing with GitHub Actions workflows\n- **Type Checking**: Full MyPy type annotation support\n\n资料来源：[README.md:60-62](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Installation\n\n### Package Installation\n\nThe primary method for installing Haystack is via pip:\n\n```bash\npip install haystack-ai\n```\n\nFor testing pre-release features:\n\n```bash\npip install --pre 
haystack-ai\n```\n\n资料来源：[README.md:28-34](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n### Docker Installation\n\nHaystack provides Docker images for containerized deployments:\n\n| Image | Description |\n|-------|-------------|\n| `haystack:base-<version>` | Base image with Haystack preinstalled for derivation |\n\nMulti-platform builds are supported for various architectures including `linux/arm64` and `linux/amd64`.\n\n```bash\ndocker buildx bake base\n```\n\n资料来源：[docker/README.md:8-14](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n\n## Documentation Structure\n\nThe Haystack documentation is hosted at [docs.haystack.deepset.ai](https://docs.haystack.deepset.ai) and organized into several sections:\n\n| Section | Content |\n|---------|---------|\n| **Overview/Intro** | Getting started guides and project introduction |\n| **Get Started** | Quick-start guide for building first LLM applications |\n| **Tutorials** | Step-by-step learning paths |\n| **Cookbook** | Pre-built recipes and example implementations |\n| **API Reference** | Auto-generated documentation from docstrings |\n| **Concepts** | Core architectural concepts and design patterns |\n\n资料来源：[docs-website/README.md:1-8](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n### Documentation Versioning\n\nThe documentation site supports multiple versions:\n\n- **Next (Unreleased)**: Documentation for upcoming features\n- **Current (Stable)**: Documentation for the latest stable release\n- **Past Versions**: Archived documentation for previous releases\n\n资料来源：[docs-website/src/pages/versions.js:1-25](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/pages/versions.js)\n\n### API Reference Generation\n\nThe API reference pages are automatically generated from docstrings using [haystack-pydoc-tools](https://github.com/deepset-ai/haystack-pydoc-tools). 
A GitHub workflow regenerates the API reference when code changes are merged.\n\n资料来源：[pydoc/README.md:1-12](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n\n## Project Structure\n\n```\nhaystack/\n├── haystack/                    # Main package source code\n├── docs-website/                # Docusaurus documentation site\n│   ├── docs/                    # Main documentation content\n│   ├── reference/               # Auto-generated API reference\n│   └── versioned_docs/           # Versioned documentation snapshots\n├── docker/                      # Docker image configurations\n├── pydoc/                       # PyDoc configuration files\n└── examples/                    # Example implementations\n```\n\n> **Note**: Example implementations have been moved to the [haystack-cookbook](https://github.com/deepset-ai/haystack-cookbook/) repository.\n\n资料来源：[examples/README.md:1-5](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n\n## Community and Contributing\n\nHaystack is open to contributions from developers of all skill levels. There are multiple ways to contribute:\n\n| Contribution Area | Repository |\n|-------------------|------------|\n| Core Framework | `deepset-ai/haystack` |\n| Integrations | `deepset-ai/haystack-core-integrations` |\n| Documentation | `deepset-ai/haystack/tree/main/docs-website` |\n\n### Community Resources\n\n- **GitHub Issues**: Bug reports and feature requests\n- **GitHub Discussions**: General questions and community support\n- **Discord**: Real-time community engagement\n- **Stack Overflow**: Tagged questions at `haystack`\n- **Twitter/X**: Updates and announcements\n\n资料来源：[README.md:89-95](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Organizations Using Haystack\n\nHaystack is trusted by thousands of production AI teams across industries:\n\n| Industry | Organizations |\n|----------|---------------|\n| **Technology & AI** | Apple, Meta, Databricks, NVIDIA, Intel |\n| **Public Sector** | European Commission |\n\n资料来源：[README.md:78-85](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Licensing and Compliance\n\n- **License**: Apache 2.0\n- **Type Checking**: MyPy validated\n- **Coverage**: 
Automated test coverage tracking\n- **License Compliance**: Automated workflow verification\n\n资料来源：[README.md:10-11](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Summary\n\nHaystack provides a comprehensive framework for building production-ready LLM applications with an emphasis on retrieval-augmented generation, flexible pipeline design, and context engineering. The framework's component-based architecture enables developers to customize every stage of the document processing and inference pipeline while maintaining production-grade reliability through integrated testing, documentation, and deployment tooling.\n\nWith support for Docker containerization, comprehensive documentation, and an active open-source community, Haystack serves as a robust foundation for teams implementing enterprise AI solutions across diverse industries.\n\n---\n\n<a id='pipeline-architecture'></a>\n\n## Pipeline Architecture\n\n### 相关页面\n\n相关主题：[Introduction to Haystack](#introduction), [Pipeline Component Types](#component-types), [Core Concepts](#core-concepts)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/concepts/pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n- [docs-website/docs/concepts/pipelines/asyncpipeline.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/asyncpipeline.mdx)\n- [docs-website/docs/concepts/pipelines/serialization.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/serialization.mdx)\n- [docs-website/docs/concepts/pipelines/debugging-pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx)\n- [docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx)\n</details>\n\n# Pipeline Architecture\n\n## Overview\n\nThe Pipeline architecture is the 
foundational component of the Haystack framework, enabling developers to construct flexible, modular workflows for building LLM-powered applications. Pipelines orchestrate the execution of various components—including retrievers, readers, generators, and custom processors—into cohesive data processing flows.\n\nPipelines in Haystack 2.x provide a declarative approach to defining application workflows, allowing developers to:\n\n- Connect multiple components in directed acyclic graphs (DAGs)\n- Route data between components with explicit connections\n- Handle both synchronous and asynchronous execution models\n- Debug and inspect execution through breakpoints and hooks\n- Persist and share pipeline configurations through serialization\n\n资料来源：[docs-website/docs/concepts/pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n\n## Core Concepts\n\n### Component Connections\n\nComponents in a Haystack Pipeline are connected through named input/output connections. 
Each component exposes specific input and output slots that define how data flows through the pipeline.\n\n```mermaid\ngraph LR\n    A[Document Store] -->|query results| B[Retriever]\n    B -->|retrieved docs| C[Reader]\n    C -->|answers| D[Output]\n    \n    style A fill:#e1f5fe\n    style B fill:#fff3e0\n    style C fill:#e8f5e9\n    style D fill:#fce4ec\n```\n\nThe connection model requires that:\n- Output types must be compatible with target input types\n- Components can have multiple inputs and outputs\n- Connections form a directed graph structure\n\n资料来源：[docs-website/docs/concepts/pipelines.mdx:1-20](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n\n### Pipeline Types\n\nHaystack provides multiple pipeline implementations optimized for different use cases:\n\n| Pipeline Type | Use Case | Execution Model |\n|---------------|----------|-----------------|\n| Standard Pipeline | General-purpose workflows | Synchronous |\n| AsyncPipeline | High-throughput I/O operations | Asynchronous with `async/await` |\n| SearchPipeline | Retrieval-focused workflows | Optimized for search |\n| GenerativePipeline | LLM-centric applications | Optimized for generation |\n\n资料来源：[docs-website/docs/concepts/pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n\n## AsyncPipeline\n\nThe AsyncPipeline extends the standard Pipeline with asynchronous execution capabilities, making it suitable for applications requiring high concurrency and non-blocking I/O operations.\n\n### Key Features\n\n- **Non-blocking execution**: Components can execute concurrently when dependencies are satisfied\n- **Streaming support**: Better handling of streaming responses from LLMs\n- **Resource efficiency**: Improved CPU and memory utilization for I/O-bound workloads\n\n```python\nasync def run_async_pipeline(pipeline, query):\n    result = await pipeline.run_async(query=query)\n    return 
result\n```\n\n资料来源：[docs-website/docs/concepts/pipelines/asyncpipeline.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/asyncpipeline.mdx)\n\n### Execution Flow\n\n```mermaid\ngraph TD\n    A[Start] --> B{AsyncPipeline.run_async}\n    B --> C[Execute Independent Components]\n    C --> D{Wait for Dependencies?}\n    D -->|No| E[Collect Results]\n    D -->|Yes| F[Await Dependency]\n    F --> E\n    E --> G[Return Unified Result]\n    \n    style B fill:#bbdefb\n    style C fill:#c8e6c9\n    style G fill:#ffe0b2\n```\n\n## Serialization\n\nPipeline configurations can be serialized to YAML format, enabling:\n\n- Persistence of pipeline definitions\n- Sharing configurations across environments\n- Version control for pipeline definitions\n- Reproducible deployments\n\n### Serialization Format\n\n```yaml\nversion: '2.0'\ncomponents:\n  - name: MyRetriever\n    type: BM25Retriever\n    init_parameters:\n      document_store: MyDocumentStore\n  - name: MyReader\n    type: FARMReader\n    init_parameters:\n      model_name_or_path: deepset/roberta-base-squad2\nedges: []\n```\n\n资料来源：[docs-website/docs/concepts/pipelines/serialization.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/serialization.mdx)\n\n### Loading Serialized Pipelines\n\n```python\nfrom haystack import Pipeline\n\n# Load from YAML\npipeline = Pipeline.load_from_yaml(path=\"pipeline_config.yaml\")\n```\n\n## Debugging Pipelines\n\nHaystack provides comprehensive debugging capabilities to inspect and troubleshoot pipeline execution.\n\n### Execution Tracing\n\nThe debugging system tracks:\n- Component execution order\n- Input/output data at each stage\n- Execution timing and performance metrics\n- Error locations and stack traces\n\n```python\nfrom haystack import Pipeline\n\npipeline = Pipeline()\npipeline.debug = True  # Enable debug mode\nresult = pipeline.run(query=\"What is 
Haystack?\")\n```\n\n资料来源：[docs-website/docs/concepts/pipelines/debugging-pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx)\n\n### Pipeline Inspector\n\nThe Pipeline Inspector provides detailed visibility into:\n\n| Inspection Target | Information Provided |\n|-------------------|---------------------|\n| Component Graph | Node and edge relationships |\n| Data Flow | Input/output shapes and types |\n| Execution State | Runtime values at breakpoints |\n| Performance | Timing and memory profiles |\n\n## Pipeline Breakpoints\n\nBreakpoints allow execution to pause at specific points, enabling detailed inspection of intermediate results.\n\n```mermaid\ngraph LR\n    A[Pipeline Run] --> B{Breakpoint 1?}\n    B -->|Yes| C[Pause & Inspect]\n    C --> D{Continue?}\n    D -->|Yes| E{Breakpoint 2?}\n    D -->|No| Z[Abort]\n    E -->|Yes| F[Pause & Inspect]\n    E -->|No| G[Continue to End]\n    B -->|No| E\n    \n    style C fill:#fff9c4\n    style F fill:#fff9c4\n    style Z fill:#ffcdd2\n```\n\n### Breakpoint Configuration\n\nBreakpoints can be configured at:\n\n- **Component level**: Pause before or after specific component execution\n- **Connection level**: Inspect data flowing through specific connections\n- **Condition level**: Pause only when certain conditions are met\n\n资料来源：[docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx)\n\n## Best Practices\n\n### Pipeline Design\n\n1. **Modularity**: Keep components focused on single responsibilities\n2. **Clear naming**: Use descriptive names for components and connections\n3. **Error handling**: Implement proper error handling at component boundaries\n4. 
**Testing**: Unit test individual components before integration\n\n### Performance Optimization\n\n| Strategy | Description |\n|----------|-------------|\n| Caching | Enable caching for expensive operations |\n| Batching | Use batch processing for multiple queries |\n| Async execution | Prefer AsyncPipeline for I/O-bound workflows |\n| Resource limits | Set appropriate timeouts and memory limits |\n\n## Architecture Summary\n\n```mermaid\ngraph TD\n    subgraph \"Pipeline Layer\"\n        A[Pipeline] --> B[AsyncPipeline]\n        A --> C[SearchPipeline]\n        A --> D[GenerativePipeline]\n    end\n    \n    subgraph \"Component Layer\"\n        E[Retrievers] --> A\n        F[Readers] --> A\n        G[Generators] --> A\n        H[Custom Processors] --> A\n    end\n    \n    subgraph \"Data Layer\"\n        I[Document Stores] --> E\n        J[Models] --> F\n        J --> G\n    end\n    \n    subgraph \"Infrastructure\"\n        K[Serialization] -.-> A\n        L[Debugging] -.-> A\n        M[Breakpoints] -.-> A\n    end\n```\n\n## Related Documentation\n\n- [Components Overview](https://docs.haystack.deepset.ai/docs/intro)\n- [Pipeline Components](https://docs.haystack.deepset.ai/docs/pipeline-components)\n- [API Reference](https://docs.haystack.deepset.ai/reference/pipeline)\n- [Cookbook Examples](https://haystack.deepset.ai/cookbook)\n\n---\n\n<a id='core-concepts'></a>\n\n## Core Concepts\n\n### 相关页面\n\n相关主题：[Pipeline Architecture](#pipeline-architecture), [Pipeline Component Types](#component-types), [Introduction to Haystack](#introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n- [docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n- [pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- 
[examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n- [docs-website/src/theme/SearchBar.js](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/theme/SearchBar.js)\n- [docs-website/src/components/CopyDropdown/index.tsx](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/components/CopyDropdown/index.tsx)\n</details>\n\n# Core Concepts\n\nHaystack is an end-to-end LLM (Large Language Model) framework that enables developers to build applications powered by LLMs, Transformer models, vector search, and more. The framework orchestrates state-of-the-art embedding models and LLMs into pipelines to solve use cases such as retrieval-augmented generation (RAG), document search, question answering, and answer generation.\n\n## What is Haystack?\n\nHaystack provides a flexible architecture for designing systems with explicit control over how information is retrieved, ranked, filtered, combined, structured, and routed before it reaches the model. The framework allows developers to define pipelines and agent workflows where retrieval, memory, tools, and other components work together seamlessly.\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Architecture Overview\n\nHaystack's architecture is built around the concept of **pipelines** that orchestrate various components. 
These pipelines provide explicit control over the data flow from input to output, enabling developers to build complex LLM applications with fine-grained control.\n\n```mermaid\ngraph TD\n    A[Input Query] --> B[Pipeline]\n    B --> C[Components]\n    C --> D[Retrievers]\n    C --> E[Rankers]\n    C --> F[Memory]\n    C --> G[Tools]\n    D --> H[Document Store]\n    E --> I[LLM]\n    H --> J[Context Engineering]\n    I --> K[Generated Response]\n    J --> I\n```\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Installation\n\nHaystack can be installed via pip using the main package:\n\n```sh\npip install haystack-ai\n```\n\nTo try the newest features, install nightly pre-releases:\n\n```sh\npip install --pre haystack-ai\n```\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Docker Support\n\nHaystack provides Docker images for containerized deployments. The base image `haystack:base-<version>` contains a working Python environment with Haystack preinstalled and is designed to be used as the `FROM` base of derived images.\n\nImages are built with BuildKit and orchestrated using `bake`:\n\n```sh\ndocker buildx bake base\n```\n\nCustom images can be built by overriding variables defined in the `docker-bake.hcl` file:\n\n```sh\nHAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake base --no-cache\n```\n\n资料来源：[docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n\n## Documentation System\n\nHaystack maintains comprehensive documentation at [docs.haystack.deepset.ai](https://docs.haystack.deepset.ai). 
The documentation is built with Docusaurus 3 and provides guides, tutorials, API references, and best practices.\n\n### Documentation Structure\n\n| Directory | Purpose |\n|-----------|---------|\n| `docs/` | Main documentation (guides, tutorials, concepts) |\n| `docs/concepts/` | Core Haystack concepts |\n| `docs/pipeline-components/` | Component documentation |\n| `reference/` | API reference (auto-generated) |\n| `versioned_docs/` | Versioned copies of docs |\n| `src/` | React components and custom code |\n\n资料来源：[docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n### Versioning\n\nDocumentation versions are released alongside Haystack releases and are fully automated through GitHub workflows. The versioning process includes:\n\n- `promote_unstable_docs.yml` - Automatically triggered during Haystack releases\n- `minor_version_release.yml` - Creates new version directories and updates version configuration\n\n资料来源：[docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n## API Reference\n\nThe API reference is generated from docstrings in the codebase using [haystack-pydoc-tools](https://github.com/deepset-ai/haystack-pydoc-tools). A GitHub workflow regenerates the API reference when code changes.\n\nTo add documentation for a new module:\n\n1. Create a `.yml` file in the `pydoc` directory\n2. Configure how haystack-pydoc-tools will generate the page\n3. 
Commit to main\n\nAll API reference updates are initially deployed to unstable docs and promoted to stable docs during releases.\n\n资料来源：[pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n\n## Documentation Website Development\n\nThe documentation site can be run locally for development:\n\n```bash\ngit clone https://github.com/deepset-ai/haystack.git\ncd haystack/docs-website\nnpm install\nnpm start\n```\n\nThe site opens at http://localhost:3000 with live reload functionality.\n\nCommon development tasks include:\n\n- Edit a page: update files under `docs/` or `versioned_docs/`\n- Add to sidebar: update `sidebars.js` with your doc ID\n- Production check: `npm run build && npm run serve`\n\n资料来源：[docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n## Search Functionality\n\nThe documentation website includes a custom search bar that groups results by page and sorts them by relevance score. The search system supports filtering by category and provides snippets from matching documents.\n\n### Search Architecture\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Search Input]\n    B --> C[Debounced Search]\n    C --> D[Search Algorithm]\n    D --> E{Results Found?}\n    E -->|Yes| F[Group by Page]\n    E -->|No| G[No Results State]\n    F --> H[Sort by Score]\n    H --> I[Display Results]\n    G --> J[Show Error/Message]\n```\n\n资料来源：[docs-website/src/theme/SearchBar.js](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/theme/SearchBar.js)\n\n## Documentation Export Features\n\nThe documentation site provides multiple ways to export and share content:\n\n| Feature | Description |\n|---------|-------------|\n| Copy as Markdown | Copy page content in Markdown format for LLMs |\n| View as Markdown | View page as plain text |\n| Export as PDF | Save page as PDF file |\n| Ask AI | Open page in external AI assistants 
|\n\n资料来源：[docs-website/src/components/CopyDropdown/index.tsx](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/components/CopyDropdown/index.tsx)\n\n### Markdown Conversion Rules\n\nThe export feature uses custom Turndown rules:\n\n- Code blocks: Wrapped in backticks\n- Admonitions: Converted to blockquotes with type labels (NOTE, TIP, WARNING, etc.)\n- Navigation elements: Removed from export\n- Scripts and styles: Filtered out\n\n资料来源：[docs-website/src/components/CopyDropdown/index.tsx](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/components/CopyDropdown/index.tsx)\n\n## Examples and Cookbooks\n\nExample code and cookbooks have been moved to a dedicated repository: [haystack-cookbook](https://github.com/deepset-ai/haystack-cookbook/)\n\nThis separation allows for easier maintenance and discovery of example applications.\n\n资料来源：[examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n\n## CI/CD and Quality Assurance\n\nHaystack maintains high code quality through automated workflows:\n\n| Workflow | Purpose |\n|----------|---------|\n| tests.yml | Run test suite |\n| types (Mypy) | Type checking |\n| Coverage | Code coverage tracking |\n| Ruff | Linting |\n| license_compliance.yml | License verification |\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Contributing to Haystack\n\nHaystack welcomes community contributions in various forms:\n\n- **Main project**: Contribute to the core Haystack repository\n- **Integrations**: Contribute on [haystack-core-integrations](https://github.com/deepset-ai/haystack-core-integrations)\n- **Documentation**: Contribute to [haystack/docs-website](https://github.com/deepset-ai/haystack/tree/main/docs-website)\n\nThe project provides a [full list of issues open to contributions](https://github.com/orgs/deepset-ai/projects/14) for both new and experienced 
contributors.\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Organizations Using Haystack\n\nHaystack is used in production by numerous organizations across industries:\n\n| Industry | Organizations |\n|----------|---------------|\n| Technology & AI | Apple, Meta, Databricks, NVIDIA, Intel |\n| Public Sector | European Commission |\n| Various | Thousands of teams building production AI systems |\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n---\n\n<a id='component-types'></a>\n\n## Pipeline Component Types\n\n### 相关页面\n\n相关主题：[Pipeline Architecture](#pipeline-architecture), [Data Processing Components](#data-processing), [LLM and Embedder Integrations](#llm-integrations)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/pipeline-components/generators.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators.mdx)\n- [docs-website/docs/pipeline-components/embedders.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/embedders.mdx)\n- [docs-website/docs/pipeline-components/retrievers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/retrievers.mdx)\n- [docs-website/docs/pipeline-components/rankers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/rankers.mdx)\n- [docs-website/docs/pipeline-components/preprocessors.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors.mdx)\n- [docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n- [docs-website/docs/pipeline-components/builders.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/builders.mdx)\n- 
[docs-website/docs/pipeline-components/routers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/routers.mdx)\n- [docs-website/docs/pipeline-components/joiners.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/joiners.mdx)\n</details>\n\n# Pipeline Component Types\n\nPipeline components are the fundamental building blocks of Haystack pipelines. They are modular units that perform specific operations such as retrieving documents, converting file formats, generating responses, and routing data between pipeline stages. Each component follows a consistent interface that enables seamless integration into pipeline workflows, allowing developers to compose complex LLM applications from reusable, interchangeable parts.\n\n## Overview\n\nHaystack provides a comprehensive set of built-in pipeline components that cover the full lifecycle of LLM-powered applications. These components are designed to work together through a unified API, enabling developers to build retrieval-augmented generation (RAG) systems, question-answering pipelines, document processing workflows, and agent-based applications with minimal configuration.\n\nThe architecture follows a modular pattern where each component receives inputs, performs a specific transformation or operation, and produces outputs that can be consumed by subsequent components in the pipeline. This design philosophy ensures that components remain loosely coupled and highly reusable across different use cases.\n\nComponents in Haystack are categorized based on their primary function within the data flow. Some components handle input preparation (converters, preprocessors), others manage information retrieval (retrievers, embedders), some optimize result ordering (rankers), and others control program flow (routers, joiners). 
Understanding these categories is essential for designing effective pipelines that balance performance, accuracy, and resource utilization.\n\n## Component Architecture\n\n### Component Lifecycle\n\nComponents in Haystack follow a standardized lifecycle that includes initialization, execution, and optional teardown phases. During initialization, components receive their configuration parameters and prepare any required resources such as model weights, API connections, or index data. The execution phase processes input data through the component's core logic, while the teardown phase releases resources when the component is no longer needed.\n\n```mermaid\ngraph TD\n    A[Initialize Component] --> B[Load Resources]\n    B --> C[Receive Input Data]\n    C --> D[Process Data]\n    D --> E[Produce Output]\n    E --> F{Check Pipeline Status}\n    F -->|Continue| C\n    F -->|Complete| G[Release Resources]\n    G --> H[Component Lifecycle End]\n```\n\n### Data Flow Patterns\n\nHaystack pipelines support multiple data flow patterns that determine how information moves between components. Linear flow passes output directly to the next component, while branching flow sends data to multiple paths based on conditions. Parallel flow distributes work across multiple components simultaneously, and feedback flow allows outputs to influence earlier pipeline stages.\n\n## Input Processing Components\n\nInput processing components prepare raw data for use by downstream pipeline stages. These components handle the transformation of unstructured or heterogeneous data sources into standardized formats that can be processed consistently throughout the pipeline.\n\n### Converters\n\nConverters transform documents from various file formats into Haystack's internal document representation. 
They handle the extraction of text content from source files while preserving metadata that may be useful for subsequent processing or retrieval operations.\n\n| Converter Type | Supported Formats | Primary Use Case |\n|---------------|-------------------|------------------|\n| PDF Converter | PDF | Extract text from PDF documents |\n| Text Converter | TXT, MD | Plain text and Markdown files |\n| DOCX Converter | DOCX | Microsoft Word documents |\n| HTML Converter | HTML | Web page content extraction |\n\nConverters are typically placed at the beginning of indexing pipelines where they process source documents before the content is split, embedded, and stored. The output of converters feeds directly into preprocessors that further refine the content.\n\n资料来源：[docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n\n### Preprocessors\n\nPreprocessors clean, normalize, and transform document content to improve retrieval quality and downstream processing. They apply transformations such as text cleaning, language detection, and content segmentation to prepare documents for embedding and storage.\n\n```mermaid\ngraph LR\n    A[Raw Document] --> B[Clean Text]\n    B --> C[Detect Language]\n    C --> D[Split Document]\n    D --> E[Normalize Content]\n    E --> F[Processed Document]\n```\n\nKey preprocessing operations include removing unnecessary whitespace, normalizing Unicode characters, splitting long documents into manageable chunks, and filtering out low-quality content. These operations significantly impact the quality of retrieval results and should be configured based on the specific characteristics of your data.\n\nPreprocessors work closely with converters to form the input preparation stage of indexing pipelines. 
The processed output is then passed to embedders or directly to storage depending on the pipeline configuration.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors.mdx]()\n\n### Builders\n\nBuilders construct specialized data structures or artifacts that support pipeline operations. Unlike converters that handle file formats, builders create complex objects such as prompt templates, search indexes, or custom data representations required by other components.\n\nBuilders enable the composition of reusable building blocks that can be shared across multiple pipelines. They abstract away the complexity of constructing complex objects, allowing pipeline developers to focus on workflow design rather than implementation details.\n\n资料来源：[docs-website/docs/pipeline-components/builders.mdx]()\n\n## Information Retrieval Components\n\nInformation retrieval components locate and retrieve relevant content from data stores. These components form the core of RAG systems and document search applications, enabling pipelines to find the most relevant information based on query semantics or keywords.\n\n### Retrievers\n\nRetrievers search document stores to find content relevant to a given query. Haystack supports multiple retrieval strategies ranging from keyword-based sparse retrieval to semantic dense retrieval, enabling developers to choose the approach that best fits their use case.\n\n| Retrieval Type | Description | Best For |\n|--------------|-------------|----------|\n| Dense Retrieval | Uses neural embeddings for semantic matching | Conceptual queries, semantic similarity |\n| Sparse Retrieval | Traditional keyword-based matching | Exact matches, specific terminology |\n| Hybrid Retrieval | Combines dense and sparse methods | Balanced performance across query types |\n\nRetrievers are fundamental to RAG pipelines where they identify the documents or passages most likely to contain information relevant to the user's question. 
The retrieved content is then passed to generators that synthesize the final response.\n\n资料来源：[docs-website/docs/pipeline-components/retrievers.mdx]()\n\n### Embedders\n\nEmbedders convert text content into vector representations that capture semantic meaning. These vectors enable semantic similarity searches where documents are matched based on meaning rather than exact keyword occurrence.\n\n```mermaid\ngraph TD\n    A[Text Input] --> B[Embedding Model]\n    B --> C[Vector Representation]\n    C --> D[Vector Store]\n    \n    E[Query] --> F[Same Embedding Model]\n    F --> G[Query Vector]\n    G --> D\n    D --> H[Similarity Search]\n    H --> I[Ranked Results]\n```\n\nEmbedders are used both during indexing (to create document vectors) and at query time (to create query vectors). The choice of embedding model significantly impacts retrieval quality, and Haystack supports integration with various embedding providers including OpenAI, Hugging Face, and local models.\n\n资料来源：[docs-website/docs/pipeline-components/embedders.mdx]()\n\n### Rankers\n\nRankers improve retrieval results by reordering documents based on additional relevance signals. While retrievers perform the initial candidate selection, rankers apply more sophisticated scoring models to identify the most relevant results.\n\nRankers typically use cross-encoder models that jointly analyze query-document pairs to produce relevance scores. This approach is computationally more expensive than bi-encoder retrieval but provides higher accuracy for tasks where precision is critical.\n\nThe typical pipeline arrangement places rankers after retrievers, with retrievers performing the broad candidate selection and rankers performing the refined reordering. 
This two-stage approach balances computational efficiency with result quality.\n\n资料来源：[docs-website/docs/pipeline-components/rankers.mdx]()\n\n## Output Generation Components\n\nOutput generation components synthesize final responses or artifacts from the information retrieved and processed by earlier pipeline stages. These components transform raw retrieved content into user-facing outputs.\n\n### Generators\n\nGenerators produce final outputs such as text responses, summaries, or structured data from retrieved context and user queries. In RAG systems, generators receive relevant documents and formulate answers that incorporate information from the retrieved content.\n\n```mermaid\ngraph TD\n    A[User Query] --> E[Generator]\n    B[Retrieved Context] --> E\n    E --> F[Generate Response]\n    F --> G[Response Output]\n    \n    H[LLM Provider] <--> E\n    H --> |API Key| E\n```\n\nGenerators integrate with various LLM providers including OpenAI, Anthropic, Cohere, Hugging Face, and local models. Configuration options control parameters such as temperature, max tokens, and response format to customize generator behavior for specific applications.\n\n资料来源：[docs-website/docs/pipeline-components/generators.mdx]()\n\n## Flow Control Components\n\nFlow control components manage how data moves through pipelines, enabling conditional logic, parallel processing, and result aggregation. These components add flexibility to pipeline design beyond simple linear data flow.\n\n### Routers\n\nRouters direct input data to different pipeline branches based on conditions or classifications. 
They enable conditional execution where different components handle different types of inputs or queries.\n\n| Router Type | Decision Basis | Use Case |\n|------------|---------------|----------|\n| Conditional Router | User-defined rules | Route queries to appropriate handlers |\n| Semantic Router | Query classification | Direct to specialized pipelines |\n| Custom Router | Any Python logic | Flexible routing strategies |\n\nRouters are essential for building multi-stage pipelines that handle diverse input types or implement complex query routing strategies. They enable pipelines to adapt their behavior based on the specific requirements of each input.\n\n资料来源：[docs-website/docs/pipeline-components/routers.mdx]()\n\n### Joiners\n\nJoiners combine outputs from multiple pipeline branches into unified inputs for downstream components. They handle the aggregation of results from parallel processing paths or the merging of different data streams.\n\n```mermaid\ngraph TD\n    A[Input] --> B[Branch 1]\n    A --> C[Branch 2]\n    A --> D[Branch N]\n    B --> E[Joiner]\n    C --> E\n    D --> E\n    E --> F[Combined Output]\n```\n\nJoiners implement various combination strategies including concatenation, interleaving, and weighted merging. The appropriate strategy depends on the data types being combined and the requirements of downstream components.\n\n资料来源：[docs-website/docs/pipeline-components/joiners.mdx]()\n\n## Component Configuration Patterns\n\n### Initialization Parameters\n\nComponents accept configuration during initialization that determines their behavior, resource connections, and operational parameters. Common configuration categories include model selection, connection settings, and behavioral parameters.\n\n### Default Parameters\n\nComponents provide sensible defaults for most parameters, enabling quick pipeline construction while allowing customization when needed. 
Default values are documented in each component's reference documentation.\n\n### Runtime Parameters\n\nSome components accept parameters at runtime (during pipeline execution) in addition to initialization-time configuration. Runtime parameters enable dynamic behavior adjustment based on input characteristics or pipeline state.\n\n## Building Custom Components\n\nHaystack's component architecture supports extension through custom implementations. Custom components follow the same interface patterns as built-in components, ensuring compatibility with existing pipeline infrastructure.\n\n### Component Interface Requirements\n\nCustom components must implement the standard component methods including initialization, execution, and any component-specific lifecycle hooks. The exact interface depends on the component type, but all components must be serializable for pipeline persistence.\n\n### Integration with Pipeline\n\nCustom components integrate seamlessly with built-in components through the unified pipeline interface. They can receive inputs from and produce outputs for any other component type, enabling flexible composition of custom and built-in functionality.\n\n## Best Practices\n\n### Component Selection\n\nChoose components based on your specific use case requirements including accuracy needs, latency constraints, and resource availability. Consider the trade-offs between different retrieval strategies, embedding models, and generation approaches.\n\n### Pipeline Design\n\nDesign pipelines with clear separation of concerns between components. Input processing, retrieval, and generation should be logically separated to enable independent optimization and testing.\n\n### Performance Optimization\n\nOptimize component ordering based on computational cost. Place computationally expensive operations later in the pipeline where they operate on reduced candidate sets. 
Use rankers selectively based on the required result quality.\n\n## Summary\n\nPipeline components form the foundation of Haystack's architecture, enabling modular construction of LLM-powered applications. The component taxonomy spans input processing (converters, preprocessors, builders), information retrieval (retrievers, embedders, rankers), output generation (generators), and flow control (routers, joiners). Each component category serves a distinct purpose in the pipeline data flow, and understanding these roles enables effective pipeline design and customization.\n\n---\n\n<a id='data-processing'></a>\n\n## Data Processing Components\n\n### 相关页面\n\n相关主题：[Document Stores and Retrievers](#document-stores), [Pipeline Component Types](#component-types)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx)\n- [docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx)\n- [docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx)\n- [docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n- [docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx)\n</details>\n\n# Data Processing Components\n\nData Processing Components are fundamental pipeline elements in Haystack that transform, clean, and prepare documents for downstream 
operations such as retrieval, indexing, and LLM processing. These components operate on `Document` objects, enabling structured manipulation of content while preserving metadata integrity throughout the processing pipeline.\n\n## Overview\n\nData Processing Components in Haystack serve as the preprocessing layer that bridges raw document ingestion with semantic retrieval and generation tasks. They are designed to handle various document formats, split long content into manageable chunks, and ensure data quality through cleaning operations.\n\nThe architecture follows a modular design pattern where each component type specializes in a specific transformation task:\n\n- **Document Splitters**: Divide documents into smaller, semantically coherent chunks\n- **Document Cleaners**: Remove noise, normalize text, and enhance readability\n- **Converters**: Transform external file formats into Haystack `Document` objects\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx)\n\n## Architecture and Processing Flow\n\n```mermaid\ngraph TD\n    A[Raw Document Input] --> B[Converters]\n    B --> C[Document Objects]\n    C --> D[Document Cleaners]\n    D --> E[Document Splitters]\n    E --> F[Processed Chunks]\n    F --> G[Embedding Stores]\n    G --> H[Retrieval Pipelines]\n    \n    B -.->|File Types| I[TXT]\n    B -.->|File Types| J[PDF]\n    B -.->|File Types| K[Markdown]\n    B -.->|File Types| L[HTML]\n    B -.->|File Types| M[Docx]\n    \n    D -.->|Operations| N[Text Normalization]\n    D -.->|Operations| O[Whitespace Cleaning]\n    D -.->|Operations| P[Metadata Preservation]\n    \n    E -.->|Strategies| Q[Character Split]\n    E -.->|Strategies| R[Recursive Split]\n    E -.->|Strategies| S[Hierarchical Split]\n```\n\n## Document Splitters\n\nDocument splitters are preprocessors that divide long documents into smaller, 
manageable chunks while attempting to preserve semantic coherence. This is critical for effective retrieval since chunk size directly impacts retrieval precision and context window utilization.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx)\n\n### Splitter Types\n\n| Splitter Type | Use Case | Splitting Strategy |\n|---------------|----------|---------------------|\n| `DocumentSplitter` | Basic character or token-based splitting | Fixed-length chunks |\n| `RecursiveSplitter` | Hierarchical splitting by delimiters | Recursive character/separator traversal |\n| `HierarchicalDocumentSplitter` | Multi-level document structure | Preserves headings and sections |\n\n### DocumentSplitter\n\nThe base `DocumentSplitter` provides fundamental splitting capabilities using either character count or token count as the primary division criterion.\n\n**Key Parameters:**\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `split_length` | `int` | Required | Target size of each chunk |\n| `split_overlap` | `int` | `0` | Number of overlapping elements between chunks |\n| `split_by` | `str` | `\"word\"` | Splitting criterion: `\"word\"`, `\"sentence\"`, `\"passage\"`, or `\"token\"` |\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx)\n\n### RecursiveSplitter\n\nThe `RecursiveSplitter` implements an intelligent multi-level splitting strategy that attempts to split documents at natural boundaries before falling back to smaller units.\n\n```python\nfrom haystack.components.preprocessors import RecursiveSplitter\n\nsplitter = RecursiveSplitter(\n    split_by=\"sentence\",\n    split_length=5,\n    split_overlap=2,\n    
separators=[\"\\n\\n\", \"\\n\", \". \", \" \", \"\"]\n)\n```\n\nThe splitter iterates through the `separators` list, attempting to split at each level. If a split produces chunks larger than `split_length`, it moves to the next (smaller) separator in the list.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx)\n\n**Separator Priority:**\n\n| Priority | Separator | Context |\n|----------|-----------|---------|\n| 1 | `\"\\n\\n\"` | Paragraph breaks |\n| 2 | `\"\\n\"` | Line breaks |\n| 3 | `\". \"` | Sentence endings |\n| 4 | `\" \"` | Word boundaries |\n| 5 | `\"\"` | Character-level fallback |\n\n### HierarchicalDocumentSplitter\n\nThe `HierarchicalDocumentSplitter` is designed for structured documents that contain hierarchical headings and section markers. It preserves document structure by splitting at heading boundaries first.\n\n**Key Features:**\n\n- Detects heading patterns (e.g., `#`, `##`, `###` in Markdown)\n- Splits at the highest heading level available\n- Maintains hierarchical relationships between sections and subsections\n- Ideal for technical documentation and Markdown-based content\n\n```python\nfrom haystack.components.preprocessors import HierarchicalDocumentSplitter\n\nsplitter = HierarchicalDocumentSplitter(\n    split_by=\"sentence\",\n    split_length=10,\n    split_overlap=3\n)\n```\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx)\n\n## Document Cleaners\n\nDocument cleaners are preprocessing components that normalize and sanitize text content while preserving essential structure and metadata. 
They remove unwanted artifacts, standardize formatting, and enhance downstream processing quality.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx)\n\n### Core Cleaning Operations\n\n| Operation | Description | Example |\n|-----------|-------------|---------|\n| Whitespace normalization | Collapse multiple spaces, trim line breaks | `\"  Hello\\n\\n  World  \"` → `\"Hello World\"` |\n| Character removal | Strip control characters and special symbols | Removes `\\x00` to `\\x1f` except `\\n`, `\\t` |\n| Quote normalization | Standardize quote characters | Smart quotes → straight quotes |\n| Heading normalization | Clean heading markers | Removes `#` from Markdown headings |\n\n### Common Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `remove_empty_lines` | `bool` | `True` | Remove lines with no content |\n| `remove_extra_whitespace` | `bool` | `True` | Normalize whitespace between words |\n| `remove_repeated_substrings` | `bool` | `False` | Eliminate duplicate consecutive substrings |\n\n## Converters\n\nConverters are components that transform external file formats into Haystack `Document` objects. 
They handle the ingestion pipeline by parsing various document formats and extracting both content and metadata.\n\n资料来源：[docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n\n### Supported Formats\n\n| Format | Converter Class | Features |\n|--------|-----------------|----------|\n| Plain Text | `TextConverter` | Direct text extraction |\n| PDF | `PdfToDocumentConverter` | Text and table extraction |\n| Markdown | `MarkdownToDocumentConverter` | Preserves structure and headings |\n| HTML | `HtmlToDocumentConverter` | Extracts text from HTML elements |\n| Microsoft Word | `DocxToDocumentConverter` | Document and paragraph parsing |\n\n### Converter Architecture\n\n```mermaid\ngraph LR\n    A[Input File] --> B[Format Detection]\n    B --> C[Format-Specific Parser]\n    C --> D[Content Extraction]\n    D --> E[Metadata Enrichment]\n    E --> F[Haystack Document]\n    \n    G[File Path] -.->|Direct Input| D\n    H[Binary Content] -.->|Raw Data| C\n```\n\n### Common Converter Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `encoding` | `str` | `\"utf-8\"` | Text encoding for file reading |\n| `encoding_errors` | `str` | `\"strict\"` | How to handle encoding errors |\n| `id_hash_keys` | `List[str]` | `[\"content\"]` | Keys for document ID generation |\n| `meta` | `Dict[str, Any]` | `{}` | Additional metadata to attach |\n\n资料来源：[docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n\n## Integration with Pipelines\n\nData Processing Components integrate seamlessly into Haystack pipelines as standard pipeline nodes. 
They can be composed in any order to create custom preprocessing workflows.\n\n### Typical Pipeline Configuration\n\n```python\nfrom haystack import Pipeline\nfrom haystack.components.preprocessors import DocumentCleaner, RecursiveSplitter\nfrom haystack.components.converters import TextConverter\n\npipeline = Pipeline()\npipeline.add_component(\"converter\", TextConverter())\npipeline.add_component(\"cleaner\", DocumentCleaner())\npipeline.add_component(\"splitter\", RecursiveSplitter(split_length=200, split_by=\"word\"))\n\npipeline.connect(\"converter\", \"cleaner\")\npipeline.connect(\"cleaner\", \"splitter\")\n```\n\n### Processing Order Recommendation\n\nWhile components can be connected in various orders, the recommended processing sequence is:\n\n1. **Convert** - Transform source files into `Document` objects\n2. **Clean** - Normalize and sanitize the text content\n3. **Split** - Divide documents into retrieval-optimized chunks\n\nThis sequence ensures that cleaning operations apply to the complete document before splitting, maintaining consistency across chunks.\n\n## Metadata Preservation\n\nAll Data Processing Components preserve and propagate document metadata throughout the processing pipeline. Metadata added during conversion is carried through cleaning and splitting operations.\n\n**Automatic Metadata Fields:**\n\n| Field | Source | Description |\n|-------|--------|-------------|\n| `source` | Converter | Original file path or URI |\n| `file_type` | Converter | Document format (pdf, txt, etc.) 
|\n| `page_number` | PDF Converter | Page number for page-level tracking |\n| `split_id` | Splitter | Unique identifier for each chunk |\n| `split_idx_start` | Splitter | Character offset where chunk begins |\n\n## Best Practices\n\n### Chunk Size Selection\n\n| Chunk Size | Recommended Use Case |\n|------------|---------------------|\n| 50-100 tokens | High-precision queries, precise fact extraction |\n| 200-300 tokens | Balanced retrieval, general Q&A |\n| 500+ tokens | Complex reasoning, multi-document synthesis |\n\n### Cleaning Configuration\n\n- Enable `remove_extra_whitespace` for all text-based content\n- Use `remove_empty_lines` when building dense indexes\n- Disable cleaning for Markdown/HTML if structure preservation is critical\n\n### Overlap Strategy\n\nWhen configuring `split_overlap`, consider:\n\n- **Low overlap (0-10%)**: Maximizes diversity, suitable for unique content\n- **Medium overlap (10-20%)**: Balances context preservation and diversity\n- **High overlap (20%+)**: Essential for documents with continuous context\n\n## Related Components\n\n- **Embedding Generators**: Process chunks to create vector representations\n- **Document Stores**: Store and index processed chunks for retrieval\n- **Rankers**: Reorder retrieved chunks by relevance\n- **Prompt Builders**: Combine chunks for LLM context windows\n\n---\n\n<a id='llm-integrations'></a>\n\n## LLM and Embedder Integrations\n\n### 相关页面\n\n相关主题：[Document Stores and Retrievers](#document-stores), [Pipeline Component Types](#component-types), [Development Guide](#development-guide)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx)\n- 
[docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx)\n- [docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx)\n- [docs-website/docs/concepts/integrations.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/integrations.mdx)\n</details>\n\n# LLM and Embedder Integrations\n\n## Overview\n\nLLM and Embedder Integrations in Haystack provide the core components for interfacing with Large Language Models and embedding services. These integrations enable developers to build production-ready applications powered by LLMs, Transformer models, and vector search capabilities.\n\n资料来源：[README.md:1-10]()\n\n## Architecture\n\nHaystack's integration architecture follows a modular pipeline design where Generators (LLMs) and Embedders serve as fundamental building blocks within the orchestration framework.\n\n```mermaid\ngraph TD\n    A[Haystack Pipeline] --> B[Retrieval Components]\n    A --> C[Generator Components]\n    A --> D[Embedder Components]\n    C --> E[LLM Providers]\n    D --> F[Embedding Models]\n    B --> F\n    E --> G[API Services]\n    F --> G\n```\n\n## Generator Integration\n\n### Purpose\n\nGenerators in Haystack are components that interact with Large Language Models to generate responses based on prompts and retrieved context. They serve as the core reasoning engine within RAG (Retrieval-Augmented Generation) pipelines.\n\n资料来源：[docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx:1-15]()\n\n### Supported Providers\n\nHaystack supports multiple LLM providers through its integration system. 
The framework provides standardized interfaces for:\n\n| Provider | Integration Type | API Access |\n|----------|------------------|------------|\n| OpenAI | Chat Completions API | API Key |\n| Anthropic | Claude API | API Key |\n| Azure OpenAI | Azure OpenAI Service | Azure Credentials |\n| Hugging Face | Inference API / Local | API Key / Local |\n| Ollama | Local Models | Local Host |\n\n### Component Configuration\n\nGenerator components in Haystack follow a consistent initialization pattern:\n\n```python\nfrom haystack import Pipeline\nfrom haystack.components.generators import OpenAIChatGenerator\n\ngenerator = OpenAIChatGenerator(\n    api_key=\"your-api-key\",\n    model=\"gpt-4\",\n    streaming_callback=None,\n    generation_kwargs={\"temperature\": 0.7, \"max_tokens\": 500}\n)\n```\n\n## Embedder Integration\n\n### Purpose\n\nEmbedders are components that convert text into vector representations (embeddings) suitable for semantic search and similarity comparisons. They are essential for the retrieval portion of RAG pipelines.\n\n资料来源：[docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx:1-20]()\n\n### Embedder Types\n\n| Type | Use Case | Deployment |\n|------|----------|------------|\n| Sentence Transformers | General text embeddings | Local / API |\n| OpenAI Embeddings | API-based generation | Remote |\n| Hugging Face | Transformer models | Local / Inference API |\n| Cohere | Multi-lingual support | API |\n\n### Integration with Retrievers\n\nEmbedders work in conjunction with document stores to enable semantic search:\n\n```mermaid\ngraph LR\n    A[Documents] --> B[Embedder]\n    B --> C[Vector Store]\n    C --> D[Retriever]\n    E[Query] --> F[Query Embedder]\n    F --> D\n    D --> G[Retrieved Docs]\n```\n\n## Function Calling\n\nFunction calling extends LLM integrations to enable structured interactions between LLMs and external tools. 
This feature allows Generators to produce structured outputs that can trigger specific actions.\n\n资料来源：[docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx:1-30]()\n\n### Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Pipeline\n    participant LLM\n    participant Tool\n    \n    User->>Pipeline: Query with function definitions\n    Pipeline->>LLM: Send prompt + function specs\n    LLM->>LLM: Analyze request\n    LLM-->>Pipeline: Function call + parameters\n    Pipeline->>Tool: Execute function\n    Tool-->>Pipeline: Function result\n    Pipeline->>LLM: Send result + original context\n    LLM-->>Pipeline: Final response\n    Pipeline-->>User: Return answer\n```\n\n## Integration Configuration\n\n### Environment Setup\n\nIntegrations in Haystack typically require API credentials which can be configured via environment variables:\n\n```bash\nexport OPENAI_API_KEY=\"your-openai-key\"\nexport ANTHROPIC_API_KEY=\"your-anthropic-key\"\nexport HUGGINGFACE_TOKEN=\"your-hf-token\"\n```\n\n资料来源：[docs-website/docs/concepts/integrations.mdx:1-25]()\n\n### Configuration Options\n\n| Parameter | Description | Default |\n|-----------|-------------|---------|\n| `api_key` | Provider API key | Environment variable |\n| `model` | Model identifier | Provider default |\n| `timeout` | Request timeout in seconds | 60 |\n| `max_retries` | Number of retry attempts | 3 |\n\n## Pipeline Integration Example\n\n```python\nfrom haystack import Pipeline\nfrom haystack.components.retrievers import InMemoryBM25Retriever\nfrom haystack.components.generators import OpenAIChatGenerator\nfrom haystack.document_stores import InMemoryDocumentStore\n\n# Initialize components\ndocument_store = InMemoryDocumentStore()\nretriever = InMemoryBM25Retriever(document_store=document_store)\ngenerator = OpenAIChatGenerator(model=\"gpt-4\")\n\n# Build pipeline\npipeline = Pipeline()\npipeline.add_component(\"retriever\", 
retriever)\npipeline.add_component(\"generator\", generator)\npipeline.connect(\"retriever\", \"generator\")\n```\n\n## Installation\n\nTo use LLM and Embedder integrations, install the appropriate Haystack packages:\n\n```sh\n# Core package\npip install haystack-ai\n\n# For specific integrations\npip install \"haystack-ai[openai]\"    # OpenAI models\npip install \"haystack-ai[anthropic]\"  # Anthropic Claude\npip install \"haystack-ai[transformers]\" # Hugging Face\n```\n\n## Additional Resources\n\n- [Documentation Site](https://docs.haystack.deepset.ai)\n- [GitHub Repository](https://github.com/deepset-ai/haystack)\n- [Integration Guides](https://docs.haystack.deepset.ai/docs/integrations)\n\n---\n\n<a id='document-stores'></a>\n\n## Document Stores and Retrievers\n\n### 相关页面\n\n相关主题：[LLM and Embedder Integrations](#llm-integrations), [Data Processing Components](#data-processing)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/concepts/document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store.mdx)\n- [docs-website/docs/concepts/document-store/choosing-a-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store/choosing-a-document-store.mdx)\n- [docs-website/docs/document-stores/inmemorydocumentstore.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/inmemorydocumentstore.mdx)\n- [docs-website/docs/document-stores/elasticsearch-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/elasticsearch-document-store.mdx)\n- [docs-website/docs/document-stores/qdrant-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/qdrant-document-store.mdx)\n- 
[docs-website/docs/document-stores/pinecone-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/pinecone-document-store.mdx)\n</details>\n\n# Document Stores and Retrievers\n\nDocument Stores and Retrievers are fundamental components in the Haystack framework that enable efficient storage, indexing, and retrieval of documents for LLM-powered applications. These components form the backbone of retrieval-augmented generation (RAG) pipelines and semantic search systems.\n\n## Overview\n\nHaystack provides a unified abstraction layer for document storage and retrieval, allowing developers to work with different backend technologies through a consistent interface. The framework supports multiple document store implementations, each optimized for different use cases, scales, and deployment requirements.\n\nDocument Stores in Haystack handle the persistence and indexing of documents, while Retrievers are specialized components that query these stores to find relevant documents based on user queries. 
This separation of concerns allows for flexible pipeline composition and easy swapping of storage backends.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Retriever]\n    B --> C[Document Store]\n    C --> D[(Vector Index)]\n    C --> E[(Document DB)]\n    F[Documents] --> C\n    G[Embedding Model] --> D\n    B --> H[Query Embedding]\n    H --> D\n    D --> I[Relevant Documents]\n    I --> J[RAG Pipeline]\n```\n\nThe architecture separates concerns between storage and retrieval, enabling optimized implementations for each layer.\n\n## Document Store Types\n\nHaystack supports multiple document store implementations, each with distinct characteristics:\n\n| Document Store | Type | Use Case | Scalability |\n|----------------|------|----------|--------------|\n| InMemoryDocumentStore | In-memory | Development, testing, small datasets | Single machine, limited scale |\n| ElasticsearchDocumentStore | Distributed search | Production, full-text search | Horizontal scaling |\n| QdrantDocumentStore | Vector database | Semantic search, embeddings | High-dimensional vectors |\n| PineconeDocumentStore | Managed vector DB | Cloud-native, managed infrastructure | Global distribution |\n\n### InMemoryDocumentStore\n\nThe `InMemoryDocumentStore` is the simplest document store implementation, storing all data in memory. It is primarily used for development, testing, and prototyping scenarios where persistence is not required.\n\n**Key Characteristics:**\n- No external dependencies required\n- Fast read/write operations for small datasets\n- Data lost on application restart\n- Not suitable for production deployments with large volumes\n\n资料来源：[docs-website/docs/document-stores/inmemorydocumentstore.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/inmemorydocumentstore.mdx)\n\n### ElasticsearchDocumentStore\n\nElasticsearch provides a mature, production-ready document store with powerful full-text search capabilities. 
It is well-suited for applications requiring sophisticated text analysis, faceted search, and scalable infrastructure.\n\n**Key Characteristics:**\n- Distributed architecture for high availability\n- Rich query DSL for complex search operations\n- BM25 ranking algorithm for relevance scoring\n- Supports millions of documents\n\n资料来源：[docs-website/docs/document-stores/elasticsearch-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/elasticsearch-document-store.mdx)\n\n### QdrantDocumentStore\n\nQdrant is a vector database optimized for similarity search and high-dimensional embeddings. It provides efficient nearest neighbor search operations essential for semantic retrieval.\n\n**Key Characteristics:**\n- Optimized for vector similarity search\n- Supports payload filtering\n- Hybrid sparse-dense vector search\n- gRPC-based API for performance\n\n资料来源：[docs-website/docs/document-stores/qdrant-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/qdrant-document-store.mdx)\n\n### PineconeDocumentStore\n\nPinecone is a managed vector database service that eliminates infrastructure management overhead. 
It provides global distribution and automatic scaling for production deployments.\n\n**Key Characteristics:**\n- Fully managed cloud service\n- Automatic scaling and sharding\n- Multi-tenancy support\n- Low-latency querying at scale\n\n资料来源：[docs-website/docs/document-stores/pinecone-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/pinecone-document-store.mdx)\n\n## Choosing a Document Store\n\nSelecting the appropriate document store depends on several factors including scale, performance requirements, deployment environment, and feature needs.\n\n资料来源：[docs-website/docs/concepts/document-store/choosing-a-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store/choosing-a-document-store.mdx)\n\n### Decision Criteria\n\n| Factor | InMemory | Elasticsearch | Qdrant | Pinecone |\n|--------|----------|---------------|--------|----------|\n| Dataset Size | < 100K docs | Unlimited | Unlimited | Unlimited |\n| Latency | Very low | Medium | Low | Low |\n| Persistence | None | Full | Full | Full |\n| Full-text Search | Basic | Advanced | Limited | Limited |\n| Vector Search | Basic | Plugin required | Native | Native |\n| Managed Service | No | Self-hosted/Cloud | Self-hosted/Cloud | Yes (managed) |\n| Cost | Free | Infrastructure | Infrastructure | Usage-based |\n\n### Recommendations\n\n**Development and Testing:**\nUse `InMemoryDocumentStore` for rapid prototyping and unit testing. It requires no setup and provides immediate feedback.\n\n**Production with Full-text Search:**\nChoose `ElasticsearchDocumentStore` when your application requires complex text queries, aggregations, or you already have an Elasticsearch infrastructure.\n\n**Semantic Search at Scale:**\nSelect `QdrantDocumentStore` or `PineconeDocumentStore` for applications primarily relying on embedding-based similarity search. 
Both provide native vector operations with efficient indexing.\n\n## Document Model\n\nDocuments in Haystack follow a standardized data model that captures content, metadata, and embedding vectors.\n\n```mermaid\nclassDiagram\n    class Document {\n        +str id\n        +str content\n        +dict meta\n        +List[float] embedding\n        +str blob\n        +str blob_mime_type\n    }\n```\n\n**Core Document Fields:**\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique identifier for the document |\n| `content` | string | Main text content of the document |\n| `meta` | dict | Arbitrary metadata (source, author, date, etc.) |\n| `embedding` | list[float] | Vector representation for semantic search |\n\n资料来源：[docs-website/docs/concepts/document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store.mdx)\n\n## Retriever Types\n\nRetrievers query document stores to find the most relevant documents for a given query. Haystack provides multiple retriever implementations optimized for different search strategies.\n\n### Dense Retrievers\n\nDense retrievers use neural network models to encode queries and documents into dense vector representations. They excel at capturing semantic meaning and handling synonyms.\n\n### Sparse Retrievers\n\nSparse retrievers use traditional information retrieval techniques like BM25 or TF-IDF. 
They are effective for exact term matching and keyword-based queries.\n\n### Hybrid Retrievers\n\nHybrid retrievers combine both dense and sparse approaches, leveraging the strengths of each to provide robust retrieval across different query types.\n\n## Pipeline Integration\n\n```mermaid\ngraph LR\n    A[Query] --> B[Retriever]\n    B --> C[Document Store]\n    C --> D[Top-K Documents]\n    D --> E[Ranker]\n    E --> F[Reader/Generator]\n    F --> G[Answer]\n```\n\nDocument Stores and Retrievers integrate seamlessly into Haystack pipelines, typically appearing early in the pipeline to fetch candidate documents before passing them to downstream components like Readers or Generators.\n\n## Basic Usage Example\n\n```python\nfrom haystack import Document\nfrom haystack.document_stores.in_memory import InMemoryDocumentStore\nfrom haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n\n# Initialize document store\ndocument_store = InMemoryDocumentStore()\n\n# Write documents\ndocuments = [\n    Document(content=\"Haystack is an open-source NLP framework\", meta={\"source\": \"docs\"}),\n    Document(content=\"It supports retrieval-augmented generation\", meta={\"source\": \"blog\"}),\n]\ndocument_store.write_documents(documents)\n\n# Initialize retriever\nretriever = InMemoryBM25Retriever(document_store=document_store)\n\n# Query (run() returns a dict with a \"documents\" list)\nresults = retriever.run(query=\"What is Haystack?\", top_k=10)\nretrieved_documents = results[\"documents\"]\n```\n\n## Performance Considerations\n\n### Indexing Performance\n\n| Store | Indexing Speed | Memory Usage |\n|-------|----------------|--------------|\n| InMemory | Very Fast | Proportional to dataset |\n| Elasticsearch | Medium | Distributed across nodes |\n| Qdrant | Fast | Optimized for vectors |\n| Pinecone | Fast | Managed externally |\n\n### Query Performance\n\nQuery latency depends on the number of documents, vector dimensions, and the complexity of filters applied. 
Vector databases like Qdrant and Pinecone use specialized approximate nearest-neighbor indexing structures (such as HNSW or IVF) to keep query latency low on large datasets.\n\n## See Also\n\n- [Document Store Concepts](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store.mdx) - Detailed conceptual overview\n- [Choosing a Document Store](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store/choosing-a-document-store.mdx) - Selection guide\n- [Pipeline Components](../pipeline-components/overview.mdx) - How retrievers fit into pipelines\n- [Embedding Models](../components/embedder.mdx) - Generating document embeddings\n\n---\n\n<a id='agents'></a>\n\n## Agent Systems\n\n### 相关页面\n\n相关主题：[Introduction to Haystack](#introduction), [Pipeline Architecture](#pipeline-architecture), [LLM and Embedder Integrations](#llm-integrations)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/concepts/agents.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents.mdx)\n- [docs-website/docs/concepts/agents/multi-agent-systems.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents/multi-agent-systems.mdx)\n- [docs-website/docs/pipeline-components/agents-1/agent.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/agent.mdx)\n- [docs-website/docs/pipeline-components/agents-1/state.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/state.mdx)\n- [docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx)\n</details>\n\n# Agent Systems\n\nAgent systems in Haystack represent a powerful paradigm for building autonomous and semi-autonomous AI applications that can perceive, reason, act, and interact with their environment. 
Haystack's agent framework enables developers to create sophisticated LLM-powered applications where agents can use tools, maintain state, collaborate with other agents, and incorporate human feedback into their decision-making processes.\n\n## Overview\n\nHaystack agents are designed to extend beyond simple prompt-response interactions by providing a structured mechanism for Large Language Models to take actions, make decisions, and execute multi-step workflows. The agent system in Haystack is built with flexibility and modularity in mind, allowing developers to customize every aspect of agent behavior from the underlying model to the specific tools available and the logic governing agent decisions.\n\nThe framework supports a variety of agent types and architectures, ranging from single-agent systems that handle specific tasks to complex multi-agent ecosystems where multiple specialized agents collaborate to solve problems. This flexibility makes Haystack suitable for a wide range of use cases, from simple question-answering applications to sophisticated autonomous systems that can browse the web, execute code, and coordinate with other agents to complete complex tasks.\n\n## Core Architecture\n\nThe agent architecture in Haystack is built around a pipeline-based model that connects perception, reasoning, action selection, and execution into a cohesive workflow. 
At its core, an agent consists of several key components that work together to enable autonomous behavior.\n\n### Agent Components\n\n| Component | Purpose | Description |\n|-----------|---------|-------------|\n| LLM | Reasoning Engine | The underlying language model that drives decision-making |\n| Tools | Action Interface | Capabilities that allow the agent to interact with external systems |\n| Prompt Builder | Instruction Assembly | Constructs prompts that guide agent behavior |\n| Output Handler | Response Processing | Interprets and executes agent decisions |\n| Memory | State Management | Maintains conversation history and context |\n\n资料来源：[docs-website/docs/pipeline-components/agents-1/agent.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/agent.mdx)\n\n### Execution Flow\n\n```mermaid\ngraph TD\n    A[User Input] --> B[Agent Receives Task]\n    B --> C[LLM Reasoning]\n    C --> D{Tool Selection?}\n    D -->|Yes| E[Execute Tool]\n    E --> F[Process Result]\n    D -->|No| G[Generate Response]\n    F --> C\n    G --> H[Return to User]\n    C --> I{Human Input Needed?}\n    I -->|Yes| J[Pause for Human Feedback]\n    J --> C\n    I -->|No| D\n```\n\nThe execution flow demonstrates how Haystack agents operate in a loop, continuously reasoning about the best course of action until the task is complete. The agent receives input, reasons about what to do, selects and executes tools as needed, and continues until it can provide a final response or requires additional input from the user or human overseer.\n\n## State Management\n\nState management is a critical aspect of agent systems, enabling agents to maintain context across multiple interactions and track the progress of complex, multi-step tasks. 
Haystack provides a flexible state management system that allows agents to store, retrieve, and update information throughout their execution lifecycle.\n\n### State Structure\n\nThe state system in Haystack agents typically includes several key elements that together form a comprehensive view of the agent's current situation and history. These elements enable the agent to maintain awareness of what has happened previously, what actions have been taken, and what information has been gathered.\n\n| State Element | Type | Description |\n|--------------|------|-------------|\n| Conversation History | List | Previous messages and interactions |\n| Tool Usage Log | List | Record of tools called and results |\n| Intermediate Results | Dict | Data collected during task execution |\n| User Preferences | Dict | Learned user preferences and feedback |\n| Task Progress | Dict | Current status of ongoing tasks |\n\n资料来源：[docs-website/docs/pipeline-components/agents-1/state.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/state.mdx)\n\n### State Persistence\n\nAgents in Haystack can maintain state across sessions, enabling persistent memory and long-term learning. This is particularly valuable for applications where the agent needs to build relationships with users over time or maintain knowledge about specific domains or tasks. The state management system supports various backends for persistence, from simple in-memory storage to distributed databases for production deployments.\n\n## Multi-Agent Systems\n\nHaystack supports the creation of sophisticated multi-agent systems where multiple specialized agents work together to solve problems. 
This architectural pattern enables the decomposition of complex tasks into smaller, manageable subtasks that can be handled by agents with specialized capabilities.\n\n### Agent Collaboration Patterns\n\n```mermaid\ngraph TD\n    subgraph Coordinator Agent\n        A[Task Received] --> B{Analyze Task}\n        B --> C[Decompose into Subtasks]\n    end\n    \n    subgraph Specialized Agents\n        D[Agent A: Research]\n        E[Agent B: Analysis]\n        F[Agent C: Synthesis]\n    end\n    \n    C --> D\n    C --> E\n    C --> F\n    D --> G[Results Aggregation]\n    E --> G\n    F --> G\n    G --> H[Final Response]\n```\n\nMulti-agent systems in Haystack can be configured with various collaboration patterns. In the supervisor pattern, a single coordinating agent directs the work of subordinate agents, assigning tasks and collecting results. In the collaborative pattern, agents work together as equals, sharing information and contributing their expertise to solve problems collectively.\n\n### Communication Protocols\n\nAgents in a multi-agent system communicate through well-defined interfaces that specify how messages are passed between agents, how responses are aggregated, and how conflicts are resolved. This structured approach to agent communication ensures reliable operation even in complex agent ecosystems with many participants.\n\n资料来源：[docs-website/docs/concepts/agents/multi-agent-systems.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents/multi-agent-systems.mdx)\n\n## Human-in-the-Loop\n\nHaystack agents support human-in-the-loop workflows, enabling humans to provide guidance, approval, or corrections during agent execution. 
This capability is essential for applications where autonomous operation must be balanced with human oversight and control.\n\n### Interaction Modes\n\n| Mode | Description | Use Case |\n|------|-------------|----------|\n| Approval | Human approves agent actions before execution | High-stakes decisions |\n| Feedback | Human provides corrective feedback during execution | Fine-tuning agent behavior |\n| Escalation | Agent defers to human when uncertain | Handling edge cases |\n| Validation | Human validates agent outputs before completion | Quality assurance |\n\n资料来源：[docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx)\n\n### Workflow Integration\n\n```mermaid\ngraph TD\n    A[Agent Task] --> B{Requires Human Input?}\n    B -->|Yes| C[Pause Execution]\n    C --> D[Notify Human]\n    D --> E[Await Response]\n    E --> F{Human Action}\n    F -->|Approve| G[Continue Execution]\n    F -->|Reject| H[Abort or Retry]\n    F -->|Modify| I[Apply Modifications]\n    B -->|No| G\n    I --> G\n    G --> J[Task Complete]\n```\n\nThe human-in-the-loop system is designed to be non-intrusive, minimizing the cognitive load on human overseers while ensuring that critical decisions receive appropriate human review. Agents can be configured to automatically escalate certain types of decisions based on predefined rules, such as actions that affect sensitive data or exceed specified cost thresholds.\n\n## Tool Integration\n\nA defining characteristic of Haystack agents is their ability to use tools to interact with external systems and perform actions beyond text generation. 
The tool integration system provides a standardized interface for defining, registering, and invoking tools that extend agent capabilities.\n\n### Available Tool Categories\n\n| Category | Examples | Capabilities |\n|----------|----------|--------------|\n| Web Search | Google Search, Bing Search | Internet research, fact checking |\n| API Clients | REST, GraphQL | External service integration |\n| Code Execution | Python, Shell | Computation, automation |\n| Document Processing | PDF, CSV parsers | Information extraction |\n| Database | SQL, Vector DB | Data retrieval, storage |\n\nTools in Haystack follow a consistent interface that makes it easy to create custom tools for domain-specific applications. Each tool is defined with a name, description, input schema, and implementation, and the agent automatically learns when and how to use tools based on their descriptions.\n\n## Configuration Options\n\nHaystack agents expose a wide range of configuration options that allow developers to customize agent behavior for specific use cases. These options control aspects ranging from the underlying model selection to detailed parameters governing agent decision-making.\n\n### Core Configuration Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model` | String | Required | The LLM to use for reasoning |\n| `max_iterations` | Integer | 10 | Maximum tool-calling loops |\n| `tools` | List | Empty | Available tools for the agent |\n| `prompt_template` | String | Default | Custom instruction template |\n| `verbose` | Boolean | False | Enable detailed logging |\n\nAdvanced configuration options allow developers to customize how the agent reasons, how it selects tools, and how it handles errors and edge cases. 
These options can be set at the agent level or overridden for specific use cases.\n\n## Best Practices\n\nWhen building agent systems with Haystack, several best practices can help ensure reliable and maintainable applications. Careful attention to prompt design, tool definitions, and error handling will significantly improve agent performance and user experience.\n\nClear and specific tool descriptions are essential for guiding agent behavior. Tools should have descriptive names and comprehensive descriptions that explain not just what the tool does, but when and why an agent should consider using it. This helps the underlying LLM make informed decisions about tool selection.\n\nState management should be designed with the target use case in mind. For simple single-turn interactions, minimal state management is appropriate. For complex multi-step tasks, comprehensive state tracking ensures the agent maintains context and can recover from errors gracefully.\n\nHuman-in-the-loop integration should be thoughtfully designed to balance autonomy with oversight. Critical decisions should require human approval, while routine operations can proceed autonomously. The escalation criteria should be clearly defined and regularly reviewed.\n\n## Summary\n\nHaystack's agent systems provide a comprehensive framework for building LLM-powered applications that can perceive, reason, and act. The architecture supports everything from simple single-agent applications to complex multi-agent ecosystems with human oversight. 
Key features include flexible state management, extensive tool integration, human-in-the-loop workflows, and configurable agent behavior.\n\n资料来源：[docs-website/docs/concepts/agents.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents.mdx)\n\n---\n\n<a id='development-guide'></a>\n\n## Development Guide\n\n### 相关页面\n\n相关主题：[Deployment and Infrastructure](#deployment), [Introduction to Haystack](#introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n- [docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- [pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n- [examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n</details>\n\n# Development Guide\n\nThis guide provides comprehensive information for developers who want to contribute to Haystack or extend its functionality. Haystack is an end-to-end LLM framework that enables building applications powered by Large Language Models, Transformer models, and vector search capabilities.\n\n## Overview\n\nHaystack is an open-source framework maintained by deepset that allows developers to build production-ready AI applications. 
The framework supports retrieval-augmented generation (RAG), document search, question answering, and answer generation by orchestrating state-of-the-art embedding models and LLMs into pipelines.\n\n资料来源：[README.md:1-10]()\n\n## Project Structure\n\nThe Haystack repository is organized into several main directories, each serving a specific purpose in the overall project ecosystem.\n\n```mermaid\ngraph TD\n    A[haystack/ root] --> B[Main Package]\n    A --> C[docs-website/]\n    A --> D[docker/]\n    A --> E[pydoc/]\n    A --> F[examples/]\n    \n    B --> G[Core Framework Code]\n    C --> H[Documentation Site]\n    D --> I[Docker Images]\n    E --> J[API Reference Generation]\n    F --> K[Example Cookbooks]\n```\n\n### Directory Breakdown\n\n| Directory | Purpose |\n|-----------|---------|\n| `haystack/` | Main Python package containing core framework code |\n| `docs-website/` | Docusaurus-powered documentation site |\n| `docker/` | Docker image definitions and build configurations |\n| `pydoc/` | YAML configurations for API reference generation |\n| `examples/` | Example applications and cookbooks (moved to haystack-cookbook) |\n\n资料来源：[docs-website/README.md:40-55]()\n\n## Installation for Development\n\n### Standard Installation\n\nTo set up Haystack for development, install the package via pip:\n\n```bash\npip install haystack-ai\n```\n\n### Nightly Pre-releases\n\nFor trying the newest features before official releases:\n\n```bash\npip install --pre haystack-ai\n```\n\n### Docker-based Development\n\nHaystack provides Docker images for development environments. 
The base image contains a working Python environment with Haystack preinstalled and is designed to be derived `FROM`.\n\n```bash\ndocker buildx bake base\n```\n\nTo build custom images with specific branches or tags:\n\n```sh\nHAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake base --no-cache\n```\n\n资料来源：[docker/README.md:15-30]()\n\n### Multi-Platform Docker Builds\n\nHaystack images support multiple architectures. To limit builds to your local architecture:\n\n```bash\n# For Apple M1 (ARM)\ndocker buildx bake base --set \"*.platform=linux/arm64\"\n```\n\n资料来源：[docker/README.md:40-45]()\n\n## Documentation Development\n\nThe documentation website is built with Docusaurus 3 and provides comprehensive guides, tutorials, API references, and best practices for using Haystack.\n\n### Prerequisites\n\n- **Node.js** 18 or higher\n- **npm** (included with Node.js) or Yarn\n\n### Setting Up the Documentation Site\n\n```bash\n# Clone the repository and navigate to docs-website\ngit clone https://github.com/deepset-ai/haystack.git\ncd haystack/docs-website\n\n# Install dependencies\nnpm install\n\n# Start the development server\nnpm start\n\n# The site opens at http://localhost:3000 with live reload\n```\n\n### Common Documentation Tasks\n\n| Task | Command | Location |\n|------|---------|----------|\n| Edit a page | Update files under `docs/` or `versioned_docs/` | Preview at http://localhost:3000 |\n| Add to sidebar | Update `sidebars.js` with doc ID | `docs-website/` |\n| Production check | `npm run build && npm run serve` | `docs-website/` |\n\n资料来源：[docs-website/README.md:20-35]()\n\n### Documentation Project Structure\n\n```\ndocs-website/\n├── docs/                          # Main documentation (guides, tutorials, concepts)\n│   ├── _templates/               # Authoring templates (excluded from build)\n│   ├── concepts/                 # Core Haystack concepts\n│   ├── pipeline-components/      # Component documentation\n│   └── ...\n├── 
reference/                     # API reference (auto-generated, do not edit manually)\n├── versioned_docs/               # Versioned copies of docs/\n├── reference_versioned_docs/     # Versioned copies of reference/\n├── src/                          # React components and custom code\n│   ├── components/              # Custom React components\n│   ├── css/                     # Global styles\n│   ├── pages/                   # Custom pages\n│   ├── remark/                  # Remark plugins\n│   └── theme/                   # Docusaurus theme customization\n```\n\n资料来源：[docs-website/README.md:45-60]()\n\n## API Reference Development\n\nThe API reference is generated automatically from docstrings in the code using [haystack-pydoc-tools](https://github.com/deepset-ai/haystack-pydoc-tools). A GitHub workflow regenerates the API reference when code changes.\n\n### How API Reference Works\n\n1. Create a `.yml` file in the `pydoc` directory\n2. Configure how haystack-pydoc-tools will generate the page\n3. Commit the configuration to the main branch\n4. The GitHub workflow automatically generates the Markdown files\n\n### Version Management\n\nAll updates to API reference live in unstable docs version and are promoted to stable docs version when a new version is released.\n\n资料来源：[pydoc/README.md:1-20]()\n\n## Contributing to Haystack\n\nHaystack welcomes community contributions ranging from quick fixes like typo corrections to entirely new features.\n\n### Contribution Areas\n\n| Area | Repository | Description |\n|------|------------|-------------|\n| Main Haystack | `deepset-ai/haystack` | Core framework development |\n| Integrations | `deepset-ai/haystack-core-integrations` | Integration components |\n| Documentation | `haystack/docs-website` | Documentation content |\n\n### Getting Started\n\n1. Review the Contributor Guidelines in [CONTRIBUTING.md](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md)\n2. 
Check the [full list of open issues](https://github.com/orgs/deepset-ai/projects/14) available for contributions\n3. You don't need to be a Haystack expert to provide meaningful improvements\n\n### CI/CD and Quality Standards\n\nThe project maintains high quality standards through automated checks:\n\n| Check | Badge | Description |\n|-------|-------|-------------|\n| Tests | GitHub Actions | Automated test suite |\n| Type Checking | Mypy | Static type analysis |\n| Code Coverage | Coverage Badge | Test coverage reporting |\n| Linting | Ruff | Code style enforcement |\n| License Compliance | License Check | Dependency license verification |\n\n资料来源：[README.md:30-55]()\n\n## Development Workflow\n\n```mermaid\ngraph TD\n    A[Start Development] --> B[Clone Repository]\n    B --> C[Set Up Environment]\n    C --> D[Install Dependencies]\n    D --> E[Make Changes]\n    E --> F[Run Tests]\n    F --> G{Tests Pass?}\n    G -->|No| H[Fix Issues]\n    H --> E\n    G -->|Yes| I[Run Linters]\n    I --> J{Code Quality OK?}\n    J -->|No| K[Address Linter Issues]\n    K --> E\n    J -->|Yes| L[Submit Pull Request]\n    L --> M[Review Process]\n    M --> N[Merge to Main]\n```\n\n## Examples and Cookbooks\n\nExample applications have been moved to a dedicated repository. All example cookbooks are now located at:\n\n**Repository:** [https://github.com/deepset-ai/haystack-cookbook/](https://github.com/deepset-ai/haystack-cookbook/)\n\nThis separation allows for more focused development and easier discovery of example applications.\n\n资料来源：[examples/README.md:1-10]()\n\n## License and Compliance\n\nAll contributions must comply with the project's license. 
View license information at:\n\n- [https://github.com/deepset-ai/haystack/blob/main/LICENSE](https://github.com/deepset-ai/haystack/blob/main/LICENSE)\n\nThe project includes automated license compliance checking through GitHub workflows.\n\n资料来源：[docker/README.md:50-60]()\n\n## Quick Reference Commands\n\n| Command | Purpose |\n|---------|---------|\n| `pip install haystack-ai` | Install Haystack |\n| `pip install --pre haystack-ai` | Install pre-release version |\n| `npm install` | Install documentation dependencies |\n| `npm start` | Start documentation dev server |\n| `npm run build` | Build documentation site |\n| `docker buildx bake base` | Build Docker base image |\n\n## Additional Resources\n\n- **Documentation Site:** [https://docs.haystack.deepset.ai](https://docs.haystack.deepset.ai)\n- **GitHub Repository:** [https://github.com/deepset-ai/haystack](https://github.com/deepset-ai/haystack)\n- **Community:** [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) and [Stack Overflow](https://stackoverflow.com/questions/tagged/haystack)\n- **Discord:** Join the [Haystack Discord community](https://discord.gg/VBpFBDegHY)\n\n---\n\n<a id='deployment'></a>\n\n## Deployment and Infrastructure\n\n### 相关页面\n\n相关主题：[Development Guide](#development-guide), [Introduction to Haystack](#introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docker/Dockerfile.base](https://github.com/deepset-ai/haystack/blob/main/docker/Dockerfile.base)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- [docs-website/docs/development/deployment.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/deployment.mdx)\n- [docs-website/docs/development/deployment/docker.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/deployment/docker.mdx)\n- 
[docs-website/docs/development/deployment/kubernetes.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/deployment/kubernetes.mdx)\n- [docs-website/docs/development/enabling-gpu-acceleration.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/enabling-gpu-acceleration.mdx)\n</details>\n\n# Deployment and Infrastructure\n\n## Overview\n\nHaystack provides a comprehensive deployment infrastructure designed for production-ready LLM applications. The framework supports multiple deployment strategies including Docker containers, Kubernetes orchestration, and cloud platform integrations. This documentation covers the core deployment mechanisms, containerization approach, GPU acceleration support, and production best practices.\n\nThe deployment system is built around Docker images using BuildKit for efficient multi-platform builds, enabling deployment across x86_64 and ARM64 architectures. The infrastructure supports both development environments and production-grade deployments with high availability requirements.\n\n## Docker Containerization\n\n### Base Images\n\nHaystack provides pre-built Docker images that serve as the foundation for custom deployments. The base images contain a working Python environment with Haystack preinstalled and are intended to be extended with application-specific configurations.\n\nThe primary image variant available is:\n\n| Image Tag | Description | Use Case |\n|-----------|-------------|----------|\n| `haystack:base-<version>` | Base Python environment with Haystack | Custom image derivation |\n\nAll images are published to Docker Hub and can be pulled directly for use in production environments. The images follow semantic versioning and align with Haystack releases.\n\n### Building Custom Images\n\nCustom images can be built using Docker BuildKit and the `bake` command orchestrator. 
This approach allows for:\n\n- Custom Haystack versions or branches\n- Pre-installed dependencies\n- Application-specific configurations\n- Multi-platform support\n\nThe build process uses the `docker-bake.hcl` configuration file which defines build targets, platforms, and variable substitutions.\n\n#### Basic Build Command\n\n```sh\ndocker buildx bake base\n```\n\n#### Building with Custom Variables\n\nTo build with a custom Haystack version or branch, override the `HAYSTACK_VERSION` variable:\n\n```sh\nHAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake base --no-cache\n```\n\nThis mechanism enables CI/CD pipelines to build images from specific commits, branches, or release tags without modifying the underlying Dockerfile.\n\n### Multi-Platform Builds\n\nHaystack Docker images support multiple architectures including:\n\n- `linux/amd64` (x86_64)\n- `linux/arm64` (ARM64)\n\n#### Platform Limitations\n\nDepending on the operating system and Docker environment, building all platforms locally may not be possible. If encountering the following error:\n\n```\nmultiple platforms feature is currently not supported for docker driver. Please switch to a different driver\n(eg. \"docker buildx create --use\")\n```\n\nThe platform option must be overridden to match the local architecture. 
For example, on Apple M1 (ARM64):\n\n```sh\ndocker buildx bake base --set \"*.platform=linux/arm64\"\n```\n\n#### Cross-Platform Considerations\n\nWhen deploying multi-platform images, consider the following:\n\n- **CPU Compatibility**: Ensure target nodes match the built architecture\n- **Performance**: Native architecture builds perform optimally\n- **Registry Support**: Use registries that support multi-platform manifests\n\n## GPU Acceleration\n\n### Hardware Acceleration Support\n\nHaystack supports GPU acceleration for compute-intensive operations including:\n\n- Model inference\n- Embedding generation\n- Tokenization\n- Custom model operations\n\nGPU acceleration significantly improves throughput for LLM-based pipelines and embedding-heavy workloads.\n\n### Enabling GPU Support\n\n#### NVIDIA GPUs (CUDA)\n\nFor NVIDIA GPU support, use CUDA-enabled base images and ensure the nvidia-container-toolkit is installed on the host system.\n\n**Docker Compose Example:**\n\n```yaml\nservices:\n  haystack:\n    image: haystack:base-latest\n    runtime: nvidia\n    environment:\n      - NVIDIA_VISIBLE_DEVICES=all\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              count: 1\n              capabilities: [gpu]\n```\n\n#### AMD GPUs (ROCm)\n\nAMD GPU support requires ROCm-enabled images and appropriate runtime configuration.\n\n### GPU Memory Management\n\nFor production deployments, configure memory limits based on model size:\n\n| Model Size | Recommended GPU Memory | Configuration |\n|------------|------------------------|---------------|\n| Small (<1B params) | 8 GB | `CUDA_VISIBLE_DEVICES=0` |\n| Medium (1-7B params) | 16 GB | `CUDA_VISIBLE_DEVICES=0,1` |\n| Large (7-70B params) | 32+ GB | Multi-GPU / quantization |\n\n### Quantization Options\n\nTo reduce GPU memory requirements, consider model quantization:\n\n- **4-bit quantization**: Reduces memory by ~75%\n- **8-bit quantization**: Reduces memory 
by ~50%\n- **Dynamic quantization**: Trade-off between speed and accuracy\n\n## Kubernetes Deployment\n\n### Container Orchestration\n\nHaystack can be deployed on Kubernetes for production environments requiring:\n\n- Horizontal scaling\n- High availability\n- Rolling updates\n- Resource management\n- Service discovery\n\n### Resource Configuration\n\n#### Resource Limits\n\nConfigure CPU and memory limits based on workload:\n\n```yaml\nresources:\n  limits:\n    cpu: \"4\"\n    memory: \"16Gi\"\n  requests:\n    cpu: \"2\"\n    memory: \"8Gi\"\n```\n\n#### GPU Resource Allocation\n\nFor GPU workloads, define accelerator resources. GPUs are extended resources and cannot be overcommitted, so if both are specified the request must equal the limit:\n\n```yaml\nresources:\n  limits:\n    nvidia.com/gpu: \"2\"\n  requests:\n    nvidia.com/gpu: \"2\"\n```\n\n### High Availability Configuration\n\nFor production deployments, implement:\n\n1. **Replica Sets**: Deploy multiple replicas for fault tolerance\n2. **Health Checks**: Configure liveness and readiness probes\n3. **Pod Disruption Budgets**: Ensure availability during updates\n4. 
**Anti-Affinity Rules**: Distribute pods across nodes\n\n```yaml\nspec:\n  replicas: 3\n  strategy:\n    type: RollingUpdate\n    rollingUpdate:\n      maxSurge: 1\n      maxUnavailable: 0\n```\n\n### Service Configuration\n\nExpose Haystack services using Kubernetes Services:\n\n```yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: haystack-api\nspec:\n  selector:\n    app: haystack\n  ports:\n    - protocol: TCP\n      port: 80\n      targetPort: 8000\n  type: LoadBalancer\n```\n\n## Production Best Practices\n\n### Security Considerations\n\n| Practice | Implementation |\n|----------|----------------|\n| Non-root execution | Configure USER directive in Dockerfile |\n| Secret management | Use Kubernetes Secrets or external secret stores |\n| Network policies | Restrict pod-to-pod communication |\n| Image scanning | Scan images for vulnerabilities before deployment |\n| TLS termination | Configure ingress with TLS certificates |\n\n### Monitoring and Observability\n\nImplement monitoring using:\n\n- **Metrics**: Prometheus exporter for pipeline metrics\n- **Logging**: Centralized logging with ELK/Graylog\n- **Tracing**: OpenTelemetry for request tracing\n- **Alerts**: Configure alerts for error rates and latency\n\n### Performance Optimization\n\n1. **Connection Pooling**: Reuse database and API connections\n2. **Caching**: Implement caching for frequently accessed data\n3. **Batch Processing**: Process multiple requests in batches\n4. 
**Async Processing**: Use async/await for I/O operations\n\n## CI/CD Integration\n\n### Automated Builds\n\nHaystack supports automated Docker image builds through:\n\n- GitHub Actions workflows\n- BuildKit with bake files\n- Multi-stage Docker builds\n\n### Deployment Workflows\n\n```mermaid\ngraph TD\n    A[Code Change] --> B[Run Tests]\n    B --> C[Build Docker Image]\n    C --> D[Push to Registry]\n    D --> E[Update Deployment]\n    E --> F[Health Check]\n    F --> G{Healthy?}\n    G -->|Yes| H[Deployment Complete]\n    G -->|No| I[Rollback]\n```\n\n### Registry Configuration\n\nPopular registry options for Haystack images:\n\n| Registry | Use Case | Authentication |\n|----------|----------|----------------|\n| Docker Hub | Public deployments | Optional |\n| AWS ECR | AWS infrastructure | IAM roles |\n| GCR | GCP infrastructure | Service accounts |\n| Azure ACR | Azure infrastructure | Service principals |\n| Private Registry | Enterprise deployments | Username/password |\n\n## License and Compliance\n\nThe Haystack Docker images contain:\n\n- Haystack framework code under the Apache 2.0 license\n- Python runtime components\n- Base distribution software with their respective licenses\n\nUsers are responsible for ensuring compliance with all software licenses contained within deployed images. For enterprise deployments, review the license implications of all included components.\n\n## Related Documentation\n\n- [Installation Guide](https://docs.haystack.deepset.ai/docs/installation)\n- [Pipeline Components](https://docs.haystack.deepset.ai/docs/pipeline-components)\n- [API Reference](https://docs.haystack.deepset.ai/reference)\n- [Contributing Guide](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md)\n\n## Summary\n\nHaystack provides a flexible and production-ready deployment infrastructure supporting Docker containerization, Kubernetes orchestration, and GPU acceleration. 
The multi-platform Docker images enable deployment across diverse infrastructure, while Kubernetes support facilitates enterprise-grade deployments with high availability and scalability requirements. GPU acceleration support enables high-performance inference for LLM-powered applications, with quantization options for resource-constrained environments.\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: deepset-ai/haystack\n\nSummary: Found 38 potential pitfall items; 7 are high/blocking. Highest priority: installation - 来源证据：RFC: Signed receipts for Haystack pipeline component calls.\n\n## 1. installation · 来源证据：RFC: Signed receipts for Haystack pipeline component calls\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：RFC: Signed receipts for Haystack pipeline component calls\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_192c840953e54837869723f54ccfdd1a | https://github.com/deepset-ai/haystack/issues/11039 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. installation · 来源证据：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_4b8f3323f54c4fd6b8de4e2d466cfe8b | https://github.com/deepset-ai/haystack/issues/11358 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 3. 
installation · 来源证据：feat: add INTERSECTION join mode to DocumentJoiner\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: add INTERSECTION join mode to DocumentJoiner\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_00757f9859234e9cab8f8d4ce4f3e771 | https://github.com/deepset-ai/haystack/issues/11365 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 4. maintenance · 来源证据：docs: Update Ragas docs\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：docs: Update Ragas docs\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_3204fffa09664d9f8553be2a3008f270 | https://github.com/deepset-ai/haystack/issues/11178 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 5. security_permissions · 来源证据：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_8f72793700a1416891c2eedddc379129 | https://github.com/deepset-ai/haystack/issues/11366 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 6. 
security_permissions · 来源证据：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_4f0868673100472fb74d831b5a04735f | https://github.com/deepset-ai/haystack/issues/11311 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 7. security_permissions · 来源证据：feat: support token-based budget in LostInTheMiddleRanker\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：feat: support token-based budget in LostInTheMiddleRanker\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7ad00787309c442eb497b10879fb3b28 | https://github.com/deepset-ai/haystack/issues/11351 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 8. installation · 失败模式：installation: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- User impact: Developers may fail before the first successful local run: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: Proposal: Transaction Protocol for idempotent, auditable agent pipelines. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_58038e9b6373edf9376049b42d4b7bb4 | https://github.com/deepset-ai/haystack/issues/11266 | Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n## 9. installation · 失败模式：installation: RFC: Signed receipts for Haystack pipeline component calls\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: RFC: Signed receipts for Haystack pipeline component calls\n- User impact: Developers may fail before the first successful local run: RFC: Signed receipts for Haystack pipeline component calls\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: RFC: Signed receipts for Haystack pipeline component calls. Context: Observed when using node, python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_ce0b9c65d21126dcf11ede12120e154f | https://github.com/deepset-ai/haystack/issues/11039 | RFC: Signed receipts for Haystack pipeline component calls\n\n## 10. installation · 失败模式：installation: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- User impact: Developers may fail before the first successful local run: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_4d3276b6b9938595cb2dbb864a5509da | https://github.com/deepset-ai/haystack/issues/11311 | Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n## 11. installation · 失败模式：installation: [FEATURE] Support for code syntax-aware Document Splitters\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [FEATURE] Support for code syntax-aware Document Splitters\n- User impact: Developers may fail before the first successful local run: [FEATURE] Support for code syntax-aware Document Splitters\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [FEATURE] Support for code syntax-aware Document Splitters. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_997b84068ae32409b1d8d55daaddd984 | https://github.com/deepset-ai/haystack/issues/11354 | [FEATURE] Support for code syntax-aware Document Splitters\n\n## 12. installation · 来源证据：MCP Server for Haystack docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：MCP Server for Haystack docs\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_398390cf2fcd41d589dd5614a3bc646d | https://github.com/deepset-ai/haystack/issues/11346 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 13. 
installation · 来源证据：[FEATURE] Support for code syntax-aware Document Splitters\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[FEATURE] Support for code syntax-aware Document Splitters\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_76b3b1b8eae94593a2cd248d0ec55e2a | https://github.com/deepset-ai/haystack/issues/11354 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 14. installation · 来源证据：v2.25.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.25.2\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_55d8aef5d1c3417ba9bdf05c0f5a3053 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 15. installation · 来源证据：v2.26.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.26.0\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d73f121017b64b04a8ad885da241fc6f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 16. installation · 来源证据：v2.28.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.28.0\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d9746a9178f0445d853c95cbb4a5241b | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 17. 
configuration · 失败模式：configuration: MCP Server for Haystack docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: MCP Server for Haystack docs\n- User impact: Developers may misconfigure credentials, environment, or host setup: MCP Server for Haystack docs\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: MCP Server for Haystack docs. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_e20d9655fcfaa20fd6aea7f45a938545 | https://github.com/deepset-ai/haystack/issues/11346 | MCP Server for Haystack docs, failure_mode_cluster:github_issue | fmev_a1eed7aea672a032017343738a09159f | https://github.com/deepset-ai/haystack/issues/11346 | MCP Server for Haystack docs\n\n## 18. configuration · 失败模式：configuration: v2.26.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.26.0\n- User impact: Upgrade or migration may change expected behavior: v2.26.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.26.0. Context: Observed when using python, windows\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_3b9fc694d24804c99a261297652bf3cf | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | v2.26.0\n\n## 19. 
configuration · 失败模式：configuration: v2.28.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.28.0\n- User impact: Upgrade or migration may change expected behavior: v2.28.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.28.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_0c6c5701a51e86d2246a4919b45c2606 | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | v2.28.0\n\n## 20. configuration · 失败模式：configuration: v2.29.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.29.0\n- User impact: Upgrade or migration may change expected behavior: v2.29.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.29.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_285696f6bc066dc6f42482171a097432 | https://github.com/deepset-ai/haystack/releases/tag/v2.29.0 | v2.29.0\n\n## 21. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:221654678 | https://github.com/deepset-ai/haystack | README/documentation is current enough for a first validation pass.\n\n## 22. 
runtime · 失败模式：runtime: v2.25.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v2.25.2\n- User impact: Upgrade or migration may change expected behavior: v2.25.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.25.2. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_32dfb0f93116d56f30cc46cdab3a0751 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | v2.25.2\n\n## 23. maintenance · 失败模式：migration: docs: Update Ragas docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this migration risk before relying on the project: docs: Update Ragas docs\n- User impact: Developers may hit a documented source-backed failure mode: docs: Update Ragas docs\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: docs: Update Ragas docs. Context: Observed during version upgrade or migration.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_57550d7e13c6f14ad00a030d3e3a20db | https://github.com/deepset-ai/haystack/issues/11178 | docs: Update Ragas docs, failure_mode_cluster:github_issue | fmev_c4773f63705049b6c2714f8a4517b847 | https://github.com/deepset-ai/haystack/issues/11178 | docs: Update Ragas docs\n\n## 24. 
maintenance · 来源证据：DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_9e25887dd3694aa695807058e368f46c | https://github.com/deepset-ai/haystack/issues/11352 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 25. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | last_activity_observed missing\n\n## 26. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:221654678 | https://github.com/deepset-ai/haystack | no_demo; severity=medium\n\n## 27. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:221654678 | https://github.com/deepset-ai/haystack | no_demo; severity=medium\n\n## 28. 
security_permissions · 来源证据：Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e0fcf29e18c5480baf59b94a464ecc85 | https://github.com/deepset-ai/haystack/issues/11266 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 29. security_permissions · 来源证据：v2.26.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：v2.26.1\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_1520403ba7f24184b2c108c30e5d609f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.1 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 30. security_permissions · 来源证据：v2.27.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：v2.27.0\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_1dddbe7bf8094d669dd185a18844ef75 | https://github.com/deepset-ai/haystack/releases/tag/v2.27.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 31. 
capability · 失败模式：conceptual: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `Text...\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- User impact: Developers may hit a documented source-backed failure mode: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_bf87ad8f610a525641ac857abffd6388 | https://github.com/deepset-ai/haystack/issues/11358 | feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`, failure_mode_cluster:github_issue | fmev_315e3f2ec26809f7348a1892a9730a05 | https://github.com/deepset-ai/haystack/issues/11358 | feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n## 32. 
capability · 失败模式：conceptual: feat: add INTERSECTION join mode to DocumentJoiner\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: add INTERSECTION join mode to DocumentJoiner\n- User impact: Developers may hit a documented source-backed failure mode: feat: add INTERSECTION join mode to DocumentJoiner\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_175e4485fffcc53c711d1fd504db9a38 | https://github.com/deepset-ai/haystack/issues/11365 | feat: add INTERSECTION join mode to DocumentJoiner\n\n## 33. capability · 失败模式：conceptual: feat: support token-based budget in LostInTheMiddleRanker\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: support token-based budget in LostInTheMiddleRanker\n- User impact: Developers may hit a documented source-backed failure mode: feat: support token-based budget in LostInTheMiddleRanker\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_eff234be9632dc6eb35cf59720b2c3f0 | https://github.com/deepset-ai/haystack/issues/11351 | feat: support token-based budget in LostInTheMiddleRanker\n\n## 34. 
runtime · 失败模式：performance: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- User impact: Developers may hit a documented source-backed failure mode: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication. Context: Observed when using python, macos, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_7f9bb8e374256d979ec52a0c96020977 | https://github.com/deepset-ai/haystack/issues/11352 | DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication, failure_mode_cluster:github_issue | fmev_21fc5a912bed31520bb91639ca4fa3b3 | https://github.com/deepset-ai/haystack/issues/11352 | DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n## 35. runtime · 失败模式：performance: v2.27.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: v2.27.0\n- User impact: Upgrade or migration may change expected behavior: v2.27.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.27.0. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_9757a305d020b89fd79c9dc31c6a9d1c | https://github.com/deepset-ai/haystack/releases/tag/v2.27.0 | v2.27.0\n\n## 36. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | issue_or_pr_quality=unknown\n\n## 37. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | release_recency=unknown\n\n## 38. maintenance · 失败模式：maintenance: v2.26.1\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v2.26.1\n- User impact: Upgrade or migration may change expected behavior: v2.26.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.26.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_29416bd44cdae3aebbb8d4bd245bc398 | https://github.com/deepset-ai/haystack/releases/tag/v2.26.1 | v2.26.1\n\n<!-- canonical_name: deepset-ai/haystack; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "haystack",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "github_repo:221654678",
          "kind": "repo",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/deepset-ai/haystack"
        },
        {
          "evidence_id": "art_3da5693908af4dd08c207de6b8f3fd4b",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/deepset-ai/haystack#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "haystack 说明书",
      "toc": [
        "https://github.com/deepset-ai/haystack 项目说明书",
        "目录",
        "Introduction to Haystack",
        "What is Haystack?",
        "Architecture Overview",
        "Key Features",
        "Installation",
        "Documentation Structure",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "3d90a53f9495babadc59ceceb115a5c63bcbd642",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "pyproject.toml",
      "README.md",
      "examples/README.md"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# haystack-docs-website - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 haystack-docs-website 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **想在安装前理解开源项目价值和边界的用户**：当前证据主要来自项目文档。 证据：`README.md` Claim：`clm_0002` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `pip install haystack-ai` 证据：`README.md` Claim：`clm_0003` supported 0.86\n- `pip install --pre haystack-ai` 证据：`README.md` Claim：`clm_0004` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：先做角色匹配试用\n- **为什么**：这个项目更像角色库，核心风险是选错角色或把角色文案当执行能力；先用 Prompt Preview 试角色匹配，再决定是否沙盒导入。\n\n### 30 秒判断\n\n- **现在怎么做**：先做角色匹配试用\n- **最小安全下一步**：先用 Prompt Preview 试角色匹配；满意后再隔离导入\n- **先别相信**：角色质量和任务匹配不能直接相信。\n- **继续会触碰**：角色选择偏差、命令执行、宿主 AI 配置\n\n### 现在可以相信\n\n- **适合人群线索：想在安装前理解开源项目价值和边界的用户**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n### 现在还不能相信\n\n- **角色质量和任务匹配不能直接相信。**（unverified）：角色库证明有很多角色，不证明每个角色都适合你的具体任务，也不证明角色能产生高质量结果。\n- **不能把角色文案当成真实执行能力。**（unverified）：安装前只能判断角色描述和任务画像是否匹配，不能证明它能在宿主 AI 里完成任务。\n- 
**真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。 证据：`AGENTS.md`, `CLAUDE.md`\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n\n### 继续会触碰什么\n\n- **角色选择偏差**：用户对任务应该由哪个专家角色处理的判断。 原因：选错角色会让 AI 从错误专业视角回答，浪费时间或误导决策。\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`\n- **宿主 AI 配置**：Claude/Codex/Cursor/Gemini/OpenCode 等宿主的 plugin、Skill 或规则加载配置。 原因：宿主配置会改变 AI 后续工作方式，可能和用户已有规则冲突。 证据：`AGENTS.md`, `CLAUDE.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：先用交互式试用验证任务画像和角色匹配，不要先导入整套角色库。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **先备份宿主 AI 配置**：Skill、plugin、规则文件可能改变 Claude/Cursor/Codex 的默认行为。（适用：存在插件 manifest、Skill 或宿主规则入口时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- **保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **准备移除宿主 plugin / Skill / 规则入口**：如果试装后行为异常，可以把宿主 AI 恢复到试装前状态。\n- **保留原始角色选择记录**：如果输出偏题，可以回到任务画像阶段重新选择角色，而不是继续沿着错误角色推进。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0005` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md` Claim：`clm_0006` supported 0.86\n- 
**待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？。原因：兼容性只能通过实际宿主环境验证。\n- **待确认**：项目输出质量是否满足用户具体任务？。原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？。原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：7504\n- 重要文件覆盖：40/7504\n- 证据索引条目：80\n- 角色 / Skill 条目：78\n\n### 证据不足时的处理\n\n- **missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 haystack-docs-website 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 haystack-docs-website 当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 
给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 haystack-docs-website 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 78 个角色 / Skill / 项目文档条目。\n\n- **Haystack Guidelines for AI Agents**（project_doc）：Haystack uses Hatch for environment and dependency management. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`AGENTS.md`\n- **CLAUDE.md**（project_doc）：Before you start working on this repository, read the AGENTS.md file and follow all the instructions. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CLAUDE.md`\n- **Table of Contents**（project_doc）：------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **Haystack 2.x**（project_doc）：Haystack https://github.com/deepset-ai/haystack is an end-to-end LLM framework that allows you to build applications powered by LLMs, Transformer models, vector search and more. Whether you want to perform retrieval-augmented generation RAG , document search, question answering or answer generation, Haystack can orchestrate state-of-the-art embedding models and LLMs into pipelines to build end-to-end NLP application… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docker/README.md`\n- **Haystack Documentation Website**（project_doc）：This directory contains the Docusaurus-powered documentation website for Haystack https://github.com/deepset-ai/haystack , an open-source framework for building production-ready applications with Large Language Models LLMs . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/README.md`\n- **Examples have been moved!**（project_doc）：If you're searching for Haystack examples we moved them into a dedicated repository. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/README.md`\n- **:ledger: Looking for the docs?**（project_doc）：You can find Haystack's documentation at https://docs.haystack.deepset.ai/. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`pydoc/README.md`\n- **Pipeline.run behavioural tests**（project_doc）：This module contains all behavioural tests for Pipeline.run . 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`test/core/pipeline/features/README.md`\n- **Contributing to Haystack**（project_doc）：First off, thanks for taking the time to contribute! :blue heart: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CONTRIBUTING.md`\n- **Contributing to Haystack Documentation**（project_doc）：Contributing to Haystack Documentation 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/CONTRIBUTING.md`\n- **Related Issues**（project_doc）：- I have read the contributors guidelines https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md and the code of conduct https://github.com/deepset-ai/haystack/blob/main/code of conduct.txt . - I have updated the related issue with new insights and changes. - I have added unit tests and updated the docstrings. - I've used one of the conventional commit types https://www.conventionalcommits.org/en/v1.0.0/ f… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/pull_request_template.md`\n- **Security Policy**（project_doc）：If you have found a security vulnerability in Haystack, please report via email to opensource-security@deepset.ai mailto:opensource-security@deepset.ai . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`SECURITY.md`\n- **Breaking change proposal**（project_doc）：Briefly explain how the change is breaking and why it is needed. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/ISSUE_TEMPLATE/breaking-change-proposal.md`\n- **Bug report**（project_doc）：Describe the bug A clear and concise description of what the bug is. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/ISSUE_TEMPLATE/bug_report.md`\n- **Feature request**（project_doc）：Is your feature request related to a problem? Please describe. A clear and concise description of what the problem is. Ex. I'm always frustrated when ... 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/ISSUE_TEMPLATE/feature_request.md`\n- **Module haystack\\ experimental.components.agents.agent**（project_doc）：Tool-using agents with provider-agnostic chat model support. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_agents_api.md`\n- **Module haystack\\ experimental.chat\\ message\\ stores.in\\ memory**（project_doc）：Storage for the chat messages. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_chatmessage_store_api.md`\n- **Module haystack\\ experimental.components.generators.chat.openai**（project_doc）：Enables text generation using LLMs. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_generators_api.md`\n- **Module haystack\\ experimental.memory\\ stores.mem0.memory\\ store**（project_doc）：Storage for the memories using Mem0 as the backend. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_mem0_memory_store_api.md`\n- **Module haystack\\ experimental.components.preprocessors.md\\ header\\ level\\ inferrer**（project_doc）：Pipelines wrapped as components. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_preprocessors_api.md`\n- **Module haystack\\ experimental.components.retrievers.chat\\ message\\ retriever**（project_doc）：Sweep through Document Stores and return a set of candidate documents that are relevant to the query. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_retrievers_api.md`\n- **Module haystack\\ experimental.components.summarizers.llm\\ summarizer**（project_doc）：Components that summarize texts into concise versions. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_summarizer_api.md`\n- **Module haystack\\ experimental.components.writers.chat\\ message\\ writer**（project_doc）：Writers for Haystack. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/experiments-api/experimental_writers_api.md`\n- **agent**（project_doc）：Tool-using agents with provider-agnostic chat model support. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/agents_api.md`\n- **whisper local**（project_doc）：Transcribes audio files. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/audio_api.md`\n- **answer builder**（project_doc）：Extract the output of a Generator to an Answer format, and build prompts. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/builders_api.md`\n- **cache checker**（project_doc）：Checks if any document coming from the given URL is already present in the store. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/cachings_api.md`\n- **document language classifier**（project_doc）：Classify documents based on the provided labels. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/classifiers_api.md`\n- **openapi**（project_doc）：Various connectors to integrate with external services. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/connectors_api.md`\n- **azure**（project_doc）：Various converters to transform data from one format to another. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/converters_api.md`\n- **answer**（project_doc）：Core classes that carry data through the system. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/data_classes_api.md`\n- **document store**（project_doc）：Stores your texts and meta data and provides them to the Retriever at query time. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/document_stores_api.md`\n- **document writer**（project_doc）：Writes Documents to a DocumentStore. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/document_writers_api.md`\n- **azure document embedder**（project_doc）：Transforms queries into vectors to look for similar or relevant Documents. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/embedders_api.md`\n- **eval run result**（project_doc）：Represents the results of evaluation. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/evaluation_api.md`\n- **answer exact match**（project_doc）：Evaluate your pipelines or individual components. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/evaluators_api.md`\n- **image/llm document content extractor**（project_doc）：Components to extract specific elements from textual data. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/extractors_api.md`\n- **link content**（project_doc）：Fetches content from a list of URLs and returns a list of extracted content streams. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/fetchers_api.md`\n- **azure**（project_doc）：Enables text generation using LLMs. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/generators_api.md`\n- **dataclasses**（project_doc）：Abstractions for integrating human feedback and interaction into Agent workflows. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/human_in_the_loop_api.md`\n- **document to image**（project_doc）：Various converters to transform image data from one format to another. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/image_converters_api.md`\n- **answer joiner**（project_doc）：Components that join list of different objects 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/joiners_api.md`\n- **async pipeline**（project_doc）：Arranges components and integrations in flow. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/pipeline_api.md`\n- **csv document cleaner**（project_doc）：Preprocess your Documents and texts. Clean, split, and more. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/preprocessors_api.md`\n- **query expander**（project_doc）：Components for query processing and expansion. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/query_api.md`\n- **hugging face tei**（project_doc）：Reorders a set of Documents based on their relevance to the query. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/rankers_api.md`\n- **extractive**（project_doc）：Takes a query and a set of Documents as input and returns ExtractedAnswers by selecting a text span within the Documents. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/readers_api.md`\n- **auto merging retriever**（project_doc）：Sweeps through a Document Store and returns a set of candidate Documents that are relevant to the query. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/retrievers_api.md`\n- **conditional router**（project_doc）：Routers is a group of components that route queries or Documents to other components that can handle them best. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/routers_api.md`\n- **top p**（project_doc）：Filters documents based on their similarity scores using top-p sampling. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/samplers_api.md`\n- **tool invoker**（project_doc）：Components related to Tool Calling. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/tool_components_api.md`\n- **component tool**（project_doc）：Unified abstractions to represent tools across the framework. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/tools_api.md`\n- **asynchronous**（project_doc）：Utility functions and classes used across the library. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/utils_api.md`\n- **json schema**（project_doc）：Validators validate LLM outputs 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/validators_api.md`\n- **searchapi**（project_doc）：Web search engine for Haystack. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/haystack-api/websearch_api.md`\n- **Module haystack\\ integrations.components.generators.aimlapi.chat.chat\\ generator**（project_doc）：AIMLAPI integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/aimlapi.md`\n- **haystack integrations.components.retrievers.alloydb.embedding retriever**（project_doc）：AlloyDB integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/alloydb.md`\n- **haystack integrations.common.amazon bedrock.errors**（project_doc）：Amazon Bedrock integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/amazon_bedrock.md`\n- **Module haystack\\ integrations.components.generators.amazon\\ sagemaker.sagemaker**（project_doc）：Amazon Sagemaker integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/amazon_sagemaker.md`\n- **haystack integrations.components.generators.anthropic.chat.chat generator**（project_doc）：Anthropic integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/anthropic.md`\n- **haystack integrations.components.retrievers.arcadedb.embedding retriever**（project_doc）：ArcadeDB integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/arcadedb.md`\n- **haystack integrations.components.retrievers.astra.retriever**（project_doc）：Astra integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/astra.md`\n- **haystack integrations.components.retrievers.azure ai search.embedding retriever**（project_doc）：Azure AI Search integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/azure_ai_search.md`\n- **Module haystack\\ integrations.components.converters.azure\\ doc\\ intelligence.converter**（project_doc）：Azure Document Intelligence integration for Haystack 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/azure_doc_intelligence.md`\n- **haystack integrations.components.websearch.brave.brave websearch**（project_doc）：Brave Search integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/brave.md`\n- **haystack integrations.components.preprocessors.chonkie.recursive splitter**（project_doc）：Chonkie integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/chonkie.md`\n- **haystack integrations.components.retrievers.chroma.retriever**（project_doc）：Chroma integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/chroma.md`\n- **haystack integrations.components.embedders.cohere.document embedder**（project_doc）：Cohere integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/cohere.md`\n- **Module haystack\\ integrations.components.generators.cometapi.chat.chat\\ generator**（project_doc）：Comet API integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/cometapi.md`\n- **Module haystack\\ integrations.components.evaluators.deepeval.evaluator**（project_doc）：DeepEval integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/deepeval.md`\n- **haystack integrations.components.converters.docling.converter**（project_doc）：Docling integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/docling.md`\n- **haystack integrations.components.converters.docling serve.converter**（project_doc）：Docling Serve integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/docling_serve.md`\n- **haystack integrations.tools.e2b.bash tool**（project_doc）：E2B integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/e2b.md`\n- **haystack 
integrations.components.retrievers.elasticsearch.bm25 retriever**（project_doc）：Elasticsearch integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/elasticsearch.md`\n- **haystack integrations.components.retrievers.faiss.embedding retriever**（project_doc）：FAISS integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/faiss.md`\n- **haystack integrations.components.retrievers.falkordb.cypher retriever**（project_doc）：FalkorDB integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/falkordb.md`\n- **haystack integrations.components.embedders.fastembed.fastembed document embedder**（project_doc）：FastEmbed integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/fastembed.md`\n- **haystack integrations.components.fetchers.firecrawl.firecrawl crawler**（project_doc）：Firecrawl integration for Haystack 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs-website/reference/integrations-api/firecrawl.md`\n\n## 证据索引\n\n- 共索引 80 条证据。\n\n- **Haystack Guidelines for AI Agents**（documentation）：Haystack uses Hatch for environment and dependency management. 证据：`AGENTS.md`\n- **CLAUDE.md**（documentation）：Before you start working on this repository, read the AGENTS.md file and follow all the instructions. 
证据：`CLAUDE.md`\n- **Table of Contents**（documentation）：------- -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------… 证据：`README.md`\n- **Haystack 2.x**（documentation）：Haystack https://github.com/deepset-ai/haystack is an end-to-end LLM framework that allows you to build applications powered by LLMs, Transformer models, vector search and more. Whether you want to perform retrieval-augmented generation RAG , document search, question answering or answer generation, Haystack can orchestrate state-of-the-art embedding models and LLMs into pipelines to build end-to-end NLP applications and solve your use case. 证据：`docker/README.md`\n- **Haystack Documentation Website**（documentation）：This directory contains the Docusaurus-powered documentation website for Haystack https://github.com/deepset-ai/haystack , an open-source framework for building production-ready applications with Large Language Models LLMs . 证据：`docs-website/README.md`\n- **Examples have been moved!**（documentation）：If you're searching for Haystack examples we moved them into a dedicated repository. 证据：`examples/README.md`\n- **:ledger: Looking for the docs?**（documentation）：You can find Haystack's documentation at https://docs.haystack.deepset.ai/. 
证据：`pydoc/README.md`\n- **Pipeline.run behavioural tests**（documentation）：This module contains all behavioural tests for Pipeline.run . 证据：`test/core/pipeline/features/README.md`\n- **Package**（package_manifest）：{ \"name\": \"haystack-docs-website\", \"version\": \"0.0.0\", \"private\": true, \"scripts\": { \"docusaurus\": \"docusaurus\", \"start\": \"docusaurus start\", \"build\": \"docusaurus build\", \"swizzle\": \"docusaurus swizzle\", \"deploy\": \"docusaurus deploy\", \"clear\": \"docusaurus clear\", \"serve\": \"docusaurus serve\", \"write-translations\": \"docusaurus write-translations\", \"write-heading-ids\": \"docusaurus write-heading-ids\", \"update-next-version\": \"node scripts/update-next-version.js\", \"create-version\": \"node scripts/create-new-version.js\", \"vercel:dev\": \"vercel dev\", \"generate-llms-txt\": \"docusaurus generate-llms-txt\" }, \"dependencies\": { \"@docusaurus/core\": \"^3.10.0\", \"@docusaurus/faster\": \"^3.10.0\", \"@docusaurus/pl… 证据：`docs-website/package.json`\n- **Contributing to Haystack**（documentation）：First off, thanks for taking the time to contribute! :blue heart: 证据：`CONTRIBUTING.md`\n- **Contributing to Haystack Documentation**（documentation）：Contributing to Haystack Documentation 证据：`docs-website/CONTRIBUTING.md`\n- **License**（source_file）：Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ 证据：`LICENSE`\n- **Related Issues**（documentation）：- I have read the contributors guidelines https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md and the code of conduct https://github.com/deepset-ai/haystack/blob/main/code of conduct.txt . - I have updated the related issue with new insights and changes. - I have added unit tests and updated the docstrings. - I've used one of the conventional commit types https://www.conventionalcommits.org/en/v1.0.0/ for my PR title: fix: , feat: , build: , chore: , ci: , docs: , style: , refactor: , perf: , test: and added ! 
in case the PR includes breaking changes. - I have documented my code. - I have added a release note file, following the contributors guidelines https://github.com/deepse… 证据：`.github/pull_request_template.md`\n- **Security Policy**（documentation）：If you have found a security vulnerability in Haystack, please report via email to opensource-security@deepset.ai mailto:opensource-security@deepset.ai . 证据：`SECURITY.md`\n- **Summary and motivation**（documentation）：Briefly explain how the change is breaking and why it is needed. 证据：`.github/ISSUE_TEMPLATE/breaking-change-proposal.md`\n- **Bug Report**（documentation）：Describe the bug A clear and concise description of what the bug is. 证据：`.github/ISSUE_TEMPLATE/bug_report.md`\n- **Feature Request**（documentation）：Is your feature request related to a problem? Please describe. A clear and concise description of what the problem is. Ex. I'm always frustrated when ... 证据：`.github/ISSUE_TEMPLATE/feature_request.md`\n- **Module haystack\\ experimental.components.agents.agent**（documentation）：Module haystack\\ experimental.components.agents.agent 证据：`docs-website/reference/experiments-api/experimental_agents_api.md`\n- **Module haystack\\ experimental.chat\\ message\\ stores.in\\ memory**（documentation）：Module haystack\\ experimental.chat\\ message\\ stores.in\\ memory 证据：`docs-website/reference/experiments-api/experimental_chatmessage_store_api.md`\n- **Module haystack\\ experimental.components.generators.chat.openai**（documentation）：Module haystack\\ experimental.components.generators.chat.openai 证据：`docs-website/reference/experiments-api/experimental_generators_api.md`\n- **Module haystack\\ experimental.memory\\ stores.mem0.memory\\ store**（documentation）：Module haystack\\ experimental.memory\\ stores.mem0.memory\\ store 证据：`docs-website/reference/experiments-api/experimental_mem0_memory_store_api.md`\n- **Module haystack\\ experimental.components.preprocessors.md\\ header\\ level\\ inferrer**（documentation）：Module 
haystack\\ experimental.components.preprocessors.md\\ header\\ level\\ inferrer 证据：`docs-website/reference/experiments-api/experimental_preprocessors_api.md`\n- **Module haystack\\ experimental.components.retrievers.chat\\ message\\ retriever**（documentation）：Module haystack\\ experimental.components.retrievers.chat\\ message\\ retriever 证据：`docs-website/reference/experiments-api/experimental_retrievers_api.md`\n- **Module haystack\\ experimental.components.summarizers.llm\\ summarizer**（documentation）：Module haystack\\ experimental.components.summarizers.llm\\ summarizer 证据：`docs-website/reference/experiments-api/experimental_summarizer_api.md`\n- **Module haystack\\ experimental.components.writers.chat\\ message\\ writer**（documentation）：Module haystack\\ experimental.components.writers.chat\\ message\\ writer 证据：`docs-website/reference/experiments-api/experimental_writers_api.md`\n- **agent**（documentation）：A tool-using Agent powered by a large language model. 证据：`docs-website/reference/haystack-api/agents_api.md`\n- **whisper local**（documentation）：Transcribes audio files using OpenAI's Whisper model on your local machine. 证据：`docs-website/reference/haystack-api/audio_api.md`\n- **answer builder**（documentation）：Converts a query and Generator replies into a GeneratedAnswer object. 证据：`docs-website/reference/haystack-api/builders_api.md`\n- **cache checker**（documentation）：Checks for the presence of documents in a Document Store based on a specified field in each document's metadata. 证据：`docs-website/reference/haystack-api/cachings_api.md`\n- **document language classifier**（documentation）：Classifies the language of each document and adds it to its metadata. 证据：`docs-website/reference/haystack-api/classifiers_api.md`\n- **openapi**（documentation）：OpenAPIConnector enables direct invocation of REST endpoints defined in an OpenAPI specification. 
证据：`docs-website/reference/haystack-api/connectors_api.md`\n- **azure**（documentation）：Converts files to documents using Azure's Document Intelligence service. 证据：`docs-website/reference/haystack-api/converters_api.md`\n- **answer**（documentation）：Holds an answer extracted by an extractive Reader query, score, text, and optional document/context . 证据：`docs-website/reference/haystack-api/data_classes_api.md`\n- **document store**（documentation）：A dataclass for managing document statistics for BM25 retrieval. 证据：`docs-website/reference/haystack-api/document_stores_api.md`\n- **document writer**（documentation）：Writes documents to a DocumentStore. 证据：`docs-website/reference/haystack-api/document_writers_api.md`\n- **azure document embedder**（documentation）：Calculates document embeddings using OpenAI models deployed on Azure. 证据：`docs-website/reference/haystack-api/embedders_api.md`\n- **eval run result**（documentation）：Contains the inputs and the outputs of an evaluation pipeline and provides methods to inspect them. 证据：`docs-website/reference/haystack-api/evaluation_api.md`\n- **answer exact match**（documentation）：An answer exact match evaluator class. 证据：`docs-website/reference/haystack-api/evaluators_api.md`\n- **image/llm document content extractor**（documentation）：image/llm document content extractor 证据：`docs-website/reference/haystack-api/extractors_api.md`\n- **link content**（documentation）：Fetches and extracts content from URLs. 证据：`docs-website/reference/haystack-api/fetchers_api.md`\n- **azure**（documentation）：Generates text using OpenAI's large language models LLMs . 证据：`docs-website/reference/haystack-api/generators_api.md`\n- **dataclasses**（documentation）：Result of the confirmation UI interaction. 证据：`docs-website/reference/haystack-api/human_in_the_loop_api.md`\n- **document to image**（documentation）：Converts documents sourced from PDF and image files into ImageContents. 
证据：`docs-website/reference/haystack-api/image_converters_api.md`\n- **answer joiner**（documentation）：Convert a string to a JoinMode enum. 证据：`docs-website/reference/haystack-api/joiners_api.md`\n- **async pipeline**（documentation）：Asynchronous version of the Pipeline orchestration engine. 证据：`docs-website/reference/haystack-api/pipeline_api.md`\n- **csv document cleaner**（documentation）：A component for cleaning CSV documents by removing empty rows and columns. 证据：`docs-website/reference/haystack-api/preprocessors_api.md`\n- **query expander**（documentation）：A component that returns a list of semantically similar queries to improve retrieval recall in RAG systems. 证据：`docs-website/reference/haystack-api/query_api.md`\n- **hugging face tei**（documentation）：Defines the direction to truncate text when input length exceeds the model's limit. 证据：`docs-website/reference/haystack-api/rankers_api.md`\n- **extractive**（documentation）：Locates and extracts answers to a given query from Documents. 证据：`docs-website/reference/haystack-api/readers_api.md`\n- **auto merging retriever**（documentation）：A retriever which returns parent documents of the matched leaf nodes documents, based on a threshold setting. 证据：`docs-website/reference/haystack-api/retrievers_api.md`\n- **conditional router**（documentation）：Exception raised when no route is selected in ConditionalRouter. 证据：`docs-website/reference/haystack-api/routers_api.md`\n- **top p**（documentation）：Implements top-p nucleus sampling for document filtering based on cumulative probability scores. 证据：`docs-website/reference/haystack-api/samplers_api.md`\n- **tool invoker**（documentation）：Base exception class for ToolInvoker errors. 证据：`docs-website/reference/haystack-api/tool_components_api.md`\n- **component tool**（documentation）：A Tool that wraps Haystack components, allowing them to be used as tools by LLMs. 
证据：`docs-website/reference/haystack-api/tools_api.md`\n- **asynchronous**（documentation）：Returns if the given callable is usable inside a component's run async method. 证据：`docs-website/reference/haystack-api/utils_api.md`\n- **json schema**（documentation）：Check if the provided string is a valid JSON. 证据：`docs-website/reference/haystack-api/validators_api.md`\n- **searchapi**（documentation）：Uses SearchApi https://www.searchapi.io/ to search the web for relevant documents. 证据：`docs-website/reference/haystack-api/websearch_api.md`\n- **Module haystack\\ integrations.components.generators.aimlapi.chat.chat\\ generator**（documentation）：Module haystack\\ integrations.components.generators.aimlapi.chat.chat\\ generator 证据：`docs-website/reference/integrations-api/aimlapi.md`\n- **haystack integrations.components.retrievers.alloydb.embedding retriever**（documentation）：haystack integrations.components.retrievers.alloydb.embedding retriever 证据：`docs-website/reference/integrations-api/alloydb.md`\n- **haystack integrations.common.amazon bedrock.errors**（documentation）：haystack integrations.common.amazon bedrock.errors 证据：`docs-website/reference/integrations-api/amazon_bedrock.md`\n- 其余 20 条证据见 `AI_CONTEXT_PACK.json` 或 `EVIDENCE_INDEX.json`。\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`AGENTS.md`, `CLAUDE.md`, `README.md`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`AGENTS.md`, `CLAUDE.md`, `README.md`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\nThe following material strengthens the Repomix/AI Context Pack body. 
Human Manual is only a reading skeleton; pitfall logs become hard operating constraints for the host AI.\n\n## Human Manual Skeleton\n\nUsage rule: this is only a reading path and salience signal, not factual authority. Concrete facts must still come from repo evidence / Claim Graph.\n\nHard rules for the host AI:\n- Do not treat page titles, order, summaries, or importance as project facts.\n- When explaining the Human Manual skeleton, state that it is only a reading path / salience signal.\n- Capability, installation, compatibility, runtime status, and risk judgments must cite repo evidence, source paths, or Claim Graph.\n\n- **Introduction to Haystack**：importance `high`\n  - source_paths: README.md, AGENTS.md, VERSION.txt\n- **Pipeline Architecture**：importance `high`\n  - source_paths: docs-website/docs/concepts/pipelines.mdx, docs-website/docs/concepts/pipelines/asyncpipeline.mdx, docs-website/docs/concepts/pipelines/serialization.mdx, docs-website/docs/concepts/pipelines/debugging-pipelines.mdx, docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx\n- **Core Concepts**：importance `high`\n  - source_paths: docs-website/docs/concepts/concepts-overview.mdx, docs-website/docs/concepts/components.mdx, docs-website/docs/concepts/data-classes.mdx, docs-website/docs/concepts/data-classes/chatmessage.mdx, docs-website/docs/concepts/jinja-templates.mdx\n- **Pipeline Component Types**：importance `high`\n  - source_paths: docs-website/docs/pipeline-components/generators.mdx, docs-website/docs/pipeline-components/embedders.mdx, docs-website/docs/pipeline-components/retrievers.mdx, docs-website/docs/pipeline-components/rankers.mdx, docs-website/docs/pipeline-components/preprocessors.mdx\n- **Data Processing Components**：importance `medium`\n  - source_paths: docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx, docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx, 
docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx, docs-website/docs/pipeline-components/converters.mdx, docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx\n- **LLM and Embedder Integrations**：importance `high`\n  - source_paths: docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx, docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx, docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx, docs-website/docs/concepts/integrations.mdx\n- **Document Stores and Retrievers**：importance `high`\n  - source_paths: docs-website/docs/concepts/document-store.mdx, docs-website/docs/concepts/document-store/choosing-a-document-store.mdx, docs-website/docs/document-stores/inmemorydocumentstore.mdx, docs-website/docs/document-stores/elasticsearch-document-store.mdx, docs-website/docs/document-stores/qdrant-document-store.mdx\n- **Agent Systems**：importance `medium`\n  - source_paths: docs-website/docs/concepts/agents.mdx, docs-website/docs/concepts/agents/multi-agent-systems.mdx, docs-website/docs/pipeline-components/agents-1/agent.mdx, docs-website/docs/pipeline-components/agents-1/state.mdx, docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx\n\n## Repo Inspection Evidence\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: `3d90a53f9495babadc59ceceb115a5c63bcbd642`\n- inspected_files: `pyproject.toml`, `README.md`, `examples/README.md`\n\nHard rules for the host AI:\n- Without repo_clone_verified=true, do not claim the source code has been read.\n- Without repo_inspection_verified=true, do not turn README/docs/package observations into facts.\n- Without quick_start_verified=true, do not claim the Quick Start has been successfully run.\n\n## Doramagic Pitfall Constraints\n\nThese rules come from Doramagic discovery, validation, or compilation pitfalls. 
The host AI must treat them as operating constraints, not general background notes.\n\n### Constraint 1: 来源证据：RFC: Signed receipts for Haystack pipeline component calls\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：RFC: Signed receipts for Haystack pipeline component calls\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_192c840953e54837869723f54ccfdd1a | https://github.com/deepset-ai/haystack/issues/11039 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 2: 来源证据：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能阻塞安装或首次运行。\n- Evidence: community_evidence:github | cevd_4b8f3323f54c4fd6b8de4e2d466cfe8b | https://github.com/deepset-ai/haystack/issues/11358 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 3: 来源证据：feat: add INTERSECTION join mode to DocumentJoiner\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: add INTERSECTION join mode to DocumentJoiner\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_00757f9859234e9cab8f8d4ce4f3e771 | https://github.com/deepset-ai/haystack/issues/11365 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 4: 来源证据：docs: Update Ragas docs\n\n- Trigger: 
GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：docs: Update Ragas docs\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能影响升级、迁移或版本选择。\n- Evidence: community_evidence:github | cevd_3204fffa09664d9f8553be2a3008f270 | https://github.com/deepset-ai/haystack/issues/11178 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 5: 来源证据：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n\n- Trigger: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能影响升级、迁移或版本选择。\n- Evidence: community_evidence:github | cevd_8f72793700a1416891c2eedddc379129 | https://github.com/deepset-ai/haystack/issues/11366 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 6: 来源证据：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Trigger: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能阻塞安装或首次运行。\n- Evidence: community_evidence:github | cevd_4f0868673100472fb74d831b5a04735f | https://github.com/deepset-ai/haystack/issues/11311 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 7: 来源证据：feat: support token-based budget in LostInTheMiddleRanker\n\n- Trigger: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：feat: support token-based budget in LostInTheMiddleRanker\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能影响授权、密钥配置或安全边界。\n- Evidence: 
community_evidence:github | cevd_7ad00787309c442eb497b10879fb3b28 | https://github.com/deepset-ai/haystack/issues/11351 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 8: 失败模式：installation: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Trigger: Developers should check this installation risk before relying on the project: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: Proposal: Transaction Protocol for idempotent, auditable agent pipelines. Context: Observed when using python\n- Why it matters: Developers may fail before the first successful local run: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- Evidence: failure_mode_cluster:github_issue | fmev_58038e9b6373edf9376049b42d4b7bb4 | https://github.com/deepset-ai/haystack/issues/11266 | Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 9: 失败模式：installation: RFC: Signed receipts for Haystack pipeline component calls\n\n- Trigger: Developers should check this installation risk before relying on the project: RFC: Signed receipts for Haystack pipeline component calls\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: RFC: Signed receipts for Haystack pipeline component calls. 
Context: Observed when using node, python\n- Why it matters: Developers may fail before the first successful local run: RFC: Signed receipts for Haystack pipeline component calls\n- Evidence: failure_mode_cluster:github_issue | fmev_ce0b9c65d21126dcf11ede12120e154f | https://github.com/deepset-ai/haystack/issues/11039 | RFC: Signed receipts for Haystack pipeline component calls\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 10: 失败模式：installation: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Trigger: Developers should check this installation risk before relying on the project: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense. Context: Observed when using python\n- Why it matters: Developers may fail before the first successful local run: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Evidence: failure_mode_cluster:github_issue | fmev_4d3276b6b9938595cb2dbb864a5509da | https://github.com/deepset-ai/haystack/issues/11311 | Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n",
      "summary": "Context and operating boundaries for host AI agents.",
      "title": "AI Context Pack"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card\n\nProject: deepset-ai/haystack\n\n## Doramagic Trial Decision\n\nCurrent decision: it can enter pre-publication recommendation checks. First use should still start with least privilege, a temporary directory, and reversible configuration.\n\n## What The User Can Do Now\n\n- Read the Human Manual first to understand the project purpose and main workflows.\n- Use Prompt Preview for pre-install exploration; it validates interaction shape, not real execution.\n- Run official Quick Start commands only inside an isolated environment, not a primary setup.\n\n## Do Not Do Yet\n\n- Do not treat Prompt Preview as a real project execution result.\n- Do not treat metadata-only validation as sandbox installation validation.\n- Do not describe unverified capabilities as supported, working, or safe to install.\n- Do not provide production data, private files, real secrets, or primary host configuration on first trial.\n\n## Pre-Install Checklist\n\n- Host AI match: local_cli\n- Official installation entry status: official entry point found\n- Isolated temporary directory, temporary host, or container validation: required\n- Configuration rollback path: required\n- API keys, network access, file access, or host configuration changes: treat as high risk until confirmed\n- Installation command, actual output, and failure logs: must be recorded\n\n## Current Blockers\n\n- No blockers.\n\n## Project-Specific Pitfalls\n\n- 来源证据：RFC: Signed receipts for Haystack pipeline component calls (high): 可能增加新用户试用和生产接入成本。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever` (high): 可能阻塞安装或首次运行。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：feat: add INTERSECTION join mode to DocumentJoiner (high): 可能增加新用户试用和生产接入成本。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：docs: Update Ragas docs (high): 
可能影响升级、迁移或版本选择。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context) (high): 可能影响升级、迁移或版本选择。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n\n## Risk And Permission Notes\n\n- no_demo: medium\n\n## Evidence Gaps\n\n- No structured evidence gaps are currently visible.\n",
      "summary": "Installation, permission, validation, and pre-recommendation risks.",
      "title": "Boundary & Risk Card"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/deepset-ai/haystack 项目说明书\n\n生成时间：2026-05-15 20:17:22 UTC\n\n## 目录\n\n- [Introduction to Haystack](#introduction)\n- [Pipeline Architecture](#pipeline-architecture)\n- [Core Concepts](#core-concepts)\n- [Pipeline Component Types](#component-types)\n- [Data Processing Components](#data-processing)\n- [LLM and Embedder Integrations](#llm-integrations)\n- [Document Stores and Retrievers](#document-stores)\n- [Agent Systems](#agents)\n- [Development Guide](#development-guide)\n- [Deployment and Infrastructure](#deployment)\n\n<a id='introduction'></a>\n\n## Introduction to Haystack\n\n### 相关页面\n\n相关主题：[Pipeline Architecture](#pipeline-architecture), [Core Concepts](#core-concepts)\n\n<details>\n<summary>Relevant Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n- [docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- [pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n- [examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n</details>\n\n# Introduction to Haystack\n\nHaystack is an end-to-end LLM framework that enables developers to build applications powered by Large Language Models (LLMs), Transformer models, vector search, and more. 
The framework provides a flexible architecture for orchestrating state-of-the-art embedding models and LLMs into pipelines to solve real-world NLP use cases.\n\n## What is Haystack?\n\nHaystack is designed to facilitate the development of production-ready AI applications with a focus on **context engineering**—giving developers explicit control over how information is retrieved, ranked, filtered, combined, structured, and routed before it reaches the language model.\n\n资料来源：[README.md:1]()()\n\n### Core Capabilities\n\n| Capability | Description |\n|------------|-------------|\n| **Retrieval-Augmented Generation (RAG)** | Combine vector search with LLMs for accurate, context-grounded responses |\n| **Document Search** | Full-featured document indexing and semantic search |\n| **Question Answering** | Extract answers from large document collections |\n| **Pipeline Orchestration** | Build complex workflows with customizable components |\n| **Agent Integration** | Deploy autonomous agents with tool-use capabilities |\n\n资料来源：[docker/README.md:4-6]()()\n\n## Architecture Overview\n\nHaystack follows a component-based architecture where pipelines serve as the foundational building blocks. Pipelines connect various components including document stores, retrievers, readers, generators, and custom tools.\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Pipeline]\n    B --> C[Retrievers]\n    B --> D[Document Stores]\n    C --> E[Rankers]\n    E --> F[LLM / Generator]\n    F --> G[Response]\n    \n    H[Documents] --> D\n    \n    style F fill:#e1f5fe\n    style D fill:#fff3e0\n    style C fill:#e8f5e9\n```\n\n### Pipeline Components\n\nPipelines in Haystack are composed of interconnected nodes that process data sequentially or in parallel. 
Each component handles a specific stage of the document processing or inference workflow.\n\n| Component Type | Function |\n|----------------|----------|\n| **DocumentStore** | Stores and indexes documents for retrieval |\n| **Retriever** | Finds relevant documents from the store |\n| **Ranker** | Reorders retrieved documents by relevance |\n| **Reader/Generator** | Extracts answers or generates responses |\n| **Preprocessor** | Cleans and splits documents before indexing |\n| **Custom Nodes** | User-defined processing logic |\n\n资料来源：[README.md:54-58]()()\n\n## Key Features\n\n### Built for Context Engineering\n\nHaystack provides fine-grained control over the entire retrieval and generation pipeline. Developers can:\n\n- Define custom retrieval strategies\n- Implement multi-stage ranking pipelines\n- Route queries to specialized processing branches\n- Control how context is assembled before reaching the LLM\n\n### Flexible Pipeline Design\n\nThe framework supports both declarative and programmatic pipeline construction, allowing developers to define workflows through configuration files or Python code.\n\n```mermaid\ngraph LR\n    A[Query Input] --> B[Retriever Node]\n    B --> C[Ranker Node]\n    C --> D[LLM Node]\n    D --> E[Output]\n    \n    F[Documents] --> G[Document Store]\n    G --> B\n```\n\n### Production-Ready Architecture\n\nHaystack includes enterprise features such as:\n\n- **Telemetry**: Anonymous usage statistics collection for component initialization tracking (opt-out available)\n- **Container Support**: Docker images for consistent deployment environments\n- **CI/CD Integration**: Automated testing with GitHub Actions workflows\n- **Type Checking**: Full MyPy type annotation support\n\n资料来源：[README.md:60-62]()()\n\n## Installation\n\n### Package Installation\n\nThe primary method for installing Haystack is via pip:\n\n```bash\npip install haystack-ai\n```\n\nFor testing pre-release features:\n\n```bash\npip install --pre 
haystack-ai\n```\n\n资料来源：[README.md:28-34]()()\n\n### Docker Installation\n\nHaystack provides Docker images for containerized deployments:\n\n| Image | Description |\n|-------|-------------|\n| `haystack:base-<version>` | Base image with Haystack preinstalled for derivation |\n\nMulti-platform builds are supported for various architectures including `linux/arm64` and `linux/amd64`.\n\n```bash\ndocker buildx bake base\n```\n\n资料来源：[docker/README.md:8-14]()()\n\n## Documentation Structure\n\nThe Haystack documentation is hosted at [docs.haystack.deepset.ai](https://docs.haystack.deepset.ai) and organized into several sections:\n\n| Section | Content |\n|---------|---------|\n| **Overview/Intro** | Getting started guides and project introduction |\n| **Get Started** | Quick-start guide for building first LLM applications |\n| **Tutorials** | Step-by-step learning paths |\n| **Cookbook** | Pre-built recipes and example implementations |\n| **API Reference** | Auto-generated documentation from docstrings |\n| **Concepts** | Core architectural concepts and design patterns |\n\n资料来源：[docs-website/README.md:1-8]()()\n\n### Documentation Versioning\n\nThe documentation site supports multiple versions:\n\n- **Next (Unreleased)**: Documentation for upcoming features\n- **Current (Stable)**: Documentation for the latest stable release\n- **Past Versions**: Archived documentation for previous releases\n\n资料来源：[docs-website/src/pages/versions.js:1-25]()()\n\n### API Reference Generation\n\nThe API reference pages are automatically generated from docstrings using [haystack-pydoc-tools](https://github.com/deepset-ai/haystack-pydoc-tools). 
A GitHub workflow regenerates the API reference when code changes are merged.\n\n资料来源：[pydoc/README.md:1-12]()()\n\n## Project Structure\n\n```\nhaystack/\n├── haystack/                    # Main package source code\n├── docs-website/                # Docusaurus documentation site\n│   ├── docs/                    # Main documentation content\n│   ├── reference/               # Auto-generated API reference\n│   └── versioned_docs/           # Versioned documentation snapshots\n├── docker/                      # Docker image configurations\n├── pydoc/                       # PyDoc configuration files\n└── examples/                    # Example implementations\n```\n\n> **Note**: Example implementations have been moved to the [haystack-cookbook](https://github.com/deepset-ai/haystack-cookbook/) repository.\n\n资料来源：[examples/README.md:1-5]()()\n\n## Community and Contributing\n\nHaystack is open to contributions from developers of all skill levels. There are multiple ways to contribute:\n\n| Contribution Area | Repository |\n|-------------------|------------|\n| Core Framework | `deepset-ai/haystack` |\n| Integrations | `deepset-ai/haystack-core-integrations` |\n| Documentation | `deepset-ai/haystack/tree/main/docs-website` |\n\n### Community Resources\n\n- **GitHub Issues**: Bug reports and feature requests\n- **GitHub Discussions**: General questions and community support\n- **Discord**: Real-time community engagement\n- **Stack Overflow**: Tagged questions at `haystack`\n- **Twitter/X**: Updates and announcements\n\n资料来源：[README.md:89-95]()()\n\n## Organizations Using Haystack\n\nHaystack is trusted by thousands of production AI teams across industries:\n\n| Industry | Organizations |\n|----------|---------------|\n| **Technology & AI** | Apple, Meta, Databricks, NVIDIA, Intel |\n| **Public Sector** | European Commission |\n\n资料来源：[README.md:78-85]()()\n\n## Licensing and Compliance\n\n- **License**: Apache 2.0\n- **Type Checking**: MyPy validated\n- **Coverage**: 
Automated test coverage tracking\n- **License Compliance**: Automated workflow verification\n\n资料来源：[README.md:10-11]()()\n\n## Summary\n\nHaystack provides a comprehensive framework for building production-ready LLM applications with emphasis on retrieval-augmented generation, flexible pipeline design, and context engineering. The framework's component-based architecture enables developers to customize every stage of the document processing and inference pipeline while maintaining production-grade reliability through integrated testing, documentation, and deployment tooling.\n\nWith support for Docker containerization, comprehensive documentation, and an active open-source community, Haystack serves as a robust foundation for teams implementing enterprise AI solutions across diverse industries.\n\n---\n\n<a id='pipeline-architecture'></a>\n\n## Pipeline Architecture\n\n### 相关页面\n\n相关主题：[Introduction to Haystack](#introduction), [Pipeline Component Types](#component-types), [Core Concepts](#core-concepts)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/concepts/pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n- [docs-website/docs/concepts/pipelines/asyncpipeline.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/asyncpipeline.mdx)\n- [docs-website/docs/concepts/pipelines/serialization.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/serialization.mdx)\n- [docs-website/docs/concepts/pipelines/debugging-pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx)\n- [docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx)\n</details>\n\n# Pipeline Architecture\n\n## Overview\n\nThe Pipeline architecture is the 
foundational component of the Haystack framework, enabling developers to construct flexible, modular workflows for building LLM-powered applications. Pipelines orchestrate the execution of various components—including retrievers, readers, generators, and custom processors—into cohesive data processing flows.\n\nPipelines in Haystack 2.x provide a declarative approach to defining application workflows, allowing developers to:\n\n- Connect multiple components in directed graphs that support branches and loops\n- Route data between components with explicit connections\n- Handle both synchronous and asynchronous execution models\n- Debug and inspect execution through breakpoints and hooks\n- Persist and share pipeline configurations through serialization\n\n资料来源：[docs-website/docs/concepts/pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n\n## Core Concepts\n\n### Component Connections\n\nComponents in a Haystack Pipeline are connected through named input/output connections. 
Each component exposes specific input and output slots that define how data flows through the pipeline.\n\n```mermaid\ngraph LR\n    A[Document Store] -->|query results| B[Retriever]\n    B -->|retrieved docs| C[Reader]\n    C -->|answers| D[Output]\n    \n    style A fill:#e1f5fe\n    style B fill:#fff3e0\n    style C fill:#e8f5e9\n    style D fill:#fce4ec\n```\n\nThe connection model requires that:\n- Output types must be compatible with target input types\n- Components can have multiple inputs and outputs\n- Connections form a directed graph structure\n\n资料来源：[docs-website/docs/concepts/pipelines.mdx:1-20](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n\n### Pipeline Types\n\nHaystack provides multiple pipeline implementations optimized for different use cases:\n\n| Pipeline Type | Use Case | Execution Model |\n|---------------|----------|-----------------|\n| Standard Pipeline | General-purpose workflows | Synchronous |\n| AsyncPipeline | High-throughput I/O operations | Asynchronous with `async/await` |\n| SearchPipeline | Retrieval-focused workflows | Optimized for search |\n| GenerativePipeline | LLM-centric applications | Optimized for generation |\n\n资料来源：[docs-website/docs/concepts/pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines.mdx)\n\n## AsyncPipeline\n\nThe AsyncPipeline extends the standard Pipeline with asynchronous execution capabilities, making it suitable for applications requiring high concurrency and non-blocking I/O operations.\n\n### Key Features\n\n- **Non-blocking execution**: Components can execute concurrently when dependencies are satisfied\n- **Streaming support**: Better handling of streaming responses from LLMs\n- **Resource efficiency**: Improved CPU and memory utilization for I/O-bound workloads\n\n```python\nasync def run_async_pipeline(pipeline, query):\n    result = await pipeline.run_async(query=query)\n    return 
result\n```\n\n资料来源：[docs-website/docs/concepts/pipelines/asyncpipeline.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/asyncpipeline.mdx)\n\n### Execution Flow\n\n```mermaid\ngraph TD\n    A[Start] --> B{AsyncPipeline.run_async}\n    B --> C[Execute Independent Components]\n    C --> D{Wait for Dependencies?}\n    D -->|No| E[Collect Results]\n    D -->|Yes| F[Await Dependency]\n    F --> E\n    E --> G[Return Unified Result]\n    \n    style B fill:#bbdefb\n    style C fill:#c8e6c9\n    style G fill:#ffe0b2\n```\n\n## Serialization\n\nPipeline configurations can be serialized to YAML format, enabling:\n\n- Persistence of pipeline definitions\n- Sharing configurations across environments\n- Version control for pipeline definitions\n- Reproducible deployments\n\n### Serialization Format\n\n```yaml\nversion: '2.0'\ncomponents:\n  - name: MyRetriever\n    type: BM25Retriever\n    init_parameters:\n      document_store: MyDocumentStore\n  - name: MyReader\n    type: FARMReader\n    init_parameters:\n      model_name_or_path: deepset/roberta-base-squad2\nedges: []\n```\n\n资料来源：[docs-website/docs/concepts/pipelines/serialization.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/serialization.mdx)\n\n### Loading Serialized Pipelines\n\n```python\nfrom haystack import Pipeline\n\n# Load from YAML\npipeline = Pipeline.load_from_yaml(path=\"pipeline_config.yaml\")\n```\n\n## Debugging Pipelines\n\nHaystack provides comprehensive debugging capabilities to inspect and troubleshoot pipeline execution.\n\n### Execution Tracing\n\nThe debugging system tracks:\n- Component execution order\n- Input/output data at each stage\n- Execution timing and performance metrics\n- Error locations and stack traces\n\n```python\nfrom haystack import Pipeline\n\npipeline = Pipeline()\npipeline.debug = True  # Enable debug mode\nresult = pipeline.run(query=\"What is 
Haystack?\")\n```\n\n资料来源：[docs-website/docs/concepts/pipelines/debugging-pipelines.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx)\n\n### Pipeline Inspector\n\nThe Pipeline Inspector provides detailed visibility into:\n\n| Inspection Target | Information Provided |\n|-------------------|---------------------|\n| Component Graph | Node and edge relationships |\n| Data Flow | Input/output shapes and types |\n| Execution State | Runtime values at breakpoints |\n| Performance | Timing and memory profiles |\n\n## Pipeline Breakpoints\n\nBreakpoints allow execution to pause at specific points, enabling detailed inspection of intermediate results.\n\n```mermaid\ngraph LR\n    A[Pipeline Run] --> B{Breakpoint 1?}\n    B -->|Yes| C[Pause & Inspect]\n    C --> D{Continue?}\n    D -->|Yes| E{Breakpoint 2?}\n    D -->|No| Z[Abort]\n    E -->|Yes| F[Pause & Inspect]\n    E -->|No| G[Continue to End]\n    B -->|No| E\n    \n    style C fill:#fff9c4\n    style F fill:#fff9c4\n    style Z fill:#ffcdd2\n```\n\n### Breakpoint Configuration\n\nBreakpoints can be configured at:\n\n- **Component level**: Pause before or after specific component execution\n- **Connection level**: Inspect data flowing through specific connections\n- **Condition level**: Pause only when certain conditions are met\n\n资料来源：[docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx)\n\n## Best Practices\n\n### Pipeline Design\n\n1. **Modularity**: Keep components focused on single responsibilities\n2. **Clear naming**: Use descriptive names for components and connections\n3. **Error handling**: Implement proper error handling at component boundaries\n4. 
**Testing**: Unit test individual components before integration\n\n### Performance Optimization\n\n| Strategy | Description |\n|----------|-------------|\n| Caching | Enable caching for expensive operations |\n| Batching | Use batch processing for multiple queries |\n| Async execution | Prefer AsyncPipeline for I/O-bound workflows |\n| Resource limits | Set appropriate timeouts and memory limits |\n\n## Architecture Summary\n\n```mermaid\ngraph TD\n    subgraph \"Pipeline Layer\"\n        A[Pipeline] --> B[AsyncPipeline]\n        A --> C[SearchPipeline]\n        A --> D[GenerativePipeline]\n    end\n    \n    subgraph \"Component Layer\"\n        E[Retrievers] --> A\n        F[Readers] --> A\n        G[Generators] --> A\n        H[Custom Processors] --> A\n    end\n    \n    subgraph \"Data Layer\"\n        I[Document Stores] --> E\n        J[Models] --> F\n        J --> G\n    end\n    \n    subgraph \"Infrastructure\"\n        K[Serialization] -.-> A\n        L[Debugging] -.-> A\n        M[Breakpoints] -.-> A\n    end\n```\n\n## Related Documentation\n\n- [Components Overview](https://docs.haystack.deepset.ai/docs/intro)\n- [Pipeline Components](https://docs.haystack.deepset.ai/docs/pipeline-components)\n- [API Reference](https://docs.haystack.deepset.ai/reference/pipeline)\n- [Cookbook Examples](https://haystack.deepset.ai/cookbook)\n\n---\n\n<a id='core-concepts'></a>\n\n## Core Concepts\n\n### 相关页面\n\n相关主题：[Pipeline Architecture](#pipeline-architecture), [Pipeline Component Types](#component-types), [Introduction to Haystack](#introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n- [docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n- [pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- 
[examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n- [docs-website/src/theme/SearchBar.js](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/theme/SearchBar.js)\n- [docs-website/src/components/CopyDropdown/index.tsx](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/components/CopyDropdown/index.tsx)\n</details>\n\n# Core Concepts\n\nHaystack is an end-to-end LLM (Large Language Model) framework that enables developers to build applications powered by LLMs, Transformer models, vector search, and more. The framework orchestrates state-of-the-art embedding models and LLMs into pipelines to solve use cases such as retrieval-augmented generation (RAG), document search, question answering, and answer generation.\n\n## What is Haystack?\n\nHaystack provides a flexible architecture for designing systems with explicit control over how information is retrieved, ranked, filtered, combined, structured, and routed before it reaches the model. The framework allows developers to define pipelines and agent workflows where retrieval, memory, tools, and other components work together seamlessly.\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Architecture Overview\n\nHaystack's architecture is built around the concept of **pipelines** that orchestrate various components. 
These pipelines provide explicit control over the data flow from input to output, enabling developers to build complex LLM applications with fine-grained control.\n\n```mermaid\ngraph TD\n    A[Input Query] --> B[Pipeline]\n    B --> C[Components]\n    C --> D[Retrievers]\n    C --> E[Rankers]\n    C --> F[Memory]\n    C --> G[Tools]\n    D --> H[Document Store]\n    E --> I[LLM]\n    H --> J[Context Engineering]\n    I --> K[Generated Response]\n    J --> I\n```\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Installation\n\nHaystack can be installed via pip using the main package:\n\n```sh\npip install haystack-ai\n```\n\nFor trying newest features, install nightly pre-releases:\n\n```sh\npip install --pre haystack-ai\n```\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Docker Support\n\nHaystack provides Docker images for containerized deployments. The base image `haystack:base-<version>` contains a working Python environment with Haystack preinstalled and is designed to be derived `FROM`.\n\nImages are built with BuildKit and orchestrated using `bake`:\n\n```sh\ndocker buildx bake base\n```\n\nCustom images can be built by overriding variables defined in the `docker-bake.hcl` file:\n\n```sh\nHAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake base --no-cache\n```\n\n资料来源：[docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n\n## Documentation System\n\nHaystack maintains comprehensive documentation at [docs.haystack.deepset.ai](https://docs.haystack.deepset.ai). 
The documentation is built with Docusaurus 3 and provides guides, tutorials, API references, and best practices.\n\n### Documentation Structure\n\n| Directory | Purpose |\n|-----------|---------|\n| `docs/` | Main documentation (guides, tutorials, concepts) |\n| `docs/concepts/` | Core Haystack concepts |\n| `docs/pipeline-components/` | Component documentation |\n| `reference/` | API reference (auto-generated) |\n| `versioned_docs/` | Versioned copies of docs |\n| `src/` | React components and custom code |\n\n资料来源：[docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n### Versioning\n\nDocumentation versions are released alongside Haystack releases and are fully automated through GitHub workflows. The versioning process includes:\n\n- `promote_unstable_docs.yml` - Automatically triggered during Haystack releases\n- `minor_version_release.yml` - Creates new version directories and updates version configuration\n\n资料来源：[docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n## API Reference\n\nThe API reference is generated from docstrings in the codebase using [haystack-pydoc-tools](https://github.com/deepset-ai/haystack-pydoc-tools). A GitHub workflow regenerates the API reference when code changes.\n\nTo add documentation for a new module:\n\n1. Create a `.yml` file in the `pydoc` directory\n2. Configure how haystack-pydoc-tools will generate the page\n3. 
Commit to main\n\nAll API reference updates are initially deployed to unstable docs and promoted to stable docs during releases.\n\n资料来源：[pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n\n## Documentation Website Development\n\nThe documentation site can be run locally for development:\n\n```bash\ngit clone https://github.com/deepset-ai/haystack.git\ncd haystack/docs-website\nnpm install\nnpm start\n```\n\nThe site opens at http://localhost:3000 with live reload functionality.\n\nCommon development tasks include:\n\n- Edit a page: update files under `docs/` or `versioned_docs/`\n- Add to sidebar: update `sidebars.js` with your doc ID\n- Production check: `npm run build && npm run serve`\n\n资料来源：[docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n\n## Search Functionality\n\nThe documentation website includes a custom search bar that groups results by page and sorts them by relevance score. The search system supports filtering by category and provides snippets from matching documents.\n\n### Search Architecture\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Search Input]\n    B --> C[Debounced Search]\n    C --> D[Search Algorithm]\n    D --> E{Results Found?}\n    E -->|Yes| F[Group by Page]\n    E -->|No| G[No Results State]\n    F --> H[Sort by Score]\n    H --> I[Display Results]\n    G --> J[Show Error/Message]\n```\n\n资料来源：[docs-website/src/theme/SearchBar.js](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/theme/SearchBar.js)\n\n## Documentation Export Features\n\nThe documentation site provides multiple ways to export and share content:\n\n| Feature | Description |\n|---------|-------------|\n| Copy as Markdown | Copy page content in Markdown format for LLMs |\n| View as Markdown | View page as plain text |\n| Export as PDF | Save page as PDF file |\n| Ask AI | Open page in external AI assistants 
|\n\n资料来源：[docs-website/src/components/CopyDropdown/index.tsx](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/components/CopyDropdown/index.tsx)\n\n### Markdown Conversion Rules\n\nThe export feature uses custom Turndown rules:\n\n- Code blocks: Wrapped in backticks\n- Admonitions: Converted to blockquotes with type labels (NOTE, TIP, WARNING, etc.)\n- Navigation elements: Removed from export\n- Scripts and styles: Filtered out\n\n资料来源：[docs-website/src/components/CopyDropdown/index.tsx](https://github.com/deepset-ai/haystack/blob/main/docs-website/src/components/CopyDropdown/index.tsx)\n\n## Examples and Cookbooks\n\nExample code and cookbooks have been moved to a dedicated repository: [haystack-cookbook](https://github.com/deepset-ai/haystack-cookbook/)\n\nThis separation allows for easier maintenance and discovery of example applications.\n\n资料来源：[examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n\n## CI/CD and Quality Assurance\n\nHaystack maintains high code quality through automated workflows:\n\n| Workflow | Purpose |\n|----------|---------|\n| tests.yml | Run test suite |\n| types (Mypy) | Type checking |\n| Coverage | Code coverage tracking |\n| Ruff | Linting |\n| license_compliance.yml | License verification |\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Contributing to Haystack\n\nHaystack welcomes community contributions in various forms:\n\n- **Main project**: Contribute to the core Haystack repository\n- **Integrations**: Contribute on [haystack-core-integrations](https://github.com/deepset-ai/haystack-core-integrations)\n- **Documentation**: Contribute to [haystack/docs-website](https://github.com/deepset-ai/haystack/tree/main/docs-website)\n\nThe project provides a [full list of issues open to contributions](https://github.com/orgs/deepset-ai/projects/14) for both new and experienced 
contributors.\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n## Organizations Using Haystack\n\nHaystack is used in production by numerous organizations across industries:\n\n| Industry | Organizations |\n|----------|---------------|\n| Technology & AI | Apple, Meta, Databricks, NVIDIA, Intel |\n| Public Sector | European Commission |\n| Various | Thousands of teams building production AI systems |\n\n资料来源：[README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n\n---\n\n<a id='component-types'></a>\n\n## Pipeline Component Types\n\n### 相关页面\n\n相关主题：[Pipeline Architecture](#pipeline-architecture), [Data Processing Components](#data-processing), [LLM and Embedder Integrations](#llm-integrations)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/pipeline-components/generators.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators.mdx)\n- [docs-website/docs/pipeline-components/embedders.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/embedders.mdx)\n- [docs-website/docs/pipeline-components/retrievers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/retrievers.mdx)\n- [docs-website/docs/pipeline-components/rankers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/rankers.mdx)\n- [docs-website/docs/pipeline-components/preprocessors.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors.mdx)\n- [docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n- [docs-website/docs/pipeline-components/builders.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/builders.mdx)\n- 
[docs-website/docs/pipeline-components/routers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/routers.mdx)\n- [docs-website/docs/pipeline-components/joiners.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/joiners.mdx)\n</details>\n\n# Pipeline Component Types\n\nPipeline components are the fundamental building blocks of Haystack pipelines. They are modular units that perform specific operations such as retrieving documents, converting file formats, generating responses, and routing data between pipeline stages. Each component follows a consistent interface that enables seamless integration into pipeline workflows, allowing developers to compose complex LLM applications from reusable, interchangeable parts.\n\n## Overview\n\nHaystack provides a comprehensive set of built-in pipeline components that cover the full lifecycle of LLM-powered applications. These components are designed to work together through a unified API, enabling developers to build retrieval-augmented generation (RAG) systems, question-answering pipelines, document processing workflows, and agent-based applications with minimal configuration.\n\nThe architecture follows a modular pattern where each component receives inputs, performs a specific transformation or operation, and produces outputs that can be consumed by subsequent components in the pipeline. This design philosophy ensures that components remain loosely coupled and highly reusable across different use cases.\n\nComponents in Haystack are categorized based on their primary function within the data flow. Some components handle input preparation (converters, preprocessors), others manage information retrieval (retrievers, embedders), some optimize result ordering (rankers), and others control program flow (routers, joiners). 
Understanding these categories is essential for designing effective pipelines that balance performance, accuracy, and resource utilization.\n\n## Component Architecture\n\n### Component Lifecycle\n\nComponents in Haystack follow a standardized lifecycle that includes initialization, execution, and optional teardown phases. During initialization, components receive their configuration parameters and prepare any required resources such as model weights, API connections, or index data. The execution phase processes input data through the component's core logic, while the teardown phase releases resources when the component is no longer needed.\n\n```mermaid\ngraph TD\n    A[Initialize Component] --> B[Load Resources]\n    B --> C[Receive Input Data]\n    C --> D[Process Data]\n    D --> E[Produce Output]\n    E --> F{Check Pipeline Status}\n    F -->|Continue| C\n    F -->|Complete| G[Release Resources]\n    G --> H[Component Lifecycle End]\n```\n\n### Data Flow Patterns\n\nHaystack pipelines support multiple data flow patterns that determine how information moves between components. Linear flow passes output directly to the next component, while branching flow sends data to multiple paths based on conditions. Parallel flow distributes work across multiple components simultaneously, and feedback flow allows outputs to influence earlier pipeline stages.\n\n## Input Processing Components\n\nInput processing components prepare raw data for use by downstream pipeline stages. These components handle the transformation of unstructured or heterogeneous data sources into standardized formats that can be processed consistently throughout the pipeline.\n\n### Converters\n\nConverters transform documents from various file formats into Haystack's internal document representation. 
They handle the extraction of text content from source files while preserving metadata that may be useful for subsequent processing or retrieval operations.\n\n| Converter Type | Supported Formats | Primary Use Case |\n|---------------|-------------------|------------------|\n| PDF Converter | PDF | Extract text from PDF documents |\n| Text Converter | TXT, MD | Plain text and markdown files |\n| DOCX Converter | DOCX | Microsoft Word documents |\n| HTML Converter | HTML | Web page content extraction |\n\nConverters are typically placed at the beginning of indexing pipelines where they process source documents before the content is split, embedded, and stored. The output of converters feeds directly into preprocessors that further refine the content.\n\n资料来源：[docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n\n### Preprocessors\n\nPreprocessors clean, normalize, and transform document content to improve retrieval quality and downstream processing. They apply transformations such as text cleaning, language detection, and content segmentation to prepare documents for embedding and storage.\n\n```mermaid\ngraph LR\n    A[Raw Document] --> B[Clean Text]\n    B --> C[Detect Language]\n    C --> D[Split Document]\n    D --> E[Normalize Content]\n    E --> F[Processed Document]\n```\n\nKey preprocessing operations include removing unnecessary whitespace, normalizing unicode characters, splitting long documents into manageable chunks, and filtering out low-quality content. These operations significantly impact the quality of retrieval results and should be configured based on the specific characteristics of your data.\n\nPreprocessors work closely with converters to form the input preparation stage of indexing pipelines. 
The processed output is then passed to embedders or directly to storage depending on the pipeline configuration.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors.mdx)\n\n### Builders\n\nBuilders construct specialized data structures or artifacts that support pipeline operations. Unlike converters that handle file formats, builders create complex objects such as prompt templates, search indexes, or custom data representations required by other components.\n\nBuilders enable the composition of reusable building blocks that can be shared across multiple pipelines. They abstract away the complexity of constructing complex objects, allowing pipeline developers to focus on workflow design rather than implementation details.\n\n资料来源：[docs-website/docs/pipeline-components/builders.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/builders.mdx)\n\n## Information Retrieval Components\n\nInformation retrieval components locate and retrieve relevant content from data stores. These components form the core of RAG systems and document search applications, enabling pipelines to find the most relevant information based on query semantics or keywords.\n\n### Retrievers\n\nRetrievers search document stores to find content relevant to a given query. Haystack supports multiple retrieval strategies ranging from keyword-based sparse retrieval to semantic dense retrieval, enabling developers to choose the approach that best fits their use case.\n\n| Retrieval Type | Description | Best For |\n|--------------|-------------|----------|\n| Dense Retrieval | Uses neural embeddings for semantic matching | Conceptual queries, semantic similarity |\n| Sparse Retrieval | Traditional keyword-based matching | Exact matches, specific terminology |\n| Hybrid Retrieval | Combines dense and sparse methods | Balanced performance across query types |\n\nRetrievers are fundamental to RAG pipelines where they identify the documents or passages most likely to contain information relevant to the user's question. 
The retrieved content is then passed to generators that synthesize the final response.\n\n资料来源：[docs-website/docs/pipeline-components/retrievers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/retrievers.mdx)\n\n### Embedders\n\nEmbedders convert text content into vector representations that capture semantic meaning. These vectors enable semantic similarity searches where documents are matched based on meaning rather than exact keyword occurrence.\n\n```mermaid\ngraph TD\n    A[Text Input] --> B[Embedding Model]\n    B --> C[Vector Representation]\n    C --> D[Vector Store]\n    \n    E[Query] --> F[Same Embedding Model]\n    F --> G[Query Vector]\n    G --> D\n    D --> H[Similarity Search]\n    H --> I[Ranked Results]\n```\n\nEmbedders are used both during indexing (to create document vectors) and at query time (to create query vectors). The choice of embedding model significantly impacts retrieval quality, and Haystack supports integration with various embedding providers including OpenAI, Hugging Face, and local models.\n\n资料来源：[docs-website/docs/pipeline-components/embedders.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/embedders.mdx)\n\n### Rankers\n\nRankers improve retrieval results by reordering documents based on additional relevance signals. While retrievers perform the initial candidate selection, rankers apply more sophisticated scoring models to identify the most relevant results.\n\nRankers typically use cross-encoder models that jointly analyze query-document pairs to produce relevance scores. This approach is computationally more expensive than bi-encoder retrieval but provides higher accuracy for tasks where precision is critical.\n\nThe typical pipeline arrangement places rankers after retrievers, with retrievers performing the broad candidate selection and rankers performing the refined reordering. 
This two-stage approach balances computational efficiency with result quality.\n\n资料来源：[docs-website/docs/pipeline-components/rankers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/rankers.mdx)\n\n## Output Generation Components\n\nOutput generation components synthesize final responses or artifacts from the information retrieved and processed by earlier pipeline stages. These components transform raw retrieved content into user-facing outputs.\n\n### Generators\n\nGenerators produce final outputs such as text responses, summaries, or structured data from retrieved context and user queries. In RAG systems, generators receive relevant documents and formulate answers that incorporate information from the retrieved content.\n\n```mermaid\ngraph TD\n    A[User Query] --> E[Generator]\n    B[Retrieved Context] --> E\n    E --> F[Generate Response]\n    F --> G[Response Output]\n    \n    H[LLM Provider] <--> E\n    H --> |API Key| E\n```\n\nGenerators integrate with various LLM providers including OpenAI, Anthropic, Cohere, Hugging Face, and local models. Configuration options control parameters such as temperature, max tokens, and response format to customize generator behavior for specific applications.\n\n资料来源：[docs-website/docs/pipeline-components/generators.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators.mdx)\n\n## Flow Control Components\n\nFlow control components manage how data moves through pipelines, enabling conditional logic, parallel processing, and result aggregation. These components add flexibility to pipeline design beyond simple linear data flow.\n\n### Routers\n\nRouters direct input data to different pipeline branches based on conditions or classifications. 
They enable conditional execution where different components handle different types of inputs or queries.\n\n| Router Type | Decision Basis | Use Case |\n|------------|---------------|----------|\n| Conditional Router | User-defined rules | Route queries to appropriate handlers |\n| Semantic Router | Query classification | Direct to specialized pipelines |\n| Custom Router | Any Python logic | Flexible routing strategies |\n\nRouters are essential for building multi-stage pipelines that handle diverse input types or implement complex query routing strategies. They enable pipelines to adapt their behavior based on the specific requirements of each input.\n\n资料来源：[docs-website/docs/pipeline-components/routers.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/routers.mdx)\n\n### Joiners\n\nJoiners combine outputs from multiple pipeline branches into unified inputs for downstream components. They handle the aggregation of results from parallel processing paths or the merging of different data streams.\n\n```mermaid\ngraph TD\n    A[Input] --> B[Branch 1]\n    A --> C[Branch 2]\n    A --> D[Branch N]\n    B --> E[Joiner]\n    C --> E\n    D --> E\n    E --> F[Combined Output]\n```\n\nJoiners implement various combination strategies including concatenation, interleaving, and weighted merging. The appropriate strategy depends on the data types being combined and the requirements of downstream components.\n\n资料来源：[docs-website/docs/pipeline-components/joiners.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/joiners.mdx)\n\n## Component Configuration Patterns\n\n### Initialization Parameters\n\nComponents accept configuration during initialization that determines their behavior, resource connections, and operational parameters. Common configuration categories include model selection, connection settings, and behavioral parameters.\n\n### Default Parameters\n\nComponents provide sensible defaults for most parameters, enabling quick pipeline construction while allowing customization when needed. 
Default values are documented in each component's reference documentation.\n\n### Runtime Parameters\n\nSome components accept parameters at runtime (during pipeline execution) in addition to initialization-time configuration. Runtime parameters enable dynamic behavior adjustment based on input characteristics or pipeline state.\n\n## Building Custom Components\n\nHaystack's component architecture supports extension through custom implementations. Custom components follow the same interface patterns as built-in components, ensuring compatibility with existing pipeline infrastructure.\n\n### Component Interface Requirements\n\nCustom components must implement the standard component methods including initialization, execution, and any component-specific lifecycle hooks. The exact interface depends on the component type, but all components must be serializable for pipeline persistence.\n\n### Integration with Pipeline\n\nCustom components integrate seamlessly with built-in components through the unified pipeline interface. They can receive inputs from and produce outputs for any other component type, enabling flexible composition of custom and built-in functionality.\n\n## Best Practices\n\n### Component Selection\n\nChoose components based on your specific use case requirements including accuracy needs, latency constraints, and resource availability. Consider the trade-offs between different retrieval strategies, embedding models, and generation approaches.\n\n### Pipeline Design\n\nDesign pipelines with clear separation of concerns between components. Input processing, retrieval, and generation should be logically separated to enable independent optimization and testing.\n\n### Performance Optimization\n\nOptimize component ordering based on computational cost. Place computationally expensive operations later in the pipeline where they operate on reduced candidate sets. 
Use rankers selectively based on the required result quality.\n\n## Summary\n\nPipeline components form the foundation of Haystack's architecture, enabling modular construction of LLM-powered applications. The component taxonomy spans input processing (converters, preprocessors, builders), information retrieval (retrievers, embedders, rankers), output generation (generators), and flow control (routers, joiners). Each component category serves a distinct purpose in the pipeline data flow, and understanding these roles enables effective pipeline design and customization.\n\n---\n\n<a id='data-processing'></a>\n\n## Data Processing Components\n\n### 相关页面\n\n相关主题：[Document Stores and Retrievers](#document-stores), [Pipeline Component Types](#component-types)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx)\n- [docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx)\n- [docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx)\n- [docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n- [docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx)\n</details>\n\n# Data Processing Components\n\nData Processing Components are fundamental pipeline elements in Haystack that transform, clean, and prepare documents for downstream 
operations such as retrieval, indexing, and LLM processing. These components operate on `Document` objects, enabling structured manipulation of content while preserving metadata integrity throughout the processing pipeline.\n\n## Overview\n\nData Processing Components in Haystack serve as the preprocessing layer that bridges raw document ingestion with semantic retrieval and generation tasks. They are designed to handle various document formats, split long content into manageable chunks, and ensure data quality through cleaning operations.\n\nThe architecture follows a modular design pattern where each component type specializes in a specific transformation task:\n\n- **Document Splitters**: Divide documents into smaller, semantically coherent chunks\n- **Document Cleaners**: Remove noise, normalize text, and enhance readability\n- **Converters**: Transform external file formats into Haystack `Document` objects\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx)\n\n## Architecture and Processing Flow\n\n```mermaid\ngraph TD\n    A[Raw Document Input] --> B[Converters]\n    B --> C[Document Objects]\n    C --> D[Document Cleaners]\n    D --> E[Document Splitters]\n    E --> F[Processed Chunks]\n    F --> G[Embedding Stores]\n    G --> H[Retrieval Pipelines]\n    \n    B -.->|File Types| I[TXT]\n    B -.->|File Types| J[PDF]\n    B -.->|File Types| K[Markdown]\n    B -.->|File Types| L[HTML]\n    B -.->|File Types| M[Docx]\n    \n    D -.->|Operations| N[Text Normalization]\n    D -.->|Operations| O[Whitespace Cleaning]\n    D -.->|Operations| P[Metadata Preservation]\n    \n    E -.->|Strategies| Q[Character Split]\n    E -.->|Strategies| R[Recursive Split]\n    E -.->|Strategies| S[Hierarchical Split]\n```\n\n## Document Splitters\n\nDocument splitters are preprocessors that divide long documents into smaller, 
manageable chunks while attempting to preserve semantic coherence. This is critical for effective retrieval since chunk size directly impacts retrieval precision and context window utilization.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx)\n\n### Splitter Types\n\n| Splitter Type | Use Case | Splitting Strategy |\n|---------------|----------|---------------------|\n| `DocumentSplitter` | Basic character or token-based splitting | Fixed-length chunks |\n| `RecursiveSplitter` | Hierarchical splitting by delimiters | Recursive character/separator traversal |\n| `HierarchicalDocumentSplitter` | Multi-level document structure | Preserves headings and sections |\n\n### DocumentSplitter\n\nThe base `DocumentSplitter` provides fundamental splitting capabilities using either character count or token count as the primary division criterion.\n\n**Key Parameters:**\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `split_length` | `int` | Required | Target size of each chunk |\n| `split_overlap` | `int` | `0` | Number of overlapping elements between chunks |\n| `split_by` | `str` | `\"word\"` | Splitting criterion: `\"word\"`, `\"sentence\"`, `\"passage\"`, or `\"token\"` |\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentsplitter.mdx)\n\n### RecursiveSplitter\n\nThe `RecursiveSplitter` implements an intelligent multi-level splitting strategy that attempts to split documents at natural boundaries before falling back to smaller units.\n\n```python\nfrom haystack.components.preprocessors import RecursiveSplitter\n\nsplitter = RecursiveSplitter(\n    split_by=\"sentence\",\n    split_length=5,\n    split_overlap=2,\n    
separators=[\"\\n\\n\", \"\\n\", \". \", \" \", \"\"]\n)\n```\n\nThe splitter iterates through the `separators` list, attempting to split at each level. If a split produces chunks larger than `split_length`, it moves to the next (smaller) separator in the list.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/recursivesplitter.mdx)\n\n**Separator Priority:**\n\n| Priority | Separator | Context |\n|----------|-----------|---------|\n| 1 | `\"\\n\\n\"` | Paragraph breaks |\n| 2 | `\"\\n\"` | Line breaks |\n| 3 | `\". \"` | Sentence endings |\n| 4 | `\" \"` | Word boundaries |\n| 5 | `\"\"` | Character-level fallback |\n\n### HierarchicalDocumentSplitter\n\nThe `HierarchicalDocumentSplitter` is designed for structured documents that contain hierarchical headings and section markers. It preserves document structure by splitting at heading boundaries first.\n\n**Key Features:**\n\n- Detects heading patterns (e.g., `#`, `##`, `###` in Markdown)\n- Splits at the highest heading level available\n- Maintains hierarchical relationships between sections and subsections\n- Ideal for technical documentation and Markdown-based content\n\n```python\nfrom haystack.components.preprocessors import HierarchicalDocumentSplitter\n\nsplitter = HierarchicalDocumentSplitter(\n    split_by=\"sentence\",\n    split_length=10,\n    split_overlap=3\n)\n```\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/hierarchicaldocumentsplitter.mdx)\n\n## Document Cleaners\n\nDocument cleaners are preprocessing components that normalize and sanitize text content while preserving essential structure and metadata. 
They remove unwanted artifacts, standardize formatting, and enhance downstream processing quality.\n\n资料来源：[docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/preprocessors/documentcleaner.mdx)\n\n### Core Cleaning Operations\n\n| Operation | Description | Example |\n|-----------|-------------|---------|\n| Whitespace normalization | Collapse multiple spaces, trim line breaks | `\"  Hello\\n\\n  World  \"` → `\"Hello World\"` |\n| Character removal | Strip control characters and special symbols | Removes `\\x00` to `\\x1f` except `\\n`, `\\t` |\n| Quote normalization | Standardize quote characters | Smart quotes → straight quotes |\n| Heading normalization | Clean heading markers | Removes `#` from Markdown headings |\n\n### Common Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `remove_empty_lines` | `bool` | `True` | Remove lines with no content |\n| `remove_extra_whitespace` | `bool` | `True` | Normalize whitespace between words |\n| `remove_repeated_substrings` | `bool` | `False` | Eliminate duplicate consecutive substrings |\n\n## Converters\n\nConverters are components that transform external file formats into Haystack `Document` objects. 
They handle the ingestion pipeline by parsing various document formats and extracting both content and metadata.\n\n资料来源：[docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n\n### Supported Formats\n\n| Format | Converter Class | Features |\n|--------|-----------------|----------|\n| Plain Text | `TextConverter` | Direct text extraction |\n| PDF | `PdfToDocumentConverter` | Text and table extraction |\n| Markdown | `MarkdownToDocumentConverter` | Preserves structure and headings |\n| HTML | `HtmlToDocumentConverter` | Extracts text from HTML elements |\n| Microsoft Word | `DocxToDocumentConverter` | Document and paragraph parsing |\n\n### Converter Architecture\n\n```mermaid\ngraph LR\n    A[Input File] --> B[Format Detection]\n    B --> C[Format-Specific Parser]\n    C --> D[Content Extraction]\n    D --> E[Metadata Enrichment]\n    E --> F[Haystack Document]\n    \n    G[File Path] -.->|Direct Input| D\n    H[Binary Content] -.->|Raw Data| C\n```\n\n### Common Converter Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `encoding` | `str` | `\"utf-8\"` | Text encoding for file reading |\n| `encoding_errors` | `str` | `\"strict\"` | How to handle encoding errors |\n| `id_hash_keys` | `List[str]` | `[\"content\"]` | Keys for document ID generation |\n| `meta` | `Dict[str, Any]` | `{}` | Additional metadata to attach |\n\n资料来源：[docs-website/docs/pipeline-components/converters.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/converters.mdx)\n\n## Integration with Pipelines\n\nData Processing Components integrate seamlessly into Haystack pipelines as standard pipeline nodes. 
They can be composed in any order to create custom preprocessing workflows.\n\n### Typical Pipeline Configuration\n\n```python\nfrom haystack import Pipeline\nfrom haystack.components.preprocessors import DocumentCleaner, RecursiveSplitter\nfrom haystack.components.converters import TextConverter\n\npipeline = Pipeline()\npipeline.add_component(\"converter\", TextConverter())\npipeline.add_component(\"cleaner\", DocumentCleaner())\npipeline.add_component(\"splitter\", RecursiveSplitter(split_length=200, split_by=\"word\"))\n\npipeline.connect(\"converter\", \"cleaner\")\npipeline.connect(\"cleaner\", \"splitter\")\n```\n\n### Processing Order Recommendation\n\nWhile components can be connected in various orders, the recommended processing sequence is:\n\n1. **Convert** - Transform source files into `Document` objects\n2. **Clean** - Normalize and sanitize the text content\n3. **Split** - Divide documents into retrieval-optimized chunks\n\nThis sequence ensures that cleaning operations apply to the complete document before splitting, maintaining consistency across chunks.\n\n## Metadata Preservation\n\nAll Data Processing Components preserve and propagate document metadata throughout the processing pipeline. Metadata added during conversion is carried through cleaning and splitting operations.\n\n**Automatic Metadata Fields:**\n\n| Field | Source | Description |\n|-------|--------|-------------|\n| `source` | Converter | Original file path or URI |\n| `file_type` | Converter | Document format (pdf, txt, etc.) 
|\n| `page_number` | PDF Converter | Page number for page-level tracking |\n| `split_id` | Splitter | Unique identifier for each chunk |\n| `split_idx_start` | Splitter | Character offset where chunk begins |\n\n## Best Practices\n\n### Chunk Size Selection\n\n| Chunk Size | Recommended Use Case |\n|------------|---------------------|\n| 50-100 tokens | High-precision queries, precise fact extraction |\n| 200-300 tokens | Balanced retrieval, general Q&A |\n| 500+ tokens | Complex reasoning, multi-document synthesis |\n\n### Cleaning Configuration\n\n- Enable `remove_extra_whitespace` for all text-based content\n- Use `remove_empty_lines` when building dense indexes\n- Disable cleaning for Markdown/HTML if structure preservation is critical\n\n### Overlap Strategy\n\nWhen configuring `split_overlap`, consider:\n\n- **Low overlap (0-10%)**: Maximizes diversity, suitable for unique content\n- **Medium overlap (10-20%)**: Balances context preservation and diversity\n- **High overlap (20%+)**: Essential for documents with continuous context\n\n## Related Components\n\n- **Embedding Generators**: Process chunks to create vector representations\n- **Document Stores**: Store and index processed chunks for retrieval\n- **Rankers**: Reorder retrieved chunks by relevance\n- **Prompt Engineers**: Combine chunks for LLM context windows\n\n---\n\n<a id='llm-integrations'></a>\n\n## LLM and Embedder Integrations\n\n### 相关页面\n\n相关主题：[Document Stores and Retrievers](#document-stores), [Pipeline Component Types](#component-types), [Development Guide](#development-guide)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx)\n- 
[docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx)\n- [docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx)\n- [docs-website/docs/concepts/integrations.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/integrations.mdx)\n</details>\n\n# LLM and Embedder Integrations\n\n## Overview\n\nLLM and Embedder Integrations in Haystack provide the core components for interfacing with Large Language Models and embedding services. These integrations enable developers to build production-ready applications powered by LLMs, Transformer models, and vector search capabilities.\n\n资料来源：[README.md:1-10]()\n\n## Architecture\n\nHaystack's integration architecture follows a modular pipeline design where Generators (LLMs) and Embedders serve as fundamental building blocks within the orchestration framework.\n\n```mermaid\ngraph TD\n    A[Haystack Pipeline] --> B[Retrieval Components]\n    A --> C[Generator Components]\n    A --> D[Embedder Components]\n    C --> E[LLM Providers]\n    D --> F[Embedding Models]\n    B --> F\n    E --> G[API Services]\n    F --> G\n```\n\n## Generator Integration\n\n### Purpose\n\nGenerators in Haystack are components that interact with Large Language Models to generate responses based on prompts and retrieved context. They serve as the core reasoning engine within RAG (Retrieval-Augmented Generation) pipelines.\n\n资料来源：[docs-website/docs/pipeline-components/generators/guides-to-generators/choosing-the-right-generator.mdx:1-15]()\n\n### Supported Providers\n\nHaystack supports multiple LLM providers through its integration system. 
The framework provides standardized interfaces for:\n\n| Provider | Integration Type | API Access |\n|----------|------------------|------------|\n| OpenAI | Chat Completions API | API Key |\n| Anthropic | Claude API | API Key |\n| Azure OpenAI | Azure OpenAI Service | Azure Credentials |\n| Hugging Face | Inference API / Local | API Key / Local |\n| Ollama | Local Models | Local Host |\n\n### Component Configuration\n\nGenerator components in Haystack follow a consistent initialization pattern:\n\n```python\nfrom haystack import Pipeline\nfrom haystack.components.generators import OpenAIChatGenerator\n\ngenerator = OpenAIChatGenerator(\n    api_key=\"your-api-key\",\n    model=\"gpt-4\",\n    streaming_callback=None,\n    generation_kwargs={\"temperature\": 0.7, \"max_tokens\": 500}\n)\n```\n\n## Embedder Integration\n\n### Purpose\n\nEmbedders are components that convert text into vector representations (embeddings) suitable for semantic search and similarity comparisons. They are essential for the retrieval portion of RAG pipelines.\n\n资料来源：[docs-website/docs/pipeline-components/embedders/choosing-the-right-embedder.mdx:1-20]()\n\n### Embedder Types\n\n| Type | Use Case | Deployment |\n|------|----------|------------|\n| Sentence Transformers | General text embeddings | Local / API |\n| OpenAI Embeddings | API-based generation | Remote |\n| Hugging Face | Transformer models | Local / Inference API |\n| Cohere | Multi-lingual support | API |\n\n### Integration with Retrievers\n\nEmbedders work in conjunction with document stores to enable semantic search:\n\n```mermaid\ngraph LR\n    A[Documents] --> B[Embedder]\n    B --> C[Vector Store]\n    C --> D[Retriever]\n    E[Query] --> F[Query Embedder]\n    F --> D\n    D --> G[Retrieved Docs]\n```\n\n## Function Calling\n\nFunction calling extends LLM integrations to enable structured interactions between LLMs and external tools. 
This feature allows Generators to produce structured outputs that can trigger specific actions.\n\n资料来源：[docs-website/docs/pipeline-components/generators/guides-to-generators/function-calling.mdx:1-30]()\n\n### Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Pipeline\n    participant LLM\n    participant Tool\n    \n    User->>Pipeline: Query with function definitions\n    Pipeline->>LLM: Send prompt + function specs\n    LLM->>LLM: Analyze request\n    LLM-->>Pipeline: Function call + parameters\n    Pipeline->>Tool: Execute function\n    Tool-->>Pipeline: Function result\n    Pipeline->>LLM: Send result + original context\n    LLM-->>Pipeline: Final response\n    Pipeline-->>User: Return answer\n```\n\n## Integration Configuration\n\n### Environment Setup\n\nIntegrations in Haystack typically require API credentials which can be configured via environment variables:\n\n```bash\nexport OPENAI_API_KEY=\"your-openai-key\"\nexport ANTHROPIC_API_KEY=\"your-anthropic-key\"\nexport HUGGINGFACE_TOKEN=\"your-hf-token\"\n```\n\n资料来源：[docs-website/docs/concepts/integrations.mdx:1-25]()\n\n### Configuration Options\n\n| Parameter | Description | Default |\n|-----------|-------------|---------|\n| `api_key` | Provider API key | Environment variable |\n| `model` | Model identifier | Provider default |\n| `timeout` | Request timeout in seconds | 60 |\n| `max_retries` | Number of retry attempts | 3 |\n\n## Pipeline Integration Example\n\n```python\nfrom haystack import Pipeline\nfrom haystack.components.retrievers import InMemoryBM25Retriever\nfrom haystack.components.generators import OpenAIChatGenerator\nfrom haystack.document_stores import InMemoryDocumentStore\n\n# Initialize components\ndocument_store = InMemoryDocumentStore()\nretriever = InMemoryBM25Retriever(document_store=document_store)\ngenerator = OpenAIChatGenerator(model=\"gpt-4\")\n\n# Build pipeline\npipeline = Pipeline()\npipeline.add_component(\"retriever\", 
retriever)\npipeline.add_component(\"generator\", generator)\npipeline.connect(\"retriever\", \"generator\")\n```\n\n## Installation\n\nTo use LLM and Embedder integrations, install the appropriate Haystack packages:\n\n```sh\n# Core package\npip install haystack-ai\n\n# For specific integrations\npip install \"haystack-ai[openai]\"    # OpenAI models\npip install \"haystack-ai[anthropic]\"  # Anthropic Claude\npip install \"haystack-ai[transformers]\" # Hugging Face\n```\n\n## Additional Resources\n\n- [Documentation Site](https://docs.haystack.deepset.ai)\n- [GitHub Repository](https://github.com/deepset-ai/haystack)\n- [Integration Guides](https://docs.haystack.deepset.ai/docs/integrations)\n\n---\n\n<a id='document-stores'></a>\n\n## Document Stores and Retrievers\n\n### 相关页面\n\n相关主题：[LLM and Embedder Integrations](#llm-integrations), [Data Processing Components](#data-processing)\n\n<details>\n<summary>Related Source Files</summary>\n\nThe following source files were used to generate this page:\n\n- [docs-website/docs/concepts/document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store.mdx)\n- [docs-website/docs/concepts/document-store/choosing-a-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store/choosing-a-document-store.mdx)\n- [docs-website/docs/document-stores/inmemorydocumentstore.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/inmemorydocumentstore.mdx)\n- [docs-website/docs/document-stores/elasticsearch-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/elasticsearch-document-store.mdx)\n- [docs-website/docs/document-stores/qdrant-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/qdrant-document-store.mdx)\n- 
[docs-website/docs/document-stores/pinecone-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/pinecone-document-store.mdx)\n</details>\n\n# Document Stores and Retrievers\n\nDocument Stores and Retrievers are fundamental components in the Haystack framework that enable efficient storage, indexing, and retrieval of documents for LLM-powered applications. These components form the backbone of retrieval-augmented generation (RAG) pipelines and semantic search systems.\n\n## Overview\n\nHaystack provides a unified abstraction layer for document storage and retrieval, allowing developers to work with different backend technologies through a consistent interface. The framework supports multiple document store implementations, each optimized for different use cases, scales, and deployment requirements.\n\nDocument Stores in Haystack handle the persistence and indexing of documents, while Retrievers are specialized components that query these stores to find relevant documents based on user queries. 
This separation of concerns allows for flexible pipeline composition and easy swapping of storage backends.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Retriever]\n    B --> C[Document Store]\n    C --> D[(Vector Index)]\n    C --> E[(Document DB)]\n    F[Documents] --> C\n    G[Embedding Model] --> D\n    B --> H[Query Embedding]\n    H --> D\n    D --> I[Relevant Documents]\n    I --> J[RAG Pipeline]\n```\n\nThe architecture separates concerns between storage and retrieval, enabling optimized implementations for each layer.\n\n## Document Store Types\n\nHaystack supports multiple document store implementations, each with distinct characteristics:\n\n| Document Store | Type | Use Case | Scalability |\n|----------------|------|----------|--------------|\n| InMemoryDocumentStore | In-memory | Development, testing, small datasets | Single machine, limited scale |\n| ElasticsearchDocumentStore | Distributed search | Production, full-text search | Horizontal scaling |\n| QdrantDocumentStore | Vector database | Semantic search, embeddings | High-dimensional vectors |\n| PineconeDocumentStore | Managed vector DB | Cloud-native, managed infrastructure | Global distribution |\n\n### InMemoryDocumentStore\n\nThe `InMemoryDocumentStore` is the simplest document store implementation, storing all data in memory. It is primarily used for development, testing, and prototyping scenarios where persistence is not required.\n\n**Key Characteristics:**\n- No external dependencies required\n- Fast read/write operations for small datasets\n- Data lost on application restart\n- Not suitable for production deployments with large volumes\n\n资料来源：[docs-website/docs/document-stores/inmemorydocumentstore.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/inmemorydocumentstore.mdx)\n\n### ElasticsearchDocumentStore\n\nElasticsearch provides a mature, production-ready document store with powerful full-text search capabilities. 
It is well-suited for applications requiring sophisticated text analysis, faceted search, and scalable infrastructure.\n\n**Key Characteristics:**\n- Distributed architecture for high availability\n- Rich query DSL for complex search operations\n- BM25 ranking algorithm for relevance scoring\n- Supports millions of documents\n\n资料来源：[docs-website/docs/document-stores/elasticsearch-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/elasticsearch-document-store.mdx)\n\n### QdrantDocumentStore\n\nQdrant is a vector database optimized for similarity search and high-dimensional embeddings. It provides efficient nearest neighbor search operations essential for semantic retrieval.\n\n**Key Characteristics:**\n- Optimized for vector similarity search\n- Supports payload filtering\n- Hybrid sparse-dense vector search\n- gRPC-based API for performance\n\n资料来源：[docs-website/docs/document-stores/qdrant-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/qdrant-document-store.mdx)\n\n### PineconeDocumentStore\n\nPinecone is a managed vector database service that eliminates infrastructure management overhead. 
It provides global distribution and automatic scaling for production deployments.\n\n**Key Characteristics:**\n- Fully managed cloud service\n- Automatic scaling and sharding\n- Multi-tenancy support\n- Low-latency querying at scale\n\n资料来源：[docs-website/docs/document-stores/pinecone-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/document-stores/pinecone-document-store.mdx)\n\n## Choosing a Document Store\n\nSelecting the appropriate document store depends on several factors including scale, performance requirements, deployment environment, and feature needs.\n\n资料来源：[docs-website/docs/concepts/document-store/choosing-a-document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store/choosing-a-document-store.mdx)\n\n### Decision Criteria\n\n| Factor | InMemory | Elasticsearch | Qdrant | Pinecone |\n|--------|----------|---------------|--------|----------|\n| Dataset Size | < 100K docs | Unlimited | Unlimited | Unlimited |\n| Latency | Very low | Medium | Low | Low |\n| Persistence | None | Full | Full | Full |\n| Full-text Search | Basic | Advanced | Limited | Limited |\n| Vector Search | Basic | Plugin required | Native | Native |\n| Managed Service | No | Self-hosted/Cloud | Self-hosted/Cloud | Yes (managed) |\n| Cost | Free | Infrastructure | Infrastructure | Usage-based |\n\n### Recommendations\n\n**Development and Testing:**\nUse `InMemoryDocumentStore` for rapid prototyping and unit testing. It requires no setup and provides immediate feedback.\n\n**Production with Full-text Search:**\nChoose `ElasticsearchDocumentStore` when your application requires complex text queries, aggregations, or you already have an Elasticsearch infrastructure.\n\n**Semantic Search at Scale:**\nSelect `QdrantDocumentStore` or `PineconeDocumentStore` for applications primarily relying on embedding-based similarity search. 
Both provide native vector operations with efficient indexing.\n\n## Document Model\n\nDocuments in Haystack follow a standardized data model that captures content, metadata, and embedding vectors.\n\n```mermaid\nclassDiagram\n    class Document {\n        +str id\n        +str content\n        +dict meta\n        +List[float] embedding\n        +str blob\n        +str blob_mime_type\n    }\n```\n\n**Core Document Fields:**\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | string | Unique identifier for the document |\n| `content` | string | Main text content of the document |\n| `meta` | dict | Arbitrary metadata (source, author, date, etc.) |\n| `embedding` | list[float] | Vector representation for semantic search |\n\n资料来源：[docs-website/docs/concepts/document-store.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/document-store.mdx)\n\n## Retriever Types\n\nRetrievers query document stores to find the most relevant documents for a given query. Haystack provides multiple retriever implementations optimized for different search strategies.\n\n### Dense Retrievers\n\nDense retrievers use neural network models to encode queries and documents into dense vector representations. They excel at capturing semantic meaning and handling synonyms.\n\n### Sparse Retrievers\n\nSparse retrievers use traditional information retrieval techniques like BM25 or TF-IDF. 
They are effective for exact term matching and keyword-based queries.\n\n### Hybrid Retrievers\n\nHybrid retrievers combine both dense and sparse approaches, leveraging the strengths of each to provide robust retrieval across different query types.\n\n## Pipeline Integration\n\n```mermaid\ngraph LR\n    A[Query] --> B[Retriever]\n    B --> C[Document Store]\n    C --> D[Top-K Documents]\n    D --> E[Ranker]\n    E --> F[Reader/Generator]\n    F --> G[Answer]\n```\n\nDocument Stores and Retrievers integrate seamlessly into Haystack pipelines, typically appearing early in the pipeline to fetch candidate documents before passing them to downstream components like Readers or Generators.\n\n## Basic Usage Example\n\n```python\nfrom haystack import Document\nfrom haystack.document_stores.in_memory import InMemoryDocumentStore\nfrom haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n\n# Initialize document store\ndocument_store = InMemoryDocumentStore()\n\n# Write documents\ndocuments = [\n    Document(content=\"Haystack is an open-source NLP framework\", meta={\"source\": \"docs\"}),\n    Document(content=\"It supports retrieval-augmented generation\", meta={\"source\": \"blog\"}),\n]\ndocument_store.write_documents(documents)\n\n# Initialize retriever\nretriever = InMemoryBM25Retriever(document_store=document_store)\n\n# Query\nresults = retriever.run(query=\"What is Haystack?\", top_k=10)[\"documents\"]\n```\n\n## Performance Considerations\n\n### Indexing Performance\n\n| Store | Indexing Speed | Memory Usage |\n|-------|----------------|--------------|\n| InMemory | Very Fast | Proportional to dataset |\n| Elasticsearch | Medium | Distributed across nodes |\n| Qdrant | Fast | Optimized for vectors |\n| Pinecone | Fast | Managed externally |\n\n### Query Performance\n\nQuery latency depends on the number of documents, vector dimensions, and the complexity of filters applied. 
Vector databases like Qdrant and Pinecone use specialized indexing structures (HNSW, IVF) to achieve sub-millisecond query times on large datasets.\n\n## See Also\n\n- [Document Store Concepts](docs/concepts/document-store.mdx) - Detailed conceptual overview\n- [Choosing a Document Store](docs/concepts/document-store/choosing-a-document-store.mdx) - Selection guide\n- [Pipeline Components](../pipeline-components/overview.mdx) - How retrievers fit into pipelines\n- [Embedding Models](../components/embedder.mdx) - Generating document embeddings\n\n---\n\n<a id='agents'></a>\n\n## Agent Systems\n\n### 相关页面\n\n相关主题：[Introduction to Haystack](#introduction), [Pipeline Architecture](#pipeline-architecture), [LLM and Embedder Integrations](#llm-integrations)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docs-website/docs/concepts/agents.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents.mdx)\n- [docs-website/docs/concepts/agents/multi-agent-systems.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents/multi-agent-systems.mdx)\n- [docs-website/docs/pipeline-components/agents-1/agent.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/agent.mdx)\n- [docs-website/docs/pipeline-components/agents-1/state.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/state.mdx)\n- [docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx)\n</details>\n\n# Agent Systems\n\nAgent systems in Haystack represent a powerful paradigm for building autonomous and semi-autonomous AI applications that can perceive, reason, act, and interact with their environment. 
Haystack's agent framework enables developers to create sophisticated LLM-powered applications where agents can use tools, maintain state, collaborate with other agents, and incorporate human feedback into their decision-making processes.\n\n## Overview\n\nHaystack agents are designed to extend beyond simple prompt-response interactions by providing a structured mechanism for Large Language Models to take actions, make decisions, and execute multi-step workflows. The agent system in Haystack is built with flexibility and modularity in mind, allowing developers to customize every aspect of agent behavior from the underlying model to the specific tools available and the logic governing agent decisions.\n\nThe framework supports a variety of agent types and architectures, ranging from single-agent systems that handle specific tasks to complex multi-agent ecosystems where multiple specialized agents collaborate to solve problems. This flexibility makes Haystack suitable for a wide range of use cases, from simple question-answering applications to sophisticated autonomous systems that can browse the web, execute code, and coordinate with other agents to complete complex tasks.\n\n## Core Architecture\n\nThe agent architecture in Haystack is built around a pipeline-based model that connects perception, reasoning, action selection, and execution into a cohesive workflow. 
At its core, an agent consists of several key components that work together to enable autonomous behavior.\n\n### Agent Components\n\n| Component | Purpose | Description |\n|-----------|---------|-------------|\n| LLM | Reasoning Engine | The underlying language model that drives decision-making |\n| Tools | Action Interface | Capabilities that allow the agent to interact with external systems |\n| Prompt Builder | Instruction Assembly | Constructs prompts that guide agent behavior |\n| Output Handler | Response Processing | Interprets and executes agent decisions |\n| Memory | State Management | Maintains conversation history and context |\n\n资料来源：[docs-website/docs/pipeline-components/agents-1/agent.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/agent.mdx)\n\n### Execution Flow\n\n```mermaid\ngraph TD\n    A[User Input] --> B[Agent Receives Task]\n    B --> C[LLM Reasoning]\n    C --> D{Tool Selection?}\n    D -->|Yes| E[Execute Tool]\n    E --> F[Process Result]\n    D -->|No| G[Generate Response]\n    F --> C\n    G --> H[Return to User]\n    C --> I{Human Input Needed?}\n    I -->|Yes| J[Pause for Human Feedback]\n    J --> C\n    I -->|No| D\n```\n\nThe execution flow demonstrates how Haystack agents operate in a loop, continuously reasoning about the best course of action until the task is complete. The agent receives input, reasons about what to do, selects and executes tools as needed, and continues until it can provide a final response or requires additional input from the user or human overseer.\n\n## State Management\n\nState management is a critical aspect of agent systems, enabling agents to maintain context across multiple interactions and track the progress of complex, multi-step tasks. 
Haystack provides a flexible state management system that allows agents to store, retrieve, and update information throughout their execution lifecycle.\n\n### State Structure\n\nThe state system in Haystack agents typically includes several key elements that together form a comprehensive view of the agent's current situation and history. These elements enable the agent to maintain awareness of what has happened previously, what actions have been taken, and what information has been gathered.\n\n| State Element | Type | Description |\n|--------------|------|-------------|\n| Conversation History | List | Previous messages and interactions |\n| Tool Usage Log | List | Record of tools called and results |\n| Intermediate Results | Dict | Data collected during task execution |\n| User Preferences | Dict | Learned user preferences and feedback |\n| Task Progress | Dict | Current status of ongoing tasks |\n\n资料来源：[docs-website/docs/pipeline-components/agents-1/state.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/state.mdx)\n\n### State Persistence\n\nAgents in Haystack can maintain state across sessions, enabling persistent memory and long-term learning. This is particularly valuable for applications where the agent needs to build relationships with users over time or maintain knowledge about specific domains or tasks. The state management system supports various backends for persistence, from simple in-memory storage to distributed databases for production deployments.\n\n## Multi-Agent Systems\n\nHaystack supports the creation of sophisticated multi-agent systems where multiple specialized agents work together to solve problems. 
This architectural pattern enables the decomposition of complex tasks into smaller, manageable subtasks that can be handled by agents with specialized capabilities.\n\n### Agent Collaboration Patterns\n\n```mermaid\ngraph TD\n    subgraph Coordinator Agent\n        A[Task Received] --> B{Analyze Task}\n        B --> C[Decompose into Subtasks]\n    end\n    \n    subgraph Specialized Agents\n        D[Agent A: Research]\n        E[Agent B: Analysis]\n        F[Agent C: Synthesis]\n    end\n    \n    C --> D\n    C --> E\n    C --> F\n    D --> G[Results Aggregation]\n    E --> G\n    F --> G\n    G --> H[Final Response]\n```\n\nMulti-agent systems in Haystack can be configured with various collaboration patterns. In the supervisor pattern, a single coordinating agent directs the work of subordinate agents, assigning tasks and collecting results. In the collaborative pattern, agents work together as equals, sharing information and contributing their expertise to solve problems collectively.\n\n### Communication Protocols\n\nAgents in a multi-agent system communicate through well-defined interfaces that specify how messages are passed between agents, how responses are aggregated, and how conflicts are resolved. This structured approach to agent communication ensures reliable operation even in complex agent ecosystems with many participants.\n\n资料来源：[docs-website/docs/concepts/agents/multi-agent-systems.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents/multi-agent-systems.mdx)\n\n## Human-in-the-Loop\n\nHaystack agents support human-in-the-loop workflows, enabling humans to provide guidance, approval, or corrections during agent execution. 
This capability is essential for applications where autonomous operation must be balanced with human oversight and control.\n\n### Interaction Modes\n\n| Mode | Description | Use Case |\n|------|-------------|----------|\n| Approval | Human approves agent actions before execution | High-stakes decisions |\n| Feedback | Human provides corrective feedback during execution | Fine-tuning agent behavior |\n| Escalation | Agent defers to human when uncertain | Handling edge cases |\n| Validation | Human validates agent outputs before completion | Quality assurance |\n\n资料来源：[docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/pipeline-components/agents-1/human-in-the-loop.mdx)\n\n### Workflow Integration\n\n```mermaid\ngraph TD\n    A[Agent Task] --> B{Requires Human Input?}\n    B -->|Yes| C[Pause Execution]\n    C --> D[Notify Human]\n    D --> E[Await Response]\n    E --> F{Human Action}\n    F -->|Approve| G[Continue Execution]\n    F -->|Reject| H[Abort or Retry]\n    F -->|Modify| I[Apply Modifications]\n    B -->|No| G\n    I --> G\n    G --> J[Task Complete]\n```\n\nThe human-in-the-loop system is designed to be non-intrusive, minimizing the cognitive load on human overseers while ensuring that critical decisions receive appropriate human review. Agents can be configured to automatically escalate certain types of decisions based on predefined rules, such as actions that affect sensitive data or exceed specified cost thresholds.\n\n## Tool Integration\n\nA defining characteristic of Haystack agents is their ability to use tools to interact with external systems and perform actions beyond text generation. 
The tool integration system provides a standardized interface for defining, registering, and invoking tools that extend agent capabilities.\n\n### Available Tool Categories\n\n| Category | Examples | Capabilities |\n|----------|----------|--------------|\n| Web Search | Google Search, Bing Search | Internet research, fact checking |\n| API Clients | REST, GraphQL | External service integration |\n| Code Execution | Python, Shell | Computation, automation |\n| Document Processing | PDF, CSV parsers | Information extraction |\n| Database | SQL, Vector DB | Data retrieval, storage |\n\nTools in Haystack follow a consistent interface that makes it easy to create custom tools for domain-specific applications. Each tool is defined with a name, description, input schema, and implementation, and the underlying LLM decides when and how to call a tool based on its name, description, and input schema.\n\n## Configuration Options\n\nHaystack agents expose a wide range of configuration options that allow developers to customize agent behavior for specific use cases. These options control aspects ranging from the underlying model selection to detailed parameters governing agent decision-making.\n\n### Core Configuration Parameters\n\nThe table lists the main init parameters of the Haystack 2.x `Agent` component; verify names and defaults against the API reference for the installed version.\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `chat_generator` | ChatGenerator | Required | The chat generator (LLM) used for reasoning; it must support tool calling |\n| `tools` | List of Tool or Toolset | None | Available tools for the agent |\n| `system_prompt` | String | None | System prompt that guides agent behavior |\n| `exit_conditions` | List of strings | `[\"text\"]` | Conditions that end the loop: `text` or the names of tools |\n| `max_agent_steps` | Integer | 100 | Maximum number of steps the agent runs before stopping |\n\nAdvanced configuration options allow developers to customize how the agent reasons, how it selects tools, and how it handles errors and edge cases. 
These options can be set at the agent level or overridden for specific use cases.\n\n## Best Practices\n\nWhen building agent systems with Haystack, several best practices can help ensure reliable and maintainable applications. Careful attention to prompt design, tool definitions, and error handling will significantly improve agent performance and user experience.\n\nClear and specific tool descriptions are essential for guiding agent behavior. Tools should have descriptive names and comprehensive descriptions that explain not just what the tool does, but when and why an agent should consider using it. This helps the underlying LLM make informed decisions about tool selection.\n\nState management should be designed with the target use case in mind. For simple single-turn interactions, minimal state management is appropriate. For complex multi-step tasks, comprehensive state tracking ensures the agent maintains context and can recover from errors gracefully.\n\nHuman-in-the-loop integration should be thoughtfully designed to balance autonomy with oversight. Critical decisions should require human approval, while routine operations can proceed autonomously. The escalation criteria should be clearly defined and regularly reviewed.\n\n## Summary\n\nHaystack's agent systems provide a comprehensive framework for building LLM-powered applications that can perceive, reason, and act. The architecture supports everything from simple single-agent applications to complex multi-agent ecosystems with human oversight. 
Key features include flexible state management, extensive tool integration, human-in-the-loop workflows, and configurable agent behavior.\n\n资料来源：[docs-website/docs/concepts/agents.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/concepts/agents.mdx)\n\n---\n\n<a id='development-guide'></a>\n\n## Development Guide\n\n### 相关页面\n\n相关主题：[Deployment and Infrastructure](#deployment), [Introduction to Haystack](#introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/deepset-ai/haystack/blob/main/README.md)\n- [docs-website/README.md](https://github.com/deepset-ai/haystack/blob/main/docs-website/README.md)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- [pydoc/README.md](https://github.com/deepset-ai/haystack/blob/main/pydoc/README.md)\n- [examples/README.md](https://github.com/deepset-ai/haystack/blob/main/examples/README.md)\n</details>\n\n# Development Guide\n\nThis guide provides comprehensive information for developers who want to contribute to Haystack or extend its functionality. Haystack is an end-to-end LLM framework that enables building applications powered by Large Language Models, Transformer models, and vector search capabilities.\n\n## Overview\n\nHaystack is an open-source framework maintained by deepset that allows developers to build production-ready AI applications. 
The framework supports retrieval-augmented generation (RAG), document search, question answering, and answer generation by orchestrating state-of-the-art embedding models and LLMs into pipelines.\n\n资料来源：[README.md:1-10]()\n\n## Project Structure\n\nThe Haystack repository is organized into several main directories, each serving a specific purpose in the overall project ecosystem.\n\n```mermaid\ngraph TD\n    A[haystack/ root] --> B[Main Package]\n    A --> C[docs-website/]\n    A --> D[docker/]\n    A --> E[pydoc/]\n    A --> F[examples/]\n    \n    B --> G[Core Framework Code]\n    C --> H[Documentation Site]\n    D --> I[Docker Images]\n    E --> J[API Reference Generation]\n    F --> K[Example Cookbooks]\n```\n\n### Directory Breakdown\n\n| Directory | Purpose |\n|-----------|---------|\n| `haystack/` | Main Python package containing core framework code |\n| `docs-website/` | Docusaurus-powered documentation site |\n| `docker/` | Docker image definitions and build configurations |\n| `pydoc/` | YAML configurations for API reference generation |\n| `examples/` | Example applications and cookbooks (moved to haystack-cookbook) |\n\n资料来源：[docs-website/README.md:40-55]()\n\n## Installation for Development\n\n### Standard Installation\n\nTo set up Haystack for development, install the package via pip:\n\n```bash\npip install haystack-ai\n```\n\n### Nightly Pre-releases\n\nFor trying the newest features before official releases:\n\n```bash\npip install --pre haystack-ai\n```\n\n### Docker-based Development\n\nHaystack provides Docker images for development environments. 
The base image contains a working Python environment with Haystack preinstalled and is designed to be derived `FROM`.\n\n```bash\ndocker buildx bake base\n```\n\nTo build custom images with specific branches or tags:\n\n```sh\nHAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake base --no-cache\n```\n\n资料来源：[docker/README.md:15-30]()\n\n### Multi-Platform Docker Builds\n\nHaystack images support multiple architectures. To limit builds to your local architecture:\n\n```bash\n# For Apple M1 (ARM)\ndocker buildx bake base --set \"*.platform=linux/arm64\"\n```\n\n资料来源：[docker/README.md:40-45]()\n\n## Documentation Development\n\nThe documentation website is built with Docusaurus 3 and provides comprehensive guides, tutorials, API references, and best practices for using Haystack.\n\n### Prerequisites\n\n- **Node.js** 18 or higher\n- **npm** (included with Node.js) or Yarn\n\n### Setting Up the Documentation Site\n\n```bash\n# Clone the repository and navigate to docs-website\ngit clone https://github.com/deepset-ai/haystack.git\ncd haystack/docs-website\n\n# Install dependencies\nnpm install\n\n# Start the development server\nnpm start\n\n# The site opens at http://localhost:3000 with live reload\n```\n\n### Common Documentation Tasks\n\n| Task | Command | Location |\n|------|---------|----------|\n| Edit a page | Update files under `docs/` or `versioned_docs/` | Preview at http://localhost:3000 |\n| Add to sidebar | Update `sidebars.js` with doc ID | `docs-website/` |\n| Production check | `npm run build && npm run serve` | `docs-website/` |\n\n资料来源：[docs-website/README.md:20-35]()\n\n### Documentation Project Structure\n\n```\ndocs-website/\n├── docs/                          # Main documentation (guides, tutorials, concepts)\n│   ├── _templates/               # Authoring templates (excluded from build)\n│   ├── concepts/                 # Core Haystack concepts\n│   ├── pipeline-components/      # Component documentation\n│   └── ...\n├── 
reference/                     # API reference (auto-generated, do not edit manually)\n├── versioned_docs/               # Versioned copies of docs/\n├── reference_versioned_docs/     # Versioned copies of reference/\n├── src/                          # React components and custom code\n│   ├── components/              # Custom React components\n│   ├── css/                     # Global styles\n│   ├── pages/                   # Custom pages\n│   ├── remark/                  # Remark plugins\n│   └── theme/                   # Docusaurus theme customization\n```\n\n资料来源：[docs-website/README.md:45-60]()\n\n## API Reference Development\n\nThe API reference is generated automatically from docstrings in the code using [haystack-pydoc-tools](https://github.com/deepset-ai/haystack-pydoc-tools). A GitHub workflow regenerates the API reference when code changes.\n\n### How API Reference Works\n\n1. Create a `.yml` file in the `pydoc` directory\n2. Configure how haystack-pydoc-tools will generate the page\n3. Commit the configuration to the main branch\n4. The GitHub workflow automatically generates the Markdown files\n\n### Version Management\n\nAll updates to API reference live in unstable docs version and are promoted to stable docs version when a new version is released.\n\n资料来源：[pydoc/README.md:1-20]()\n\n## Contributing to Haystack\n\nHaystack welcomes community contributions ranging from quick fixes like typo corrections to entirely new features.\n\n### Contribution Areas\n\n| Area | Repository | Description |\n|------|------------|-------------|\n| Main Haystack | `deepset-ai/haystack` | Core framework development |\n| Integrations | `deepset-ai/haystack-core-integrations` | Integration components |\n| Documentation | `haystack/docs-website` | Documentation content |\n\n### Getting Started\n\n1. Review the Contributor Guidelines in [CONTRIBUTING.md](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md)\n2. 
Check the [full list of open issues](https://github.com/orgs/deepset-ai/projects/14) available for contributions\n3. You don't need to be a Haystack expert to provide meaningful improvements\n\n### CI/CD and Quality Standards\n\nThe project maintains high quality standards through automated checks:\n\n| Check | Badge | Description |\n|-------|-------|-------------|\n| Tests | GitHub Actions | Automated test suite |\n| Type Checking | Mypy | Static type analysis |\n| Code Coverage | Coverage Badge | Test coverage reporting |\n| Linting | Ruff | Code style enforcement |\n| License Compliance | License Check | Dependency license verification |\n\n资料来源：[README.md:30-55]()\n\n## Development Workflow\n\n```mermaid\ngraph TD\n    A[Start Development] --> B[Clone Repository]\n    B --> C[Set Up Environment]\n    C --> D[Install Dependencies]\n    D --> E[Make Changes]\n    E --> F[Run Tests]\n    F --> G{Tests Pass?}\n    G -->|No| H[Fix Issues]\n    H --> E\n    G -->|Yes| I[Run Linters]\n    I --> J{Code Quality OK?}\n    J -->|No| K[Address Linter Issues]\n    K --> E\n    J -->|Yes| L[Submit Pull Request]\n    L --> M[Review Process]\n    M --> N[Merge to Main]\n```\n\n## Examples and Cookbooks\n\nExample applications have been moved to a dedicated repository. All example cookbooks are now located at:\n\n**Repository:** [https://github.com/deepset-ai/haystack-cookbook/](https://github.com/deepset-ai/haystack-cookbook/)\n\nThis separation allows for more focused development and easier discovery of example applications.\n\n资料来源：[examples/README.md:1-10]()\n\n## License and Compliance\n\nAll contributions must comply with the project's license. 
View license information at:\n\n- [https://github.com/deepset-ai/haystack/blob/main/LICENSE](https://github.com/deepset-ai/haystack/blob/main/LICENSE)\n\nThe project includes automated license compliance checking through GitHub workflows.\n\n资料来源：[docker/README.md:50-60]()\n\n## Quick Reference Commands\n\n| Command | Purpose |\n|---------|---------|\n| `pip install haystack-ai` | Install Haystack |\n| `pip install --pre haystack-ai` | Install pre-release version |\n| `npm install` | Install documentation dependencies |\n| `npm start` | Start documentation dev server |\n| `npm run build` | Build documentation site |\n| `docker buildx bake base` | Build Docker base image |\n\n## Additional Resources\n\n- **Documentation Site:** [https://docs.haystack.deepset.ai](https://docs.haystack.deepset.ai)\n- **GitHub Repository:** [https://github.com/deepset-ai/haystack](https://github.com/deepset-ai/haystack)\n- **Community:** [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) and [Stack Overflow](https://stackoverflow.com/questions/tagged/haystack)\n- **Discord:** Join the [Haystack Discord community](https://discord.gg/VBpFBDegHY)\n\n---\n\n<a id='deployment'></a>\n\n## Deployment and Infrastructure\n\n### 相关页面\n\n相关主题：[Development Guide](#development-guide), [Introduction to Haystack](#introduction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [docker/Dockerfile.base](https://github.com/deepset-ai/haystack/blob/main/docker/Dockerfile.base)\n- [docker/README.md](https://github.com/deepset-ai/haystack/blob/main/docker/README.md)\n- [docs-website/docs/development/deployment.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/deployment.mdx)\n- [docs-website/docs/development/deployment/docker.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/deployment/docker.mdx)\n- 
[docs-website/docs/development/deployment/kubernetes.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/deployment/kubernetes.mdx)\n- [docs-website/docs/development/enabling-gpu-acceleration.mdx](https://github.com/deepset-ai/haystack/blob/main/docs-website/docs/development/enabling-gpu-acceleration.mdx)\n</details>\n\n# Deployment and Infrastructure\n\n## Overview\n\nHaystack provides a comprehensive deployment infrastructure designed for production-ready LLM applications. The framework supports multiple deployment strategies including Docker containers, Kubernetes orchestration, and cloud platform integrations. This documentation covers the core deployment mechanisms, containerization approach, GPU acceleration support, and production best practices.\n\nThe deployment system is built around Docker images using BuildKit for efficient multi-platform builds, enabling deployment across x86_64 and ARM64 architectures. The infrastructure supports both development environments and production-grade deployments with high availability requirements.\n\n## Docker Containerization\n\n### Base Images\n\nHaystack provides pre-built Docker images that serve as the foundation for custom deployments. The base images contain a working Python environment with Haystack preinstalled and are intended to be extended with application-specific configurations.\n\nThe primary image variant available is:\n\n| Image Tag | Description | Use Case |\n|-----------|-------------|----------|\n| `haystack:base-<version>` | Base Python environment with Haystack | Custom image derivation |\n\nAll images are published to Docker Hub and can be pulled directly for use in production environments. The images follow semantic versioning and align with Haystack releases.\n\n### Building Custom Images\n\nCustom images can be built using Docker BuildKit and the `bake` command orchestrator. 
This approach allows for:\n\n- Custom Haystack versions or branches\n- Pre-installed dependencies\n- Application-specific configurations\n- Multi-platform support\n\nThe build process uses the `docker-bake.hcl` configuration file which defines build targets, platforms, and variable substitutions.\n\n#### Basic Build Command\n\n```sh\ndocker buildx bake base\n```\n\n#### Building with Custom Variables\n\nTo build with a custom Haystack version or branch, override the `HAYSTACK_VERSION` variable:\n\n```sh\nHAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake base --no-cache\n```\n\nThis mechanism enables CI/CD pipelines to build images from specific commits, branches, or release tags without modifying the underlying Dockerfile.\n\n### Multi-Platform Builds\n\nHaystack Docker images support multiple architectures including:\n\n- `linux/amd64` (x86_64)\n- `linux/arm64` (ARM64)\n\n#### Platform Limitations\n\nDepending on the operating system and Docker environment, building all platforms locally may not be possible. If encountering the following error:\n\n```\nmultiple platforms feature is currently not supported for docker driver. Please switch to a different driver\n(eg. \"docker buildx create --use\")\n```\n\nThe platform option must be overridden to match the local architecture. 
For example, on Apple M1 (ARM64):\n\n```sh\ndocker buildx bake base --set \"*.platform=linux/arm64\"\n```\n\n#### Cross-Platform Considerations\n\nWhen deploying multi-platform images, consider the following:\n\n- **CPU Compatibility**: Ensure target nodes match the built architecture\n- **Performance**: Native architecture builds perform optimally\n- **Registry Support**: Use registries that support multi-platform manifests\n\n## GPU Acceleration\n\n### Hardware Acceleration Support\n\nHaystack supports GPU acceleration for compute-intensive operations including:\n\n- Model inference\n- Embedding generation\n- Tokenization\n- Custom model operations\n\nGPU acceleration significantly improves throughput for LLM-based pipelines and embedding-heavy workloads.\n\n### Enabling GPU Support\n\n#### NVIDIA GPUs (CUDA)\n\nFor NVIDIA GPU support, use CUDA-enabled base images and ensure the nvidia-container-toolkit is installed on the host system.\n\n**Docker Compose Example:**\n\n```yaml\nservices:\n  haystack:\n    image: haystack:base-latest\n    runtime: nvidia\n    environment:\n      - NVIDIA_VISIBLE_DEVICES=all\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              count: 1\n              capabilities: [gpu]\n```\n\n#### AMD GPUs (ROCm)\n\nAMD GPU support requires ROCm-enabled images and appropriate runtime configuration.\n\n### GPU Memory Management\n\nFor production deployments, configure memory limits based on model size:\n\n| Model Size | Recommended GPU Memory | Configuration |\n|------------|------------------------|---------------|\n| Small (<1B params) | 8 GB | `CUDA_VISIBLE_DEVICES=0` |\n| Medium (1-7B params) | 16 GB | `CUDA_VISIBLE_DEVICES=0,1` |\n| Large (7-70B params) | 32+ GB | Multi-GPU / quantization |\n\n### Quantization Options\n\nTo reduce GPU memory requirements, consider model quantization:\n\n- **4-bit quantization**: Reduces memory by ~75%\n- **8-bit quantization**: Reduces memory 
by ~50%\n- **Dynamic quantization**: Trade-off between speed and accuracy\n\n## Kubernetes Deployment\n\n### Container Orchestration\n\nHaystack can be deployed on Kubernetes for production environments requiring:\n\n- Horizontal scaling\n- High availability\n- Rolling updates\n- Resource management\n- Service discovery\n\n### Resource Configuration\n\n#### Resource Limits\n\nConfigure CPU and memory limits based on workload:\n\n```yaml\nresources:\n  limits:\n    cpu: \"4\"\n    memory: \"16Gi\"\n  requests:\n    cpu: \"2\"\n    memory: \"8Gi\"\n```\n\n#### GPU Resource Allocation\n\nFor GPU workloads, define accelerator resources. GPUs are extended resources that cannot be overcommitted: if `requests` is specified it must equal `limits`, and specifying only `limits` is sufficient:\n\n```yaml\nresources:\n  limits:\n    nvidia.com/gpu: \"1\"\n  requests:\n    nvidia.com/gpu: \"1\"\n```\n\n### High Availability Configuration\n\nFor production deployments, implement:\n\n1. **Replica Sets**: Deploy multiple replicas for fault tolerance\n2. **Health Checks**: Configure liveness and readiness probes\n3. **Pod Disruption Budgets**: Ensure availability during updates\n4. 
**Anti-Affinity Rules**: Distribute pods across nodes\n\n```yaml\nspec:\n  replicas: 3\n  strategy:\n    type: RollingUpdate\n    rollingUpdate:\n      maxSurge: 1\n      maxUnavailable: 0\n```\n\n### Service Configuration\n\nExpose Haystack services using Kubernetes Services:\n\n```yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: haystack-api\nspec:\n  selector:\n    app: haystack\n  ports:\n    - protocol: TCP\n      port: 80\n      targetPort: 8000\n  type: LoadBalancer\n```\n\n## Production Best Practices\n\n### Security Considerations\n\n| Practice | Implementation |\n|----------|----------------|\n| Non-root execution | Configure USER directive in Dockerfile |\n| Secret management | Use Kubernetes Secrets or external secret stores |\n| Network policies | Restrict pod-to-pod communication |\n| Image scanning | Scan images for vulnerabilities before deployment |\n| TLS termination | Configure ingress with TLS certificates |\n\n### Monitoring and Observability\n\nImplement monitoring using:\n\n- **Metrics**: Prometheus exporter for pipeline metrics\n- **Logging**: Centralized logging with ELK/Graylog\n- **Tracing**: OpenTelemetry for request tracing\n- **Alerts**: Configure alerts for error rates and latency\n\n### Performance Optimization\n\n1. **Connection Pooling**: Reuse database and API connections\n2. **Caching**: Implement caching for frequently accessed data\n3. **Batch Processing**: Process multiple requests in batches\n4. 
**Async Processing**: Use async/await for I/O operations\n\n## CI/CD Integration\n\n### Automated Builds\n\nHaystack supports automated Docker image builds through:\n\n- GitHub Actions workflows\n- BuildKit with bake files\n- Multi-stage Docker builds\n\n### Deployment Workflows\n\n```mermaid\ngraph TD\n    A[Code Change] --> B[Run Tests]\n    B --> C[Build Docker Image]\n    C --> D[Push to Registry]\n    D --> E[Update Deployment]\n    E --> F[Health Check]\n    F --> G{Healthy?}\n    G -->|Yes| H[Deployment Complete]\n    G -->|No| I[Rollback]\n```\n\n### Registry Configuration\n\nPopular registry options for Haystack images:\n\n| Registry | Use Case | Authentication |\n|----------|----------|----------------|\n| Docker Hub | Public deployments | Optional |\n| AWS ECR | AWS infrastructure | IAM roles |\n| GCR | GCP infrastructure | Service accounts |\n| Azure ACR | Azure infrastructure | Service principals |\n| Private Registry | Enterprise deployments | Username/password |\n\n## License and Compliance\n\nThe Haystack Docker images contain:\n\n- Haystack framework code under the Apache 2.0 license\n- Python runtime components\n- Base distribution software with their respective licenses\n\nUsers are responsible for ensuring compliance with all software licenses contained within deployed images. For enterprise deployments, review the license implications of all included components.\n\n## Related Documentation\n\n- [Installation Guide](https://docs.haystack.deepset.ai/docs/installation)\n- [Pipeline Components](https://docs.haystack.deepset.ai/docs/pipeline-components)\n- [API Reference](https://docs.haystack.deepset.ai/reference)\n- [Contributing Guide](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md)\n\n## Summary\n\nHaystack provides a flexible and production-ready deployment infrastructure supporting Docker containerization, Kubernetes orchestration, and GPU acceleration. 
The multi-platform Docker images enable deployment across diverse infrastructure, while Kubernetes support facilitates enterprise-grade deployments with high availability and scalability requirements. GPU acceleration support enables high-performance inference for LLM-powered applications, with quantization options for resource-constrained environments.\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: deepset-ai/haystack\n\nSummary: Found 38 potential pitfall items; 7 are high/blocking. Highest priority: installation - 来源证据：RFC: Signed receipts for Haystack pipeline component calls.\n\n## 1. installation · 来源证据：RFC: Signed receipts for Haystack pipeline component calls\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：RFC: Signed receipts for Haystack pipeline component calls\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_192c840953e54837869723f54ccfdd1a | https://github.com/deepset-ai/haystack/issues/11039 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. installation · 来源证据：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_4b8f3323f54c4fd6b8de4e2d466cfe8b | https://github.com/deepset-ai/haystack/issues/11358 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 3. 
installation · 来源证据：feat: add INTERSECTION join mode to DocumentJoiner\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: add INTERSECTION join mode to DocumentJoiner\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_00757f9859234e9cab8f8d4ce4f3e771 | https://github.com/deepset-ai/haystack/issues/11365 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 4. maintenance · 来源证据：docs: Update Ragas docs\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：docs: Update Ragas docs\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_3204fffa09664d9f8553be2a3008f270 | https://github.com/deepset-ai/haystack/issues/11178 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 5. security_permissions · 来源证据：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_8f72793700a1416891c2eedddc379129 | https://github.com/deepset-ai/haystack/issues/11366 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 6. 
security_permissions · 来源证据：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_4f0868673100472fb74d831b5a04735f | https://github.com/deepset-ai/haystack/issues/11311 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 7. security_permissions · 来源证据：feat: support token-based budget in LostInTheMiddleRanker\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：feat: support token-based budget in LostInTheMiddleRanker\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7ad00787309c442eb497b10879fb3b28 | https://github.com/deepset-ai/haystack/issues/11351 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 8. installation · 失败模式：installation: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- User impact: Developers may fail before the first successful local run: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: Proposal: Transaction Protocol for idempotent, auditable agent pipelines. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_58038e9b6373edf9376049b42d4b7bb4 | https://github.com/deepset-ai/haystack/issues/11266 | Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n## 9. installation · 失败模式：installation: RFC: Signed receipts for Haystack pipeline component calls\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: RFC: Signed receipts for Haystack pipeline component calls\n- User impact: Developers may fail before the first successful local run: RFC: Signed receipts for Haystack pipeline component calls\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: RFC: Signed receipts for Haystack pipeline component calls. Context: Observed when using node, python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_ce0b9c65d21126dcf11ede12120e154f | https://github.com/deepset-ai/haystack/issues/11039 | RFC: Signed receipts for Haystack pipeline component calls\n\n## 10. installation · 失败模式：installation: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- User impact: Developers may fail before the first successful local run: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_4d3276b6b9938595cb2dbb864a5509da | https://github.com/deepset-ai/haystack/issues/11311 | Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n## 11. installation · 失败模式：installation: [FEATURE] Support for code syntax-aware Document Splitters\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [FEATURE] Support for code syntax-aware Document Splitters\n- User impact: Developers may fail before the first successful local run: [FEATURE] Support for code syntax-aware Document Splitters\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [FEATURE] Support for code syntax-aware Document Splitters. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_997b84068ae32409b1d8d55daaddd984 | https://github.com/deepset-ai/haystack/issues/11354 | [FEATURE] Support for code syntax-aware Document Splitters\n\n## 12. installation · 来源证据：MCP Server for Haystack docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：MCP Server for Haystack docs\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_398390cf2fcd41d589dd5614a3bc646d | https://github.com/deepset-ai/haystack/issues/11346 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 13. 
installation · 来源证据：[FEATURE] Support for code syntax-aware Document Splitters\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[FEATURE] Support for code syntax-aware Document Splitters\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_76b3b1b8eae94593a2cd248d0ec55e2a | https://github.com/deepset-ai/haystack/issues/11354 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 14. installation · 来源证据：v2.25.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.25.2\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_55d8aef5d1c3417ba9bdf05c0f5a3053 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 15. installation · 来源证据：v2.26.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.26.0\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d73f121017b64b04a8ad885da241fc6f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 16. installation · 来源证据：v2.28.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.28.0\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d9746a9178f0445d853c95cbb4a5241b | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 17. 
configuration · 失败模式：configuration: MCP Server for Haystack docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: MCP Server for Haystack docs\n- User impact: Developers may misconfigure credentials, environment, or host setup: MCP Server for Haystack docs\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: MCP Server for Haystack docs. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_e20d9655fcfaa20fd6aea7f45a938545 | https://github.com/deepset-ai/haystack/issues/11346 | MCP Server for Haystack docs, failure_mode_cluster:github_issue | fmev_a1eed7aea672a032017343738a09159f | https://github.com/deepset-ai/haystack/issues/11346 | MCP Server for Haystack docs\n\n## 18. configuration · 失败模式：configuration: v2.26.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.26.0\n- User impact: Upgrade or migration may change expected behavior: v2.26.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.26.0. Context: Observed when using python, windows\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_3b9fc694d24804c99a261297652bf3cf | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | v2.26.0\n\n## 19. 
configuration · 失败模式：configuration: v2.28.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.28.0\n- User impact: Upgrade or migration may change expected behavior: v2.28.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.28.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_0c6c5701a51e86d2246a4919b45c2606 | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | v2.28.0\n\n## 20. configuration · 失败模式：configuration: v2.29.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.29.0\n- User impact: Upgrade or migration may change expected behavior: v2.29.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.29.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_285696f6bc066dc6f42482171a097432 | https://github.com/deepset-ai/haystack/releases/tag/v2.29.0 | v2.29.0\n\n## 21. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:221654678 | https://github.com/deepset-ai/haystack | README/documentation is current enough for a first validation pass.\n\n## 22. 
runtime · 失败模式：runtime: v2.25.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v2.25.2\n- User impact: Upgrade or migration may change expected behavior: v2.25.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.25.2. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_32dfb0f93116d56f30cc46cdab3a0751 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | v2.25.2\n\n## 23. maintenance · 失败模式：migration: docs: Update Ragas docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this migration risk before relying on the project: docs: Update Ragas docs\n- User impact: Developers may hit a documented source-backed failure mode: docs: Update Ragas docs\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: docs: Update Ragas docs. Context: Observed during version upgrade or migration.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_57550d7e13c6f14ad00a030d3e3a20db | https://github.com/deepset-ai/haystack/issues/11178 | docs: Update Ragas docs, failure_mode_cluster:github_issue | fmev_c4773f63705049b6c2714f8a4517b847 | https://github.com/deepset-ai/haystack/issues/11178 | docs: Update Ragas docs\n\n## 24. 
maintenance · 来源证据：DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_9e25887dd3694aa695807058e368f46c | https://github.com/deepset-ai/haystack/issues/11352 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 25. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | last_activity_observed missing\n\n## 26. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:221654678 | https://github.com/deepset-ai/haystack | no_demo; severity=medium\n\n## 27. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:221654678 | https://github.com/deepset-ai/haystack | no_demo; severity=medium\n\n## 28. 
security_permissions · 来源证据：Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e0fcf29e18c5480baf59b94a464ecc85 | https://github.com/deepset-ai/haystack/issues/11266 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 29. security_permissions · 来源证据：v2.26.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：v2.26.1\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_1520403ba7f24184b2c108c30e5d609f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.1 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 30. security_permissions · 来源证据：v2.27.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：v2.27.0\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_1dddbe7bf8094d669dd185a18844ef75 | https://github.com/deepset-ai/haystack/releases/tag/v2.27.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 31. 
capability · 失败模式：conceptual: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `Text...\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- User impact: Developers may hit a documented source-backed failure mode: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_bf87ad8f610a525641ac857abffd6388 | https://github.com/deepset-ai/haystack/issues/11358 | feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`, failure_mode_cluster:github_issue | fmev_315e3f2ec26809f7348a1892a9730a05 | https://github.com/deepset-ai/haystack/issues/11358 | feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n## 32. 
capability · 失败模式：conceptual: feat: add INTERSECTION join mode to DocumentJoiner\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: add INTERSECTION join mode to DocumentJoiner\n- User impact: Developers may hit a documented source-backed failure mode: feat: add INTERSECTION join mode to DocumentJoiner\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_175e4485fffcc53c711d1fd504db9a38 | https://github.com/deepset-ai/haystack/issues/11365 | feat: add INTERSECTION join mode to DocumentJoiner\n\n## 33. capability · 失败模式：conceptual: feat: support token-based budget in LostInTheMiddleRanker\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: support token-based budget in LostInTheMiddleRanker\n- User impact: Developers may hit a documented source-backed failure mode: feat: support token-based budget in LostInTheMiddleRanker\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_eff234be9632dc6eb35cf59720b2c3f0 | https://github.com/deepset-ai/haystack/issues/11351 | feat: support token-based budget in LostInTheMiddleRanker\n\n## 34. 
runtime · 失败模式：performance: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- User impact: Developers may hit a documented source-backed failure mode: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication. Context: Observed when using python, macos, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_7f9bb8e374256d979ec52a0c96020977 | https://github.com/deepset-ai/haystack/issues/11352 | DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication, failure_mode_cluster:github_issue | fmev_21fc5a912bed31520bb91639ca4fa3b3 | https://github.com/deepset-ai/haystack/issues/11352 | DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n## 35. runtime · 失败模式：performance: v2.27.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: v2.27.0\n- User impact: Upgrade or migration may change expected behavior: v2.27.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.27.0. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_9757a305d020b89fd79c9dc31c6a9d1c | https://github.com/deepset-ai/haystack/releases/tag/v2.27.0 | v2.27.0\n\n## 36. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | issue_or_pr_quality=unknown\n\n## 37. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | release_recency=unknown\n\n## 38. maintenance · 失败模式：maintenance: v2.26.1\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v2.26.1\n- User impact: Upgrade or migration may change expected behavior: v2.26.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.26.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_29416bd44cdae3aebbb8d4bd245bc398 | https://github.com/deepset-ai/haystack/releases/tag/v2.26.1 | v2.26.1\n\n<!-- canonical_name: deepset-ai/haystack; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "Human Manual"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log\n\nProject: deepset-ai/haystack\n\nSummary: Found 38 potential pitfall items; 7 are high/blocking. Highest priority: installation - 来源证据：RFC: Signed receipts for Haystack pipeline component calls.\n\n## 1. installation · 来源证据：RFC: Signed receipts for Haystack pipeline component calls\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：RFC: Signed receipts for Haystack pipeline component calls\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_192c840953e54837869723f54ccfdd1a | https://github.com/deepset-ai/haystack/issues/11039 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. installation · 来源证据：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_4b8f3323f54c4fd6b8de4e2d466cfe8b | https://github.com/deepset-ai/haystack/issues/11358 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 3. installation · 来源证据：feat: add INTERSECTION join mode to DocumentJoiner\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：feat: add INTERSECTION join mode to DocumentJoiner\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_00757f9859234e9cab8f8d4ce4f3e771 | https://github.com/deepset-ai/haystack/issues/11365 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 4. 
maintenance · 来源证据：docs: Update Ragas docs\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：docs: Update Ragas docs\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_3204fffa09664d9f8553be2a3008f270 | https://github.com/deepset-ai/haystack/issues/11178 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 5. security_permissions · 来源证据：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EnvVarSecrets: add multi-tenant context support (ContextVar / pipeline-run context)\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_8f72793700a1416891c2eedddc379129 | https://github.com/deepset-ai/haystack/issues/11366 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 6. security_permissions · 来源证据：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_4f0868673100472fb74d831b5a04735f | https://github.com/deepset-ai/haystack/issues/11311 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 7. 
security_permissions · 来源证据：feat: support token-based budget in LostInTheMiddleRanker\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：feat: support token-based budget in LostInTheMiddleRanker\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7ad00787309c442eb497b10879fb3b28 | https://github.com/deepset-ai/haystack/issues/11351 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 8. installation · 失败模式：installation: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- User impact: Developers may fail before the first successful local run: Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: Proposal: Transaction Protocol for idempotent, auditable agent pipelines. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_58038e9b6373edf9376049b42d4b7bb4 | https://github.com/deepset-ai/haystack/issues/11266 | Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n## 9. 
installation · 失败模式：installation: RFC: Signed receipts for Haystack pipeline component calls\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: RFC: Signed receipts for Haystack pipeline component calls\n- User impact: Developers may fail before the first successful local run: RFC: Signed receipts for Haystack pipeline component calls\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: RFC: Signed receipts for Haystack pipeline component calls. Context: Observed when using node, python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_ce0b9c65d21126dcf11ede12120e154f | https://github.com/deepset-ai/haystack/issues/11039 | RFC: Signed receipts for Haystack pipeline component calls\n\n## 10. installation · 失败模式：installation: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- User impact: Developers may fail before the first successful local run: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: Security: OWASP Agent Memory Guard for pipeline memory poisoning defense. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_4d3276b6b9938595cb2dbb864a5509da | https://github.com/deepset-ai/haystack/issues/11311 | Security: OWASP Agent Memory Guard for pipeline memory poisoning defense\n\n## 11. 
installation · 失败模式：installation: [FEATURE] Support for code syntax-aware Document Splitters\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [FEATURE] Support for code syntax-aware Document Splitters\n- User impact: Developers may fail before the first successful local run: [FEATURE] Support for code syntax-aware Document Splitters\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [FEATURE] Support for code syntax-aware Document Splitters. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_997b84068ae32409b1d8d55daaddd984 | https://github.com/deepset-ai/haystack/issues/11354 | [FEATURE] Support for code syntax-aware Document Splitters\n\n## 12. installation · 来源证据：MCP Server for Haystack docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：MCP Server for Haystack docs\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_398390cf2fcd41d589dd5614a3bc646d | https://github.com/deepset-ai/haystack/issues/11346 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 13. installation · 来源证据：[FEATURE] Support for code syntax-aware Document Splitters\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[FEATURE] Support for code syntax-aware Document Splitters\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_76b3b1b8eae94593a2cd248d0ec55e2a | https://github.com/deepset-ai/haystack/issues/11354 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 14. 
installation · 来源证据：v2.25.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.25.2\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_55d8aef5d1c3417ba9bdf05c0f5a3053 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 15. installation · 来源证据：v2.26.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.26.0\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d73f121017b64b04a8ad885da241fc6f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 16. installation · 来源证据：v2.28.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：v2.28.0\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d9746a9178f0445d853c95cbb4a5241b | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 17. configuration · 失败模式：configuration: MCP Server for Haystack docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: MCP Server for Haystack docs\n- User impact: Developers may misconfigure credentials, environment, or host setup: MCP Server for Haystack docs\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: MCP Server for Haystack docs. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_e20d9655fcfaa20fd6aea7f45a938545 | https://github.com/deepset-ai/haystack/issues/11346 | MCP Server for Haystack docs, failure_mode_cluster:github_issue | fmev_a1eed7aea672a032017343738a09159f | https://github.com/deepset-ai/haystack/issues/11346 | MCP Server for Haystack docs\n\n## 18. configuration · 失败模式：configuration: v2.26.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.26.0\n- User impact: Upgrade or migration may change expected behavior: v2.26.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.26.0. Context: Observed when using python, windows\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_3b9fc694d24804c99a261297652bf3cf | https://github.com/deepset-ai/haystack/releases/tag/v2.26.0 | v2.26.0\n\n## 19. configuration · 失败模式：configuration: v2.28.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.28.0\n- User impact: Upgrade or migration may change expected behavior: v2.28.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.28.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_0c6c5701a51e86d2246a4919b45c2606 | https://github.com/deepset-ai/haystack/releases/tag/v2.28.0 | v2.28.0\n\n## 20. 
configuration · 失败模式：configuration: v2.29.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this configuration risk before relying on the project: v2.29.0\n- User impact: Upgrade or migration may change expected behavior: v2.29.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.29.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_285696f6bc066dc6f42482171a097432 | https://github.com/deepset-ai/haystack/releases/tag/v2.29.0 | v2.29.0\n\n## 21. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:221654678 | https://github.com/deepset-ai/haystack | README/documentation is current enough for a first validation pass.\n\n## 22. runtime · 失败模式：runtime: v2.25.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v2.25.2\n- User impact: Upgrade or migration may change expected behavior: v2.25.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.25.2. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_32dfb0f93116d56f30cc46cdab3a0751 | https://github.com/deepset-ai/haystack/releases/tag/v2.25.2 | v2.25.2\n\n## 23. 
maintenance · 失败模式：migration: docs: Update Ragas docs\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this migration risk before relying on the project: docs: Update Ragas docs\n- User impact: Developers may hit a documented source-backed failure mode: docs: Update Ragas docs\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: docs: Update Ragas docs. Context: Observed during version upgrade or migration.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_57550d7e13c6f14ad00a030d3e3a20db | https://github.com/deepset-ai/haystack/issues/11178 | docs: Update Ragas docs, failure_mode_cluster:github_issue | fmev_c4773f63705049b6c2714f8a4517b847 | https://github.com/deepset-ai/haystack/issues/11178 | docs: Update Ragas docs\n\n## 24. maintenance · 来源证据：DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_9e25887dd3694aa695807058e368f46c | https://github.com/deepset-ai/haystack/issues/11352 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 25. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | last_activity_observed missing\n\n## 26. 
security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:221654678 | https://github.com/deepset-ai/haystack | no_demo; severity=medium\n\n## 27. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:221654678 | https://github.com/deepset-ai/haystack | no_demo; severity=medium\n\n## 28. security_permissions · 来源证据：Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：Proposal: Transaction Protocol for idempotent, auditable agent pipelines\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e0fcf29e18c5480baf59b94a464ecc85 | https://github.com/deepset-ai/haystack/issues/11266 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 29. security_permissions · 来源证据：v2.26.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：v2.26.1\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_1520403ba7f24184b2c108c30e5d609f | https://github.com/deepset-ai/haystack/releases/tag/v2.26.1 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 30. 
security_permissions · 来源证据：v2.27.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：v2.27.0\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_1dddbe7bf8094d669dd185a18844ef75 | https://github.com/deepset-ai/haystack/releases/tag/v2.27.0 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 31. capability · 失败模式：conceptual: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `Text...\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- User impact: Developers may hit a documented source-backed failure mode: feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_bf87ad8f610a525641ac857abffd6388 | https://github.com/deepset-ai/haystack/issues/11358 | feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`, failure_mode_cluster:github_issue | fmev_315e3f2ec26809f7348a1892a9730a05 | https://github.com/deepset-ai/haystack/issues/11358 | feat: Add `run_async` to `MultiQueryEmbeddingRetriever`, `MultiQueryTextRetriever`, and `TextEmbeddingRetriever`\n\n## 32. 
capability · 失败模式：conceptual: feat: add INTERSECTION join mode to DocumentJoiner\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: add INTERSECTION join mode to DocumentJoiner\n- User impact: Developers may hit a documented source-backed failure mode: feat: add INTERSECTION join mode to DocumentJoiner\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_175e4485fffcc53c711d1fd504db9a38 | https://github.com/deepset-ai/haystack/issues/11365 | feat: add INTERSECTION join mode to DocumentJoiner\n\n## 33. capability · 失败模式：conceptual: feat: support token-based budget in LostInTheMiddleRanker\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this conceptual risk before relying on the project: feat: support token-based budget in LostInTheMiddleRanker\n- User impact: Developers may hit a documented source-backed failure mode: feat: support token-based budget in LostInTheMiddleRanker\n- Suggested check: 复核 source-backed failure mode cluster，并把适用版本和验证路径写入资产。\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_eff234be9632dc6eb35cf59720b2c3f0 | https://github.com/deepset-ai/haystack/issues/11351 | feat: support token-based budget in LostInTheMiddleRanker\n\n## 34. 
runtime · 失败模式：performance: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- User impact: Developers may hit a documented source-backed failure mode: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication. Context: Observed when using python, macos, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_7f9bb8e374256d979ec52a0c96020977 | https://github.com/deepset-ai/haystack/issues/11352 | DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication, failure_mode_cluster:github_issue | fmev_21fc5a912bed31520bb91639ca4fa3b3 | https://github.com/deepset-ai/haystack/issues/11352 | DocumentJoiner concatenate mode incorrectly drops documents with score=0.0 during deduplication\n\n## 35. runtime · 失败模式：performance: v2.27.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: v2.27.0\n- User impact: Upgrade or migration may change expected behavior: v2.27.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.27.0. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_9757a305d020b89fd79c9dc31c6a9d1c | https://github.com/deepset-ai/haystack/releases/tag/v2.27.0 | v2.27.0\n\n## 36. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | issue_or_pr_quality=unknown\n\n## 37. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:221654678 | https://github.com/deepset-ai/haystack | release_recency=unknown\n\n## 38. maintenance · 失败模式：maintenance: v2.26.1\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v2.26.1\n- User impact: Upgrade or migration may change expected behavior: v2.26.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v2.26.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_29416bd44cdae3aebbb8d4bd245bc398 | https://github.com/deepset-ai/haystack/releases/tag/v2.26.1 | v2.26.1\n",
      "summary": "Identity, installation, configuration, runtime, and safety pitfalls before user trial.",
      "title": "Pitfall Log"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# haystack - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for deepset-ai/haystack.\n\nProject:\n- Name: haystack\n- Repository: https://github.com/deepset-ai/haystack\n- Summary: Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.\n- Host target: local_cli\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Open-source AI orchestration framework for building context-engineered, production-ready LLM applications. 
Design modular pipelines and agent workflows with explicit control over retrieval, routing, memory, and generation. Built for scalable agents, RAG, multimodal applications, semantic search, and conversational systems.\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. introduction: Introduction to Haystack. Produce one small intermediate artifact and wait for confirmation.\n2. pipeline-architecture: Pipeline Architecture. Produce one small intermediate artifact and wait for confirmation.\n3. core-concepts: Core Concepts. Produce one small intermediate artifact and wait for confirmation.\n4. component-types: Pipeline Component Types. Produce one small intermediate artifact and wait for confirmation.\n5. llm-integrations: LLM and Embedder Integrations. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/deepset-ai/haystack\n- https://github.com/deepset-ai/haystack#readme\n- README.md\n- AGENTS.md\n- VERSION.txt\n- docs-website/docs/concepts/pipelines.mdx\n- docs-website/docs/concepts/pipelines/asyncpipeline.mdx\n- docs-website/docs/concepts/pipelines/serialization.mdx\n- docs-website/docs/concepts/pipelines/debugging-pipelines.mdx\n- docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. 
Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start\n\nProject: deepset-ai/haystack\n\n## Official Entry Points\n\n### Python / pip · 官方安装入口\n\n```bash\npip install haystack-ai\n```\n\nSource：https://github.com/deepset-ai/haystack#readme\n\n## Sources\n\n- repo: https://github.com/deepset-ai/haystack\n- docs: https://github.com/deepset-ai/haystack#readme\n",
      "summary": "Entry points extracted from official README or installation documentation.",
      "title": "Quick Start"
    }
  },
  "validation_id": "dval_2f942674156344b28c4ea6516b388287"
}