{
  "canonical_name": "QuivrHQ/quivr",
  "compilation_id": "pack_66c64ee43ff64d02ba22a1f06187ba06",
  "created_at": "2026-05-22T01:43:52.830239+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=mcp_config, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=mcp_config, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `pip install quivr-core` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "pip install quivr-core",
      "sandbox_container_image": "python:3.12-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "deterministic_isolated_install",
      "sandbox_validation_id": "sbx_4e8fb4508b7a40b0bede44dff077ac79"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_2c23d66dd834852a3b79de73f10bd4a3",
    "canonical_name": "QuivrHQ/quivr",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/QuivrHQ/quivr",
    "slug": "quivr",
    "source_packet_id": "phit_260effb2c10e4cfe9b03fac94a74d3be",
    "source_validation_id": "dval_cb34a15d24d14dc3a30e63e5b51ebb41"
  },
  "merchandising": {
    "best_for": "需要信息检索与知识管理能力，并使用 chatgpt 的用户",
    "github_forks": 3743,
    "github_stars": 39150,
    "one_liner_en": "Opinionated RAG for integrating GenAI in your apps 🧠 Focus on your product rather than the RAG. Easy integration in existing products with customisation! Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Any way you want.",
    "one_liner_zh": "有主见的 RAG 方案，帮你把生成式 AI 集成到应用中 🧠 专注于你的产品，而不是 RAG 本身。可轻松集成到现有产品并支持定制！任意 LLM：GPT4、Groq、Llama；任意向量库：PGVector、Faiss；任意文件；任意你想要的使用方式。",
    "primary_category": {
      "category_id": "research-knowledge",
      "confidence": "high",
      "name_en": "Research & Knowledge",
      "name_zh": "信息检索与知识管理",
      "reason": "strong category phrase match from project identity and outcome"
    },
    "target_user": "使用 chatgpt 等宿主 AI 的用户",
    "title_en": "quivr",
    "title_zh": "quivr 能力包",
    "visible_tags": [
      {
        "label_en": "Knowledge Retrieval",
        "label_zh": "知识检索",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-knowledge-retrieval",
        "type": "product_domain"
      },
      {
        "label_en": "Knowledge Base Q&A",
        "label_zh": "知识库问答",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-knowledge-base-q-a",
        "type": "user_job"
      },
      {
        "label_en": "Structured Data Extraction",
        "label_zh": "结构化数据提取",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-structured-data-extraction",
        "type": "core_capability"
      },
      {
        "label_en": "Node-based Workflow",
        "label_zh": "节点式流程编排",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-node-based-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Open Source Tool",
        "label_zh": "开源工具",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-open-source-tool",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_260effb2c10e4cfe9b03fac94a74d3be",
  "page_model": {
    "artifacts": {
      "artifact_slug": "quivr",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "pip install quivr-core",
          "label": "Python / pip · 官方安装入口",
          "source": "https://github.com/QuivrHQ/quivr#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "知识检索",
        "知识库问答",
        "结构化数据提取",
        "节点式流程编排",
        "开源工具"
      ],
      "eyebrow": "信息检索与知识管理",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要信息检索与知识管理能力，并使用 chatgpt 的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "Opinionated RAG for integrating GenAI in your apps 🧠 Focus on your product rather than the RAG. Easy integration in existing products with customisation! Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Any way you want."
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "chatgpt",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "mcp_config, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EU AI Act Compliance Scan Results — Sharing Findings for Feedback",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_46118108af95480ba852109be6e2c66a | https://github.com/QuivrHQ/quivr/issues/3667 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]:",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_ff260ca3ab5247679b2ded201ec21e24 | https://github.com/QuivrHQ/quivr/issues/2004 | 来源讨论提到 docker 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：[Bug]:",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Integration idea: Screenpipe for screen/audio context",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_3a7f345691d04bbea72b03858746645e | https://github.com/QuivrHQ/quivr/issues/3658 | 来源类型 github_issue 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：Integration idea: Screenpipe for screen/audio context",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_e3091b23b6184dffb982e0cc494134fd | https://github.com/QuivrHQ/quivr/issues/3650 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "README/documentation is current enough for a first validation pass.",
            "category": "能力坑",
            "evidence": [
              "capability.assumptions | github_repo:640079149 | https://github.com/QuivrHQ/quivr | README/documentation is current enough for a first validation pass."
            ],
            "severity": "medium",
            "suggested_check": "将假设转成下游验证清单。",
            "title": "能力判断依赖假设",
            "user_impact": "假设不成立时，用户拿不到承诺的能力。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个运行相关的待验证问题：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection object at 0x7f66a5b06110>>, which will…",
            "category": "运行坑",
            "evidence": [
              "community_evidence:github | cevd_e6fdde799a2b4f53806121190979025a | https://github.com/QuivrHQ/quivr/issues/3654 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection…",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.25",
            "category": "运行坑",
            "evidence": [
              "community_evidence:github | cevd_7c6096fb5a6442c7b53068a8dd8e2c78 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25 | 来源类型 github_release 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：core: v0.0.25",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.29",
            "category": "运行坑",
            "evidence": [
              "community_evidence:github | cevd_7eeeeb626a10476e820fcd651727a55a | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29 | 来源讨论提到 node 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：core: v0.0.29",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.33",
            "category": "运行坑",
            "evidence": [
              "community_evidence:github | cevd_d7a735cb41bc44f2abeb148d689f1320 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33 | 来源类型 github_release 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：core: v0.0.33",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.24",
            "category": "维护坑",
            "evidence": [
              "community_evidence:github | cevd_5272fe70cda24220a5aeb994ad06819e | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24 | 来源类型 github_release 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：core: v0.0.24",
            "user_impact": "可能影响升级、迁移或版本选择。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.26",
            "category": "维护坑",
            "evidence": [
              "community_evidence:github | cevd_7f723b616fd446c3be43298d75fd6e8b | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.26 | 来源类型 github_release 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：core: v0.0.26",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "未记录 last_activity_observed。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | last_activity_observed missing"
            ],
            "severity": "medium",
            "suggested_check": "补 GitHub 最近 commit、release、issue/PR 响应信号。",
            "title": "维护活跃度未知",
            "user_impact": "新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "downstream_validation.risk_items | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "进入安全/权限治理复核队列。",
            "title": "下游验证发现风险项",
            "user_impact": "下游已经要求复核，不能在页面中弱化。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "risks.scoring_risks | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "把风险写入边界卡，并确认是否需要人工复核。",
            "title": "存在评分风险",
            "user_impact": "风险会影响是否适合普通用户安装。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：core: v0.0.27",
            "category": "安全/权限坑",
            "evidence": [
              "community_evidence:github | cevd_027338ba02764da6b371970615591265 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.27 | 来源类型 github_release 暴露的待验证使用条件。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：core: v0.0.27",
            "user_impact": "可能影响授权、密钥配置或安全边界。"
          },
          {
            "body": "issue_or_pr_quality=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | issue_or_pr_quality=unknown"
            ],
            "severity": "low",
            "suggested_check": "抽样最近 issue/PR，判断是否长期无人处理。",
            "title": "issue/PR 响应质量未知",
            "user_impact": "用户无法判断遇到问题后是否有人维护。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 16 个潜在踩坑项，其中 2 个为 high/blocking；最高优先级：安全/权限坑 - 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": 119,
        "forks": 3743,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": 39150
      },
      "source_url": "https://github.com/QuivrHQ/quivr",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "Opinionated RAG for integrating GenAI in your apps 🧠 Focus on your product rather than the RAG. Easy integration in existing products with customisation! Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Any way you want.",
      "title": "quivr 能力包",
      "trial_prompt": "# quivr - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for QuivrHQ/quivr.\n\nProject:\n- Name: quivr\n- Repository: https://github.com/QuivrHQ/quivr\n- Summary: Opiniated RAG for integrating GenAI in your apps 🧠   Focus on your product rather than the RAG. Easy integration in existing products with customisation!  Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Anyway you want.\n- Host target: chatgpt\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Opiniated RAG for integrating GenAI in your apps 🧠   Focus on your product rather than the RAG. Easy integration in existing products with customisation!  Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Anyway you want.\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Opiniated RAG for integrating GenAI in your apps 🧠   Focus on your product rather than the RAG. Easy integration in existing products with customisation!  Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. 
Anyway you want.\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. page-introduction: Introduction to Quivr. Produce one small intermediate artifact and wait for confirmation.\n2. page-getting-started: Getting Started. Produce one small intermediate artifact and wait for confirmation.\n3. page-installation: Installation. Produce one small intermediate artifact and wait for confirmation.\n4. page-architecture-overview: System Architecture Overview. Produce one small intermediate artifact and wait for confirmation.\n5. page-core-components: Core Components. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/QuivrHQ/quivr\n- https://github.com/QuivrHQ/quivr#readme\n- README.md\n- core/quivr_core/__init__.py\n- core/README.md\n- examples/simple_question/simple_question.py\n- examples/simple_question/simple_question_streaming.py\n- core/pyproject.toml\n- examples/chatbot/pyproject.toml\n- core/quivr_core/brain/brain.py\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. 
Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "voices": [
        {
          "body": "来源平台：github。github/github_issue: [Bug]:（https://github.com/QuivrHQ/quivr/issues/2004）；github/github_issue: Integration idea: Screenpipe for screen/audio context（https://github.com/QuivrHQ/quivr/issues/3658）；github/github_issue: [Bug]: RuntimeError: There is no current event loop in thread 'MainThrea（https://github.com/QuivrHQ/quivr/issues/3650）；github/github_issue: EU AI Act Compliance Scan Results — Sharing Findings for Feedback（https://github.com/QuivrHQ/quivr/issues/3667）；github/github_issue: The garbage collector is trying to clean up non-checked-in connection <A（https://github.com/QuivrHQ/quivr/issues/3654）；github/github_release: core: v0.0.33（https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33）；github/github_release: core: v0.0.29（https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29）；github/github_release: core: v0.0.27（https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.27）；github/github_release: core: v0.0.26（https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.26）；github/github_release: core: v0.0.25（https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25）；github/github_release: core: v0.0.24（https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24）。这些是项目级外部声音，不作为单独质量证明。",
          "items": [
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]:",
              "url": "https://github.com/QuivrHQ/quivr/issues/2004"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "Integration idea: Screenpipe for screen/audio context",
              "url": "https://github.com/QuivrHQ/quivr/issues/3658"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script",
              "url": "https://github.com/QuivrHQ/quivr/issues/3650"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "EU AI Act Compliance Scan Results — Sharing Findings for Feedback",
              "url": "https://github.com/QuivrHQ/quivr/issues/3667"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection…",
              "url": "https://github.com/QuivrHQ/quivr/issues/3654"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "core: v0.0.33",
              "url": "https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "core: v0.0.29",
              "url": "https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "core: v0.0.27",
              "url": "https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.27"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "core: v0.0.26",
              "url": "https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.26"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "core: v0.0.25",
              "url": "https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "core: v0.0.24",
              "url": "https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24"
            }
          ],
          "status": "已收录 11 条来源",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "信息检索与知识管理",
      "desc": "Opinionated RAG for integrating GenAI in your apps 🧠 Focus on your product rather than the RAG. Easy integration in existing products with customisation! Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Any way you want.",
      "effort": "安装已验证",
      "forks": 3743,
      "icon": "search",
      "name": "quivr 能力包",
      "risk": "可发布",
      "slug": "quivr",
      "stars": 39150,
      "tags": [
        "知识检索",
        "知识库问答",
        "结构化数据提取",
        "节点式流程编排",
        "开源工具"
      ],
      "thumb": "blue",
      "type": "MCP 配置"
    },
    "manual": {
      "markdown": "# https://github.com/QuivrHQ/quivr Project Manual\n\nGenerated on: 2026-05-21 19:22:47 UTC\n\n## Table of Contents\n\n- [Introduction to Quivr](#page-introduction)\n- [Getting Started](#page-getting-started)\n- [Installation](#page-installation)\n- [System Architecture Overview](#page-architecture-overview)\n- [Core Components](#page-core-components)\n- [Brain Class](#page-brain-class)\n- [RAG Implementation](#page-rag-implementation)\n- [LLM Integration](#page-llm-integration)\n- [File Processing and Parsers](#page-file-processing)\n- [Storage System](#page-storage)\n\n<a id='page-introduction'></a>\n\n## Introduction to Quivr\n\n### Related Pages\n\nRelated topics: [Getting Started](#page-getting-started), [System Architecture Overview](#page-architecture-overview)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n- [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n- [core/CHANGELOG.md](https://github.com/QuivrHQ/quivr/blob/main/core/CHANGELOG.md)\n</details>\n\n# Introduction to Quivr\n\nQuivr is an open-source project that helps you build your \"second brain\" by leveraging the power of Generative AI. It provides a Retrieval-Augmented Generation (RAG) framework that enables users to ingest documents and ask questions about their content using natural language. 
Source: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\nThe core philosophy of Quivr is to handle all the complexity of RAG implementations so developers can focus on their products. It provides an opinionated, fast, and efficient RAG system that supports multiple file types, various LLM providers, and customizable workflows. Source: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Key Features\n\nQuivr offers several distinctive capabilities that make it a powerful choice for building document-aware AI applications:\n\n| Feature | Description |\n|---------|-------------|\n| **Opinionated RAG** | Pre-configured RAG pipeline optimized for speed and efficiency |\n| **Multi-LLM Support** | Works with OpenAI, Anthropic, Mistral, Gemma, and local models via Ollama |\n| **Any File Type** | Supports PDF, TXT, Markdown, and custom parsers |\n| **Customizable Workflows** | Extend RAG with internet search, tools, and custom processing |\n| **Megaparse Integration** | Optional integration with Megaparse for advanced document ingestion |\n| **Language Detection** | Automatic detection of document language for better processing |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Architecture Overview\n\nQuivr follows a modular architecture with the `quivr-core` package as its central component. 
The architecture is designed around a workflow-based system where different processing nodes are connected to form a complete RAG pipeline.\n\n```mermaid\ngraph TD\n    A[User Input] --> B[Brain.ask]\n    B --> C[RetrievalConfig]\n    C --> D[Workflow Engine]\n    D --> E[Processing Nodes]\n    E --> F[LLM Response]\n    \n    G[Documents] --> H[Processor]\n    H --> I[Chunks]\n    I --> J[Vector Store]\n    J --> D\n```\n\n### Package Structure\n\nThe main components of the Quivr architecture include:\n\n| Component | Purpose |\n|-----------|---------|\n| `quivr_core.Brain` | Central class for managing document collections and answering questions |\n| `ProcessorBase` | Abstract base class for document processors |\n| `RetrievalConfig` | Configuration for retrieval and workflow settings |\n| `QuivrFile` | File wrapper for document handling |\n| `ProcessedDocument` | Container for processed document chunks |\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n## Getting Started\n\n### Prerequisites\n\nBefore installing Quivr, ensure you have:\n\n- Python 3.10 or newer\n- An API key for your chosen LLM provider (OpenAI, Anthropic, or Mistral)\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Installation\n\nInstall the `quivr-core` package using pip:\n\n```bash\npip install quivr-core\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Quick Start Example\n\nThe following example demonstrates creating a simple RAG application with Quivr in approximately 5 lines of code:\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\") as temp_file:\n    temp_file.write(\"Gold is a liquid of blue-like colour.\")\n    temp_file.flush()\n\n    brain = Brain.from_files(\n        name=\"test_brain\",\n        file_paths=[temp_file.name],\n    )\n\n    answer = brain.ask(\"what is gold? 
answer in french\")\n    print(\"answer:\", answer)\n```\n\nSource: [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Core Components\n\n### Brain Class\n\nThe `Brain` class is the central interface for interacting with Quivr. It manages document ingestion, storage, and querying.\n\n**Key Methods:**\n\n| Method | Description |\n|--------|-------------|\n| `Brain.from_files()` | Create a brain from one or more files |\n| `brain.ask()` | Ask a question about the ingested documents |\n| `brain.print_info()` | Display information about the brain |\n\n**Typical Workflow:**\n\n```mermaid\ngraph LR\n    A[Create Brain] --> B[from_files]\n    B --> C[Process Documents]\n    C --> D[Store Chunks]\n    D --> E[Ask Questions]\n    E --> F[Get Answers]\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Document Processors\n\nProcessors handle the parsing and chunking of different file types. The `ProcessorBase` abstract class defines the interface that all processors must implement.\n\n```python\nclass ProcessorBase(ABC):\n    @abstractmethod\n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[R]:\n        raise NotImplementedError\n```\n\nSource: [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n**Processing Pipeline:**\n\nDuring document processing, Quivr performs the following operations:\n\n1. Parse the input file using the appropriate processor\n2. Split documents into chunks based on `SplitterConfig`\n3. Add metadata including chunk index, version info, and detected language\n4. 
Sanitize content by removing null characters and encoding issues\n\nSource: [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n### Simple Txt Processor\n\nThe `SimpleTxtProcessor` is a built-in processor for handling plain text files. It uses recursive character splitting to divide documents into manageable chunks:\n\n```python\ndef recursive_character_splitter(\n    doc: Document, chunk_size: int, chunk_overlap: int\n) -> list[Document]:\n    assert chunk_overlap < chunk_size, \"chunk_overlap is greater than chunk_size\"\n\n    if len(doc.page_content) <= chunk_size:\n        return [doc]\n\n    chunk = Document(page_content=doc.page_content[:chunk_size], metadata=doc.metadata)\n    remaining = Document(\n        page_content=doc.page_content[chunk_size - chunk_overlap :],\n        metadata=doc.metadata,\n    )\n\n    return [chunk] + recursive_character_splitter(remaining, chunk_size, chunk_overlap)\n```\n\nSource: [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n## Configuration\n\n### Environment Setup\n\nSet your API keys as environment variables before creating a brain:\n\n```python\nimport os\nos.environ[\"OPENAI_API_KEY\"] = \"my_openai_apikey\"\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Retrieval Configuration\n\nThe `RetrievalConfig` class allows customization of the RAG workflow. 
Configuration can be loaded from YAML files:\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\nconfig_file_name = \"./basic_rag_workflow.yaml\"\nretrieval_config = RetrievalConfig.from_yaml(config_file_name)\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Workflow Configuration\n\nWorkflows are defined using YAML files that specify processing nodes and their connections:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [\"retrieve\"]\n```\n\n**Workflow Node Types:**\n\n| Node | Function |\n|------|----------|\n| START | Entry point for the workflow |\n| filter_history | Filters conversation history |\n| rewrite | Rewrites the query for better retrieval |\n| retrieve | Fetches relevant document chunks |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Advanced Usage\n\n### Custom Workflow with Rich Console\n\nFor interactive applications, Quivr can be combined with the `rich` library for enhanced console output:\n\n```python\nfrom quivr_core import Brain\nfrom quivr_core.config import RetrievalConfig\nfrom rich.console import Console\nfrom rich.panel import Panel\nfrom rich.prompt import Prompt\n\nbrain = Brain.from_files(\n    name=\"my smart brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n\nconfig_file_name = \"./basic_rag_workflow.yaml\"\nretrieval_config = RetrievalConfig.from_yaml(config_file_name)\n\nconsole = Console()\nwhile True:\n    question = Prompt.ask(\"[bold cyan]Question[/bold cyan]\")\n    if question.lower() == \"exit\":\n        break\n    answer = brain.ask(question, retrieval_config=retrieval_config)\n    console.print(f\"[bold green]Answer[/bold green]: {answer.answer}\")\n```\n\nSource: 
[README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Supported LLM Providers\n\nQuivr integrates with multiple LLM providers:\n\n| Provider | Model Support | Configuration |\n|----------|---------------|---------------|\n| OpenAI | GPT-4, GPT-3.5 | `OPENAI_API_KEY` |\n| Anthropic | Claude family | `ANTHROPIC_API_KEY` |\n| Mistral | Mistral models | `MISTRAL_API_KEY` |\n| Ollama | Local models | `OLLAMA_BASE_URL` |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Version History\n\nQuivr follows semantic versioning for the `quivr-core` package. Key releases include:\n\n| Version | Date | Key Changes |\n|---------|------|-------------|\n| 0.0.27 | 2024-12-16 | Max context tokens enforcement, megaparse SDK integration |\n| 0.0.19 | 2024-10-21 | Beginning of quivr-core development |\n| 0.0.13 | 2024-08-01 | Added parsers and tox tests |\n| 0.0.2 | 2024-07-09 | Initial quivr-core package release |\n\nSource: [core/CHANGELOG.md](https://github.com/QuivrHQ/quivr/blob/main/core/CHANGELOG.md)\n\n## Documentation and Community\n\nAdditional resources for learning and contributing to Quivr:\n\n| Resource | Description |\n|----------|-------------|\n| [Official Documentation](https://core.quivr.com/) | Comprehensive guides and API reference |\n| [GitHub Issues](https://github.com/quivrhq/quivr/issues) | Bug reports and feature requests |\n| [Discord Community](https://discord.gg/HUpRgp2HG8) | Real-time support and discussions |\n| [Good First Issues](https://github.com/quivrhq/quivr/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) | Beginner-friendly contribution opportunities |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## License\n\nQuivr is licensed under the Apache 2.0 License, making it freely available for commercial and personal use. 
Source: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n---\n\n<a id='page-getting-started'></a>\n\n## Getting Started\n\n### Related Pages\n\nRelated topics: [Introduction to Quivr](#page-introduction), [Installation](#page-installation)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n- [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [examples/pdf_parsing_tika.py](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n</details>\n\n# Getting Started\n\n## Overview\n\nQuivr is an open-source RAG (Retrieval-Augmented Generation) framework that enables developers to build AI-powered \"second brain\" applications. The `quivr-core` package provides the core RAG functionality, allowing users to ingest documents and query them using large language models. Source: [README.md:1-15](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\nThe framework is designed to be **opinionated, fast, and efficient**, abstracting away the complexity of document processing, vector storage, and LLM integration so developers can focus on building their products. 
Source: [README.md:27-30](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Prerequisites\n\nBefore getting started with Quivr, ensure your environment meets the following requirements:\n\n| Requirement | Version | Description |\n|-------------|---------|-------------|\n| Python | 3.10+ | The programming language runtime |\n| pip | Latest | Package installer for Python |\n| API Key | - | Required for cloud LLM providers (OpenAI, Anthropic, Mistral) |\n\nSource: [README.md:44-50](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Supported LLM Providers\n\nQuivr supports multiple LLM providers:\n\n| Provider | API Type | Notes |\n|----------|----------|-------|\n| OpenAI | API Key | Set `OPENAI_API_KEY` environment variable |\n| Anthropic | API Key | Supports Claude models |\n| Mistral | API Key | Supports Mistral AI models |\n| Ollama | Local | For running models locally |\n\nSource: [README.md:58-62](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Installation\n\n### Package Installation\n\nInstall the core package using pip:\n\n```bash\npip install quivr-core\n```\n\nSource: [README.md:53-55](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Verify Installation\n\nTo verify the installation worked correctly, you can import the package:\n\n```python\nfrom quivr_core import Brain\nprint(\"Quivr-core installed successfully!\")\n```\n\n## Core Concepts\n\n### Brain\n\nThe `Brain` is the central entity in Quivr that manages document ingestion and querying. 
It encapsulates:\n\n- **LLM Integration**: The language model used for generating answers\n- **Embedder**: The embedding model for vectorizing documents\n- **Vector Database**: Storage for document embeddings and retrieval\n- **File Processors**: Components that parse various file formats\n\nSource: [core/quivr_core/brain/brain.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### QuivrFile\n\nFiles are represented as `QuivrFile` objects with the following attributes:\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `id` | UUID | Unique identifier for the file |\n| `brain_id` | UUID | ID of the brain this file belongs to |\n| `path` | Path | File system path |\n| `original_filename` | str | Original filename |\n| `file_size` | int | File size in bytes |\n| `file_extension` | FileExtension | File type enum |\n| `file_sha1` | str | SHA1 hash for deduplication |\n| `additional_metadata` | dict | Custom metadata |\n\nSource: [core/quivr_core/files/file.py:50-70](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n### Processor Pipeline\n\nDocuments go through a processing pipeline that:\n\n1. **Parses** the file content based on file type\n2. **Splits** content into manageable chunks\n3. **Detects language** for each chunk\n4. **Embeds** chunks for vector storage\n5. 
**Stores** in the vector database\n\nSource: [core/quivr_core/processor/processor_base.py:40-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Quick Start Guide\n\n### Minimal Example (5 Lines of Code)\n\nThe fastest way to get started with Quivr:\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\") as temp_file:\n    temp_file.write(\"Gold is a liquid of blue-like colour.\")\n    temp_file.flush()\n\n    brain = Brain.from_files(\n        name=\"test_brain\",\n        file_paths=[temp_file.name],\n    )\n\n    answer = brain.ask(\"what is gold? answer in french\")\n    print(\"answer:\", answer)\n```\n\nSource: [examples/simple_question/simple_question.py:1-20](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n### Interactive Chat Example\n\nFor a more complete example with a console-based chat interface:\n\n```python\nimport os\nos.environ[\"OPENAI_API_KEY\"] = \"your-api-key-here\"\n\nfrom quivr_core import Brain\nfrom quivr_core.config import RetrievalConfig\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./document.pdf\", \"./notes.txt\"],\n)\n\nanswer = brain.ask(\n    \"What is the main topic of these documents?\",\n    retrieval_config=RetrievalConfig()\n)\nprint(answer.answer)\n```\n\nSource: [README.md:85-100](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### PDF Processing Example\n\nFor processing PDF files with custom LLM configuration:\n\n```python\nfrom langchain_core.embeddings import DeterministicFakeEmbedding\nfrom langchain_core.language_models import FakeListChatModel\nfrom quivr_core import Brain\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"tests/processor/data/dummy.pdf\"],\n    llm=LLMEndpoint(\n        
llm=FakeListChatModel(responses=[\"good\"]),\n        llm_config=LLMEndpointConfig(model=\"fake_model\", llm_base_url=\"local\"),\n    ),\n    embedder=DeterministicFakeEmbedding(size=20),\n)\n\nanswer = brain.ask(\"What is this document about?\")\nprint(answer.answer)\n```\n\nSource: [examples/pdf_parsing_tika.py:1-25](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n\n## Workflow Architecture\n\n### Basic RAG Workflow\n\nThe following diagram illustrates the basic RAG workflow in Quivr:\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Filter History]\n    B --> C[Query Rewrite]\n    C --> D[Retrieval]\n    D --> E[LLM Generation]\n    E --> F[Response]\n    \n    G[Document Ingestion] --> H[File Processing]\n    H --> I[Chunking]\n    I --> J[Embedding]\n    J --> K[Vector Storage]\n    \n    D --> K\n```\n\n### Data Flow\n\n```mermaid\ngraph LR\n    A[Files] -->|Parse| B[Documents]\n    B -->|Chunk| C[Chunks]\n    C -->|Embed| D[Vectors]\n    D -->|Store| E[Vector DB]\n    \n    F[Query] -->|Embed| G[Query Vector]\n    G -->|Search| E\n    E -->|Results| H[Context]\n    H -->|Generate| I[Answer]\n```\n\n## Configuration\n\n### Environment Variables\n\nConfigure your API keys as environment variables:\n\n```bash\nexport OPENAI_API_KEY=\"your-openai-key\"\nexport ANTHROPIC_API_KEY=\"your-anthropic-key\"  # Optional\nexport MISTRAL_API_KEY=\"your-mistral-key\"       # Optional\n```\n\n### Custom Retrieval Configuration\n\nCreate a YAML configuration file for customized retrieval strategies:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [\"retrieval\"]\n    - name: \"retrieval\"\n      edges: [\"generation\"]\n    - name: \"generation\"\n      edges: [\"END\"]\n  llm:\n    temperature: 0.7\n```\n\nSource: 
[README.md:65-85](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### RetrievalConfig Usage\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\n# Load from YAML file\nretrieval_config = RetrievalConfig.from_yaml(\"./basic_rag_workflow.yaml\")\n\n# Use with brain.ask()\nanswer = brain.ask(\n    question=\"Your question here\",\n    retrieval_config=retrieval_config\n)\n```\n\n## Supported File Types\n\nQuivr works with various file formats through pluggable processors:\n\n| File Type | Extension | Processor |\n|-----------|-----------|-----------|\n| Plain Text | `.txt` | SimpleTxtProcessor |\n| PDF | `.pdf` | TikaProcessor |\n| Markdown | `.md` | MarkdownProcessor |\n| CSV | `.csv` | CSVProcessor |\n| JSON | `.json` | JSONProcessor |\n\nSource: [README.md:31-35](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Advanced Usage\n\n### Streaming Responses\n\nFor real-time response streaming:\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"streaming_brain\",\n    file_paths=[\"./documents/\"],\n)\n\nfor chunk in brain.answer_astream(\"Explain the findings\"):\n    if chunk.last_chunk:\n        break\n    print(chunk.answer, end=\"\", flush=True)\n```\n\n### Custom Processors\n\nImplement your own processor by extending `ProcessorBase`:\n\n```python\nfrom quivr_core.processor.processor_base import ProcessorBase, ProcessedDocument\n\nclass CustomProcessor(ProcessorBase):\n    supported_extensions = [\".custom\"]\n    \n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument:\n        # Implement custom parsing logic\n        pass\n```\n\n## Project Structure\n\n```\nquivr/\n├── README.md                    # Main documentation\n├── core/\n│   ├── README.md               # quivr-core package info\n│   └── quivr_core/\n│       ├── brain/\n│       │   └── brain.py        # Brain class implementation\n│       ├── files/\n│       │   └── file.py         # QuivrFile dataclass\n│       ├── 
processor/\n│       │   ├── processor_base.py\n│       │   └── implementations/\n│       └── rag/\n│           ├── quivr_rag.py    # RAG implementation\n│           └── prompts.py      # LLM prompts\n├── examples/\n│   ├── simple_question/        # Basic usage examples\n│   ├── chatbot/                # Chainlit chatbot example\n│   └── pdf_parsing_tika.py     # PDF processing example\n```\n\n## Next Steps\n\nAfter completing the Getting Started guide:\n\n1. **Explore Examples**: Check the `examples/` directory for more use cases\n2. **Read Documentation**: Visit [core.quivr.com](https://core.quivr.com/) for full documentation\n3. **Join Community**: Connect with other users on [Discord](https://discord.gg/HUpRgp2HG8)\n4. **Contribute**: Check [open issues](https://github.com/quivrhq/quivr/issues) for contribution opportunities\n\n## Troubleshooting\n\n### Common Issues\n\n| Issue | Solution |\n|-------|----------|\n| ImportError: No module named 'quivr_core' | Run `pip install quivr-core` |\n| API Key not found | Set environment variable before running |\n| File parsing fails | Verify file format and size (max 20MB for examples) |\n| Slow retrieval | Adjust `n_results` parameter or use local embeddings |\n\n### Getting Help\n\n- **Documentation**: [https://core.quivr.com/](https://core.quivr.com/)\n- **Discord**: [Join our community](https://discord.gg/HUpRgp2HG8)\n- **GitHub Issues**: [Report bugs](https://github.com/quivrhq/quivr/issues)\n\n---\n\n<a id='page-installation'></a>\n\n## Installation\n\n### Related Pages\n\nRelated topics: [Getting Started](#page-getting-started), [LLM Integration](#page-llm-integration)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n- [examples/chatbot/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot/README.md)\n- 
[examples/chatbot_voice/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot_voice/README.md)\n- [examples/quivr-whisper/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/quivr-whisper/README.md)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n</details>\n\n# Installation\n\nThis guide covers all aspects of installing and setting up Quivr, including the core package, examples, and required dependencies.\n\n## Overview\n\nQuivr is a RAG (Retrieval-Augmented Generation) framework that enables users to create AI-powered knowledge bases from various file types. The installation process varies depending on your use case:\n\n- **Core package installation** for integrating Quivr into existing Python projects\n- **Example applications** for testing and learning purposes\n- **Development setup** for contributing to the project\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n## Prerequisites\n\n### System Requirements\n\n| Requirement | Minimum | Recommended |\n|-------------|---------|-------------|\n| Python Version | 3.8+ | 3.10+ |\n| Operating System | Linux, macOS, Windows | Linux/macOS |\n| RAM | 4 GB | 8 GB+ |\n| Disk Space | 500 MB | 1 GB+ |\n\n**Note:** While the core package officially requires Python 3.10 or newer for full compatibility, some examples (such as chatbot_voice) support Python 3.8 or higher.\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [examples/chatbot_voice/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot_voice/README.md)\n\n### API Keys\n\nQuivr supports multiple LLM providers. 
You must configure at least one API key:\n\n| Provider | Environment Variable | Required |\n|----------|---------------------|----------|\n| OpenAI | `OPENAI_API_KEY` | Yes (if using OpenAI) |\n| Anthropic | `ANTHROPIC_API_KEY` | Yes (if using Anthropic) |\n| Mistral | `MISTRAL_API_KEY` | Yes (if using Mistral) |\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n## Installing the Core Package\n\n### Using pip (Recommended)\n\nThe simplest way to install Quivr Core is via pip:\n\n```bash\npip install quivr-core\n```\n\nVerify the installation by checking that the package is importable:\n\n```python\nfrom quivr_core import Brain\n```\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n### Package Contents\n\nThe `quivr-core` package includes:\n\n- **Brain class**: The main interface for creating and managing knowledge bases\n- **RAG components**: Retrieval, processing, and answer generation pipelines\n- **Built-in processors**: Support for PDF, TXT, Markdown, and other common formats\n- **Default configurations**: Ready-to-use settings for quick setup\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Installation for Examples\n\n### Chatbot Example with Chainlit\n\nThe chatbot example demonstrates file upload and Q&A capabilities using Chainlit.\n\n#### Using rye (Recommended)\n\n```bash\n# Clone or navigate to the chatbot directory\ncd examples/chatbot\n\n# Install dependencies with rye\nrye sync\n\n# Activate the virtual environment\nsource ./venv/bin/activate\n```\n\n#### Using pip\n\n```bash\n# Navigate to the chatbot directory\ncd examples/chatbot\n\n# Install from requirements\npip install -r requirements.txt\n```\n\nSource: [examples/chatbot/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot/README.md)\n\n### Voice Chatbot Example\n\nThe voice chatbot example adds voice interaction capabilities.\n\n```bash\n# Navigate to the voice chatbot directory\ncd examples/chatbot_voice\n\n# Install dependencies\npip install -r requirements.lock\n```\n\nSource: [examples/chatbot_voice/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot_voice/README.md)\n\n### Flask-based 
Example (quivr-whisper)\n\nThis example uses Flask for a web server implementation.\n\n```bash\n# Install Flask and dependencies\npip install flask openai requests python-dotenv\n```\n\nSource: [examples/quivr-whisper/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/quivr-whisper/README.md)\n\n### Simple Question Example\n\nThe simplest example demonstrating basic usage:\n\n```python\n# Create a Python script with the following content\nimport tempfile\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"your_file.txt\"],\n)\n\nanswer = brain.ask(\"Your question here\")\nprint(answer)\n```\n\nRequires `python-dotenv` for loading environment variables.\n\nSource: [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Environment Configuration\n\n### Required Environment Variables\n\nCreate a `.env` file in your project root:\n\n```env\n# LLM API Keys (at least one required)\nOPENAI_API_KEY=your_openai_api_key\n# ANTHROPIC_API_KEY=your_anthropic_key\n# MISTRAL_API_KEY=your_mistral_key\n\n# Optional: For specific integrations\nQUIVR_API_KEY=your_quivr_api_key\nQUIVR_CHAT_ID=your_chat_id\nQUIVR_BRAIN_ID=your_brain_id\nQUIVR_URL=https://api.quivr.app\n```\n\n### Loading Environment Variables\n\nUse `python-dotenv` to load environment variables:\n\n```python\nimport dotenv\ndotenv.load_dotenv()\n```\n\nSource: [examples/quivr-whisper/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/quivr-whisper/README.md), [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Installation Flow\n\n```mermaid\ngraph TD\n    A[Start Installation] --> B{Choose Installation Type}\n    \n    B --> C[Core Package Only]\n    B --> D[Examples & Demos]\n    B --> E[Development Setup]\n    \n    C --> F[pip install quivr-core]\n    F --> G[Set API Keys]\n    G --> H[Ready to Integrate]\n    \n    D --> I{Which Example?}\n    \n    I --> J[Chatbot with Chainlit]\n    I --> K[Voice Chatbot]\n    I --> L[Flask App]\n    \n    J --> M[rye sync or pip install]\n    K --> N[pip install -r requirements.lock]\n    L --> O[pip install flask openai requests]\n    \n    M --> 
P[Run with chainlit run main.py]\n    N --> Q[Run with chainlit run main.py]\n    O --> R[Run with flask run]\n    \n    E --> S[Clone Repository]\n    S --> T[Navigate to core/]\n    T --> U[Install from pyproject.toml]\n    U --> V[Run Tests]\n```\n\n## Verifying Installation\n\n### Quick Verification\n\nAfter installation, verify that Quivr is correctly installed:\n\n```python\nimport quivr_core\nprint(quivr_core.__version__)  # Should print the installed version\n```\n\n### Full Installation Test\n\nCreate a test script to verify all components:\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\n# Create a temporary test file\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\", delete=False) as f:\n    f.write(\"Quivr is a RAG framework for building AI knowledge bases.\")\n    temp_path = f.name\n\n# Create a brain from the file\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[temp_path],\n)\n\n# Test the ask function\nanswer = brain.ask(\"What is Quivr?\")\nprint(f\"Answer: {answer}\")\n\n# Print brain info\nbrain.print_info()\n```\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Troubleshooting\n\n### Common Issues\n\n| Issue | Cause | Solution |\n|-------|-------|----------|\n| ImportError: No module named 'quivr_core' | Package not installed | Run `pip install quivr-core` |\n| AuthenticationError | Invalid API key | Verify your API key is correct |\n| Version mismatch | Incompatible Python version | Ensure Python 3.10+ is used |\n| File not found | Incorrect file path | Check the file path exists |\n\n### Checking Installed Version\n\nThe Quivr version is automatically tracked in document metadata:\n\n```python\nfrom quivr_core.processor.processor_base import get_version\n\ntry:\n    from importlib.metadata import PackageNotFoundError, version\n    qvr_version = version(\"quivr-core\")\nexcept PackageNotFoundError:\n    qvr_version = \"dev\"\n```\n\nSource: 
[core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Next Steps\n\nAfter successful installation:\n\n1. **Create your first Brain**: Load documents and create a knowledge base\n2. **Configure RAG**: Customize retrieval strategies using YAML configuration files\n3. **Explore Examples**: Test different example applications to understand capabilities\n4. **Read Documentation**: Visit [core.quivr.com](https://core.quivr.com/) for advanced usage\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [examples/chatbot/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot/README.md)\n\n---\n\n<a id='page-architecture-overview'></a>\n\n## System Architecture Overview\n\n### Related Pages\n\nRelated topics: [Core Components](#page-core-components), [Brain Class](#page-brain-class), [RAG Implementation](#page-rag-implementation)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n- [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n- [core/quivr_core/rag/prompts.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/prompts.py)\n</details>\n\n# System Architecture Overview\n\n## Introduction\n\nQuivr is an open-source framework for building RAG (Retrieval Augmented Generation) applications that enables users to create personal \"brains\" from various document types. 
The system leverages generative AI to provide intelligent question-answering capabilities over user-provided documents.\n\nThe architecture is designed around three core pillars:\n\n- **Document Processing**: Extracting and chunking content from files\n- **Vector Storage**: Storing embeddings for semantic search\n- **LLM-powered Answer Generation**: Using large language models to generate answers based on retrieved context\n\n## High-Level Architecture\n\n```mermaid\ngraph TD\n    A[User Files] --> B[File Processing]\n    B --> C[Chunking & Embedding]\n    C --> D[Vector Database]\n    E[User Query] --> F[Embedding]\n    F --> G[Semantic Search]\n    G --> H[Context Assembly]\n    H --> I[LLM Generation]\n    I --> J[Answer Response]\n    \n    D --> G\n```\n\n## Core Components\n\n### Brain Class\n\nThe `Brain` class is the central orchestrator of the Quivr framework. It manages the entire lifecycle from file ingestion to query answering.\n\n**File**: `core/quivr_core/brain/brain.py`\n\n#### Key Responsibilities\n\n| Responsibility | Description |\n|----------------|-------------|\n| File Ingestion | Processes files through various parsers |\n| Vector Storage | Manages the vector database for embeddings |\n| Query Processing | Handles search and retrieval operations |\n| Answer Generation | Orchestrates LLM-based response generation |\n\n#### Core Methods\n\n| Method | Type | Purpose |\n|--------|------|---------|\n| `from_files` | Synchronous | Create a Brain from file paths |\n| `afrom_files` | Async | Async creation from files |\n| `afrom_langchain_documents` | Async | Create from LangChain Document objects |\n| `asearch` | Async | Search for relevant documents |\n| `ask_streaming` | Async Generator | Stream answers to questions |\n\n#### Initialization Parameters\n\n```python\nclass Brain:\n    def __init__(\n        self,\n        id: UUID,\n        name: str,\n        storage: BrainStorage | None,\n        llm: LLMEndpoint,\n        embedder: Embeddings,\n   
     vector_db: QuivrVectorStore,\n    )\n```\n\nSource: `core/quivr_core/brain/brain.py:1-100`\n\n### File Processing Pipeline\n\nThe processor system handles extraction and transformation of various file formats.\n\n**File**: `core/quivr_core/processor/processor_base.py`\n\n#### Processing Flow\n\n```mermaid\ngraph LR\n    A[QuivrFile] --> B[process_file]\n    B --> C[process_file_inner]\n    C --> D[ProcessedDocument]\n    D --> E[Metadata Enrichment]\n    E --> F[Chunk Output]\n```\n\n#### Metadata Enrichment\n\nDuring processing, each chunk receives comprehensive metadata:\n\n```python\ndoc.metadata = {\n    \"chunk_index\": idx,\n    \"quivr_core_version\": qvr_version,\n    \"language\": detect_language(text=...),\n    **file.metadata,\n    **doc.metadata,\n    **self.processor_metadata,\n}\n```\n\nSource: `core/quivr_core/processor/processor_base.py:20-35`\n\n### QuivrFile Data Model\n\n**File**: `core/quivr_core/files/file.py`\n\nThe `QuivrFile` class represents uploaded files with their associated metadata.\n\n```python\nclass QuivrFile:\n    __slots__ = [\n        \"id\",\n        \"brain_id\",\n        \"path\",\n        \"original_filename\",\n        \"file_size\",\n        \"file_extension\",\n        \"file_sha1\",\n        \"additional_metadata\",\n    ]\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | UUID | Unique identifier |\n| `brain_id` | UUID | Associated brain identifier |\n| `path` | Path | File system path |\n| `original_filename` | str | Original file name |\n| `file_size` | int | File size in bytes |\n| `file_extension` | FileExtension | File type |\n| `file_sha1` | str | SHA1 hash for deduplication |\n\nSource: `core/quivr_core/files/file.py:1-50`\n\n### Retrieval and Answer Generation\n\n## RAG Architecture\n\nThe RAG (Retrieval Augmented Generation) system combines semantic search with LLM-powered answer generation.\n\n**File**: `core/quivr_core/rag/quivr_rag_langgraph.py`\n\n#### RAG 
Workflow\n\n```mermaid\ngraph TD\n    A[User Question] --> B[Filter History]\n    B --> C[Query Rewrite]\n    C --> D[Retrieval]\n    D --> E[Context Assembly]\n    E --> F[LLM Generation]\n    F --> G[Streaming Response]\n    \n    H[System Prompt] --> F\n    I[Chat History] --> B\n    J[File Filters] --> D\n```\n\n#### Configuration\n\nThe system uses `RetrievalConfig` for configuring the RAG pipeline:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `llm_config` | LLMEndpointConfig | Brain's LLM | LLM model configuration |\n| `temperature` | float | 0.7 | Generation temperature |\n| `n_results` | int | 5 | Number of retrieval results |\n\nSource: `core/quivr_core/rag/quivr_rag.py:1-50`\n\n### Streaming Answer Generation\n\nThe system supports streaming responses for real-time answer delivery:\n\n```python\nasync for response in rag_instance.answer_astream(\n    run_id=run_id,\n    question=question,\n    system_prompt=system_prompt or None,\n    history=chat_history,\n    list_files=list_files,\n    metadata=metadata,\n):\n    if not response.last_chunk:\n        yield response\n```\n\nSource: `core/quivr_core/brain/brain.py:150-170`\n\n## Prompt Templates\n\n**File**: `core/quivr_core/rag/prompts.py`\n\nThe system uses structured prompts with multiple context sections:\n\n| Section | Purpose |\n|---------|---------|\n| `user_metadata` | User-specific context |\n| `ticket_metadata` | Query-related metadata |\n| `similar_tickets` | Reference information |\n| `ticket_history` | Conversation history |\n| `additional_information` | Extra context |\n| `client_query` | The actual question |\n\nDefault instructions ensure consistent response quality:\n- Verbosity matching similar responses\n- Proper formatting with paragraphs, bold, italic\n- Language consistency with the query\n- No signature at end of response\n\n## LLM Integration\n\n### Default LLM Configuration\n\nThe system supports multiple LLM providers:\n\n| 
Provider | Configuration |\n|----------|---------------|\n| OpenAI | `OPENAI_API_KEY` environment variable |\n| Anthropic | Anthropic API support |\n| Mistral | Mistral API support |\n| Ollama | Local model support |\n\nSource: README.md - Configuration section\n\n### Embedder Abstraction\n\nEmbedders are abstracted to support multiple backends:\n\n```python\nif embedder is None:\n    embedder = default_embedder()\n```\n\nVector DB initialization with embeddings:\n\n```python\nif vector_db is None:\n    vector_db = await build_default_vectordb(langchain_documents, embedder)\n```\n\nSource: `core/quivr_core/brain/brain.py:60-70`\n\n## Search Capabilities\n\n### Async Search Method\n\n```python\nasync def asearch(\n    self,\n    query: str | Document,\n    n_results: int = 5,\n    filter: Callable | Dict[str, Any] | None = None,\n    fetch_n_neighbors: int = 20,\n) -> list[SearchResult]\n```\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `query` | str \\| Document | Required | Search query |\n| `n_results` | int | 5 | Number of results |\n| `filter` | Callable \\| Dict | None | Custom filtering |\n| `fetch_n_neighbors` | int | 20 | Extended fetch for re-ranking |\n\nSource: `core/quivr_core/brain/brain.py:70-85`\n\n## State Management\n\n### Brain ID Generation\n\nEach brain receives a unique identifier:\n\n```python\nbrain_id = uuid4()\n```\n\n### Workspace and Chat Context\n\nThe system tracks conversation context:\n\n```python\nmetadata = LangchainMetadata(\n    langfuse_trace_id=str(run_id),\n    langfuse_user_id=str(self.workspace_id),\n    langfuse_session_id=str(self.chat_id),\n)\n```\n\n## Installation and Dependencies\n\n### Core Package\n\n```bash\npip install quivr-core\n```\n\n### Quick Start Example\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n\nanswer = brain.ask(\"What is the 
main topic?\")\n```\n\n## Summary\n\nThe Quivr architecture implements a modular RAG pipeline where:\n\n1. **Files** are processed and chunked with metadata enrichment\n2. **Embeddings** are generated and stored in a vector database\n3. **Queries** trigger semantic search with configurable filters\n4. **LLMs** generate contextual answers from retrieved content\n5. **Streaming** enables real-time response delivery\n\nThe design prioritizes flexibility through dependency injection, allowing custom LLM providers, embedders, and vector stores while maintaining a consistent API for brain creation and querying.\n\n---\n\n<a id='page-core-components'></a>\n\n## Core Components\n\n### Related Pages\n\nRelated topics: [System Architecture Overview](#page-architecture-overview), [Brain Class](#page-brain-class), [LLM Integration](#page-llm-integration)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n- [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n- [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n</details>\n\n# Core Components\n\nQuivr is an open-source RAG (Retrieval-Augmented 
Generation) framework that enables users to create \"brains\" from various document types and query them using natural language. The core components form the architectural foundation that powers document processing, vector storage, retrieval, and LLM-based answer generation.\n\n## Architecture Overview\n\nQuivr's architecture follows a modular design with clear separation of concerns across several key layers:\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Brain.ask]\n    B --> C[RetrievalConfig]\n    C --> D[QuivrQARAGLangGraph]\n    D --> E[Vector Store Retrieval]\n    E --> F[Context Chunks]\n    F --> G[LLM Generation]\n    G --> H[Streaming Response]\n    \n    I[Files] --> J[Processor]\n    J --> K[ProcessedDocument]\n    K --> L[Vector DB Indexing]\n    L --> E\n    \n    M[LLM Config] --> G\n    N[Embedder] --> L\n```\n\nThe system is built around three primary abstractions:\n\n| Component | Purpose | Key Files |\n|-----------|---------|-----------|\n| **Brain** | Central orchestrator managing files, vectors, and LLM interactions | brain.py |\n| **RAG Engine** | Handles retrieval and answer generation pipeline | quivr_rag.py |\n| **Processors** | Parse and chunk various file formats | processor_base.py |\n\n---\n\n## Brain Class\n\nThe `Brain` class is the central entry point for all Quivr operations. 
It encapsulates file management, vector storage, and LLM-based question answering.\n\n### Initialization Methods\n\nThe Brain class provides multiple factory methods for creation:\n\n| Method | Description | Use Case |\n|--------|-------------|----------|\n| `from_files()` | Synchronous creation from file paths | Simple scripts, examples |\n| `afrom_files()` | Async creation from file paths | Production async applications |\n| `from_langchain_documents()` | From pre-processed LangChain documents | Advanced integration scenarios |\n| `afrom_langchain_documents()` | Async version | Async workflows with external processing |\n\n### Creating a Brain from Files\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n```\n\nThe `from_files()` method automatically:\n\n1. Processes each file using the appropriate processor based on file extension\n2. Chunks documents according to the configured splitter settings\n3. Generates embeddings using the configured embedder\n4. Stores vectors in the configured vector database\n5. Returns an initialized Brain instance ready for queries\n\nSource: [examples/simple_question/simple_question.py:1-17](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n### Querying the Brain\n\n```python\nanswer = brain.ask(\n    \"what is gold? 
answer in french\",\n    retrieval_config=retrieval_config\n)\nprint(\"answer:\", answer.answer)\n```\n\nThe `ask()` method supports:\n\n- Streaming responses via `ask_streaming()`\n- Custom retrieval configurations\n- Chat history management\n- System prompt customization\n- File filtering during retrieval\n\nSource: [core/quivr_core/brain/brain.py:150-200](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n---\n\n## Retrieval Configuration\n\nThe `RetrievalConfig` class controls how documents are retrieved and how the LLM generates responses.\n\n### Configuration Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `llm_config` | `LLMEndpointConfig` | Brain's default LLM | LLM model and endpoint settings |\n| `n_results` | `int` | `5` | Number of documents to retrieve |\n| `fetch_n_neighbors` | `int` | `20` | Neighbors to fetch for re-ranking |\n| `llm_base_url` | `str` | `None` | Custom LLM endpoint base URL |\n| `llm_api_key` | `str` | Environment var | API key for LLM service |\n\n### YAML Configuration\n\nQuivr supports YAML-based workflow configuration:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [\"retrieval\"]\n```\n\nConfiguration can be loaded from YAML files:\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\nretrieval_config = RetrievalConfig.from_yaml(\"./basic_rag_workflow.yaml\")\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n---\n\n## LLM Integration\n\nThe LLM layer is abstracted through the `LLMEndpoint` class, providing flexibility to use different LLM providers.\n\n### Supported Providers\n\nQuivr supports multiple LLM providers:\n\n- **OpenAI** (GPT models)\n- **Anthropic** (Claude models)\n- **Mistral**\n- **Local models** via 
Ollama\n\n### LLM Configuration\n\n```python\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\nllm = LLMEndpoint.from_config(\n    config=LLMEndpointConfig(\n        model=\"gpt-4\",\n        llm_base_url=\"https://api.openai.com/v1\",\n        temperature=0.7\n    )\n)\n```\n\n### Embedding Configuration\n\nEmbeddings are generated using the configured embedder. If no embedder is specified, Quivr uses the default embedder:\n\n```python\nif embedder is None:\n    embedder = default_embedder()\n```\n\nThe embedder is critical for:\n\n- Converting document chunks into vector representations\n- Encoding user queries for similarity search\n- Enabling semantic retrieval over the document corpus\n\nSource: [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n\n---\n\n## File Processing Pipeline\n\nThe processor system handles parsing various file formats and converting them into chunked documents suitable for vector storage.\n\n### Processor Architecture\n\n```mermaid\ngraph LR\n    A[QuivrFile] --> B[ProcessorBase]\n    B --> C[process_file_inner]\n    C --> D[ProcessedDocument]\n    D --> E[Splitter]\n    E --> F[Chunked Documents]\n    F --> G[Metadata Enrichment]\n    G --> H[Vector DB Indexing]\n```\n\n### Processor Base Class\n\nAll processors inherit from `ProcessorBase`, which provides:\n\n- Async file processing interface\n- Automatic metadata enrichment\n- Language detection\n- Chunk indexing\n\n```python\nclass ProcessorBase(ABC, Generic[R]):\n    @abstractmethod\n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[R]:\n        raise NotImplementedError\n```\n\n### Metadata Enrichment\n\nDuring processing, each chunk receives enriched metadata:\n\n```python\ndoc.metadata = {\n    \"chunk_index\": idx,\n    \"quivr_core_version\": qvr_version,\n    \"language\": detect_language(\n        
text=doc.page_content,\n        low_memory=True,\n    ).value,\n    **file.metadata,\n    **doc.metadata,\n    **self.processor_metadata,\n}\n```\n\nSource: [core/quivr_core/processor/processor_base.py:40-55](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n### Supported Processors\n\n| Processor | Extensions | Description |\n|-----------|------------|-------------|\n| `SimpleTxtProcessor` | `.txt` | Plain text file processing |\n| `PdfProcessor` | `.pdf` | PDF document parsing |\n| `MarkdownProcessor` | `.md` | Markdown file parsing |\n\n### Text File Processing\n\nThe `SimpleTxtProcessor` demonstrates the recursive chunking strategy:\n\n```python\ndef recursive_character_splitter(\n    doc: Document, chunk_size: int, chunk_overlap: int\n) -> list[Document]:\n    if len(doc.page_content) <= chunk_size:\n        return [doc]\n    \n    chunk = Document(page_content=doc.page_content[:chunk_size], metadata=doc.metadata)\n    remaining = Document(\n        page_content=doc.page_content[chunk_size - chunk_overlap:],\n        metadata=doc.metadata,\n    )\n    return [chunk] + recursive_character_splitter(remaining, chunk_size, chunk_overlap)\n```\n\nKey constraints enforced:\n\n- `chunk_overlap` must be less than `chunk_size`\n- Original file names are prepended to chunk content\n- Null characters are removed\n- UTF-8 encoding is enforced\n\nSource: [core/quivr_core/processor/implementations/simple_txt_processor.py:15-30](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n---\n\n## RAG Engine\n\nThe RAG engine coordinates the retrieval and generation pipeline, transforming user queries into contextual answers.\n\n### Answer Generation Flow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Brain\n    participant RAG as QuivrQARAGLangGraph\n    participant VectorDB\n    participant LLM\n    \n    User->>Brain: ask(question)\n    Brain->>RAG: 
answer_astream()\n    RAG->>VectorDB: search(query, n_results)\n    VectorDB-->>RAG: relevant_chunks\n    RAG->>LLM: generate(context, question)\n    LLM-->>RAG: streaming_response\n    RAG-->>User: yield chunks\n```\n\n### Streaming Responses\n\nThe `ask_streaming()` method returns an async generator:\n\n```python\nasync for chunk in brain.ask_streaming(\"What is the meaning of life?\"):\n    print(chunk.answer)\n```\n\nEach chunk contains:\n\n- `answer`: The partial or complete response text\n- `sources`: Relevant document metadata\n- `last_chunk`: Boolean flag indicating completion\n- `metadata`: Additional context about the generation\n\n### Chat History Integration\n\nThe RAG engine maintains chat history for conversational queries:\n\n```python\nchat_history = self.default_chat if chat_history is None else chat_history\n\nfull_answer = \"\"\nasync for response in rag_instance.answer_astream(\n    question=question,\n    history=chat_history,\n    list_files=list_files,\n    metadata=metadata,\n):\n    # Process streaming chunks\n```\n\nSource: [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n\n---\n\n## Brain Serialization and Persistence\n\nBrains can be serialized to disk and reloaded for persistence across sessions.\n\n### Save Operation\n\n```python\nsave_path = await brain.save(\"/home/user/.local/quivr\")\n```\n\n### Load Operation\n\n```python\nbrain_loaded = Brain.load(save_path)\nbrain_loaded.print_info()\n```\n\nSource: [examples/save_load_brain.py](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n\n---\n\n## Search Functionality\n\nThe Brain class provides direct search capabilities for retrieving relevant documents without generating LLM responses.\n\n### Search Parameters\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `query` | `str \\| Document` | Search query text or LangChain document |\n| `n_results` | `int` | Number of 
results to return (default: 5) |\n| `filter` | `Callable \\| Dict \\| None` | Optional metadata filtering |\n| `fetch_n_neighbors` | `int` | Neighbors to fetch for re-ranking (default: 20) |\n\n### Search Return Type\n\nThe `asearch()` method returns a list of `SearchResult` objects containing:\n\n- Matched document chunks\n- Similarity scores\n- Metadata about the source documents\n\n---\n\n## Configuration Summary\n\n### Environment Variables\n\n| Variable | Required | Description |\n|----------|----------|-------------|\n| `OPENAI_API_KEY` | Yes (for OpenAI) | API key for OpenAI models |\n| `ANTHROPIC_API_KEY` | Yes (for Claude) | API key for Anthropic models |\n| `OPENAI_API_BASE` | No | Custom API base URL |\n\n### Default Configuration\n\nWhen no configuration is provided, Quivr uses sensible defaults:\n\n```python\nif llm is None:\n    llm = default_llm()\n\nif embedder is None:\n    embedder = default_embedder()\n\nif vector_db is None:\n    vector_db = await build_default_vectordb(langchain_documents, embedder)\n```\n\nThis ensures that users can get started with minimal configuration while still allowing full customization.\n\n---\n\n## Next Steps\n\n- **Workflows**: Explore YAML-based workflow configurations for advanced retrieval strategies\n- **Custom Processors**: Implement the `ProcessorBase` interface to support additional file formats\n- **LLM Providers**: Configure different LLM providers based on your requirements\n- **Deployment**: Review deployment documentation for production setups\n\n---\n\n<a id='page-brain-class'></a>\n\n## Brain Class\n\n### Related Pages\n\nRelated topics: [RAG Implementation](#page-rag-implementation), [File Processing and Parsers](#page-file-processing), [Storage System](#page-storage)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- 
[core/quivr_core/brain/__init__.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/__init__.py)\n- [examples/save_load_brain.py](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n- [examples/pdf_parsing_tika.py](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n</details>\n\n# Brain Class\n\nThe `Brain` class is the central abstraction in Quivr for building Retrieval Augmented Generation (RAG) systems. It serves as the primary interface for creating knowledge bases from documents, performing semantic search, and generating AI-powered answers based on retrieved context.\n\n## Overview\n\nThe Brain class encapsulates:\n\n- **Vector storage** for semantic document indexing\n- **LLM integration** for answer generation\n- **Embedder configuration** for document vectorization\n- **File processing pipeline** for document ingestion\n- **Chat history management** for conversational context\n\nA Brain can be created from files (PDF, TXT, Markdown, etc.) 
or from LangChain documents, then queried to generate context-aware responses.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Files / Documents] --> B[Brain Class]\n    B --> C[Vector DB]\n    B --> D[LLM]\n    B --> E[Embedder]\n    B --> F[Storage]\n    G[Query] --> B\n    B --> H[QuivrQARAGLangGraph]\n    H --> I[Answer]\n    C --> H\n```\n\n## Creating a Brain\n\n### From Files\n\nThe most common way to create a Brain is from a collection of files:\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n```\n\nSource: [examples/simple_question/simple_question.py:1-14](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n### From LangChain Documents\n\nFor programmatic document creation:\n\n```python\nfrom langchain_core.documents import Document\nfrom quivr_core import Brain\n\ndocuments = [Document(page_content=\"Hello, world!\")]\nbrain = await Brain.afrom_langchain_documents(name=\"My Brain\", langchain_documents=documents)\n```\n\nSource: [core/quivr_core/brain/brain.py:1-150](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### With Custom LLM and Embedder\n\nOverride default configurations with custom implementations:\n\n```python\nfrom langchain_core.embeddings import DeterministicFakeEmbedding\nfrom langchain_core.language_models import FakeListChatModel\nfrom quivr_core import Brain\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"tests/processor/data/dummy.pdf\"],\n    llm=LLMEndpoint(\n        llm=FakeListChatModel(responses=[\"good\"]),\n        llm_config=LLMEndpointConfig(model=\"fake_model\", llm_base_url=\"local\"),\n    ),\n    embedder=DeterministicFakeEmbedding(size=20),\n)\n```\n\nSource: 
[examples/pdf_parsing_tika.py:1-20](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n\n## Core Methods\n\n### Asking Questions\n\n#### Synchronous\n\n```python\nanswer = brain.ask(\n    \"what is gold? answer in french\",\n    retrieval_config=retrieval_config\n)\nprint(\"answer:\", answer)\n```\n\n#### Asynchronous Streaming\n\n```python\nasync for chunk in brain.ask_streaming(\"What is the meaning of life?\"):\n    print(chunk.answer)\n```\n\nSource: [core/quivr_core/brain/brain.py:150-250](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### Search\n\n#### Async Search\n\n```python\nresults = await brain.asearch(\n    query=\"your search query\",\n    n_results=5,\n    filter=None,\n    fetch_n_neighbors=20\n)\n```\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `query` | `str \\| Document` | Required | The search query |\n| `n_results` | `int` | 5 | Number of results to return |\n| `filter` | `Callable \\| Dict \\| None` | None | Optional filter for results |\n| `fetch_n_neighbors` | `int` | 20 | Number of neighbors to fetch |\n\nSource: [core/quivr_core/brain/brain.py:100-120](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n## Storage and Persistence\n\n### Saving a Brain\n\nBrains can be persisted to disk for later reuse:\n\n```python\nsave_path = await brain.save(\"/home/user/.local/quivr\")\n```\n\nSource: [examples/save_load_brain.py:1-22](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n\n### Loading a Brain\n\n```python\nbrain_loaded = Brain.load(save_path)\nbrain_loaded.print_info()\n```\n\nSource: [examples/save_load_brain.py:18](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n\n## File Processing\n\n### QuivrFile Entity\n\nThe Brain processes files through the `QuivrFile` class:\n\n```python\nclass QuivrFile:\n    __slots__ = [\n        \"id\",\n        \"brain_id\",\n 
       \"path\",\n        \"original_filename\",\n        \"file_size\",\n        \"file_extension\",\n        \"file_sha1\",\n        \"additional_metadata\",\n    ]\n```\n\nSource: [core/quivr_core/files/file.py:20-35](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n### File Metadata\n\nDuring processing, each chunk receives metadata including:\n\n| Field | Description |\n|-------|-------------|\n| `chunk_index` | Position of the chunk in the document |\n| `quivr_core_version` | Version of quivr-core used |\n| `language` | Detected language of the content |\n| `original_file_name` | Source filename |\n\nSource: [core/quivr_core/processor/processor_base.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Retrieval Configuration\n\nThe `RetrievalConfig` controls the RAG pipeline behavior:\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\nconfig_file_name = \"./basic_rag_workflow.yaml\"\nretrieval_config = RetrievalConfig.from_yaml(config_file_name)\n```\n\n### YAML Configuration Example\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [...]\n```\n\n## Complete Usage Example\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\") as temp_file:\n    temp_file.write(\"Gold is a liquid of blue-like colour.\")\n    temp_file.flush()\n\n    brain = Brain.from_files(\n        name=\"test_brain\",\n        file_paths=[temp_file.name],\n    )\n\n    answer = brain.ask(\"what is gold? 
answer in french\")\n    print(\"answer:\", answer)\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Attributes Summary\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `id` | `UUID` | Unique identifier for the brain |\n| `name` | `str` | Human-readable name |\n| `vector_db` | `VectorStore` | Vector storage for embeddings |\n| `llm` | `LLMEndpoint` | Language model for generation |\n| `embedder` | `Embeddings` | Embedding model for vectorization |\n| `storage` | `BrainStorage` | Persistence layer |\n\n## Module Export\n\nThe Brain class is exported from the main `quivr_core` package:\n\n```python\nfrom quivr_core import Brain\n```\n\nSource: [core/quivr_core/brain/__init__.py:1-5](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/__init__.py)\n\n---\n\n<a id='page-rag-implementation'></a>\n\n## RAG Implementation\n\n### Related Pages\n\nRelated topics: [Brain Class](#page-brain-class)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n- [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n- [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n- [core/quivr_core/rag/entities/chat.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/chat.py)\n- [core/quivr_core/rag/prompts.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/prompts.py)\n</details>\n\n# RAG Implementation\n\n## Overview\n\nThe RAG (Retrieval-Augmented Generation) implementation in Quivr provides a flexible, opinionated framework for building Retrieval-Augmented Generation pipelines. 
It combines vector-based document retrieval with Large Language Model (LLM) generation to enable question-answering over uploaded documents and files.\n\nThe implementation supports both synchronous and streaming responses, configurable retrieval strategies, and integration with multiple LLM providers including OpenAI, Anthropic, Mistral, and local models via Ollama.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Brain.ask]\n    B --> C[RAG Pipeline]\n    C --> D[Retrieval Phase]\n    C --> E[Generation Phase]\n    D --> F[Vector DB Search]\n    F --> G[Relevant Chunks]\n    E --> H[LLM Processing]\n    G --> H\n    H --> I[Streaming Response]\n    I --> J[ParsedRAGChunkResponse]\n```\n\n## Core Components\n\n### Brain Class\n\nThe `Brain` class serves as the main entry point for RAG operations. It orchestrates document processing, retrieval, and generation.\n\n**Key Methods:**\n\n| Method | Description | Return Type |\n|--------|-------------|-------------|\n| `from_files()` | Create a brain from file paths | `Brain` |\n| `afrom_langchain_documents()` | Create a brain from LangChain documents | `Brain` |\n| `ask()` | Synchronous question answering | `RAGResponse` |\n| `asearch()` | Search for relevant documents | `list[SearchResult]` |\n\nSource: [core/quivr_core/brain/brain.py:1-100](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### RAG Pipeline Flow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Brain\n    participant Retriever\n    participant VectorDB\n    participant LLM\n    participant Response\n    \n    User->>Brain: ask(question)\n    Brain->>Retriever: retrieve(query)\n    Retriever->>VectorDB: similarity_search()\n    VectorDB-->>Retriever: relevant_chunks\n    Retriever-->>Brain: context_chunks\n    Brain->>LLM: generate(context, question)\n    LLM-->>Response: ParsedRAGChunkResponse\n    Response-->>User: Streaming Answer\n```\n\n## Streaming Response System\n\n### 
ParsedRAGChunkResponse\n\nThe streaming response is built around the `ParsedRAGChunkResponse` data class, which provides incremental chunks of the generated answer along with metadata.\n\n**Chunk Structure:**\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `answer` | `str` | The partial or complete answer text |\n| `metadata` | `dict` | Sources and additional context |\n| `last_chunk` | `bool` | Indicates final chunk of response |\n| `sources` | `list` | Retrieved document sources |\n| `chunk_id` | `int` | Sequence number of the chunk |\n\nSource: [core/quivr_core/rag/quivr_rag.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n\n### Answer Streaming Implementation\n\nThe `answer_astream` method implements asynchronous streaming of LLM responses:\n\n```python\nasync def answer_astream(self, query: str, ...) -> AsyncGenerator[ParsedRAGChunkResponse]:\n    # Processing logic yields ParsedRAGChunkResponse chunks\n    yield ParsedRAGChunkResponse(\n        answer=\"\",\n        metadata=get_chunk_metadata(rolling_message, sources),\n        last_chunk=True,\n    )\n```\n\n**Streaming Characteristics:**\n- Yields multiple `ParsedRAGChunkResponse` objects during generation\n- Each chunk contains accumulated `rolling_message` content\n- Uses `chunk_id` for tracking sequence order\n- Final chunk marked with `last_chunk=True`\n- Metadata includes retrieved sources for citation\n\nSource: [core/quivr_core/rag/quivr_rag.py:50-100](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n\n## Retrieval Configuration\n\n### RetrievalConfig\n\nThe retrieval behavior is controlled through `RetrievalConfig` which supports YAML-based configuration:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n```\n\nSource: 
[README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Configuration Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `n_results` | `int` | `5` | Number of documents to retrieve |\n| `fetch_n_neighbors` | `int` | `20` | Number of neighbors to fetch from vector DB |\n| `filter` | `Callable \\| Dict` | `None` | Optional metadata filtering |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n## Search Operation\n\n### Async Search Method\n\n```python\nasync def asearch(\n    self,\n    query: str | Document,\n    n_results: int = 5,\n    filter: Callable | Dict[str, Any] | None = None,\n    fetch_n_neighbors: int = 20,\n) -> list[SearchResult]:\n```\n\n**Parameters:**\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | `str \\| Document` | Yes | The search query |\n| `n_results` | `int` | No | Maximum results to return |\n| `filter` | `Callable \\| Dict` | No | Metadata filter condition |\n| `fetch_n_neighbors` | `int` | No | Initial fetch size before re-ranking |\n\nSource: [core/quivr_core/brain/brain.py:50-80](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n## Document Processing Pipeline\n\n### Processor Base\n\nThe `processor_base.py` handles document chunking and metadata enrichment:\n\n```python\nasync def process_file(file: QuivrFile) -> ProcessedDocument:\n    docs = await self.process_file_inner(file)\n    qvr_version = version(\"quivr-core\")\n    \n    for idx, doc in enumerate(docs.chunks, start=1):\n        doc.metadata = {\n            \"chunk_index\": idx,\n            \"quivr_core_version\": qvr_version,\n            \"language\": detect_language(text=...).value,\n            **file.metadata,\n            **doc.metadata,\n        }\n```\n\n**Metadata Enrichment:**\n- `chunk_index`: Sequential position of 
chunk\n- `quivr_core_version`: Version of quivr-core\n- `language`: Auto-detected language of content\n- Original filename embedded in content for reference\n\nSource: [core/quivr_core/processor/processor_base.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Prompt Templates\n\n### User Prompt Template\n\nThe prompt system uses structured templates with metadata injection:\n\n```\n<user_metadata>\n{user_metadata}\n</user_metadata>\n\n<ticket_metadata>\n{ticket_metadata}\n</ticket_metadata>\n\n<similar_tickets>\n{similar_tickets}\n</similar_tickets>\n\n<ticket_history>\n{ticket_history}\n</ticket_history>\n\n<additional_information>\n{additional_information}\n</additional_information>\n\n<client_query>\n{client_query}\n</client_query>\n```\n\nSource: [core/quivr_core/rag/prompts.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/prompts.py)\n\n### Default Instructions\n\nThe system includes default instructions that guide response generation:\n\n| Instruction | Description |\n|-------------|-------------|\n| Conciseness | Use same level of detail as similar responses |\n| Formatting | Proper paragraphs, bold, italic for readability |\n| Language | Respond in same language as user query |\n| Consistency | Maintain terminology consistency |\n| No Signature | Signature added separately after response |\n\n## LangGraph Integration\n\nThe implementation includes a LangGraph-based RAG pipeline (`quivr_rag_langgraph.py`) for more complex workflows:\n\n```mermaid\ngraph LR\n    A[Query] --> B[History Filter]\n    B --> C[Query Rewrite]\n    C --> D[Retrieval]\n    D --> E[Answer Generation]\n    E --> F[Response]\n```\n\nFeatures:\n- Multi-step processing pipelines\n- Conversation history integration\n- Query rewriting for better retrieval\n- Customizable workflow nodes\n\nSource: 
[core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n\n## LLM Configuration\n\n### LLMEndpointConfig\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `model` | `str` | Model identifier |\n| `llm_base_url` | `str` | API endpoint URL |\n| `temperature` | `float` | Generation temperature (default: 0.7) |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n### Supported Providers\n\n- **OpenAI**: GPT-4, GPT-3.5-turbo\n- **Anthropic**: Claude models\n- **Mistral**: Mistral AI models\n- **Ollama**: Local model support\n\n## Usage Example\n\n```python\nfrom quivr_core import Brain\nfrom quivr_core.config import RetrievalConfig\n\n# Create brain from files\nbrain = Brain.from_files(\n    name=\"my_brain\",\n    file_paths=[\"./document.pdf\", \"./notes.txt\"],\n)\n\n# Configure retrieval\nretrieval_config = RetrievalConfig.from_yaml(\"./workflow.yaml\")\n\n# Ask a question\nanswer = brain.ask(\n    \"What is the main topic of these documents?\",\n    retrieval_config=retrieval_config\n)\n\nprint(answer.answer)\n```\n\nSource: [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Key Design Patterns\n\n| Pattern | Implementation |\n|---------|-----------------|\n| Async/Await | All I/O operations are asynchronous |\n| Streaming | Responses streamed via async generators |\n| Dependency Injection | LLM and embedder are configurable |\n| Configuration-driven | Workflows defined via YAML |\n| Metadata Enrichment | Automatic language detection and versioning |\n\n---\n\n<a id='page-llm-integration'></a>\n\n## LLM Integration\n\n### Related Pages\n\nRelated topics: [Core Components](#page-core-components), [System Architecture Overview](#page-architecture-overview)\n\n<details>\n<summary>Relevant source 
files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n- [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n- [core/quivr_core/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/config.py)\n- [core/quivr_core/base_config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/base_config.py)\n- [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n</details>\n\n# LLM Integration\n\n## Overview\n\nThe LLM Integration module in Quivr provides a flexible, abstracted interface for interacting with Large Language Models (LLMs) from various providers. This module enables Quivr's RAG (Retrieval-Augmented Generation) pipeline to leverage different LLM backends without requiring changes to the core business logic. 
The integration supports OpenAI, Anthropic, Mistral, Meta (Llama), Groq, and local models via Ollama.\n\nThe primary goals of the LLM Integration are:\n\n- **Provider Abstraction**: Uniform API regardless of the underlying LLM provider\n- **Configuration Management**: Centralized configuration for model parameters, token limits, and provider-specific settings\n- **Runtime Flexibility**: Support for both synchronous and asynchronous operations\n- **Embeddings Integration**: Seamless integration with embedding models for semantic search\n\nSource: [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n\n## Architecture\n\n### High-Level Components\n\nThe LLM Integration consists of several key components:\n\n| Component | Purpose | Location |\n|-----------|---------|----------|\n| `LLMEndpoint` | Main wrapper class for LLM interactions | `quivr_core/llm/llm_endpoint.py` |\n| `LLMEndpointConfig` | Configuration dataclass for LLM settings | `quivr_core/rag/entities/config.py` |\n| `LLMConfig` | Per-model configuration with token limits | `quivr_core/rag/entities/config.py` |\n| `DefaultModelSuppliers` | Enum for supported LLM providers | `quivr_core/rag/entities/config.py` |\n\n### Class Diagram\n\n```mermaid\nclassDiagram\n    class LLMEndpoint {\n        +llm: BaseChatModel\n        +llm_config: LLMEndpointConfig\n        +__init__(llm, llm_config)\n        +get_client() BaseChatModel\n    }\n    \n    class LLMEndpointConfig {\n        +model: str\n        +llm_base_url: str\n        +temperature: float\n        +max_output_tokens: int\n        +model_type: LLMEndpointType\n    }\n    \n    class LLMConfig {\n        +max_context_tokens: int\n        +max_output_tokens: int\n        +tokenizer_hub: str\n    }\n    \n    class DefaultModelSuppliers {\n        <<enumeration>>\n        OPENAI\n        ANTHROPIC\n        MISTRAL\n        META\n        GROQ\n        OLLAMA\n    }\n    \n    LLMEndpoint --> 
LLMEndpointConfig\n    LLMEndpointConfig ..> DefaultModelSuppliers\n```\n\nSource: [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)  \nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n## Configuration\n\n### LLMEndpointConfig Parameters\n\nThe `LLMEndpointConfig` class provides configuration for LLM endpoints:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model` | `str` | Required | Model identifier (e.g., \"gpt-4o\", \"claude-3-opus\") |\n| `llm_base_url` | `str` | `\"local\"` | Base URL for the LLM API endpoint |\n| `temperature` | `float` | `0.7` | Sampling temperature for generation (0.0-2.0) |\n| `max_output_tokens` | `int` | `4096` | Maximum tokens in the generated response |\n| `model_type` | `LLMEndpointType` | `LLMEndpointType.CHAT` | Type of LLM endpoint |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n### Default Model Configurations\n\nQuivr provides pre-configured settings for various models through the `DefaultModelSuppliers` enum and associated `LLMConfig` dictionaries:\n\n| Provider | Model | Max Context Tokens | Max Output Tokens | Tokenizer Hub |\n|----------|-------|-------------------|-------------------|---------------|\n| **OpenAI** | gpt-4o | 128,000 | 16,384 | Quivr/claude-tokenizer |\n| **OpenAI** | gpt-4-turbo | 128,000 | 4,096 | Quivr/claude-tokenizer |\n| **Anthropic** | claude-3.5-sonnet | 200,000 | 8,192 | Quivr/claude-tokenizer |\n| **Anthropic** | claude-3-opus | 200,000 | 4,096 | Quivr/claude-tokenizer |\n| **Mistral** | mistral-large | 32,000 | N/A | - |\n| **Meta** | llama-3.1 | 128,000 | 4,096 | Quivr/Meta-Llama-3.1-Tokenizer |\n| **Meta** | llama-3 | 8,192 | 2,048 | Quivr/llama3-tokenizer-new |\n| **Groq** | llama-3.3-70b | 128,000 | 
32,768 | Quivr/Meta-Llama-3.1-Tokenizer |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n### Environment Variables\n\nAPI keys should be set as environment variables before initializing the LLM:\n\n```bash\nexport OPENAI_API_KEY=\"your-openai-api-key\"\nexport ANTHROPIC_API_KEY=\"your-anthropic-api-key\"\n```\n\nExample usage in code:\n\n```python\nimport os\nos.environ[\"OPENAI_API_KEY\"] = \"myopenai_apikey\"\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Usage Patterns\n\n### Basic Integration with Brain\n\nThe most common pattern is to pass an `LLMEndpoint` instance when creating a Brain:\n\n```python\nfrom langchain_openai import ChatOpenAI\nfrom quivr_core import Brain\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./documents/*.pdf\"],\n    llm=LLMEndpoint(\n        llm_config=LLMEndpointConfig(model=\"gpt-4o\"),\n        llm=ChatOpenAI(model=\"gpt-4o\", api_key=str(os.getenv(\"OPENAI_API_KEY\"))),\n    ),\n)\n```\n\nSource: [examples/simple_question_megaparse.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question_megaparse.py)\n\n### Using Fake LLM for Testing\n\nFor testing purposes, Quivr supports fake LLM implementations:\n\n```python\nfrom langchain_core.language_models import FakeListChatModel\nfrom quivr_core import Brain\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\nfrom langchain_core.embeddings import DeterministicFakeEmbedding\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"tests/processor/data/dummy.pdf\"],\n    llm=LLMEndpoint(\n        llm=FakeListChatModel(responses=[\"good\"]),\n        llm_config=LLMEndpointConfig(model=\"fake_model\", 
llm_base_url=\"local\"),\n    ),\n    embedder=DeterministicFakeEmbedding(size=20),\n)\n```\n\nSource: [examples/pdf_parsing_tika.py](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n\n### Custom Embeddings Integration\n\nWhen using custom LLM configurations, you can also specify custom embedders:\n\n```python\nfrom langchain_openai import ChatOpenAI, OpenAIEmbeddings\nfrom quivr_core import Brain\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\n# LLM Configuration\nllm = LLMEndpoint(\n    llm_config=LLMEndpointConfig(model=\"gpt-4o\"),\n    llm=ChatOpenAI(model=\"gpt-4o\", api_key=str(os.getenv(\"OPENAI_API_KEY\"))),\n)\n\n# Embedder Configuration\nembedder = OpenAIEmbeddings(model=\"text-embedding-3-large\")\n\nbrain = Brain.from_files(\n    name=\"custom_brain\",\n    file_paths=[\"./data/documents/\"],\n    llm=llm,\n    embedder=embedder,\n)\n```\n\nSource: [examples/simple_question_megaparse.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question_megaparse.py)\n\n## Supported Providers\n\n### OpenAI\n\nOpenAI models are supported through the `langchain_openai` package:\n\n```python\nfrom langchain_openai import ChatOpenAI\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\nllm = LLMEndpoint(\n    llm=ChatOpenAI(model=\"gpt-4o\", api_key=api_key),\n    llm_config=LLMEndpointConfig(\n        model=\"gpt-4o\",\n        temperature=0.7,\n    ),\n)\n```\n\nSupported models: `gpt-4o`, `gpt-4-turbo`, `gpt-3.5-turbo`\n\n### Anthropic\n\nAnthropic models require the `langchain-anthropic` package:\n\n```python\nfrom langchain_anthropic import ChatAnthropic\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\n\nllm = LLMEndpoint(\n    llm=ChatAnthropic(model=\"claude-3-5-sonnet-20241022\", anthropic_api_key=api_key),\n    llm_config=LLMEndpointConfig(model=\"claude-3.5-sonnet\"),\n)\n```\n\n### 
Mistral\n\nMistral models are supported via their API:\n\n```python\nfrom langchain_mistralai import ChatMistralAI\n\nllm = LLMEndpoint(\n    llm=ChatMistralAI(model=\"mistral-large-latest\", mistral_api_key=api_key),\n    llm_config=LLMEndpointConfig(model=\"mistral-large\"),\n)\n```\n\n### Local Models (Ollama)\n\nFor local inference using Ollama:\n\n```python\nfrom langchain_ollama import ChatOllama\n\nllm = LLMEndpoint(\n    llm=ChatOllama(model=\"llama3.1\", base_url=\"http://localhost:11434\"),\n    llm_config=LLMEndpointConfig(model=\"llama-3.1\", llm_base_url=\"http://localhost:11434\"),\n)\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## RAG Workflow Integration\n\nThe LLM Integration is a core component of Quivr's RAG pipeline. When processing a user query, the LLM is responsible for generating the final answer based on retrieved context.\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Brain.ask]\n    B --> C[Vector Search]\n    C --> D[Retrieve Relevant Chunks]\n    D --> E[LLMEndpoint]\n    E --> F[Generate Answer]\n    F --> G[RAG Response]\n    \n    H[LLMEndpointConfig] --> E\n    I[Chat History] --> E\n    J[System Prompt] --> E\n```\n\nThe LLM receives contextual information from the retrieval step and generates responses that are then formatted and returned to the user. 
The `quivr_rag_langgraph.py` module orchestrates this workflow using LangGraph for complex graph-based processing.\n\nSource: [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)  \nSource: [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n\n## Default LLM Fallback\n\nIf no LLM is explicitly provided when creating a Brain, Quivr automatically initializes a default LLM:\n\n```python\n# From brain.py source code\nif llm is None:\n    llm = default_llm()\n```\n\nThis fallback mechanism ensures that users can get started quickly without explicit configuration, while still allowing advanced users to customize their LLM setup.\n\nSource: [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n## Best Practices\n\n### API Key Security\n\n- Store API keys in environment variables, never hardcode them\n- Use `.env` files with proper `.gitignore` entries for local development\n- Consider using secret management services (AWS Secrets Manager, HashiCorp Vault) for production\n\n### Model Selection\n\n- Choose models based on your use case requirements (speed vs. 
quality)\n- Use smaller models for simple queries to reduce costs\n- Reserve larger models (e.g., GPT-4o, Claude 3.5) for complex reasoning tasks\n\n### Token Management\n\n- Monitor `max_context_tokens` to avoid exceeding model limits\n- Set appropriate `max_output_tokens` based on expected response length\n- Use the `RetrievalConfig` to control how many chunks are fed to the LLM\n\n### Temperature Settings\n\n| Temperature | Use Case | Characteristics |\n|-------------|----------|-----------------|\n| 0.0 - 0.3 | Factual/Deterministic | More focused, consistent responses |\n| 0.4 - 0.7 | General Purpose | Balanced creativity and consistency |\n| 0.8 - 1.0 | Creative/Brainstorming | More varied, potentially surprising outputs |\n\n## Troubleshooting\n\n### Common Issues\n\n1. **API Key Not Found**\n   ```\n   Error: OPENAI_API_KEY environment variable not set\n   ```\n   Solution: Ensure the API key is set before running the script:\n   ```bash\n   export OPENAI_API_KEY=\"your-key\"\n   ```\n\n2. **Model Context Limit Exceeded**\n   Solution: Reduce the number of retrieved chunks or adjust chunk size in processing\n\n3. 
**Rate Limiting**\n   Solution: Implement retry logic or use a rate limiting middleware\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## See Also\n\n- [Brain Configuration](./brain-configuration.md)\n- [RAG Workflows](./rag-workflows.md)\n- [Embedding Integration](./embedding-integration.md)\n- [Quivr Documentation](https://core.quivr.com/)\n\n---\n\n<a id='page-file-processing'></a>\n\n## File Processing and Parsers\n\n### Related Pages\n\nRelated topics: [Storage System](#page-storage)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/processor/registry.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/registry.py)\n- [core/quivr_core/processor/implementations/default.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/default.py)\n- [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n- [core/quivr_core/processor/implementations/tika_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/tika_processor.py)\n- [core/quivr_core/processor/implementations/megaparse_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/megaparse_processor.py)\n</details>\n\n# File Processing and Parsers\n\nQuivr's file processing system provides an extensible architecture for ingesting, parsing, and chunking various document formats for use in Retrieval-Augmented Generation (RAG) workflows. 
The processor subsystem sits at the core of Quivr's data ingestion pipeline, transforming raw files into structured document chunks ready for embedding and retrieval.\n\n## Architecture Overview\n\nThe file processing architecture follows a plugin-based design pattern where processors are registered by file extension and lazily loaded on demand. This separation of concerns allows new file format support to be added without modifying core processing logic.\n\n```mermaid\ngraph TD\n    A[QuivrFile Input] --> B[Processor Registry]\n    B --> C{File Extension}\n    C -->|.txt| D[SimpleTxtProcessor]\n    C -->|.pdf| E[MegaparseProcessor]\n    C -->|Other| F[Default Processors]\n    D --> G[ProcessedDocument]\n    E --> G\n    F --> G\n    G --> H[Document Chunks]\n    H --> I[RAG Pipeline]\n```\n\n### Core Components\n\n| Component | Purpose | Location |\n|-----------|---------|----------|\n| `ProcessorBase` | Abstract base class for all processors | `processor_base.py` |\n| `ProcessedDocument` | Generic container for processing results | `processor_base.py` |\n| `ProcessorRegistry` | Maps extensions to processor classes | `registry.py` |\n| `SplitterConfig` | Configuration for text chunking | `splitter.py` |\n\nSource: [processor_base.py:1-75](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py), [registry.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/registry.py)\n\n---\n\n## ProcessorBase Abstract Class\n\nThe `ProcessorBase` serves as the foundation for all file processors in Quivr. 
It defines the contract that every processor implementation must follow.\n\n### Class Definition\n\n```python\nclass ProcessorBase(ABC, Generic[R]):\n    supported_extensions: list[FileExtension | str]\n    \n    @property\n    @abstractmethod\n    def processor_metadata(self) -> dict[str, Any]:\n        raise NotImplementedError\n    \n    async def process_file(self, file: QuivrFile) -> ProcessedDocument[R]:\n        ...\n    \n    @abstractmethod\n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[R]:\n        raise NotImplementedError\n```\n\nSource: [processor_base.py:47-75](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n### Key Methods\n\n| Method | Access | Description |\n|--------|--------|-------------|\n| `supported_extensions` | Class attribute | List of file extensions the processor handles |\n| `check_supported()` | Instance | Validates if a file type is supported |\n| `process_file()` | Async | Main entry point; validates, processes, and enriches metadata |\n| `process_file_inner()` | Abstract Async | Subclass implementation for actual parsing logic |\n| `processor_metadata` | Abstract Property | Returns processor configuration as dict |\n\n### Metadata Enrichment\n\nThe `process_file()` method automatically enriches each document chunk with standardized metadata:\n\n```python\ndoc.metadata = {\n    \"chunk_index\": idx,\n    \"quivr_core_version\": qvr_version,\n    \"language\": detect_language(...).value,\n    **file.metadata,\n    **doc.metadata,\n    **self.processor_metadata,\n}\n```\n\nThis ensures all chunks carry provenance information including the Quivr version, detected language, and processor configuration.\n\nSource: [processor_base.py:20-39](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n---\n\n## ProcessedDocument Generic Container\n\nThe `ProcessedDocument` is a generic wrapper that encapsulates the output of any 
processor:\n\n```python\n@dataclass\nclass ProcessedDocument(Generic[R]):\n    chunks: List[Document]          # List of LangChain Document objects\n    processor_cls: str              # Name of the processor class\n    processor_response: R           # Processor-specific return value\n```\n\nThe generic type `R` allows each processor to define its own response type. For example, `SimpleTxtProcessor` returns `ProcessedDocument[str]` where `processor_response` contains the raw text content.\n\nSource: [processor_base.py:40-46](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n---\n\n## Text Chunking and Splitting\n\nQuivr provides configurable text chunking through the `SplitterConfig` class and custom splitting implementations.\n\n### SplitterConfig\n\n```python\n@dataclass\nclass SplitterConfig(BaseModel):\n    chunk_size: int = 500\n    chunk_overlap: int = 0\n    length_function: Callable[[str], int] = len\n```\n\n| Parameter | Default | Description |\n|-----------|---------|-------------|\n| `chunk_size` | 500 | Maximum characters per chunk |\n| `chunk_overlap` | 0 | Characters to overlap between chunks |\n| `length_function` | `len` | Function to calculate text length |\n\n### SimpleTxtProcessor Recursive Splitter\n\nThe `SimpleTxtProcessor` implements a custom recursive character splitter:\n\n```python\ndef recursive_character_splitter(\n    doc: Document, chunk_size: int, chunk_overlap: int\n) -> list[Document]:\n    if len(doc.page_content) <= chunk_size:\n        return [doc]\n    \n    chunk = Document(page_content=doc.page_content[:chunk_size], metadata=doc.metadata)\n    remaining = Document(\n        page_content=doc.page_content[chunk_size - chunk_overlap:],\n        metadata=doc.metadata,\n    )\n    return [chunk] + recursive_character_splitter(remaining, chunk_size, chunk_overlap)\n```\n\n**Constraint**: `chunk_overlap` must be less than `chunk_size`\n\nSource: 
[simple_txt_processor.py:8-26](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n### LangChain Text Splitters\n\nOther processors leverage LangChain's `RecursiveCharacterTextSplitter`:\n\n```python\nfrom langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter\n\ntext_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=splitter_config.chunk_size,\n    chunk_overlap=splitter_config.chunk_overlap,\n    length_function=tiktoken_len,  # Token-based length\n)\ndocs = text_splitter.split_documents([document])\n```\n\nSource: [megaparse_processor.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/megaparse_processor.py)\n\n---\n\n## Available Processors\n\n### Processor Comparison\n\n| Processor | File Types | Parser Backend | Notes |\n|-----------|------------|----------------|-------|\n| `SimpleTxtProcessor` | `.txt` | Custom recursive splitter | Lightweight, no external dependencies |\n| `MegaparseProcessor` | `.txt`, `.pdf`, `.docx`, `.pptx`, `.xlsx`, `.csv`, `.epub`, `.bib`, `.odt`, `.html`, `.md`, `.mdx` | MegaParse SDK | AI-optimized document parsing |\n| `TikaProcessor` | `.pdf` | Apache Tika | Robust PDF extraction |\n| Default Processors | Various | LangChain loaders | Catch-all for standard formats |\n\n### SimpleTxtProcessor\n\nDesigned for plain text files with zero external dependencies:\n\n```python\nclass SimpleTxtProcessor(ProcessorBase):\n    supported_extensions = [FileExtension.txt]\n    \n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[str]:\n        async with aiofiles.open(file.path, mode=\"r\") as f:\n            content = await f.read()\n        \n        doc = Document(page_content=content)\n        docs = recursive_character_splitter(\n            doc, self.splitter_config.chunk_size, self.splitter_config.chunk_overlap\n        )\n        return ProcessedDocument(\n            
chunks=docs, \n            processor_cls=\"SimpleTxtProcessor\", \n            processor_response=content\n        )\n```\n\nSource: [simple_txt_processor.py:30-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n### MegaparseProcessor\n\nThe most versatile processor, supporting 14+ file formats through the MegaParse SDK:\n\n```python\nclass MegaparseProcessor(ProcessorBase[MPDocument]):\n    supported_extensions = [\n        FileExtension.txt, FileExtension.pdf, FileExtension.docx,\n        FileExtension.pptx, FileExtension.xlsx, FileExtension.csv,\n        FileExtension.epub, FileExtension.html, FileExtension.markdown,\n        # ... more extensions\n    ]\n    \n    def __init__(\n        self,\n        splitter: TextSplitter | None = None,\n        splitter_config: SplitterConfig = SplitterConfig(),\n        megaparse_config: MegaparseConfig = MegaparseConfig(),\n    ) -> None:\n        ...\n```\n\n**Installation**: `pip install megaparse`\n\nSource: [megaparse_processor.py:1-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/megaparse_processor.py)\n\n### Default Processors\n\nBuilt using LangChain's community loaders for standard formats:\n\n| LangChain Loader | File Extension |\n|------------------|----------------|\n| `UnstructuredPDFLoader` | `.pdf` |\n| `Docx2txtLoader` | `.docx` |\n| `UnstructuredPowerPointLoader` | `.pptx` |\n| `UnstructuredExcelLoader` | `.xls`, `.xlsx` |\n| `CSVLoader` | `.csv` |\n| `UnstructuredEPubLoader` | `.epub` |\n| `UnstructuredMarkdownLoader` | `.md`, `.markdown` |\n| `UnstructuredHTMLLoader` | `.html` |\n| `TextLoader` | `.txt` |\n\nThese are dynamically generated using `_build_processor()` factory:\n\n```python\ndef _build_processor(\n    cls_name: str, \n    load_cls: Type[P], \n    cls_extensions: List[FileExtension | str]\n) -> Type[ProcessorInit]:\n    enc = tiktoken.get_encoding(\"cl100k_base\")\n    \n    class 
_Processor(ProcessorBase):\n        supported_extensions = cls_extensions\n        # ... implementation\n    return _Processor\n```\n\nSource: [default.py:1-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/default.py)\n\n### TikaProcessor\n\nUses Apache Tika for robust PDF extraction with token-based chunk sizing:\n\n```python\nasync def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[None]:\n    async with file.open() as f:\n        txt = await self._send_parse_tika(f)\n    \n    document = Document(page_content=txt)\n    docs = self.text_splitter.split_documents([document])\n    \n    for doc in docs:\n        doc.metadata = {\"chunk_size\": len(self.enc.encode(doc.page_content))}\n    \n    return ProcessedDocument(\n        chunks=docs, \n        processor_cls=\"TikaProcessor\", \n        processor_response=None\n    )\n```\n\nSource: [tika_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/tika_processor.py)\n\n---\n\n## Processor Registry\n\nThe registry provides lazy-loading lookup from file extensions to processor classes:\n\n```python\ndef get_processor_class(file_extension: FileExtension | str) -> Type[ProcessorBase]:\n    \"\"\"Fetch processor class from registry\n    \n    Loading of these classes is *Lazy*. 
Appropriate import will happen\n    the first time we try to process some file type.\n    \"\"\"\n    ...\n    return known_processors[file_extension].cls\n```\n\n### Registry Structure\n\n```python\nProcEntry = namedtuple('ProcEntry', ['exts', 'cls_mod', 'errtxt', 'priority'])\n\nknown_processors = defaults_to_proc_entries(base_processors)\n```\n\nEach entry contains:\n- `exts`: List of file extensions\n- `cls_mod`: Module path to import\n- `errtxt`: Error message if import fails\n- `priority`: Processor priority (for multiple handlers)\n\n### Registration Flow\n\n```mermaid\ngraph LR\n    A[defaults_to_proc_entries] --> B[_append_proc_mapping]\n    B --> C[base_processors dict]\n    C --> D[known_processors]\n    D --> E[get_processor_class]\n    E --> F{Lazy Import}\n    F --> G[Processor Class]\n```\n\nSource: [registry.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/registry.py)\n\n---\n\n## Processing Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Brain\n    participant Registry\n    participant Processor\n    participant Splitter\n    \n    User->>Brain: add_files(file_paths)\n    Brain->>Registry: get_processor_class(extension)\n    Registry-->>Brain: Processor Class\n    Brain->>Processor: process_file(QuivrFile)\n    Processor->>Processor: check_supported()\n    Processor->>Processor: process_file_inner()\n    Processor->>Splitter: split_documents()\n    Splitter-->>Processor: List[Document]\n    Processor->>Processor: enrich_metadata()\n    Processor-->>Brain: ProcessedDocument\n    Brain->>Brain: vectorstore.add_documents()\n```\n\n### Step-by-Step Processing\n\n1. **File Input**: `QuivrFile` object created from file path with metadata (SHA1, size, extension)\n2. **Registry Lookup**: Extension matched to appropriate processor class\n3. **Validation**: `check_supported()` confirms file type compatibility\n4. **Parsing**: `process_file_inner()` extracts raw text/content\n5. 
**Chunking**: Documents split according to `SplitterConfig`\n6. **Metadata Enrichment**: Each chunk receives index, version, and language tags\n7. **Output**: `ProcessedDocument` returned with chunks ready for embedding\n\n---\n\n## Configuration Options\n\n### Splitter Configuration\n\n```python\nfrom quivr_core.processor.splitter import SplitterConfig\n\nconfig = SplitterConfig(\n    chunk_size=1000,      # Characters per chunk\n    chunk_overlap=100,     # Overlap between chunks\n    length_function=len   # Or tiktoken-based for token counting\n)\n```\n\n### Megaparse Configuration\n\n```python\nfrom quivr_core.config import MegaparseConfig\n\nconfig = MegaparseConfig(\n    # Provider-specific settings\n)\n```\n\n---\n\n## Extending with Custom Processors\n\nTo add support for a new file format:\n\n1. Create a class extending `ProcessorBase[R]`\n2. Implement `process_file_inner()` method\n3. Define `supported_extensions` class attribute\n4. Implement `processor_metadata` property\n5. 
Register in `registry.py` or dynamically\n\n```python\nclass CustomProcessor(ProcessorBase[MyResponseType]):\n    supported_extensions = [FileExtension.my_format]\n    \n    @property\n    def processor_metadata(self) -> dict[str, Any]:\n        return {\"processor_cls\": \"CustomProcessor\"}\n    \n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[MyResponseType]:\n        # Custom parsing logic\n        ...\n        return ProcessedDocument(chunks=docs, processor_cls=\"CustomProcessor\", processor_response=result)\n```\n\n---\n\n## Error Handling\n\nProcessors validate file support at runtime:\n\n```python\ndef check_supported(self, file: QuivrFile) -> None:\n    if file.file_extension not in self.supported_extensions:\n        raise ValueError(f\"can't process a file of type {file.file_extension}\")\n```\n\nLazy loading provides graceful degradation for optional dependencies:\n\n```python\ntry:\n    from megaparse_sdk.client import MegaParseNATSClient\nexcept ImportError:\n    raise ImportError(\"Please install megaparse: pip install megaparse\")\n```\n\nSource: [processor_base.py:50-53](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n---\n\n## Related Components\n\n- **QuivrFile**: Represents an input file with metadata (path, SHA1, size, extension)\n- **Brain**: Orchestrates file processing and retrieval\n- **VectorStore**: Stores processed document chunks for similarity search\n- **RetrievalConfig**: Configures how chunks are retrieved during Q&A\n\n---\n\n<a id='page-storage'></a>\n\n## Storage System\n\n### Related Pages\n\nRelated topics: [File Processing and Parsers](#page-file-processing), [Brain Class](#page-brain-class)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/storage/storage_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n- 
[core/quivr_core/storage/local_storage.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n- [core/quivr_core/storage/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/file.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n</details>\n\n# Storage System\n\n## Overview\n\nThe Storage System in Quivr is a modular abstraction layer responsible for managing file uploads, storage, and retrieval across different storage backends. It provides a clean separation between the RAG (Retrieval-Augmented Generation) logic and the underlying file persistence mechanisms, enabling Quivr to work with various storage implementations while maintaining a consistent API.\n\nThe system is designed to handle:\n- File upload and deduplication using SHA-1 hashing\n- Asynchronous file access\n- Multiple storage backends (currently supporting local filesystem)\n- File metadata tracking and organization by brain\n\nSource: [core/quivr_core/storage/storage_base.py:1-47](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n\n## Architecture\n\n### High-Level Architecture\n\n```mermaid\ngraph TD\n    A[Brain] --> B[StorageBase]\n    B --> C[LocalStorage]\n    B --> D[Future: CloudStorage]\n    B --> E[Future: S3Storage]\n    C --> F[QuivrFile]\n    C --> G[File System]\n    F --> H[Metadata]\n    F --> I[SHA-1 Hash]\n```\n\n### Component Hierarchy\n\n```mermaid\nclassDiagram\n    class StorageBase {\n        <<abstract>>\n        +name: str\n        +nb_files() int\n        +get_files() List~QuivrFile~\n        +upload_file(file, exists_ok)*\n    }\n    \n    class LocalStorage {\n        +name: str = \"local_storage\"\n        +files: List~QuivrFile~\n        +hashes: Set~str~\n        +copy_flag: bool\n        +dir_path: Path\n    }\n    \n    class QuivrFile {\n        +id: UUID\n        +brain_id: UUID\n        +path: Path\n        
+original_filename: str\n        +file_size: int\n        +file_extension: FileExtension\n        +file_sha1: str\n        +additional_metadata: dict\n    }\n    \n    StorageBase <|-- LocalStorage\n```\n\n## Core Components\n\n### StorageBase (Abstract Base Class)\n\nThe `StorageBase` is an abstract base class that defines the contract for all storage implementations. It enforces a consistent interface across different storage backends through Python's ABC (Abstract Base Class) mechanism.\n\nSource: [core/quivr_core/storage/storage_base.py:19-47](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n\n#### Required Subclass Attributes\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `name` | `str` | Human-readable name of the storage type |\n\n#### Abstract Methods\n\n| Method | Return Type | Description |\n|--------|-------------|-------------|\n| `nb_files()` | `int` | Returns the total number of files stored |\n| `get_files()` | `List[QuivrFile]` | Asynchronously retrieves all files in storage |\n| `upload_file(file, exists_ok)` | `None` | Uploads a file to the storage |\n\nThe base class enforces that subclasses must define the `name` attribute through `__init_subclass__`:\n\n```python\ndef __init_subclass__(cls, **kwargs):\n    for required in (\"name\",):\n        if not getattr(cls, required):\n            raise TypeError(\n                f\"Can't instantiate abstract class {cls.__name__} without {required} attribute defined\"\n            )\n    return super().__init_subclass__(**kwargs)\n```\n\nSource: [core/quivr_core/storage/storage_base.py:19-27](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n\n### LocalStorage Implementation\n\n`LocalStorage` is the concrete implementation of `StorageBase` that persists files to the local filesystem.\n\nSource: 
[core/quivr_core/storage/local_storage.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n\n#### Attributes\n\n| Attribute | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `name` | `str` | `\"local_storage\"` | Storage type identifier |\n| `files` | `List[QuivrFile]` | `[]` | In-memory list of stored files |\n| `hashes` | `Set[str]` | `set()` | Set of SHA-1 hashes for deduplication |\n| `copy_flag` | `bool` | `True` | If `True`, copy files; if `False`, create symlinks |\n| `dir_path` | `Path` | See below | Directory path for file storage |\n\n#### Directory Resolution\n\nThe storage directory is resolved in the following order:\n\n1. Explicitly provided `dir_path` argument\n2. Environment variable `QUIVR_LOCAL_STORAGE`\n3. Default path: `~/.cache/quivr/files`\n\n```python\nif dir_path is None:\n    self.dir_path = Path(\n        os.getenv(\"QUIVR_LOCAL_STORAGE\", \"~/.cache/quivr/files\")\n    )\nelse:\n    self.dir_path = dir_path\nos.makedirs(self.dir_path, exist_ok=True)\n```\n\nSource: [core/quivr_core/storage/local_storage.py:38-46](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n\n### QuivrFile Data Model\n\n`QuivrFile` represents a file stored in the Quivr system with all associated metadata.\n\nSource: [core/quivr_core/files/file.py:48-74](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n#### Class Slots\n\nThe class uses `__slots__` for memory-efficient attribute storage:\n\n| Slot | Type | Description |\n|------|------|-------------|\n| `id` | `UUID` | Unique identifier for the file |\n| `brain_id` | `UUID \\| None` | ID of the brain this file belongs to |\n| `path` | `Path` | Actual filesystem path to the file |\n| `original_filename` | `str` | Original name of the uploaded file |\n| `file_size` | `int \\| None` | Size of the file in bytes |\n| `file_extension` | `FileExtension \\| str` | File extension/type |\n| 
`file_sha1` | `str` | SHA-1 hash of the file content |\n| `additional_metadata` | `dict` | Custom metadata dictionary |\n\nSource: [core/quivr_core/files/file.py:49-57](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n#### Constructor Parameters\n\n```python\ndef __init__(\n    self,\n    id: UUID,\n    original_filename: str,\n    path: Path,\n    file_sha1: str,\n    file_extension: FileExtension | str,\n    brain_id: UUID | None = None,\n    file_size: int | None = None,\n    metadata: dict[str, Any] | None = None,\n) -> None:\n```\n\nSource: [core/quivr_core/files/file.py:59-70](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n#### Async File Access\n\nThe `QuivrFile` class provides an async context manager for reading file contents:\n\n```python\n@asynccontextmanager\nasync def open(self) -> AsyncGenerator[AsyncIterable[bytes], None]:\n    f = await aiofiles.open(self.path, mode=\"rb\")\n    try:\n        yield f\n    finally:\n        await f.close()\n```\n\nSource: [core/quivr_core/files/file.py:76-83](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n## File Upload Workflow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant LocalStorage\n    participant FileSystem\n    \n    Client->>LocalStorage: upload_file(QuivrFile)\n    LocalStorage->>LocalStorage: Check SHA-1 hash\n    alt file exists and exists_ok=False\n        LocalStorage-->>Client: FileExistsError\n    else file doesn't exist\n        LocalStorage->>FileSystem: Copy or Symlink\n        FileSystem-->>LocalStorage: Success\n        LocalStorage->>LocalStorage: Update files list\n        LocalStorage->>LocalStorage: Add hash to hashes set\n    end\n```\n\n### Upload Process Details\n\n1. **File Path Construction**: Files are stored at `{dir_path}/{brain_id}/{file_id}`\n\n2. 
**Deduplication**: Before upload, the system checks if a file with the same SHA-1 hash already exists using `file_sha1`:\n\n   ```python\n   if file.file_sha1 in self.hashes and not exists_ok:\n       raise FileExistsError(...)\n   ```\n\n3. **Storage Strategy**: Based on `copy_flag`:\n   - `True`: Copy file content to destination\n   - `False`: Create symbolic link to original file\n\n4. **File Registration**: After successful upload, the file is added to the internal tracking list.\n\nSource: [core/quivr_core/storage/local_storage.py:48-70](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n\n## File Creation Helper\n\nThe `get_file_path()` function creates a `QuivrFile` instance from a filesystem path:\n\n```python\ndef get_file_path(\n    path: Path,\n    brain_id: UUID | None = None\n) -> QuivrFile:\n```\n\n### Processing Steps\n\n| Step | Operation |\n|------|-----------|\n| 1 | Get file size using `os.path.getsize()` |\n| 2 | Compute SHA-1 hash by reading file bytes |\n| 3 | Extract or generate UUID for file ID |\n| 4 | Determine file extension |\n| 5 | Create and return `QuivrFile` instance |\n\n```python\nfile_size = os.path.getsize(path)\nasync with aiofiles.open(path, mode=\"rb\") as f:\n    file_sha1 = hashlib.sha1(await f.read()).hexdigest()\n\ntry:\n    id = UUID(path.name)\nexcept ValueError:\n    id = uuid4()\n```\n\nSource: [core/quivr_core/storage/file.py:19-43](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/file.py)\n\n## Configuration Options\n\n### LocalStorage Configuration\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `dir_path` | `Path \\| None` | `None` | Custom storage directory |\n| `copy_flag` | `bool` | `True` | File storage method |\n\n### Environment Variables\n\n| Variable | Description |\n|----------|-------------|\n| `QUIVR_LOCAL_STORAGE` | Override default storage directory |\n\n## Usage Examples\n\n### Creating a LocalStorage 
Instance\n\n```python\nfrom quivr_core.storage.local_storage import LocalStorage\nfrom pathlib import Path\n\n# Use default directory (~/.cache/quivr/files)\nstorage = LocalStorage()\n\n# Use custom directory\nstorage = LocalStorage(dir_path=Path(\"/my/custom/storage\"))\n\n# Use symlinks instead of copying\nstorage = LocalStorage(dir_path=Path(\"/mnt/data\"), copy_flag=False)\n```\n\n### Checking Storage Information\n\n```python\nfrom quivr_core.storage.local_storage import LocalStorage\n\nstorage = LocalStorage()\n\n# Get number of files\nfile_count = storage.nb_files()\n\n# Get storage info\ninfo = storage.info()\n# Returns: {\"directory_path\": \"...\", \"name\": \"local_storage\", \"nb_files\": N}\n```\n\n### Uploading Files\n\n```python\nimport asyncio\nfrom quivr_core.storage.local_storage import LocalStorage\nfrom quivr_core.files.file import get_file_path\nfrom pathlib import Path\n\nasync def upload_example():\n    storage = LocalStorage()\n    \n    # Create QuivrFile from path\n    qfile = get_file_path(Path(\"./document.pdf\"), brain_id=None)\n    \n    # Upload with overwrite allowed\n    await storage.upload_file(qfile, exists_ok=True)\n    \n    print(f\"Total files: {storage.nb_files()}\")\n```\n\n### Listing All Files\n\n```python\nimport asyncio\nfrom quivr_core.storage.local_storage import LocalStorage\n\nasync def list_files():\n    storage = LocalStorage()\n    \n    files = await storage.get_files()\n    \n    for f in files:\n        print(f\"File: {f.original_filename}\")\n        print(f\"  ID: {f.id}\")\n        print(f\"  Size: {f.file_size} bytes\")\n        print(f\"  SHA-1: {f.file_sha1}\")\n```\n\n## Memory Management\n\nThe `QuivrFile` class uses `__slots__` to optimize memory usage by restricting attribute creation:\n\n```python\nclass QuivrFile:\n    __slots__ = [\n        \"id\",\n        \"brain_id\",\n        \"path\",\n        \"original_filename\",\n        \"file_size\",\n        \"file_extension\",\n        \"file_sha1\",\n 
       \"additional_metadata\",\n    ]\n```\n\nThis approach:\n- Prevents arbitrary attribute assignment\n- Reduces memory overhead per instance\n- Improves attribute access speed\n\nSource: [core/quivr_core/files/file.py:48-57](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n## Extending the Storage System\n\nTo create a custom storage backend, implement the `StorageBase` abstract class:\n\n```python\nfrom quivr_core.storage.storage_base import StorageBase\nfrom quivr_core.files.file import QuivrFile\n\nclass MyCustomStorage(StorageBase):\n    name = \"my_custom_storage\"\n    \n    def __init__(self):\n        self._files = []\n    \n    def nb_files(self) -> int:\n        return len(self._files)\n    \n    async def get_files(self) -> list[QuivrFile]:\n        return self._files\n    \n    async def upload_file(self, file: QuivrFile, exists_ok: bool = False) -> None:\n        # Custom implementation\n        pass\n```\n\n## Best Practices\n\n1. **Deduplication**: Always compute and check SHA-1 hashes before uploading to avoid duplicate files.\n\n2. **Async Operations**: All file I/O operations are asynchronous. Use `await` properly in async contexts.\n\n3. **Memory Efficiency**: Use `__slots__` for custom file classes to reduce memory footprint.\n\n4. **Path Handling**: Use `pathlib.Path` for cross-platform path operations.\n\n5. **Error Handling**: Handle `FileExistsError` when uploading files with `exists_ok=False` (default).\n\n6. 
**Directory Management**: Ensure storage directories exist before operations using `os.makedirs(path, exist_ok=True)`.\n\n## Related Components\n\n| Component | Description |\n|-----------|-------------|\n| [Brain](../brain/brain.md) | Uses Storage for file management |\n| [Processor System](../processor/processor.md) | Processes files from storage |\n| [RAG System](../rag/rag.md) | Retrieves documents from storage for query answering |\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: QuivrHQ/quivr\n\nSummary: Found 17 potential pitfall items; 2 are high/blocking. Highest priority: security_permissions - 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback.\n\n## 1. security_permissions · 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_46118108af95480ba852109be6e2c66a | https://github.com/QuivrHQ/quivr/issues/3667 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. security_permissions · 来源证据：[Bug]:\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]:\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_ff260ca3ab5247679b2ded201ec21e24 | https://github.com/QuivrHQ/quivr/issues/2004 | 来源讨论提到 docker 相关条件，需在安装/试用前复核。\n\n## 3. 
installation · 来源证据：Integration idea: Screenpipe for screen/audio context\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Integration idea: Screenpipe for screen/audio context\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_3a7f345691d04bbea72b03858746645e | https://github.com/QuivrHQ/quivr/issues/3658 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 4. installation · 来源证据：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e3091b23b6184dffb982e0cc494134fd | https://github.com/QuivrHQ/quivr/issues/3650 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 5. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:640079149 | https://github.com/QuivrHQ/quivr | README/documentation is current enough for a first validation pass.\n\n## 6. 
runtime · 来源证据：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection…\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection object at 0x7f66a5b06110>>, which will…\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e6fdde799a2b4f53806121190979025a | https://github.com/QuivrHQ/quivr/issues/3654 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 7. runtime · 来源证据：core: v0.0.25\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.25\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7c6096fb5a6442c7b53068a8dd8e2c78 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 8. runtime · 来源证据：core: v0.0.29\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.29\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7eeeeb626a10476e820fcd651727a55a | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29 | 来源讨论提到 node 相关条件，需在安装/试用前复核。\n\n## 9. 
runtime · 来源证据：core: v0.0.33\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.33\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d7a735cb41bc44f2abeb148d689f1320 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 10. maintenance · 来源证据：core: v0.0.24\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.24\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_5272fe70cda24220a5aeb994ad06819e | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 11. maintenance · 来源证据：core: v0.0.26\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.26\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7f723b616fd446c3be43298d75fd6e8b | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.26 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 12. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | last_activity_observed missing\n\n## 13. 
security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium\n\n## 14. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium\n\n## 15. security_permissions · 来源证据：core: v0.0.27\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：core: v0.0.27\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_027338ba02764da6b371970615591265 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.27 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 16. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | issue_or_pr_quality=unknown\n\n## 17. 
maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | release_recency=unknown\n\n<!-- canonical_name: QuivrHQ/quivr; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "quivr",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "github_repo:640079149",
          "kind": "repo",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/QuivrHQ/quivr"
        },
        {
          "evidence_id": "art_508b34cf9ee340be9ef8db74ca412fc0",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/QuivrHQ/quivr#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "quivr 说明书",
      "toc": [
        "https://github.com/QuivrHQ/quivr Project Manual",
        "Table of Contents",
        "Introduction to Quivr",
        "Key Features",
        "Architecture Overview",
        "Getting Started",
        "Core Components",
        "Configuration",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "947a785415c6c35ab2ae8157222b4720b0710b4d",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "README.md",
      "docs/pyproject.toml",
      "docs/mkdocs.yml",
      "docs/README.md",
      "docs/docs/index.md",
      "docs/docs/quickstart.md",
      "docs/src/docs/__init__.py",
      "docs/docs/vectorstores/index.md",
      "docs/docs/vectorstores/pgvector.md",
      "docs/docs/vectorstores/faiss.md",
      "docs/docs/brain/index.md",
      "docs/docs/brain/brain.md",
      "docs/docs/brain/chat.md",
      "docs/docs/parsers/index.md",
      "docs/docs/parsers/megaparse.md",
      "docs/docs/parsers/simple.md",
      "docs/docs/config/index.md",
      "docs/docs/config/base_config.md",
      "docs/docs/config/config.md",
      "docs/docs/storage/index.md",
      "docs/docs/storage/local_storage.md",
      "docs/docs/storage/base.md",
      "docs/docs/examples/index.md",
      "docs/docs/examples/custom_storage.md",
      "docs/docs/examples/chatbot.md",
      "docs/docs/examples/chatbot_voice_flask.md",
      "docs/docs/examples/chatbot_voice.md",
      "docs/docs/workflows/index.md",
      "docs/docs/workflows/examples/basic_ingestion.md",
      "docs/docs/workflows/examples/basic_rag.md",
      "docs/docs/workflows/examples/rag_with_web_search.md",
      "examples/pdf_parsing_tika.py",
      "examples/save_load_brain.py",
      "examples/pdf_document_from_yaml.py",
      "examples/simple_question_megaparse.py",
      "examples/quivr-whisper/pyproject.toml",
      "examples/quivr-whisper/README.md",
      "examples/quivr-whisper/app.py",
      "examples/chatbot/pyproject.toml",
      "examples/chatbot/main.py"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# quivr - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 quivr 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **想在安装前理解开源项目价值和边界的用户**：当前证据主要来自项目文档。 证据：`README.md` Claim：`clm_0002` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `pip install quivr-core # Check that the installation worked` 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：需要管理员/安全审批\n- **为什么**：继续前可能涉及密钥、账号、外部服务或敏感上下文，建议先经过管理员或安全审批。\n\n### 30 秒判断\n\n- **现在怎么做**：需要管理员/安全审批\n- **最小安全下一步**：先跑 Prompt Preview；若涉及凭证或企业环境，先审批再试装\n- **先别相信**：角色质量和任务匹配不能直接相信。\n- **继续会触碰**：角色选择偏差、命令执行、本地环境或项目文件\n\n### 现在可以相信\n\n- **适合人群线索：想在安装前理解开源项目价值和边界的用户**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n### 现在还不能相信\n\n- **角色质量和任务匹配不能直接相信。**（unverified）：角色库证明有很多角色，不证明每个角色都适合你的具体任务，也不证明角色能产生高质量结果。\n- **不能把角色文案当成真实执行能力。**（unverified）：安装前只能判断角色描述和任务画像是否匹配，不能证明它能在宿主 AI 里完成任务。\n- **真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 
版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n\n### 继续会触碰什么\n\n- **角色选择偏差**：用户对任务应该由哪个专家角色处理的判断。 原因：选错角色会让 AI 从错误专业视角回答，浪费时间或误导决策。\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`\n- **环境变量 / API Key**：项目入口文档明确出现 API key、token、secret 或账号凭证配置。 原因：如果真实安装需要凭证，应先使用测试凭证并经过权限/合规判断。 证据：`README.md`, `core/tests/conftest.py`, `core/tests/test_llm_endpoint.py`, `docs/docs/examples/chatbot.md` 等\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：先用交互式试用验证任务画像和角色匹配，不要先导入整套角色库。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **不要使用真实生产凭证**：环境变量/API key 一旦进入宿主或工具链，可能产生账号和合规风险。（适用：出现 API、TOKEN、KEY、SECRET 等环境线索时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- **保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **保留原始角色选择记录**：如果输出偏题，可以回到任务画像阶段重新选择角色，而不是继续沿着错误角色推进。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- **准备撤销测试 API key 或 token**：测试凭证泄露或误用时，可以快速止损。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0004` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md` Claim：`clm_0005` supported 0.86\n- **待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？。原因：兼容性只能通过实际宿主环境验证。\n- 
**待确认**：项目输出质量是否满足用户具体任务？。原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？。原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：210\n- 重要文件覆盖：40/210\n- 证据索引条目：80\n- 角色 / Skill 条目：39\n\n### 证据不足时的处理\n\n- **missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 quivr 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 quivr 当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 
给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 quivr 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 39 个角色 / Skill / 项目文档条目。\n\n- **docs**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/README.md`\n- **Quivr - Your Second Brain, Empowered by Generative AI**（project_doc）：Quivr - Your Second Brain, Empowered by Generative AI 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **quivr-core package**（project_doc）：This project is licensed under the Apache 2.0 License 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`core/README.md`\n- **Quivr Chatbot Example**（project_doc）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/chatbot/README.md`\n- **Quivr Chatbot Example**（project_doc）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/chatbot_voice/README.md`\n- **Quivr-Whisper**（project_doc）：Quivr-Whisper is a web application that allows users to ask questions via audio input. It leverages OpenAI's Whisper model for speech transcription and synthesizes responses using OpenAI's text-to-speech capabilities. The application queries the Quivr API to get a response based on the transcribed audio input. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/quivr-whisper/README.md`\n- **simple-question**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/simple_question/README.md`\n- **Brain**（project_doc）：::: quivr core.brain.brain options: heading level: 2 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/brain/brain.md`\n- **ChatHistory**（project_doc）：The ChatHistory class is where all the conversation between the user and the LLM gets stored. A ChatHistory object will transparently be instanciated in the Brain every time you create one. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/brain/chat.md`\n- **Brain**（project_doc）：The brain is the essential component of Quivr that stores and processes the knowledge you want to retrieve informations from. Simply create a brain with the files you want to process and use the latest Quivr RAG workflow to retrieve informations from the knowledge. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/brain/index.md`\n- **Configuration Base Class**（project_doc）：::: quivr core.base config options: heading level: 2 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/config/base_config.md`\n- **Configuration**（project_doc）：Retrieval Configuration ::: quivr core.rag.entities.config.RetrievalConfig 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/config/config.md`\n- **Configuration**（project_doc）：The configuration classes are based on Pydantic https://docs.pydantic.dev/latest/ and allow the configuration of the ingestion and retrieval workflows via YAML files. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/config/index.md`\n- **Chatbot with Chainlit**（project_doc）：This example demonstrates a simple chatbot using Quivr and Chainlit , where users can upload a .txt file and ask questions based on its content. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/examples/chatbot.md`\n- **Voice Chatbot with Chainlit**（project_doc）：This example demonstrates how to create a voice-enabled chatbot using Quivr and Chainlit . The chatbot lets users upload a text file, ask questions about its content, and interact using speech. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/examples/chatbot_voice.md`\n- **Voice Chatbot with Flask**（project_doc）：This example demonstrates a simple chatbot using Flask and Quivr , where users can upload a .txt file and ask questions based on its content. It supports speech-to-text and text-to-speech capabilities for a seamless interactive experience. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/examples/chatbot_voice_flask.md`\n- **Transparent Storage**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/examples/custom_storage.md`\n- **Examples**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/examples/index.md`\n- **Welcome to Quivr Documentation**（project_doc）：Quivr, helps you build your second brain, utilizes the power of GenerativeAI to be your personal assistant ! 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/index.md`\n- **Parsers**（project_doc）：Quivr provides a suite of parsers to extract structured data from various sources. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/parsers/index.md`\n- **Megaparse**（project_doc）：::: quivr core.processor.implementations.megaparse processor options: heading level: 2 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/parsers/megaparse.md`\n- **Simple Txt**（project_doc）：::: quivr core.processor.implementations.simple txt processor options: heading level: 2 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/parsers/simple.md`\n- **Quick start**（project_doc）：If you need to quickly start talking to your list of files, here are the steps. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/quickstart.md`\n- **StorageBase**（project_doc）：::: quivr core.storage.storage base options: heading level: 2 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/storage/base.md`\n- **🗄️ Storage**（project_doc）：Your Brain’s File Management System 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/storage/index.md`\n- **LocalStorage**（project_doc）：::: quivr core.storage.local storage options: heading level: 2 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/storage/local_storage.md`\n- **Faiss**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/vectorstores/faiss.md`\n- **Vector Stores**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/vectorstores/index.md`\n- **PGVector**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/vectorstores/pgvector.md`\n- **Basic ingestion**（project_doc）：Creating a basic ingestion workflow like the one above is simple, here are the steps: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/workflows/examples/basic_ingestion.md`\n- **Basic RAG**（project_doc）：Creating a basic RAG workflow like the one above is simple, here are the steps: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/workflows/examples/basic_rag.md`\n- **RAG with web search**（project_doc）：! 
rag with web search.excalidraw.png 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/workflows/examples/rag_with_web_search.md`\n- **Workflows**（project_doc）：In this section, you will find examples of workflows that you can use to create your own agentic RAG systems. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/docs/workflows/index.md`\n- **Description**（project_doc）：Please include a summary of the changes and the related issue. Please also include relevant motivation and context. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/PULL_REQUEST_TEMPLATE.md`\n- **Changelog**（project_doc）：What's Changed feat: Add new documentation files by @StanGirard in https://github.com/QuivrHQ/quivr/pull/3351 fix: separate english and french ingredients by @chloedia in https://github.com/QuivrHQ/quivr/pull/3358 docs core : init by @StanGirard in https://github.com/QuivrHQ/quivr/pull/3365 docs: quivr core storage by @AmineDiro in https://github.com/QuivrHQ/quivr/pull/3366 fix: fixing pdf parsing by @jacopo-chevall… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CHANGELOG.md`\n- **Changelog**（project_doc）：0.0.33 https://github.com/QuivrHQ/quivr/compare/core-0.0.32...core-0.0.33 2025-02-03 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`core/CHANGELOG.md`\n- **Quivr Chatbot Example**（project_doc）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/chatbot/chainlit.md`\n- **Quivr Chatbot Example**（project_doc）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/chatbot_voice/chainlit.md`\n- **Backend code guidelines**（project_doc）：- Follow a clear project structure : - In quivr-api we have modules divided into : controller, entity, services, repositories, utils - Use dependency injection for better testability and modularity 🔺 - Use environment variables for configuration 🔺 - We use Pydantic settings for parsing the arguments - Don’t add unnecessary abstractions → KISS principle. - Premature abstractions are a bad pattern - Avoid using Global… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`core/tests/processor/data/guidelines_code.md`\n\n## 证据索引\n\n- 共索引 80 条证据。\n\n- **docs**（documentation）：docs Describe your project here. 证据：`docs/README.md`\n- **Quivr - Your Second Brain, Empowered by Generative AI**（documentation）：Quivr - Your Second Brain, Empowered by Generative AI 证据：`README.md`\n- **quivr-core package**（documentation）：This project is licensed under the Apache 2.0 License 证据：`core/README.md`\n- **Quivr Chatbot Example**（documentation）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 证据：`examples/chatbot/README.md`\n- **Quivr Chatbot Example**（documentation）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 证据：`examples/chatbot_voice/README.md`\n- **Quivr-Whisper**（documentation）：Quivr-Whisper is a web application that allows users to ask questions via audio input. It leverages OpenAI's Whisper model for speech transcription and synthesizes responses using OpenAI's text-to-speech capabilities. The application queries the Quivr API to get a response based on the transcribed audio input. 证据：`examples/quivr-whisper/README.md`\n- **simple-question**（documentation）：simple-question Describe your project here. 
证据：`examples/simple_question/README.md`\n- **License**（source_file）：Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ 证据：`LICENSE`\n- **Brain**（documentation）：::: quivr core.brain.brain options: heading level: 2 证据：`docs/docs/brain/brain.md`\n- **ChatHistory**（documentation）：The ChatHistory class is where all the conversation between the user and the LLM gets stored. A ChatHistory object will transparently be instanciated in the Brain every time you create one. 证据：`docs/docs/brain/chat.md`\n- **Brain**（documentation）：The brain is the essential component of Quivr that stores and processes the knowledge you want to retrieve informations from. Simply create a brain with the files you want to process and use the latest Quivr RAG workflow to retrieve informations from the knowledge. 证据：`docs/docs/brain/index.md`\n- **Configuration Base Class**（documentation）：::: quivr core.base config options: heading level: 2 证据：`docs/docs/config/base_config.md`\n- **Configuration**（documentation）：Retrieval Configuration ::: quivr core.rag.entities.config.RetrievalConfig 证据：`docs/docs/config/config.md`\n- **Configuration**（documentation）：The configuration classes are based on Pydantic https://docs.pydantic.dev/latest/ and allow the configuration of the ingestion and retrieval workflows via YAML files. 证据：`docs/docs/config/index.md`\n- **Chatbot with Chainlit**（documentation）：This example demonstrates a simple chatbot using Quivr and Chainlit , where users can upload a .txt file and ask questions based on its content. 证据：`docs/docs/examples/chatbot.md`\n- **Voice Chatbot with Chainlit**（documentation）：This example demonstrates how to create a voice-enabled chatbot using Quivr and Chainlit . The chatbot lets users upload a text file, ask questions about its content, and interact using speech. 
证据：`docs/docs/examples/chatbot_voice.md`\n- **Voice Chatbot with Flask**（documentation）：This example demonstrates a simple chatbot using Flask and Quivr , where users can upload a .txt file and ask questions based on its content. It supports speech-to-text and text-to-speech capabilities for a seamless interactive experience. 证据：`docs/docs/examples/chatbot_voice_flask.md`\n- **Transparent Storage**（documentation）：Transparent Storage todo 证据：`docs/docs/examples/custom_storage.md`\n- **Examples**（documentation）：Examples 证据：`docs/docs/examples/index.md`\n- **Welcome to Quivr Documentation**（documentation）：Quivr, helps you build your second brain, utilizes the power of GenerativeAI to be your personal assistant ! 证据：`docs/docs/index.md`\n- **Parsers**（documentation）：Quivr provides a suite of parsers to extract structured data from various sources. 证据：`docs/docs/parsers/index.md`\n- **Megaparse**（documentation）：::: quivr core.processor.implementations.megaparse processor options: heading level: 2 证据：`docs/docs/parsers/megaparse.md`\n- **Simple Txt**（documentation）：::: quivr core.processor.implementations.simple txt processor options: heading level: 2 证据：`docs/docs/parsers/simple.md`\n- **Quick start**（documentation）：If you need to quickly start talking to your list of files, here are the steps. 
证据：`docs/docs/quickstart.md`\n- **StorageBase**（documentation）：::: quivr core.storage.storage base options: heading level: 2 证据：`docs/docs/storage/base.md`\n- **🗄️ Storage**（documentation）：Your Brain’s File Management System 证据：`docs/docs/storage/index.md`\n- **LocalStorage**（documentation）：::: quivr core.storage.local storage options: heading level: 2 证据：`docs/docs/storage/local_storage.md`\n- **Faiss**（documentation）：Faiss 证据：`docs/docs/vectorstores/faiss.md`\n- **Vector Stores**（documentation）：Vector Stores 证据：`docs/docs/vectorstores/index.md`\n- **PGVector**（documentation）：PGVector 证据：`docs/docs/vectorstores/pgvector.md`\n- **Basic ingestion**（documentation）：Creating a basic ingestion workflow like the one above is simple, here are the steps: 证据：`docs/docs/workflows/examples/basic_ingestion.md`\n- **Basic RAG**（documentation）：Creating a basic RAG workflow like the one above is simple, here are the steps: 证据：`docs/docs/workflows/examples/basic_rag.md`\n- **RAG with web search**（documentation）：! rag with web search.excalidraw.png 证据：`docs/docs/workflows/examples/rag_with_web_search.md`\n- **Workflows**（documentation）：In this section, you will find examples of workflows that you can use to create your own agentic RAG systems. 证据：`docs/docs/workflows/index.md`\n- **Description**（documentation）：Please include a summary of the changes and the related issue. Please also include relevant motivation and context. 
证据：`.github/PULL_REQUEST_TEMPLATE.md`\n- **Changelog**（documentation）：What's Changed feat: Add new documentation files by @StanGirard in https://github.com/QuivrHQ/quivr/pull/3351 fix: separate english and french ingredients by @chloedia in https://github.com/QuivrHQ/quivr/pull/3358 docs core : init by @StanGirard in https://github.com/QuivrHQ/quivr/pull/3365 docs: quivr core storage by @AmineDiro in https://github.com/QuivrHQ/quivr/pull/3366 fix: fixing pdf parsing by @jacopo-chevallard in https://github.com/QuivrHQ/quivr/pull/3349 feat: Improve user credit calculation in get user credits by @StanGirard in https://github.com/QuivrHQ/quivr/pull/3367 fix unwanted parsing effect by @chloedia in https://github.com/QuivrHQ/quivr/pull/3371 add fallback on llamapar… 证据：`CHANGELOG.md`\n- **Changelog**（documentation）：0.0.33 https://github.com/QuivrHQ/quivr/compare/core-0.0.32...core-0.0.33 2025-02-03 证据：`core/CHANGELOG.md`\n- **Quivr Chatbot Example**（documentation）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 证据：`examples/chatbot/chainlit.md`\n- **Quivr Chatbot Example**（documentation）：This example demonstrates how to create a simple chatbot using Quivr and Chainlit. The chatbot allows users to upload a text file and then ask questions about its content. 
证据：`examples/chatbot_voice/chainlit.md`\n- **.Release Please Manifest**（structured_config）：{ \"core\": \"0.0.33\" } 证据：`.release-please-manifest.json`\n- **Release Please Config**（structured_config）：{ \"packages\": { \"core\": { \"release-type\": \"python\", \"package-name\": \"core\", \"bump-patch-for-minor-pre-major\": true, \"include-v-in-tag\": false, \"tag-separator\": \"-\", \"component\": \"core\" } } } 证据：`release-please-config.json`\n- **Vercel**（structured_config）：{ \"git\": { \"deploymentEnabled\": { \"main\": false } } } 证据：`vercel.json`\n- **Bn**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u09b8\\u09c7\\u099f\\u09bf\\u0982\\u09b8\", \"settingsKey\": \"S\", \"APIKeys\": \"\\u098f\\u09aa\\u09bf\\u0986\\u0987 \\u0995\\u09c0\", \"logout\": \"\\u09b2\\u0997\\u0986\\u0989\\u099f\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u09a8\\u09a4\\u09c1\\u09a8 \\u0986\\u09a1\\u09cd\\u09a1\\u09be\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0995\\u09be\\u09b0\\u09cd\\u09af \\u09a4\\u09be\\u09b2\\u09bf\\u0995\\u09be\", \"loading\": \"\\u09b2\\u09cb\\u09a1\\u0964\\u0964\\u0964\", \"error\": \"\\u098f\\u0995\\u099f\\u09bf \\u09a4\\u09cd\\u09b0\\u09c1\\u099f\\u09bf \\u09b8\\u0982\\u0998\\u099f\\u09bf\\u09a4 \\u09b9\\u09af\\u09bc\\u09c7\\u099b\\u09c7\" } }, \"attachments\"… 证据：`examples/chatbot/.chainlit/translations/bn.json`\n- **En Us**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"Settings\", \"settingsKey\": \"S\", \"APIKeys\": \"API Keys\", \"logout\": \"Logout\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"New Chat\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f Task List\", \"loading\": \"Loading...\", \"error\": \"An error occurred\" } }, \"attachments\": { \"cancelUpload\": \"Cancel upload\", \"removeAttachment\": \"Remove attachment\" }, 
\"newChatDialog\": { \"createNewChat\": \"Create new chat?\", \"clearChat\": \"This will clear the current messages and start a new chat.\", \"cancel\": \"Cancel\", \"confirm\": \"Confirm\" }, \"settingsModal\": { \"settings\": \"Settings\", \"expandMessages\": \"Expand Messages\", \"… 证据：`examples/chatbot/.chainlit/translations/en-US.json`\n- **Gu**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0ab8\\u0ac1\\u0aaf\\u0acb\\u0a9c\\u0aa8\\u0acb\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u0a95\\u0ac0\\u0a93\", \"logout\": \"\\u0aac\\u0ab9\\u0abe\\u0ab0 \\u0aa8\\u0ac0\\u0a95\\u0ab3\\u0acb\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0aa8\\u0ab5\\u0acb \\u0ab8\\u0a82\\u0ab5\\u0abe\\u0aa6\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0a95\\u0abe\\u0ab0\\u0acd\\u0aaf \\u0aaf\\u0abe\\u0aa6\\u0ac0\", \"loading\": \"\\u0ab2\\u0acb\\u0aa1 \\u0a95\\u0ab0\\u0ac0 \\u0ab0\\u0ab9\\u0acd\\u0aaf\\u0abe \\u0a9b\\u0ac7...\", \"error\": \"\\u0aad\\u0ac2\\u0ab2 \\u0a89\\u0aa6\\u0acd\\u0aad\\u0ab5\\u0ac0\" } }, \"attachments\": { \"cancelUpload\": \"\\u0a85\\u0aaa\\u0ab2\\… 证据：`examples/chatbot/.chainlit/translations/gu.json`\n- **He Il**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u05d4\\u05d2\\u05d3\\u05e8\\u05d5\\u05ea\", \"settingsKey\": \"S\", \"APIKeys\": \"API Keys\", \"logout\": \"\\u05d4\\u05ea\\u05e0\\u05ea\\u05e7\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u05e6'\\u05d0\\u05d8 \\u05d7\\u05d3\\u05e9\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f Task List\", \"loading\": \"\\u05d8\\u05d5\\u05e2\\u05df...\", \"error\": \"\\u05e9\\u05d2\\u05d9\\u05d0\\u05d4\" } }, \"attachments\": { \"cancelUpload\": \"\\u05d1\\u05d8\\u05dc \\u05d4\\u05e2\\u05dc\\u05d0\\u05d4\", \"removeAttachment\": \"\\u05d4\\u05e1\\u05e8 \\u05e7\\u05d5\\u05d1\\u05e5 
\\u05de\\u05e6\\u05d5\\u05e8\\u05e3\" }, \"newChatDialog\": { \"createNewChat\": \"\\u05e6\\u05d5\\u… 证据：`examples/chatbot/.chainlit/translations/he-IL.json`\n- **Hi**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0938\\u0947\\u091f\\u093f\\u0902\\u0917\\u094d\\u0938\", \"settingsKey\": \"\\u0926\\u0915\\u094d\\u0937\\u093f\\u0923\\u0940\", \"APIKeys\": \"\\u090f\\u092a\\u0940\\u0906\\u0908 \\u0915\\u0941\\u0902\\u091c\\u0940\", \"logout\": \"\\u0932\\u0949\\u0917\\u0906\\u0909\\u091f\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0928\\u0908 \\u091a\\u0948\\u091f\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0915\\u093e\\u0930\\u094d\\u092f \\u0938\\u0942\\u091a\\u0940\", \"loading\": \"\\u0932\\u094b\\u0921\\u0964\\u0964\\u0964\", \"error\": \"\\u0915\\u094b\\u0908 \\u0924\\u094d\\u0930\\u0941\\u091f\\u093f \\u0909\\u0924\\u094d\\u092a\\u0928\\u094d\\u0928 \\u0939\\u0941\\u0… 证据：`examples/chatbot/.chainlit/translations/hi.json`\n- **Kn**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0cb8\\u0cc6\\u0c9f\\u0ccd\\u0c9f\\u0cbf\\u0c82\\u0c97\\u0ccd \\u0c97\\u0cb3\\u0cc1\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u0c95\\u0cc0\\u0cb2\\u0cbf\\u0c97\\u0cb3\\u0cc1\", \"logout\": \"\\u0cb2\\u0cbe\\u0c97\\u0ccd \\u0c94\\u0c9f\\u0ccd\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0cb9\\u0cca\\u0cb8 \\u0c9a\\u0cbe\\u0c9f\\u0ccd\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0c95\\u0cbe\\u0cb0\\u0ccd\\u0caf \\u0caa\\u0c9f\\u0ccd\\u0c9f\\u0cbf\", \"loading\": \"\\u0cb2\\u0ccb\\u0ca1\\u0ccd \\u0c86\\u0c97\\u0cc1\\u0ca4\\u0ccd\\u0ca4\\u0cbf\\u0ca6\\u0cc6...\", \"error\": \"\\u0ca6\\u0ccb\\u0cb7 \\u0cb8\\u0c82\\u0cad\\u0cb5\\u0cbf\\u0cb8\\u0cbf\\u0ca6\\u0cc6\"… 证据：`examples/chatbot/.chainlit/translations/kn.json`\n- **Ml**（structured_config）：{ 
\"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0d15\\u0d4d\\u0d30\\u0d2e\\u0d40\\u0d15\\u0d30\\u0d23\\u0d19\\u0d4d\\u0d19\\u0d7e\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u0d15\\u0d40\\u0d15\\u0d7e\", \"logout\": \"\\u0d32\\u0d4b\\u0d17\\u0d4b\\u0d1f\\u0d4d\\u0d1f\\u0d4d\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0d2a\\u0d41\\u0d24\\u0d3f\\u0d2f \\u0d1a\\u0d3e\\u0d31\\u0d4d\\u0d31\\u0d4d\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0d1f\\u0d3e\\u0d38\\u0d4d\\u0d15\\u0d4d \\u0d32\\u0d3f\\u0d38\\u0d4d\\u0d31\\u0d4d\\u0d31\\u0d4d\", \"loading\": \"\\u0d32\\u0d4b\\u0d21\\u0d3f\\u0d02\\u0d17\\u0d4d...\", \"error\": \"\\u0d12\\u0d30\\u0d41 \\u0d2a\\u0d3f\\u0d36\\u0d15\\u0d4d \\u0d38\\u0d02\\u0d2d\\u0d35\\u0… 证据：`examples/chatbot/.chainlit/translations/ml.json`\n- **Mr**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0938\\u0947\\u091f\\u093f\\u0902\\u0917\\u094d\\u0938\", \"settingsKey\": \"S\", \"APIKeys\": \"\\u090f\\u092a\\u0940\\u0906\\u092f \\u0915\\u0940\\u091c\", \"logout\": \"Logout\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0928\\u0935\\u0940\\u0928 \\u0917\\u092a\\u094d\\u092a\\u093e\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0915\\u093e\\u0930\\u094d\\u092f \\u0938\\u0942\\u091a\\u0940\", \"loading\": \"\\u0932\\u094b\\u0921\\u093f\\u0902\\u0917...\", \"error\": \"\\u090f\\u0915 \\u0924\\u094d\\u0930\\u0941\\u091f\\u0940 \\u091d\\u093e\\u0932\\u0940\" } }, \"attachments\": { \"cancelUpload\": \"\\u0905\\u092a\\u0932\\u094b\\u0921 \\u0930\\u0926\\u094d\\u0926… 证据：`examples/chatbot/.chainlit/translations/mr.json`\n- **Ta**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0b85\\u0bae\\u0bc8\\u0baa\\u0bcd\\u0baa\\u0bc1\\u0b95\\u0bb3\\u0bcd\", \"settingsKey\": \"S\", 
\"APIKeys\": \"API \\u0bb5\\u0bbf\\u0b9a\\u0bc8\\u0b95\\u0bb3\\u0bcd\", \"logout\": \"\\u0bb5\\u0bc6\\u0bb3\\u0bbf\\u0baf\\u0bc7\\u0bb1\\u0bc1\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0baa\\u0bc1\\u0ba4\\u0bbf\\u0baf \\u0b85\\u0bb0\\u0b9f\\u0bcd\\u0b9f\\u0bc8\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0baa\\u0ba3\\u0bbf \\u0baa\\u0b9f\\u0bcd\\u0b9f\\u0bbf\\u0baf\\u0bb2\\u0bcd\", \"loading\": \"\\u0b8f\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0b95\\u0bbf\\u0bb1\\u0ba4\\u0bc1...\", \"error\": \"\\u0b92\\u0bb0\\u0bc1 \\u0baa\\u0bbf\\u0bb4\\u0bc8 \\u0b8f\\u0bb1\\u0bcd\\u0baa\\u0… 证据：`examples/chatbot/.chainlit/translations/ta.json`\n- **Te**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0c38\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f\\u0c02\\u0c17\\u0c4d \\u0c32\\u0c41\", \"settingsKey\": \"S\", \"APIKeys\": \"API Keys\", \"logout\": \"Logout\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0c15\\u0c4a\\u0c24\\u0c4d\\u0c24 \\u0c1a\\u0c3e\\u0c1f\\u0c4d\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0c1f\\u0c3e\\u0c38\\u0c4d\\u0c15\\u0c4d \\u0c32\\u0c3f\\u0c38\\u0c4d\\u0c1f\\u0c4d\", \"loading\": \"\\u0c32\\u0c4b\\u0c21\\u0c3f\\u0c02\\u0c17\\u0c4d...\", \"error\": \"\\u0c12\\u0c15 \\u0c26\\u0c4b\\u0c37\\u0c02 \\u0c38\\u0c02\\u0c2d\\u0c35\\u0c3f\\u0c02\\u0c1a\\u0c3f\\u0c02\\u0c26\\u0c3f\" } }, \"attachments\": { \"cancelUpload\": \"\\u0c05\\u0c2a\\u0c4d \\u0c… 证据：`examples/chatbot/.chainlit/translations/te.json`\n- **Zh Cn**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u8bbe\\u7f6e\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u5bc6\\u94a5\", \"logout\": \"\\u767b\\u51fa\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u65b0\\u5efa\\u5bf9\\u8bdd\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f 
\\u4efb\\u52a1\\u5217\\u8868\", \"loading\": \"\\u52a0\\u8f7d\\u4e2d...\", \"error\": \"\\u53d1\\u751f\\u9519\\u8bef\" } }, \"attachments\": { \"cancelUpload\": \"\\u53d6\\u6d88\\u4e0a\\u4f20\", \"removeAttachment\": \"\\u79fb\\u9664\\u9644\\u4ef6\" }, \"newChatDialog\": { \"createNewChat\": \"\\u521b\\u5efa\\u65b0\\u5bf9\\u8bdd\\uff1f\", \"clearChat\": \"\\u8fd9\\u5c06\\u6e05\\u9664\\u5f53\\u524d\\u6d88\\u606f\\u5e76\\u5f00\\u59cb\\u65b0\\u7684\\u5… 证据：`examples/chatbot/.chainlit/translations/zh-CN.json`\n- **Bn**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u09b8\\u09c7\\u099f\\u09bf\\u0982\\u09b8\", \"settingsKey\": \"S\", \"APIKeys\": \"\\u098f\\u09aa\\u09bf\\u0986\\u0987 \\u0995\\u09c0\", \"logout\": \"\\u09b2\\u0997\\u0986\\u0989\\u099f\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u09a8\\u09a4\\u09c1\\u09a8 \\u0986\\u09a1\\u09cd\\u09a1\\u09be\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0995\\u09be\\u09b0\\u09cd\\u09af \\u09a4\\u09be\\u09b2\\u09bf\\u0995\\u09be\", \"loading\": \"\\u09b2\\u09cb\\u09a1\\u0964\\u0964\\u0964\", \"error\": \"\\u098f\\u0995\\u099f\\u09bf \\u09a4\\u09cd\\u09b0\\u09c1\\u099f\\u09bf \\u09b8\\u0982\\u0998\\u099f\\u09bf\\u09a4 \\u09b9\\u09af\\u09bc\\u09c7\\u099b\\u09c7\" } }, \"attachments\"… 证据：`examples/chatbot_voice/.chainlit/translations/bn.json`\n- **En Us**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"Settings\", \"settingsKey\": \"S\", \"APIKeys\": \"API Keys\", \"logout\": \"Logout\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"New Chat\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f Task List\", \"loading\": \"Loading...\", \"error\": \"An error occurred\" } }, \"attachments\": { \"cancelUpload\": \"Cancel upload\", \"removeAttachment\": \"Remove attachment\" }, \"newChatDialog\": { \"createNewChat\": 
\"Create new chat?\", \"clearChat\": \"This will clear the current messages and start a new chat.\", \"cancel\": \"Cancel\", \"confirm\": \"Confirm\" }, \"settingsModal\": { \"settings\": \"Settings\", \"expandMessages\": \"Expand Messages\", \"… 证据：`examples/chatbot_voice/.chainlit/translations/en-US.json`\n- **Gu**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0ab8\\u0ac1\\u0aaf\\u0acb\\u0a9c\\u0aa8\\u0acb\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u0a95\\u0ac0\\u0a93\", \"logout\": \"\\u0aac\\u0ab9\\u0abe\\u0ab0 \\u0aa8\\u0ac0\\u0a95\\u0ab3\\u0acb\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0aa8\\u0ab5\\u0acb \\u0ab8\\u0a82\\u0ab5\\u0abe\\u0aa6\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0a95\\u0abe\\u0ab0\\u0acd\\u0aaf \\u0aaf\\u0abe\\u0aa6\\u0ac0\", \"loading\": \"\\u0ab2\\u0acb\\u0aa1 \\u0a95\\u0ab0\\u0ac0 \\u0ab0\\u0ab9\\u0acd\\u0aaf\\u0abe \\u0a9b\\u0ac7...\", \"error\": \"\\u0aad\\u0ac2\\u0ab2 \\u0a89\\u0aa6\\u0acd\\u0aad\\u0ab5\\u0ac0\" } }, \"attachments\": { \"cancelUpload\": \"\\u0a85\\u0aaa\\u0ab2\\… 证据：`examples/chatbot_voice/.chainlit/translations/gu.json`\n- **He Il**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u05d4\\u05d2\\u05d3\\u05e8\\u05d5\\u05ea\", \"settingsKey\": \"S\", \"APIKeys\": \"API Keys\", \"logout\": \"\\u05d4\\u05ea\\u05e0\\u05ea\\u05e7\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u05e6'\\u05d0\\u05d8 \\u05d7\\u05d3\\u05e9\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f Task List\", \"loading\": \"\\u05d8\\u05d5\\u05e2\\u05df...\", \"error\": \"\\u05e9\\u05d2\\u05d9\\u05d0\\u05d4\" } }, \"attachments\": { \"cancelUpload\": \"\\u05d1\\u05d8\\u05dc \\u05d4\\u05e2\\u05dc\\u05d0\\u05d4\", \"removeAttachment\": \"\\u05d4\\u05e1\\u05e8 \\u05e7\\u05d5\\u05d1\\u05e5 
\\u05de\\u05e6\\u05d5\\u05e8\\u05e3\" }, \"newChatDialog\": { \"createNewChat\": \"\\u05e6\\u05d5\\u… 证据：`examples/chatbot_voice/.chainlit/translations/he-IL.json`\n- **Hi**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0938\\u0947\\u091f\\u093f\\u0902\\u0917\\u094d\\u0938\", \"settingsKey\": \"\\u0926\\u0915\\u094d\\u0937\\u093f\\u0923\\u0940\", \"APIKeys\": \"\\u090f\\u092a\\u0940\\u0906\\u0908 \\u0915\\u0941\\u0902\\u091c\\u0940\", \"logout\": \"\\u0932\\u0949\\u0917\\u0906\\u0909\\u091f\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0928\\u0908 \\u091a\\u0948\\u091f\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0915\\u093e\\u0930\\u094d\\u092f \\u0938\\u0942\\u091a\\u0940\", \"loading\": \"\\u0932\\u094b\\u0921\\u0964\\u0964\\u0964\", \"error\": \"\\u0915\\u094b\\u0908 \\u0924\\u094d\\u0930\\u0941\\u091f\\u093f \\u0909\\u0924\\u094d\\u092a\\u0928\\u094d\\u0928 \\u0939\\u0941\\u0… 证据：`examples/chatbot_voice/.chainlit/translations/hi.json`\n- **Kn**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0cb8\\u0cc6\\u0c9f\\u0ccd\\u0c9f\\u0cbf\\u0c82\\u0c97\\u0ccd \\u0c97\\u0cb3\\u0cc1\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u0c95\\u0cc0\\u0cb2\\u0cbf\\u0c97\\u0cb3\\u0cc1\", \"logout\": \"\\u0cb2\\u0cbe\\u0c97\\u0ccd \\u0c94\\u0c9f\\u0ccd\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0cb9\\u0cca\\u0cb8 \\u0c9a\\u0cbe\\u0c9f\\u0ccd\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0c95\\u0cbe\\u0cb0\\u0ccd\\u0caf \\u0caa\\u0c9f\\u0ccd\\u0c9f\\u0cbf\", \"loading\": \"\\u0cb2\\u0ccb\\u0ca1\\u0ccd \\u0c86\\u0c97\\u0cc1\\u0ca4\\u0ccd\\u0ca4\\u0cbf\\u0ca6\\u0cc6...\", \"error\": \"\\u0ca6\\u0ccb\\u0cb7 \\u0cb8\\u0c82\\u0cad\\u0cb5\\u0cbf\\u0cb8\\u0cbf\\u0ca6\\u0cc6\"… 证据：`examples/chatbot_voice/.chainlit/translations/kn.json`\n- 
**Ml**（structured_config）：{ \"components\": { \"atoms\": { \"buttons\": { \"userButton\": { \"menu\": { \"settings\": \"\\u0d15\\u0d4d\\u0d30\\u0d2e\\u0d40\\u0d15\\u0d30\\u0d23\\u0d19\\u0d4d\\u0d19\\u0d7e\", \"settingsKey\": \"S\", \"APIKeys\": \"API \\u0d15\\u0d40\\u0d15\\u0d7e\", \"logout\": \"\\u0d32\\u0d4b\\u0d17\\u0d4b\\u0d1f\\u0d4d\\u0d1f\\u0d4d\" } } } }, \"molecules\": { \"newChatButton\": { \"newChat\": \"\\u0d2a\\u0d41\\u0d24\\u0d3f\\u0d2f \\u0d1a\\u0d3e\\u0d31\\u0d4d\\u0d31\\u0d4d\" }, \"tasklist\": { \"TaskList\": { \"title\": \"\\ud83d\\uddd2\\ufe0f \\u0d1f\\u0d3e\\u0d38\\u0d4d\\u0d15\\u0d4d \\u0d32\\u0d3f\\u0d38\\u0d4d\\u0d31\\u0d4d\\u0d31\\u0d4d\", \"loading\": \"\\u0d32\\u0d4b\\u0d21\\u0d3f\\u0d02\\u0d17\\u0d4d...\", \"error\": \"\\u0d12\\u0d30\\u0d41 \\u0d2a\\u0d3f\\u0d36\\u0d15\\u0d4d \\u0d38\\u0d02\\u0d2d\\u0d35\\u0… 证据：`examples/chatbot_voice/.chainlit/translations/ml.json`\n- 其余 20 条证据见 `AI_CONTEXT_PACK.json` 或 `EVIDENCE_INDEX.json`。\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`docs/README.md`, `README.md`, `core/README.md`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`docs/README.md`, `README.md`, `core/README.md`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\nThe following material strengthens the Repomix/AI Context Pack body. Human Manual is only a reading skeleton; pitfall logs become hard operating constraints for the host AI.\n\n## Human Manual Skeleton\n\nUsage rule: this is only a reading path and salience signal, not factual authority. 
Concrete facts must still come from repo evidence / Claim Graph.\n\nHard rules for the host AI:\n- Do not treat page titles, order, summaries, or importance as project facts.\n- When explaining the Human Manual skeleton, state that it is only a reading path / salience signal.\n- Capability, installation, compatibility, runtime status, and risk judgments must cite repo evidence, source paths, or Claim Graph.\n\n- **Introduction to Quivr**：importance `high`\n  - source_paths: README.md, core/quivr_core/__init__.py, core/README.md\n- **Getting Started**：importance `high`\n  - source_paths: README.md, examples/simple_question/simple_question.py, examples/simple_question/simple_question_streaming.py\n- **Installation**：importance `high`\n  - source_paths: core/pyproject.toml, examples/chatbot/pyproject.toml\n- **System Architecture Overview**：importance `high`\n  - source_paths: core/quivr_core/brain/brain.py, core/quivr_core/rag/quivr_rag.py, core/quivr_core/rag/quivr_rag_langgraph.py\n- **Core Components**：importance `high`\n  - source_paths: core/quivr_core/brain/brain.py, core/quivr_core/brain/info.py, core/quivr_core/brain/serialization.py, core/quivr_core/rag/__init__.py, core/quivr_core/llm/llm_endpoint.py\n- **Brain Class**：importance `high`\n  - source_paths: core/quivr_core/brain/brain.py, core/quivr_core/brain/brain_defaults.py, core/quivr_core/brain/__init__.py, examples/save_load_brain.py\n- **RAG Implementation**：importance `high`\n  - source_paths: core/quivr_core/rag/quivr_rag.py, core/quivr_core/rag/quivr_rag_langgraph.py, core/quivr_core/rag/entities/config.py, core/quivr_core/rag/entities/chat.py, core/quivr_core/rag/prompts.py\n- **LLM Integration**：importance `high`\n  - source_paths: core/quivr_core/llm/llm_endpoint.py, core/quivr_core/language/models.py, core/quivr_core/config.py, core/quivr_core/base_config.py\n\n## Repo Inspection Evidence\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: 
`947a785415c6c35ab2ae8157222b4720b0710b4d`\n- inspected_files: `README.md`, `docs/pyproject.toml`, `docs/mkdocs.yml`, `docs/README.md`, `docs/docs/index.md`, `docs/docs/quickstart.md`, `docs/src/docs/__init__.py`, `docs/docs/vectorstores/index.md`, `docs/docs/vectorstores/pgvector.md`, `docs/docs/vectorstores/faiss.md`, `docs/docs/brain/index.md`, `docs/docs/brain/brain.md`, `docs/docs/brain/chat.md`, `docs/docs/parsers/index.md`, `docs/docs/parsers/megaparse.md`, `docs/docs/parsers/simple.md`, `docs/docs/config/index.md`, `docs/docs/config/base_config.md`, `docs/docs/config/config.md`, `docs/docs/storage/index.md`\n\nHard rules for the host AI:\n- Without repo_clone_verified=true, do not claim the source code has been read.\n- Without repo_inspection_verified=true, do not turn README/docs/package observations into facts.\n- Without quick_start_verified=true, do not claim the Quick Start has been successfully run.\n\n## Doramagic Pitfall Constraints\n\nThese rules come from Doramagic discovery, validation, or compilation pitfalls. 
The host AI must treat them as operating constraints, not general background notes.\n\n### Constraint 1: 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n\n- Trigger: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能影响授权、密钥配置或安全边界。\n- Evidence: community_evidence:github | cevd_46118108af95480ba852109be6e2c66a | https://github.com/QuivrHQ/quivr/issues/3667 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 2: 来源证据：[Bug]:\n\n- Trigger: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]:\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_ff260ca3ab5247679b2ded201ec21e24 | https://github.com/QuivrHQ/quivr/issues/2004 | 来源讨论提到 docker 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 3: 来源证据：Integration idea: Screenpipe for screen/audio context\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Integration idea: Screenpipe for screen/audio context\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_3a7f345691d04bbea72b03858746645e | https://github.com/QuivrHQ/quivr/issues/3658 | 来源类型 github_issue 暴露的待验证使用条件。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 4: 来源证据：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using 
Brain.from_files() in script\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能阻塞安装或首次运行。\n- Evidence: community_evidence:github | cevd_e3091b23b6184dffb982e0cc494134fd | https://github.com/QuivrHQ/quivr/issues/3650 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 5: 能力判断依赖假设\n\n- Trigger: README/documentation is current enough for a first validation pass.\n- Host AI rule: 将假设转成下游验证清单。\n- Why it matters: 假设不成立时，用户拿不到承诺的能力。\n- Evidence: capability.assumptions | github_repo:640079149 | https://github.com/QuivrHQ/quivr | README/documentation is current enough for a first validation pass.\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 6: 来源证据：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection…\n\n- Trigger: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection object at 0x7f66a5b06110>>, which will…\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_e6fdde799a2b4f53806121190979025a | https://github.com/QuivrHQ/quivr/issues/3654 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 7: 来源证据：core: v0.0.25\n\n- Trigger: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.25\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_7c6096fb5a6442c7b53068a8dd8e2c78 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25 | 来源类型 github_release 暴露的待验证使用条件。\n- Hard 
boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 8: 来源证据：core: v0.0.29\n\n- Trigger: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.29\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_7eeeeb626a10476e820fcd651727a55a | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29 | 来源讨论提到 node 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 9: 来源证据：core: v0.0.33\n\n- Trigger: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.33\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_d7a735cb41bc44f2abeb148d689f1320 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33 | 来源类型 github_release 暴露的待验证使用条件。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 10: 来源证据：core: v0.0.24\n\n- Trigger: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.24\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能影响升级、迁移或版本选择。\n- Evidence: community_evidence:github | cevd_5272fe70cda24220a5aeb994ad06819e | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24 | 来源类型 github_release 暴露的待验证使用条件。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n",
      "summary": "Context and operating boundaries for host AI agents.",
      "title": "AI Context Pack"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card\n\nProject: QuivrHQ/quivr\n\n## Doramagic Trial Decision\n\nCurrent decision: it can enter pre-publication recommendation checks. First use should still start with least privilege, a temporary directory, and reversible configuration.\n\n## What The User Can Do Now\n\n- Read the Human Manual first to understand the project purpose and main workflows.\n- Use Prompt Preview for pre-install exploration; it validates interaction shape, not real execution.\n- Run official Quick Start commands only inside an isolated environment, not a primary setup.\n\n## Do Not Do Yet\n\n- Do not treat Prompt Preview as a real project execution result.\n- Do not treat metadata-only validation as sandbox installation validation.\n- Do not describe unverified capabilities as supported, working, or safe to install.\n- Do not provide production data, private files, real secrets, or primary host configuration on first trial.\n\n## Pre-Install Checklist\n\n- Host AI match: chatgpt\n- Official installation entry status: official entry point found\n- Isolated temporary directory, temporary host, or container validation: required\n- Configuration rollback path: required\n- API keys, network access, file access, or host configuration changes: treat as high risk until confirmed\n- Installation command, actual output, and failure logs: must be recorded\n\n## Current Blockers\n\n- No blockers.\n\n## Project-Specific Pitfalls\n\n- 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback (high): 可能影响授权、密钥配置或安全边界。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：[Bug]: (high): 可能增加新用户试用和生产接入成本。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：Integration idea: Screenpipe for screen/audio context (medium): 可能增加新用户试用和生产接入成本。 Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 来源证据：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script (medium): 可能阻塞安装或首次运行。 
Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- 能力判断依赖假设 (medium): 假设不成立时，用户拿不到承诺的能力。 Suggested check: 将假设转成下游验证清单。\n\n## Risk And Permission Notes\n\n- no_demo: medium\n\n## Evidence Gaps\n\n- No structured evidence gaps are currently visible.\n",
      "summary": "Installation, permission, validation, and pre-recommendation risks.",
      "title": "Boundary & Risk Card"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/QuivrHQ/quivr Project Manual\n\nGenerated on: 2026-05-21 19:22:47 UTC\n\n## Table of Contents\n\n- [Introduction to Quivr](#page-introduction)\n- [Getting Started](#page-getting-started)\n- [Installation](#page-installation)\n- [System Architecture Overview](#page-architecture-overview)\n- [Core Components](#page-core-components)\n- [Brain Class](#page-brain-class)\n- [RAG Implementation](#page-rag-implementation)\n- [LLM Integration](#page-llm-integration)\n- [File Processing and Parsers](#page-file-processing)\n- [Storage System](#page-storage)\n\n<a id='page-introduction'></a>\n\n## Introduction to Quivr\n\n### Related Pages\n\nRelated topics: [Getting Started](#page-getting-started), [System Architecture Overview](#page-architecture-overview)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n- [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n- [core/CHANGELOG.md](https://github.com/QuivrHQ/quivr/blob/main/core/CHANGELOG.md)\n</details>\n\n# Introduction to Quivr\n\nQuivr is an open-source project that helps you build your \"second brain\" by leveraging the power of Generative AI. It provides a Retrieval-Augmented Generation (RAG) framework that enables users to ingest documents and ask questions about their content using natural language. 
Source: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\nThe core philosophy of Quivr is to handle all the complexity of RAG implementations so developers can focus on their products. It provides an opinionated, fast, and efficient RAG system that supports multiple file types, various LLM providers, and customizable workflows. Source: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Key Features\n\nQuivr offers several distinctive capabilities that make it a powerful choice for building document-aware AI applications:\n\n| Feature | Description |\n|---------|-------------|\n| **Opinionated RAG** | Pre-configured RAG pipeline optimized for speed and efficiency |\n| **Multi-LLM Support** | Works with OpenAI, Anthropic, Mistral, Gemma, and local models via Ollama |\n| **Any File Type** | Supports PDF, TXT, Markdown, and custom parsers |\n| **Customizable Workflows** | Extend RAG with internet search, tools, and custom processing |\n| **Megaparse Integration** | Optional integration with Megaparse for advanced document ingestion |\n| **Language Detection** | Automatic detection of document language for better processing |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Architecture Overview\n\nQuivr follows a modular architecture with the `quivr-core` package as its central component. 
The architecture is designed around a workflow-based system where different processing nodes are connected to form a complete RAG pipeline.\n\n```mermaid\ngraph TD\n    A[User Input] --> B[Brain.ask]\n    B --> C[RetrievalConfig]\n    C --> D[Workflow Engine]\n    D --> E[Processing Nodes]\n    E --> F[LLM Response]\n    \n    G[Documents] --> H[Processor]\n    H --> I[Chunks]\n    I --> J[Vector Store]\n    J --> D\n```\n\n### Package Structure\n\nThe main components of the Quivr architecture include:\n\n| Component | Purpose |\n|-----------|---------|\n| `quivr_core.Brain` | Central class for managing document collections and answering questions |\n| `ProcessorBase` | Abstract base class for document processors |\n| `RetrievalConfig` | Configuration for retrieval and workflow settings |\n| `QuivrFile` | File wrapper for document handling |\n| `ProcessedDocument` | Container for processed document chunks |\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n## Getting Started\n\n### Prerequisites\n\nBefore installing Quivr, ensure you have:\n\n- Python 3.10 or newer\n- An API key for your chosen LLM provider (OpenAI, Anthropic, or Mistral)\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Installation\n\nInstall the `quivr-core` package using pip:\n\n```bash\npip install quivr-core\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Quick Start Example\n\nThe following example demonstrates creating a simple RAG application with Quivr in approximately 5 lines of code:\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\") as temp_file:\n    temp_file.write(\"Gold is a liquid of blue-like colour.\")\n    temp_file.flush()\n\n    brain = Brain.from_files(\n        name=\"test_brain\",\n        file_paths=[temp_file.name],\n    )\n\n    answer = brain.ask(\"what is gold? 
answer in french\")\n    print(\"answer:\", answer)\n```\n\nSource: [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Core Components\n\n### Brain Class\n\nThe `Brain` class is the central interface for interacting with Quivr. It manages document ingestion, storage, and querying.\n\n**Key Methods:**\n\n| Method | Description |\n|--------|-------------|\n| `Brain.from_files()` | Create a brain from one or more files |\n| `brain.ask()` | Ask a question about the ingested documents |\n| `brain.print_info()` | Display information about the brain |\n\n**Typical Workflow:**\n\n```mermaid\ngraph LR\n    A[Create Brain] --> B[from_files]\n    B --> C[Process Documents]\n    C --> D[Store Chunks]\n    D --> E[Ask Questions]\n    E --> F[Get Answers]\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Document Processors\n\nProcessors handle the parsing and chunking of different file types. The `ProcessorBase` abstract class defines the interface that all processors must implement.\n\n```python\nclass ProcessorBase(ABC):\n    @abstractmethod\n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[R]:\n        raise NotImplementedError\n```\n\nSource: [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n**Processing Pipeline:**\n\nDuring document processing, Quivr performs the following operations:\n\n1. Parse the input file using the appropriate processor\n2. Split documents into chunks based on `SplitterConfig`\n3. Add metadata including chunk index, version info, and detected language\n4. 
Sanitize content by removing null characters and encoding issues\n\nSource: [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n### Simple Txt Processor\n\nThe `SimpleTxtProcessor` is a built-in processor for handling plain text files. It uses recursive character splitting to divide documents into manageable chunks:\n\n```python\ndef recursive_character_splitter(\n    doc: Document, chunk_size: int, chunk_overlap: int\n) -> list[Document]:\n    assert chunk_overlap < chunk_size, \"chunk_overlap is greater than chunk_size\"\n\n    if len(doc.page_content) <= chunk_size:\n        return [doc]\n\n    chunk = Document(page_content=doc.page_content[:chunk_size], metadata=doc.metadata)\n    remaining = Document(\n        page_content=doc.page_content[chunk_size - chunk_overlap :],\n        metadata=doc.metadata,\n    )\n\n    return [chunk] + recursive_character_splitter(remaining, chunk_size, chunk_overlap)\n```\n\nSource: [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n## Configuration\n\n### Environment Setup\n\nSet your API keys as environment variables before creating a brain:\n\n```python\nimport os\nos.environ[\"OPENAI_API_KEY\"] = \"my_openai_apikey\"\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Retrieval Configuration\n\nThe `RetrievalConfig` class allows customization of the RAG workflow. 
Configuration can be loaded from YAML files:\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\nconfig_file_name = \"./basic_rag_workflow.yaml\"\nretrieval_config = RetrievalConfig.from_yaml(config_file_name)\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Workflow Configuration\n\nWorkflows are defined using YAML files that specify processing nodes and their connections:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [\"retrieve\"]\n```\n\n**Workflow Node Types:**\n\n| Node | Function |\n|------|----------|\n| START | Entry point for the workflow |\n| filter_history | Filters conversation history |\n| rewrite | Rewrites the query for better retrieval |\n| retrieve | Fetches relevant document chunks |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Advanced Usage\n\n### Custom Workflow with Rich Console\n\nFor interactive applications, Quivr can be combined with the `rich` library for enhanced console output:\n\n```python\nfrom quivr_core import Brain\nfrom quivr_core.config import RetrievalConfig\nfrom rich.console import Console\nfrom rich.panel import Panel\nfrom rich.prompt import Prompt\n\nbrain = Brain.from_files(\n    name=\"my smart brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n\nconfig_file_name = \"./basic_rag_workflow.yaml\"\nretrieval_config = RetrievalConfig.from_yaml(config_file_name)\n\nconsole = Console()\nwhile True:\n    question = Prompt.ask(\"[bold cyan]Question[/bold cyan]\")\n    if question.lower() == \"exit\":\n        break\n    answer = brain.ask(question, retrieval_config=retrieval_config)\n    console.print(f\"[bold green]Answer[/bold green]: {answer.answer}\")\n```\n\nSource: 
[README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Supported LLM Providers\n\nQuivr integrates with multiple LLM providers:\n\n| Provider | Model Support | Configuration |\n|----------|---------------|---------------|\n| OpenAI | GPT-4, GPT-3.5 | `OPENAI_API_KEY` |\n| Anthropic | Claude family | `ANTHROPIC_API_KEY` |\n| Mistral | Mistral models | `MISTRAL_API_KEY` |\n| Ollama | Local models | `OLLAMA_BASE_URL` |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Version History\n\nQuivr follows semantic versioning for the `quivr-core` package. Key releases include:\n\n| Version | Date | Key Changes |\n|---------|------|-------------|\n| 0.0.27 | 2024-12-16 | Max context tokens enforcement, megaparse SDK integration |\n| 0.0.19 | 2024-10-21 | Beginning of quivr-core development |\n| 0.0.13 | 2024-08-01 | Added parsers and tox tests |\n| 0.0.2 | 2024-07-09 | Initial quivr-core package release |\n\nSource: [core/CHANGELOG.md](https://github.com/QuivrHQ/quivr/blob/main/core/CHANGELOG.md)\n\n## Documentation and Community\n\nAdditional resources for learning and contributing to Quivr:\n\n| Resource | Description |\n|----------|-------------|\n| [Official Documentation](https://core.quivr.com/) | Comprehensive guides and API reference |\n| [GitHub Issues](https://github.com/quivrhq/quivr/issues) | Bug reports and feature requests |\n| [Discord Community](https://discord.gg/HUpRgp2HG8) | Real-time support and discussions |\n| [Good First Issues](https://github.com/quivrhq/quivr/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) | Beginner-friendly contribution opportunities |\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## License\n\nQuivr is licensed under the Apache 2.0 License, making it freely available for commercial and personal use. 
Source: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n---\n\n<a id='page-getting-started'></a>\n\n## Getting Started\n\n### Related Pages\n\nRelated topics: [Introduction to Quivr](#page-introduction), [Installation](#page-installation)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n- [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [examples/pdf_parsing_tika.py](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n</details>\n\n# Getting Started\n\n## Overview\n\nQuivr is an open-source RAG (Retrieval-Augmented Generation) framework that enables developers to build AI-powered \"second brain\" applications. The `quivr-core` package provides the core RAG functionality, allowing users to ingest documents and query them using large language models. Source: [README.md:1-15](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\nThe framework is designed to be **opinionated, fast, and efficient**, abstracting away the complexity of document processing, vector storage, and LLM integration so developers can focus on building their products. 
Source: [README.md:27-30](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Prerequisites\n\nBefore getting started with Quivr, ensure your environment meets the following requirements:\n\n| Requirement | Version | Description |\n|-------------|---------|-------------|\n| Python | 3.10+ | The programming language runtime |\n| pip | Latest | Package installer for Python |\n| API Key | - | Required for cloud LLM providers (OpenAI, Anthropic, Mistral) |\n\nSource: [README.md:44-50](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Supported LLM Providers\n\nQuivr supports multiple LLM providers:\n\n| Provider | API Type | Notes |\n|----------|----------|-------|\n| OpenAI | API Key | Set `OPENAI_API_KEY` environment variable |\n| Anthropic | API Key | Supports Claude models |\n| Mistral | API Key | Supports Mistral AI models |\n| Ollama | Local | For running models locally |\n\nSource: [README.md:58-62](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Installation\n\n### Package Installation\n\nInstall the core package using pip:\n\n```bash\npip install quivr-core\n```\n\nSource: [README.md:53-55](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Verify Installation\n\nTo verify the installation worked correctly, you can import the package:\n\n```python\nfrom quivr_core import Brain\nprint(\"Quivr-core installed successfully!\")\n```\n\n## Core Concepts\n\n### Brain\n\nThe `Brain` is the central entity in Quivr that manages document ingestion and querying. 
It encapsulates:\n\n- **LLM Integration**: The language model used for generating answers\n- **Embedder**: The embedding model for vectorizing documents\n- **Vector Database**: Storage for document embeddings and retrieval\n- **File Processors**: Components that parse various file formats\n\nSource: [core/quivr_core/brain/brain.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### QuivrFile\n\nFiles are represented as `QuivrFile` objects with the following attributes:\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `id` | UUID | Unique identifier for the file |\n| `brain_id` | UUID | ID of the brain this file belongs to |\n| `path` | Path | File system path |\n| `original_filename` | str | Original filename |\n| `file_size` | int | File size in bytes |\n| `file_extension` | FileExtension | File type enum |\n| `file_sha1` | str | SHA1 hash for deduplication |\n| `additional_metadata` | dict | Custom metadata |\n\nSource: [core/quivr_core/files/file.py:50-70](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n### Processor Pipeline\n\nDocuments go through a processing pipeline that:\n\n1. **Parses** the file content based on file type\n2. **Splits** content into manageable chunks\n3. **Detects language** for each chunk\n4. **Embeds** chunks for vector storage\n5. 
**Stores** the embedded chunks in the vector database\n\nSource: [core/quivr_core/processor/processor_base.py:40-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Quick Start Guide\n\n### Minimal Example (5 Lines of Code)\n\nThe fastest way to get started with Quivr:\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\") as temp_file:\n    temp_file.write(\"Gold is a liquid of blue-like colour.\")\n    temp_file.flush()\n\n    brain = Brain.from_files(\n        name=\"test_brain\",\n        file_paths=[temp_file.name],\n    )\n\n    answer = brain.ask(\"what is gold? answer in french\")\n    print(\"answer:\", answer)\n```\n\nSource: [examples/simple_question/simple_question.py:1-20](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n### Interactive Chat Example\n\nFor a more complete example that sets the API key and passes a retrieval configuration (this snippet asks a single question; wrap `brain.ask` in an input loop for a console chat):\n\n```python\nimport os\nos.environ[\"OPENAI_API_KEY\"] = \"your-api-key-here\"\n\nfrom quivr_core import Brain\nfrom quivr_core.config import RetrievalConfig\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./document.pdf\", \"./notes.txt\"],\n)\n\nanswer = brain.ask(\n    \"What is the main topic of these documents?\",\n    retrieval_config=RetrievalConfig()\n)\nprint(answer.answer)\n```\n\nSource: [README.md:85-100](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### PDF Processing Example\n\nFor processing PDF files with custom LLM configuration:\n\n```python\nfrom langchain_core.embeddings import DeterministicFakeEmbedding\nfrom langchain_core.language_models import FakeListChatModel\nfrom quivr_core import Brain\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"tests/processor/data/dummy.pdf\"],\n    llm=LLMEndpoint(\n        
llm=FakeListChatModel(responses=[\"good\"]),\n        llm_config=LLMEndpointConfig(model=\"fake_model\", llm_base_url=\"local\"),\n    ),\n    embedder=DeterministicFakeEmbedding(size=20),\n)\n\nanswer = brain.ask(\"What is this document about?\")\nprint(answer.answer)\n```\n\nSource: [examples/pdf_parsing_tika.py:1-25](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n\n## Workflow Architecture\n\n### Basic RAG Workflow\n\nThe following diagram illustrates the basic RAG workflow in Quivr:\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Filter History]\n    B --> C[Query Rewrite]\n    C --> D[Retrieval]\n    D --> E[LLM Generation]\n    E --> F[Response]\n    \n    G[Document Ingestion] --> H[File Processing]\n    H --> I[Chunking]\n    I --> J[Embedding]\n    J --> K[Vector Storage]\n    \n    D --> K\n```\n\n### Data Flow\n\n```mermaid\ngraph LR\n    A[Files] -->|Parse| B[Documents]\n    B -->|Chunk| C[Chunks]\n    C -->|Embed| D[Vectors]\n    D -->|Store| E[Vector DB]\n    \n    F[Query] -->|Embed| G[Query Vector]\n    G -->|Search| E\n    E -->|Results| H[Context]\n    H -->|Generate| I[Answer]\n```\n\n## Configuration\n\n### Environment Variables\n\nConfigure your API keys as environment variables:\n\n```bash\nexport OPENAI_API_KEY=\"your-openai-key\"\nexport ANTHROPIC_API_KEY=\"your-anthropic-key\"  # Optional\nexport MISTRAL_API_KEY=\"your-mistral-key\"       # Optional\n```\n\n### Custom Retrieval Configuration\n\nCreate a YAML configuration file for customized retrieval strategies:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [\"retrieval\"]\n    - name: \"retrieval\"\n      edges: [\"generation\"]\n    - name: \"generation\"\n      edges: [\"END\"]\n  llm:\n    temperature: 0.7\n```\n\nSource: 
[README.md:65-85](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### RetrievalConfig Usage\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\n# Load from YAML file\nretrieval_config = RetrievalConfig.from_yaml(\"./basic_rag_workflow.yaml\")\n\n# Use with brain.ask()\nanswer = brain.ask(\n    question=\"Your question here\",\n    retrieval_config=retrieval_config\n)\n```\n\n## Supported File Types\n\nQuivr works with various file formats through pluggable processors:\n\n| File Type | Extension | Processor |\n|-----------|-----------|-----------|\n| Plain Text | `.txt` | SimpleTxtProcessor |\n| PDF | `.pdf` | TikaProcessor |\n| Markdown | `.md` | MarkdownProcessor |\n| CSV | `.csv` | CSVProcessor |\n| JSON | `.json` | JSONProcessor |\n\nSource: [README.md:31-35](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Advanced Usage\n\n### Streaming Responses\n\nFor real-time response streaming (`ask_streaming()` is an async generator, so iterate it with `async for` inside a coroutine):\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"streaming_brain\",\n    file_paths=[\"./documents/\"],\n)\n\nasync for chunk in brain.ask_streaming(\"Explain the findings\"):\n    if chunk.last_chunk:\n        break\n    print(chunk.answer, end=\"\", flush=True)\n```\n\n### Custom Processors\n\nImplement your own processor by extending `ProcessorBase`:\n\n```python\nfrom quivr_core.files.file import QuivrFile\nfrom quivr_core.processor.processor_base import ProcessorBase, ProcessedDocument\n\nclass CustomProcessor(ProcessorBase):\n    supported_extensions = [\".custom\"]\n    \n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument:\n        # Implement custom parsing logic\n        pass\n```\n\n## Project Structure\n\n```\nquivr/\n├── README.md                    # Main documentation\n├── core/\n│   ├── README.md               # quivr-core package info\n│   └── quivr_core/\n│       ├── brain/\n│       │   └── brain.py        # Brain class implementation\n│       ├── files/\n│       │   └── file.py         # QuivrFile dataclass\n│       ├── 
processor/\n│       │   ├── processor_base.py\n│       │   └── implementations/\n│       └── rag/\n│           ├── quivr_rag.py    # RAG implementation\n│           └── prompts.py      # LLM prompts\n├── examples/\n│   ├── simple_question/        # Basic usage examples\n│   ├── chatbot/                # Chainlit chatbot example\n│   └── pdf_parsing_tika.py     # PDF processing example\n```\n\n## Next Steps\n\nAfter completing the Getting Started guide:\n\n1. **Explore Examples**: Check the `examples/` directory for more use cases\n2. **Read Documentation**: Visit [core.quivr.com](https://core.quivr.com/) for full documentation\n3. **Join Community**: Connect with other users on [Discord](https://discord.gg/HUpRgp2HG8)\n4. **Contribute**: Check [open issues](https://github.com/quivrhq/quivr/issues) for contribution opportunities\n\n## Troubleshooting\n\n### Common Issues\n\n| Issue | Solution |\n|-------|----------|\n| ImportError: No module named 'quivr_core' | Run `pip install quivr-core` |\n| API Key not found | Set environment variable before running |\n| File parsing fails | Verify file format and size (max 20MB for examples) |\n| Slow retrieval | Adjust `n_results` parameter or use local embeddings |\n\n### Getting Help\n\n- **Documentation**: [https://core.quivr.com/](https://core.quivr.com/)\n- **Discord**: [Join our community](https://discord.gg/HUpRgp2HG8)\n- **GitHub Issues**: [Report bugs](https://github.com/quivrhq/quivr/issues)\n\n---\n\n<a id='page-installation'></a>\n\n## Installation\n\n### Related Pages\n\nRelated topics: [Getting Started](#page-getting-started), [LLM Integration](#page-llm-integration)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n- [examples/chatbot/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot/README.md)\n- 
[examples/chatbot_voice/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot_voice/README.md)\n- [examples/quivr-whisper/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/quivr-whisper/README.md)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n</details>\n\n# Installation\n\nThis guide covers all aspects of installing and setting up Quivr, including the core package, examples, and required dependencies.\n\n## Overview\n\nQuivr is a RAG (Retrieval-Augmented Generation) framework that enables users to create AI-powered knowledge bases from various file types. The installation process varies depending on your use case:\n\n- **Core package installation** for integrating Quivr into existing Python projects\n- **Example applications** for testing and learning purposes\n- **Development setup** for contributing to the project\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n## Prerequisites\n\n### System Requirements\n\n| Requirement | Minimum | Recommended |\n|-------------|---------|-------------|\n| Python Version | 3.8+ (some examples only) | 3.10+ (required by `quivr-core`) |\n| Operating System | Linux, macOS, Windows | Linux/macOS |\n| RAM | 4 GB | 8 GB+ |\n| Disk Space | 500 MB | 1 GB+ |\n\n**Note:** The core `quivr-core` package requires Python 3.10 or newer; the 3.8+ minimum applies only to some examples (such as chatbot_voice).\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [examples/chatbot_voice/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot_voice/README.md)\n\n### API Keys\n\nQuivr supports multiple LLM providers. 
You must configure at least one API key:\n\n| Provider | Environment Variable | Required |\n|----------|---------------------|----------|\n| OpenAI | `OPENAI_API_KEY` | Yes (if using OpenAI) |\n| Anthropic | `ANTHROPIC_API_KEY` | Yes (if using Anthropic) |\n| Mistral | `MISTRAL_API_KEY` | Yes (if using Mistral) |\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n## Installing the Core Package\n\n### Using pip (Recommended)\n\nThe simplest way to install Quivr Core is via pip:\n\n```bash\npip install quivr-core\n```\n\nVerify the installation by checking that the package is importable:\n\n```python\nfrom quivr_core import Brain\n```\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md)\n\n### Package Contents\n\nThe `quivr-core` package includes:\n\n- **Brain class**: The main interface for creating and managing knowledge bases\n- **RAG components**: Retrieval, processing, and answer generation pipelines\n- **Built-in processors**: Support for PDF, TXT, Markdown, and other common formats\n- **Default configurations**: Ready-to-use settings for quick setup\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Installation for Examples\n\n### Chatbot Example with Chainlit\n\nThe chatbot example demonstrates file upload and Q&A capabilities using Chainlit.\n\n#### Using rye (Recommended)\n\n```bash\n# Clone or navigate to the chatbot directory\ncd examples/chatbot\n\n# Install dependencies with rye\nrye sync\n\n# Activate the virtual environment\nsource ./.venv/bin/activate\n```\n\n#### Using pip\n\n```bash\n# Navigate to the chatbot directory\ncd examples/chatbot\n\n# Install from requirements\npip install -r requirements.txt\n```\n\nSource: [examples/chatbot/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot/README.md)\n\n### Voice Chatbot Example\n\nThe voice chatbot example adds voice interaction capabilities.\n\n```bash\n# Navigate to the voice chatbot directory\ncd examples/chatbot_voice\n\n# Install dependencies\npip install -r requirements.lock\n```\n\nSource: [examples/chatbot_voice/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot_voice/README.md)\n\n### Flask-based 
Example (quivr-whisper)\n\nThis example uses Flask for a web server implementation.\n\n```bash\n# Install Flask and dependencies\npip install flask openai requests python-dotenv\n```\n\nSource: [examples/quivr-whisper/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/quivr-whisper/README.md)\n\n### Simple Question Example\n\nThe simplest example demonstrating basic usage:\n\n```python\n# Create a Python script with the following content\nimport tempfile\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"your_file.txt\"],\n)\n\nanswer = brain.ask(\"Your question here\")\nprint(answer)\n```\n\nRequires `python-dotenv` for loading environment variables.\n\nSource: [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Environment Configuration\n\n### Required Environment Variables\n\nCreate a `.env` file in your project root:\n\n```env\n# LLM API Keys (at least one required)\nOPENAI_API_KEY=your_openai_api_key\n# ANTHROPIC_API_KEY=your_anthropic_key\n# MISTRAL_API_KEY=your_mistral_key\n\n# Optional: For specific integrations\nQUIVR_API_KEY=your_quivr_api_key\nQUIVR_CHAT_ID=your_chat_id\nQUIVR_BRAIN_ID=your_brain_id\nQUIVR_URL=https://api.quivr.app\n```\n\n### Loading Environment Variables\n\nUse `python-dotenv` to load environment variables:\n\n```python\nimport dotenv\ndotenv.load_dotenv()\n```\n\nSource: [examples/quivr-whisper/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/quivr-whisper/README.md), [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Installation Flow\n\n```mermaid\ngraph TD\n    A[Start Installation] --> B{Choose Installation Type}\n    \n    B --> C[Core Package Only]\n    B --> D[Examples & Demos]\n    B --> E[Development Setup]\n    \n    C --> F[pip install quivr-core]\n    F --> G[Set API Keys]\n    G --> H[Ready to Integrate]\n    \n    D --> I{Which Example?}\n    \n    I --> J[Chatbot with Chainlit]\n    I --> K[Voice Chatbot]\n    I --> L[Flask App]\n    \n    J --> M[rye sync or pip install]\n    K --> N[pip install -r requirements.lock]\n    L --> O[pip install flask openai requests]\n    \n    M --> 
P[Run with chainlit run main.py]\n    N --> Q[Run with chainlit run main.py]\n    O --> R[Run with flask run]\n    \n    E --> S[Clone Repository]\n    S --> T[Navigate to core/]\n    T --> U[Install from pyproject.toml]\n    U --> V[Run Tests]\n```\n\n## Verifying Installation\n\n### Quick Verification\n\nAfter installation, verify that Quivr is correctly installed:\n\n```python\nimport quivr_core\nprint(quivr_core.__version__)  # Should print the installed version\n```\n\n### Full Installation Test\n\nCreate a test script to verify all components:\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\n# Create a temporary test file\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\", delete=False) as f:\n    f.write(\"Quivr is a RAG framework for building AI knowledge bases.\")\n    temp_path = f.name\n\n# Create a brain from the file\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[temp_path],\n)\n\n# Test the ask function\nanswer = brain.ask(\"What is Quivr?\")\nprint(f\"Answer: {answer}\")\n\n# Print brain info\nbrain.print_info()\n```\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Troubleshooting\n\n### Common Issues\n\n| Issue | Cause | Solution |\n|-------|-------|----------|\n| ImportError: No module named 'quivr_core' | Package not installed | Run `pip install quivr-core` |\n| AuthenticationError | Invalid API key | Verify your API key is correct |\n| Version mismatch | Incompatible Python version | Ensure Python 3.10+ is used |\n| File not found | Incorrect file path | Check the file path exists |\n\n### Checking Installed Version\n\nThe Quivr version is automatically tracked in document metadata:\n\n```python\nfrom quivr_core.processor.processor_base import get_version\n\nfrom importlib.metadata import PackageNotFoundError, version\n\ntry:\n    qvr_version = version(\"quivr-core\")\nexcept PackageNotFoundError:\n    qvr_version = \"dev\"\n```\n\nSource: 
[core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Next Steps\n\nAfter successful installation:\n\n1. **Create your first Brain**: Load documents and create a knowledge base\n2. **Configure RAG**: Customize retrieval strategies using YAML configuration files\n3. **Explore Examples**: Test different example applications to understand capabilities\n4. **Read Documentation**: Visit [core.quivr.com](https://core.quivr.com/) for advanced usage\n\nSource: [core/README.md](https://github.com/QuivrHQ/quivr/blob/main/core/README.md), [examples/chatbot/README.md](https://github.com/QuivrHQ/quivr/blob/main/examples/chatbot/README.md)\n\n---\n\n<a id='page-architecture-overview'></a>\n\n## System Architecture Overview\n\n### Related Pages\n\nRelated topics: [Core Components](#page-core-components), [Brain Class](#page-brain-class), [RAG Implementation](#page-rag-implementation)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n- [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n- [core/quivr_core/rag/prompts.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/prompts.py)\n</details>\n\n# System Architecture Overview\n\n## Introduction\n\nQuivr is an open-source framework for building RAG (Retrieval Augmented Generation) applications that enables users to create personal \"brains\" from various document types. 
The system leverages generative AI to provide intelligent question-answering capabilities over user-provided documents.\n\nThe architecture is designed around three core pillars:\n\n- **Document Processing**: Extracting and chunking content from files\n- **Vector Storage**: Storing embeddings for semantic search\n- **LLM-powered Answer Generation**: Using large language models to generate answers based on retrieved context\n\n## High-Level Architecture\n\n```mermaid\ngraph TD\n    A[User Files] --> B[File Processing]\n    B --> C[Chunking & Embedding]\n    C --> D[Vector Database]\n    E[User Query] --> F[Embedding]\n    F --> G[Semantic Search]\n    G --> H[Context Assembly]\n    H --> I[LLM Generation]\n    I --> J[Answer Response]\n    \n    D --> G\n```\n\n## Core Components\n\n### Brain Class\n\nThe `Brain` class is the central orchestrator of the Quivr framework. It manages the entire lifecycle from file ingestion to query answering.\n\n**File**: `core/quivr_core/brain/brain.py`\n\n#### Key Responsibilities\n\n| Responsibility | Description |\n|----------------|-------------|\n| File Ingestion | Processes files through various parsers |\n| Vector Storage | Manages the vector database for embeddings |\n| Query Processing | Handles search and retrieval operations |\n| Answer Generation | Orchestrates LLM-based response generation |\n\n#### Core Methods\n\n| Method | Type | Purpose |\n|--------|------|---------|\n| `from_files` | Synchronous | Create a Brain from file paths |\n| `afrom_files` | Async | Async creation from files |\n| `afrom_langchain_documents` | Async | Create from LangChain Document objects |\n| `asearch` | Async | Search for relevant documents |\n| `ask_streaming` | Async Generator | Stream answers to questions |\n\n#### Initialization Parameters\n\n```python\nclass Brain:\n    def __init__(\n        self,\n        id: UUID,\n        name: str,\n        storage: BrainStorage | None,\n        llm: LLMEndpoint,\n        embedder: Embeddings,\n   
     vector_db: QuivrVectorStore,\n    )\n```\n\nSource: `core/quivr_core/brain/brain.py:1-100`\n\n### File Processing Pipeline\n\nThe processor system handles extraction and transformation of various file formats.\n\n**File**: `core/quivr_core/processor/processor_base.py`\n\n#### Processing Flow\n\n```mermaid\ngraph LR\n    A[QuivrFile] --> B[process_file]\n    B --> C[process_file_inner]\n    C --> D[ProcessedDocument]\n    D --> E[Metadata Enrichment]\n    E --> F[Chunk Output]\n```\n\n#### Metadata Enrichment\n\nDuring processing, each chunk receives comprehensive metadata:\n\n```python\ndoc.metadata = {\n    \"chunk_index\": idx,\n    \"quivr_core_version\": qvr_version,\n    \"language\": detect_language(text=...),\n    **file.metadata,\n    **doc.metadata,\n    **self.processor_metadata,\n}\n```\n\nSource: `core/quivr_core/processor/processor_base.py:20-35`\n\n### QuivrFile Data Model\n\n**File**: `core/quivr_core/files/file.py`\n\nThe `QuivrFile` class represents uploaded files with their associated metadata.\n\n```python\nclass QuivrFile:\n    __slots__ = [\n        \"id\",\n        \"brain_id\",\n        \"path\",\n        \"original_filename\",\n        \"file_size\",\n        \"file_extension\",\n        \"file_sha1\",\n        \"additional_metadata\",\n    ]\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `id` | UUID | Unique identifier |\n| `brain_id` | UUID | Associated brain identifier |\n| `path` | Path | File system path |\n| `original_filename` | str | Original file name |\n| `file_size` | int | File size in bytes |\n| `file_extension` | FileExtension | File type |\n| `file_sha1` | str | SHA1 hash for deduplication |\n\nSource: `core/quivr_core/files/file.py:1-50`\n\n### Retrieval and Answer Generation\n\n## RAG Architecture\n\nThe RAG (Retrieval Augmented Generation) system combines semantic search with LLM-powered answer generation.\n\n**File**: `core/quivr_core/rag/quivr_rag_langgraph.py`\n\n#### RAG 
Workflow\n\n```mermaid\ngraph TD\n    A[User Question] --> B[Filter History]\n    B --> C[Query Rewrite]\n    C --> D[Retrieval]\n    D --> E[Context Assembly]\n    E --> F[LLM Generation]\n    F --> G[Streaming Response]\n    \n    H[System Prompt] --> F\n    I[Chat History] --> B\n    J[File Filters] --> D\n```\n\n#### Configuration\n\nThe system uses `RetrievalConfig` for configuring the RAG pipeline:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `llm_config` | LLMEndpointConfig | Brain's LLM | LLM model configuration |\n| `temperature` | float | 0.7 | Generation temperature |\n| `n_results` | int | 5 | Number of retrieval results |\n\nSource: `core/quivr_core/rag/quivr_rag.py:1-50`\n\n### Streaming Answer Generation\n\nThe system supports streaming responses for real-time answer delivery:\n\n```python\nasync for response in rag_instance.answer_astream(\n    run_id=run_id,\n    question=question,\n    system_prompt=system_prompt or None,\n    history=chat_history,\n    list_files=list_files,\n    metadata=metadata,\n):\n    if not response.last_chunk:\n        yield response\n```\n\nSource: `core/quivr_core/brain/brain.py:150-170`\n\n## Prompt Templates\n\n**File**: `core/quivr_core/rag/prompts.py`\n\nThe system uses structured prompts with multiple context sections:\n\n| Section | Purpose |\n|---------|---------|\n| `user_metadata` | User-specific context |\n| `ticket_metadata` | Query-related metadata |\n| `similar_tickets` | Reference information |\n| `ticket_history` | Conversation history |\n| `additional_information` | Extra context |\n| `client_query` | The actual question |\n\nDefault instructions ensure consistent response quality:\n- Verbosity matching similar responses\n- Proper formatting with paragraphs, bold, italic\n- Language consistency with the query\n- No signature at end of response\n\n## LLM Integration\n\n### Default LLM Configuration\n\nThe system supports multiple LLM providers:\n\n| 
Provider | Configuration |\n|----------|---------------|\n| OpenAI | `OPENAI_API_KEY` environment variable |\n| Anthropic | Anthropic API support |\n| Mistral | Mistral API support |\n| Ollama | Local model support |\n\nSource: README.md - Configuration section\n\n### Embedder Abstraction\n\nEmbedders are abstracted to support multiple backends:\n\n```python\nif embedder is None:\n    embedder = default_embedder()\n```\n\nVector DB initialization with embeddings:\n\n```python\nif vector_db is None:\n    vector_db = await build_default_vectordb(langchain_documents, embedder)\n```\n\nSource: `core/quivr_core/brain/brain.py:60-70`\n\n## Search Capabilities\n\n### Async Search Method\n\n```python\nasync def asearch(\n    self,\n    query: str | Document,\n    n_results: int = 5,\n    filter: Callable | Dict[str, Any] | None = None,\n    fetch_n_neighbors: int = 20,\n) -> list[SearchResult]\n```\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `query` | str \\| Document | Required | Search query |\n| `n_results` | int | 5 | Number of results |\n| `filter` | Callable \\| Dict | None | Custom filtering |\n| `fetch_n_neighbors` | int | 20 | Extended fetch for re-ranking |\n\nSource: `core/quivr_core/brain/brain.py:70-85`\n\n## State Management\n\n### Brain ID Generation\n\nEach brain receives a unique identifier:\n\n```python\nbrain_id = uuid4()\n```\n\n### Workspace and Chat Context\n\nThe system tracks conversation context:\n\n```python\nmetadata = LangchainMetadata(\n    langfuse_trace_id=str(run_id),\n    langfuse_user_id=str(self.workspace_id),\n    langfuse_session_id=str(self.chat_id),\n)\n```\n\n## Installation and Dependencies\n\n### Core Package\n\n```bash\npip install quivr-core\n```\n\n### Quick Start Example\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n\nanswer = brain.ask(\"What is the 
main topic?\")\n```\n\n## Summary\n\nThe Quivr architecture implements a modular RAG pipeline where:\n\n1. **Files** are processed and chunked with metadata enrichment\n2. **Embeddings** are generated and stored in a vector database\n3. **Queries** trigger semantic search with configurable filters\n4. **LLMs** generate contextual answers from retrieved content\n5. **Streaming** enables real-time response delivery\n\nThe design prioritizes flexibility through dependency injection, allowing custom LLM providers, embedders, and vector stores while maintaining a consistent API for brain creation and querying.\n\n---\n\n<a id='page-core-components'></a>\n\n## Core Components\n\n### Related Pages\n\nRelated topics: [System Architecture Overview](#page-architecture-overview), [Brain Class](#page-brain-class), [LLM Integration](#page-llm-integration)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n- [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n- [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n</details>\n\n# Core Components\n\nQuivr is an open-source RAG (Retrieval-Augmented 
Generation) framework that enables users to create \"brains\" from various document types and query them using natural language. The core components form the architectural foundation that powers document processing, vector storage, retrieval, and LLM-based answer generation.\n\n## Architecture Overview\n\nQuivr's architecture follows a modular design with clear separation of concerns across several key layers:\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Brain.ask]\n    B --> C[RetrievalConfig]\n    C --> D[QuivrQARAGLangGraph]\n    D --> E[Vector Store Retrieval]\n    E --> F[Context Chunks]\n    F --> G[LLM Generation]\n    G --> H[Streaming Response]\n    \n    I[Files] --> J[Processor]\n    J --> K[ProcessedDocument]\n    K --> L[Vector DB Indexing]\n    L --> E\n    \n    M[LLM Config] --> G\n    N[Embedder] --> L\n```\n\nThe system is built around three primary abstractions:\n\n| Component | Purpose | Key Files |\n|-----------|---------|-----------|\n| **Brain** | Central orchestrator managing files, vectors, and LLM interactions | brain.py |\n| **RAG Engine** | Handles retrieval and answer generation pipeline | quivr_rag.py |\n| **Processors** | Parse and chunk various file formats | processor_base.py |\n\n---\n\n## Brain Class\n\nThe `Brain` class is the central entry point for all Quivr operations. 
It encapsulates file management, vector storage, and LLM-based question answering.\n\n### Initialization Methods\n\nThe Brain class provides multiple factory methods for creation:\n\n| Method | Description | Use Case |\n|--------|-------------|----------|\n| `from_files()` | Synchronous creation from file paths | Simple scripts, examples |\n| `afrom_files()` | Async creation from file paths | Production async applications |\n| `from_langchain_documents()` | From pre-processed LangChain documents | Advanced integration scenarios |\n| `afrom_langchain_documents()` | Async version | Async workflows with external processing |\n\n### Creating a Brain from Files\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n```\n\nThe `from_files()` method automatically:\n\n1. Processes each file using the appropriate processor based on file extension\n2. Chunks documents according to the configured splitter settings\n3. Generates embeddings using the configured embedder\n4. Stores vectors in the configured vector database\n5. Returns an initialized Brain instance ready for queries\n\nSource: [examples/simple_question/simple_question.py:1-17](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n### Querying the Brain\n\n```python\nanswer = brain.ask(\n    \"what is gold? 
answer in french\",\n    retrieval_config=retrieval_config\n)\nprint(\"answer:\", answer.answer)\n```\n\nThe `ask()` method supports:\n\n- Streaming responses via `ask_streaming()`\n- Custom retrieval configurations\n- Chat history management\n- System prompt customization\n- File filtering during retrieval\n\nSource: [core/quivr_core/brain/brain.py:150-200](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n---\n\n## Retrieval Configuration\n\nThe `RetrievalConfig` class controls how documents are retrieved and how the LLM generates responses.\n\n### Configuration Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `llm_config` | `LLMEndpointConfig` | Brain's default LLM | LLM model and endpoint settings |\n| `n_results` | `int` | `5` | Number of documents to retrieve |\n| `fetch_n_neighbors` | `int` | `20` | Neighbors to fetch for re-ranking |\n| `llm_base_url` | `str` | `None` | Custom LLM endpoint base URL |\n| `llm_api_key` | `str` | Environment var | API key for LLM service |\n\n### YAML Configuration\n\nQuivr supports YAML-based workflow configuration:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [\"retrieval\"]\n```\n\nConfiguration can be loaded from YAML files:\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\nretrieval_config = RetrievalConfig.from_yaml(\"./basic_rag_workflow.yaml\")\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n---\n\n## LLM Integration\n\nThe LLM layer is abstracted through the `LLMEndpoint` class, providing flexibility to use different LLM providers.\n\n### Supported Providers\n\nQuivr supports multiple LLM providers:\n\n- **OpenAI** (GPT models)\n- **Anthropic** (Claude models)\n- **Mistral**\n- **Local models** via 
Ollama\n\n### LLM Configuration\n\n```python\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\nllm = LLMEndpoint.from_config(\n    config=LLMEndpointConfig(\n        model=\"gpt-4\",\n        llm_base_url=\"https://api.openai.com/v1\",\n        temperature=0.7\n    )\n)\n```\n\n### Embedding Configuration\n\nEmbeddings are generated using the configured embedder. If no embedder is specified, Quivr uses the default embedder:\n\n```python\nif embedder is None:\n    embedder = default_embedder()\n```\n\nThe embedder is critical for:\n\n- Converting document chunks into vector representations\n- Encoding user queries for similarity search\n- Enabling semantic retrieval over the document corpus\n\nSource: [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n\n---\n\n## File Processing Pipeline\n\nThe processor system handles parsing various file formats and converting them into chunked documents suitable for vector storage.\n\n### Processor Architecture\n\n```mermaid\ngraph LR\n    A[QuivrFile] --> B[ProcessorBase]\n    B --> C[process_file_inner]\n    C --> D[ProcessedDocument]\n    D --> E[Splitter]\n    E --> F[Chunked Documents]\n    F --> G[Metadata Enrichment]\n    G --> H[Vector DB Indexing]\n```\n\n### Processor Base Class\n\nAll processors inherit from `ProcessorBase`, which provides:\n\n- Async file processing interface\n- Automatic metadata enrichment\n- Language detection\n- Chunk indexing\n\n```python\nclass ProcessorBase(ABC, Generic[R]):\n    @abstractmethod\n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[R]:\n        raise NotImplementedError\n```\n\n### Metadata Enrichment\n\nDuring processing, each chunk receives enriched metadata:\n\n```python\ndoc.metadata = {\n    \"chunk_index\": idx,\n    \"quivr_core_version\": qvr_version,\n    \"language\": detect_language(\n        
text=doc.page_content,\n        low_memory=True,\n    ).value,\n    **file.metadata,\n    **doc.metadata,\n    **self.processor_metadata,\n}\n```\n\nSource: [core/quivr_core/processor/processor_base.py:40-55](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n### Supported Processors\n\n| Processor | Extensions | Description |\n|-----------|------------|-------------|\n| `SimpleTxtProcessor` | `.txt` | Plain text file processing |\n| `PdfProcessor` | `.pdf` | PDF document parsing |\n| `MarkdownProcessor` | `.md` | Markdown file parsing |\n\n### Text File Processing\n\nThe `SimpleTxtProcessor` demonstrates the recursive chunking strategy:\n\n```python\ndef recursive_character_splitter(\n    doc: Document, chunk_size: int, chunk_overlap: int\n) -> list[Document]:\n    if len(doc.page_content) <= chunk_size:\n        return [doc]\n    \n    chunk = Document(page_content=doc.page_content[:chunk_size], metadata=doc.metadata)\n    remaining = Document(\n        page_content=doc.page_content[chunk_size - chunk_overlap:],\n        metadata=doc.metadata,\n    )\n    return [chunk] + recursive_character_splitter(remaining, chunk_size, chunk_overlap)\n```\n\nKey constraints enforced:\n\n- `chunk_overlap` must be less than `chunk_size`\n- Original file names are prepended to chunk content\n- Null characters are removed\n- UTF-8 encoding is enforced\n\nSource: [core/quivr_core/processor/implementations/simple_txt_processor.py:15-30](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n---\n\n## RAG Engine\n\nThe RAG engine coordinates the retrieval and generation pipeline, transforming user queries into contextual answers.\n\n### Answer Generation Flow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Brain\n    participant RAG as QuivrQARAGLangGraph\n    participant VectorDB\n    participant LLM\n    \n    User->>Brain: ask(question)\n    Brain->>RAG: 
answer_astream()\n    RAG->>VectorDB: search(query, n_results)\n    VectorDB-->>RAG: relevant_chunks\n    RAG->>LLM: generate(context, question)\n    LLM-->>RAG: streaming_response\n    RAG-->>User: yield chunks\n```\n\n### Streaming Responses\n\nThe `ask_streaming()` method returns an async generator:\n\n```python\nasync for chunk in brain.ask_streaming(\"What is the meaning of life?\"):\n    print(chunk.answer)\n```\n\nEach chunk contains:\n\n- `answer`: The partial or complete response text\n- `sources`: Relevant document metadata\n- `last_chunk`: Boolean flag indicating completion\n- `metadata`: Additional context about the generation\n\n### Chat History Integration\n\nThe RAG engine maintains chat history for conversational queries:\n\n```python\nchat_history = self.default_chat if chat_history is None else chat_history\n\nfull_answer = \"\"\nasync for response in rag_instance.answer_astream(\n    question=question,\n    history=chat_history,\n    list_files=list_files,\n    metadata=metadata,\n):\n    # Process streaming chunks\n```\n\nSource: [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n\n---\n\n## Brain Serialization and Persistence\n\nBrains can be serialized to disk and reloaded for persistence across sessions.\n\n### Save Operation\n\n```python\nsave_path = await brain.save(\"/home/user/.local/quivr\")\n```\n\n### Load Operation\n\n```python\nbrain_loaded = Brain.load(save_path)\nbrain_loaded.print_info()\n```\n\nSource: [examples/save_load_brain.py](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n\n---\n\n## Search Functionality\n\nThe Brain class provides direct search capabilities for retrieving relevant documents without generating LLM responses.\n\n### Search Parameters\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `query` | `str \\| Document` | Search query text or LangChain document |\n| `n_results` | `int` | Number of 
results to return (default: 5) |\n| `filter` | `Callable \\| Dict \\| None` | Optional metadata filtering |\n| `fetch_n_neighbors` | `int` | Neighbors to fetch for re-ranking (default: 20) |\n\n### Search Return Type\n\nThe `asearch()` method returns a list of `SearchResult` objects containing:\n\n- Matched document chunks\n- Similarity scores\n- Metadata about the source documents\n\n---\n\n## Configuration Summary\n\n### Environment Variables\n\n| Variable | Required | Description |\n|----------|----------|-------------|\n| `OPENAI_API_KEY` | Yes (for OpenAI) | API key for OpenAI models |\n| `ANTHROPIC_API_KEY` | Yes (for Claude) | API key for Anthropic models |\n| `OPENAI_API_BASE` | No | Custom API base URL |\n\n### Default Configuration\n\nWhen no configuration is provided, Quivr uses sensible defaults:\n\n```python\nif llm is None:\n    llm = default_llm()\n\nif embedder is None:\n    embedder = default_embedder()\n\nif vector_db is None:\n    vector_db = await build_default_vectordb(langchain_documents, embedder)\n```\n\nThis ensures that users can get started with minimal configuration while still allowing full customization.\n\n---\n\n## Next Steps\n\n- **Workflows**: Explore YAML-based workflow configurations for advanced retrieval strategies\n- **Custom Processors**: Implement the `ProcessorBase` interface to support additional file formats\n- **LLM Providers**: Configure different LLM providers based on your requirements\n- **Deployment**: Review deployment documentation for production setups\n\n---\n\n<a id='page-brain-class'></a>\n\n## Brain Class\n\n### Related Pages\n\nRelated topics: [RAG Implementation](#page-rag-implementation), [File Processing and Parsers](#page-file-processing), [Storage System](#page-storage)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n- 
[core/quivr_core/brain/__init__.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/__init__.py)\n- [examples/save_load_brain.py](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n- [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n- [examples/pdf_parsing_tika.py](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n</details>\n\n# Brain Class\n\nThe `Brain` class is the central abstraction in Quivr for building Retrieval Augmented Generation (RAG) systems. It serves as the primary interface for creating knowledge bases from documents, performing semantic search, and generating AI-powered answers based on retrieved context.\n\n## Overview\n\nThe Brain class encapsulates:\n\n- **Vector storage** for semantic document indexing\n- **LLM integration** for answer generation\n- **Embedder configuration** for document vectorization\n- **File processing pipeline** for document ingestion\n- **Chat history management** for conversational context\n\nA Brain can be created from files (PDF, TXT, Markdown, etc.) 
or from LangChain documents, then queried to generate context-aware responses.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[Files / Documents] --> B[Brain Class]\n    B --> C[Vector DB]\n    B --> D[LLM]\n    B --> E[Embedder]\n    B --> F[Storage]\n    G[Query] --> B\n    B --> H[QuivrQARAGLangGraph]\n    H --> I[Answer]\n    C --> H\n```\n\n## Creating a Brain\n\n### From Files\n\nThe most common way to create a Brain is from a collection of files:\n\n```python\nfrom quivr_core import Brain\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./my_first_doc.pdf\", \"./my_second_doc.txt\"],\n)\n```\n\nSource: [examples/simple_question/simple_question.py:1-14](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n### From LangChain Documents\n\nFor programmatic document creation:\n\n```python\nfrom langchain_core.documents import Document\nfrom quivr_core import Brain\n\ndocuments = [Document(page_content=\"Hello, world!\")]\nbrain = await Brain.afrom_langchain_documents(name=\"My Brain\", langchain_documents=documents)\n```\n\nSource: [core/quivr_core/brain/brain.py:1-150](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### With Custom LLM and Embedder\n\nOverride default configurations with custom implementations:\n\n```python\nfrom langchain_core.embeddings import DeterministicFakeEmbedding\nfrom langchain_core.language_models import FakeListChatModel\nfrom quivr_core import Brain\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"tests/processor/data/dummy.pdf\"],\n    llm=LLMEndpoint(\n        llm=FakeListChatModel(responses=[\"good\"]),\n        llm_config=LLMEndpointConfig(model=\"fake_model\", llm_base_url=\"local\"),\n    ),\n    embedder=DeterministicFakeEmbedding(size=20),\n)\n```\n\nSource: 
[examples/pdf_parsing_tika.py:1-20](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n\n## Core Methods\n\n### Asking Questions\n\n#### Synchronous\n\n```python\nanswer = brain.ask(\n    \"what is gold? answer in french\",\n    retrieval_config=retrieval_config\n)\nprint(\"answer:\", answer)\n```\n\n#### Asynchronous Streaming\n\n```python\nasync for chunk in brain.ask_streaming(\"What is the meaning of life?\"):\n    print(chunk.answer)\n```\n\nSource: [core/quivr_core/brain/brain.py:150-250](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### Search\n\n#### Async Search\n\n```python\nresults = await brain.asearch(\n    query=\"your search query\",\n    n_results=5,\n    filter=None,\n    fetch_n_neighbors=20\n)\n```\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `query` | `str \\| Document` | Required | The search query |\n| `n_results` | `int` | 5 | Number of results to return |\n| `filter` | `Callable \\| Dict \\| None` | None | Optional filter for results |\n| `fetch_n_neighbors` | `int` | 20 | Number of neighbors to fetch |\n\nSource: [core/quivr_core/brain/brain.py:100-120](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n## Storage and Persistence\n\n### Saving a Brain\n\nBrains can be persisted to disk for later reuse:\n\n```python\nsave_path = await brain.save(\"/home/user/.local/quivr\")\n```\n\nSource: [examples/save_load_brain.py:1-22](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n\n### Loading a Brain\n\n```python\nbrain_loaded = Brain.load(save_path)\nbrain_loaded.print_info()\n```\n\nSource: [examples/save_load_brain.py:18](https://github.com/QuivrHQ/quivr/blob/main/examples/save_load_brain.py)\n\n## File Processing\n\n### QuivrFile Entity\n\nThe Brain processes files through the `QuivrFile` class:\n\n```python\nclass QuivrFile:\n    __slots__ = [\n        \"id\",\n        \"brain_id\",\n 
       \"path\",\n        \"original_filename\",\n        \"file_size\",\n        \"file_extension\",\n        \"file_sha1\",\n        \"additional_metadata\",\n    ]\n```\n\nSource: [core/quivr_core/files/file.py:20-35](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n### File Metadata\n\nDuring processing, each chunk receives metadata including:\n\n| Field | Description |\n|-------|-------------|\n| `chunk_index` | Position of the chunk in the document |\n| `quivr_core_version` | Version of quivr-core used |\n| `language` | Detected language of the content |\n| `original_file_name` | Source filename |\n\nSource: [core/quivr_core/processor/processor_base.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Retrieval Configuration\n\nThe `RetrievalConfig` controls the RAG pipeline behavior:\n\n```python\nfrom quivr_core.config import RetrievalConfig\n\nconfig_file_name = \"./basic_rag_workflow.yaml\"\nretrieval_config = RetrievalConfig.from_yaml(config_file_name)\n```\n\n### YAML Configuration Example\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n      edges: [...]\n```\n\n## Complete Usage Example\n\n```python\nimport tempfile\nfrom quivr_core import Brain\n\nwith tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".txt\") as temp_file:\n    temp_file.write(\"Gold is a liquid of blue-like colour.\")\n    temp_file.flush()\n\n    brain = Brain.from_files(\n        name=\"test_brain\",\n        file_paths=[temp_file.name],\n    )\n\n    answer = brain.ask(\"what is gold? 
answer in french\")\n    print(\"answer:\", answer)\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Attributes Summary\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `id` | `UUID` | Unique identifier for the brain |\n| `name` | `str` | Human-readable name |\n| `vector_db` | `VectorStore` | Vector storage for embeddings |\n| `llm` | `LLMEndpoint` | Language model for generation |\n| `embedder` | `Embeddings` | Embedding model for vectorization |\n| `storage` | `BrainStorage` | Persistence layer |\n\n## Module Export\n\nThe Brain class is exported from the main `quivr_core` package:\n\n```python\nfrom quivr_core import Brain\n```\n\nSource: [core/quivr_core/brain/__init__.py:1-5](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/__init__.py)\n\n---\n\n<a id='page-rag-implementation'></a>\n\n## RAG Implementation\n\n### Related Pages\n\nRelated topics: [Brain Class](#page-brain-class)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n- [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n- [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n- [core/quivr_core/rag/entities/chat.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/chat.py)\n- [core/quivr_core/rag/prompts.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/prompts.py)\n</details>\n\n# RAG Implementation\n\n## Overview\n\nThe RAG (Retrieval-Augmented Generation) implementation in Quivr provides a flexible, opinionated framework for building Retrieval-Augmented Generation pipelines. 
It combines vector-based document retrieval with Large Language Model (LLM) generation to enable question-answering over uploaded documents and files.\n\nThe implementation supports both synchronous and streaming responses, configurable retrieval strategies, and integration with multiple LLM providers including OpenAI, Anthropic, Mistral, and local models via Ollama.\n\n## Architecture\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Brain.ask]\n    B --> C[RAG Pipeline]\n    C --> D[Retrieval Phase]\n    C --> E[Generation Phase]\n    D --> F[Vector DB Search]\n    F --> G[Relevant Chunks]\n    E --> H[LLM Processing]\n    G --> H\n    H --> I[Streaming Response]\n    I --> J[ParsedRAGChunkResponse]\n```\n\n## Core Components\n\n### Brain Class\n\nThe `Brain` class serves as the main entry point for RAG operations. It orchestrates document processing, retrieval, and generation.\n\n**Key Methods:**\n\n| Method | Description | Return Type |\n|--------|-------------|-------------|\n| `from_files()` | Create a brain from file paths | `Brain` |\n| `afrom_langchain_documents()` | Create a brain from LangChain documents | `Brain` |\n| `ask()` | Synchronous question answering | `RAGResponse` |\n| `asearch()` | Search for relevant documents | `list[SearchResult]` |\n\nSource: [core/quivr_core/brain/brain.py:1-100](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n### RAG Pipeline Flow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Brain\n    participant Retriever\n    participant VectorDB\n    participant LLM\n    participant Response\n    \n    User->>Brain: ask(question)\n    Brain->>Retriever: retrieve(query)\n    Retriever->>VectorDB: similarity_search()\n    VectorDB-->>Retriever: relevant_chunks\n    Retriever-->>Brain: context_chunks\n    Brain->>LLM: generate(context, question)\n    LLM-->>Response: ParsedRAGChunkResponse\n    Response-->>User: Streaming Answer\n```\n\n## Streaming Response System\n\n### 
ParsedRAGChunkResponse\n\nThe streaming response is built around the `ParsedRAGChunkResponse` data class, which provides incremental chunks of the generated answer along with metadata.\n\n**Chunk Structure:**\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `answer` | `str` | The partial or complete answer text |\n| `metadata` | `dict` | Sources and additional context |\n| `last_chunk` | `bool` | Indicates final chunk of response |\n| `sources` | `list` | Retrieved document sources |\n| `chunk_id` | `int` | Sequence number of the chunk |\n\nSource: [core/quivr_core/rag/quivr_rag.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n\n### Answer Streaming Implementation\n\nThe `answer_astream` method implements asynchronous streaming of LLM responses:\n\n```python\nasync def answer_astream(self, query: str, ...) -> AsyncGenerator[ParsedRAGChunkResponse]:\n    # Processing logic yields ParsedRAGChunkResponse chunks\n    yield ParsedRAGChunkResponse(\n        answer=\"\",\n        metadata=get_chunk_metadata(rolling_message, sources),\n        last_chunk=True,\n    )\n```\n\n**Streaming Characteristics:**\n- Yields multiple `ParsedRAGChunkResponse` objects during generation\n- Each chunk contains accumulated `rolling_message` content\n- Uses `chunk_id` for tracking sequence order\n- Final chunk marked with `last_chunk=True`\n- Metadata includes retrieved sources for citation\n\nSource: [core/quivr_core/rag/quivr_rag.py:50-100](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)\n\n## Retrieval Configuration\n\n### RetrievalConfig\n\nThe retrieval behavior is controlled through `RetrievalConfig` which supports YAML-based configuration:\n\n```yaml\nworkflow_config:\n  name: \"standard RAG\"\n  nodes:\n    - name: \"START\"\n      edges: [\"filter_history\"]\n    - name: \"filter_history\"\n      edges: [\"rewrite\"]\n    - name: \"rewrite\"\n```\n\nSource: 
[README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n### Configuration Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `n_results` | `int` | `5` | Number of documents to retrieve |\n| `fetch_n_neighbors` | `int` | `20` | Number of neighbors to fetch from vector DB |\n| `filter` | `Callable \\| Dict` | `None` | Optional metadata filtering |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n## Search Operation\n\n### Async Search Method\n\n```python\nasync def asearch(\n    self,\n    query: str | Document,\n    n_results: int = 5,\n    filter: Callable | Dict[str, Any] | None = None,\n    fetch_n_neighbors: int = 20,\n) -> list[SearchResult]:\n```\n\n**Parameters:**\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | `str \\| Document` | Yes | The search query |\n| `n_results` | `int` | No | Maximum results to return |\n| `filter` | `Callable \\| Dict` | No | Metadata filter condition |\n| `fetch_n_neighbors` | `int` | No | Initial fetch size before re-ranking |\n\nSource: [core/quivr_core/brain/brain.py:50-80](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n## Document Processing Pipeline\n\n### Processor Base\n\nThe `processor_base.py` handles document chunking and metadata enrichment:\n\n```python\nasync def process_file(file: QuivrFile) -> ProcessedDocument:\n    docs = await self.process_file_inner(file)\n    qvr_version = version(\"quivr-core\")\n    \n    for idx, doc in enumerate(docs.chunks, start=1):\n        doc.metadata = {\n            \"chunk_index\": idx,\n            \"quivr_core_version\": qvr_version,\n            \"language\": detect_language(text=...).value,\n            **file.metadata,\n            **doc.metadata,\n        }\n```\n\n**Metadata Enrichment:**\n- `chunk_index`: Sequential position of 
chunk\n- `quivr_core_version`: Version of quivr-core\n- `language`: Auto-detected language of content\n- Original filename embedded in content for reference\n\nSource: [core/quivr_core/processor/processor_base.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n## Prompt Templates\n\n### User Prompt Template\n\nThe prompt system uses structured templates with metadata injection:\n\n```\n<user_metadata>\n{user_metadata}\n</user_metadata>\n\n<ticket_metadata>\n{ticket_metadata}\n</ticket_metadata>\n\n<similar_tickets>\n{similar_tickets}\n</similar_tickets>\n\n<ticket_history>\n{ticket_history}\n</ticket_history>\n\n<additional_information>\n{additional_information}\n</additional_information>\n\n<client_query>\n{client_query}\n</client_query>\n```\n\nSource: [core/quivr_core/rag/prompts.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/prompts.py)\n\n### Default Instructions\n\nThe system includes default instructions that guide response generation:\n\n| Instruction | Description |\n|-------------|-------------|\n| Conciseness | Use same level of detail as similar responses |\n| Formatting | Proper paragraphs, bold, italic for readability |\n| Language | Respond in same language as user query |\n| Consistency | Maintain terminology consistency |\n| No Signature | Signature added separately after response |\n\n## LangGraph Integration\n\nThe implementation includes a LangGraph-based RAG pipeline (`quivr_rag_langgraph.py`) for more complex workflows:\n\n```mermaid\ngraph LR\n    A[Query] --> B[History Filter]\n    B --> C[Query Rewrite]\n    C --> D[Retrieval]\n    D --> E[Answer Generation]\n    E --> F[Response]\n```\n\nFeatures:\n- Multi-step processing pipelines\n- Conversation history integration\n- Query rewriting for better retrieval\n- Customizable workflow nodes\n\nSource: 
[core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n\n## LLM Configuration\n\n### LLMEndpointConfig\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `model` | `str` | Model identifier |\n| `llm_base_url` | `str` | API endpoint URL |\n| `temperature` | `float` | Generation temperature (default: 0.7) |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n### Supported Providers\n\n- **OpenAI**: GPT-4, GPT-3.5-turbo\n- **Anthropic**: Claude models\n- **Mistral**: Mistral AI models\n- **Ollama**: Local model support\n\n## Usage Example\n\n```python\nfrom quivr_core import Brain\nfrom quivr_core.config import RetrievalConfig\n\n# Create brain from files\nbrain = Brain.from_files(\n    name=\"my_brain\",\n    file_paths=[\"./document.pdf\", \"./notes.txt\"],\n)\n\n# Configure retrieval\nretrieval_config = RetrievalConfig.from_yaml(\"./workflow.yaml\")\n\n# Ask a question\nanswer = brain.ask(\n    \"What is the main topic of these documents?\",\n    retrieval_config=retrieval_config\n)\n\nprint(answer.answer)\n```\n\nSource: [examples/simple_question/simple_question.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question/simple_question.py)\n\n## Key Design Patterns\n\n| Pattern | Implementation |\n|---------|-----------------|\n| Async/Await | All I/O operations are asynchronous |\n| Streaming | Responses streamed via async generators |\n| Dependency Injection | LLM and embedder are configurable |\n| Configuration-driven | Workflows defined via YAML |\n| Metadata Enrichment | Automatic language detection and versioning |\n\n---\n\n<a id='page-llm-integration'></a>\n\n## LLM Integration\n\n### Related Pages\n\nRelated topics: [Core Components](#page-core-components), [System Architecture Overview](#page-architecture-overview)\n\n<details>\n<summary>Relevant source 
files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n- [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n- [core/quivr_core/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/config.py)\n- [core/quivr_core/base_config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/base_config.py)\n- [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n</details>\n\n# LLM Integration\n\n## Overview\n\nThe LLM Integration module in Quivr provides a flexible, abstracted interface for interacting with Large Language Models (LLMs) from various providers. This module enables Quivr's RAG (Retrieval-Augmented Generation) pipeline to leverage different LLM backends without requiring changes to the core business logic. 
The integration supports OpenAI, Anthropic, Mistral, Meta (Llama), Groq, and local models via Ollama.\n\nThe primary goals of the LLM Integration are:\n\n- **Provider Abstraction**: Uniform API regardless of the underlying LLM provider\n- **Configuration Management**: Centralized configuration for model parameters, token limits, and provider-specific settings\n- **Runtime Flexibility**: Support for both synchronous and asynchronous operations\n- **Embeddings Integration**: Seamless integration with embedding models for semantic search\n\nSource: [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)\n\n## Architecture\n\n### High-Level Components\n\nThe LLM Integration consists of several key components:\n\n| Component | Purpose | Location |\n|-----------|---------|----------|\n| `LLMEndpoint` | Main wrapper class for LLM interactions | `quivr_core/llm/llm_endpoint.py` |\n| `LLMEndpointConfig` | Configuration dataclass for LLM settings | `quivr_core/rag/entities/config.py` |\n| `LLMConfig` | Per-model configuration with token limits | `quivr_core/rag/entities/config.py` |\n| `DefaultModelSuppliers` | Enum for supported LLM providers | `quivr_core/rag/entities/config.py` |\n\n### Class Diagram\n\n```mermaid\nclassDiagram\n    class LLMEndpoint {\n        +llm: BaseChatModel\n        +llm_config: LLMEndpointConfig\n        +__init__(llm, llm_config)\n        +get_client() BaseChatModel\n    }\n    \n    class LLMEndpointConfig {\n        +model: str\n        +llm_base_url: str\n        +temperature: float\n        +max_output_tokens: int\n        +model_type: LLMEndpointType\n    }\n    \n    class LLMConfig {\n        +max_context_tokens: int\n        +max_output_tokens: int\n        +tokenizer_hub: str\n    }\n    \n    class DefaultModelSuppliers {\n        <<enumeration>>\n        OPENAI\n        ANTHROPIC\n        MISTRAL\n        META\n        GROQ\n        OLLAMA\n    }\n    \n    LLMEndpoint --> 
LLMEndpointConfig\n    LLMEndpointConfig ..> DefaultModelSuppliers\n```\n\nSource: [core/quivr_core/llm/llm_endpoint.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/llm/llm_endpoint.py)  \nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n## Configuration\n\n### LLMEndpointConfig Parameters\n\nThe `LLMEndpointConfig` class provides configuration for LLM endpoints:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model` | `str` | Required | Model identifier (e.g., \"gpt-4o\", \"claude-3-opus\") |\n| `llm_base_url` | `str` | `\"local\"` | Base URL for the LLM API endpoint |\n| `temperature` | `float` | `0.7` | Sampling temperature for generation (0.0-2.0) |\n| `max_output_tokens` | `int` | `4096` | Maximum tokens in the generated response |\n| `model_type` | `LLMEndpointType` | `LLMEndpointType.CHAT` | Type of LLM endpoint |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n### Default Model Configurations\n\nQuivr provides pre-configured settings for various models through the `DefaultModelSuppliers` enum and associated `LLMConfig` dictionaries:\n\n| Provider | Model | Max Context Tokens | Max Output Tokens | Tokenizer Hub |\n|----------|-------|-------------------|-------------------|---------------|\n| **OpenAI** | gpt-4o | 128,000 | 16,384 | Quivr/claude-tokenizer |\n| **OpenAI** | gpt-4-turbo | 128,000 | 4,096 | Quivr/claude-tokenizer |\n| **Anthropic** | claude-3.5-sonnet | 200,000 | 8,192 | Quivr/claude-tokenizer |\n| **Anthropic** | claude-3-opus | 200,000 | 4,096 | Quivr/claude-tokenizer |\n| **Mistral** | mistral-large | 32,000 | N/A | - |\n| **Meta** | llama-3.1 | 128,000 | 4,096 | Quivr/Meta-Llama-3.1-Tokenizer |\n| **Meta** | llama-3 | 8,192 | 2,048 | Quivr/llama3-tokenizer-new |\n| **Groq** | llama-3.3-70b | 128,000 | 
32,768 | Quivr/Meta-Llama-3.1-Tokenizer |\n\nSource: [core/quivr_core/rag/entities/config.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/entities/config.py)\n\n### Environment Variables\n\nAPI keys should be set as environment variables before initializing the LLM:\n\n```bash\nexport OPENAI_API_KEY=\"your-openai-api-key\"\nexport ANTHROPIC_API_KEY=\"your-anthropic-api-key\"\n```\n\nExample usage in code:\n\n```python\nimport os\nos.environ[\"OPENAI_API_KEY\"] = \"myopenai_apikey\"\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## Usage Patterns\n\n### Basic Integration with Brain\n\nThe most common pattern is to pass an `LLMEndpoint` instance when creating a Brain:\n\n```python\nfrom langchain_openai import ChatOpenAI\nfrom quivr_core import Brain\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\nbrain = Brain.from_files(\n    name=\"my_smart_brain\",\n    file_paths=[\"./documents/*.pdf\"],\n    llm=LLMEndpoint(\n        llm_config=LLMEndpointConfig(model=\"gpt-4o\"),\n        llm=ChatOpenAI(model=\"gpt-4o\", api_key=str(os.getenv(\"OPENAI_API_KEY\"))),\n    ),\n)\n```\n\nSource: [examples/simple_question_megaparse.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question_megaparse.py)\n\n### Using Fake LLM for Testing\n\nFor testing purposes, Quivr supports fake LLM implementations:\n\n```python\nfrom langchain_core.language_models import FakeListChatModel\nfrom quivr_core import Brain\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\nfrom langchain_core.embeddings import DeterministicFakeEmbedding\n\nbrain = Brain.from_files(\n    name=\"test_brain\",\n    file_paths=[\"tests/processor/data/dummy.pdf\"],\n    llm=LLMEndpoint(\n        llm=FakeListChatModel(responses=[\"good\"]),\n        llm_config=LLMEndpointConfig(model=\"fake_model\", 
llm_base_url=\"local\"),\n    ),\n    embedder=DeterministicFakeEmbedding(size=20),\n)\n```\n\nSource: [examples/pdf_parsing_tika.py](https://github.com/QuivrHQ/quivr/blob/main/examples/pdf_parsing_tika.py)\n\n### Custom Embeddings Integration\n\nWhen using custom LLM configurations, you can also specify custom embedders:\n\n```python\nfrom langchain_openai import ChatOpenAI, OpenAIEmbeddings\nfrom quivr_core import Brain\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\n# LLM Configuration\nllm = LLMEndpoint(\n    llm_config=LLMEndpointConfig(model=\"gpt-4o\"),\n    llm=ChatOpenAI(model=\"gpt-4o\", api_key=str(os.getenv(\"OPENAI_API_KEY\"))),\n)\n\n# Embedder Configuration\nembedder = OpenAIEmbeddings(model=\"text-embedding-3-large\")\n\nbrain = Brain.from_files(\n    name=\"custom_brain\",\n    file_paths=[\"./data/documents/\"],\n    llm=llm,\n    embedder=embedder,\n)\n```\n\nSource: [examples/simple_question_megaparse.py](https://github.com/QuivrHQ/quivr/blob/main/examples/simple_question_megaparse.py)\n\n## Supported Providers\n\n### OpenAI\n\nOpenAI models are supported through the `langchain_openai` package:\n\n```python\nfrom langchain_openai import ChatOpenAI\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\nfrom quivr_core.rag.entities.config import LLMEndpointConfig\n\nllm = LLMEndpoint(\n    llm=ChatOpenAI(model=\"gpt-4o\", api_key=api_key),\n    llm_config=LLMEndpointConfig(\n        model=\"gpt-4o\",\n        temperature=0.7,\n    ),\n)\n```\n\nSupported models: `gpt-4o`, `gpt-4-turbo`, `gpt-3.5-turbo`\n\n### Anthropic\n\nAnthropic models require the `langchain-anthropic` package:\n\n```python\nfrom langchain_anthropic import ChatAnthropic\nfrom quivr_core.llm.llm_endpoint import LLMEndpoint\n\nllm = LLMEndpoint(\n    llm=ChatAnthropic(model=\"claude-3-5-sonnet-20241022\", anthropic_api_key=api_key),\n    llm_config=LLMEndpointConfig(model=\"claude-3.5-sonnet\"),\n)\n```\n\n### 
Mistral\n\nMistral models are supported via their API:\n\n```python\nfrom langchain_mistralai import ChatMistralAI\n\nllm = LLMEndpoint(\n    llm=ChatMistralAI(model=\"mistral-large-latest\", mistral_api_key=api_key),\n    llm_config=LLMEndpointConfig(model=\"mistral-large\"),\n)\n```\n\n### Local Models (Ollama)\n\nFor local inference using Ollama:\n\n```python\nfrom langchain_ollama import ChatOllama\n\nllm = LLMEndpoint(\n    llm=ChatOllama(model=\"llama3.1\", base_url=\"http://localhost:11434\"),\n    llm_config=LLMEndpointConfig(model=\"llama-3.1\", llm_base_url=\"http://localhost:11434\"),\n)\n```\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## RAG Workflow Integration\n\nThe LLM Integration is a core component of Quivr's RAG pipeline. When processing a user query, the LLM is responsible for generating the final answer based on retrieved context.\n\n```mermaid\ngraph TD\n    A[User Query] --> B[Brain.ask]\n    B --> C[Vector Search]\n    C --> D[Retrieve Relevant Chunks]\n    D --> E[LLMEndpoint]\n    E --> F[Generate Answer]\n    F --> G[RAG Response]\n    \n    H[LLMEndpointConfig] --> E\n    I[Chat History] --> E\n    J[System Prompt] --> E\n```\n\nThe LLM receives contextual information from the retrieval step and generates responses that are then formatted and returned to the user. 
The `quivr_rag_langgraph.py` module orchestrates this workflow using LangGraph for complex graph-based processing.\n\nSource: [core/quivr_core/rag/quivr_rag.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag.py)  \nSource: [core/quivr_core/rag/quivr_rag_langgraph.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/rag/quivr_rag_langgraph.py)\n\n## Default LLM Fallback\n\nIf no LLM is explicitly provided when creating a Brain, Quivr automatically initializes a default LLM:\n\n```python\n# From brain.py source code\nif llm is None:\n    llm = default_llm()\n```\n\nThis fallback mechanism ensures that users can get started quickly without explicit configuration, while still allowing advanced users to customize their LLM setup.\n\nSource: [core/quivr_core/brain/brain.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/brain/brain.py)\n\n## Best Practices\n\n### API Key Security\n\n- Store API keys in environment variables, never hardcode them\n- Use `.env` files with proper `.gitignore` entries for local development\n- Consider using secret management services (AWS Secrets Manager, HashiCorp Vault) for production\n\n### Model Selection\n\n- Choose models based on your use case requirements (speed vs. 
quality)\n- Use smaller models for simple queries to reduce costs\n- Reserve larger models (e.g., GPT-4o, Claude 3.5) for complex reasoning tasks\n\n### Token Management\n\n- Monitor `max_context_tokens` to avoid exceeding model limits\n- Set appropriate `max_output_tokens` based on expected response length\n- Use the `RetrievalConfig` to control how many chunks are fed to the LLM\n\n### Temperature Settings\n\n| Temperature | Use Case | Characteristics |\n|-------------|----------|-----------------|\n| 0.0 - 0.3 | Factual/Deterministic | More focused, consistent responses |\n| 0.4 - 0.7 | General Purpose | Balanced creativity and consistency |\n| 0.8 - 1.0 | Creative/Brainstorming | More varied, potentially surprising outputs |\n\n## Troubleshooting\n\n### Common Issues\n\n1. **API Key Not Found**\n   ```\n   Error: OPENAI_API_KEY environment variable not set\n   ```\n   Solution: Ensure the API key is set before running the script:\n   ```bash\n   export OPENAI_API_KEY=\"your-key\"\n   ```\n\n2. **Model Context Limit Exceeded**\n   Solution: Reduce the number of retrieved chunks or adjust chunk size in processing\n\n3. 
**Rate Limiting**\n   Solution: Implement retry logic or use a rate limiting middleware\n\nSource: [README.md](https://github.com/QuivrHQ/quivr/blob/main/README.md)\n\n## See Also\n\n- [Brain Configuration](./brain-configuration.md)\n- [RAG Workflows](./rag-workflows.md)\n- [Embedding Integration](./embedding-integration.md)\n- [Quivr Documentation](https://core.quivr.com/)\n\n---\n\n<a id='page-file-processing'></a>\n\n## File Processing and Parsers\n\n### Related Pages\n\nRelated topics: [Storage System](#page-storage)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/processor/processor_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n- [core/quivr_core/processor/registry.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/registry.py)\n- [core/quivr_core/processor/implementations/default.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/default.py)\n- [core/quivr_core/processor/implementations/simple_txt_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n- [core/quivr_core/processor/implementations/tika_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/tika_processor.py)\n- [core/quivr_core/processor/implementations/megaparse_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/megaparse_processor.py)\n</details>\n\n# File Processing and Parsers\n\nQuivr's file processing system provides an extensible architecture for ingesting, parsing, and chunking various document formats for use in Retrieval-Augmented Generation (RAG) workflows. 
The processor subsystem sits at the core of Quivr's data ingestion pipeline, transforming raw files into structured document chunks ready for embedding and retrieval.\n\n## Architecture Overview\n\nThe file processing architecture follows a plugin-based design pattern where processors are registered by file extension and lazily loaded on demand. This separation of concerns allows new file format support to be added without modifying core processing logic.\n\n```mermaid\ngraph TD\n    A[QuivrFile Input] --> B[Processor Registry]\n    B --> C{File Extension}\n    C -->|.txt| D[SimpleTxtProcessor]\n    C -->|.pdf| E[MegaparseProcessor]\n    C -->|Other| F[Default Processors]\n    D --> G[ProcessedDocument]\n    E --> G\n    F --> G\n    G --> H[Document Chunks]\n    H --> I[RAG Pipeline]\n```\n\n### Core Components\n\n| Component | Purpose | Location |\n|-----------|---------|----------|\n| `ProcessorBase` | Abstract base class for all processors | `processor_base.py` |\n| `ProcessedDocument` | Generic container for processing results | `processor_base.py` |\n| `ProcessorRegistry` | Maps extensions to processor classes | `registry.py` |\n| `SplitterConfig` | Configuration for text chunking | `splitter.py` |\n\nSource: [processor_base.py:1-75](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py), [registry.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/registry.py)\n\n---\n\n## ProcessorBase Abstract Class\n\nThe `ProcessorBase` serves as the foundation for all file processors in Quivr. 
It defines the contract that every processor implementation must follow.\n\n### Class Definition\n\n```python\nclass ProcessorBase(ABC, Generic[R]):\n    supported_extensions: list[FileExtension | str]\n    \n    @property\n    @abstractmethod\n    def processor_metadata(self) -> dict[str, Any]:\n        raise NotImplementedError\n    \n    async def process_file(self, file: QuivrFile) -> ProcessedDocument[R]:\n        ...\n    \n    @abstractmethod\n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[R]:\n        raise NotImplementedError\n```\n\nSource: [processor_base.py:47-75](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n### Key Methods\n\n| Method | Access | Description |\n|--------|--------|-------------|\n| `supported_extensions` | Class attribute | List of file extensions the processor handles |\n| `check_supported()` | Instance | Validates if a file type is supported |\n| `process_file()` | Async | Main entry point; validates, processes, and enriches metadata |\n| `process_file_inner()` | Abstract Async | Subclass implementation for actual parsing logic |\n| `processor_metadata` | Abstract Property | Returns processor configuration as dict |\n\n### Metadata Enrichment\n\nThe `process_file()` method automatically enriches each document chunk with standardized metadata:\n\n```python\ndoc.metadata = {\n    \"chunk_index\": idx,\n    \"quivr_core_version\": qvr_version,\n    \"language\": detect_language(...).value,\n    **file.metadata,\n    **doc.metadata,\n    **self.processor_metadata,\n}\n```\n\nThis ensures all chunks carry provenance information including the Quivr version, detected language, and processor configuration.\n\nSource: [processor_base.py:20-39](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n---\n\n## ProcessedDocument Generic Container\n\nThe `ProcessedDocument` is a generic wrapper that encapsulates the output of any 
processor:\n\n```python\n@dataclass\nclass ProcessedDocument(Generic[R]):\n    chunks: List[Document]          # List of LangChain Document objects\n    processor_cls: str              # Name of the processor class\n    processor_response: R           # Processor-specific return value\n```\n\nThe generic type `R` allows each processor to define its own response type. For example, `SimpleTxtProcessor` returns `ProcessedDocument[str]` where `processor_response` contains the raw text content.\n\nSource: [processor_base.py:40-46](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n---\n\n## Text Chunking and Splitting\n\nQuivr provides configurable text chunking through the `SplitterConfig` class and custom splitting implementations.\n\n### SplitterConfig\n\n```python\n@dataclass\nclass SplitterConfig(BaseModel):\n    chunk_size: int = 500\n    chunk_overlap: int = 0\n    length_function: Callable[[str], int] = len\n```\n\n| Parameter | Default | Description |\n|-----------|---------|-------------|\n| `chunk_size` | 500 | Maximum characters per chunk |\n| `chunk_overlap` | 0 | Characters to overlap between chunks |\n| `length_function` | `len` | Function to calculate text length |\n\n### SimpleTxtProcessor Recursive Splitter\n\nThe `SimpleTxtProcessor` implements a custom recursive character splitter:\n\n```python\ndef recursive_character_splitter(\n    doc: Document, chunk_size: int, chunk_overlap: int\n) -> list[Document]:\n    if len(doc.page_content) <= chunk_size:\n        return [doc]\n    \n    chunk = Document(page_content=doc.page_content[:chunk_size], metadata=doc.metadata)\n    remaining = Document(\n        page_content=doc.page_content[chunk_size - chunk_overlap:],\n        metadata=doc.metadata,\n    )\n    return [chunk] + recursive_character_splitter(remaining, chunk_size, chunk_overlap)\n```\n\n**Constraint**: `chunk_overlap` must be less than `chunk_size`\n\nSource: 
[simple_txt_processor.py:8-26](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n### LangChain Text Splitters\n\nOther processors leverage LangChain's `RecursiveCharacterTextSplitter`:\n\n```python\nfrom langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter\n\ntext_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=splitter_config.chunk_size,\n    chunk_overlap=splitter_config.chunk_overlap,\n    length_function=tiktoken_len,  # Token-based length\n)\ndocs = text_splitter.split_documents([document])\n```\n\nSource: [megaparse_processor.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/megaparse_processor.py)\n\n---\n\n## Available Processors\n\n### Processor Comparison\n\n| Processor | File Types | Parser Backend | Notes |\n|-----------|------------|----------------|-------|\n| `SimpleTxtProcessor` | `.txt` | Custom recursive splitter | Lightweight, no external dependencies |\n| `MegaparseProcessor` | `.txt`, `.pdf`, `.docx`, `.pptx`, `.xlsx`, `.csv`, `.epub`, `.bib`, `.odt`, `.html`, `.md`, `.mdx` | MegaParse SDK | AI-optimized document parsing |\n| `TikaProcessor` | `.pdf` | Apache Tika | Robust PDF extraction |\n| Default Processors | Various | LangChain loaders | Catch-all for standard formats |\n\n### SimpleTxtProcessor\n\nDesigned for plain text files with zero external dependencies:\n\n```python\nclass SimpleTxtProcessor(ProcessorBase):\n    supported_extensions = [FileExtension.txt]\n    \n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[str]:\n        async with aiofiles.open(file.path, mode=\"r\") as f:\n            content = await f.read()\n        \n        doc = Document(page_content=content)\n        docs = recursive_character_splitter(\n            doc, self.splitter_config.chunk_size, self.splitter_config.chunk_overlap\n        )\n        return ProcessedDocument(\n            
chunks=docs, \n            processor_cls=\"SimpleTxtProcessor\", \n            processor_response=content\n        )\n```\n\nSource: [simple_txt_processor.py:30-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/simple_txt_processor.py)\n\n### MegaparseProcessor\n\nThe most versatile processor, supporting 14+ file formats through the MegaParse SDK:\n\n```python\nclass MegaparseProcessor(ProcessorBase[MPDocument]):\n    supported_extensions = [\n        FileExtension.txt, FileExtension.pdf, FileExtension.docx,\n        FileExtension.pptx, FileExtension.xlsx, FileExtension.csv,\n        FileExtension.epub, FileExtension.html, FileExtension.markdown,\n        # ... more extensions\n    ]\n    \n    def __init__(\n        self,\n        splitter: TextSplitter | None = None,\n        splitter_config: SplitterConfig = SplitterConfig(),\n        megaparse_config: MegaparseConfig = MegaparseConfig(),\n    ) -> None:\n        ...\n```\n\n**Installation**: `pip install megaparse`\n\nSource: [megaparse_processor.py:1-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/megaparse_processor.py)\n\n### Default Processors\n\nBuilt using LangChain's community loaders for standard formats:\n\n| LangChain Loader | File Extension |\n|------------------|----------------|\n| `UnstructuredPDFLoader` | `.pdf` |\n| `Docx2txtLoader` | `.docx` |\n| `UnstructuredPowerPointLoader` | `.pptx` |\n| `UnstructuredExcelLoader` | `.xls`, `.xlsx` |\n| `CSVLoader` | `.csv` |\n| `UnstructuredEPubLoader` | `.epub` |\n| `UnstructuredMarkdownLoader` | `.md`, `.markdown` |\n| `UnstructuredHTMLLoader` | `.html` |\n| `TextLoader` | `.txt` |\n\nThese are dynamically generated using `_build_processor()` factory:\n\n```python\ndef _build_processor(\n    cls_name: str, \n    load_cls: Type[P], \n    cls_extensions: List[FileExtension | str]\n) -> Type[ProcessorInit]:\n    enc = tiktoken.get_encoding(\"cl100k_base\")\n    \n    class 
_Processor(ProcessorBase):\n        supported_extensions = cls_extensions\n        # ... implementation\n    return _Processor\n```\n\nSource: [default.py:1-60](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/default.py)\n\n### TikaProcessor\n\nUses Apache Tika for robust PDF extraction with token-based chunk sizing:\n\n```python\nasync def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[None]:\n    async with file.open() as f:\n        txt = await self._send_parse_tika(f)\n    \n    document = Document(page_content=txt)\n    docs = self.text_splitter.split_documents([document])\n    \n    for doc in docs:\n        doc.metadata = {\"chunk_size\": len(self.enc.encode(doc.page_content))}\n    \n    return ProcessedDocument(\n        chunks=docs, \n        processor_cls=\"TikaProcessor\", \n        processor_response=None\n    )\n```\n\nSource: [tika_processor.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/implementations/tika_processor.py)\n\n---\n\n## Processor Registry\n\nThe registry provides lazy-loading lookup from file extensions to processor classes:\n\n```python\ndef get_processor_class(file_extension: FileExtension | str) -> Type[ProcessorBase]:\n    \"\"\"Fetch processor class from registry\n    \n    Loading of these classes is *Lazy*. 
Appropriate import will happen\n    the first time we try to process some file type.\n    \"\"\"\n    ...\n    return known_processors[file_extension].cls\n```\n\n### Registry Structure\n\n```python\nProcEntry = namedtuple('ProcEntry', ['exts', 'cls_mod', 'errtxt', 'priority'])\n\nknown_processors = defaults_to_proc_entries(base_processors)\n```\n\nEach entry contains:\n- `exts`: List of file extensions\n- `cls_mod`: Module path to import\n- `errtxt`: Error message if import fails\n- `priority`: Processor priority (for multiple handlers)\n\n### Registration Flow\n\n```mermaid\ngraph LR\n    A[defaults_to_proc_entries] --> B[_append_proc_mapping]\n    B --> C[base_processors dict]\n    C --> D[known_processors]\n    D --> E[get_processor_class]\n    E --> F{Lazy Import}\n    F --> G[Processor Class]\n```\n\nSource: [registry.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/registry.py)\n\n---\n\n## Processing Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Brain\n    participant Registry\n    participant Processor\n    participant Splitter\n    \n    User->>Brain: add_files(file_paths)\n    Brain->>Registry: get_processor_class(extension)\n    Registry-->>Brain: Processor Class\n    Brain->>Processor: process_file(QuivrFile)\n    Processor->>Processor: check_supported()\n    Processor->>Processor: process_file_inner()\n    Processor->>Splitter: split_documents()\n    Splitter-->>Processor: List[Document]\n    Processor->>Processor: enrich_metadata()\n    Processor-->>Brain: ProcessedDocument\n    Brain->>Brain: vectorstore.add_documents()\n```\n\n### Step-by-Step Processing\n\n1. **File Input**: `QuivrFile` object created from file path with metadata (SHA1, size, extension)\n2. **Registry Lookup**: Extension matched to appropriate processor class\n3. **Validation**: `check_supported()` confirms file type compatibility\n4. **Parsing**: `process_file_inner()` extracts raw text/content\n5. 
**Chunking**: Documents split according to `SplitterConfig`\n6. **Metadata Enrichment**: Each chunk receives index, version, and language tags\n7. **Output**: `ProcessedDocument` returned with chunks ready for embedding\n\n---\n\n## Configuration Options\n\n### Splitter Configuration\n\n```python\nfrom quivr_core.processor.splitter import SplitterConfig\n\nconfig = SplitterConfig(\n    chunk_size=1000,      # Characters per chunk\n    chunk_overlap=100,     # Overlap between chunks\n    length_function=len   # Or tiktoken-based for token counting\n)\n```\n\n### Megaparse Configuration\n\n```python\nfrom quivr_core.config import MegaparseConfig\n\nconfig = MegaparseConfig(\n    # Provider-specific settings\n)\n```\n\n---\n\n## Extending with Custom Processors\n\nTo add support for a new file format:\n\n1. Create a class extending `ProcessorBase[R]`\n2. Implement `process_file_inner()` method\n3. Define `supported_extensions` class attribute\n4. Implement `processor_metadata` property\n5. 
Register in `registry.py` or dynamically\n\n```python\nclass CustomProcessor(ProcessorBase[MyResponseType]):\n    supported_extensions = [FileExtension.my_format]\n    \n    @property\n    def processor_metadata(self) -> dict[str, Any]:\n        return {\"processor_cls\": \"CustomProcessor\"}\n    \n    async def process_file_inner(self, file: QuivrFile) -> ProcessedDocument[MyResponseType]:\n        # Custom parsing logic\n        ...\n        return ProcessedDocument(chunks=docs, processor_cls=\"CustomProcessor\", processor_response=result)\n```\n\n---\n\n## Error Handling\n\nProcessors validate file support at runtime:\n\n```python\ndef check_supported(self, file: QuivrFile) -> None:\n    if file.file_extension not in self.supported_extensions:\n        raise ValueError(f\"can't process a file of type {file.file_extension}\")\n```\n\nLazy loading provides graceful degradation for optional dependencies:\n\n```python\ntry:\n    from megaparse_sdk.client import MegaParseNATSClient\nexcept ImportError:\n    raise ImportError(\"Please install megaparse: pip install megaparse\")\n```\n\nSource: [processor_base.py:50-53](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/processor/processor_base.py)\n\n---\n\n## Related Components\n\n- **QuivrFile**: Represents an input file with metadata (path, SHA1, size, extension)\n- **Brain**: Orchestrates file processing and retrieval\n- **VectorStore**: Stores processed document chunks for similarity search\n- **RetrievalConfig**: Configures how chunks are retrieved during Q&A\n\n---\n\n<a id='page-storage'></a>\n\n## Storage System\n\n### Related Pages\n\nRelated topics: [File Processing and Parsers](#page-file-processing), [Brain Class](#page-brain-class)\n\n<details>\n<summary>Relevant source files</summary>\n\nThe following source files were used to generate this page:\n\n- [core/quivr_core/storage/storage_base.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n- 
[core/quivr_core/storage/local_storage.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n- [core/quivr_core/storage/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/file.py)\n- [core/quivr_core/files/file.py](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n</details>\n\n# Storage System\n\n## Overview\n\nThe Storage System in Quivr is a modular abstraction layer responsible for managing file uploads, storage, and retrieval across different storage backends. It provides a clean separation between the RAG (Retrieval-Augmented Generation) logic and the underlying file persistence mechanisms, enabling Quivr to work with various storage implementations while maintaining a consistent API.\n\nThe system is designed to handle:\n- File upload and deduplication using SHA-1 hashing\n- Asynchronous file access\n- Multiple storage backends (currently supporting local filesystem)\n- File metadata tracking and organization by brain\n\nSource: [core/quivr_core/storage/storage_base.py:1-47](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n\n## Architecture\n\n### High-Level Architecture\n\n```mermaid\ngraph TD\n    A[Brain] --> B[StorageBase]\n    B --> C[LocalStorage]\n    B --> D[Future: CloudStorage]\n    B --> E[Future: S3Storage]\n    C --> F[QuivrFile]\n    C --> G[File System]\n    F --> H[Metadata]\n    F --> I[SHA-1 Hash]\n```\n\n### Component Hierarchy\n\n```mermaid\nclassDiagram\n    class StorageBase {\n        <<abstract>>\n        +name: str\n        +nb_files() int\n        +get_files() List~QuivrFile~\n        +upload_file(file, exists_ok)*\n    }\n    \n    class LocalStorage {\n        +name: str = \"local_storage\"\n        +files: List~QuivrFile~\n        +hashes: Set~str~\n        +copy_flag: bool\n        +dir_path: Path\n    }\n    \n    class QuivrFile {\n        +id: UUID\n        +brain_id: UUID\n        +path: Path\n        
+original_filename: str\n        +file_size: int\n        +file_extension: FileExtension\n        +file_sha1: str\n        +additional_metadata: dict\n    }\n    \n    StorageBase <|-- LocalStorage\n```\n\n## Core Components\n\n### StorageBase (Abstract Base Class)\n\nThe `StorageBase` is an abstract base class that defines the contract for all storage implementations. It enforces a consistent interface across different storage backends through Python's ABC (Abstract Base Class) mechanism.\n\nSource: [core/quivr_core/storage/storage_base.py:19-47](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n\n#### Required Subclass Attributes\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `name` | `str` | Human-readable name of the storage type |\n\n#### Abstract Methods\n\n| Method | Return Type | Description |\n|--------|-------------|-------------|\n| `nb_files()` | `int` | Returns the total number of files stored |\n| `get_files()` | `List[QuivrFile]` | Asynchronously retrieves all files in storage |\n| `upload_file(file, exists_ok)` | `None` | Uploads a file to the storage |\n\nThe base class enforces that subclasses must define the `name` attribute through `__init_subclass__`:\n\n```python\ndef __init_subclass__(cls, **kwargs):\n    for required in (\"name\",):\n        if not getattr(cls, required):\n            raise TypeError(\n                f\"Can't instantiate abstract class {cls.__name__} without {required} attribute defined\"\n            )\n    return super().__init_subclass__(**kwargs)\n```\n\nSource: [core/quivr_core/storage/storage_base.py:19-27](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/storage_base.py)\n\n### LocalStorage Implementation\n\n`LocalStorage` is the concrete implementation of `StorageBase` that persists files to the local filesystem.\n\nSource: 
[core/quivr_core/storage/local_storage.py:1-50](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n\n#### Attributes\n\n| Attribute | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `name` | `str` | `\"local_storage\"` | Storage type identifier |\n| `files` | `List[QuivrFile]` | `[]` | In-memory list of stored files |\n| `hashes` | `Set[str]` | `set()` | Set of SHA-1 hashes for deduplication |\n| `copy_flag` | `bool` | `True` | If `True`, copy files; if `False`, create symlinks |\n| `dir_path` | `Path` | See below | Directory path for file storage |\n\n#### Directory Resolution\n\nThe storage directory is resolved in the following order:\n\n1. Explicitly provided `dir_path` argument\n2. Environment variable `QUIVR_LOCAL_STORAGE`\n3. Default path: `~/.cache/quivr/files`\n\n```python\nif dir_path is None:\n    self.dir_path = Path(\n        os.getenv(\"QUIVR_LOCAL_STORAGE\", \"~/.cache/quivr/files\")\n    )\nelse:\n    self.dir_path = dir_path\nos.makedirs(self.dir_path, exist_ok=True)\n```\n\nSource: [core/quivr_core/storage/local_storage.py:38-46](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n\n### QuivrFile Data Model\n\n`QuivrFile` represents a file stored in the Quivr system with all associated metadata.\n\nSource: [core/quivr_core/files/file.py:48-74](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n#### Class Slots\n\nThe class uses `__slots__` for memory-efficient attribute storage:\n\n| Slot | Type | Description |\n|------|------|-------------|\n| `id` | `UUID` | Unique identifier for the file |\n| `brain_id` | `UUID \\| None` | ID of the brain this file belongs to |\n| `path` | `Path` | Actual filesystem path to the file |\n| `original_filename` | `str` | Original name of the uploaded file |\n| `file_size` | `int \\| None` | Size of the file in bytes |\n| `file_extension` | `FileExtension \\| str` | File extension/type |\n| 
`file_sha1` | `str` | SHA-1 hash of the file content |\n| `additional_metadata` | `dict` | Custom metadata dictionary |\n\nSource: [core/quivr_core/files/file.py:49-57](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n#### Constructor Parameters\n\n```python\ndef __init__(\n    self,\n    id: UUID,\n    original_filename: str,\n    path: Path,\n    file_sha1: str,\n    file_extension: FileExtension | str,\n    brain_id: UUID | None = None,\n    file_size: int | None = None,\n    metadata: dict[str, Any] | None = None,\n) -> None:\n```\n\nSource: [core/quivr_core/files/file.py:59-70](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n#### Async File Access\n\nThe `QuivrFile` class provides an async context manager for reading file contents:\n\n```python\n@asynccontextmanager\nasync def open(self) -> AsyncGenerator[AsyncIterable[bytes], None]:\n    f = await aiofiles.open(self.path, mode=\"rb\")\n    try:\n        yield f\n    finally:\n        await f.close()\n```\n\nSource: [core/quivr_core/files/file.py:76-83](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n## File Upload Workflow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant LocalStorage\n    participant FileSystem\n    \n    Client->>LocalStorage: upload_file(QuivrFile)\n    LocalStorage->>LocalStorage: Check SHA-1 hash\n    alt file exists and exists_ok=False\n        LocalStorage-->>Client: FileExistsError\n    else file doesn't exist\n        LocalStorage->>FileSystem: Copy or Symlink\n        FileSystem-->>LocalStorage: Success\n        LocalStorage->>LocalStorage: Update files list\n        LocalStorage->>LocalStorage: Add hash to hashes set\n    end\n```\n\n### Upload Process Details\n\n1. **File Path Construction**: Files are stored at `{dir_path}/{brain_id}/{file_id}`\n\n2. 
**Deduplication**: Before upload, the system checks if a file with the same SHA-1 hash already exists using `file_sha1`:\n\n   ```python\n   if file.file_sha1 in self.hashes and not exists_ok:\n       raise FileExistsError(...)\n   ```\n\n3. **Storage Strategy**: Based on `copy_flag`:\n   - `True`: Copy file content to destination\n   - `False`: Create symbolic link to original file\n\n4. **File Registration**: After successful upload, the file is added to the internal tracking list.\n\nSource: [core/quivr_core/storage/local_storage.py:48-70](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/local_storage.py)\n\n## File Creation Helper\n\nThe `get_file_path()` function creates a `QuivrFile` instance from a filesystem path:\n\n```python\ndef get_file_path(\n    path: Path,\n    brain_id: UUID | None = None\n) -> QuivrFile:\n```\n\n### Processing Steps\n\n| Step | Operation |\n|------|-----------|\n| 1 | Get file size using `os.path.getsize()` |\n| 2 | Compute SHA-1 hash by reading file bytes |\n| 3 | Extract or generate UUID for file ID |\n| 4 | Determine file extension |\n| 5 | Create and return `QuivrFile` instance |\n\n```python\nfile_size = os.path.getsize(path)\nwith aiofiles.open(path, mode=\"rb\") as f:\n    file_sha1 = hashlib.sha1(await f.read()).hexdigest()\n\ntry:\n    id = UUID(path.name)\nexcept ValueError:\n    id = uuid4()\n```\n\nSource: [core/quivr_core/storage/file.py:19-43](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/storage/file.py)\n\n## Configuration Options\n\n### LocalStorage Configuration\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `dir_path` | `Path \\| None` | `None` | Custom storage directory |\n| `copy_flag` | `bool` | `True` | File storage method |\n\n### Environment Variables\n\n| Variable | Description |\n|----------|-------------|\n| `QUIVR_LOCAL_STORAGE` | Override default storage directory |\n\n## Usage Examples\n\n### Creating a LocalStorage 
Instance\n\n```python\nfrom quivr_core.storage.local_storage import LocalStorage\nfrom pathlib import Path\n\n# Use default directory (~/.cache/quivr/files)\nstorage = LocalStorage()\n\n# Use custom directory\nstorage = LocalStorage(dir_path=Path(\"/my/custom/storage\"))\n\n# Use symlinks instead of copying\nstorage = LocalStorage(dir_path=Path(\"/mnt/data\"), copy_flag=False)\n```\n\n### Checking Storage Information\n\n```python\nfrom quivr_core.storage.local_storage import LocalStorage\n\nstorage = LocalStorage()\n\n# Get number of files\nfile_count = storage.nb_files()\n\n# Get storage info\ninfo = storage.info()\n# Returns: {\"directory_path\": \"...\", \"name\": \"local_storage\", \"nb_files\": N}\n```\n\n### Uploading Files\n\n```python\nimport asyncio\nfrom quivr_core.storage.local_storage import LocalStorage\nfrom quivr_core.files.file import get_file_path\nfrom pathlib import Path\n\nasync def upload_example():\n    storage = LocalStorage()\n    \n    # Create QuivrFile from path\n    qfile = get_file_path(Path(\"./document.pdf\"), brain_id=None)\n    \n    # Upload with overwrite allowed\n    await storage.upload_file(qfile, exists_ok=True)\n    \n    print(f\"Total files: {storage.nb_files()}\")\n```\n\n### Listing All Files\n\n```python\nimport asyncio\nfrom quivr_core.storage.local_storage import LocalStorage\n\nasync def list_files():\n    storage = LocalStorage()\n    \n    files = await storage.get_files()\n    \n    for f in files:\n        print(f\"File: {f.original_filename}\")\n        print(f\"  ID: {f.id}\")\n        print(f\"  Size: {f.file_size} bytes\")\n        print(f\"  SHA-1: {f.file_sha1}\")\n```\n\n## Memory Management\n\nThe `QuivrFile` class uses `__slots__` to optimize memory usage by restricting attribute creation:\n\n```python\nclass QuivrFile:\n    __slots__ = [\n        \"id\",\n        \"brain_id\",\n        \"path\",\n        \"original_filename\",\n        \"file_size\",\n        \"file_extension\",\n        \"file_sha1\",\n 
       \"additional_metadata\",\n    ]\n```\n\nThis approach:\n- Prevents arbitrary attribute assignment\n- Reduces memory overhead per instance\n- Improves attribute access speed\n\nSource: [core/quivr_core/files/file.py:48-57](https://github.com/QuivrHQ/quivr/blob/main/core/quivr_core/files/file.py)\n\n## Extending the Storage System\n\nTo create a custom storage backend, implement the `StorageBase` abstract class:\n\n```python\nfrom quivr_core.storage.storage_base import StorageBase\nfrom quivr_core.files.file import QuivrFile\n\nclass MyCustomStorage(StorageBase):\n    name = \"my_custom_storage\"\n    \n    def __init__(self):\n        self._files = []\n    \n    def nb_files(self) -> int:\n        return len(self._files)\n    \n    async def get_files(self) -> list[QuivrFile]:\n        return self._files\n    \n    async def upload_file(self, file: QuivrFile, exists_ok: bool = False) -> None:\n        # Custom implementation\n        pass\n```\n\n## Best Practices\n\n1. **Deduplication**: Always compute and check SHA-1 hashes before uploading to avoid duplicate files.\n\n2. **Async Operations**: All file I/O operations are asynchronous. Use `await` properly in async contexts.\n\n3. **Memory Efficiency**: Use `__slots__` for custom file classes to reduce memory footprint.\n\n4. **Path Handling**: Use `pathlib.Path` for cross-platform path operations.\n\n5. **Error Handling**: Handle `FileExistsError` when uploading files with `exists_ok=False` (default).\n\n6. 
**Directory Management**: Ensure storage directories exist before operations using `os.makedirs(path, exist_ok=True)`.\n\n## Related Components\n\n| Component | Description |\n|-----------|-------------|\n| [Brain](../brain/brain.md) | Uses Storage for file management |\n| [Processor System](../processor/processor.md) | Processes files from storage |\n| [RAG System](../rag/rag.md) | Retrieves documents from storage for query answering |\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: QuivrHQ/quivr\n\nSummary: Found 17 potential pitfall items; 2 are high/blocking. Highest priority: security_permissions - 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback.\n\n## 1. security_permissions · 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_46118108af95480ba852109be6e2c66a | https://github.com/QuivrHQ/quivr/issues/3667 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. security_permissions · 来源证据：[Bug]:\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]:\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_ff260ca3ab5247679b2ded201ec21e24 | https://github.com/QuivrHQ/quivr/issues/2004 | 来源讨论提到 docker 相关条件，需在安装/试用前复核。\n\n## 3. 
installation · 来源证据：Integration idea: Screenpipe for screen/audio context\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Integration idea: Screenpipe for screen/audio context\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_3a7f345691d04bbea72b03858746645e | https://github.com/QuivrHQ/quivr/issues/3658 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 4. installation · 来源证据：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e3091b23b6184dffb982e0cc494134fd | https://github.com/QuivrHQ/quivr/issues/3650 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 5. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:640079149 | https://github.com/QuivrHQ/quivr | README/documentation is current enough for a first validation pass.\n\n## 6. 
runtime · 来源证据：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection…\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection object at 0x7f66a5b06110>>, which will…\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e6fdde799a2b4f53806121190979025a | https://github.com/QuivrHQ/quivr/issues/3654 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 7. runtime · 来源证据：core: v0.0.25\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.25\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7c6096fb5a6442c7b53068a8dd8e2c78 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 8. runtime · 来源证据：core: v0.0.29\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.29\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7eeeeb626a10476e820fcd651727a55a | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29 | 来源讨论提到 node 相关条件，需在安装/试用前复核。\n\n## 9. 
runtime · 来源证据：core: v0.0.33\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.33\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d7a735cb41bc44f2abeb148d689f1320 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 10. maintenance · 来源证据：core: v0.0.24\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.24\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_5272fe70cda24220a5aeb994ad06819e | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 11. maintenance · 来源证据：core: v0.0.26\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.26\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7f723b616fd446c3be43298d75fd6e8b | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.26 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 12. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | last_activity_observed missing\n\n## 13. 
security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium\n\n## 14. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium\n\n## 15. security_permissions · 来源证据：core: v0.0.27\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：core: v0.0.27\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_027338ba02764da6b371970615591265 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.27 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 16. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | issue_or_pr_quality=unknown\n\n## 17. 
maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | release_recency=unknown\n\n<!-- canonical_name: QuivrHQ/quivr; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "Human Manual"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log\n\nProject: QuivrHQ/quivr\n\nSummary: Found 17 potential pitfall items; 2 are high/blocking. Highest priority: security_permissions - 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback.\n\n## 1. security_permissions · 来源证据：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：EU AI Act Compliance Scan Results — Sharing Findings for Feedback\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_46118108af95480ba852109be6e2c66a | https://github.com/QuivrHQ/quivr/issues/3667 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. security_permissions · 来源证据：[Bug]:\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]:\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_ff260ca3ab5247679b2ded201ec21e24 | https://github.com/QuivrHQ/quivr/issues/2004 | 来源讨论提到 docker 相关条件，需在安装/试用前复核。\n\n## 3. installation · 来源证据：Integration idea: Screenpipe for screen/audio context\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：Integration idea: Screenpipe for screen/audio context\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_3a7f345691d04bbea72b03858746645e | https://github.com/QuivrHQ/quivr/issues/3658 | 来源类型 github_issue 暴露的待验证使用条件。\n\n## 4. 
installation · 来源证据：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: RuntimeError: There is no current event loop in thread 'MainThread' when using Brain.from_files() in script\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e3091b23b6184dffb982e0cc494134fd | https://github.com/QuivrHQ/quivr/issues/3650 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 5. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:640079149 | https://github.com/QuivrHQ/quivr | README/documentation is current enough for a first validation pass.\n\n## 6. runtime · 来源证据：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection…\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：The garbage collector is trying to clean up non-checked-in connection <AdaptedConnection <asyncpg.connection.Connection object at 0x7f66a5b06110>>, which will…\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_e6fdde799a2b4f53806121190979025a | https://github.com/QuivrHQ/quivr/issues/3654 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 7. 
runtime · 来源证据：core: v0.0.25\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.25\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7c6096fb5a6442c7b53068a8dd8e2c78 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.25 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 8. runtime · 来源证据：core: v0.0.29\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.29\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7eeeeb626a10476e820fcd651727a55a | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.29 | 来源讨论提到 node 相关条件，需在安装/试用前复核。\n\n## 9. runtime · 来源证据：core: v0.0.33\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个运行相关的待验证问题：core: v0.0.33\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_d7a735cb41bc44f2abeb148d689f1320 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.33 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 10. maintenance · 来源证据：core: v0.0.24\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.24\n- User impact: 可能影响升级、迁移或版本选择。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_5272fe70cda24220a5aeb994ad06819e | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.24 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 11. 
maintenance · 来源证据：core: v0.0.26\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：core: v0.0.26\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7f723b616fd446c3be43298d75fd6e8b | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.26 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 12. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | last_activity_observed missing\n\n## 13. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium\n\n## 14. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:640079149 | https://github.com/QuivrHQ/quivr | no_demo; severity=medium\n\n## 15. 
security_permissions · 来源证据：core: v0.0.27\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：core: v0.0.27\n- User impact: 可能影响授权、密钥配置或安全边界。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_027338ba02764da6b371970615591265 | https://github.com/QuivrHQ/quivr/releases/tag/core-0.0.27 | 来源类型 github_release 暴露的待验证使用条件。\n\n## 16. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | issue_or_pr_quality=unknown\n\n## 17. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:640079149 | https://github.com/QuivrHQ/quivr | release_recency=unknown\n",
      "summary": "Identity, installation, configuration, runtime, and safety pitfalls before user trial.",
      "title": "Pitfall Log"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# quivr - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for QuivrHQ/quivr.\n\nProject:\n- Name: quivr\n- Repository: https://github.com/QuivrHQ/quivr\n- Summary: Opiniated RAG for integrating GenAI in your apps 🧠   Focus on your product rather than the RAG. Easy integration in existing products with customisation!  Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Anyway you want.\n- Host target: chatgpt\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Opiniated RAG for integrating GenAI in your apps 🧠   Focus on your product rather than the RAG. Easy integration in existing products with customisation!  Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Anyway you want.\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Opiniated RAG for integrating GenAI in your apps 🧠   Focus on your product rather than the RAG. Easy integration in existing products with customisation!  Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. 
Anyway you want.\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. page-introduction: Introduction to Quivr. Produce one small intermediate artifact and wait for confirmation.\n2. page-getting-started: Getting Started. Produce one small intermediate artifact and wait for confirmation.\n3. page-installation: Installation. Produce one small intermediate artifact and wait for confirmation.\n4. page-architecture-overview: System Architecture Overview. Produce one small intermediate artifact and wait for confirmation.\n5. page-core-components: Core Components. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/QuivrHQ/quivr\n- https://github.com/QuivrHQ/quivr#readme\n- README.md\n- core/quivr_core/__init__.py\n- core/README.md\n- examples/simple_question/simple_question.py\n- examples/simple_question/simple_question_streaming.py\n- core/pyproject.toml\n- examples/chatbot/pyproject.toml\n- core/quivr_core/brain/brain.py\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. 
Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start\n\nProject: QuivrHQ/quivr\n\n## Official Entry Points\n\n### Python / pip · 官方安装入口\n\n```bash\npip install quivr-core\n```\n\nSource：https://github.com/QuivrHQ/quivr#readme\n\n## Sources\n\n- repo: https://github.com/QuivrHQ/quivr\n- docs: https://github.com/QuivrHQ/quivr#readme\n",
      "summary": "Entry points extracted from official README or installation documentation.",
      "title": "Quick Start"
    }
  },
  "validation_id": "dval_cb34a15d24d14dc3a30e63e5b51ebb41"
}