{
  "canonical_name": "chroma-core/chroma",
  "compilation_id": "pack_21a7c10676e84c77a7bb0f5cf1bce15e",
  "created_at": "2026-05-15T23:30:32.638778+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=skill, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=skill, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `pip install chromadb` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "pip install chromadb",
      "sandbox_container_image": "python:3.12-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "llm_execute_isolated_install",
      "sandbox_validation_id": "sbx_757bd053778043b19e02ed8040e2b20e"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_d0aaea7fac4f215bb66dc3d614a3e6ad",
    "canonical_name": "chroma-core/chroma",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/chroma-core/chroma",
    "slug": "chroma",
    "source_packet_id": "phit_66c9da2f5fa34d29a26558c9dc864360",
    "source_validation_id": "dval_ce6db9c989fb420eb0e8d2f3c80e9153"
  },
  "merchandising": {
    "best_for": "需要信息检索与知识管理能力，并使用 local_cli的用户",
    "github_forks": 2237,
    "github_stars": 27894,
    "one_liner_en": "Search infrastructure for AI",
    "one_liner_zh": "Search infrastructure for AI",
    "primary_category": {
      "category_id": "research-knowledge",
      "confidence": "high",
      "name_en": "Research & Knowledge",
      "name_zh": "信息检索与知识管理",
      "reason": "curated popular coverage category matched project identity"
    },
    "target_user": "使用 local_cli 等宿主 AI 的用户",
    "title_en": "chroma",
    "title_zh": "chroma 能力包",
    "visible_tags": [
      {
        "label_en": "Browser Agents",
        "label_zh": "浏览器 Agent",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-browser-agents",
        "type": "product_domain"
      },
      {
        "label_en": "Web Task Automation",
        "label_zh": "网页任务自动化",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-web-task-automation",
        "type": "user_job"
      },
      {
        "label_en": "Browser Automation",
        "label_zh": "浏览器自动化",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-browser-automation",
        "type": "core_capability"
      },
      {
        "label_en": "Node-based Workflow",
        "label_zh": "节点式流程编排",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-node-based-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Evaluation Suite",
        "label_zh": "评测体系",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-evaluation-suite",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_66c9da2f5fa34d29a26558c9dc864360",
  "page_model": {
    "artifacts": {
      "artifact_slug": "chroma",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "pip install chromadb",
          "label": "Python / pip · 官方安装入口",
          "source": "https://github.com/chroma-core/chroma#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "浏览器 Agent",
        "网页任务自动化",
        "浏览器自动化",
        "节点式流程编排",
        "评测体系"
      ],
      "eyebrow": "信息检索与知识管理",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要信息检索与知识管理能力，并使用 local_cli的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "Search infrastructure for AI"
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "local_cli",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "skill, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "README/documentation is current enough for a first validation pass.",
            "category": "能力坑",
            "evidence": [
              "capability.assumptions | github_repo:546206616 | https://github.com/chroma-core/chroma | README/documentation is current enough for a first validation pass."
            ],
            "severity": "medium",
            "suggested_check": "将假设转成下游验证清单。",
            "title": "能力判断依赖假设",
            "user_impact": "假设不成立时，用户拿不到承诺的能力。"
          },
          {
            "body": "未记录 last_activity_observed。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | last_activity_observed missing"
            ],
            "severity": "medium",
            "suggested_check": "补 GitHub 最近 commit、release、issue/PR 响应信号。",
            "title": "维护活跃度未知",
            "user_impact": "新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "downstream_validation.risk_items | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "进入安全/权限治理复核队列。",
            "title": "下游验证发现风险项",
            "user_impact": "下游已经要求复核，不能在页面中弱化。"
          },
          {
            "body": "no_demo",
            "category": "安全/权限坑",
            "evidence": [
              "risks.scoring_risks | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium"
            ],
            "severity": "medium",
            "suggested_check": "把风险写入边界卡，并确认是否需要人工复核。",
            "title": "存在评分风险",
            "user_impact": "风险会影响是否适合普通用户安装。"
          },
          {
            "body": "issue_or_pr_quality=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | issue_or_pr_quality=unknown"
            ],
            "severity": "low",
            "suggested_check": "抽样最近 issue/PR，判断是否长期无人处理。",
            "title": "issue/PR 响应质量未知",
            "user_impact": "用户无法判断遇到问题后是否有人维护。"
          },
          {
            "body": "release_recency=unknown。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | release_recency=unknown"
            ],
            "severity": "low",
            "suggested_check": "确认最近 release/tag 和 README 安装命令是否一致。",
            "title": "发布节奏不明确",
            "user_impact": "安装命令和文档可能落后于代码，用户踩坑概率升高。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 6 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：能力坑 - 能力判断依赖假设。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": 185,
        "forks": 2237,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": 27894
      },
      "source_url": "https://github.com/chroma-core/chroma",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "Search infrastructure for AI",
      "title": "chroma 能力包",
      "trial_prompt": "# chroma - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for chroma-core/chroma.\n\nProject:\n- Name: chroma\n- Repository: https://github.com/chroma-core/chroma\n- Summary: Search infrastructure for AI\n- Host target: local_cli\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Search infrastructure for AI\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Search infrastructure for AI\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. chroma-overview: Chroma Overview. Produce one small intermediate artifact and wait for confirmation.\n2. getting-started: Getting Started with Chroma. Produce one small intermediate artifact and wait for confirmation.\n3. architecture-overview: System Architecture Overview. Produce one small intermediate artifact and wait for confirmation.\n4. python-client-sdk: Python Client SDK. Produce one small intermediate artifact and wait for confirmation.\n5. rust-services-architecture: Rust Backend Services Architecture. 
Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/chroma-core/chroma\n- https://github.com/chroma-core/chroma#readme\n- README.md\n- Cargo.toml\n- pyproject.toml\n- chromadb/__init__.py\n- chromadb/api/client.py\n- chromadb/api/models/Collection.py\n- examples/basic_functionality/start_here.ipynb\n- rust/frontend/src/server.rs\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "voices": [
        {
          "body": "来源平台：github。github/github_issue: [Bug]: metadata filter does not work over 20 millions chunk.（https://github.com/chroma-core/chroma/issues/4089）；github/github_issue: [Bug]: PersistentClient second-opener hangs ~16 minutes on shared persis（https://github.com/chroma-core/chroma/issues/7040）；github/github_issue: [Security] Unsafe pickle.load() in PersistentLocalHnswSegment enables ar（https://github.com/chroma-core/chroma/issues/6926）；github/github_issue: query(where=...) raises 'Error finding id' after batched adds until WAL （https://github.com/chroma-core/chroma/issues/7032）；github/github_release: 1.5.9（https://github.com/chroma-core/chroma/releases/tag/1.5.9）；github/github_release: foundation-cli-v0.1.0-alpha.3（https://github.com/chroma-core/chroma/releases/tag/foundation-cli-v0.1.0-alpha.3）；github/github_release: 1.5.8（https://github.com/chroma-core/chroma/releases/tag/1.5.8）；github/github_release: 1.5.7（https://github.com/chroma-core/chroma/releases/tag/1.5.7）；github/github_release: 1.5.6（https://github.com/chroma-core/chroma/releases/tag/1.5.6）；github/github_release: 1.5.5（https://github.com/chroma-core/chroma/releases/tag/1.5.5）。这些是项目级外部声音，不作为单独质量证明。",
          "items": [
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: metadata filter does not work over 20 millions chunk.",
              "url": "https://github.com/chroma-core/chroma/issues/4089"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: PersistentClient second-opener hangs ~16 minutes on shared persis",
              "url": "https://github.com/chroma-core/chroma/issues/7040"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Security] Unsafe pickle.load() in PersistentLocalHnswSegment enables ar",
              "url": "https://github.com/chroma-core/chroma/issues/6926"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "query(where=...) raises 'Error finding id' after batched adds until WAL ",
              "url": "https://github.com/chroma-core/chroma/issues/7032"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "1.5.9",
              "url": "https://github.com/chroma-core/chroma/releases/tag/1.5.9"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "foundation-cli-v0.1.0-alpha.3",
              "url": "https://github.com/chroma-core/chroma/releases/tag/foundation-cli-v0.1.0-alpha.3"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "1.5.8",
              "url": "https://github.com/chroma-core/chroma/releases/tag/1.5.8"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "1.5.7",
              "url": "https://github.com/chroma-core/chroma/releases/tag/1.5.7"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "1.5.6",
              "url": "https://github.com/chroma-core/chroma/releases/tag/1.5.6"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "1.5.5",
              "url": "https://github.com/chroma-core/chroma/releases/tag/1.5.5"
            }
          ],
          "status": "已收录 10 条来源",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "信息检索与知识管理",
      "desc": "Search infrastructure for AI",
      "effort": "安装已验证",
      "forks": 2237,
      "icon": "search",
      "name": "chroma 能力包",
      "risk": "可发布",
      "slug": "chroma",
      "stars": 27894,
      "tags": [
        "浏览器 Agent",
        "网页任务自动化",
        "浏览器自动化",
        "节点式流程编排",
        "评测体系"
      ],
      "thumb": "blue",
      "type": "Skill Pack"
    },
    "manual": {
      "markdown": "# https://github.com/chroma-core/chroma 项目说明书\n\n生成时间：2026-05-15 23:02:55 UTC\n\n## 目录\n\n- [Chroma Overview](#chroma-overview)\n- [Getting Started with Chroma](#getting-started)\n- [System Architecture Overview](#architecture-overview)\n- [Protocol Buffers & gRPC API](#protocol-buffers-api)\n- [Python Client SDK](#python-client-sdk)\n- [JavaScript/TypeScript Client SDKs](#javascript-client-sdk)\n- [Rust Backend Services Architecture](#rust-services-architecture)\n- [Go Coordinator & Distributed Systems](#go-coordinator)\n- [Data Storage & Blockstore](#data-storage-blockstore)\n- [Embedding Functions Integration](#embedding-functions)\n\n<a id='chroma-overview'></a>\n\n## Chroma Overview\n\n### 相关页面\n\n相关主题：[Getting Started with Chroma](#getting-started), [System Architecture Overview](#architecture-overview)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/chroma-core/chroma/blob/main/README.md)\n- [clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n- [clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n- [rust/types/src/metadata.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/metadata.rs)\n- [rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n- [rust/types/src/execution/operator.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/execution/operator.rs)\n- [examples/deployments/do-terraform/README.md](https://github.com/chroma-core/chroma/blob/main/examples/deployments/do-terraform/README.md)\n</details>\n\n# Chroma Overview\n\n## Introduction\n\nChroma is an open-source data infrastructure platform designed specifically for AI applications. 
It provides the foundational building blocks for storing, querying, and managing vector embeddings along with associated metadata, enabling developers to build AI-powered applications with efficient similarity search capabilities. 资料来源：[README.md:1]()\n\nAs an open-source solution, Chroma offers flexibility for self-hosting while also providing a cloud-hosted option called Chroma Cloud, which delivers serverless vector, hybrid, and full-text search capabilities. The platform is designed to be fast, cost-effective, scalable, and straightforward to deploy. 资料来源：[README.md:17-21]()\n\n## Architecture Overview\n\nChroma follows a client-server architecture with multiple client libraries available for different programming environments. The system is built with Rust for core performance-critical components and provides idiomatic client libraries for Python and JavaScript/TypeScript.\n\n```mermaid\ngraph TD\n    A[Client Applications] --> B[Python Client / JS Client]\n    B --> C[Chroma Server API]\n    C --> D[Worker Nodes]\n    D --> E[Blockstore<br/>Arrow Storage]\n    D --> F[Compaction &<br/>Log Processing]\n    E --> G[Persistent Storage]\n    \n    H[Chroma Cloud] -.->|Optional hosted| C\n```\n\n### Client Libraries\n\nChroma provides two primary client libraries:\n\n| Client | Package | Description |\n|--------|---------|-------------|\n| Python | `chromadb` | Full-featured Python client library 资料来源：[clients/python/README.md:1]() |\n| Python HTTP | `chromadb-client` | Lightweight HTTP-only client for server connections 资料来源：[clients/python/README.md:12]() |\n| JavaScript/TypeScript | `chromadb` (npm) | Full-featured JS client for Node.js and browser 资料来源：[clients/new-js/packages/chromadb/README.md:1]() |\n\n#### Python Client Installation\n\n```bash\npip install chromadb  # Full client library\npip install chromadb-client  # HTTP client only\n```\n\n#### JavaScript Client Example\n\n```javascript\nimport { ChromaClient } from \"chromadb\";\n\nconst chroma = new 
ChromaClient();\nconst collection = await chroma.createCollection({ name: \"test-from-js\" });\n\nfor (let i = 0; i < 20; i++) {\n  await collection.add({\n    ids: [\"test-id-\" + i.toString()],\n    embeddings: [[1, 2, 3, 4, 5]],\n    documents: [\"test\"],\n  });\n}\n\nconst queryData = await collection.query({\n  queryEmbeddings: [[1, 2, 3, 4, 5]],\n  queryTexts: [\"test\"],\n});\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md:9-27]()\n\n## Data Model\n\n### Collection Structure\n\nCollections in Chroma serve as the primary organizational unit for storing related documents and their associated embeddings. Each collection contains:\n\n- **Documents**: The textual content to be embedded\n- **Embeddings**: Vector representations of documents\n- **Metadatas**: Key-value pairs for filtering and categorization\n- **Unique Identifiers**: User-provided IDs for each record 资料来源：[clients/python/README.md:16-27]()\n\n### Metadata Filtering\n\nChroma supports rich metadata filtering through operators that enable precise data retrieval:\n\n```mermaid\ngraph LR\n    A[Query Request] --> B[Metadata Filter]\n    B --> C{Operator Type}\n    C -->|Contains| D[String contains check]\n    C -->|NotContains| E[String excludes check]\n    C -->|Regex| F[Regular expression match]\n    C -->|NotRegex| G[Regex exclusion]\n```\n\n**Supported Document Operators:**\n\n| Operator | Description | Example |\n|----------|-------------|---------|\n| `Contains` | Document contains substring | `{\"$contains\": \"keyword\"}` |\n| `NotContains` | Document excludes substring | `{\"$not_contains\": \"spam\"}` |\n| `Regex` | Regular expression match | `{\"$regex\": \"^prefix.*\"}` |\n| `NotRegex` | Exclude by regex pattern | `{\"$not_regex\": \".*suffix$\"}` |\n\n资料来源：[rust/types/src/metadata.rs:1-30]()\n\n### Search Keys\n\nThe query system supports specialized keys for accessing different aspects of stored data:\n\n| Key | Description | Usage |\n|-----|-------------|-------|\n| `#document` 
| Full text content | `Key::Document` |\n| `#embedding` | Vector embeddings | `Key::Embedding` |\n| `#metadata` | Record metadata | `Key::Metadata` |\n| `#score` | Similarity score | `Key::Score` |\n| Custom fields | User-defined metadata | `Key::field(\"field_name\")` |\n\n资料来源：[rust/types/src/execution/operator.rs:1-80]()\n\n## Core Components\n\n### Storage Layer\n\nThe blockstore provides the underlying storage mechanism using Arrow format for efficient columnar data storage and retrieval. This enables high-performance queries across large datasets. 资料来源：[rust/blockstore/src/arrow/root.rs:1]()\n\n### Execution Operators\n\nChroma's query execution pipeline uses operators that transform and filter data through well-defined stages:\n\n```mermaid\ngraph TD\n    A[Query Request] --> B[Log Fetch Orchestrator]\n    B --> C[KNN Filter]\n    C --> D[Apply Logs Orchestrator]\n    D --> E[Segment Writers]\n    E --> F[Compact Collection]\n```\n\n**Key Orchestrators:**\n\n| Component | Purpose |\n|-----------|---------|\n| `LogFetchOrchestrator` | Fetches and materializes log entries 资料来源：[rust/worker/src/execution/orchestration/log_fetch_orchestrator.rs:1]() |\n| `KnnFilter` | Performs k-nearest neighbor filtering 资料来源：[rust/worker/src/execution/orchestration/knn_filter.rs:1]() |\n| `ApplyLogsOrchestrator` | Applies log entries to segment writers 资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1]() |\n\n### Error Handling\n\nThe system uses a consistent error code hierarchy for reliable error management:\n\n| Error Code | Description |\n|------------|-------------|\n| `InvalidArgument` | Client-provided invalid parameters |\n| `Internal` | System-level internal errors |\n| `ResourceExhausted` | Resource limits reached (e.g., task abortion) |\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:1-20]()\n\n## Deployment Options\n\n### Self-Hosting\n\nChroma can be deployed on-premises or in cloud environments using Docker, Kubernetes, or direct 
installation.\n\n**Deployment Requirements:**\n\n| Component | Specification |\n|-----------|---------------|\n| Storage | Persistent volume for vector data |\n| Network | Port 8000 for API access |\n| Auth | Optional token or basic authentication (v0.4.7+) |\n\n资料来源：[examples/deployments/do-terraform/README.md:1-50]()\n\n**Starting the Server:**\n\n```bash\n# Install via pip\npip install chromadb\n\n# Run in client-server mode\nchroma run --path /chroma_db_path\n```\n\n资料来源：[README.md:14-16]()\n\n### Chroma Cloud\n\nChroma Cloud provides a fully managed hosted service with:\n\n- Serverless vector search\n- Hybrid search capabilities\n- Full-text search integration\n- Automatic scaling\n- $5 free credits for new users\n\n资料来源：[README.md:23-29]()\n\n### Cloud Deployment (Terraform Example)\n\nFor DigitalOcean deployment:\n\n```bash\nexport TF_VAR_do_token=<DIGITALOCEAN_TOKEN>\nexport TF_ssh_public_key=\"./chroma-do.pub\"\nexport TF_ssh_private_key=\"./chroma-do\"\nexport TF_VAR_chroma_release=\"0.4.12\"\nexport TF_VAR_region=\"ams2\"\nexport TF_VAR_public_access=\"true\"\nexport TF_VAR_enable_auth=\"true\"\nexport TF_VAR_auth_type=\"token\"\n\nterraform apply -auto-approve\n```\n\n资料来源：[examples/deployments/do-terraform/README.md:30-45]()\n\n## CLI Tool\n\nThe Rust-based CLI provides command-line management capabilities:\n\n```bash\nchroma run --path <db_path>     # Run the server\nchroma db create <db_name>      # Create database\nchroma db list                  # List databases\nchroma login                    # Authenticate with Chroma Cloud\nchroma profile                  # Manage profiles\nchroma install                  # Install updates\nchroma update                   # Check for updates\n```\n\n资料来源：[rust/cli/src/lib.rs:1-30]()\n\n## Embedding Integration\n\n### Ollama Integration\n\nThe JavaScript client supports Ollama for local embedding generation:\n\n**Configuration Options:**\n\n| Option | Default | Description 
|\n|--------|---------|-------------|\n| `url` | `http://localhost:11434` | Ollama server URL |\n| `model` | `chroma/all-minilm-l6-v2-f32` | Embedding model |\n\n**Supported Models:**\n\n| Model | Dimensions | Use Case |\n|-------|------------|----------|\n| `chroma/all-minilm-l6-v2-f32` | 384 | General purpose (default) |\n| `nomic-embed-text` | 768 | Extended context |\n| `mxbai-embed-large` | 1024 | High accuracy |\n| `snowflake-arctic-embed` | Variable | Domain-specific |\n\n资料来源：[clients/new-js/packages/ai-embeddings/ollama/README.md:1-40]()\n\n## API Response Format\n\n### Get Response Structure\n\nQuery results are returned with flexible inclusion options:\n\n```rust\npub struct GetResponse {\n    pub ids: Vec<String>,\n    pub embeddings: Option<Vec<Vec<f32>>>,      // Optional\n    pub documents: Option<Vec<Option<String>>>, // Optional\n    pub uris: Option<Vec<Option<String>>>,      // Optional\n    pub metadatas: Option<Vec<Option<Metadata>>>, // Optional\n    pub include: IncludeList,\n}\n```\n\n资料来源：[rust/types/src/api_types.rs:1-30]()\n\n## License\n\nChroma is released under the Apache 2.0 license, making it suitable for both commercial and open-source projects. 
资料来源：[README.md:10]()\n\n## Community and Support\n\n| Resource | Link |\n|----------|------|\n| Documentation | https://docs.trychroma.com/ |\n| Discord | https://discord.gg/MMeYNTmh3x |\n| Homepage | https://www.trychroma.com/ |\n\n---\n\n<a id='getting-started'></a>\n\n## Getting Started with Chroma\n\n### 相关页面\n\n相关主题：[Chroma Overview](#chroma-overview), [Python Client SDK](#python-client-sdk)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n- [clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n- [clients/js/packages/chromadb-client/README.md](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n- [clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n- [chromadb/utils/embedding_functions/schemas/README.md](https://github.com/chroma-core/chroma/blob/main/chromadb/utils/embedding_functions/schemas/README.md)\n- [README.md](https://github.com/chroma-core/chroma/blob/main/README.md)\n- [rust/chroma/README.md](https://github.com/chroma-core/chroma/blob/main/rust/chroma/README.md)\n</details>\n\n# Getting Started with Chroma\n\nChroma is an open-source data infrastructure for AI that provides vector, hybrid, and full-text search capabilities. It enables developers to build AI applications by storing embeddings, documents, and metadata with efficient querying mechanisms.\n\n## Overview\n\nChroma serves as a vector database optimized for AI workloads. 
It allows you to:\n\n- Store embeddings alongside documents and metadata\n- Query using text or embedding vectors\n- Filter results based on metadata\n- Work with multiple programming languages including Python and JavaScript\n\n## Installation\n\n### Python Client\n\nInstall the Python client using pip:\n\n```bash\npip install chromadb\n```\n\nFor a lightweight HTTP-only client that connects to a Chroma server:\n\n```bash\npip install chromadb-client\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n\n### JavaScript/TypeScript Client\n\nFor the new JavaScript client:\n\n```bash\nnpm install chromadb\n```\n\nFor a lighter package with optional dependencies:\n\n```bash\nnpm install chromadb-client\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n## Basic Setup and Configuration\n\n### Python Client Setup\n\nConnect to a Chroma server running locally:\n\n```python\nimport chromadb\n\nclient = chromadb.HttpClient(host=\"localhost\", port=8000)\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n\n### JavaScript Client Setup\n\n```javascript\nimport { ChromaClient } from \"chromadb\";\n\nconst chroma = new ChromaClient();\nconst collection = await chroma.createCollection({ name: \"test-from-js\" });\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n### Running Chroma Server\n\nTo run Chroma in client-server mode:\n\n```bash\nchroma run --path /chroma_db_path\n```\n\n资料来源：[README.md](https://github.com/chroma-core/chroma/blob/main/README.md)\n\n## Core Operations\n\n### Creating a Collection\n\nCollections are containers for your documents, embeddings, and metadata.\n\n```python\ncollection = 
client.create_collection(\"all-my-documents\")\n```\n\n### Adding Documents\n\nAdd documents with optional embeddings, metadata, and unique IDs:\n\n```python\ncollection.add(\n    documents=[\"This is document1\", \"This is document2\"],\n    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],\n    ids=[\"doc1\", \"doc2\"],\n    embeddings=[[1.2, 2.1, ...], [1.2, 2.1, ...]]\n)\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n\n### Querying Documents\n\nQuery the collection using text or embeddings:\n\n```python\nresults = collection.query(\n    query_texts=[\"This is a query document\"],\n    n_results=2\n)\n```\n\n```javascript\nconst queryData = await collection.query({\n    queryEmbeddings: [[1, 2, 3, 4, 5]],\n    queryTexts: [\"test\"],\n});\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md) and [clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n## Embedding Functions\n\nChroma supports various embedding providers through configurable embedding functions.\n\n### Configuration Schema\n\nEmbedding functions use JSON Schema validation to ensure cross-language compatibility:\n\n```python\nfrom chromadb.utils.embedding_functions.schemas import validate_config\n\nconfig = {\n    \"api_key_env_var\": \"CHROMA_OPENAI_API_KEY\",\n    \"model_name\": \"text-embedding-ada-002\"\n}\nvalidate_config(config, \"openai\")\n```\n\nEach schema follows JSON Schema Draft-07 specification and includes version, title, description, properties, required fields, and additionalProperties settings.\n\n资料来源：[chromadb/utils/embedding_functions/schemas/README.md](https://github.com/chroma-core/chroma/blob/main/chromadb/utils/embedding_functions/schemas/README.md)\n\n### Available Embedding Providers\n\n| Provider | Package | API Key Environment Variable 
|\n|----------|---------|------------------------------|\n| OpenAI | `@chroma-core/openai` | `CHROMA_OPENAI_API_KEY` |\n| Cohere | `@chroma-core/cohere` | `COHERE_API_KEY` |\n| Jina | `@chroma-core/jina` | `JINA_API_KEY` |\n| Google Gemini | `@chroma-core/google-gemini` | `GOOGLE_API_KEY` |\n| Hugging Face | `@chroma-core/hugging-face` | `HF_API_KEY` |\n| Ollama | `@chroma-core/ollama` | `OLLAMA_API_KEY` |\n| Together AI | `@chroma-core/together-ai` | `TOGETHER_API_KEY` |\n| Voyage AI | `@chroma-core/voyageai` | `VOYAGE_API_KEY` |\n| xAI | `@chroma-core/xai` | `XAI_API_KEY` |\n\n资料来源：[clients/new-js/packages/ai-embeddings/all/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/README.md)\n\n### Using Embedding Functions\n\n```typescript\nimport { ChromaClient } from 'chromadb';\nimport { JinaEmbeddingFunction } from '@chroma-core/jina';\n\nconst embedder = new JinaEmbeddingFunction({\n    apiKey: 'your-api-key',\n    modelName: 'jina-embeddings-v2-base-en',\n    task: 'retrieval.passage',\n    dimensions: 768,\n    lateChunking: false,\n    truncate: true,\n    normalized: true,\n    embeddingType: 'float'\n});\n\nconst collection = await client.createCollection({\n    name: 'my-collection',\n    embeddingFunction: embedder,\n});\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/jina/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/jina/README.md)\n\n### Common Utilities\n\nThe `@chroma-core/ai-embeddings-common` package provides shared utilities:\n\n```typescript\nimport { validateConfigSchema, snakeCase, isBrowser } from '@chroma-core/ai-embeddings-common';\n\n// Convert camelCase to snake_case\nconst snakeCaseConfig = snakeCase({ modelName: 'text-embedding-3-small' });\n// Result: { model_name: 'text-embedding-3-small' }\n\n// Check environment\nif (isBrowser()) {\n    // Browser-specific 
logic\n}\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n\n## JavaScript Client Packages\n\n### chromadb vs chromadb-client\n\n| Feature | `chromadb` | `chromadb-client` |\n|---------|------------|-------------------|\n| Package size | Larger | Smaller |\n| Dependencies | Bundled | Optional peer dependencies |\n| Use case | Quick setup | Production with specific providers |\n\nThe `chromadb-client` package is ideal for production environments where you only use specific embedding providers.\n\n资料来源：[clients/js/packages/chromadb-client/README.md](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n\n## Chroma Cloud\n\nChroma Cloud provides a hosted service for serverless vector, hybrid, and full-text search. To use Chroma Cloud:\n\n1. Sign up at [trychroma.com](https://trychroma.com/signup)\n2. Create a database\n3. Get your API key from the dashboard\n\nConfigure environment variables for cloud access:\n\n```bash\nexport CHROMA_API_KEY=your-api-key\nexport CHROMA_TENANT=your-tenant\nexport CHROMA_DATABASE=your-database\n```\n\n资料来源：[README.md](https://github.com/chroma-core/chroma/blob/main/README.md) and [rust/chroma/README.md](https://github.com/chroma-core/chroma/blob/main/rust/chroma/README.md)\n\n## Environment Variables\n\n| Variable | Description |\n|----------|-------------|\n| `CHROMA_API_KEY` | API key for Chroma Cloud authentication |\n| `CHROMA_TENANT` | Sets the tenant (auto-inferred with API key) |\n| `CHROMA_DATABASE` | Sets the database (auto-inferred with scoped API key) |\n| `[PROVIDER]_API_KEY` | Provider-specific API keys (e.g., `OPENAI_API_KEY`) |\n\nFor local development, you can use:\n\n```rust\nlet client = ChromaHttpClient::from_env()?;\n```\n\n资料来源：[rust/chroma/README.md](https://github.com/chroma-core/chroma/blob/main/rust/chroma/README.md)\n\n## Complete Example 
Workflow\n\n```mermaid\ngraph TD\n    A[Install Chroma Client] --> B[Initialize Client]\n    B --> C[Create Collection]\n    C --> D[Add Documents with Embeddings]\n    D --> E[Query Collection]\n    E --> F[Get Results]\n    \n    G[Configure Embedding Function] --> D\n    H[Add Metadata] --> D\n    I[Set API Keys] --> B\n```\n\n## Quick Reference Commands\n\n### Installation\n\n```bash\n# Python\npip install chromadb\n\n# JavaScript\nnpm install chromadb\n\n# Start server\nchroma run --path /chroma_db_path\n```\n\n### Basic Operations\n\n| Operation | Python | JavaScript |\n|-----------|--------|------------|\n| Create client | `client = chromadb.HttpClient()` | `new ChromaClient()` |\n| Create collection | `client.create_collection(name)` | `client.createCollection({name})` |\n| Add documents | `collection.add(...)` | `collection.add(...)` |\n| Query | `collection.query(...)` | `collection.query(...)` |\n\n## Additional Resources\n\n- [Documentation](https://docs.trychroma.com/)\n- [Community Discord](https://discord.gg/MMeYNTmh3x)\n- [GitHub Repository](https://github.com/chroma-core/chroma)\n- [Homepage](https://www.trychroma.com/)\n\n---\n\n<a id='architecture-overview'></a>\n\n## System Architecture Overview\n\n### 相关页面\n\n相关主题：[Rust Backend Services Architecture](#rust-services-architecture), [Go Coordinator & Distributed Systems](#go-coordinator), [Protocol Buffers & gRPC API](#protocol-buffers-api)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/frontend/src/server.rs](https://github.com/chroma-core/chroma/blob/main/rust/frontend/src/server.rs)\n- [rust/worker/src/server.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/server.rs)\n- [rust/sysdb/src/sysdb.rs](https://github.com/chroma-core/chroma/blob/main/rust/sysdb/src/sysdb.rs)\n- [rust/types/src/lib.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/lib.rs)\n- 
[docs/mintlify/reference/architecture/overview.mdx](https://github.com/chroma-core/chroma/blob/main/docs/mintlify/reference/architecture/overview.mdx)\n</details>\n\n# System Architecture Overview\n\n## Introduction\n\nChroma is an open-source data infrastructure platform designed for AI applications, providing vector, hybrid, and full-text search capabilities. The system is built as a distributed, scalable architecture that handles embedding storage, indexing, and query execution across multiple components. Chroma positions itself as the open-source alternative to hosted vector database services, enabling developers to deploy sophisticated AI search infrastructure while maintaining full control over their data.\n\nThe architecture follows a modular design pattern with distinct components for API serving, query processing, data storage, and system coordination. Each component is responsible for specific aspects of the data pipeline, from ingestion through indexing to query execution.\n\n## High-Level Architecture\n\nChroma's architecture consists of three primary layers working in concert to provide vector search capabilities:\n\n1. **Frontend Layer** - Handles API requests and response formatting\n2. **Worker Layer** - Executes query operations and manages indexing\n3. **System Database (SysDB) Layer** - Maintains metadata and system state\n\n```mermaid\ngraph TD\n    A[Client Application] --> B[Frontend Server]\n    B --> C[Worker Servers]\n    C --> D[SysDB]\n    C --> E[Blockstore]\n    E --> F[Arrow Files]\n    D --> G[Collection Metadata]\n    G --> H[Topology Information]\n```\n\n## Component Architecture\n\n### Frontend Server\n\nThe frontend server component serves as the API gateway for Chroma, handling incoming HTTP/gRPC requests and translating them into internal operations. 
The frontend is responsible for request validation, authentication handling, and response serialization.\n\n**Key Responsibilities:**\n\n| Responsibility | Description |\n|----------------|-------------|\n| API Endpoint Handling | Exposes REST and gRPC endpoints for collection operations |\n| Request Validation | Validates incoming query parameters and payload structures |\n| Response Serialization | Converts internal data structures to API response formats |\n| Error Mapping | Translates internal errors to appropriate HTTP status codes |\n\n资料来源：[rust/frontend/src/server.rs:1-50]()\n\nThe frontend server implements the `ChromaError` trait for consistent error handling across the system. Error codes are mapped as follows:\n\n| Internal Error | HTTP Status Code |\n|----------------|------------------|\n| InvalidArgument | 400 Bad Request |\n| NotFound | 404 Not Found |\n| Internal | 500 Internal Server Error |\n| Unavailable | 503 Service Unavailable |\n\n### Worker Server\n\nThe worker server handles the core data operations including embedding storage, indexing, and query execution. 
Workers are the primary compute units in Chroma's architecture, responsible for processing search requests and maintaining index structures.\n\n资料来源：[rust/worker/src/server.rs:1-60]()\n\n**Worker Components:**\n\n```mermaid\ngraph LR\n    A[Query Request] --> B[Query Planner]\n    B --> C[HNSW Index]\n    B --> D[Spann Index]\n    B --> E[Record Segment]\n    B --> F[Metadata Segment]\n    C --> G[Result Merger]\n    D --> G\n    E --> G\n    F --> G\n    G --> H[Response]\n```\n\nThe worker server implements orchestration components for managing complex operations:\n\n- **ApplyLogsOrchestrator** - Coordinates log application and compaction\n- **WorkQueueClient** - Manages distributed task execution\n- **Segment Writers** - Handles data persistence for different segment types\n\n资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1-80]()\n\n### System Database (SysDB)\n\nThe SysDB component maintains all metadata about collections, segments, and system topology. It provides a centralized view of the system's state and enables coordination across multiple workers.\n\n**SysDB Responsibilities:**\n\n| Function | Description |\n|----------|-------------|\n| Collection Metadata | Stores collection configurations and schemas |\n| Segment Registry | Tracks active segments and their locations |\n| Topology Management | Manages provider-region mappings for distributed deployments |\n| Transaction Coordination | Ensures consistency across distributed operations |\n\n资料来源：[rust/sysdb/src/sysdb.rs:1-100]()\n\nThe SysDB uses a provider-region topology model that supports multi-cloud and multi-region deployments:\n\n```rust\npub struct ProviderRegion<T> {\n    name: RegionName,\n    provider: String,      // e.g., \"aws\", \"gcp\"\n    region: String,        // e.g., \"us-east-1\"\n    config: T,             // Provider-specific configuration\n}\n```\n\n资料来源：[rust/types/src/topology.rs:1-60]()\n\n## Data Model Architecture\n\n### Collection 
Schema\n\nCollections in Chroma follow a flexible schema model that supports multiple index types and data fields.\n\n```mermaid\ngraph TD\n    A[Collection] --> B[Record Segment]\n    A --> C[Metadata Segment]\n    A --> D[Vector Index]\n    A --> E[Sparse Vector Index]\n    D --> F[HNSW Index]\n    D --> G[Spann Index]\n```\n\n**Supported Index Types:**\n\n| Index Type | Purpose | Key Configuration |\n|------------|---------|-------------------|\n| Vector Index | Dense embeddings | `Space` (Cosine, L2, Dot), HNSW params |\n| Sparse Vector Index | BM25-style inverted index | StringInvertedIndexConfig |\n| Spann Index | Memory-efficient approximate search | InternalSpannConfiguration |\n\n资料来源：[rust/types/src/collection_schema.rs:1-150]()\n\n### API Types\n\nThe API layer defines core types for query operations:\n\n| Type | Purpose |\n|------|---------|\n| `Include` | Specifies which fields to return (distances, documents, embeddings, metadatas, uris) |\n| `IncludeList` | Collection of Include values with convenience constructors |\n| `WhereDocumentOperator` | Document filtering (Contains, NotContains, Regex, NotRegex) |\n\n资料来源：[rust/types/src/api_types.rs:1-100]()\n\n```rust\npub enum Include {\n    Distance,\n    Document,\n    Embedding,\n    Metadata,\n    Uri,\n}\n\nimpl IncludeList {\n    pub fn default_query() -> Self {\n        Self(vec![Include::Document, Include::Metadata, Include::Distance])\n    }\n    pub fn all() -> Self {\n        Self(vec![Include::Document, Include::Metadata, Include::Distance, Include::Embedding, Include::Uri])\n    }\n}\n```\n\n### Metadata Filtering\n\nChroma supports rich metadata filtering through the `MetadataExpression` and `MetadataComparison` types:\n\n```mermaid\ngraph TD\n    A[MetadataExpression] --> B[key: String]\n    A --> C[comparison: MetadataComparison]\n    C --> D[Primitive: Operator + Value]\n    C --> E[Set: Operator + SetValue]\n```\n\n资料来源：[rust/types/src/metadata.rs:1-80]()\n\n## Blockstore 
Architecture\n\nThe blockstore provides persistent storage for indexed data using Apache Arrow format for efficient serialization and querying.\n\n### Arrow Block Structure\n\n```mermaid\ngraph LR\n    A[Write Operation] --> B[Block Delta]\n    B --> C[Commit to Block]\n    C --> D[Arrow IPC Format]\n    D --> E[Disk Storage]\n    E --> F[BlockfileReader]\n```\n\n**Block Types:**\n\n| Block Type | Description |\n|------------|-------------|\n| `OrderedBlockDelta` | Sequential writes with ordering guarantees |\n| `UnorderedBlockDelta` | High-throughput writes without ordering |\n| `DirectoryBlock` | Sparse posting directory entries |\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:1-100]()\n\nThe Arrow layout verification ensures data integrity:\n\n```rust\npub enum ArrowLayoutVerificationError {\n    BufferLengthNotAligned,\n    NoRecordBatches,\n    MultipleRecordBatches,\n    InvalidMessageType,\n    RecordBatchDecodeError,\n}\n```\n\n### Sparse Posting Blocks\n\nSparse vectors use a specialized block format for efficient storage:\n\n```\nbody = [ max_offset: u32 LE, max_weight: f32 LE ] × num_entries\n```\n\nThe `DirectoryBlock` stores per-posting-block metadata for term pruning:\n\n- `max_offset`: Largest document offset in the posting block\n- `max_weight`: Largest weight in the posting block\n\n资料来源：[rust/types/src/sparse_posting_block.rs:1-60]()\n\n## Spann Index Architecture\n\nSpann is Chroma's memory-efficient approximate nearest neighbor index that combines HNSW with posting lists.\n\n```mermaid\ngraph TD\n    A[SpannIndexWriter] --> B[HNSW Index]\n    A --> C[Posting Lists]\n    A --> D[Versions Map]\n    A --> E[MaxHeadID Blockfile]\n    B --> F[Reader with adaptive search]\n```\n\n**SpannIndexReader Structure:**\n\n| Component | Type | Purpose |\n|-----------|------|---------|\n| posting_lists | BlockfileReader<u32, SpannPostingList> | Term postings |\n| hnsw_index | HnswIndexRef | Graph-based search |\n| versions_map | BlockfileReader<u32, u32> | 
Version tracking |\n| dimensionality | usize | Vector dimension |\n| adaptive_search_nprobe | bool | Adaptive parameter |\n\n资料来源：[rust/index/src/spann/types.rs:1-80]()\n\n## Indexing Pipeline\n\nThe indexing pipeline handles document ingestion through the following stages:\n\n```mermaid\ngraph LR\n    A[Add Records] --> B[ApplyLogsOrchestrator]\n    B --> C[Record Segment Writer]\n    B --> D[Metadata Segment Writer]\n    B --> E[Vector Index Writer]\n    C --> F[Flush to Blockstore]\n    D --> F\n    E --> F\n    F --> G[Collection Update]\n```\n\n**Error Handling:**\n\nThe orchestrator implements comprehensive error tracking:\n\n| Error Type | Error Code | Tracing |\n|------------|------------|---------|\n| ApplyLog | Internal | Yes |\n| Channel | Internal | Yes |\n| Commit | Internal | Yes |\n| HnswSegment | Internal | Yes |\n| MetadataSegment | Internal | Yes |\n| Seal | Internal | Yes |\n| InvariantViolation | - | Always |\n\n资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1-100]()\n\n## Query Execution Flow\n\n### Query Request Processing\n\n```mermaid\ngraph TD\n    A[Query Request] --> B[Parse Query]\n    B --> C[Load Segments]\n    C --> D[Parallel Segment Queries]\n    D --> E{HNSW Search}\n    D --> F{Spann Search}\n    D --> G{Record Scan}\n    E --> H[Merge Results]\n    F --> H\n    G --> H\n    H --> I[Apply Filters]\n    I --> J[Return Results]\n```\n\n### Work Queue Integration\n\nDistributed query execution uses a work queue system for task coordination:\n\n```mermaid\ngraph TD\n    A[Coordinator] --> B[WorkQueueClient]\n    B --> C[gRPC Channel]\n    C --> D[Worker Pool]\n    D --> E[Task Execution]\n    E --> F[Result Collection]\n```\n\n**Error Code Mapping:**\n\n| gRPC Code | Chroma Error Code |\n|-----------|-------------------|\n| Unavailable | Unavailable |\n| DeadlineExceeded | DeadlineExceeded |\n| ResourceExhausted | ResourceExhausted |\n| NotFound | NotFound |\n| InvalidArgument | InvalidArgument 
|\n\n资料来源：[rust/worker/src/work_queue/work_queue_client.rs:1-80]()\n\n## Deployment Topology\n\nChroma supports flexible deployment configurations through its topology model:\n\n```mermaid\ngraph TD\n    A[Topology] --> B[TopologyName]\n    A --> C[Vec<RegionName>]\n    A --> D[Config T]\n    C --> E[ProviderRegion]\n    E --> F[Provider]\n    E --> G[Region]\n```\n\nThe topology system enables:\n\n- Multi-cloud deployments (AWS, GCP, Azure)\n- Region-specific configurations\n- Custom provider extensions\n\n## Summary\n\nChroma's architecture provides a scalable foundation for AI-powered search with several key design principles:\n\n1. **Separation of Concerns** - Frontend, worker, and SysDB components handle distinct responsibilities\n2. **Arrow-Based Storage** - Efficient columnar storage for analytical queries\n3. **Flexible Indexing** - Support for HNSW, Spann, and sparse vector indexes\n4. **Distributed Coordination** - Work queues and topology management for multi-node deployments\n5. 
**Comprehensive Error Handling** - Consistent error codes and tracing across all components\n\nThe modular architecture allows Chroma to scale from single-node development deployments to distributed production clusters serving AI applications at scale.\n\n---\n\n<a id='protocol-buffers-api'></a>\n\n## Protocol Buffers & gRPC API\n\n### 相关页面\n\n相关主题：[System Architecture Overview](#architecture-overview), [Rust Backend Services Architecture](#rust-services-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/types/src/record.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/record.rs)\n- [rust/types/src/metadata.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/metadata.rs)\n- [rust/types/src/collection_schema.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/collection_schema.rs)\n- [rust/types/src/topology.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/topology.rs)\n- [clients/js/packages/chromadb-core/src/generated/api.ts](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-core/src/generated/api.ts)\n- [go/README.md](https://github.com/chroma-core/chroma/blob/main/go/README.md)\n- [rust/blockstore/src/arrow/root.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n</details>\n\n# Protocol Buffers & gRPC API\n\nChroma uses Protocol Buffers (protobuf) as the core serialization format for inter-service communication and data persistence. The IDL (Interface Definition Language) files in the `idl/` directory define the service APIs, data structures, and message types that power Chroma's distributed architecture.\n\n## Architecture Overview\n\nChroma employs a client-server architecture where Protocol Buffers serve as the contract between components. 
The protobuf definitions are centralized in the `idl/` directory and used to generate code for multiple language runtimes including Python, JavaScript, Go, and Rust.\n\n```mermaid\ngraph TD\n    subgraph \"Client Layer\"\n        JS[JavaScript Client]\n        PY[Python Client]\n        GO[Go Client]\n    end\n    \n    subgraph \"IDL Definitions\"\n        PROTO[Protocol Buffer Definitions]\n    end\n    \n    subgraph \"Server Layer\"\n        API[API Server]\n        COORD[Coordinator Service]\n        QUERY[Query Executor]\n    end\n    \n    JS -->|Generated TS Bindings| PROTO\n    PY -->|Generated Python Stub| PROTO\n    GO -->|Generated Go Code| PROTO\n    API -->|gRPC/prost| PROTO\n    COORD -->|gRPC/prost| PROTO\n    QUERY -->|gRPC/prost| PROTO\n```\n\n## Proto Definitions Structure\n\n### Core Service Definitions\n\nThe main protobuf definitions are organized in `idl/chromadb/proto/`:\n\n| Proto File | Purpose | Key Messages |\n|------------|---------|--------------|\n| `chroma.proto` | Core data types and collection operations | Collection, Database, OperationRecord |\n| `coordinator.proto` | Coordinator service for cluster management | Tenant, Database, Segment operations |\n| `query_executor.proto` | Query execution service interface | Query requests and responses |\n\n### Data Type Coverage\n\nThe protobuf definitions cover all core data types used throughout Chroma:\n\n| Data Type | Usage |\n|-----------|-------|\n| `Vector` | Embedding vectors with scalar encoding |\n| `OperationRecord` | CRUD operations for records |\n| `LogRecord` | Write-ahead log entries with offsets |\n| `Metadata` | Key-value metadata for filtering |\n| `Collection` | Collection configuration and schema |\n| `Cmek` | Customer-managed encryption keys |\n\n## Rust Type Conversions\n\nChroma's Rust backend uses protobuf-generated types and converts them to idiomatic Rust types through `TryFrom` implementations. 
This pattern ensures type safety and clean separation between the wire format and internal representations.\n\n### Record Conversions\n\nThe `rust/types/src/record.rs` file contains conversion logic between protobuf and Rust types:\n\n```mermaid\ngraph LR\n    A[chroma_proto::LogRecord] -->|TryFrom| B[LogRecord Rust]\n    A2[chroma_proto::Vector] -->|TryFrom| B2[(Vec<f32>, ScalarEncoding)]\n```\n\n**OperationRecord Conversion** (资料来源：[rust/types/src/record.rs:recordinfo]())\n\nThe `OperationRecord` conversion extracts metadata and document fields from protobuf representations:\n\n```rust\n// Metadata is extracted from proto, with document potentially in metadata\nlet (metadata, document) = match operation_record_proto.metadata {\n    Some(proto_metadata) => match UpdateMetadata::try_from(proto_metadata) {\n        Ok(mut metadata) => {\n            let document = metadata.remove(CHROMA_DOCUMENT_KEY);\n            match document {\n                Some(UpdateMetadataValue::Str(document)) => {\n                    (Some(metadata), Some(document))\n                }\n                _ => (Some(metadata), None),\n            }\n        }\n        Err(e) => return Err(RecordConversionError::...),\n    },\n    None => (None, None),\n};\n```\n\n### Vector Type Conversions\n\nVectors are stored with their encoding information (资料来源：[rust/types/src/record.rs:vector]())\n\n```rust\nimpl TryFrom<chroma_proto::Vector> for (Vec<f32>, ScalarEncoding) {\n    type Error = VectorConversionError;\n    // Conversion implementation\n}\n```\n\n## Metadata Filtering Types\n\nThe metadata system supports rich filtering expressions defined in protobuf and converted to Rust types (资料来源：[rust/types/src/metadata.rs:metadata-types]())\n\n### Document Operators\n\n```mermaid\ngraph TD\n    DOC_OPS[WhereDocumentOperator] --> Contains\n    DOC_OPS --> NotContains\n    DOC_OPS --> Regex\n    DOC_OPS --> NotRegex\n```\n\n| Operator | Description |\n|----------|-------------|\n| `Contains` | 
Document contains substring |\n| `NotContains` | Document does not contain substring |\n| `Regex` | Document matches regex pattern |\n| `NotRegex` | Document does not match regex pattern |\n\n### Metadata Expression Structure\n\n```rust\npub struct MetadataExpression {\n    pub key: String,\n    pub comparison: MetadataComparison,\n}\n```\n\nMetadata comparisons support both primitive types (strings, integers, floats, booleans) and set operations.\n\n## Collection Schema Definitions\n\nSchema definitions in `rust/types/src/collection_schema.rs` define how collections are configured for indexing (资料来源：[rust/types/src/collection_schema.rs:schema-struct]())\n\n### Schema Builder Pattern\n\nThe `Schema` struct provides a fluent builder API for index configuration:\n\n```mermaid\ngraph TD\n    SCHEMA[Schema::default] --> CREATE_INDEX[.create_index]\n    CREATE_INDEX --> VALIDATE[Validate Index Config]\n    VALIDATE -->|Valid| RETURN[Return Self]\n    VALIDATE -->|Invalid| ERROR[SchemaBuilderError]\n```\n\n**Index Creation Example** (资料来源：[rust/types/src/collection_schema.rs:create-index-example]())\n\n```rust\nlet schema = Schema::default()\n    .create_index(None, VectorIndexConfig {\n        space: Some(Space::Cosine),\n        embedding_function: None,\n        source_key: None,\n        hnsw: None,\n        spann: None,\n    }.into())?\n    .create_index(Some(\"category\"), StringInvertedIndexConfig {}.into())?;\n```\n\n### Supported Index Types\n\n| Index Type | Configuration | Applies To |\n|------------|---------------|------------|\n| `VectorIndexConfig` | HNSW, Space (Cosine/L2/IP), embedding function | `#embedding` key only |\n| `StringInvertedIndexConfig` | String indexing | Custom string keys |\n| `FtsIndexConfig` | Full-text search | Document key |\n\n## CMEK (Customer-Managed Encryption Keys)\n\nChroma supports customer-managed encryption keys through the `Cmek` type defined in protobuf (资料来源：[rust/types/src/collection_schema.rs:cmek]())\n\n### CMEK 
Provider Configuration\n\n| Provider | Validation Pattern | Resource Format |\n|----------|-------------------|-----------------|\n| GCP | `CMEK_GCP_RE` regex | GCP resource identifier |\n\n```rust\nimpl Cmek {\n    pub fn gcp(resource: String) -> Self;\n    pub fn validate_pattern(&self) -> bool;\n}\n```\n\n## Topology and Region Management\n\nFor multi-region deployments, Chroma uses topology definitions (资料来源：[rust/types/src/topology.rs:topology]())\n\n### Provider Region Structure\n\n```mermaid\nclassDiagram\n    class ProviderRegion {\n        +name: RegionName\n        +provider: String\n        +region: String\n        +config: T\n    }\n    \n    class Topology {\n        +name: TopologyName\n        +regions: Vec~RegionName~\n        +config: T\n    }\n```\n\n| Component | Description |\n|-----------|-------------|\n| `ProviderRegion` | Single cloud provider region configuration |\n| `Topology` | Collection of regions forming a deployment topology |\n\n## Code Generation Pipeline\n\n### Build Process\n\nProtobuf definitions are compiled to target languages using `protoc` and language-specific plugins (资料来源：[go/README.md:protobuf-setup]())\n\n```mermaid\ngraph LR\n    A[.proto files] --> B[protoc compiler]\n    B -->|Python| C[Python stubs]\n    B -->|Go| D[Go gRPC code]\n    B -->|JS/TS| E[TypeScript definitions]\n    B -->|Rust| F[Rust + prost]\n```\n\n### Required Tools\n\n| Tool | Purpose |\n|------|---------|\n| `protoc` | Protocol Buffer compiler |\n| `protoc-gen-go` | Go code generation |\n| `protoc-gen-go-grpc` | Go gRPC service generation |\n\n### Generated API Patterns\n\nThe generated TypeScript API in `clients/js/packages/chromadb-core/src/generated/api.ts` follows standard gRPC-web patterns (资料来源：[clients/js/packages/chromadb-core/src/generated/api.ts:fetch-pattern]())\n\n```typescript\nconst localVarFetchArgs = ApiApiFetchParamCreator(configuration).version(options);\nreturn (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => {\n 
   return fetch(\n        basePath + localVarFetchArgs.url,\n        localVarFetchArgs.options,\n    ).then((response) => {\n        // Handle response by content type and status\n        if (response.status === 200) {\n            if (mimeType === \"application/json\") {\n                return response.json();\n            }\n        }\n        // Error handling for 401, 404, 409, 500\n    });\n};\n```\n\n### Error Code Mapping\n\nError types are mapped from Rust/Arrow errors to Chroma error codes (资料来源：[rust/blockstore/src/arrow/root.rs:error-mapping]())\n\n| Arrow Error Type | Chroma Error Code |\n|-----------------|-------------------|\n| `IOError` | `Internal` |\n| `ArrowError` | `Internal` |\n| `LayoutVerificationError` | `Internal` |\n| `FromBytesError` variants | `InvalidArgument` / `Internal` |\n\n## Message Format Details\n\n### Arrow Block Serialization\n\nBinary data in protobuf messages uses Arrow IPC format for efficient columnar storage (资料来源：[rust/blockstore/src/arrow/root.rs:arrow-reader]())\n\n```rust\nlet arrow_reader = arrow::ipc::reader::FileReader::try_new(&mut cursor, None);\nlet record_batch = match arrow_reader {\n    Ok(mut reader) => match reader.next() {\n        Some(Ok(batch)) => batch,\n        Some(Err(e)) => return Err(FromBytesError::ArrowError(e)),\n        None => return Err(FromBytesError::NoDataError),\n    },\n    Err(e) => return Err(FromBytesError::ArrowError(e)),\n};\n```\n\n### IPC Footer Structure\n\nThe Arrow footer format requires:\n- ARROW_MAGIC header (6 bytes)\n- Footer content\n- Footer length (4 bytes)\n- Footer checksum\n\n## See Also\n\n- [Rust Types Module](rust/types/src/) - Internal Rust type definitions\n- [Block Store Architecture](rust/blockstore/) - Data persistence with Arrow\n- [Client SDKs](clients/) - Multi-language client implementations\n- [Go Server Implementation](go/) - Server-side gRPC implementation\n\n---\n\n<a id='python-client-sdk'></a>\n\n## Python Client SDK\n\n### 相关页面\n\n相关主题：[Getting 
Started with Chroma](#getting-started), [JavaScript/TypeScript Client SDKs](#javascript-client-sdk), [Embedding Functions Integration](#embedding-functions)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [chromadb/api/client.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/client.py)\n- [chromadb/api/async_client.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/async_client.py)\n- [chromadb/api/models/Collection.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/models/Collection.py)\n- [chromadb/api/types.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/types.py)\n- [clients/python/pyproject.toml](https://github.com/chroma-core/chroma/blob/main/clients/python/pyproject.toml)\n</details>\n\n# Python Client SDK\n\nThe Chroma Python Client SDK is the official Python library for interacting with Chroma, an open-source vector database designed for AI applications. This SDK provides a complete interface for managing collections, storing embeddings, and performing similarity searches across vector data.\n\n## Overview\n\nChroma positions itself as the open-source data infrastructure for AI, offering developers a streamlined way to incorporate vector search capabilities into their applications. The Python Client SDK serves as the primary client library for Python developers, enabling seamless integration with Chroma's vector database capabilities.\n\nThe SDK supports two primary modes of operation: **embedded mode**, where the database runs locally within the same process, and **client-server mode**, where the Python client communicates with a remote Chroma server via HTTP. 
This flexibility allows developers to choose the deployment architecture that best fits their application requirements, whether they need a lightweight local setup for development and testing or a scalable server-based deployment for production environments.\n\nFor Python-specific installations, developers can choose between the full `chromadb` package, which includes all embedding libraries as dependencies, or the `chromadb-client` package, which is a lightweight HTTP-only client that connects to a running Chroma server. The installation is straightforward via pip, making it accessible for projects of all sizes.\n\nThe SDK is designed with developer productivity in mind, providing intuitive APIs for common operations like adding documents, querying collections, and managing metadata. It handles the complexity of embedding generation and vector storage behind a clean, Pythonic interface, allowing developers to focus on building their AI applications rather than managing low-level database operations.\n\n## Architecture\n\nThe Python Client SDK follows a layered architecture that separates concerns between the client interface, API communication, and data models. Understanding this architecture helps developers effectively use the SDK and troubleshoot any issues that may arise during development.\n\n```mermaid\ngraph TD\n    A[Application Code] --> B[ChromaClient / AsyncChromaClient]\n    B --> C[Collection API]\n    B --> D[Embedding Functions]\n    C --> E[REST API Layer]\n    D --> F[External Embedding Providers]\n    E --> G[Chroma Server]\n    E --> H[Embedded Mode]\n    G --> I[Persistent Storage]\n    H --> I\n```\n\n### Client Layer\n\nThe client layer forms the entry point for all SDK operations. 
Chroma provides two client implementations: the synchronous `Client` class for traditional Python applications and the `AsyncClient` class for asynchronous applications built with async/await patterns.\n\nThe synchronous client is suitable for most use cases, providing blocking API calls that execute immediately and return results. This approach is familiar to developers coming from traditional Python backgrounds and works well in scripts, batch processing jobs, and web applications that don't require high concurrency.\n\nThe asynchronous client, on the other hand, is designed for applications that need to handle many concurrent operations efficiently, such as web servers built on frameworks like FastAPI or Starlette. By using Python's asyncio library, the async client can perform multiple network operations concurrently, improving throughput in I/O-bound scenarios.\n\nBoth clients share a similar interface, with the async client simply wrapping the underlying HTTP calls with async/await syntax. This consistency makes it easy to switch between synchronous and asynchronous code as requirements evolve.\n\n### Collection Management\n\nCollections serve as the primary organizational unit in Chroma, analogous to tables in traditional relational databases or buckets in object storage. Each collection contains a set of vectors along with their associated metadata, documents, and unique identifiers.\n\nThe SDK provides a comprehensive collection API that supports creating new collections, retrieving existing ones, listing all collections in the database, and deleting collections when they're no longer needed. Collections can be configured with specific settings at creation time, including the embedding function to use for auto-embedding documents and the name of the collection for identification purposes.\n\nCollections maintain a schema-like structure through their use of metadata. 
While Chroma is schemaless in the traditional sense, the metadata associated with vectors allows developers to impose structure on their data for filtering and organization purposes.\n\n### Data Model\n\nThe data model in Chroma revolves around four core concepts: vectors, documents, metadata, and IDs. Each record in a collection consists of these four components, providing a flexible yet structured way to store and retrieve information.\n\nVectors are the mathematical representations of data in embedding space. They can be provided directly by the application or generated automatically using embedding functions. The SDK accepts vectors as lists of floating-point numbers, making it compatible with output from virtually any embedding model.\n\nDocuments are the original text or content that was transformed into vectors. Storing documents alongside their vectors enables applications to retrieve the original content during query operations without needing to maintain a separate document store.\n\nMetadata provides contextual information about each record. Examples include the source of the document, timestamps, user IDs, or any other application-specific attributes. Metadata can be used for filtering during queries, allowing applications to narrow search results based on specific criteria.\n\nIDs uniquely identify each record within a collection. The SDK accepts string identifiers, giving applications flexibility in how they choose to name and reference their data. Common patterns include using UUIDs, meaningful string identifiers derived from the document content, or sequential numbers.\n\n## Installation and Setup\n\nInstalling the Chroma Python Client SDK is straightforward using pip, Python's package manager. 
The SDK is available in two variants to accommodate different use cases and deployment scenarios.\n\n```bash\npip install chromadb\n```\n\nThis command installs the full Chroma package, which includes all core functionality plus built-in support for various embedding providers. This variant is recommended for most users who want a complete, self-contained installation.\n\n```bash\npip install chromadb-client\n```\n\nThis command installs only the HTTP client library, which is useful for scenarios where the Chroma server runs separately or where a minimal dependency footprint is required. This variant connects to Chroma servers via HTTP and doesn't include embedding provider libraries.\n\n## Client Initialization\n\nInitializing the Chroma client depends on the deployment mode and desired configuration. The SDK provides flexible initialization options to accommodate different environments.\n\n### Embedded Mode\n\nIn embedded mode, Chroma runs entirely within your Python process, storing data locally. This is ideal for development, testing, and small-scale deployments where a separate server isn't required.\n\n```python\nimport chromadb\n\n# Ephemeral client: data is held in memory only\nclient = chromadb.Client()\n\n# Persistent client: data is stored on disk and survives restarts\nclient = chromadb.PersistentClient(path="./chroma")\n```\n\nThe default `chromadb.Client()` creates an ephemeral client that keeps all data in memory, so data is lost when the process exits. To persist data across process restarts, use `chromadb.PersistentClient(path=...)`, which stores data in a local directory — suitable for applications that need persistent storage without the complexity of a separate server process.\n\n### Client-Server Mode\n\nIn client-server mode, your Python application connects to a Chroma server running separately, either locally or on a remote machine. This architecture supports larger-scale deployments and enables sharing data across multiple client applications.\n\n```python\nimport chromadb\n\nclient = chromadb.HttpClient(\n    host="localhost",\n    port=8000\n)\n```\n\nThe HTTP client communicates with the server using REST API calls, handling serialization, network transport, and error handling transparently. 
This mode requires a Chroma server to be running and accessible at the specified host and port.\n\n### Configuration Options\n\nThe client supports various configuration options to customize its behavior for specific use cases. These options can be provided during client initialization to control aspects like SSL/TLS settings, authentication, and connection pooling.\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `host` | string | \"localhost\" | Server hostname or IP address |\n| `port` | integer | 8000 | Server port number |\n| `ssl` | boolean | false | Enable SSL/TLS encryption |\n| `headers` | dict | None | Custom HTTP headers for requests |\n| `tenant` | string | None | Tenant identifier for multi-tenant setups |\n| `database` | string | None | Database name for organized data storage |\n\n## Collection Operations\n\nCollections are the central organizing structure in Chroma, grouping related vectors, documents, and metadata together. The SDK provides a comprehensive API for creating, managing, and interacting with collections.\n\n### Creating a Collection\n\nCollections are created using the client's `create_collection` method, which accepts a name and optional configuration parameters.\n\n```python\ncollection = client.create_collection(\n    name=\"my-documents\",\n    metadata={\"description\": \"Document collection for RAG\"},\n    get_or_create=True\n)\n```\n\nThe `get_or_create` parameter is particularly useful in production applications, as it prevents errors if a collection with the same name already exists. When set to `True`, the method returns the existing collection if one exists or creates a new one if it doesn't.\n\n### Adding Data\n\nData is added to collections using the `add` method, which accepts vectors, documents, metadata, and unique identifiers. 
All parameters must be provided as lists of equal length, with each index representing a single record.\n\n```python\ncollection.add(\n    documents=[\"This is the first document\", \"This is the second document\"],\n    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],\n    ids=[\"doc-1\", \"doc-2\"],\n    embeddings=[[1.2, 2.1, 3.5], [1.1, 2.0, 3.4]]\n)\n```\n\nThe SDK supports automatic embedding generation when embedding functions are configured for the collection. In this case, documents can be provided without explicit embeddings, and the SDK will generate the vector representations automatically.\n\n### Querying Data\n\nQuerying is performed using the `query` method, which accepts query text or query vectors and returns the most similar results based on vector similarity.\n\n```python\nresults = collection.query(\n    query_texts=[\"search terms here\"],\n    n_results=2,\n    where={\"source\": \"notion\"},\n    include=[\"documents\", \"metadatas\", \"distances\"]\n)\n```\n\nThe `where` parameter enables filtering results based on metadata conditions, allowing applications to narrow search results to specific subsets of data. The `include` parameter controls which data components are returned, helping optimize bandwidth and processing for applications that don't need all available information.\n\nQuery results include the matched document IDs, the documents themselves, associated metadata, and distance scores indicating how similar each result is to the query. Lower distance scores indicate higher similarity, with zero representing an exact match.\n\n### Updating and Deleting Data\n\nThe SDK supports updating existing records and deleting unwanted data from collections. 
These operations are essential for maintaining data accuracy and managing collection lifecycle.\n\n```python\ncollection.update(\n    ids=[\"doc-1\"],\n    documents=[\"Updated document content\"],\n    metadatas=[{\"source\": \"notion\", \"updated\": True}]\n)\n\ncollection.delete(\n    ids=[\"doc-2\"],\n    where={\"source\": \"google-docs\"}\n)\n```\n\nUpdate operations modify existing records identified by their IDs, replacing the specified fields while preserving unchanged data. Delete operations remove records matching the provided ID or metadata filters, with the ability to delete multiple records simultaneously.\n\n## Querying and Filtering\n\nChroma provides powerful querying and filtering capabilities that enable precise retrieval of relevant results. Understanding these capabilities is essential for building effective vector search applications.\n\n### Vector Similarity Search\n\nThe core query operation performs vector similarity search, finding the most similar records to a given query vector or text. The SDK handles text queries by first embedding them using the collection's configured embedding function.\n\nResults are ranked by similarity, with the most similar results appearing first. The `n_results` parameter controls how many results are returned, allowing applications to balance result completeness with performance considerations.\n\n### Metadata Filtering\n\nMetadata filtering narrows search results based on document attributes stored alongside vectors. 
This is particularly useful for applications that need to search within specific subsets of data, such as documents from a particular source or within a date range.\n\n```python\nresults = collection.query(\n    query_texts=[\"search terms\"],\n    where={\n        \"source\": \"notion\",\n        \"category\": {\"$in\": [\"technical\", \"documentation\"]}\n    }\n)\n```\n\nThe filter syntax supports various operators including equality, inequality, comparison operators for numeric ranges, and set membership tests. Complex filter expressions can be constructed using logical operators to combine multiple conditions.\n\n### Result Inclusion\n\nThe `include` parameter controls which data components are included in query results. This allows applications to optimize their queries by requesting only the data they need.\n\n| Include Option | Description |\n|---------------|-------------|\n| `embeddings` | Include the full vector for each result |\n| `documents` | Include the original document text |\n| `metadatas` | Include the associated metadata |\n| `distances` | Include similarity distance scores |\n\nBy default, only documents and distances are included in results. Applications should specify only the needed components to minimize bandwidth usage and processing overhead.\n\n## Embedding Functions\n\nEmbedding functions transform text into vector representations that capture semantic meaning. Chroma supports multiple embedding providers, allowing applications to choose the approach that best fits their requirements.\n\n### Built-in Embeddings\n\nFor simple use cases, Chroma includes a default embedding function that works out of the box without additional configuration. This function is suitable for development and testing but may not provide the best quality embeddings for production applications.\n\n### External Providers\n\nFor production applications requiring higher quality embeddings, Chroma supports integration with external embedding services. 
These services provide state-of-the-art embedding models that can significantly improve search quality.\n\nSupported providers include OpenAI's embedding models, which offer excellent quality for English text, and various open-source alternatives. Each provider has its own configuration requirements, typically involving API keys and model selection parameters.\n\nConfiguration is typically done at the collection level, allowing different collections to use different embedding functions if needed. This flexibility supports applications that work with multiple data types or require different embedding strategies for different use cases.\n\n### Custom Embedding Functions\n\nFor specialized use cases, applications can implement custom embedding functions by conforming to the SDK's embedding function interface. This allows integration with any embedding model or service that can be accessed from Python.\n\nCustom functions receive a list of texts and return a corresponding list of vectors. They can implement any logic needed, including batching, caching, and error handling, giving applications full control over the embedding process.\n\n## Error Handling\n\nThe SDK provides comprehensive error handling to help applications gracefully manage failure scenarios. Understanding the error types and how to handle them is important for building robust applications.\n\n### Connection Errors\n\nConnection errors occur when the client cannot establish communication with the Chroma server. 
These errors can result from network issues, server unavailability, or incorrect server configuration.\n\n```python\ntry:\n    collection = client.get_collection("my-collection")\nexcept chromadb.errors.ChromaError:\n    print("Unable to connect to Chroma server")\n```\n\nApplications should implement appropriate retry logic and user-facing error messages when connection errors occur, as these situations typically require intervention beyond the application's control.\n\n### Collection Not Found\n\nOperations on non-existent collections raise specific errors that can be caught and handled appropriately.\n\n```python\ntry:\n    collection = client.get_collection("non-existent")\nexcept chromadb.errors.NotFoundError:\n    print("Collection does not exist")\n```\n\nThe `get_or_create` parameter available during collection creation provides an alternative to explicit error handling when the existence of a collection is uncertain.\n\n### Invalid Arguments\n\nInvalid argument errors indicate problems with the data or parameters provided to SDK methods. These errors typically result from bugs in application code or invalid user input.\n\nExamples include malformed IDs, vectors of incorrect dimensions, mismatched list lengths, and invalid filter expressions. The error messages provide guidance on what parameter is problematic, making debugging straightforward.\n\n## Best Practices\n\nFollowing best practices ensures optimal performance, reliability, and maintainability when using the Python Client SDK in production applications.\n\n### Connection Management\n\nApplications should create a single client instance and reuse it across the application rather than creating new clients for each operation. 
The client manages connection pooling and state internally, and creating multiple instances can lead to resource waste and inconsistent state.\n\n```python\nclient = chromadb.HttpClient(host=\"localhost\", port=8000)\n\ndef get_collection():\n    return client.get_collection(\"my-documents\")\n```\n\nFor applications that require clean-up, the client should be properly closed when the application terminates, ensuring any pending operations complete and resources are released.\n\n### Batch Operations\n\nWhen adding or querying large numbers of records, batching operations improves performance by reducing network overhead and allowing the server to optimize processing. The SDK handles batching internally for the most common operations, but applications should be aware of batch size considerations.\n\n### Error Recovery\n\nProduction applications should implement comprehensive error handling that distinguishes between recoverable errors (like temporary network issues) and non-recoverable errors (like invalid input). Recoverable errors can be handled with retry logic, while non-recoverable errors should surface appropriate feedback to users.\n\n## Related Documentation\n\nFor further information on using Chroma's Python Client SDK, the following resources provide additional context and examples.\n\nThe official Chroma documentation at trychroma.com provides comprehensive guides on getting started, deployment options, and advanced usage patterns. The documentation includes tutorials, API reference material, and example applications that demonstrate real-world usage.\n\nThe GitHub repository at github.com/chroma-core/chroma contains the complete source code for Chroma, including the Python Client SDK. Developers interested in understanding implementation details or contributing to the project can explore the codebase directly.\n\nThe Chroma Discord community provides a forum for asking questions, sharing experiences, and connecting with other developers using Chroma. 
The community is an excellent resource for troubleshooting issues and discovering best practices from experienced users.\n\n---\n\n<a id='javascript-client-sdk'></a>\n\n## JavaScript/TypeScript Client SDKs\n\n### 相关页面\n\n相关主题：[Python Client SDK](#python-client-sdk), [Getting Started with Chroma](#getting-started)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [clients/js/packages/chromadb-core/src/ChromaClient.ts](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-core/src/ChromaClient.ts)\n- [clients/new-js/packages/chromadb/src/chroma-client.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/chroma-client.ts)\n- [clients/new-js/packages/chromadb/src/api/sdk.gen.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/api/sdk.gen.ts)\n- [clients/js/packages/chromadb-core/src/Collection.ts](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-core/src/Collection.ts)\n- [clients/js/packages/chromadb/package.json](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)\n- [clients/js/packages/chromadb-client/package.json](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/package.json)\n- [clients/new-js/packages/chromadb/package.json](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/package.json)\n- [clients/new-js/packages/ai-embeddings/all/package.json](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/package.json)\n- [clients/js/examples/node/README.md](https://github.com/chroma-core/chroma/blob/main/clients/js/examples/node/README.md)\n\n</details>\n\n# JavaScript/TypeScript Client SDKs\n\nChroma provides comprehensive JavaScript and TypeScript client libraries for interacting with Chroma servers from browser and Node.js environments. 
The SDKs offer both low-level HTTP API access and high-level abstractions for collections, embedding functions, and query operations.\n\n## Architecture Overview\n\nChroma maintains two generations of JavaScript clients to support different use cases and ecosystem requirements.\n\n```mermaid\ngraph TD\n    A[Chroma Server] <--> B[HTTP API];\n    B <--> C[Legacy JS Client v2.4.7];\n    B <--> D[new-js Client v3.4.5];\n    C --> E[chromadb<br/>Bundled];\n    C --> F[chromadb-client<br/>Peer Dependencies];\n    D --> G[ChromaClient];\n    D --> H[Embedding Functions<br/>via @chroma-core/*];\n```\n\n### Client Package Versions\n\n| Package | Version | Type | Description |\n|---------|---------|------|-------------|\n| `chromadb` (legacy) | 2.4.7 | npm | Bundled package with all embedding libraries included |\n| `chromadb-client` (legacy) | 2.4.7 | npm | Client package requiring peer dependencies |\n| `chromadb` (new-js) | 3.4.5 | npm | Modern client with modular architecture |\n| `@internal/chromadb-core` | 2.4.7 | workspace | Shared core functionality |\n\n资料来源：[clients/js/packages/chromadb/package.json:3](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)  \n资料来源：[clients/new-js/packages/chromadb/package.json:3](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/package.json)\n\n## Package Structure\n\n### Legacy Client (v2.x)\n\nThe legacy client provides two distribution options:\n\n```mermaid\ngraph LR\n    A[chromadb] --> B[chromadb-core<br/>+ All Embeddings];\n    C[chromadb-client] --> D[chromadb-core<br/>+ Peer Dependencies];\n    B --> E[@google/generative-ai];\n    B --> F[@xenova/transformers];\n    B --> G[cohere-ai];\n    D --> E;\n    D --> F;\n    D --> G;\n```\n\n| Package | Use Case | Embedding Libraries |\n|---------|----------|---------------------|\n| `chromadb` | Simple projects wanting everything included | Bundled with all providers |\n| `chromadb-client` | Projects needing 
specific embedding libraries | Peer dependencies required |\n\n资料来源：[clients/js/packages/chromadb-client/package.json:1-55](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/package.json)\n\n### New-JS Client (v3.x)\n\nThe new JavaScript client uses a modular workspace architecture with the following structure:\n\n```\nclients/new-js/\n├── packages/\n│   ├── chromadb/                    # Core client package\n│   │   └── src/\n│   │       ├── chroma-client.ts     # Main client implementation\n│   │       └── api/\n│   │           └── sdk.gen.ts       # Generated API client\n│   └── ai-embeddings/\n│       ├── common/                  # Shared utilities\n│       ├── all/                     # Aggregated providers\n│       ├── chroma-bm25/             # BM25 sparse embeddings\n│       ├── cohere/                  # Cohere provider\n│       ├── google-gemini/           # Google Gemini provider\n│       ├── huggingface-server/      # HuggingFace server\n│       ├── jina/                    # Jina AI provider\n│       ├── together-ai/             # Together AI provider\n│       └── voyageai/                # Voyage AI provider\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/all/package.json:1-45](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/package.json)\n\n## Module Exports Configuration\n\nBoth client generations support modern JavaScript module resolution with ESM and CommonJS exports.\n\n### Export Structure\n\n```mermaid\ngraph TD\n    A[Package Entry] --> B{Import Type};\n    B -->|ESM import| C[.mjs / .d.ts];\n    B -->|CommonJS require| D[.cjs / .d.cts];\n    C --> E[dist/*.mjs];\n    D --> F[dist/cjs/*.cjs];\n```\n\n| Export Condition | Entry Point | Type Definitions |\n|-------------------|-------------|------------------|\n| ESM `import` | `dist/chromadb.mjs` | `dist/chromadb.d.ts` |\n| CommonJS `require` | `dist/cjs/chromadb.cjs` | `dist/cjs/chromadb.d.cts` 
|\n\n资料来源：[clients/js/packages/chromadb/package.json:12-25](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)  \n资料来源：[clients/new-js/packages/chromadb/package.json:12-25](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/package.json)\n\n## Client Initialization\n\n### Basic Connection\n\n```typescript\nimport { ChromaClient } from \"chromadb\";\n\n// Initialize the client\nconst chroma = new ChromaClient({ \n  path: \"http://localhost:8000\" \n});\n```\n\n资料来源：[clients/js/packages/chromadb-client/README.md:15-20](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n\n### With Embedding Function\n\n```typescript\nimport { ChromaClient } from 'chromadb';\nimport { TogetherAIEmbeddingFunction } from '@chroma-core/together-ai';\n\nconst embedder = new TogetherAIEmbeddingFunction({\n  apiKey: 'your-api-key',\n  modelName: 'togethercomputer/m2-bert-80M-8k-retrieval',\n});\n\nconst client = new ChromaClient({\n  path: 'http://localhost:8000',\n});\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/together-ai/README.md:1-35](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/together-ai/README.md)\n\n## Collection Operations\n\nCollections are the primary data structure for storing and querying embeddings.\n\n### Create Collection\n\n```typescript\nconst collection = await chroma.createCollection({\n  name: \"my-collection\",\n  embeddingFunction: embedder,  // Optional\n  metadata: {                    // Optional\n    description: \"My document collection\"\n  }\n});\n```\n\n### Add Documents\n\n```typescript\nawait collection.add({\n  ids: [\"id1\", \"id2\"],\n  embeddings: [                  // Optional if embedding function provided\n    [1.1, 2.3, 3.2],\n    [4.5, 6.9, 4.4],\n  ],\n  metadatas: [{ source: \"doc1\" }, { source: \"doc2\" }],\n  documents: [\"Document 1 content\", \"Document 2 
content"],\n});\n```\n\n### Query Collection\n\n```typescript\nconst results = await collection.query({\n  queryEmbeddings: [[1.1, 2.3, 3.2]],  // A list of query vectors\n  // queryTexts: ["Sample query"],     // Or query by text via the embedding function — supply one of the two, not both\n  nResults: 2,                           // Number of results\n  where: { source: "doc1" },             // Optional metadata filter\n  include: ["documents", "metadatas", "distances"]\n});\n```\n\n资料来源：[clients/js/packages/chromadb-client/README.md:25-50](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n\n## Embedding Function Providers\n\nThe new-js client provides first-class support for multiple embedding providers through the `@chroma-core/*` packages.\n\n### Available Providers\n\n| Provider Package | Model Examples | API Required |\n|------------------|----------------|--------------|\n| `@chroma-core/together-ai` | `togethercomputer/m2-bert-80M-8k-retrieval` | Yes |\n| `@chroma-core/voyageai` | `voyage-2` | Yes |\n| `@chroma-core/google-gemini` | `text-embedding-004` | Yes |\n| `@chroma-core/jina` | `jina-embeddings-v2-base-en` | Yes |\n| `@chroma-core/cohere` | Various Cohere models | Yes |\n| `@chroma-core/chroma-bm25` | N/A (local algorithm) | No |\n| `@chroma-core/all` | All providers bundled | Varies |\n\n资料来源：[clients/new-js/packages/ai-embeddings/together-ai/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/together-ai/README.md)  \n资料来源：[clients/new-js/packages/ai-embeddings/voyageai/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/voyageai/README.md)\n\n### Configuration Options\n\nEach embedding function supports common configuration patterns:\n\n```typescript\nconst embedder = new SomeEmbeddingFunction({\n  apiKey: 'your-api-key',          // Or set via environment variable\n  apiKeyEnvVar: 'PROVIDER_API_KEY', // Default env var 
name\n  modelName: 'provider-model-name', // Provider-specific model\n  // Provider-specific options\n  task: 'retrieval.passage',       // Jina example\n  dimensions: 768,                  // Jina example\n  truncate: true,                   // Jina example\n  normalized: true,                 // Jina example\n});\n```\n\n### Environment Variable Configuration\n\n| Provider | Environment Variable |\n|----------|---------------------|\n| Together AI | `TOGETHER_API_KEY` |\n| Voyage AI | `VOYAGE_API_KEY` |\n| Google Gemini | `GEMINI_API_KEY` |\n| Jina | `JINA_API_KEY` |\n\n资料来源：[clients/new-js/packages/ai-embeddings/jina/README.md:1-45](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/jina/README.md)\n\n## Rust Native Bindings\n\nFor performance-critical applications, Chroma provides pre-built Rust native bindings for Node.js.\n\n### Supported Platforms\n\n| Package Name | OS | Architecture | LibC |\n|--------------|-----|--------------|------|\n| `chromadb-js-bindings-darwin-x64` | macOS (Intel) | x64 | N/A |\n| `chromadb-js-bindings-darwin-arm64` | macOS (Apple Silicon) | arm64 | N/A |\n| `chromadb-js-bindings-linux-x64-gnu` | Linux | x64 | glibc |\n| `chromadb-js-bindings-linux-arm64-gnu` | Linux | arm64 | glibc |\n\nAll bindings versions: **1.3.4**  \nMinimum Node.js version: **>= 10**\n\n资料来源：[rust/js_bindings/npm/darwin-x64/package.json:1-18](https://github.com/chroma-core/chroma/blob/main/rust/js_bindings/npm/darwin-x64/package.json)  \n资料来源：[rust/js_bindings/npm/linux-x64-gnu/package.json:1-18](https://github.com/chroma-core/chroma/blob/main/rust/js_bindings/npm/linux-x64-gnu/package.json)\n\n## Build and Development\n\n### Build Scripts\n\n| Command | Description |\n|---------|-------------|\n| `pnpm build` | Build all packages |\n| `pnpm build:core` | Build only `@internal/chromadb-core` |\n| `pnpm build:packages` | Build all packages except core |\n| `pnpm watch` | Watch mode for development |\n| `pnpm test` | Run all 
tests |\n| `pnpm test:functional` | Run functional tests (excluding auth) |\n\n### New-JS Client Build Configuration\n\n```json\n{\n  \"scripts\": {\n    \"build\": \"tsup\",\n    \"watch\": \"tsup --watch\",\n    \"typecheck\": \"tsc --noEmit\"\n  }\n}\n```\n\nBuild tooling uses **tsup** for efficient bundling with TypeScript support.\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/package.json:18-25](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/package.json)  \n资料来源：[clients/js/package.json:22-30](https://github.com/chroma-core/chroma/blob/main/clients/js/package.json)\n\n## Choosing a Client Package\n\n```mermaid\ngraph TD\n    A[Start] --> B{Do you need all embedding providers?};\n    B -->|Yes, convenience| C[chromadb v2.4.7<br/>or @chroma-core/all + chromadb v3.4.5];\n    B -->|No, want to minimize bundle| D{Do you have embedding requirements?};\n    D -->|Yes, specific providers| E[chromadb-client v2.4.7<br/>with peer dependencies];\n    D -->|No, just vector storage| F[chromadb-client v2.4.7<br/>or chromadb v3.4.5];\n    C --> G[Include all embedding libraries];\n    E --> H[Only install needed providers];\n    F --> I[No embedding function needed];\n```\n\n### Decision Matrix\n\n| Requirement | Recommended Package |\n|-------------|--------------------|\n| Simple setup, all features | `chromadb` (bundled) |\n| Minimal bundle size | `chromadb-client` with peer deps |\n| Modern architecture | `chromadb` (new-js v3.4.5) |\n| BM25 sparse embeddings | `@chroma-core/chroma-bm25` |\n| Cloud/Remote providers | `@chroma-core/*` packages |\n\n资料来源：[clients/js/examples/node/README.md:1-45](https://github.com/chroma-core/chroma/blob/main/clients/js/examples/node/README.md)\n\n## TypeScript Support\n\nAll JavaScript client packages include full TypeScript type definitions:\n\n```json\n{\n  \"types\": \"dist/chromadb.d.ts\",\n  \"exports\": {\n    \".\": {\n      \"import\": {\n        \"types\": 
\"./dist/chromadb.d.ts\"\n      },\n      \"require\": {\n        \"types\": \"./dist/cjs/chromadb.d.cts\"\n      }\n    }\n  }\n}\n```\n\nThe TypeScript minimum version requirement is **^5.0.4** for the legacy client and **^5.3.3** for new-js packages.\n\n资料来源：[clients/js/packages/chromadb/package.json:8](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)  \n资料来源：[clients/new-js/packages/ai-embeddings/common/package.json:30](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/package.json)\n\n## Dependencies\n\n### Core Dependencies\n\n| Package | Version | Purpose |\n|---------|---------|---------|\n| `isomorphic-fetch` | ^3.0.0 | HTTP client for browser/Node.js |\n| `ajv` | ^8.12.0 / ^8.17.1 | JSON schema validation |\n| `cliui` | ^8.0.1 | CLI utilities |\n\n### Node.js Compatibility\n\n| Package Generation | Minimum Node.js |\n|--------------------|-----------------|\n| Legacy (v2.x) | >= 14.17.0 |\n| New-JS (v3.x) | >= 20 |\n| Rust Bindings | >= 10 |\n\n资料来源：[clients/js/packages/chromadb-client/package.json:50-55](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/package.json)  \n资料来源：[clients/new-js/packages/ai-embeddings/common/package.json:35-38](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/package.json)\n\n---\n\n<a id='rust-services-architecture'></a>\n\n## Rust Backend Services Architecture\n\n### 相关页面\n\n相关主题：[System Architecture Overview](#architecture-overview), [Data Storage & Blockstore](#data-storage-blockstore)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/blockstore/src/arrow/root.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n- [rust/blockstore/src/arrow/block/types.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/block/types.rs)\n- 
[rust/blockstore/src/arrow/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n- [rust/types/src/execution/operator.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/execution/operator.rs)\n- [rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n- [rust/types/src/topology.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/topology.rs)\n- [rust/types/src/collection_schema.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/collection_schema.rs)\n- [rust/types/src/sparse_posting_block.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/sparse_posting_block.rs)\n- [rust/index/src/spann/types.rs](https://github.com/chroma-core/chroma/blob/main/rust/index/src/spann/types.rs)\n- [rust/worker/src/work_queue/work_queue_client.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/work_queue/work_queue_client.rs)\n- [rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs)\n- [rust/worker/src/execution/orchestration/knn_filter.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/execution/orchestration/knn_filter.rs)\n</details>\n\n# Rust Backend Services Architecture\n\n## Overview\n\nThe Chroma Rust backend provides a high-performance, scalable vector database service built entirely in Rust. 
The architecture follows a distributed systems design with multiple specialized services working together to handle embedding storage, indexing, and similarity search operations.\n\n### Design Goals\n\n| Goal | Description |\n|------|-------------|\n| High Performance | Arrow-based columnar storage for efficient data access |\n| Scalability | Multi-cloud, multi-region deployment support |\n| Reliability | Comprehensive error handling with typed error codes |\n| Flexibility | Multiple index types (HNSW, Spann, Inverted) |\n| Consistency | Ordered and unordered mutation ordering options |\n\n### Core Service Components\n\n```mermaid\ngraph TD\n    subgraph \"Rust Backend Services\"\n        W[Worker Service]\n        BS[Blockstore Service]\n        SYS[Sysdb Service]\n        LOG[Log Service]\n    end\n    \n    W --> BS\n    W --> SYS\n    W --> LOG\n```\n\n## Blockstore Architecture\n\nThe blockstore is the core storage layer in Chroma's Rust backend, providing persistent storage for vector embeddings and associated metadata using Arrow columnar format.\n\n### Arrow-Based Storage\n\nChroma uses Apache Arrow as its primary storage format, which provides:\n\n- **Columnar Layout**: Efficient analytic queries by column\n- **Zero-Copy Reads**: Memory-mapped access patterns\n- **Cross-Language Interop**: Standardized binary format\n- **Compression Support**: Built-in encoding/decoding\n\n资料来源：[rust/blockstore/src/arrow/root.rs:1-40]()\n\n### Blockfile Structure\n\n```mermaid\ngraph TD\n    subgraph \"Blockfile Components\"\n        BF[Blockfile]\n        BR[Block Reader]\n        BW[Block Writer]\n        RM[Root Manager]\n        BM[Block Manager]\n    end\n    \n    BF --> BR\n    BF --> BW\n    BW --> RM\n    BR --> BM\n```\n\n#### Root Management\n\nThe `Root` component manages the root directory structure and file operations:\n\n```rust\npub(super) fn get_all_block_ids_from_bytes(\n    bytes: &[u8],\n    id: Uuid,\n) -> Result<Vec<Uuid>, FromBytesError>\n```\n\nKey 
responsibilities:\n- Reading Arrow IPC files\n- Extracting block metadata and IDs\n- Version validation and verification\n\n资料来源：[rust/blockstore/src/arrow/root.rs:28-50]()\n\n#### Block Layout Verification\n\nThe block layout verification ensures data integrity:\n\n```rust\n#[derive(Error, Debug)]\npub enum ArrowLayoutVerificationError {\n    #[error(\"Buffer length is not 64 byte aligned\")]\n    BufferLengthNotAligned,\n    #[error(\"No record batches in footer\")]\n    NoRecordBatches,\n    #[error(\"More than one record batch in IPC file\")]\n    MultipleRecordBatches,\n    #[error(\"Invalid message type\")]\n    InvalidMessageType,\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:1-30]()\n\n| Error Type | Error Code | Severity |\n|------------|------------|----------|\n| `BufferLengthNotAligned` | Internal | High |\n| `NoRecordBatches` | Internal | High |\n| `MultipleRecordBatches` | Internal | Medium |\n| `InvalidMessageType` | Internal | High |\n| `RecordBatchDecodeError` | Internal | High |\n\n### Blockfile Writer Types\n\nChroma supports two mutation ordering strategies:\n\n| Ordering Type | Description | Use Case |\n|--------------|-------------|----------|\n| `Ordered` | Sequential writes with guaranteed order | Consistent state |\n| `Unordered` | Parallel writes for throughput | High-volume ingestion |\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:1-50]()\n\n```rust\nmatch options.mutation_ordering {\n    BlockfileWriterMutationOrdering::Ordered => {\n        let file = ArrowOrderedBlockfileWriter::from_root(...);\n        Ok(BlockfileWriter::ArrowOrderedBlockfileWriter(file))\n    }\n    BlockfileWriterMutationOrdering::Unordered => {\n        let file = ArrowUnorderedBlockfileWriter::from_root(...);\n        Ok(BlockfileWriter::ArrowUnorderedBlockfileWriter(file))\n    }\n}\n```\n\n### Forking and Versioning\n\nBlockfiles support forking for snapshot isolation:\n\n```rust\nlet new_root = self\n    .root_manager\n    .fork::<K>(\n        
&fork_from,\n        new_id,\n        &options.prefix_path,\n        self.block_manager.default_max_block_size_bytes(),\n    )\n    .await\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:1-30]()\n\n## Type System\n\n### Query Result Types\n\nThe execution layer uses a rich type system for search results:\n\n```rust\n#[derive(Clone, Debug, Default)]\npub struct SearchPayloadResult {\n    pub records: Vec<SearchRecord>,\n}\n```\n\n资料来源：[rust/types/src/execution/operator.rs:1-20]()\n\n#### Search Results Structure\n\n```mermaid\ngraph LR\n    SR[SearchResult] --> SPR[SearchPayloadResult]\n    SPR --> SR_vec[Vec<SearchRecord>]\n    SR --> PLB[pulled_log_bytes]\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `results` | `Vec<SearchPayloadResult>` | Per-query search results |\n| `pulled_log_bytes` | `u64` | Total log bytes fetched for metrics |\n\n### Include Enum\n\nThe `Include` enum controls which fields are returned in query results:\n\n```rust\npub enum Include {\n    #[serde(rename = \"distances\")]\n    Distance,\n    #[serde(rename = \"documents\")]\n    Document,\n    #[serde(rename = \"embeddings\")]\n    Embedding,\n    #[serde(rename = \"metadatas\")]\n    Metadata,\n    #[serde(rename = \"uris\")]\n    Uri,\n}\n```\n\n资料来源：[rust/types/src/api_types.rs:1-30]()\n\n| Include Value | Returned Field | Default Query |\n|---------------|----------------|---------------|\n| `distances` | Distance scores | ✓ |\n| `documents` | Text content | ✓ |\n| `embeddings` | Vector data | ✗ |\n| `metadatas` | Metadata objects | ✓ |\n| `uris` | Resource URIs | ✗ |\n\n#### IncludeList Helper Methods\n\n```rust\nimpl IncludeList {\n    pub fn empty() -> Self { Self(Vec::new()) }\n    \n    pub fn default_query() -> Self {\n        Self(vec![Include::Document, Include::Metadata, Include::Distance])\n    }\n    \n    pub fn default_get() -> Self {\n        Self(vec![Include::Document, Include::Metadata])\n    }\n    \n    pub fn all() -> Self {\n        
Self(vec![Include::Document, Include::Metadata, Include::Distance, \n                  Include::Embedding, Include::Uri])\n    }\n}\n```\n\n资料来源：[rust/types/src/api_types.rs:1-60]()\n\n### Key Filter System\n\nThe `Key` enum represents filterable fields in metadata queries:\n\n```rust\npub enum Key {\n    Document,\n    Embedding,\n    Metadata,\n    Score,\n    MetadataField(String),\n}\n```\n\n资料来源：[rust/types/src/operator.rs:1-30]()\n\n| Key | Purpose | Example |\n|-----|---------|---------|\n| `#document` | Document content | `Key::Document` |\n| `#embedding` | Vector data | `Key::Embedding` |\n| `#metadata` | All metadata | `Key::Metadata` |\n| `#score` | Similarity score | `Key::Score` |\n| `field_name` | Custom metadata | `Key::MetadataField(\"status\")` |\n\n#### Key Factory Methods\n\n```rust\nimpl Key {\n    /// Creates a Key for a custom metadata field\n    pub fn field(name: impl Into<String>) -> Self {\n        Key::MetadataField(name.into())\n    }\n    \n    /// Creates an equality filter: `field == value`\n    pub fn eq(self, value: impl Into<MetadataValue>) -> ComparisonValue { ... 
}\n}\n```\n\n## Index Architecture\n\n### Spann Index\n\nSpann is Chroma's sparse vector index implementation combining HNSW with posting lists:\n\n```rust\n#[derive(Clone, Debug)]\npub struct SpannIndexReader<'me> {\n    pub posting_lists: BlockfileReader<'me, u32, SpannPostingList<'me>>,\n    pub hnsw_index: HnswIndexRef,\n    pub versions_map: BlockfileReader<'me, u32, u32>,\n    pub dimensionality: usize,\n    pub adaptive_search_nprobe: bool,\n    pub params: InternalSpannConfiguration,\n}\n```\n\n资料来源：[rust/index/src/spann/types.rs:1-30]()\n\n#### Spann Index Structure\n\n```mermaid\ngraph TD\n    subgraph \"Spann Index\"\n        SPI[SpannIndexReader]\n        HNSW[HNSW Index]\n        PL[Posting Lists]\n        VM[Versions Map]\n    end\n    \n    SPI --> HNSW\n    SPI --> PL\n    SPI --> VM\n```\n\n| Component | Type | Purpose |\n|-----------|------|---------|\n| `hnsw_index` | `HnswIndexRef` | Approximate nearest neighbor search |\n| `posting_lists` | `BlockfileReader<u32, SpannPostingList>` | Document postings |\n| `versions_map` | `BlockfileReader<u32, u32>` | Document versioning |\n| `adaptive_search_nprobe` | `bool` | Adaptive parameter tuning |\n\n### Sparse Posting Block\n\nThe sparse posting block implements an inverted index structure:\n\n```rust\n#[derive(Debug, Clone)]\npub struct DirectoryBlock(SparsePostingBlock);\n\nimpl DirectoryBlock {\n    pub fn new(max_offsets: &[u32], max_weights: &[f32]) \n        -> Result<Self, SparsePostingBlockError>\n}\n```\n\n资料来源：[rust/types/src/sparse_posting_block.rs:1-40]()\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `max_offset` | `u32` | Largest doc offset in posting block |\n| `max_weight` | `f32` | Maximum weight for term pruning |\n\n## Schema and Index Configuration\n\n### Collection Schema\n\nThe schema system supports multiple index types:\n\n```rust\npub struct Schema {\n    pub fn create_index(\n        mut self,\n        key: Option<&str>,\n        config: IndexConfig,\n    
) -> Result<Self, SchemaBuilderError>\n}\n```\n\n资料来源：[rust/types/src/collection_schema.rs:1-50]()\n\n| Index Type | Key | Description |\n|------------|-----|-------------|\n| `VectorIndexConfig` | `None` | Global vector index (HNSW/Spann) |\n| `StringInvertedIndexConfig` | `Some(field)` | Field-specific FTS |\n| `SparseVectorIndexConfig` | `Some(field)` | Sparse vector index |\n\n### Index Configuration\n\n```rust\npub struct VectorIndexConfig {\n    pub space: Option<Space>,\n    pub embedding_function: Option<EmbeddingFunctionId>,\n    pub source_key: Option<Key>,\n    pub hnsw: Option<HnswConfig>,\n    pub spann: Option<SpannConfig>,\n}\n```\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `space` | `Option<Space>` | `None` | Vector space (Cosine, L2, etc.) |\n| `embedding_function` | `Option<EFId>` | `None` | Embedding function ID |\n| `hnsw` | `Option<HnswConfig>` | `None` | HNSW parameters |\n| `spann` | `Option<SpannConfig>` | `None` | Spann parameters |\n\n## Worker Service Architecture\n\n### Work Queue Client\n\nThe work queue client manages distributed task execution:\n\n```rust\npub enum WorkQueueClientError {\n    ConnectionError(#[from] tonic::Status),\n    RequestError(#[from] tonic::Status),\n}\n```\n\n资料来源：[rust/worker/src/work_queue/work_queue_client.rs:1-20]()\n\n#### Error Code Mapping\n\n| gRPC Code | Chroma Error Code |\n|-----------|-------------------|\n| `Unavailable` | `Unavailable` |\n| `DeadlineExceeded` | `DeadlineExceeded` |\n| `ResourceExhausted` | `ResourceExhausted` |\n| `InvalidArgument` | `InvalidArgument` |\n| `NotFound` | `NotFound` |\n| `PermissionDenied` | `PermissionDenied` |\n\n### Apply Logs Orchestrator\n\nThe apply logs orchestrator handles log-based data synchronization:\n\n```rust\n#[derive(Debug)]\npub struct ApplyLogsOrchestratorResponse {\n    pub job_id: JobId,\n    pub total_records_post_compaction: u64,\n    pub flush_results: Vec<SegmentFlushInfo>,\n    pub 
collection_logical_size_bytes: u64,\n}\n```\n\n资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1-50]()\n\n### KNN Filter Architecture\n\nThe KNN filter orchestrates vector similarity search:\n\n```mermaid\ngraph TD\n    subgraph \"KNN Query Pipeline\"\n        Q[Query Request]\n        F[Filter Logs]\n        K[KNN Search]\n        R[Results]\n    end\n    \n    Q --> F\n    F --> K\n    K --> R\n```\n\n#### KNN Error Handling\n\n```rust\npub enum KnnError {\n    QuantizedSpannCenterSearch(QuantizedSpannError),\n    QuantizedSpannLoadCenter(QuantizedSpannError),\n    InvalidDistanceFunction,\n    Aborted,\n    InvalidSchema(#[from] SchemaError),\n}\n```\n\n资料来源：[rust/worker/src/execution/orchestration/knn_filter.rs:1-40]()\n\n| Error Type | Error Code | Traced |\n|-----------|------------|--------|\n| `QuantizedSpannCenterSearch` | From inner | ✓ |\n| `InvalidDistanceFunction` | `InvalidArgument` | ✗ |\n| `Aborted` | `ResourceExhausted` | ✗ |\n| `Result(_)` | `Internal` | ✓ |\n\n### KNN Filter Output\n\n```rust\n#[derive(Clone, Debug)]\npub struct KnnFilterOutput {\n    pub logs: FetchLogOutput,\n    pub fetch_log_bytes: u64,\n    pub filter_output: FilterOutput,\n    pub dimension: usize,\n    pub distance_function: DistanceFunction,\n}\n```\n\n## Multi-Cloud Topology\n\nChroma supports multi-cloud and multi-region deployments:\n\n```rust\npub struct ProviderRegion<T: Clone + Debug> {\n    pub name: RegionName,\n    pub provider: String,\n    pub region: String,\n    pub config: T,\n}\n```\n\n资料来源：[rust/types/src/topology.rs:1-30]()\n\n### Topology Structure\n\n```mermaid\ngraph TD\n    subgraph \"Multi-Cloud Topology\"\n        Config[Configuration]\n        Topologies[Vec<Topology>]\n        Regions[Vec<ProviderRegion>]\n        Preferred[Preferred Region]\n    end\n    \n    Config --> Topologies\n    Config --> Regions\n    Config --> Preferred\n```\n\n### Configuration Schema\n\n```rust\nstruct RawMultiCloudMultiRegionConfiguration<R, 
T> {\n    preferred: RegionName,\n    regions: Vec<ProviderRegion<R>>,\n    topologies: Vec<Topology<T>>,\n}\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `preferred` | `RegionName` | Default region for operations |\n| `regions` | `Vec<ProviderRegion>` | Available cloud regions |\n| `topologies` | `Vec<Topology>` | Topology configurations |\n\n## Error Handling Framework\n\n### Chroma Error Traits\n\nAll errors implement the `ChromaError` trait:\n\n```rust\npub trait ChromaError: std::error::Error {\n    fn code(&self) -> ErrorCodes;\n    fn should_trace_error(&self) -> bool;\n}\n```\n\n### Error Code Registry\n\n| Code | Category | Description |\n|------|----------|-------------|\n| `InvalidArgument` | Client | Malformed request |\n| `NotFound` | Client | Resource missing |\n| `AlreadyExists` | Client | Duplicate resource |\n| `PermissionDenied` | Security | Access denied |\n| `ResourceExhausted` | Rate | Quota exceeded |\n| `Internal` | Server | System error |\n\n## CLI Integration\n\nThe Rust CLI provides management commands:\n\n```rust\npub enum Command {\n    Browse(BrowseArgs),\n    Copy(CopyArgs),\n    Db(DbSubcommand),\n    Docs,\n    Install(InstallArgs),\n    Login(LoginArgs),\n    Profile(ProfileSubcommand),\n    Run(RunArgs),\n    Support,\n    Update,\n    Vacuum(VacuumArgs),\n}\n```\n\n资料来源：[rust/cli/src/lib.rs:1-30]()\n\n### Available Commands\n\n| Command | Description |\n|---------|-------------|\n| `browse` | Open web interface |\n| `copy` | Copy data between collections |\n| `db` | Database operations |\n| `docs` | Open documentation |\n| `install` | Install Chroma |\n| `login` | Authenticate user |\n| `profile` | Performance profiling |\n| `run` | Start Chroma server |\n| `support` | Open support resources |\n| `update` | Update installation |\n| `vacuum` | Compact storage |\n\n## See Also\n\n- [Blockstore Provider Configuration](rust/blockstore/src/arrow/provider.rs)\n- [Query API 
Types](rust/types/src/api_types.rs)\n- [Index Implementations](rust/index/src/spann/types.rs)\n- [Worker Execution](rust/worker/src/execution/orchestration/)\n\n---\n\n<a id='go-coordinator'></a>\n\n## Go Coordinator & Distributed Systems\n\n### 相关页面\n\n相关主题：[System Architecture Overview](#architecture-overview)\n\n> **Note:** Detailed documentation for this section is pending. The repository context analyzed for this wiki contained only Rust implementation files, so the Go coordinator and distributed-systems sources are not covered here.\n\nRelated Rust components (documented in other sections) include:\n\n- `rust/worker/src/execution/orchestration/` - Orchestrator implementations (ApplyLogsOrchestrator, LogFetchOrchestrator, RegisterOrchestrator)\n- `rust/worker/src/compactor/scheduler.rs` - Compaction scheduler\n- `rust/types/src/topology.rs` - Topology and region management\n- `rust/blockstore/src/arrow/` - Arrow block storage\n- `rust/worker/src/work_queue/work_queue_client.rs` - Work queue client\n\nDocumenting the Go coordinator layer requires the following source files:\n\n- `go/pkg/sysdb/coordinator/coordinator.go`\n- `go/pkg/memberlist_manager/memberlist_manager.go`\n- `go/pkg/leader/election.go`\n- `go/cmd/coordinator/main.go`\n\n
---\n\n<a id='data-storage-blockstore'></a>\n\n## Data Storage & Blockstore\n\n### 相关页面\n\n相关主题：[Rust Backend Services Architecture](#rust-services-architecture), [Embedding Functions Integration](#embedding-functions)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/blockstore/src/arrow/blockfile.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/blockfile.rs)\n- [rust/blockstore/src/arrow/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n- [rust/blockstore/src/types/reader.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/types/reader.rs)\n- [rust/blockstore/src/types/writer.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/types/writer.rs)\n- [rust/blockstore/src/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/provider.rs)\n- [rust/blockstore/src/arrow/block/types.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/block/types.rs)\n- [rust/blockstore/src/arrow/root.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n- [rust/blockstore/src/memory/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/memory/provider.rs)\n- [rust/blockstore/src/arrow/ordered_blockfile_writer.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/ordered_blockfile_writer.rs)\n</details>\n\n# Data Storage & Blockstore\n\n## Overview\n\nThe Chroma blockstore is the core storage subsystem responsible for persisting vector embeddings, metadata, and related data structures. 
It provides a unified abstraction layer over different storage backends (in-memory and Arrow-based) while maintaining performance characteristics suitable for high-throughput vector database operations.\n\nThe blockstore system is architected around the concept of **blockfiles** — persistent, columnar storage structures that organize data by prefix-based partitioning and support efficient key-value operations.\n\n## Architecture\n\n```mermaid\ngraph TD\n    subgraph \"Public API Layer\"\n        BP[BlockfileProvider]\n        BR[BlockfileReader]\n        BW[BlockfileWriter]\n        BF[BlockfileFlusher]\n    end\n\n    subgraph \"Implementation Layer\"\n        ABP[ArrowBlockfileProvider]\n        MBP[MemoryBlockfileProvider]\n        ABF[ArrowUnorderedBlockfileWriter]\n        ABO[ArrowOrderedBlockfileWriter]\n    end\n\n    subgraph \"Storage Layer\"\n        BM[BlockManager]\n        RM[RootManager]\n        ST[Storage]\n    end\n\n    subgraph \"Arrow Format\"\n        R[Root]\n        SB[Sparse Index]\n        B[Blocks]\n    end\n\n    BP --> ABP\n    BP --> MBP\n    BR --> ABP\n    BR --> MBP\n    BW --> ABF\n    BW --> ABO\n\n    ABP --> BM\n    ABP --> RM\n    ABF --> BM\n    ABF --> RM\n    ABO --> BM\n    ABO --> RM\n    BM --> ST\n    RM --> ST\n\n    RM --> R\n    R --> SB\n    R --> B\n```\n\n## Core Components\n\n### BlockfileProvider\n\nThe `BlockfileProvider` is the main entry point for creating readers and writers. 
It abstracts the underlying storage implementation and provides factory methods for blockfile operations.\n\n**Variants:**\n\n| Provider Type | Description | Use Case |\n|---------------|-------------|----------|\n| `HashMapBlockfileProvider` | In-memory blockfile storage | Testing, ephemeral data |\n| `ArrowBlockfileProvider` | Persistent Arrow-based storage | Production workloads |\n\n**API Methods:**\n\n```rust\npub fn storage(&self) -> Option<Arc<Storage>> {\n    match self {\n        BlockfileProvider::ArrowBlockfileProvider(provider) => Some(provider.storage().clone()),\n        BlockfileProvider::HashMapBlockfileProvider(_) => None,\n    }\n}\n\npub fn new_memory() -> Self {\n    BlockfileProvider::HashMapBlockfileProvider(MemoryBlockfileProvider::new())\n}\n```\n\n资料来源：[rust/blockstore/src/provider.rs:1-30](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/provider.rs)\n\n### BlockfileReader\n\nThe `BlockfileReader` trait provides read access to stored data. It supports generic key and value types that implement the `ReadKey` and `ReadValue` traits.\n\n**Trait Definition:**\n\n```rust\npub trait ReadKey<'a>:\n    Key\n    + Into<KeyWrapper>\n    + TryFrom<&'a KeyWrapper, Error = InvalidKeyConversion>\n    + ArrowReadableKey<'a>\n    + Sync\n    + 'a\n{}\n\npub trait ReadValue<'a>: Value + Readable<'a> + ArrowReadableValue<'a> + Sync + 'a {}\n```\n\n资料来源：[rust/blockstore/src/provider.rs:40-55](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/provider.rs)\n\n### BlockfileWriter\n\nThe `BlockfileWriter` trait provides write access to blockfiles with support for ordered and unordered mutation patterns.\n\n**Core Operations:**\n\n| Method | Signature | Description |\n|--------|-----------|-------------|\n| `set` | `set(prefix, key, value)` | Insert or update a key-value pair |\n| `delete` | `delete(prefix, key)` | Remove a key-value pair |\n| `commit` | `commit()` | Finalize and persist the writer |\n\n```rust\npub async fn 
set<\n    K: Key + Into<KeyWrapper> + ArrowWriteableKey,\n    V: Value + Writeable + ArrowWriteableValue,\n>(\n    &self,\n    prefix: &str,\n    key: K,\n    value: V,\n) -> Result<(), Box<dyn ChromaError>>\n```\n\n资料来源：[rust/blockstore/src/types/writer.rs:50-75](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/types/writer.rs)\n\n## Arrow Blockfile Implementation\n\nThe Arrow-based blockfile is the primary production storage implementation, providing efficient columnar storage with Arrow IPC format.\n\n### Blockfile Structure\n\n```mermaid\ngraph TD\n    R[Root File<br/>Root Writer] --> SB[Sparse Index<br/>Block Key Mapping]\n    R --> BH[Block Header<br/>Metadata]\n    \n    SB --> B1[Block 1<br/>Arrow IPC]\n    SB --> B2[Block 2<br/>Arrow IPC]\n    SB --> BN[Block N<br/>Arrow IPC]\n    \n    B1 --> P1[Prefix: \"vec_1\"]\n    B1 --> P2[Prefix: \"vec_2\"]\n```\n\n### ArrowBlockfileProvider\n\nThe `ArrowBlockfileProvider` manages the lifecycle of blockfiles using Arrow IPC format with a root-sparse index architecture.\n\n**Key Features:**\n\n- **Fork Support**: Create new blockfiles from existing ones via forking\n- **CMEK Support**: Optional Customer-Managed Encryption Keys\n- **Block Size Management**: Configurable maximum block sizes\n\n```rust\npub async fn write<K: Key + ArrowWriteableKey, V: ArrowWriteableValue>(\n    &self,\n    options: BlockfileWriterOptions,\n) -> Result<BlockfileWriter, Box<CreateError>>\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:1-50](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n\n### Writer Types\n\n#### ArrowUnorderedBlockfileWriter\n\nProvides high-performance unordered writes optimized for bulk insertion scenarios.\n\n```rust\nimpl ArrowUnorderedBlockfileWriter {\n    pub(super) fn new<K: ArrowWriteableKey, V: ArrowWriteableValue>(\n        id: Uuid,\n        prefix_path: &str,\n        block_manager: BlockManager,\n        root_manager: RootManager,\n        
max_block_size_bytes: usize,\n        cmek: Option<Cmek>,\n    ) -> Self\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/blockfile.rs:50-80](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/blockfile.rs)\n\n#### ArrowOrderedBlockfileWriter\n\nMaintains key ordering within blocks, optimized for range queries and ordered iteration.\n\n资料来源：[rust/blockstore/src/arrow/ordered_blockfile_writer.rs:1-50](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/ordered_blockfile_writer.rs)\n\n### BlockManager and RootManager\n\n| Component | Responsibility |\n|-----------|----------------|\n| `BlockManager` | Manages individual data blocks, handles block creation and commitment |\n| `RootManager` | Manages root files containing sparse indices and metadata |\n\n```rust\n// Forking a new root from an existing one\nlet new_root = self\n    .root_manager\n    .fork::<K>(\n        &fork_from,\n        new_id,\n        &options.prefix_path,\n        self.block_manager.default_max_block_size_bytes(),\n    )\n    .await\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:45-70](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n\n## Error Handling\n\n### Error Types\n\n| Error Type | Description | Error Code |\n|------------|-------------|------------|\n| `BlockNotFound` | Requested block does not exist | Internal |\n| `BlockFetchError` | Failed to retrieve block from storage | Internal |\n| `MigrationError` | Blockfile migration failed | Internal |\n| `IOError` | Storage I/O operation failed | Internal |\n| `ArrowError` | Arrow IPC parsing/encoding error | Internal |\n| `NoRecordBatches` | Invalid Arrow file structure | Internal |\n\n```rust\n#[derive(Error, Debug)]\npub enum ArrowBlockfileError {\n    #[error(\"Block not found\")]\n    BlockNotFound,\n    #[error(\"Could not fetch block\")]\n    BlockFetchError(#[from] GetError),\n    #[error(\"Could not migrate blockfile to new version\")]\n    
MigrationError(#[from] MigrationError),\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/blockfile.rs:25-40](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/blockfile.rs)\n\n### Layout Verification\n\nThe system validates Arrow file layouts to ensure data integrity:\n\n```rust\n#[derive(Error, Debug)]\npub enum ArrowLayoutVerificationError {\n    #[error(\"Buffer length is not 64 byte aligned\")]\n    BufferLengthNotAligned,\n    #[error(\"No record batches in footer\")]\n    NoRecordBatches,\n    #[error(\"More than one record batch in IPC file\")]\n    MultipleRecordBatches,\n    #[error(\"Invalid message type\")]\n    InvalidMessageType,\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:40-60](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/block/types.rs)\n\n## Storage Operations\n\n### Write Flow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant Provider as BlockfileProvider\n    participant Writer as BlockfileWriter\n    participant BM as BlockManager\n    participant RM as RootManager\n    participant Storage\n\n    Client->>Provider: write(options)\n    Provider->>Writer: create_writer()\n    Provider->>RM: create/fork_root()\n    Client->>Writer: set(prefix, key, value)\n    Writer->>BM: create_block()\n    loop Until flush\n        Writer->>Writer: accumulate_data()\n    end\n    Client->>Writer: commit()\n    Writer->>BM: commit_block()\n    Writer->>RM: update_root()\n    RM->>Storage: persist()\n    BM->>Storage: persist()\n```\n\n### Read Flow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant Reader as BlockfileReader\n    participant RM as RootManager\n    participant BM as BlockManager\n    participant Storage\n\n    Client->>Reader: get(prefix, key)\n    Reader->>RM: get_block_ids()\n    RM->>Reader: block_id_list\n    loop For each block\n        Reader->>BM: get_block(id)\n        BM->>Storage: read()\n        Storage->>Reader: block_data\n    end\n  
  Reader->>Reader: search_blocks()\n    Reader->>Client: value\n```\n\n## Configuration Options\n\n### BlockfileWriterOptions\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `prefix_path` | `String` | Required | Path prefix for storage |\n| `max_block_size_bytes` | `usize` | Provider default | Maximum size per block |\n| `mutation_ordering` | `BlockfileWriterMutationOrdering` | `Ordered` | Write ordering mode |\n| `fork_from` | `Option<Uuid>` | `None` | Source blockfile ID for forking |\n| `cmek` | `Option<Cmek>` | `None` | Customer-managed encryption key |\n\n```rust\nlet mut bf_options = BlockfileWriterOptions::new(prefix_path.to_string())\n    .max_block_size_bytes(pl_block_size);\nbf_options = bf_options.unordered_mutations();\nif let Some(cmek) = cmek {\n    bf_options = bf_options.with_cmek(cmek);\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:90-110](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n\n## Memory Blockfile\n\nFor testing and ephemeral use cases, Chroma provides an in-memory blockfile implementation:\n\n```rust\npub fn new_memory() -> Self {\n    BlockfileProvider::HashMapBlockfileProvider(MemoryBlockfileProvider::new())\n}\n```\n\n**Limitations:**\n- No persistence\n- No fork support\n- Limited to unordered mutations\n\n```rust\nif options.fork_from.is_some() {\n    unimplemented!();\n}\n```\n\n资料来源：[rust/blockstore/src/memory/provider.rs:40-55](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/memory/provider.rs)\n\n## Block Reading\n\n### RootReader\n\nThe `RootReader` is responsible for reading block metadata and identifying which blocks contain specific data:\n\n```rust\nimpl RootReader {\n    pub(super) fn get_all_block_ids_from_bytes(\n        bytes: &[u8],\n        id: Uuid,\n    ) -> Result<Vec<Uuid>, FromBytesError> {\n        let mut cursor = std::io::Cursor::new(bytes);\n        let arrow_reader = 
arrow::ipc::reader::FileReader::try_new(&mut cursor, None);\n        \n        let record_batch = match arrow_reader {\n            Ok(mut reader) => match reader.next() {\n                Some(Ok(batch)) => batch,\n                Some(Err(e)) => return Err(FromBytesError::ArrowError(e)),\n                None => return Err(FromBytesError::NoDataError),\n            },\n            Err(e) => return Err(FromBytesError::ArrowError(e)),\n        };\n        \n        let (version, read_id) = Self::version_and_id_from_record_batch(&record_batch, id)?;\n        if read_id != id {\n            return Err(FromBytesError::IdMismatch);\n        }\n        \n        Self::block_ids_from_record_batch(&record_batch, version)\n    }\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/root.rs:20-55](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n\n## Related Components\n\n### SpannIndex Integration\n\nThe blockstore is used by the Spann (Sparse + ANN) index for storing posting lists:\n\n| Component | Purpose |\n|-----------|---------|\n| `SpannIndexReader` | Reads posting lists and HNSW indices |\n| `SpannIndexWriter` | Creates and manages posting list writers |\n| `SpannPostingList` | Stores document IDs and embeddings |\n\n```rust\npub struct SpannIndexReader<'me> {\n    pub posting_lists: BlockfileReader<'me, u32, SpannPostingList<'me>>,\n    pub hnsw_index: HnswIndexRef,\n    pub versions_map: BlockfileReader<'me, u32, u32>,\n    pub dimensionality: usize,\n}\n```\n\n资料来源：[rust/index/src/spann/types.rs:30-45](https://github.com/chroma-core/chroma/blob/main/rust/index/src/spann/types.rs)\n\n## Summary\n\nThe Chroma blockstore provides a robust, extensible storage layer built on Arrow IPC format. Key architectural decisions include:\n\n1. **Separation of concerns**: BlockManager handles data blocks while RootManager manages metadata and sparse indices\n2. **Dual writer support**: Ordered and unordered writers for different access patterns\n3. 
**Forking capability**: Efficient creation of derived blockfiles without full copies\n4. **Error classification**: Clear mapping from internal errors to error codes for API responses\n5. **Type-safe abstractions**: Generic key-value traits enabling flexible data modeling\n\n---\n\n<a id='embedding-functions'></a>\n\n## Embedding Functions Integration\n\n### 相关页面\n\n相关主题：[Python Client SDK](#python-client-sdk), [Data Storage & Blockstore](#data-storage-blockstore)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n- [clients/new-js/packages/ai-embeddings/ollama/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/ollama/README.md)\n- [rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n- [clients/new-js/packages/ai-embeddings/all/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/README.md)\n- [clients/new-js/packages/chromadb/src/embedding-function.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/embedding-function.ts)\n- [clients/new-js/packages/chromadb/src/collection-configuration.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/collection-configuration.ts)\n- [clients/new-js/packages/ai-embeddings/morph/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/morph/README.md)\n- [clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md)\n</details>\n\n# Embedding Functions Integration\n\n## Overview\n\nEmbedding Functions in Chroma provide a standardized interface for converting text into vector 
embeddings. Chroma supports multiple embedding providers through a plugin architecture that allows developers to use custom embedding functions or leverage hosted services like OpenAI, Cohere, Ollama, and others.\n\nThe embedding function system serves as the bridge between raw text data and the vector representation used for similarity search. Each embedding function implements a consistent interface that handles API communication, request formatting, and response parsing for its respective provider.\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n\n## Architecture\n\n### High-Level Architecture\n\n```mermaid\ngraph TD\n    A[Client Application] --> B[Chroma Collection]\n    B --> C[Embedding Function]\n    C --> D[Embedding Provider API]\n    D --> E[Vector Embeddings]\n    E --> B\n    \n    F[@chroma-core/openai] --> C\n    G[@chroma-core/ollama] --> C\n    H[@chroma-core/cohere] --> C\n    I[@chroma-core/morph] --> C\n    J[@chroma-core/all] --> C\n```\n\n### Embedding Function Package Structure\n\nChroma organizes embedding functions into separate packages under the `@chroma-core` namespace. 
Each package focuses on a specific provider while sharing common utilities.\n\n| Package | Provider | Environment Support |\n|---------|----------|---------------------|\n| `@chroma-core/ai-embeddings-common` | Shared utilities | Node.js + Browser |\n| `@chroma-core/openai` | OpenAI | Node.js + Browser |\n| `@chroma-core/ollama` | Ollama (local) | Node.js + Browser |\n| `@chroma-core/cohere` | Cohere | Node.js + Browser |\n| `@chroma-core/jina` | Jina AI | Node.js + Browser |\n| `@chroma-core/morph` | Morph | Node.js |\n| `@chroma-core/all` | All providers | Node.js + Browser |\n\n资料来源：[clients/new-js/packages/ai-embeddings/all/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/README.md)\n\n## Core Components\n\n### Common Utilities Package\n\nThe `@chroma-core/ai-embeddings-common` package provides shared functionality used by all embedding function implementations:\n\n```typescript\nimport { validateConfigSchema, snakeCase, isBrowser } from '@chroma-core/ai-embeddings-common';\n```\n\n**Key Features:**\n\n| Feature | Purpose |\n|---------|---------|\n| `validateConfigSchema` | Validates embedding function configurations using JSON schemas |\n| `snakeCase` | Converts camelCase JavaScript objects to snake_case for API compatibility |\n| `isBrowser` | Detects browser vs Node.js runtime environment |\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n\n### Dynamic Loading Mechanism\n\nThe embedding function system supports dynamic loading of packages based on configuration:\n\n```typescript\nconst fullPackageName = `@chroma-core/${packageName}`;\nawait import(fullPackageName);\nembeddingFunction = knownEmbeddingFunctions.get(packageName);\n```\n\nThe system maintains mappings for known embedding function names and handles package resolution automatically when a collection is configured with a 
specific embedding provider.\n\n资料来源：[clients/new-js/packages/chromadb/src/embedding-function.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/embedding-function.ts)\n\n### Configuration Schema\n\nEmbedding functions support structured configuration with schema validation. Configuration options vary by provider but typically include:\n\n| Parameter | Description | Provider Support |\n|-----------|-------------|------------------|\n| `apiKey` | API key for authentication | OpenAI, Cohere, Jina, Gemini |\n| `modelName` | Specific model identifier | All providers |\n| `apiBase` | Custom API endpoint URL | Ollama, Morph, Gemini |\n| `encodingFormat` | Output format (float/base64) | OpenAI, Morph |\n\n资料来源：[clients/new-js/packages/ai-embeddings/morph/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/morph/README.md)\n\n## Provider Implementations\n\n### OpenAI Embeddings\n\nThe OpenAI embedding function supports the OpenAI API for generating text embeddings:\n\n```typescript\nimport { OpenAIEmbeddingFunction } from '@chroma-core/openai';\n\nconst openAIEF = new OpenAIEmbeddingFunction({\n  apiKey: 'your-api-key',\n  modelName: 'text-embedding-3-small'\n});\n```\n\n### Ollama (Local Embeddings)\n\nOllama enables local embedding generation without external API calls:\n\n```bash\n# Install Ollama from ollama.ai\n# Start the server\nollama serve\n# Pull an embedding model\nollama pull chroma/all-minilm-l6-v2-f32\n```\n\n**Supported Models:**\n\n| Model | Dimensions |\n|-------|------------|\n| `chroma/all-minilm-l6-v2-f32` (default) | 384 |\n| `nomic-embed-text` | 768 |\n| `mxbai-embed-large` | 1024 |\n| `snowflake-arctic-embed` | Variable |\n\n资料来源：[clients/new-js/packages/ai-embeddings/ollama/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/ollama/README.md)\n\n### Morph Embeddings\n\nMorph provides embeddings optimized for code-related 
content:\n\n```typescript\nconst morphEmbedding = new MorphEmbeddingFunction({\n  api_key: 'your-morph-api-key',\n  model_name: 'morph-embedding-v2',\n  api_base: 'https://api.morphllm.com/v1',\n  encoding_format: 'float'\n});\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/morph/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/morph/README.md)\n\n### Chroma Cloud Qwen\n\nHosted embedding service using Qwen models:\n\n```typescript\nconst qwenEmbedding = new QwenEmbeddingFunction({\n  model: 'Qwen/Qwen3-Embedding-0.6B',\n  task: 'document' // or 'query'\n});\n```\n\nConfiguration includes:\n- `model`: The Qwen model to use\n- `task`: Task type (document or query embedding)\n- `instruction_dict`: Custom instructions for specific tasks\n- `apiKeyEnvVar`: Environment variable for API key (default: `CHROMA_API_KEY`)\n\n资料来源：[clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md)\n\n## Collection Integration\n\n### Embedding Function in Collections\n\nWhen creating a collection, the embedding function can be specified at multiple levels:\n\n```typescript\nconst collection = await chroma.createCollection({\n  name: \"my-collection\",\n  embeddingFunction: openAIEF  // Specify embedding function\n});\n```\n\n### Space Configuration\n\nEmbedding functions can define supported distance spaces and default configurations:\n\n```typescript\nif (overallEf && overallEf.defaultSpace && overallEf.supportedSpaces) {\n  if (configuration?.hnsw === undefined && configuration?.spann === undefined) {\n    configuration.hnsw = { space: overallEf.defaultSpace() };\n  }\n}\n```\n\nThe system validates that configured spaces are supported by the embedding function and warns if mismatches occur:\n\n```\nSpace 'cosine' is not supported by embedding function 'openai'. 
\nSupported spaces: cosine, euclidean, dotproduct\n```\n\n资料来源：[clients/new-js/packages/chromadb/src/collection-configuration.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/collection-configuration.ts)\n\n## Query Response Structure\n\n### Include Parameter\n\nQueries support specifying which data to include in results through the `Include` parameter:\n\n```rust\npub enum Include {\n    Distance,\n    Document,\n    Embedding,\n    Metadata,\n    Uri,\n}\n```\n\n**Default Inclusion Behavior:**\n\n| Operation | Default Includes |\n|-----------|------------------|\n| Query | Document, Metadata, Distance |\n| Get | Document, Metadata |\n\n**Include List Methods:**\n\n| Method | Returns |\n|--------|---------|\n| `IncludeList::empty()` | No includes |\n| `IncludeList::default_query()` | Document, Metadata, Distance |\n| `IncludeList::default_get()` | Document, Metadata |\n| `IncludeList::all()` | All five include types |\n\n资料来源：[rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n\n## Usage Patterns\n\n### Basic Usage with JavaScript Client\n\n```javascript\nimport { ChromaClient } from \"chromadb\";\nimport { OpenAIEmbeddingFunction } from \"@chroma-core/openai\";\n\nconst chroma = new ChromaClient();\nconst embeddingFunction = new OpenAIEmbeddingFunction({\n  apiKey: process.env.OPENAI_API_KEY\n});\n\nconst collection = await chroma.createCollection({\n  name: \"documents\",\n  embeddingFunction: embeddingFunction\n});\n\nawait collection.add({\n  ids: [\"doc-1\", \"doc-2\"],\n  documents: [\"Document content here\", \"Another document\"],\n  metadatas: [{ source: \"notion\" }, { source: \"google-docs\" }]\n});\n\nconst results = await collection.query({\n  queryTexts: [\"Search query\"],\n  nResults: 2\n});\n```\n\n### Python Client Usage\n\n```python\nimport chromadb\n\nclient = chromadb.HttpClient(host=\"localhost\", port=8000)\ncollection = 
client.create_collection(\"documents\")\n\ncollection.add(\n    documents=[\"Document 1\", \"Document 2\"],\n    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],\n    ids=[\"doc1\", \"doc2\"],\n    embeddings=[[1.2, 2.1, ...], [1.2, 2.1, ...]]\n)\n\nresults = collection.query(\n    query_texts=[\"Query document\"],\n    n_results=2\n)\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n## Environment Detection\n\nEmbedding functions automatically detect the runtime environment to select the appropriate HTTP client:\n\n```typescript\nimport { isBrowser } from '@chroma-core/ai-embeddings-common';\n\nif (isBrowser()) {\n  // Use browser-compatible fetch\n} else {\n  // Use Node.js HTTP client\n}\n```\n\nThis enables packages like Ollama to work seamlessly in both browser and Node.js environments:\n\n> This package works in both Node.js and browser environments, automatically detecting the runtime and using the appropriate Ollama client.\n\n资料来源：[clients/new-js/packages/ai-embeddings/ollama/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/ollama/README.md)\n\n## Type Safety\n\nThe embedding function system provides TypeScript types and interfaces for:\n\n- Configuration validation\n- Response parsing\n- Error handling\n- Provider-specific options\n\n```typescript\nexport const getSparseEmbeddingFunction = async (\n  client: ChromaClient,\n  efConfig?: EmbeddingFunctionConfiguration\n) => {\n  // Returns SparseEmbeddingFunction instance or undefined\n};\n```\n\n资料来源：[clients/new-js/packages/chromadb/src/embedding-function.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/embedding-function.ts)\n\n## Summary\n\nEmbedding Functions Integration in Chroma provides a unified, extensible system for text vectorization. Key aspects include:\n\n1. 
**Provider Abstraction**: Standardized interface across multiple embedding providers\n2. **Dynamic Loading**: Packages loaded on-demand based on collection configuration\n3. **Schema Validation**: JSON schema-based configuration validation\n4. **Cross-Platform**: Support for both Node.js and browser environments\n5. **Flexible Configuration**: Provider-specific options with sensible defaults\n6. **Space Support**: Distance metric configuration aligned with embedding provider capabilities\n\nThe plugin architecture allows Chroma to integrate new embedding providers while maintaining API consistency across the SDK.\n\n---\n\n---\n\n## Doramagic 踩坑日志\n\n项目：chroma-core/chroma\n\n摘要：发现 6 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：能力坑 - 能力判断依赖假设。\n\n## 1. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | github_repo:546206616 | https://github.com/chroma-core/chroma | README/documentation is current enough for a first validation pass.\n\n## 2. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | last_activity_observed missing\n\n## 3. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n\n## 4. 
安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n\n## 5. 维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | issue_or_pr_quality=unknown\n\n## 6. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | release_recency=unknown\n\n<!-- canonical_name: chroma-core/chroma; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "chroma",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "github_repo:546206616",
          "kind": "repo",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/chroma-core/chroma"
        },
        {
          "evidence_id": "art_4add53223c9947409ac05916314bff2e",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/chroma-core/chroma#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki 完整输出，末尾追加 Discovery Agent 踩坑日志。",
      "title": "chroma 说明书",
      "toc": [
        "https://github.com/chroma-core/chroma 项目说明书",
        "目录",
        "Chroma Overview",
        "Introduction",
        "Architecture Overview",
        "Data Model",
        "Core Components",
        "Deployment Options",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "8f76e1bad8f00ec791ec2d8ff4816d0c3477eb4a",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "pyproject.toml",
      "Dockerfile",
      "README.md",
      "docker-compose.yml",
      "requirements.txt",
      "docs/mintlify/AGENTS.md",
      "docs/mintlify/CLAUDE.md",
      "docs/mintlify/sync.openapi.json",
      "docs/mintlify/README.md",
      "docs/mintlify/docs.json",
      "docs/scripts/generate_python_reference.py",
      "docs/scripts/README.md",
      "docs/scripts/generate_ts_reference.ts",
      "docs/mintlify/reference/rust.mdx",
      "docs/mintlify/reference/swift.mdx",
      "docs/mintlify/reference/search.mdx",
      "docs/mintlify/reference/server-env-vars.mdx",
      "docs/mintlify/reference/where-filter.mdx",
      "docs/mintlify/reference/overview.mdx",
      "docs/mintlify/reference/kotlin.mdx",
      "docs/mintlify/snippets/callout.mdx",
      "docs/mintlify/.claude/settings.json",
      "docs/mintlify/cloud/quotas-limits.mdx",
      "docs/mintlify/cloud/getting-started.mdx",
      "docs/mintlify/cloud/pricing.mdx",
      "docs/mintlify/integrations/chroma-integrations.mdx",
      "docs/mintlify/docs/cli/vacuum.mdx",
      "docs/mintlify/docs/cli/sample-apps.mdx",
      "docs/mintlify/docs/cli/profile.mdx",
      "docs/mintlify/docs/cli/update.mdx",
      "docs/mintlify/docs/cli/run.mdx",
      "docs/mintlify/docs/cli/login.mdx",
      "docs/mintlify/docs/cli/browse.mdx",
      "docs/mintlify/docs/cli/db.mdx",
      "docs/mintlify/docs/cli/install.mdx",
      "docs/mintlify/docs/cli/copy.mdx",
      "docs/mintlify/docs/embeddings/multimodal.mdx",
      "docs/mintlify/docs/embeddings/embedding-functions.mdx",
      "docs/mintlify/docs/collections/add-data.mdx",
      "docs/mintlify/docs/collections/delete-data.mdx"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# chroma-core/chroma - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 chroma-core/chroma 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **想在安装前理解开源项目价值和边界的用户**：当前证据主要来自项目文档。 证据：`README.md` Claim：`clm_0002` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `pip install chromadb # python client` 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：先做角色匹配试用\n- **为什么**：这个项目更像角色库，核心风险是选错角色或把角色文案当执行能力；先用 Prompt Preview 试角色匹配，再决定是否沙盒导入。\n\n### 30 秒判断\n\n- **现在怎么做**：先做角色匹配试用\n- **最小安全下一步**：先用 Prompt Preview 试角色匹配；满意后再隔离导入\n- **先别相信**：角色质量和任务匹配不能直接相信。\n- **继续会触碰**：角色选择偏差、命令执行、宿主 AI 配置\n\n### 现在可以相信\n\n- **适合人群线索：想在安装前理解开源项目价值和边界的用户**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0003` supported 0.86\n\n### 现在还不能相信\n\n- **角色质量和任务匹配不能直接相信。**（unverified）：角色库证明有很多角色，不证明每个角色都适合你的具体任务，也不证明角色能产生高质量结果。\n- **不能把角色文案当成真实执行能力。**（unverified）：安装前只能判断角色描述和任务画像是否匹配，不能证明它能在宿主 AI 里完成任务。\n- **真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 
版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。 证据：`AGENTS.md`, `CLAUDE.md`\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n\n### 继续会触碰什么\n\n- **角色选择偏差**：用户对任务应该由哪个专家角色处理的判断。 原因：选错角色会让 AI 从错误专业视角回答，浪费时间或误导决策。\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`\n- **宿主 AI 配置**：Claude/Codex/Cursor/Gemini/OpenCode 等宿主的 plugin、Skill 或规则加载配置。 原因：宿主配置会改变 AI 后续工作方式，可能和用户已有规则冲突。 证据：`AGENTS.md`, `CLAUDE.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：先用交互式试用验证任务画像和角色匹配，不要先导入整套角色库。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **先备份宿主 AI 配置**：Skill、plugin、规则文件可能改变 Claude/Cursor/Codex 的默认行为。（适用：存在插件 manifest、Skill 或宿主规则入口时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- **保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **准备移除宿主 plugin / Skill / 规则入口**：如果试装后行为异常，可以把宿主 AI 恢复到试装前状态。\n- **保留原始角色选择记录**：如果输出偏题，可以回到任务画像阶段重新选择角色，而不是继续沿着错误角色推进。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0004` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md` Claim：`clm_0005` supported 0.86\n- **待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？。原因：兼容性只能通过实际宿主环境验证。\n- 
**待确认**：项目输出质量是否满足用户具体任务？。原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？。原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：1932\n- 重要文件覆盖：40/1932\n- 证据索引条目：80\n- 角色 / Skill 条目：60\n\n### 证据不足时的处理\n\n- **missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 chroma-core/chroma 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 chroma-core/chroma 当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 
给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 chroma-core/chroma 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 60 个角色 / Skill / 项目文档条目。\n\n- **AI instructions**（project_doc）：This file provides guidance to coding agents when working in this documentation package. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/mintlify/AGENTS.md`\n- **Chroma Documentation**（project_doc）：This is the official documentation for Chroma https://www.trychroma.com , the open-source data infrastructure for AI. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/mintlify/README.md`\n- **Documentation Generator Scripts**（project_doc）：Generate all split reference files into docs/mintlify/reference/python/ : 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/scripts/README.md`\n- **Chroma Codebase Guidelines for AI Agents**（project_doc）：Chroma Codebase Guidelines for AI Agents 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`AGENTS.md`\n- **Chroma Codebase Guidelines**（project_doc）：Use the TYPE scope : Description format: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CLAUDE.md`\n- **for javascript, npm install chromadb!**（project_doc）：! Chroma ./docs/assets/chroma-wordmark-color.png gh-light-mode-only ! Chroma ./docs/assets/chroma-wordmark-white.png gh-dark-mode-only 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **Examples**（project_doc）：Searching for community contributions! Join the contributing https://discord.com/channels/1073293645303795742/1074711539724058635 Discord Channel to discuss. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/README.md`\n- **Telemetry**（project_doc）：This directory holds all the telemetry for Chroma. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`chromadb/telemetry/README.md`\n- **Embedding Function Schemas**（project_doc）：This directory contains JSON schemas for all embedding functions in Chroma. The purpose of having this schema is to support cross language compatibility, and to validate that changes in one client library do not accidentally diverge from others. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`chromadb/utils/embedding_functions/schemas/README.md`\n- **chromadb**（project_doc）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/js/README.md`\n- **ChromaDB Examples**（project_doc）：This directory contains examples for using both ChromaDB package options: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/js/examples/README.md`\n- **Node Example**（project_doc）：This is an example of how to use ChromaDB with Node.js. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/js/examples/node/README.md`\n- **ChromaDB Client**（project_doc）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/js/packages/chromadb-client/README.md`\n- **API**（project_doc）：This generator creates TypeScript/JavaScript client that utilizes Fetch API https://fetch.spec.whatwg.org/ . The generated Node module can be used in the following environments: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/js/packages/chromadb-core/src/generated/README.md`\n- **ChromaDB JavaScript Client**（project_doc）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/js/packages/chromadb/README.md`\n- **@chroma-core/all**（project_doc）：All AI embedding providers for Chroma in one package. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/all/README.md`\n- **Chroma Embeddings**（project_doc）：This package provides an embedding function for the Qwen model family hosted on Chroma's cloud embedding service. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md`\n- **Chroma Cloud Splade Embeddings**（project_doc）：This package provides a sparse embedding function for the Splade model family hosted on Chroma's cloud embedding service. 
Splade Sparse Lexical and Expansion embeddings are particularly effective for information retrieval tasks, combining the benefits of sparse representations with learned relevance. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/chroma-cloud-splade/README.md`\n- **Cloudflare Workers AI Embedding Provider for Chroma**（project_doc）：Cloudflare Workers AI Embedding Provider for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/cloudflare-worker-ai/README.md`\n- **Cohere Embedding Function for Chroma**（project_doc）：Cohere Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/cohere/README.md`\n- **@chroma-core/ai-embeddings-common**（project_doc）：Common utilities and shared functionality for ChromaDB AI embedding packages. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/common/README.md`\n- **Default Embedding Function for Chroma**（project_doc）：Default Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/default-embed/README.md`\n- **Google Gemini Embedding Function for Chroma**（project_doc）：Google Gemini Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/google-gemini/README.md`\n- **Hugging Face Server Embedding Function for Chroma**（project_doc）：Hugging Face Server Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/huggingface-server/README.md`\n- **Jina Embedding Function for Chroma**（project_doc）：This package provides a Jina AI embedding provider for Chroma. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/jina/README.md`\n- **Mistral Embedding Function for Chroma**（project_doc）：Mistral Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/mistral/README.md`\n- **@chroma-core/morph**（project_doc）：Chroma integration for Morph embedding models. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/morph/README.md`\n- **Ollama Embedding Function for Chroma**（project_doc）：Ollama Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/ollama/README.md`\n- **OpenAI Embedding Function for Chroma**（project_doc）：OpenAI Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/openai/README.md`\n- **Perplexity Embedding Function for Chroma**（project_doc）：Perplexity Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/perplexity/README.md`\n- **Sentence Transformers Embedding Function for Chroma**（project_doc）：Sentence Transformers Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/sentence-transformer/README.md`\n- **Together AI Embedding Function for Chroma**（project_doc）：Together AI Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/together-ai/README.md`\n- **Voyage AI Embedding Function for Chroma**（project_doc）：Voyage AI Embedding Function for Chroma 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/ai-embeddings/voyageai/README.md`\n- **chromadb**（project_doc）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/new-js/packages/chromadb/README.md`\n- **Example setup of the client to connect to your chroma server**（project_doc）：Chroma - the open-source data infrastructure for AI . This package is for the Python HTTP client-only library for Chroma. This client connects to the Chroma Server. If that is not what you are looking for, you might want to check out the full library . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`clients/python/README.md`\n- **Authorization**（project_doc）：Following are the role mappings where we define roles and the actions they can perform. The actions spaces is taken from the resource actions defined above. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/basic_functionality/authz/README.md`\n- **Chat with your documents**（project_doc）：This folder contains a very minimal, self-contained example of how to make an application to chat with your documents, using Chroma and OpenAI's API. It uses the 2022 and 2023 U.S state of the union addresses as example documents. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/chat_with_your_documents/README.md`\n- **AWS EC2 Basic Deployment**（project_doc）：This is an example deployment to AWS EC2 Compute using terraform https://www.terraform.io/ . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/deployments/aws-terraform/README.md`\n- **Digital Ocean Droplet Deployment**（project_doc）：This is an example deployment using Digital Ocean Droplet using terraform https://www.terraform.io/ . 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/deployments/do-terraform/README.md`\n- **Google Cloud Compute Deployment**（project_doc）：This is an example deployment to Google Cloud Compute using terraform https://www.terraform.io/ 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/deployments/google-cloud-compute/README.md`\n- **Render.com Deployment**（project_doc）：This is an example deployment to Render.com using terraform https://www.terraform.io/ 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/deployments/render-terraform/README.md`\n- **Chat with your documents**（project_doc）：This folder contains a very minimal, self-contained example of how to make an application to chat with your documents, using Chroma and Google Gemini's API. It uses the 2022 and 2023 U.S state of the union addresses as example documents. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/gemini/README.md`\n- **Observability**（project_doc）：To run the Chroma with local observability stack OpenTelemetry + Zipkin , run the following command from the root of the repository: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/observability/README.md`\n- **xAI**（project_doc）：This folder contains basic examples of using Chroma with the xAI SDK. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`examples/xai/README.md`\n- **Chroma**（project_doc）：This crate provides the official Chroma Rust client. Chroma is an open-source AI-native search database that makes it easy to get private, offline, and real-time data that large language models were not trained on into their context. Where the language models provide reasoning, Chroma focuses on search, enabling your application to customize the search methods it needs most. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/chroma/README.md`\n- **Sparse Index Module**（project_doc）：The sparse index module implements the Block-Max WAND Weak AND algorithm for efficient sparse vector search. 
This implementation is built on top of Chroma's blockfile abstraction and provides high-performance top-k retrieval for sparse vectors, commonly used in text search and information retrieval systems. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/index/src/sparse/README.md`\n- **chromadb-js-bindings-darwin-arm64**（project_doc）：This is the aarch64-apple-darwin binary for chromadb-js-bindings 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/js_bindings/npm/darwin-arm64/README.md`\n- **chromadb-js-bindings-darwin-x64**（project_doc）：This is the x86 64-apple-darwin binary for chromadb-js-bindings 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/js_bindings/npm/darwin-x64/README.md`\n- **chromadb-js-bindings-linux-arm64-gnu**（project_doc）：chromadb-js-bindings-linux-arm64-gnu 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/js_bindings/npm/linux-arm64-gnu/README.md`\n- **chromadb-js-bindings-linux-x64-gnu**（project_doc）：This is the x86 64-unknown-linux-gnu binary for chromadb-js-bindings 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/js_bindings/npm/linux-x64-gnu/README.md`\n- **chromadb-js-bindings-win32-arm64-msvc**（project_doc）：chromadb-js-bindings-win32-arm64-msvc 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/js_bindings/npm/win32-arm64-msvc/README.md`\n- **chromadb-js-bindings-win32-x64-msvc**（project_doc）：chromadb-js-bindings-win32-x64-msvc 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/js_bindings/npm/win32-x64-msvc/README.md`\n- **Chroma Metering**（project_doc）：This library provides a procedural-macro based implementation of a metering library that is friendly for multi-threaded, asynchronous, and distributed environments. It allows users to define custom metering capabilities and contexts . A capability is globally unique in the scope of the crate into which chroma-metering is imported and represents the property of a context that allows it to react via a handler . 
An co… 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/metering-macros/README.md`\n- **s3heap-service**（project_doc）：The s3heap-service integrates with the function manager to trigger functions at no faster than a particular cadence, with reasonable guarantees that writing data will cause a function to run. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/s3heap-service/README.md`\n- **Spanner Migrations**（project_doc）：Schema migrations for Spanner database. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/spanner-migrations/README.md`\n- **Design**（project_doc）：wal3 is the write-ahead lightweight logging library. It implements a linearizable log that is built entirely on top of object storage. It relies upon the atomicity of object storage to provide the If-Match header. This allows us to create a log entirely on top of object storage without any other sources of locking or coordination. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/wal3/README.md`\n- **Readme**（project_doc）：This folder houses the Rust code for the query and compactor nodes. It is a standard rust crate managed using cargo. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`rust/worker/README.md`\n- **Generative Benchmarking**（project_doc）：This project provides a comprehensive toolkit for generating custom benchmarks and replicating the results outlined in our technical report https://research.trychroma.com/generative-benchmarking . 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`sample_apps/generative_benchmarking/README.md`\n- **Movies with Chroma**（project_doc）： 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`sample_apps/movies/README.md`\n- **Embedding Function Schemas**（project_doc）：This directory contains JSON schemas for all embedding functions in Chroma. The purpose of having these schemas is to support cross-language compatibility and to validate that changes in one client library do not accidentally diverge from others. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`schemas/embedding_functions/README.md`\n\n## 证据索引\n\n- 共索引 80 条证据。\n\n- **AI instructions**（documentation）：This file provides guidance to coding agents when working in this documentation package. 证据：`docs/mintlify/AGENTS.md`\n- **Chroma Documentation**（documentation）：This is the official documentation for Chroma https://www.trychroma.com , the open-source data infrastructure for AI. 证据：`docs/mintlify/README.md`\n- **Documentation Generator Scripts**（documentation）：Generate all split reference files into docs/mintlify/reference/python/ : 证据：`docs/scripts/README.md`\n- **Chroma Codebase Guidelines for AI Agents**（documentation）：Chroma Codebase Guidelines for AI Agents 证据：`AGENTS.md`\n- **Chroma Codebase Guidelines**（documentation）：Use the TYPE scope : Description format: 证据：`CLAUDE.md`\n- **for javascript, npm install chromadb!**（documentation）：! Chroma ./docs/assets/chroma-wordmark-color.png gh-light-mode-only ! Chroma ./docs/assets/chroma-wordmark-white.png gh-dark-mode-only 证据：`README.md`\n- **Examples**（documentation）：Searching for community contributions! Join the contributing https://discord.com/channels/1073293645303795742/1074711539724058635 Discord Channel to discuss. 证据：`examples/README.md`\n- **Telemetry**（documentation）：This directory holds all the telemetry for Chroma. 证据：`chromadb/telemetry/README.md`\n- **Embedding Function Schemas**（documentation）：This directory contains JSON schemas for all embedding functions in Chroma. The purpose of having this schema is to support cross language compatibility, and to validate that changes in one client library do not accidentally diverge from others. 证据：`chromadb/utils/embedding_functions/schemas/README.md`\n- **chromadb**（documentation）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 
证据：`clients/js/README.md`\n- **ChromaDB Examples**（documentation）：This directory contains examples for using both ChromaDB package options: 证据：`clients/js/examples/README.md`\n- **Node Example**（documentation）：This is an example of how to use ChromaDB with Node.js. 证据：`clients/js/examples/node/README.md`\n- **ChromaDB Client**（documentation）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 证据：`clients/js/packages/chromadb-client/README.md`\n- **API**（documentation）：This generator creates TypeScript/JavaScript client that utilizes Fetch API https://fetch.spec.whatwg.org/ . The generated Node module can be used in the following environments: 证据：`clients/js/packages/chromadb-core/src/generated/README.md`\n- **ChromaDB JavaScript Client**（documentation）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 证据：`clients/js/packages/chromadb/README.md`\n- **@chroma-core/all**（documentation）：All AI embedding providers for Chroma in one package. 证据：`clients/new-js/packages/ai-embeddings/all/README.md`\n- **Chroma Embeddings**（documentation）：This package provides an embedding function for the Qwen model family hosted on Chroma's cloud embedding service. 证据：`clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md`\n- **Chroma Cloud Splade Embeddings**（documentation）：This package provides a sparse embedding function for the Splade model family hosted on Chroma's cloud embedding service. Splade Sparse Lexical and Expansion embeddings are particularly effective for information retrieval tasks, combining the benefits of sparse representations with learned relevance. 
证据：`clients/new-js/packages/ai-embeddings/chroma-cloud-splade/README.md`\n- **Cloudflare Workers AI Embedding Provider for Chroma**（documentation）：Cloudflare Workers AI Embedding Provider for Chroma 证据：`clients/new-js/packages/ai-embeddings/cloudflare-worker-ai/README.md`\n- **Cohere Embedding Function for Chroma**（documentation）：Cohere Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/cohere/README.md`\n- **@chroma-core/ai-embeddings-common**（documentation）：Common utilities and shared functionality for ChromaDB AI embedding packages. 证据：`clients/new-js/packages/ai-embeddings/common/README.md`\n- **Default Embedding Function for Chroma**（documentation）：Default Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/default-embed/README.md`\n- **Google Gemini Embedding Function for Chroma**（documentation）：Google Gemini Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/google-gemini/README.md`\n- **Hugging Face Server Embedding Function for Chroma**（documentation）：Hugging Face Server Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/huggingface-server/README.md`\n- **Jina Embedding Function for Chroma**（documentation）：This package provides a Jina AI embedding provider for Chroma. 证据：`clients/new-js/packages/ai-embeddings/jina/README.md`\n- **Mistral Embedding Function for Chroma**（documentation）：Mistral Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/mistral/README.md`\n- **@chroma-core/morph**（documentation）：Chroma integration for Morph embedding models. 
证据：`clients/new-js/packages/ai-embeddings/morph/README.md`\n- **Ollama Embedding Function for Chroma**（documentation）：Ollama Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/ollama/README.md`\n- **OpenAI Embedding Function for Chroma**（documentation）：OpenAI Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/openai/README.md`\n- **Perplexity Embedding Function for Chroma**（documentation）：Perplexity Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/perplexity/README.md`\n- **Sentence Transformers Embedding Function for Chroma**（documentation）：Sentence Transformers Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/sentence-transformer/README.md`\n- **Together AI Embedding Function for Chroma**（documentation）：Together AI Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/together-ai/README.md`\n- **Voyage AI Embedding Function for Chroma**（documentation）：Voyage AI Embedding Function for Chroma 证据：`clients/new-js/packages/ai-embeddings/voyageai/README.md`\n- **chromadb**（documentation）：Chroma is the open-source data infrastructure for AI. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. 证据：`clients/new-js/packages/chromadb/README.md`\n- **Example setup of the client to connect to your chroma server**（documentation）：Chroma - the open-source data infrastructure for AI . This package is for the Python HTTP client-only library for Chroma. This client connects to the Chroma Server. If that is not what you are looking for, you might want to check out the full library . 证据：`clients/python/README.md`\n- **Authorization**（documentation）：Following are the role mappings where we define roles and the actions they can perform. The actions spaces is taken from the resource actions defined above. 
证据：`examples/basic_functionality/authz/README.md`\n- **Chat with your documents**（documentation）：This folder contains a very minimal, self-contained example of how to make an application to chat with your documents, using Chroma and OpenAI's API. It uses the 2022 and 2023 U.S state of the union addresses as example documents. 证据：`examples/chat_with_your_documents/README.md`\n- **AWS EC2 Basic Deployment**（documentation）：This is an example deployment to AWS EC2 Compute using terraform https://www.terraform.io/ . 证据：`examples/deployments/aws-terraform/README.md`\n- **Digital Ocean Droplet Deployment**（documentation）：This is an example deployment using Digital Ocean Droplet using terraform https://www.terraform.io/ . 证据：`examples/deployments/do-terraform/README.md`\n- **Google Cloud Compute Deployment**（documentation）：This is an example deployment to Google Cloud Compute using terraform https://www.terraform.io/ 证据：`examples/deployments/google-cloud-compute/README.md`\n- **Render.com Deployment**（documentation）：This is an example deployment to Render.com using terraform https://www.terraform.io/ 证据：`examples/deployments/render-terraform/README.md`\n- **Chat with your documents**（documentation）：This folder contains a very minimal, self-contained example of how to make an application to chat with your documents, using Chroma and Google Gemini's API. It uses the 2022 and 2023 U.S state of the union addresses as example documents. 证据：`examples/gemini/README.md`\n- **Observability**（documentation）：To run the Chroma with local observability stack OpenTelemetry + Zipkin , run the following command from the root of the repository: 证据：`examples/observability/README.md`\n- **xAI**（documentation）：This folder contains basic examples of using Chroma with the xAI SDK. 证据：`examples/xai/README.md`\n- **Chroma**（documentation）：This crate provides the official Chroma Rust client. 
Chroma is an open-source AI-native search database that makes it easy to get private, offline, and real-time data that large language models were not trained on into their context. Where the language models provide reasoning, Chroma focuses on search, enabling your application to customize the search methods it needs most. 证据：`rust/chroma/README.md`\n- **Sparse Index Module**（documentation）：The sparse index module implements the Block-Max WAND Weak AND algorithm for efficient sparse vector search. This implementation is built on top of Chroma's blockfile abstraction and provides high-performance top-k retrieval for sparse vectors, commonly used in text search and information retrieval systems. 证据：`rust/index/src/sparse/README.md`\n- **chromadb-js-bindings-darwin-arm64**（documentation）：This is the aarch64-apple-darwin binary for chromadb-js-bindings 证据：`rust/js_bindings/npm/darwin-arm64/README.md`\n- **chromadb-js-bindings-darwin-x64**（documentation）：This is the x86 64-apple-darwin binary for chromadb-js-bindings 证据：`rust/js_bindings/npm/darwin-x64/README.md`\n- **chromadb-js-bindings-linux-arm64-gnu**（documentation）：chromadb-js-bindings-linux-arm64-gnu 证据：`rust/js_bindings/npm/linux-arm64-gnu/README.md`\n- **chromadb-js-bindings-linux-x64-gnu**（documentation）：This is the x86 64-unknown-linux-gnu binary for chromadb-js-bindings 证据：`rust/js_bindings/npm/linux-x64-gnu/README.md`\n- **chromadb-js-bindings-win32-arm64-msvc**（documentation）：chromadb-js-bindings-win32-arm64-msvc 证据：`rust/js_bindings/npm/win32-arm64-msvc/README.md`\n- **chromadb-js-bindings-win32-x64-msvc**（documentation）：chromadb-js-bindings-win32-x64-msvc 证据：`rust/js_bindings/npm/win32-x64-msvc/README.md`\n- **Chroma Metering**（documentation）：This library provides a procedural-macro based implementation of a metering library that is friendly for multi-threaded, asynchronous, and distributed environments. It allows users to define custom metering capabilities and contexts . 
A capability is globally unique in the scope of the crate into which chroma-metering is imported and represents the property of a context that allows it to react via a handler . A context is a data structure that contains fields. Fields may be mutated by handlers not necessarily 1:1 when a capability is invoked. A context must have at least one field and contexts are expected to be Debug , Any , Send , and Sync . 证据：`rust/metering-macros/README.md`\n- **s3heap-service**（documentation）：The s3heap-service integrates with the function manager to trigger functions at no faster than a particular cadence, with reasonable guarantees that writing data will cause a function to run. 证据：`rust/s3heap-service/README.md`\n- **Spanner Migrations**（documentation）：Schema migrations for Spanner database. 证据：`rust/spanner-migrations/README.md`\n- **Design**（documentation）：wal3 is the write-ahead lightweight logging library. It implements a linearizable log that is built entirely on top of object storage. It relies upon the atomicity of object storage to provide the If-Match header. This allows us to create a log entirely on top of object storage without any other sources of locking or coordination. 证据：`rust/wal3/README.md`\n- **Readme**（documentation）：This folder houses the Rust code for the query and compactor nodes. It is a standard rust crate managed using cargo. 证据：`rust/worker/README.md`\n- **Generative Benchmarking**（documentation）：This project provides a comprehensive toolkit for generating custom benchmarks and replicating the results outlined in our technical report https://research.trychroma.com/generative-benchmarking . 证据：`sample_apps/generative_benchmarking/README.md`\n- **Movies with Chroma**（documentation）：Movies with Chroma Setup your .env file: Then, run the development server: 证据：`sample_apps/movies/README.md`\n- **Embedding Function Schemas**（documentation）：This directory contains JSON schemas for all embedding functions in Chroma. 
The purpose of having these schemas is to support cross-language compatibility and to validate that changes in one client library do not accidentally diverge from others. 证据：`schemas/embedding_functions/README.md`\n- 其余 20 条证据见 `AI_CONTEXT_PACK.json` 或 `EVIDENCE_INDEX.json`。\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`docs/mintlify/AGENTS.md`, `docs/mintlify/README.md`, `docs/scripts/README.md`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`docs/mintlify/AGENTS.md`, `docs/mintlify/README.md`, `docs/scripts/README.md`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\n下面内容用于强化 Repomix/AI Context Pack 主体。Human Manual 只提供阅读骨架；踩坑日志会被转成宿主 AI 必须遵守的工作约束。\n\n## Human Manual 骨架\n\n使用规则：这里只是项目阅读路线和显著性信号，不是事实权威。具体事实仍必须回到 repo evidence / Claim Graph。\n\n宿主 AI 硬性规则：\n- 不得把页标题、章节顺序、摘要或 importance 当作项目事实证据。\n- 解释 Human Manual 骨架时，必须明确说它只是阅读路线/显著性信号。\n- 能力、安装、兼容性、运行状态和风险判断必须引用 repo evidence、source path 或 Claim Graph。\n\n- **Chroma Overview**：importance `high`\n  - source_paths: README.md, Cargo.toml, pyproject.toml\n- **Getting Started with Chroma**：importance `high`\n  - source_paths: chromadb/__init__.py, chromadb/api/client.py, chromadb/api/models/Collection.py, examples/basic_functionality/start_here.ipynb\n- **System Architecture Overview**：importance `high`\n  - source_paths: rust/frontend/src/server.rs, rust/worker/src/server.rs, rust/sysdb/src/sysdb.rs, rust/types/src/lib.rs, docs/mintlify/reference/architecture/overview.mdx\n- **Protocol Buffers & gRPC API**：importance `medium`\n  - source_paths: idl/chromadb/proto/chroma.proto, idl/chromadb/proto/coordinator.proto, idl/chromadb/proto/query_executor.proto, idl/makefile\n- **Python Client SDK**：importance `high`\n  - source_paths: 
chromadb/api/client.py, chromadb/api/async_client.py, chromadb/api/models/Collection.py, chromadb/api/types.py, clients/python/pyproject.toml\n- **JavaScript/TypeScript Client SDKs**：importance `medium`\n  - source_paths: clients/js/packages/chromadb-core/src/ChromaClient.ts, clients/new-js/packages/chromadb/src/chroma-client.ts, clients/new-js/packages/chromadb/src/api/sdk.gen.ts, clients/js/packages/chromadb-core/src/Collection.ts\n- **Rust Backend Services Architecture**：importance `high`\n  - source_paths: rust/frontend/src/lib.rs, rust/worker/src/lib.rs, rust/sysdb/src/lib.rs, rust/log-service/src/lib.rs, rust/blockstore/src/lib.rs\n- **Go Coordinator & Distributed Systems**：importance `medium`\n  - source_paths: go/pkg/sysdb/coordinator/coordinator.go, go/pkg/memberlist_manager/memberlist_manager.go, go/pkg/leader/election.go, go/cmd/coordinator/main.go\n\n## Repo Inspection Evidence / 源码检查证据\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: `8f76e1bad8f00ec791ec2d8ff4816d0c3477eb4a`\n- inspected_files: `pyproject.toml`, `Dockerfile`, `README.md`, `docker-compose.yml`, `requirements.txt`, `docs/mintlify/AGENTS.md`, `docs/mintlify/CLAUDE.md`, `docs/mintlify/sync.openapi.json`, `docs/mintlify/README.md`, `docs/mintlify/docs.json`, `docs/scripts/generate_python_reference.py`, `docs/scripts/README.md`, `docs/scripts/generate_ts_reference.ts`, `docs/mintlify/reference/rust.mdx`, `docs/mintlify/reference/swift.mdx`, `docs/mintlify/reference/search.mdx`, `docs/mintlify/reference/server-env-vars.mdx`, `docs/mintlify/reference/where-filter.mdx`, `docs/mintlify/reference/overview.mdx`, `docs/mintlify/reference/kotlin.mdx`\n\n宿主 AI 硬性规则：\n- 没有 repo_clone_verified=true 时，不得声称已经读过源码。\n- 没有 repo_inspection_verified=true 时，不得把 README/docs/package 文件判断写成事实。\n- 没有 quick_start_verified=true 时，不得声称 Quick Start 已跑通。\n\n## Doramagic Pitfall Constraints / 踩坑约束\n\n这些规则来自 Doramagic 发现、验证或编译过程中的项目专属坑点。宿主 AI 必须把它们当作工作约束，而不是普通说明文字。\n\n### Constraint 1: 
能力判断依赖假设\n\n- Trigger: README/documentation is current enough for a first validation pass.\n- Host AI rule: 将假设转成下游验证清单。\n- Why it matters: 假设不成立时，用户拿不到承诺的能力。\n- Evidence: capability.assumptions | github_repo:546206616 | https://github.com/chroma-core/chroma | README/documentation is current enough for a first validation pass.\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 2: 维护活跃度未知\n\n- Trigger: 未记录 last_activity_observed。\n- Host AI rule: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Why it matters: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Evidence: evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | last_activity_observed missing\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 3: 下游验证发现风险项\n\n- Trigger: no_demo\n- Host AI rule: 进入安全/权限治理复核队列。\n- Why it matters: 下游已经要求复核，不能在页面中弱化。\n- Evidence: downstream_validation.risk_items | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 4: 存在评分风险\n\n- Trigger: no_demo\n- Host AI rule: 把风险写入边界卡，并确认是否需要人工复核。\n- Why it matters: 风险会影响是否适合普通用户安装。\n- Evidence: risks.scoring_risks | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 5: issue/PR 响应质量未知\n\n- Trigger: issue_or_pr_quality=unknown。\n- Host AI rule: 抽样最近 issue/PR，判断是否长期无人处理。\n- Why it matters: 用户无法判断遇到问题后是否有人维护。\n- Evidence: evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | issue_or_pr_quality=unknown\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n\n### Constraint 6: 发布节奏不明确\n\n- Trigger: release_recency=unknown。\n- Host AI rule: 确认最近 release/tag 和 README 安装命令是否一致。\n- Why it matters: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Evidence: evidence.maintainer_signals | github_repo:546206616 | 
https://github.com/chroma-core/chroma | release_recency=unknown\n- Hard boundary: 不要把这个坑点包装成已解决、已验证或可忽略，除非后续验证证据明确证明它已经关闭。\n",
      "summary": "给宿主 AI 的上下文和工作边界。",
      "title": "AI Context Pack / 带给我的 AI"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card / 安装前决策卡\n\n项目：chroma-core/chroma\n\n## Doramagic 试用结论\n\n当前结论：可以进入发布前推荐检查；首次使用仍应从最小权限、临时目录和可回滚配置开始。\n\n## 用户现在可以做\n\n- 可以先阅读 Human Manual，理解项目目的和主要工作流。\n- 可以复制 Prompt Preview 做安装前体验；这只验证交互感，不代表真实运行。\n- 可以把官方 Quick Start 命令放到隔离环境中验证，不要直接进主力环境。\n\n## 现在不要做\n\n- 不要把 Prompt Preview 当成项目实际运行结果。\n- 不要把 metadata-only validation 当成沙箱安装验证。\n- 不要把未验证能力写成“已支持、已跑通、可放心安装”。\n- 不要在首次试用时交出生产数据、私人文件、真实密钥或主力配置目录。\n\n## 安装前检查\n\n- 宿主 AI 是否匹配：local_cli\n- 官方安装入口状态：已发现官方入口\n- 是否在临时目录、临时宿主或容器中验证：必须是\n- 是否能回滚配置改动：必须能\n- 是否需要 API Key、网络访问、读写文件或修改宿主配置：未确认前按高风险处理\n- 是否记录了安装命令、实际输出和失败日志：必须记录\n\n## 当前阻塞项\n\n- 无阻塞项。\n\n## 项目专属踩坑\n\n- 能力判断依赖假设（medium）：假设不成立时，用户拿不到承诺的能力。 建议检查：将假设转成下游验证清单。\n- 维护活跃度未知（medium）：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 下游验证发现风险项（medium）：下游已经要求复核，不能在页面中弱化。 建议检查：进入安全/权限治理复核队列。\n- 存在评分风险（medium）：风险会影响是否适合普通用户安装。 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- issue/PR 响应质量未知（low）：用户无法判断遇到问题后是否有人维护。 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n\n## 风险与权限提示\n\n- no_demo: medium\n\n## 证据缺口\n\n- 暂未发现结构化证据缺口。\n",
      "summary": "安装、权限、验证和推荐前风险。",
      "title": "Boundary & Risk Card / 边界与风险卡"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/chroma-core/chroma 项目说明书\n\n生成时间：2026-05-15 23:02:55 UTC\n\n## 目录\n\n- [Chroma Overview](#chroma-overview)\n- [Getting Started with Chroma](#getting-started)\n- [System Architecture Overview](#architecture-overview)\n- [Protocol Buffers & gRPC API](#protocol-buffers-api)\n- [Python Client SDK](#python-client-sdk)\n- [JavaScript/TypeScript Client SDKs](#javascript-client-sdk)\n- [Rust Backend Services Architecture](#rust-services-architecture)\n- [Go Coordinator & Distributed Systems](#go-coordinator)\n- [Data Storage & Blockstore](#data-storage-blockstore)\n- [Embedding Functions Integration](#embedding-functions)\n\n<a id='chroma-overview'></a>\n\n## Chroma Overview\n\n### 相关页面\n\n相关主题：[Getting Started with Chroma](#getting-started), [System Architecture Overview](#architecture-overview)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/chroma-core/chroma/blob/main/README.md)\n- [clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n- [clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n- [rust/types/src/metadata.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/metadata.rs)\n- [rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n- [rust/types/src/execution/operator.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/execution/operator.rs)\n- [examples/deployments/do-terraform/README.md](https://github.com/chroma-core/chroma/blob/main/examples/deployments/do-terraform/README.md)\n</details>\n\n# Chroma Overview\n\n## Introduction\n\nChroma is an open-source data infrastructure platform designed specifically for AI applications. 
It provides the foundational building blocks for storing, querying, and managing vector embeddings along with associated metadata, enabling developers to build AI-powered applications with efficient similarity search capabilities. 资料来源：[README.md:1]()\n\nAs an open-source solution, Chroma offers flexibility for self-hosting while also providing a cloud-hosted option called Chroma Cloud, which delivers serverless vector, hybrid, and full-text search capabilities. The platform is designed to be fast, cost-effective, scalable, and straightforward to deploy. 资料来源：[README.md:17-21]()\n\n## Architecture Overview\n\nChroma follows a client-server architecture with multiple client libraries available for different programming environments. The system is built with Rust for core performance-critical components and provides idiomatic client libraries for Python and JavaScript/TypeScript.\n\n```mermaid\ngraph TD\n    A[Client Applications] --> B[Python Client / JS Client]\n    B --> C[Chroma Server API]\n    C --> D[Worker Nodes]\n    D --> E[Blockstore<br/>Arrow Storage]\n    D --> F[Compaction &<br/>Log Processing]\n    E --> G[Persistent Storage]\n    \n    H[Chroma Cloud] -.->|Optional hosted| C\n```\n\n### Client Libraries\n\nChroma provides two primary client libraries:\n\n| Client | Package | Description |\n|--------|---------|-------------|\n| Python | `chromadb` | Full-featured Python client library 资料来源：[clients/python/README.md:1]() |\n| Python HTTP | `chromadb-client` | Lightweight HTTP-only client for server connections 资料来源：[clients/python/README.md:12]() |\n| JavaScript/TypeScript | `chromadb` (npm) | Full-featured JS client for Node.js and browser 资料来源：[clients/new-js/packages/chromadb/README.md:1]() |\n\n#### Python Client Installation\n\n```bash\npip install chromadb  # Full client library\npip install chromadb-client  # HTTP client only\n```\n\n#### JavaScript Client Example\n\n```javascript\nimport { ChromaClient } from \"chromadb\";\n\nconst chroma = new 
ChromaClient();\nconst collection = await chroma.createCollection({ name: \"test-from-js\" });\n\nfor (let i = 0; i < 20; i++) {\n  await collection.add({\n    ids: [\"test-id-\" + i.toString()],\n    embeddings: [[1, 2, 3, 4, 5]],\n    documents: [\"test\"],\n  });\n}\n\nconst queryData = await collection.query({\n  queryEmbeddings: [[1, 2, 3, 4, 5]],\n  queryTexts: [\"test\"],\n});\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md:9-27]()\n\n## Data Model\n\n### Collection Structure\n\nCollections in Chroma serve as the primary organizational unit for storing related documents and their associated embeddings. Each collection contains:\n\n- **Documents**: The textual content to be embedded\n- **Embeddings**: Vector representations of documents\n- **Metadatas**: Key-value pairs for filtering and categorization\n- **Unique Identifiers**: User-provided IDs for each record 资料来源：[clients/python/README.md:16-27]()\n\n### Metadata Filtering\n\nChroma supports rich metadata filtering through operators that enable precise data retrieval:\n\n```mermaid\ngraph LR\n    A[Query Request] --> B[Metadata Filter]\n    B --> C{Operator Type}\n    C -->|Contains| D[String contains check]\n    C -->|NotContains| E[String excludes check]\n    C -->|Regex| F[Regular expression match]\n    C -->|NotRegex| G[Regex exclusion]\n```\n\n**Supported Document Operators:**\n\n| Operator | Description | Example |\n|----------|-------------|---------|\n| `Contains` | Document contains substring | `{\"$contains\": \"keyword\"}` |\n| `NotContains` | Document excludes substring | `{\"$not_contains\": \"spam\"}` |\n| `Regex` | Regular expression match | `{\"$regex\": \"^prefix.*\"}` |\n| `NotRegex` | Exclude by regex pattern | `{\"$not_regex\": \".*suffix$\"}` |\n\n资料来源：[rust/types/src/metadata.rs:1-30]()\n\n### Search Keys\n\nThe query system supports specialized keys for accessing different aspects of stored data:\n\n| Key | Description | Usage |\n|-----|-------------|-------|\n| `#document` 
| Full text content | `Key::Document` |\n| `#embedding` | Vector embeddings | `Key::Embedding` |\n| `#metadata` | Record metadata | `Key::Metadata` |\n| `#score` | Similarity score | `Key::Score` |\n| Custom fields | User-defined metadata | `Key::field(\"field_name\")` |\n\n资料来源：[rust/types/src/execution/operator.rs:1-80]()\n\n## Core Components\n\n### Storage Layer\n\nThe blockstore provides the underlying storage mechanism using Arrow format for efficient columnar data storage and retrieval. This enables high-performance queries across large datasets. 资料来源：[rust/blockstore/src/arrow/root.rs:1]()\n\n### Execution Operators\n\nChroma's query execution pipeline uses operators that transform and filter data through well-defined stages:\n\n```mermaid\ngraph TD\n    A[Query Request] --> B[Log Fetch Orchestrator]\n    B --> C[KNN Filter]\n    C --> D[Apply Logs Orchestrator]\n    D --> E[Segment Writers]\n    E --> F[Compact Collection]\n```\n\n**Key Orchestrators:**\n\n| Component | Purpose |\n|-----------|---------|\n| `LogFetchOrchestrator` | Fetches and materializes log entries 资料来源：[rust/worker/src/execution/orchestration/log_fetch_orchestrator.rs:1]() |\n| `KnnFilter` | Performs k-nearest neighbor filtering 资料来源：[rust/worker/src/execution/orchestration/knn_filter.rs:1]() |\n| `ApplyLogsOrchestrator` | Applies log entries to segment writers 资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1]() |\n\n### Error Handling\n\nThe system uses a consistent error code hierarchy for reliable error management:\n\n| Error Code | Description |\n|------------|-------------|\n| `InvalidArgument` | Client-provided invalid parameters |\n| `Internal` | System-level internal errors |\n| `ResourceExhausted` | Resource limits reached (e.g., task abortion) |\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:1-20]()\n\n## Deployment Options\n\n### Self-Hosting\n\nChroma can be deployed on-premises or in cloud environments using Docker, Kubernetes, or direct 
installation.\n\n**Deployment Requirements:**\n\n| Component | Specification |\n|-----------|---------------|\n| Storage | Persistent volume for vector data |\n| Network | Port 8000 for API access |\n| Auth | Optional token or basic authentication (v0.4.7+) |\n\n资料来源：[examples/deployments/do-terraform/README.md:1-50]()\n\n**Starting the Server:**\n\n```bash\n# Install via pip\npip install chromadb\n\n# Run in client-server mode\nchroma run --path /chroma_db_path\n```\n\n资料来源：[README.md:14-16]()\n\n### Chroma Cloud\n\nChroma Cloud provides a fully managed hosted service with:\n\n- Serverless vector search\n- Hybrid search capabilities\n- Full-text search integration\n- Automatic scaling\n- $5 free credits for new users\n\n资料来源：[README.md:23-29]()\n\n### Cloud Deployment (Terraform Example)\n\nFor DigitalOcean deployment:\n\n```bash\nexport TF_VAR_do_token=<DIGITALOCEAN_TOKEN>\nexport TF_ssh_public_key=\"./chroma-do.pub\"\nexport TF_ssh_private_key=\"./chroma-do\"\nexport TF_VAR_chroma_release=\"0.4.12\"\nexport TF_VAR_region=\"ams2\"\nexport TF_VAR_public_access=\"true\"\nexport TF_VAR_enable_auth=\"true\"\nexport TF_VAR_auth_type=\"token\"\n\nterraform apply -auto-approve\n```\n\n资料来源：[examples/deployments/do-terraform/README.md:30-45]()\n\n## CLI Tool\n\nThe Rust-based CLI provides command-line management capabilities:\n\n```bash\nchroma run --path <db_path>     # Run the server\nchroma db create <db_name>      # Create database\nchroma db list                  # List databases\nchroma login                    # Authenticate with Chroma Cloud\nchroma profile                  # Manage profiles\nchroma install                  # Install updates\nchroma update                   # Check for updates\n```\n\n资料来源：[rust/cli/src/lib.rs:1-30]()\n\n## Embedding Integration\n\n### Ollama Integration\n\nThe JavaScript client supports Ollama for local embedding generation:\n\n**Configuration Options:**\n\n| Option | Default | Description 
|\n|--------|---------|-------------|\n| `url` | `http://localhost:11434` | Ollama server URL |\n| `model` | `chroma/all-minilm-l6-v2-f32` | Embedding model |\n\n**Supported Models:**\n\n| Model | Dimensions | Use Case |\n|-------|------------|----------|\n| `chroma/all-minilm-l6-v2-f32` | 384 | General purpose (default) |\n| `nomic-embed-text` | 768 | Extended context |\n| `mxbai-embed-large` | 1024 | High accuracy |\n| `snowflake-arctic-embed` | Variable | Domain-specific |\n\n资料来源：[clients/new-js/packages/ai-embeddings/ollama/README.md:1-40]()\n\n## API Response Format\n\n### Get Response Structure\n\nQuery results are returned with flexible inclusion options:\n\n```rust\npub struct GetResponse {\n    pub ids: Vec<String>,\n    pub embeddings: Option<Vec<Vec<f32>>>,      // Optional\n    pub documents: Option<Vec<Option<String>>>, // Optional\n    pub uris: Option<Vec<Option<String>>>,      // Optional\n    pub metadatas: Option<Vec<Option<Metadata>>>, // Optional\n    pub include: IncludeList,\n}\n```\n\n资料来源：[rust/types/src/api_types.rs:1-30]()\n\n## License\n\nChroma is released under the Apache 2.0 license, making it suitable for both commercial and open-source projects. 
资料来源：[README.md:10]()\n\n## Community and Support\n\n| Resource | Link |\n|----------|------|\n| Documentation | https://docs.trychroma.com/ |\n| Discord | https://discord.gg/MMeYNTmh3x |\n| Homepage | https://www.trychroma.com/ |\n\n---\n\n<a id='getting-started'></a>\n\n## Getting Started with Chroma\n\n### 相关页面\n\n相关主题：[Chroma Overview](#chroma-overview), [Python Client SDK](#python-client-sdk)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n- [clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n- [clients/js/packages/chromadb-client/README.md](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n- [clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n- [chromadb/utils/embedding_functions/schemas/README.md](https://github.com/chroma-core/chroma/blob/main/chromadb/utils/embedding_functions/schemas/README.md)\n- [README.md](https://github.com/chroma-core/chroma/blob/main/README.md)\n- [rust/chroma/README.md](https://github.com/chroma-core/chroma/blob/main/rust/chroma/README.md)\n</details>\n\n# Getting Started with Chroma\n\nChroma is an open-source data infrastructure for AI that provides vector, hybrid, and full-text search capabilities. It enables developers to build AI applications by storing embeddings, documents, and metadata with efficient querying mechanisms.\n\n## Overview\n\nChroma serves as a vector database optimized for AI workloads. 
It allows you to:\n\n- Store embeddings alongside documents and metadata\n- Query using text or embedding vectors\n- Filter results based on metadata\n- Work with multiple programming languages including Python and JavaScript\n\n## Installation\n\n### Python Client\n\nInstall the Python client using pip:\n\n```bash\npip install chromadb\n```\n\nFor a lightweight HTTP-only client that connects to a Chroma server:\n\n```bash\npip install chromadb-client\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n\n### JavaScript/TypeScript Client\n\nFor the new JavaScript client:\n\n```bash\nnpm install chromadb\n```\n\nFor a lighter package with optional dependencies:\n\n```bash\nnpm install chromadb-client\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n## Basic Setup and Configuration\n\n### Python Client Setup\n\nConnect to a Chroma server running locally:\n\n```python\nimport chromadb\n\nclient = chromadb.HttpClient(host=\"localhost\", port=8000)\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n\n### JavaScript Client Setup\n\n```javascript\nimport { ChromaClient } from \"chromadb\";\n\nconst chroma = new ChromaClient();\nconst collection = await chroma.createCollection({ name: \"test-from-js\" });\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n### Running Chroma Server\n\nTo run Chroma in client-server mode:\n\n```bash\nchroma run --path /chroma_db_path\n```\n\n资料来源：[README.md](https://github.com/chroma-core/chroma/blob/main/README.md)\n\n## Core Operations\n\n### Creating a Collection\n\nCollections are containers for your documents, embeddings, and metadata.\n\n```python\ncollection = 
client.create_collection(\"all-my-documents\")\n```\n\n### Adding Documents\n\nAdd documents with optional embeddings, metadata, and unique IDs:\n\n```python\ncollection.add(\n    documents=[\"This is document1\", \"This is document2\"],\n    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],\n    ids=[\"doc1\", \"doc2\"],\n    embeddings=[[1.2, 2.1, ...], [1.2, 2.1, ...]]\n)\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md)\n\n### Querying Documents\n\nQuery the collection using text or embeddings:\n\n```python\nresults = collection.query(\n    query_texts=[\"This is a query document\"],\n    n_results=2\n)\n```\n\n```javascript\nconst queryData = await collection.query({\n    queryEmbeddings: [[1, 2, 3, 4, 5]],\n    queryTexts: [\"test\"],\n});\n```\n\n资料来源：[clients/python/README.md](https://github.com/chroma-core/chroma/blob/main/clients/python/README.md) and [clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n## Embedding Functions\n\nChroma supports various embedding providers through configurable embedding functions.\n\n### Configuration Schema\n\nEmbedding functions use JSON Schema validation to ensure cross-language compatibility:\n\n```python\nfrom chromadb.utils.embedding_functions.schemas import validate_config\n\nconfig = {\n    \"api_key_env_var\": \"CHROMA_OPENAI_API_KEY\",\n    \"model_name\": \"text-embedding-ada-002\"\n}\nvalidate_config(config, \"openai\")\n```\n\nEach schema follows JSON Schema Draft-07 specification and includes version, title, description, properties, required fields, and additionalProperties settings.\n\n资料来源：[chromadb/utils/embedding_functions/schemas/README.md](https://github.com/chroma-core/chroma/blob/main/chromadb/utils/embedding_functions/schemas/README.md)\n\n### Available Embedding Providers\n\n| Provider | Package | API Key Environment Variable 
|\n|----------|---------|------------------------------|\n| OpenAI | `@chroma-core/openai` | `CHROMA_OPENAI_API_KEY` |\n| Cohere | `@chroma-core/cohere` | `COHERE_API_KEY` |\n| Jina | `@chroma-core/jina` | `JINA_API_KEY` |\n| Google Gemini | `@chroma-core/google-gemini` | `GOOGLE_API_KEY` |\n| Hugging Face | `@chroma-core/hugging-face` | `HF_API_KEY` |\n| Ollama | `@chroma-core/ollama` | `OLLAMA_API_KEY` |\n| Together AI | `@chroma-core/together-ai` | `TOGETHER_API_KEY` |\n| Voyage AI | `@chroma-core/voyageai` | `VOYAGE_API_KEY` |\n| xAI | `@chroma-core/xai` | `XAI_API_KEY` |\n\n资料来源：[clients/new-js/packages/ai-embeddings/all/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/README.md)\n\n### Using Embedding Functions\n\n```typescript\nimport { ChromaClient } from 'chromadb';\nimport { JinaEmbeddingFunction } from '@chroma-core/jina';\n\nconst embedder = new JinaEmbeddingFunction({\n    apiKey: 'your-api-key',\n    modelName: 'jina-embeddings-v2-base-en',\n    task: 'retrieval.passage',\n    dimensions: 768,\n    lateChunking: false,\n    truncate: true,\n    normalized: true,\n    embeddingType: 'float'\n});\n\nconst collection = await client.createCollection({\n    name: 'my-collection',\n    embeddingFunction: embedder,\n});\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/jina/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/jina/README.md)\n\n### Common Utilities\n\nThe `@chroma-core/ai-embeddings-common` package provides shared utilities:\n\n```typescript\nimport { validateConfigSchema, snakeCase, isBrowser } from '@chroma-core/ai-embeddings-common';\n\n// Convert camelCase to snake_case\nconst snakeCaseConfig = snakeCase({ modelName: 'text-embedding-3-small' });\n// Result: { model_name: 'text-embedding-3-small' }\n\n// Check environment\nif (isBrowser()) {\n    // Browser-specific 
logic\n}\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n\n## JavaScript Client Packages\n\n### chromadb vs chromadb-client\n\n| Feature | `chromadb` | `chromadb-client` |\n|---------|------------|-------------------|\n| Package size | Larger | Smaller |\n| Dependencies | Bundled | Optional peer dependencies |\n| Use case | Quick setup | Production with specific providers |\n\nThe `chromadb-client` package is ideal for production environments where you only use specific embedding providers.\n\n资料来源：[clients/js/packages/chromadb-client/README.md](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n\n## Chroma Cloud\n\nChroma Cloud provides a hosted service for serverless vector, hybrid, and full-text search. To use Chroma Cloud:\n\n1. Sign up at [trychroma.com](https://trychroma.com/signup)\n2. Create a database\n3. Get your API key from the dashboard\n\nConfigure environment variables for cloud access:\n\n```bash\nexport CHROMA_API_KEY=your-api-key\nexport CHROMA_TENANT=your-tenant\nexport CHROMA_DATABASE=your-database\n```\n\n资料来源：[README.md](https://github.com/chroma-core/chroma/blob/main/README.md) and [rust/chroma/README.md](https://github.com/chroma-core/chroma/blob/main/rust/chroma/README.md)\n\n## Environment Variables\n\n| Variable | Description |\n|----------|-------------|\n| `CHROMA_API_KEY` | API key for Chroma Cloud authentication |\n| `CHROMA_TENANT` | Sets the tenant (auto-inferred with API key) |\n| `CHROMA_DATABASE` | Sets the database (auto-inferred with scoped API key) |\n| `[PROVIDER]_API_KEY` | Provider-specific API keys (e.g., `OPENAI_API_KEY`) |\n\nFor local development, you can use:\n\n```rust\nlet client = ChromaHttpClient::from_env()?;\n```\n\n资料来源：[rust/chroma/README.md](https://github.com/chroma-core/chroma/blob/main/rust/chroma/README.md)\n\n## Complete Example 
Workflow\n\n```mermaid\ngraph TD\n    A[Install Chroma Client] --> B[Initialize Client]\n    B --> C[Create Collection]\n    C --> D[Add Documents with Embeddings]\n    D --> E[Query Collection]\n    E --> F[Get Results]\n    \n    G[Configure Embedding Function] --> D\n    H[Add Metadata] --> D\n    I[Set API Keys] --> B\n```\n\n## Quick Reference Commands\n\n### Installation\n\n```bash\n# Python\npip install chromadb\n\n# JavaScript\nnpm install chromadb\n\n# Start server\nchroma run --path /chroma_db_path\n```\n\n### Basic Operations\n\n| Operation | Python | JavaScript |\n|-----------|--------|------------|\n| Create client | `client = chromadb.HttpClient()` | `new ChromaClient()` |\n| Create collection | `client.create_collection(name)` | `client.createCollection({name})` |\n| Add documents | `collection.add(...)` | `collection.add(...)` |\n| Query | `collection.query(...)` | `collection.query(...)` |\n\n## Additional Resources\n\n- [Documentation](https://docs.trychroma.com/)\n- [Community Discord](https://discord.gg/MMeYNTmh3x)\n- [GitHub Repository](https://github.com/chroma-core/chroma)\n- [Homepage](https://www.trychroma.com/)\n\n---\n\n<a id='architecture-overview'></a>\n\n## System Architecture Overview\n\n### 相关页面\n\n相关主题：[Rust Backend Services Architecture](#rust-services-architecture), [Go Coordinator & Distributed Systems](#go-coordinator), [Protocol Buffers & gRPC API](#protocol-buffers-api)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/frontend/src/server.rs](https://github.com/chroma-core/chroma/blob/main/rust/frontend/src/server.rs)\n- [rust/worker/src/server.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/server.rs)\n- [rust/sysdb/src/sysdb.rs](https://github.com/chroma-core/chroma/blob/main/rust/sysdb/src/sysdb.rs)\n- [rust/types/src/lib.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/lib.rs)\n- 
[docs/mintlify/reference/architecture/overview.mdx](https://github.com/chroma-core/chroma/blob/main/docs/mintlify/reference/architecture/overview.mdx)\n</details>\n\n# System Architecture Overview\n\n## Introduction\n\nChroma is an open-source data infrastructure platform designed for AI applications, providing vector, hybrid, and full-text search capabilities. The system is built as a distributed, scalable architecture that handles embedding storage, indexing, and query execution across multiple components. Chroma positions itself as the open-source alternative to hosted vector database services, enabling developers to deploy sophisticated AI search infrastructure while maintaining full control over their data.\n\nThe architecture follows a modular design pattern with distinct components for API serving, query processing, data storage, and system coordination. Each component is responsible for specific aspects of the data pipeline, from ingestion through indexing to query execution.\n\n## High-Level Architecture\n\nChroma's architecture consists of three primary layers working in concert to provide vector search capabilities:\n\n1. **Frontend Layer** - Handles API requests and response formatting\n2. **Worker Layer** - Executes query operations and manages indexing\n3. **System Database (SysDB) Layer** - Maintains metadata and system state\n\n```mermaid\ngraph TD\n    A[Client Application] --> B[Frontend Server]\n    B --> C[Worker Servers]\n    C --> D[SysDB]\n    C --> E[Blockstore]\n    E --> F[Arrow Files]\n    D --> G[Collection Metadata]\n    G --> H[Topology Information]\n```\n\n## Component Architecture\n\n### Frontend Server\n\nThe frontend server component serves as the API gateway for Chroma, handling incoming HTTP/gRPC requests and translating them into internal operations. 
The frontend is responsible for request validation, authentication handling, and response serialization.\n\n**Key Responsibilities:**\n\n| Responsibility | Description |\n|----------------|-------------|\n| API Endpoint Handling | Exposes REST and gRPC endpoints for collection operations |\n| Request Validation | Validates incoming query parameters and payload structures |\n| Response Serialization | Converts internal data structures to API response formats |\n| Error Mapping | Translates internal errors to appropriate HTTP status codes |\n\n资料来源：[rust/frontend/src/server.rs:1-50]()\n\nThe frontend server implements the `ChromaError` trait for consistent error handling across the system. Error codes are mapped as follows:\n\n| Internal Error | HTTP Status Code |\n|----------------|------------------|\n| InvalidArgument | 400 Bad Request |\n| NotFound | 404 Not Found |\n| Internal | 500 Internal Server Error |\n| Unavailable | 503 Service Unavailable |\n\n### Worker Server\n\nThe worker server handles the core data operations including embedding storage, indexing, and query execution. 
Workers are the primary compute units in Chroma's architecture, responsible for processing search requests and maintaining index structures.\n\n资料来源：[rust/worker/src/server.rs:1-60]()\n\n**Worker Components:**\n\n```mermaid\ngraph LR\n    A[Query Request] --> B[Query Planner]\n    B --> C[HNSW Index]\n    B --> D[Spann Index]\n    B --> E[Record Segment]\n    B --> F[Metadata Segment]\n    C --> G[Result Merger]\n    D --> G\n    E --> G\n    F --> G\n    G --> H[Response]\n```\n\nThe worker server implements orchestration components for managing complex operations:\n\n- **ApplyLogsOrchestrator** - Coordinates log application and compaction\n- **WorkQueueClient** - Manages distributed task execution\n- **Segment Writers** - Handles data persistence for different segment types\n\n资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1-80]()\n\n### System Database (SysDB)\n\nThe SysDB component maintains all metadata about collections, segments, and system topology. It provides a centralized view of the system's state and enables coordination across multiple workers.\n\n**SysDB Responsibilities:**\n\n| Function | Description |\n|----------|-------------|\n| Collection Metadata | Stores collection configurations and schemas |\n| Segment Registry | Tracks active segments and their locations |\n| Topology Management | Manages provider-region mappings for distributed deployments |\n| Transaction Coordination | Ensures consistency across distributed operations |\n\n资料来源：[rust/sysdb/src/sysdb.rs:1-100]()\n\nThe SysDB uses a provider-region topology model that supports multi-cloud and multi-region deployments:\n\n```rust\npub struct ProviderRegion<T> {\n    name: RegionName,\n    provider: String,      // e.g., \"aws\", \"gcp\"\n    region: String,        // e.g., \"us-east-1\"\n    config: T,             // Provider-specific configuration\n}\n```\n\n资料来源：[rust/types/src/topology.rs:1-60]()\n\n## Data Model Architecture\n\n### Collection 
Schema\n\nCollections in Chroma follow a flexible schema model that supports multiple index types and data fields.\n\n```mermaid\ngraph TD\n    A[Collection] --> B[Record Segment]\n    A --> C[Metadata Segment]\n    A --> D[Vector Index]\n    A --> E[Sparse Vector Index]\n    D --> F[HNSW Index]\n    D --> G[Spann Index]\n```\n\n**Supported Index Types:**\n\n| Index Type | Purpose | Key Configuration |\n|------------|---------|-------------------|\n| Vector Index | Dense embeddings | `Space` (Cosine, L2, Dot), HNSW params |\n| Sparse Vector Index | BM25-style inverted index | StringInvertedIndexConfig |\n| Spann Index | Memory-efficient approximate search | InternalSpannConfiguration |\n\n资料来源：[rust/types/src/collection_schema.rs:1-150]()\n\n### API Types\n\nThe API layer defines core types for query operations:\n\n| Type | Purpose |\n|------|---------|\n| `Include` | Specifies which fields to return (distances, documents, embeddings, metadatas, uris) |\n| `IncludeList` | Collection of Include values with convenience constructors |\n| `WhereDocumentOperator` | Document filtering (Contains, NotContains, Regex, NotRegex) |\n\n资料来源：[rust/types/src/api_types.rs:1-100]()\n\n```rust\npub enum Include {\n    Distance,\n    Document,\n    Embedding,\n    Metadata,\n    Uri,\n}\n\nimpl IncludeList {\n    pub fn default_query() -> Self {\n        Self(vec![Include::Document, Include::Metadata, Include::Distance])\n    }\n    pub fn all() -> Self {\n        Self(vec![Include::Document, Include::Metadata, Include::Distance, Include::Embedding, Include::Uri])\n    }\n}\n```\n\n### Metadata Filtering\n\nChroma supports rich metadata filtering through the `MetadataExpression` and `MetadataComparison` types:\n\n```mermaid\ngraph TD\n    A[MetadataExpression] --> B[key: String]\n    A --> C[comparison: MetadataComparison]\n    C --> D[Primitive: Operator + Value]\n    C --> E[Set: Operator + SetValue]\n```\n\n资料来源：[rust/types/src/metadata.rs:1-80]()\n\n## Blockstore 
Architecture\n\nThe blockstore provides persistent storage for indexed data using Apache Arrow format for efficient serialization and querying.\n\n### Arrow Block Structure\n\n```mermaid\ngraph LR\n    A[Write Operation] --> B[Block Delta]\n    B --> C[Commit to Block]\n    C --> D[Arrow IPC Format]\n    D --> E[Disk Storage]\n    E --> F[BlockfileReader]\n```\n\n**Block Types:**\n\n| Block Type | Description |\n|------------|-------------|\n| `OrderedBlockDelta` | Sequential writes with ordering guarantees |\n| `UnorderedBlockDelta` | High-throughput writes without ordering |\n| `DirectoryBlock` | Sparse posting directory entries |\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:1-100]()\n\nThe Arrow layout verification ensures data integrity:\n\n```rust\npub enum ArrowLayoutVerificationError {\n    BufferLengthNotAligned,\n    NoRecordBatches,\n    MultipleRecordBatches,\n    InvalidMessageType,\n    RecordBatchDecodeError,\n}\n```\n\n### Sparse Posting Blocks\n\nSparse vectors use a specialized block format for efficient storage:\n\n```\nbody = [ max_offset: u32 LE, max_weight: f32 LE ] × num_entries\n```\n\nThe `DirectoryBlock` stores per-posting-block metadata for term pruning:\n\n- `max_offset`: Largest document offset in the posting block\n- `max_weight`: Largest weight in the posting block\n\n资料来源：[rust/types/src/sparse_posting_block.rs:1-60]()\n\n## Spann Index Architecture\n\nSpann is Chroma's memory-efficient approximate nearest neighbor index that combines HNSW with posting lists.\n\n```mermaid\ngraph TD\n    A[SpannIndexWriter] --> B[HNSW Index]\n    A --> C[Posting Lists]\n    A --> D[Versions Map]\n    A --> E[MaxHeadID Blockfile]\n    B --> F[Reader with adaptive search]\n```\n\n**SpannIndexReader Structure:**\n\n| Component | Type | Purpose |\n|-----------|------|---------|\n| posting_lists | BlockfileReader<u32, SpannPostingList> | Term postings |\n| hnsw_index | HnswIndexRef | Graph-based search |\n| versions_map | BlockfileReader<u32, u32> | 
Version tracking |\n| dimensionality | usize | Vector dimension |\n| adaptive_search_nprobe | bool | Adaptive parameter |\n\n资料来源：[rust/index/src/spann/types.rs:1-80]()\n\n## Indexing Pipeline\n\nThe indexing pipeline handles document ingestion through the following stages:\n\n```mermaid\ngraph LR\n    A[Add Records] --> B[ApplyLogsOrchestrator]\n    B --> C[Record Segment Writer]\n    B --> D[Metadata Segment Writer]\n    B --> E[Vector Index Writer]\n    C --> F[Flush to Blockstore]\n    D --> F\n    E --> F\n    F --> G[Collection Update]\n```\n\n**Error Handling:**\n\nThe orchestrator implements comprehensive error tracking:\n\n| Error Type | Error Code | Tracing |\n|------------|------------|---------|\n| ApplyLog | Internal | Yes |\n| Channel | Internal | Yes |\n| Commit | Internal | Yes |\n| HnswSegment | Internal | Yes |\n| MetadataSegment | Internal | Yes |\n| Seal | Internal | Yes |\n| InvariantViolation | - | Always |\n\n资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1-100]()\n\n## Query Execution Flow\n\n### Query Request Processing\n\n```mermaid\ngraph TD\n    A[Query Request] --> B[Parse Query]\n    B --> C[Load Segments]\n    C --> D[Parallel Segment Queries]\n    D --> E{HNSW Search}\n    D --> F{Spann Search}\n    D --> G{Record Scan}\n    E --> H[Merge Results]\n    F --> H\n    G --> H\n    H --> I[Apply Filters]\n    I --> J[Return Results]\n```\n\n### Work Queue Integration\n\nDistributed query execution uses a work queue system for task coordination:\n\n```mermaid\ngraph TD\n    A[Coordinator] --> B[WorkQueueClient]\n    B --> C[gRPC Channel]\n    C --> D[Worker Pool]\n    D --> E[Task Execution]\n    E --> F[Result Collection]\n```\n\n**Error Code Mapping:**\n\n| gRPC Code | Chroma Error Code |\n|-----------|-------------------|\n| Unavailable | Unavailable |\n| DeadlineExceeded | DeadlineExceeded |\n| ResourceExhausted | ResourceExhausted |\n| NotFound | NotFound |\n| InvalidArgument | InvalidArgument 
|\n\n资料来源：[rust/worker/src/work_queue/work_queue_client.rs:1-80]()\n\n## Deployment Topology\n\nChroma supports flexible deployment configurations through its topology model:\n\n```mermaid\ngraph TD\n    A[Topology] --> B[TopologyName]\n    A --> C[Vec<RegionName>]\n    A --> D[Config T]\n    C --> E[ProviderRegion]\n    E --> F[Provider]\n    E --> G[Region]\n```\n\nThe topology system enables:\n\n- Multi-cloud deployments (AWS, GCP, Azure)\n- Region-specific configurations\n- Custom provider extensions\n\n## Summary\n\nChroma's architecture provides a scalable foundation for AI-powered search with several key design principles:\n\n1. **Separation of Concerns** - Frontend, worker, and SysDB components handle distinct responsibilities\n2. **Arrow-Based Storage** - Efficient columnar storage for analytical queries\n3. **Flexible Indexing** - Support for HNSW, Spann, and sparse vector indexes\n4. **Distributed Coordination** - Work queues and topology management for multi-node deployments\n5. 
**Comprehensive Error Handling** - Consistent error codes and tracing across all components\n\nThe modular architecture allows Chroma to scale from single-node development deployments to distributed production clusters serving AI applications at scale.\n\n---\n\n<a id='protocol-buffers-api'></a>\n\n## Protocol Buffers & gRPC API\n\n### 相关页面\n\n相关主题：[System Architecture Overview](#architecture-overview), [Rust Backend Services Architecture](#rust-services-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/types/src/record.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/record.rs)\n- [rust/types/src/metadata.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/metadata.rs)\n- [rust/types/src/collection_schema.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/collection_schema.rs)\n- [rust/types/src/topology.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/topology.rs)\n- [clients/js/packages/chromadb-core/src/generated/api.ts](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-core/src/generated/api.ts)\n- [go/README.md](https://github.com/chroma-core/chroma/blob/main/go/README.md)\n- [rust/blockstore/src/arrow/root.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n</details>\n\n# Protocol Buffers & gRPC API\n\nChroma uses Protocol Buffers (protobuf) as the core serialization format for inter-service communication and data persistence. The IDL (Interface Definition Language) files in the `idl/` directory define the service APIs, data structures, and message types that power Chroma's distributed architecture.\n\n## Architecture Overview\n\nChroma employs a client-server architecture where Protocol Buffers serve as the contract between components. 
The protobuf definitions are centralized in the `idl/` directory and used to generate code for multiple language runtimes including Python, JavaScript, Go, and Rust.\n\n```mermaid\ngraph TD\n    subgraph \"Client Layer\"\n        JS[JavaScript Client]\n        PY[Python Client]\n        GO[Go Client]\n    end\n    \n    subgraph \"IDL Definitions\"\n        PROTO[Protocol Buffer Definitions]\n    end\n    \n    subgraph \"Server Layer\"\n        API[API Server]\n        COORD[Coordinator Service]\n        QUERY[Query Executor]\n    end\n    \n    JS -->|Generated TS Bindings| PROTO\n    PY -->|Generated Python Stub| PROTO\n    GO -->|Generated Go Code| PROTO\n    API -->|gRPC/prost| PROTO\n    COORD -->|gRPC/prost| PROTO\n    QUERY -->|gRPC/prost| PROTO\n```\n\n## Proto Definitions Structure\n\n### Core Service Definitions\n\nThe main protobuf definitions are organized in `idl/chromadb/proto/`:\n\n| Proto File | Purpose | Key Messages |\n|------------|---------|--------------|\n| `chroma.proto` | Core data types and collection operations | Collection, Database, OperationRecord |\n| `coordinator.proto` | Coordinator service for cluster management | Tenant, Database, Segment operations |\n| `query_executor.proto` | Query execution service interface | Query requests and responses |\n\n### Data Type Coverage\n\nThe protobuf definitions cover all core data types used throughout Chroma:\n\n| Data Type | Usage |\n|-----------|-------|\n| `Vector` | Embedding vectors with scalar encoding |\n| `OperationRecord` | CRUD operations for records |\n| `LogRecord` | Write-ahead log entries with offsets |\n| `Metadata` | Key-value metadata for filtering |\n| `Collection` | Collection configuration and schema |\n| `Cmek` | Customer-managed encryption keys |\n\n## Rust Type Conversions\n\nChroma's Rust backend uses protobuf-generated types and converts them to idiomatic Rust types through `TryFrom` implementations. 
This pattern ensures type safety and clean separation between the wire format and internal representations.\n\n### Record Conversions\n\nThe `rust/types/src/record.rs` file contains conversion logic between protobuf and Rust types:\n\n```mermaid\ngraph LR\n    A[chroma_proto::LogRecord] -->|TryFrom| B[LogRecord Rust]\n    A2[chroma_proto::Vector] -->|TryFrom| B2[(Vec<f32>, ScalarEncoding)]\n```\n\n**OperationRecord Conversion** (资料来源：[rust/types/src/record.rs:recordinfo]())\n\nThe `OperationRecord` conversion extracts metadata and document fields from protobuf representations:\n\n```rust\n// Metadata is extracted from proto, with document potentially in metadata\nlet (metadata, document) = match operation_record_proto.metadata {\n    Some(proto_metadata) => match UpdateMetadata::try_from(proto_metadata) {\n        Ok(mut metadata) => {\n            let document = metadata.remove(CHROMA_DOCUMENT_KEY);\n            match document {\n                Some(UpdateMetadataValue::Str(document)) => {\n                    (Some(metadata), Some(document))\n                }\n                _ => (Some(metadata), None),\n            }\n        }\n        Err(e) => return Err(RecordConversionError::...),\n    },\n    None => (None, None),\n};\n```\n\n### Vector Type Conversions\n\nVectors are stored with their encoding information (资料来源：[rust/types/src/record.rs:vector]())\n\n```rust\nimpl TryFrom<chroma_proto::Vector> for (Vec<f32>, ScalarEncoding) {\n    type Error = VectorConversionError;\n    // Conversion implementation\n}\n```\n\n## Metadata Filtering Types\n\nThe metadata system supports rich filtering expressions defined in protobuf and converted to Rust types (资料来源：[rust/types/src/metadata.rs:metadata-types]())\n\n### Document Operators\n\n```mermaid\ngraph TD\n    DOC_OPS[WhereDocumentOperator] --> Contains\n    DOC_OPS --> NotContains\n    DOC_OPS --> Regex\n    DOC_OPS --> NotRegex\n```\n\n| Operator | Description |\n|----------|-------------|\n| `Contains` | 
Document contains substring |\n| `NotContains` | Document does not contain substring |\n| `Regex` | Document matches regex pattern |\n| `NotRegex` | Document does not match regex pattern |\n\n### Metadata Expression Structure\n\n```rust\npub struct MetadataExpression {\n    pub key: String,\n    pub comparison: MetadataComparison,\n}\n```\n\nMetadata comparisons support both primitive types (strings, integers, floats, booleans) and set operations.\n\n## Collection Schema Definitions\n\nSchema definitions in `rust/types/src/collection_schema.rs` define how collections are configured for indexing (资料来源：[rust/types/src/collection_schema.rs:schema-struct]())\n\n### Schema Builder Pattern\n\nThe `Schema` struct provides a fluent builder API for index configuration:\n\n```mermaid\ngraph TD\n    SCHEMA[Schema::default] --> CREATE_INDEX[.create_index]\n    CREATE_INDEX --> VALIDATE[Validate Index Config]\n    VALIDATE -->|Valid| RETURN[Return Self]\n    VALIDATE -->|Invalid| ERROR[SchemaBuilderError]\n```\n\n**Index Creation Example** (资料来源：[rust/types/src/collection_schema.rs:create-index-example]())\n\n```rust\nlet schema = Schema::default()\n    .create_index(None, VectorIndexConfig {\n        space: Some(Space::Cosine),\n        embedding_function: None,\n        source_key: None,\n        hnsw: None,\n        spann: None,\n    }.into())?\n    .create_index(Some(\"category\"), StringInvertedIndexConfig {}.into())?;\n```\n\n### Supported Index Types\n\n| Index Type | Configuration | Applies To |\n|------------|---------------|------------|\n| `VectorIndexConfig` | HNSW, Space (Cosine/L2/IP), embedding function | `#embedding` key only |\n| `StringInvertedIndexConfig` | String indexing | Custom string keys |\n| `FtsIndexConfig` | Full-text search | Document key |\n\n## CMEK (Customer-Managed Encryption Keys)\n\nChroma supports customer-managed encryption keys through the `Cmek` type defined in protobuf (资料来源：[rust/types/src/collection_schema.rs:cmek]())\n\n### CMEK 
Provider Configuration\n\n| Provider | Validation Pattern | Resource Format |\n|----------|-------------------|-----------------|\n| GCP | `CMEK_GCP_RE` regex | GCP resource identifier |\n\n```rust\nimpl Cmek {\n    pub fn gcp(resource: String) -> Self;\n    pub fn validate_pattern(&self) -> bool;\n}\n```\n\n## Topology and Region Management\n\nFor multi-region deployments, Chroma uses topology definitions (资料来源：[rust/types/src/topology.rs:topology]())\n\n### Provider Region Structure\n\n```mermaid\nclassDiagram\n    class ProviderRegion {\n        +name: RegionName\n        +provider: String\n        +region: String\n        +config: T\n    }\n    \n    class Topology {\n        +name: TopologyName\n        +regions: Vec~RegionName~\n        +config: T\n    }\n```\n\n| Component | Description |\n|-----------|-------------|\n| `ProviderRegion` | Single cloud provider region configuration |\n| `Topology` | Collection of regions forming a deployment topology |\n\n## Code Generation Pipeline\n\n### Build Process\n\nProtobuf definitions are compiled to target languages using `protoc` and language-specific plugins (资料来源：[go/README.md:protobuf-setup]())\n\n```mermaid\ngraph LR\n    A[.proto files] --> B[protoc compiler]\n    B -->|Python| C[Python stubs]\n    B -->|Go| D[Go gRPC code]\n    B -->|JS/TS| E[TypeScript definitions]\n    B -->|Rust| F[Rust + prost]\n```\n\n### Required Tools\n\n| Tool | Purpose |\n|------|---------|\n| `protoc` | Protocol Buffer compiler |\n| `protoc-gen-go` | Go code generation |\n| `protoc-gen-go-grpc` | Go gRPC service generation |\n\n### Generated API Patterns\n\nThe generated TypeScript API in `clients/js/packages/chromadb-core/src/generated/api.ts` follows standard gRPC-web patterns (资料来源：[clients/js/packages/chromadb-core/src/generated/api.ts:fetch-pattern]())\n\n```typescript\nconst localVarFetchArgs = ApiApiFetchParamCreator(configuration).version(options);\nreturn (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => {\n 
   return fetch(\n        basePath + localVarFetchArgs.url,\n        localVarFetchArgs.options,\n    ).then((response) => {\n        // Handle response by content type and status\n        if (response.status === 200) {\n            if (mimeType === \"application/json\") {\n                return response.json();\n            }\n        }\n        // Error handling for 401, 404, 409, 500\n    });\n};\n```\n\n### Error Code Mapping\n\nError types are mapped from Rust/Arrow errors to Chroma error codes (资料来源：[rust/blockstore/src/arrow/root.rs:error-mapping]())\n\n| Arrow Error Type | Chroma Error Code |\n|-----------------|-------------------|\n| `IOError` | `Internal` |\n| `ArrowError` | `Internal` |\n| `LayoutVerificationError` | `Internal` |\n| `FromBytesError` variants | `InvalidArgument` / `Internal` |\n\n## Message Format Details\n\n### Arrow Block Serialization\n\nBinary data in protobuf messages uses Arrow IPC format for efficient columnar storage (资料来源：[rust/blockstore/src/arrow/root.rs:arrow-reader]())\n\n```rust\nlet arrow_reader = arrow::ipc::reader::FileReader::try_new(&mut cursor, None);\nlet record_batch = match arrow_reader {\n    Ok(mut reader) => match reader.next() {\n        Some(Ok(batch)) => batch,\n        Some(Err(e)) => return Err(FromBytesError::ArrowError(e)),\n        None => return Err(FromBytesError::NoDataError),\n    },\n    Err(e) => return Err(FromBytesError::ArrowError(e)),\n};\n```\n\n### IPC Footer Structure\n\nThe Arrow footer format requires:\n- ARROW_MAGIC header (6 bytes)\n- Footer content\n- Footer length (4 bytes)\n- Footer checksum\n\n## See Also\n\n- [Rust Types Module](rust/types/src/) - Internal Rust type definitions\n- [Block Store Architecture](rust/blockstore/) - Data persistence with Arrow\n- [Client SDKs](clients/) - Multi-language client implementations\n- [Go Server Implementation](go/) - Server-side gRPC implementation\n\n---\n\n<a id='python-client-sdk'></a>\n\n## Python Client SDK\n\n### 相关页面\n\n相关主题：[Getting 
Started with Chroma](#getting-started), [JavaScript/TypeScript Client SDKs](#javascript-client-sdk), [Embedding Functions Integration](#embedding-functions)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [chromadb/api/client.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/client.py)\n- [chromadb/api/async_client.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/async_client.py)\n- [chromadb/api/models/Collection.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/models/Collection.py)\n- [chromadb/api/types.py](https://github.com/chroma-core/chroma/blob/main/chromadb/api/types.py)\n- [clients/python/pyproject.toml](https://github.com/chroma-core/chroma/blob/main/clients/python/pyproject.toml)\n</details>\n\n# Python Client SDK\n\nThe Chroma Python Client SDK is the official Python library for interacting with Chroma, an open-source vector database designed for AI applications. This SDK provides a complete interface for managing collections, storing embeddings, and performing similarity searches across vector data.\n\n## Overview\n\nChroma positions itself as the open-source data infrastructure for AI, offering developers a streamlined way to incorporate vector search capabilities into their applications. The Python Client SDK serves as the primary client library for Python developers, enabling seamless integration with Chroma's vector database capabilities.\n\nThe SDK supports two primary modes of operation: **embedded mode**, where the database runs locally within the same process, and **client-server mode**, where the Python client communicates with a remote Chroma server via HTTP. 
This flexibility allows developers to choose the deployment architecture that best fits their application requirements, whether they need a lightweight local setup for development and testing or a scalable server-based deployment for production environments.\n\nFor Python-specific installations, developers can choose between the full `chromadb` package, which includes all embedding libraries as dependencies, or the `chromadb-client` package, which is a lightweight HTTP-only client that connects to a running Chroma server. The installation is straightforward via pip, making it accessible for projects of all sizes.\n\nThe SDK is designed with developer productivity in mind, providing intuitive APIs for common operations like adding documents, querying collections, and managing metadata. It handles the complexity of embedding generation and vector storage behind a clean, Pythonic interface, allowing developers to focus on building their AI applications rather than managing low-level database operations.\n\n## Architecture\n\nThe Python Client SDK follows a layered architecture that separates concerns between the client interface, API communication, and data models. Understanding this architecture helps developers effectively use the SDK and troubleshoot any issues that may arise during development.\n\n```mermaid\ngraph TD\n    A[Application Code] --> B[ChromaClient / AsyncChromaClient]\n    B --> C[Collection API]\n    B --> D[Embedding Functions]\n    C --> E[REST API Layer]\n    D --> F[External Embedding Providers]\n    E --> G[Chroma Server]\n    E --> H[Embedded Mode]\n    G --> I[Persistent Storage]\n    H --> I\n```\n\n### Client Layer\n\nThe client layer forms the entry point for all SDK operations. 
Chroma provides two client implementations: the synchronous `Client` class for traditional Python applications and the `AsyncClient` class for asynchronous applications built with async/await patterns.\n\nThe synchronous client is suitable for most use cases, providing blocking API calls that execute immediately and return results. This approach is familiar to developers coming from traditional Python backgrounds and works well in scripts, batch processing jobs, and web applications that don't require high concurrency.\n\nThe asynchronous client, on the other hand, is designed for applications that need to handle many concurrent operations efficiently, such as web servers built on frameworks like FastAPI or Starlette. By using Python's asyncio library, the async client can perform multiple network operations concurrently, improving throughput in I/O-bound scenarios.\n\nBoth clients share a similar interface, with the async client simply wrapping the underlying HTTP calls with async/await syntax. This consistency makes it easy to switch between synchronous and asynchronous code as requirements evolve.\n\n### Collection Management\n\nCollections serve as the primary organizational unit in Chroma, analogous to tables in traditional relational databases or buckets in object storage. Each collection contains a set of vectors along with their associated metadata, documents, and unique identifiers.\n\nThe SDK provides a comprehensive collection API that supports creating new collections, retrieving existing ones, listing all collections in the database, and deleting collections when they're no longer needed. Collections can be configured with specific settings at creation time, including the embedding function to use for auto-embedding documents and the name of the collection for identification purposes.\n\nCollections maintain a schema-like structure through their use of metadata. 
While Chroma is schemaless in the traditional sense, the metadata associated with vectors allows developers to impose structure on their data for filtering and organization purposes.\n\n### Data Model\n\nThe data model in Chroma revolves around four core concepts: vectors, documents, metadata, and IDs. Each record in a collection consists of these four components, providing a flexible yet structured way to store and retrieve information.\n\nVectors are the mathematical representations of data in embedding space. They can be provided directly by the application or generated automatically using embedding functions. The SDK accepts vectors as lists of floating-point numbers, making it compatible with output from virtually any embedding model.\n\nDocuments are the original text or content that was transformed into vectors. Storing documents alongside their vectors enables applications to retrieve the original content during query operations without needing to maintain a separate document store.\n\nMetadata provides contextual information about each record. Examples include the source of the document, timestamps, user IDs, or any other application-specific attributes. Metadata can be used for filtering during queries, allowing applications to narrow search results based on specific criteria.\n\nIDs uniquely identify each record within a collection. The SDK accepts string identifiers, giving applications flexibility in how they choose to name and reference their data. Common patterns include using UUIDs, meaningful string identifiers derived from the document content, or sequential numbers.\n\n## Installation and Setup\n\nInstalling the Chroma Python Client SDK is straightforward using pip, Python's package manager. 
The SDK is available in two variants to accommodate different use cases and deployment scenarios.\n\n```bash\npip install chromadb\n```\n\nThis command installs the full Chroma package, which includes all core functionality plus built-in support for various embedding providers. This variant is recommended for most users who want a complete, self-contained installation.\n\n```bash\npip install chromadb-client\n```\n\nThis command installs only the HTTP client library, which is useful for scenarios where the Chroma server runs separately or where a minimal dependency footprint is required. This variant connects to Chroma servers via HTTP and doesn't include embedding provider libraries.\n\n## Client Initialization\n\nInitializing the Chroma client depends on the deployment mode and desired configuration. The SDK provides flexible initialization options to accommodate different environments.\n\n### Embedded Mode\n\nIn embedded mode, Chroma runs entirely within your Python process, storing data locally. This is ideal for development, testing, and small-scale deployments where a separate server isn't required.\n\n```python\nimport chromadb\n\nclient = chromadb.Client()\n```\n\nNote that the default `chromadb.Client()` is ephemeral: data is held in memory and is lost when the process exits. For data that persists across process restarts, use `chromadb.PersistentClient(path=\"./chroma\")`, which stores data in a local directory and is suitable for applications that need persistent storage without the complexity of a separate server process.\n\n### Client-Server Mode\n\nIn client-server mode, your Python application connects to a Chroma server running separately, either locally or on a remote machine. This architecture supports larger-scale deployments and enables sharing data across multiple client applications.\n\n```python\nimport chromadb\n\nclient = chromadb.HttpClient(\n    host=\"localhost\",\n    port=8000\n)\n```\n\nThe HTTP client communicates with the server using REST API calls, handling serialization, network transport, and error handling transparently. 
This mode requires a Chroma server to be running and accessible at the specified host and port.\n\n### Configuration Options\n\nThe client supports various configuration options to customize its behavior for specific use cases. These options can be provided during client initialization to control aspects like SSL/TLS settings, authentication, and connection pooling.\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `host` | string | \"localhost\" | Server hostname or IP address |\n| `port` | integer | 8000 | Server port number |\n| `ssl` | boolean | false | Enable SSL/TLS encryption |\n| `headers` | dict | None | Custom HTTP headers for requests |\n| `tenant` | string | None | Tenant identifier for multi-tenant setups |\n| `database` | string | None | Database name for organized data storage |\n\n## Collection Operations\n\nCollections are the central organizing structure in Chroma, grouping related vectors, documents, and metadata together. The SDK provides a comprehensive API for creating, managing, and interacting with collections.\n\n### Creating a Collection\n\nCollections are created using the client's `create_collection` method, which accepts a name and optional configuration parameters.\n\n```python\ncollection = client.create_collection(\n    name=\"my-documents\",\n    metadata={\"description\": \"Document collection for RAG\"},\n    get_or_create=True\n)\n```\n\nThe `get_or_create` parameter is particularly useful in production applications, as it prevents errors if a collection with the same name already exists. When set to `True`, the method returns the existing collection if one exists or creates a new one if it doesn't.\n\n### Adding Data\n\nData is added to collections using the `add` method, which accepts vectors, documents, metadata, and unique identifiers. 
All parameters must be provided as lists of equal length, with each index representing a single record.\n\n```python\ncollection.add(\n    documents=[\"This is the first document\", \"This is the second document\"],\n    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],\n    ids=[\"doc-1\", \"doc-2\"],\n    embeddings=[[1.2, 2.1, 3.5], [1.1, 2.0, 3.4]]\n)\n```\n\nThe SDK supports automatic embedding generation when embedding functions are configured for the collection. In this case, documents can be provided without explicit embeddings, and the SDK will generate the vector representations automatically.\n\n### Querying Data\n\nQuerying is performed using the `query` method, which accepts query text or query vectors and returns the most similar results based on vector similarity.\n\n```python\nresults = collection.query(\n    query_texts=[\"search terms here\"],\n    n_results=2,\n    where={\"source\": \"notion\"},\n    include=[\"documents\", \"metadatas\", \"distances\"]\n)\n```\n\nThe `where` parameter enables filtering results based on metadata conditions, allowing applications to narrow search results to specific subsets of data. The `include` parameter controls which data components are returned, helping optimize bandwidth and processing for applications that don't need all available information.\n\nQuery results include the matched document IDs, the documents themselves, associated metadata, and distance scores indicating how similar each result is to the query. Lower distance scores indicate higher similarity, with zero representing an exact match.\n\n### Updating and Deleting Data\n\nThe SDK supports updating existing records and deleting unwanted data from collections. 
These operations are essential for maintaining data accuracy and managing collection lifecycle.\n\n```python\ncollection.update(\n    ids=[\"doc-1\"],\n    documents=[\"Updated document content\"],\n    metadatas=[{\"source\": \"notion\", \"updated\": True}]\n)\n\ncollection.delete(\n    ids=[\"doc-2\"],\n    where={\"source\": \"google-docs\"}\n)\n```\n\nUpdate operations modify existing records identified by their IDs, replacing the specified fields while preserving unchanged data. Delete operations remove records matching the provided ID or metadata filters, with the ability to delete multiple records simultaneously.\n\n## Querying and Filtering\n\nChroma provides powerful querying and filtering capabilities that enable precise retrieval of relevant results. Understanding these capabilities is essential for building effective vector search applications.\n\n### Vector Similarity Search\n\nThe core query operation performs vector similarity search, finding the most similar records to a given query vector or text. The SDK handles text queries by first embedding them using the collection's configured embedding function.\n\nResults are ranked by similarity, with the most similar results appearing first. The `n_results` parameter controls how many results are returned, allowing applications to balance result completeness with performance considerations.\n\n### Metadata Filtering\n\nMetadata filtering narrows search results based on document attributes stored alongside vectors. 
This is particularly useful for applications that need to search within specific subsets of data, such as documents from a particular source or within a date range.\n\n```python\nresults = collection.query(\n    query_texts=[\"search terms\"],\n    where={\n        \"$and\": [\n            {\"source\": \"notion\"},\n            {\"category\": {\"$in\": [\"technical\", \"documentation\"]}}\n        ]\n    }\n)\n```\n\nThe filter syntax supports various operators including equality, inequality, comparison operators for numeric ranges, and set membership tests. Multiple conditions must be combined explicitly using the `$and` or `$or` logical operators, as shown above.\n\n### Result Inclusion\n\nThe `include` parameter controls which data components are included in query results. This allows applications to optimize their queries by requesting only the data they need.\n\n| Include Option | Description |\n|---------------|-------------|\n| `embeddings` | Include the full vector for each result |\n| `documents` | Include the original document text |\n| `metadatas` | Include the associated metadata |\n| `distances` | Include similarity distance scores |\n\nBy default, documents, metadatas, and distances are included in query results, while embeddings must be requested explicitly. Applications should specify only the needed components to minimize bandwidth usage and processing overhead.\n\n## Embedding Functions\n\nEmbedding functions transform text into vector representations that capture semantic meaning. Chroma supports multiple embedding providers, allowing applications to choose the approach that best fits their requirements.\n\n### Built-in Embeddings\n\nFor simple use cases, Chroma includes a default embedding function that works out of the box without additional configuration. This function is suitable for development and testing but may not provide the best quality embeddings for production applications.\n\n### External Providers\n\nFor production applications requiring higher quality embeddings, Chroma supports integration with external embedding services. 
These services provide state-of-the-art embedding models that can significantly improve search quality.\n\nSupported providers include OpenAI's embedding models, which offer excellent quality for English text, and various open-source alternatives. Each provider has its own configuration requirements, typically involving API keys and model selection parameters.\n\nConfiguration is typically done at the collection level, allowing different collections to use different embedding functions if needed. This flexibility supports applications that work with multiple data types or require different embedding strategies for different use cases.\n\n### Custom Embedding Functions\n\nFor specialized use cases, applications can implement custom embedding functions by conforming to the SDK's embedding function interface. This allows integration with any embedding model or service that can be accessed from Python.\n\nCustom functions receive a list of texts and return a corresponding list of vectors. They can implement any logic needed, including batching, caching, and error handling, giving applications full control over the embedding process.\n\n## Error Handling\n\nThe SDK provides comprehensive error handling to help applications gracefully manage failure scenarios. Understanding the error types and how to handle them is important for building robust applications.\n\n### Connection Errors\n\nConnection errors occur when the client cannot establish communication with the Chroma server. 
These errors can result from network issues, server unavailability, or incorrect server configuration.\n\n```python\ntry:\n    collection = client.get_collection(\"my-collection\")\nexcept chromadb.errors.ChromaError:\n    print(\"Unable to connect to Chroma server\")\n```\n\nApplications should implement appropriate retry logic and user-facing error messages when connection errors occur, as these situations typically require intervention beyond the application's control.\n\n### Collection Not Found\n\nOperations on non-existent collections raise specific errors that can be caught and handled appropriately.\n\n```python\ntry:\n    collection = client.get_collection(\"non-existent\")\nexcept chromadb.errors.NotFoundError:\n    print(\"Collection does not exist\")\n```\n\nThe `get_or_create` parameter available during collection creation provides an alternative to explicit error handling when the existence of a collection is uncertain.\n\n### Invalid Arguments\n\nInvalid argument errors indicate problems with the data or parameters provided to SDK methods. These errors typically result from bugs in application code or invalid user input.\n\nExamples include malformed IDs, vectors of incorrect dimensions, mismatched list lengths, and invalid filter expressions. The error messages provide guidance on what parameter is problematic, making debugging straightforward.\n\n## Best Practices\n\nFollowing best practices ensures optimal performance, reliability, and maintainability when using the Python Client SDK in production applications.\n\n### Connection Management\n\nApplications should create a single client instance and reuse it across the application rather than creating new clients for each operation. 
The client manages connection pooling and state internally, and creating multiple instances can lead to resource waste and inconsistent state.\n\n```python\nclient = chromadb.HttpClient(host=\"localhost\", port=8000)\n\ndef get_collection():\n    return client.get_collection(\"my-documents\")\n```\n\nFor applications that require clean-up, the client should be properly closed when the application terminates, ensuring any pending operations complete and resources are released.\n\n### Batch Operations\n\nWhen adding or querying large numbers of records, batching operations improves performance by reducing network overhead and allowing the server to optimize processing. The SDK handles batching internally for the most common operations, but applications should be aware of batch size considerations.\n\n### Error Recovery\n\nProduction applications should implement comprehensive error handling that distinguishes between recoverable errors (like temporary network issues) and non-recoverable errors (like invalid input). Recoverable errors can be handled with retry logic, while non-recoverable errors should surface appropriate feedback to users.\n\n## Related Documentation\n\nFor further information on using Chroma's Python Client SDK, the following resources provide additional context and examples.\n\nThe official Chroma documentation at trychroma.com provides comprehensive guides on getting started, deployment options, and advanced usage patterns. The documentation includes tutorials, API reference material, and example applications that demonstrate real-world usage.\n\nThe GitHub repository at github.com/chroma-core/chroma contains the complete source code for Chroma, including the Python Client SDK. Developers interested in understanding implementation details or contributing to the project can explore the codebase directly.\n\nThe Chroma Discord community provides a forum for asking questions, sharing experiences, and connecting with other developers using Chroma. 
The community is an excellent resource for troubleshooting issues and discovering best practices from experienced users.\n\n---\n\n<a id='javascript-client-sdk'></a>\n\n## JavaScript/TypeScript Client SDKs\n\n### 相关页面\n\n相关主题：[Python Client SDK](#python-client-sdk), [Getting Started with Chroma](#getting-started)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [clients/js/packages/chromadb-core/src/ChromaClient.ts](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-core/src/ChromaClient.ts)\n- [clients/new-js/packages/chromadb/src/chroma-client.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/chroma-client.ts)\n- [clients/new-js/packages/chromadb/src/api/sdk.gen.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/api/sdk.gen.ts)\n- [clients/js/packages/chromadb-core/src/Collection.ts](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-core/src/Collection.ts)\n- [clients/js/packages/chromadb/package.json](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)\n- [clients/js/packages/chromadb-client/package.json](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/package.json)\n- [clients/new-js/packages/chromadb/package.json](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/package.json)\n- [clients/new-js/packages/ai-embeddings/all/package.json](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/package.json)\n- [clients/js/examples/node/README.md](https://github.com/chroma-core/chroma/blob/main/clients/js/examples/node/README.md)\n\n</details>\n\n# JavaScript/TypeScript Client SDKs\n\nChroma provides comprehensive JavaScript and TypeScript client libraries for interacting with Chroma servers from browser and Node.js environments. 
The SDKs offer both low-level HTTP API access and high-level abstractions for collections, embedding functions, and query operations.\n\n## Architecture Overview\n\nChroma maintains two generations of JavaScript clients to support different use cases and ecosystem requirements.\n\n```mermaid\ngraph TD\n    A[Chroma Server] <--> B[HTTP API];\n    B <--> C[Legacy JS Client v2.4.7];\n    B <--> D[new-js Client v3.4.5];\n    C --> E[chromadb<br/>Bundled];\n    C --> F[chromadb-client<br/>Peer Dependencies];\n    D --> G[ChromaClient];\n    D --> H[Embedding Functions<br/>via @chroma-core/*];\n```\n\n### Client Package Versions\n\n| Package | Version | Type | Description |\n|---------|---------|------|-------------|\n| `chromadb` (legacy) | 2.4.7 | npm | Bundled package with all embedding libraries included |\n| `chromadb-client` (legacy) | 2.4.7 | npm | Client package requiring peer dependencies |\n| `chromadb` (new-js) | 3.4.5 | npm | Modern client with modular architecture |\n| `@internal/chromadb-core` | 2.4.7 | workspace | Shared core functionality |\n\n资料来源：[clients/js/packages/chromadb/package.json:3](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)  \n资料来源：[clients/new-js/packages/chromadb/package.json:3](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/package.json)\n\n## Package Structure\n\n### Legacy Client (v2.x)\n\nThe legacy client provides two distribution options:\n\n```mermaid\ngraph LR\n    A[chromadb] --> B[chromadb-core<br/>+ All Embeddings];\n    C[chromadb-client] --> D[chromadb-core<br/>+ Peer Dependencies];\n    B --> E[@google/generative-ai];\n    B --> F[@xenova/transformers];\n    B --> G[cohere-ai];\n    D --> E;\n    D --> F;\n    D --> G;\n```\n\n| Package | Use Case | Embedding Libraries |\n|---------|----------|---------------------|\n| `chromadb` | Simple projects wanting everything included | Bundled with all providers |\n| `chromadb-client` | Projects needing 
specific embedding libraries | Peer dependencies required |\n\n资料来源：[clients/js/packages/chromadb-client/package.json:1-55](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/package.json)\n\n### New-JS Client (v3.x)\n\nThe new JavaScript client uses a modular workspace architecture with the following structure:\n\n```\nclients/new-js/\n├── packages/\n│   ├── chromadb/                    # Core client package\n│   │   └── src/\n│   │       ├── chroma-client.ts     # Main client implementation\n│   │       └── api/\n│   │           └── sdk.gen.ts       # Generated API client\n│   └── ai-embeddings/\n│       ├── common/                  # Shared utilities\n│       ├── all/                     # Aggregated providers\n│       ├── chroma-bm25/             # BM25 sparse embeddings\n│       ├── cohere/                  # Cohere provider\n│       ├── google-gemini/           # Google Gemini provider\n│       ├── huggingface-server/      # HuggingFace server\n│       ├── jina/                    # Jina AI provider\n│       ├── together-ai/             # Together AI provider\n│       └── voyageai/                # Voyage AI provider\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/all/package.json:1-45](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/package.json)\n\n## Module Exports Configuration\n\nBoth client generations support modern JavaScript module resolution with ESM and CommonJS exports.\n\n### Export Structure\n\n```mermaid\ngraph TD\n    A[Package Entry] --> B{Import Type};\n    B -->|ESM import| C[.mjs / .d.ts];\n    B -->|CommonJS require| D[.cjs / .d.cts];\n    C --> E[dist/*.mjs];\n    D --> F[dist/cjs/*.cjs];\n```\n\n| Export Condition | Entry Point | Type Definitions |\n|-------------------|-------------|------------------|\n| ESM `import` | `dist/chromadb.mjs` | `dist/chromadb.d.ts` |\n| CommonJS `require` | `dist/cjs/chromadb.cjs` | `dist/cjs/chromadb.d.cts` 
|\n\n资料来源：[clients/js/packages/chromadb/package.json:12-25](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)  \n资料来源：[clients/new-js/packages/chromadb/package.json:12-25](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/package.json)\n\n## Client Initialization\n\n### Basic Connection\n\n```typescript\nimport { ChromaClient } from \"chromadb\";\n\n// Initialize the client\nconst chroma = new ChromaClient({ \n  path: \"http://localhost:8000\" \n});\n```\n\n资料来源：[clients/js/packages/chromadb-client/README.md:15-20](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n\n### With Embedding Function\n\n```typescript\nimport { ChromaClient } from 'chromadb';\nimport { TogetherAIEmbeddingFunction } from '@chroma-core/together-ai';\n\nconst embedder = new TogetherAIEmbeddingFunction({\n  apiKey: 'your-api-key',\n  modelName: 'togethercomputer/m2-bert-80M-8k-retrieval',\n});\n\nconst client = new ChromaClient({\n  path: 'http://localhost:8000',\n});\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/together-ai/README.md:1-35](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/together-ai/README.md)\n\n## Collection Operations\n\nCollections are the primary data structure for storing and querying embeddings.\n\n### Create Collection\n\n```typescript\nconst collection = await chroma.createCollection({\n  name: \"my-collection\",\n  embeddingFunction: embedder,  // Optional\n  metadata: {                    // Optional\n    description: \"My document collection\"\n  }\n});\n```\n\n### Add Documents\n\n```typescript\nawait collection.add({\n  ids: [\"id1\", \"id2\"],\n  embeddings: [                  // Optional if embedding function provided\n    [1.1, 2.3, 3.2],\n    [4.5, 6.9, 4.4],\n  ],\n  metadatas: [{ source: \"doc1\" }, { source: \"doc2\" }],\n  documents: [\"Document 1 content\", \"Document 2 
content\"],\n});\n```\n\n### Query Collection\n\n```typescript\nconst results = await collection.query({\n  queryEmbeddings: [1.1, 2.3, 3.2],    // Or queryTexts with embedding function\n  queryTexts: [\"Sample query\"],          // Text query (uses embedding function)\n  nResults: 2,                           // Number of results\n  where: { source: \"doc1\" },             // Optional metadata filter\n  include: [\"documents\", \"metadatas\", \"distances\"]\n});\n```\n\n资料来源：[clients/js/packages/chromadb-client/README.md:25-50](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/README.md)\n\n## Embedding Function Providers\n\nThe new-js client provides first-class support for multiple embedding providers through the `@chroma-core/*` packages.\n\n### Available Providers\n\n| Provider Package | Model Examples | API Required |\n|------------------|----------------|--------------|\n| `@chroma-core/together-ai` | `togethercomputer/m2-bert-80M-8k-retrieval` | Yes |\n| `@chroma-core/voyageai` | `voyage-2` | Yes |\n| `@chroma-core/google-gemini` | `text-embedding-004` | Yes |\n| `@chroma-core/jina` | `jina-embeddings-v2-base-en` | Yes |\n| `@chroma-core/cohere` | Various Cohere models | Yes |\n| `@chroma-core/chroma-bm25` | N/A (local algorithm) | No |\n| `@chroma-core/all` | All providers bundled | Varies |\n\n资料来源：[clients/new-js/packages/ai-embeddings/together-ai/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/together-ai/README.md)  \n资料来源：[clients/new-js/packages/ai-embeddings/voyageai/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/voyageai/README.md)\n\n### Configuration Options\n\nEach embedding function supports common configuration patterns:\n\n```typescript\nconst embedder = new SomeEmbeddingFunction({\n  apiKey: 'your-api-key',          // Or set via environment variable\n  apiKeyEnvVar: 'PROVIDER_API_KEY', // Default env var 
name\n  modelName: 'provider-model-name', // Provider-specific model\n  // Provider-specific options\n  task: 'retrieval.passage',       // Jina example\n  dimensions: 768,                  // Jina example\n  truncate: true,                   // Jina example\n  normalized: true,                 // Jina example\n});\n```\n\n### Environment Variable Configuration\n\n| Provider | Environment Variable |\n|----------|---------------------|\n| Together AI | `TOGETHER_API_KEY` |\n| Voyage AI | `VOYAGE_API_KEY` |\n| Google Gemini | `GEMINI_API_KEY` |\n| Jina | `JINA_API_KEY` |\n\n资料来源：[clients/new-js/packages/ai-embeddings/jina/README.md:1-45](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/jina/README.md)\n\n## Rust Native Bindings\n\nFor performance-critical applications, Chroma provides pre-built Rust native bindings for Node.js.\n\n### Supported Platforms\n\n| Package Name | OS | Architecture | LibC |\n|--------------|-----|--------------|------|\n| `chromadb-js-bindings-darwin-x64` | macOS (Intel) | x64 | N/A |\n| `chromadb-js-bindings-darwin-arm64` | macOS (Apple Silicon) | arm64 | N/A |\n| `chromadb-js-bindings-linux-x64-gnu` | Linux | x64 | glibc |\n| `chromadb-js-bindings-linux-arm64-gnu` | Linux | arm64 | glibc |\n\nAll bindings versions: **1.3.4**  \nMinimum Node.js version: **>= 10**\n\n资料来源：[rust/js_bindings/npm/darwin-x64/package.json:1-18](https://github.com/chroma-core/chroma/blob/main/rust/js_bindings/npm/darwin-x64/package.json)  \n资料来源：[rust/js_bindings/npm/linux-x64-gnu/package.json:1-18](https://github.com/chroma-core/chroma/blob/main/rust/js_bindings/npm/linux-x64-gnu/package.json)\n\n## Build and Development\n\n### Build Scripts\n\n| Command | Description |\n|---------|-------------|\n| `pnpm build` | Build all packages |\n| `pnpm build:core` | Build only `@internal/chromadb-core` |\n| `pnpm build:packages` | Build all packages except core |\n| `pnpm watch` | Watch mode for development |\n| `pnpm test` | Run all 
tests |\n| `pnpm test:functional` | Run functional tests (excluding auth) |\n\n### New-JS Client Build Configuration\n\n```json\n{\n  \"scripts\": {\n    \"build\": \"tsup\",\n    \"watch\": \"tsup --watch\",\n    \"typecheck\": \"tsc --noEmit\"\n  }\n}\n```\n\nBuild tooling uses **tsup** for efficient bundling with TypeScript support.\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/package.json:18-25](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/package.json)  \n资料来源：[clients/js/package.json:22-30](https://github.com/chroma-core/chroma/blob/main/clients/js/package.json)\n\n## Choosing a Client Package\n\n```mermaid\ngraph TD\n    A[Start] --> B{Do you need all embedding providers?};\n    B -->|Yes, convenience| C[chromadb v2.4.7<br/>or @chroma-core/all + chromadb v3.4.5];\n    B -->|No, want to minimize bundle| D{Do you have embedding requirements?};\n    D -->|Yes, specific providers| E[chromadb-client v2.4.7<br/>with peer dependencies];\n    D -->|No, just vector storage| F[chromadb-client v2.4.7<br/>or chromadb v3.4.5];\n    C --> G[Include all embedding libraries];\n    E --> H[Only install needed providers];\n    F --> I[No embedding function needed];\n```\n\n### Decision Matrix\n\n| Requirement | Recommended Package |\n|-------------|--------------------|\n| Simple setup, all features | `chromadb` (bundled) |\n| Minimal bundle size | `chromadb-client` with peer deps |\n| Modern architecture | `chromadb` (new-js v3.4.5) |\n| BM25 sparse embeddings | `@chroma-core/chroma-bm25` |\n| Cloud/Remote providers | `@chroma-core/*` packages |\n\n资料来源：[clients/js/examples/node/README.md:1-45](https://github.com/chroma-core/chroma/blob/main/clients/js/examples/node/README.md)\n\n## TypeScript Support\n\nAll JavaScript client packages include full TypeScript type definitions:\n\n```json\n{\n  \"types\": \"dist/chromadb.d.ts\",\n  \"exports\": {\n    \".\": {\n      \"import\": {\n        \"types\": 
\"./dist/chromadb.d.ts\"\n      },\n      \"require\": {\n        \"types\": \"./dist/cjs/chromadb.d.cts\"\n      }\n    }\n  }\n}\n```\n\nThe TypeScript minimum version requirement is **^5.0.4** for the legacy client and **^5.3.3** for new-js packages.\n\n资料来源：[clients/js/packages/chromadb/package.json:8](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb/package.json)  \n资料来源：[clients/new-js/packages/ai-embeddings/common/package.json:30](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/package.json)\n\n## Dependencies\n\n### Core Dependencies\n\n| Package | Version | Purpose |\n|---------|---------|---------|\n| `isomorphic-fetch` | ^3.0.0 | HTTP client for browser/Node.js |\n| `ajv` | ^8.12.0 / ^8.17.1 | JSON schema validation |\n| `cliui` | ^8.0.1 | CLI utilities |\n\n### Node.js Compatibility\n\n| Package Generation | Minimum Node.js |\n|--------------------|-----------------|\n| Legacy (v2.x) | >= 14.17.0 |\n| New-JS (v3.x) | >= 20 |\n| Rust Bindings | >= 10 |\n\n资料来源：[clients/js/packages/chromadb-client/package.json:50-55](https://github.com/chroma-core/chroma/blob/main/clients/js/packages/chromadb-client/package.json)  \n资料来源：[clients/new-js/packages/ai-embeddings/common/package.json:35-38](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/package.json)\n\n---\n\n<a id='rust-services-architecture'></a>\n\n## Rust Backend Services Architecture\n\n### 相关页面\n\n相关主题：[System Architecture Overview](#architecture-overview), [Data Storage & Blockstore](#data-storage-blockstore)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/blockstore/src/arrow/root.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n- [rust/blockstore/src/arrow/block/types.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/block/types.rs)\n- 
[rust/blockstore/src/arrow/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n- [rust/types/src/execution/operator.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/execution/operator.rs)\n- [rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n- [rust/types/src/topology.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/topology.rs)\n- [rust/types/src/collection_schema.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/collection_schema.rs)\n- [rust/types/src/sparse_posting_block.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/sparse_posting_block.rs)\n- [rust/index/src/spann/types.rs](https://github.com/chroma-core/chroma/blob/main/rust/index/src/spann/types.rs)\n- [rust/worker/src/work_queue/work_queue_client.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/work_queue/work_queue_client.rs)\n- [rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs)\n- [rust/worker/src/execution/orchestration/knn_filter.rs](https://github.com/chroma-core/chroma/blob/main/rust/worker/src/execution/orchestration/knn_filter.rs)\n</details>\n\n# Rust Backend Services Architecture\n\n## Overview\n\nThe Chroma Rust backend provides a high-performance, scalable vector database service built entirely in Rust. 
The architecture follows a distributed systems design with multiple specialized services working together to handle embedding storage, indexing, and similarity search operations.\n\n### Design Goals\n\n| Goal | Description |\n|------|-------------|\n| High Performance | Arrow-based columnar storage for efficient data access |\n| Scalability | Multi-cloud, multi-region deployment support |\n| Reliability | Comprehensive error handling with typed error codes |\n| Flexibility | Multiple index types (HNSW, Spann, Inverted) |\n| Consistency | Ordered and unordered mutation ordering options |\n\n### Core Service Components\n\n```mermaid\ngraph TD\n    subgraph \"Rust Backend Services\"\n        W[Worker Service]\n        BS[Blockstore Service]\n        SYS[Sysdb Service]\n        LOG[Log Service]\n    end\n    \n    W --> BS\n    W --> SYS\n    W --> LOG\n```\n\n## Blockstore Architecture\n\nThe blockstore is the core storage layer in Chroma's Rust backend, providing persistent storage for vector embeddings and associated metadata using Arrow columnar format.\n\n### Arrow-Based Storage\n\nChroma uses Apache Arrow as its primary storage format, which provides:\n\n- **Columnar Layout**: Efficient analytic queries by column\n- **Zero-Copy Reads**: Memory-mapped access patterns\n- **Cross-Language Interop**: Standardized binary format\n- **Compression Support**: Built-in encoding/decoding\n\n资料来源：[rust/blockstore/src/arrow/root.rs:1-40]()\n\n### Blockfile Structure\n\n```mermaid\ngraph TD\n    subgraph \"Blockfile Components\"\n        BF[Blockfile]\n        BR[Block Reader]\n        BW[Block Writer]\n        RM[Root Manager]\n        BM[Block Manager]\n    end\n    \n    BF --> BR\n    BF --> BW\n    BW --> RM\n    BR --> BM\n```\n\n#### Root Management\n\nThe `Root` component manages the root directory structure and file operations:\n\n```rust\npub(super) fn get_all_block_ids_from_bytes(\n    bytes: &[u8],\n    id: Uuid,\n) -> Result<Vec<Uuid>, FromBytesError>\n```\n\nKey 
responsibilities:\n- Reading Arrow IPC files\n- Extracting block metadata and IDs\n- Version validation and verification\n\n资料来源：[rust/blockstore/src/arrow/root.rs:28-50]()\n\n#### Block Layout Verification\n\nThe block layout verification ensures data integrity:\n\n```rust\n#[derive(Error, Debug)]\npub enum ArrowLayoutVerificationError {\n    #[error(\"Buffer length is not 64 byte aligned\")]\n    BufferLengthNotAligned,\n    #[error(\"No record batches in footer\")]\n    NoRecordBatches,\n    #[error(\"More than one record batch in IPC file\")]\n    MultipleRecordBatches,\n    #[error(\"Invalid message type\")]\n    InvalidMessageType,\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:1-30]()\n\n| Error Type | Error Code | Severity |\n|------------|------------|----------|\n| `BufferLengthNotAligned` | Internal | High |\n| `NoRecordBatches` | Internal | High |\n| `MultipleRecordBatches` | Internal | Medium |\n| `InvalidMessageType` | Internal | High |\n| `RecordBatchDecodeError` | Internal | High |\n\n### Blockfile Writer Types\n\nChroma supports two mutation ordering strategies:\n\n| Ordering Type | Description | Use Case |\n|--------------|-------------|----------|\n| `Ordered` | Sequential writes with guaranteed order | Consistent state |\n| `Unordered` | Parallel writes for throughput | High-volume ingestion |\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:1-50]()\n\n```rust\nmatch options.mutation_ordering {\n    BlockfileWriterMutationOrdering::Ordered => {\n        let file = ArrowOrderedBlockfileWriter::from_root(...);\n        Ok(BlockfileWriter::ArrowOrderedBlockfileWriter(file))\n    }\n    BlockfileWriterMutationOrdering::Unordered => {\n        let file = ArrowUnorderedBlockfileWriter::from_root(...);\n        Ok(BlockfileWriter::ArrowUnorderedBlockfileWriter(file))\n    }\n}\n```\n\n### Forking and Versioning\n\nBlockfiles support forking for snapshot isolation:\n\n```rust\nlet new_root = self\n    .root_manager\n    .fork::<K>(\n        
&fork_from,\n        new_id,\n        &options.prefix_path,\n        self.block_manager.default_max_block_size_bytes(),\n    )\n    .await\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:1-30]()\n\n## Type System\n\n### Query Result Types\n\nThe execution layer uses a rich type system for search results:\n\n```rust\n#[derive(Clone, Debug, Default)]\npub struct SearchPayloadResult {\n    pub records: Vec<SearchRecord>,\n}\n```\n\n资料来源：[rust/types/src/execution/operator.rs:1-20]()\n\n#### Search Results Structure\n\n```mermaid\ngraph LR\n    SR[SearchResult] --> SPR[SearchPayloadResult]\n    SPR --> SR_vec[Vec<SearchRecord>]\n    SR --> PLB[pulled_log_bytes]\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `results` | `Vec<SearchPayloadResult>` | Per-query search results |\n| `pulled_log_bytes` | `u64` | Total log bytes fetched for metrics |\n\n### Include Enum\n\nThe `Include` enum controls which fields are returned in query results:\n\n```rust\npub enum Include {\n    #[serde(rename = \"distances\")]\n    Distance,\n    #[serde(rename = \"documents\")]\n    Document,\n    #[serde(rename = \"embeddings\")]\n    Embedding,\n    #[serde(rename = \"metadatas\")]\n    Metadata,\n    #[serde(rename = \"uris\")]\n    Uri,\n}\n```\n\n资料来源：[rust/types/src/api_types.rs:1-30]()\n\n| Include Value | Returned Field | Default Query |\n|---------------|----------------|---------------|\n| `distances` | Distance scores | ✓ |\n| `documents` | Text content | ✓ |\n| `embeddings` | Vector data | ✗ |\n| `metadatas` | Metadata objects | ✓ |\n| `uris` | Resource URIs | ✗ |\n\n#### IncludeList Helper Methods\n\n```rust\nimpl IncludeList {\n    pub fn empty() -> Self { Self(Vec::new()) }\n    \n    pub fn default_query() -> Self {\n        Self(vec![Include::Document, Include::Metadata, Include::Distance])\n    }\n    \n    pub fn default_get() -> Self {\n        Self(vec![Include::Document, Include::Metadata])\n    }\n    \n    pub fn all() -> Self {\n        
Self(vec![Include::Document, Include::Metadata, Include::Distance, \n                  Include::Embedding, Include::Uri])\n    }\n}\n```\n\n资料来源：[rust/types/src/api_types.rs:1-60]()\n\n### Key Filter System\n\nThe `Key` enum represents filterable fields in metadata queries:\n\n```rust\npub enum Key {\n    Document,\n    Embedding,\n    Metadata,\n    Score,\n    MetadataField(String),\n}\n```\n\n资料来源：[rust/types/src/operator.rs:1-30]()\n\n| Key | Purpose | Example |\n|-----|---------|---------|\n| `#document` | Document content | `Key::Document` |\n| `#embedding` | Vector data | `Key::Embedding` |\n| `#metadata` | All metadata | `Key::Metadata` |\n| `#score` | Similarity score | `Key::Score` |\n| `field_name` | Custom metadata | `Key::MetadataField(\"status\")` |\n\n#### Key Factory Methods\n\n```rust\nimpl Key {\n    /// Creates a Key for a custom metadata field\n    pub fn field(name: impl Into<String>) -> Self {\n        Key::MetadataField(name.into())\n    }\n    \n    /// Creates an equality filter: `field == value`\n    pub fn eq(self, value: impl Into<MetadataValue>) -> ComparisonValue { ... 
}\n}\n```\n\n## Index Architecture\n\n### Spann Index\n\nSpann is Chroma's sparse vector index implementation combining HNSW with posting lists:\n\n```rust\n#[derive(Clone, Debug)]\npub struct SpannIndexReader<'me> {\n    pub posting_lists: BlockfileReader<'me, u32, SpannPostingList<'me>>,\n    pub hnsw_index: HnswIndexRef,\n    pub versions_map: BlockfileReader<'me, u32, u32>,\n    pub dimensionality: usize,\n    pub adaptive_search_nprobe: bool,\n    pub params: InternalSpannConfiguration,\n}\n```\n\n资料来源：[rust/index/src/spann/types.rs:1-30]()\n\n#### Spann Index Structure\n\n```mermaid\ngraph TD\n    subgraph \"Spann Index\"\n        SPI[SpannIndexReader]\n        HNSW[HNSW Index]\n        PL[Posting Lists]\n        VM[Versions Map]\n    end\n    \n    SPI --> HNSW\n    SPI --> PL\n    SPI --> VM\n```\n\n| Component | Type | Purpose |\n|-----------|------|---------|\n| `hnsw_index` | `HnswIndexRef` | Approximate nearest neighbor search |\n| `posting_lists` | `BlockfileReader<u32, SpannPostingList>` | Document postings |\n| `versions_map` | `BlockfileReader<u32, u32>` | Document versioning |\n| `adaptive_search_nprobe` | `bool` | Adaptive parameter tuning |\n\n### Sparse Posting Block\n\nThe sparse posting block implements an inverted index structure:\n\n```rust\n#[derive(Debug, Clone)]\npub struct DirectoryBlock(SparsePostingBlock);\n\nimpl DirectoryBlock {\n    pub fn new(max_offsets: &[u32], max_weights: &[f32]) \n        -> Result<Self, SparsePostingBlockError>\n}\n```\n\n资料来源：[rust/types/src/sparse_posting_block.rs:1-40]()\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `max_offset` | `u32` | Largest doc offset in posting block |\n| `max_weight` | `f32` | Maximum weight for term pruning |\n\n## Schema and Index Configuration\n\n### Collection Schema\n\nThe schema system supports multiple index types:\n\n```rust\npub struct Schema {\n    pub fn create_index(\n        mut self,\n        key: Option<&str>,\n        config: IndexConfig,\n    
) -> Result<Self, SchemaBuilderError>\n}\n```\n\n资料来源：[rust/types/src/collection_schema.rs:1-50]()\n\n| Index Type | Key | Description |\n|------------|-----|-------------|\n| `VectorIndexConfig` | `None` | Global vector index (HNSW/Spann) |\n| `StringInvertedIndexConfig` | `Some(field)` | Field-specific FTS |\n| `SparseVectorIndexConfig` | `Some(field)` | Sparse vector index |\n\n### Index Configuration\n\n```rust\npub struct VectorIndexConfig {\n    pub space: Option<Space>,\n    pub embedding_function: Option<EmbeddingFunctionId>,\n    pub source_key: Option<Key>,\n    pub hnsw: Option<HnswConfig>,\n    pub spann: Option<SpannConfig>,\n}\n```\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `space` | `Option<Space>` | `None` | Vector space (Cosine, L2, etc.) |\n| `embedding_function` | `Option<EFId>` | `None` | Embedding function ID |\n| `hnsw` | `Option<HnswConfig>` | `None` | HNSW parameters |\n| `spann` | `Option<SpannConfig>` | `None` | Spann parameters |\n\n## Worker Service Architecture\n\n### Work Queue Client\n\nThe work queue client manages distributed task execution:\n\n```rust\npub enum WorkQueueClientError {\n    ConnectionError(#[from] tonic::Status),\n    RequestError(#[from] tonic::Status),\n}\n```\n\n资料来源：[rust/worker/src/work_queue/work_queue_client.rs:1-20]()\n\n#### Error Code Mapping\n\n| gRPC Code | Chroma Error Code |\n|-----------|-------------------|\n| `Unavailable` | `Unavailable` |\n| `DeadlineExceeded` | `DeadlineExceeded` |\n| `ResourceExhausted` | `ResourceExhausted` |\n| `InvalidArgument` | `InvalidArgument` |\n| `NotFound` | `NotFound` |\n| `PermissionDenied` | `PermissionDenied` |\n\n### Apply Logs Orchestrator\n\nThe apply logs orchestrator handles log-based data synchronization:\n\n```rust\n#[derive(Debug)]\npub struct ApplyLogsOrchestratorResponse {\n    pub job_id: JobId,\n    pub total_records_post_compaction: u64,\n    pub flush_results: Vec<SegmentFlushInfo>,\n    pub 
collection_logical_size_bytes: u64,\n}\n```\n\n资料来源：[rust/worker/src/execution/orchestration/apply_logs_orchestrator.rs:1-50]()\n\n### KNN Filter Architecture\n\nThe KNN filter orchestrates vector similarity search:\n\n```mermaid\ngraph TD\n    subgraph \"KNN Query Pipeline\"\n        Q[Query Request]\n        F[Filter Logs]\n        K[KNN Search]\n        R[Results]\n    end\n    \n    Q --> F\n    F --> K\n    K --> R\n```\n\n#### KNN Error Handling\n\n```rust\npub enum KnnError {\n    QuantizedSpannCenterSearch(QuantizedSpannError),\n    QuantizedSpannLoadCenter(QuantizedSpannError),\n    InvalidDistanceFunction,\n    Aborted,\n    InvalidSchema(#[from] SchemaError),\n}\n```\n\n资料来源：[rust/worker/src/execution/orchestration/knn_filter.rs:1-40]()\n\n| Error Type | Error Code | Traced |\n|-----------|------------|--------|\n| `QuantizedSpannCenterSearch` | From inner | ✓ |\n| `InvalidDistanceFunction` | `InvalidArgument` | ✗ |\n| `Aborted` | `ResourceExhausted` | ✗ |\n| `Result(_)` | `Internal` | ✓ |\n\n### KNN Filter Output\n\n```rust\n#[derive(Clone, Debug)]\npub struct KnnFilterOutput {\n    pub logs: FetchLogOutput,\n    pub fetch_log_bytes: u64,\n    pub filter_output: FilterOutput,\n    pub dimension: usize,\n    pub distance_function: DistanceFunction,\n}\n```\n\n## Multi-Cloud Topology\n\nChroma supports multi-cloud and multi-region deployments:\n\n```rust\npub struct ProviderRegion<T: Clone + Debug> {\n    pub name: RegionName,\n    pub provider: String,\n    pub region: String,\n    pub config: T,\n}\n```\n\n资料来源：[rust/types/src/topology.rs:1-30]()\n\n### Topology Structure\n\n```mermaid\ngraph TD\n    subgraph \"Multi-Cloud Topology\"\n        Config[Configuration]\n        Topologies[Vec<Topology>]\n        Regions[Vec<ProviderRegion>]\n        Preferred[Preferred Region]\n    end\n    \n    Config --> Topologies\n    Config --> Regions\n    Config --> Preferred\n```\n\n### Configuration Schema\n\n```rust\nstruct RawMultiCloudMultiRegionConfiguration<R, 
T> {\n    preferred: RegionName,\n    regions: Vec<ProviderRegion<R>>,\n    topologies: Vec<Topology<T>>,\n}\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `preferred` | `RegionName` | Default region for operations |\n| `regions` | `Vec<ProviderRegion>` | Available cloud regions |\n| `topologies` | `Vec<Topology>` | Topology configurations |\n\n## Error Handling Framework\n\n### Chroma Error Traits\n\nAll errors implement the `ChromaError` trait:\n\n```rust\npub trait ChromaError: std::error::Error {\n    fn code(&self) -> ErrorCodes;\n    fn should_trace_error(&self) -> bool;\n}\n```\n\n### Error Code Registry\n\n| Code | Category | Description |\n|------|----------|-------------|\n| `InvalidArgument` | Client | Malformed request |\n| `NotFound` | Client | Resource missing |\n| `AlreadyExists` | Client | Duplicate resource |\n| `PermissionDenied` | Security | Access denied |\n| `ResourceExhausted` | Rate | Quota exceeded |\n| `Internal` | Server | System error |\n\n## CLI Integration\n\nThe Rust CLI provides management commands:\n\n```rust\npub enum Command {\n    Browse(BrowseArgs),\n    Copy(CopyArgs),\n    Db(DbSubcommand),\n    Docs,\n    Install(InstallArgs),\n    Login(LoginArgs),\n    Profile(ProfileSubcommand),\n    Run(RunArgs),\n    Support,\n    Update,\n    Vacuum(VacuumArgs),\n}\n```\n\n资料来源：[rust/cli/src/lib.rs:1-30]()\n\n### Available Commands\n\n| Command | Description |\n|---------|-------------|\n| `browse` | Open web interface |\n| `copy` | Copy data between collections |\n| `db` | Database operations |\n| `docs` | Open documentation |\n| `install` | Install Chroma |\n| `login` | Authenticate user |\n| `profile` | Performance profiling |\n| `run` | Start Chroma server |\n| `support` | Open support resources |\n| `update` | Update installation |\n| `vacuum` | Compact storage |\n\n## See Also\n\n- [Blockstore Provider Configuration](rust/blockstore/src/arrow/provider.rs)\n- [Query API 
Types](rust/types/src/api_types.rs)\n- [Index Implementations](rust/index/src/spann/types.rs)\n- [Worker Execution](rust/worker/src/execution/orchestration/)\n\n---\n\n<a id='go-coordinator'></a>\n\n## Go Coordinator & Distributed Systems\n\n### 相关页面\n\n相关主题：[System Architecture Overview](#architecture-overview)\n\n> **注：** 本页内容待补充。The Go coordinator source files (e.g. `go/pkg/sysdb/coordinator/coordinator.go`, `go/pkg/memberlist_manager/memberlist_manager.go`, `go/pkg/leader/election.go`, `go/cmd/coordinator/main.go`) were not available when this page was generated, so the Go-side coordinator, memberlist management, and leader election are not yet documented here.\n\nThe related distributed-systems components implemented in Rust are documented in [Rust Backend Services Architecture](#rust-services-architecture):\n\n- `rust/worker/src/execution/orchestration/` - Orchestrator implementations (ApplyLogsOrchestrator, LogFetchOrchestrator, RegisterOrchestrator)\n- `rust/worker/src/compactor/scheduler.rs` - Compaction scheduler\n- `rust/types/src/topology.rs` - Topology and region management\n- `rust/blockstore/src/arrow/` - Arrow block storage\n- `rust/worker/src/work_queue/work_queue_client.rs` - Work queue client\n\n---\n\n<a id='data-storage-blockstore'></a>\n\n## Data Storage & Blockstore\n\n### 相关页面\n\n
相关主题：[Rust Backend Services Architecture](#rust-services-architecture), [Embedding Functions Integration](#embedding-functions)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [rust/blockstore/src/arrow/blockfile.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/blockfile.rs)\n- [rust/blockstore/src/arrow/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n- [rust/blockstore/src/types/reader.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/types/reader.rs)\n- [rust/blockstore/src/types/writer.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/types/writer.rs)\n- [rust/blockstore/src/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/provider.rs)\n- [rust/blockstore/src/arrow/block/types.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/block/types.rs)\n- [rust/blockstore/src/arrow/root.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n- [rust/blockstore/src/memory/provider.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/memory/provider.rs)\n- [rust/blockstore/src/arrow/ordered_blockfile_writer.rs](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/ordered_blockfile_writer.rs)\n</details>\n\n# Data Storage & Blockstore\n\n## Overview\n\nThe Chroma blockstore is the core storage subsystem responsible for persisting vector embeddings, metadata, and related data structures. 
It provides a unified abstraction layer over different storage backends (in-memory and Arrow-based) while maintaining performance characteristics suitable for high-throughput vector database operations.\n\nThe blockstore system is architected around the concept of **blockfiles** — persistent, columnar storage structures that organize data by prefix-based partitioning and support efficient key-value operations.\n\n## Architecture\n\n```mermaid\ngraph TD\n    subgraph \"Public API Layer\"\n        BP[BlockfileProvider]\n        BR[BlockfileReader]\n        BW[BlockfileWriter]\n        BF[BlockfileFlusher]\n    end\n\n    subgraph \"Implementation Layer\"\n        ABP[ArrowBlockfileProvider]\n        MBP[MemoryBlockfileProvider]\n        ABF[ArrowUnorderedBlockfileWriter]\n        ABO[ArrowOrderedBlockfileWriter]\n    end\n\n    subgraph \"Storage Layer\"\n        BM[BlockManager]\n        RM[RootManager]\n        ST[Storage]\n    end\n\n    subgraph \"Arrow Format\"\n        R[Root]\n        SB[Sparse Index]\n        B[Blocks]\n    end\n\n    BP --> ABP\n    BP --> MBP\n    BR --> ABP\n    BR --> MBP\n    BW --> ABF\n    BW --> ABO\n\n    ABP --> BM\n    ABP --> RM\n    ABF --> BM\n    ABF --> RM\n    ABO --> BM\n    ABO --> RM\n    BM --> ST\n    RM --> ST\n\n    RM --> R\n    R --> SB\n    R --> B\n```\n\n## Core Components\n\n### BlockfileProvider\n\nThe `BlockfileProvider` is the main entry point for creating readers and writers. 
It abstracts the underlying storage implementation and provides factory methods for blockfile operations.\n\n**Variants:**\n\n| Provider Type | Description | Use Case |\n|---------------|-------------|----------|\n| `HashMapBlockfileProvider` | In-memory blockfile storage | Testing, ephemeral data |\n| `ArrowBlockfileProvider` | Persistent Arrow-based storage | Production workloads |\n\n**API Methods:**\n\n```rust\npub fn storage(&self) -> Option<Arc<Storage>> {\n    match self {\n        BlockfileProvider::ArrowBlockfileProvider(provider) => Some(provider.storage().clone()),\n        BlockfileProvider::HashMapBlockfileProvider(_) => None,\n    }\n}\n\npub fn new_memory() -> Self {\n    BlockfileProvider::HashMapBlockfileProvider(MemoryBlockfileProvider::new())\n}\n```\n\n资料来源：[rust/blockstore/src/provider.rs:1-30](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/provider.rs)\n\n### BlockfileReader\n\nThe `BlockfileReader` trait provides read access to stored data. It supports generic key and value types that implement the `ReadKey` and `ReadValue` traits.\n\n**Trait Definition:**\n\n```rust\npub trait ReadKey<'a>:\n    Key\n    + Into<KeyWrapper>\n    + TryFrom<&'a KeyWrapper, Error = InvalidKeyConversion>\n    + ArrowReadableKey<'a>\n    + Sync\n    + 'a\n{}\n\npub trait ReadValue<'a>: Value + Readable<'a> + ArrowReadableValue<'a> + Sync + 'a {}\n```\n\n资料来源：[rust/blockstore/src/provider.rs:40-55](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/provider.rs)\n\n### BlockfileWriter\n\nThe `BlockfileWriter` trait provides write access to blockfiles with support for ordered and unordered mutation patterns.\n\n**Core Operations:**\n\n| Method | Signature | Description |\n|--------|-----------|-------------|\n| `set` | `set(prefix, key, value)` | Insert or update a key-value pair |\n| `delete` | `delete(prefix, key)` | Remove a key-value pair |\n| `commit` | `commit()` | Finalize and persist the writer |\n\n```rust\npub async fn 
set<\n    K: Key + Into<KeyWrapper> + ArrowWriteableKey,\n    V: Value + Writeable + ArrowWriteableValue,\n>(\n    &self,\n    prefix: &str,\n    key: K,\n    value: V,\n) -> Result<(), Box<dyn ChromaError>>\n```\n\n资料来源：[rust/blockstore/src/types/writer.rs:50-75](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/types/writer.rs)\n\n## Arrow Blockfile Implementation\n\nThe Arrow-based blockfile is the primary production storage implementation, providing efficient columnar storage with Arrow IPC format.\n\n### Blockfile Structure\n\n```mermaid\ngraph TD\n    R[Root File<br/>Root Writer] --> SB[Sparse Index<br/>Block Key Mapping]\n    R --> BH[Block Header<br/>Metadata]\n    \n    SB --> B1[Block 1<br/>Arrow IPC]\n    SB --> B2[Block 2<br/>Arrow IPC]\n    SB --> BN[Block N<br/>Arrow IPC]\n    \n    B1 --> P1[Prefix: \"vec_1\"]\n    B1 --> P2[Prefix: \"vec_2\"]\n```\n\n### ArrowBlockfileProvider\n\nThe `ArrowBlockfileProvider` manages the lifecycle of blockfiles using Arrow IPC format with a root-sparse index architecture.\n\n**Key Features:**\n\n- **Fork Support**: Create new blockfiles from existing ones via forking\n- **CMEK Support**: Optional Customer-Managed Encryption Keys\n- **Block Size Management**: Configurable maximum block sizes\n\n```rust\npub async fn write<K: Key + ArrowWriteableKey, V: ArrowWriteableValue>(\n    &self,\n    options: BlockfileWriterOptions,\n) -> Result<BlockfileWriter, Box<CreateError>>\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:1-50](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n\n### Writer Types\n\n#### ArrowUnorderedBlockfileWriter\n\nProvides high-performance unordered writes optimized for bulk insertion scenarios.\n\n```rust\nimpl ArrowUnorderedBlockfileWriter {\n    pub(super) fn new<K: ArrowWriteableKey, V: ArrowWriteableValue>(\n        id: Uuid,\n        prefix_path: &str,\n        block_manager: BlockManager,\n        root_manager: RootManager,\n        
max_block_size_bytes: usize,\n        cmek: Option<Cmek>,\n    ) -> Self\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/blockfile.rs:50-80](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/blockfile.rs)\n\n#### ArrowOrderedBlockfileWriter\n\nMaintains key ordering within blocks, optimized for range queries and ordered iteration.\n\n资料来源：[rust/blockstore/src/arrow/ordered_blockfile_writer.rs:1-50](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/ordered_blockfile_writer.rs)\n\n### BlockManager and RootManager\n\n| Component | Responsibility |\n|-----------|----------------|\n| `BlockManager` | Manages individual data blocks, handles block creation and commitment |\n| `RootManager` | Manages root files containing sparse indices and metadata |\n\n```rust\n// Forking a new root from an existing one\nlet new_root = self\n    .root_manager\n    .fork::<K>(\n        &fork_from,\n        new_id,\n        &options.prefix_path,\n        self.block_manager.default_max_block_size_bytes(),\n    )\n    .await\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:45-70](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n\n## Error Handling\n\n### Error Types\n\n| Error Type | Description | Error Code |\n|------------|-------------|------------|\n| `BlockNotFound` | Requested block does not exist | Internal |\n| `BlockFetchError` | Failed to retrieve block from storage | Internal |\n| `MigrationError` | Blockfile migration failed | Internal |\n| `IOError` | Storage I/O operation failed | Internal |\n| `ArrowError` | Arrow IPC parsing/encoding error | Internal |\n| `NoRecordBatches` | Invalid Arrow file structure | Internal |\n\n```rust\n#[derive(Error, Debug)]\npub enum ArrowBlockfileError {\n    #[error(\"Block not found\")]\n    BlockNotFound,\n    #[error(\"Could not fetch block\")]\n    BlockFetchError(#[from] GetError),\n    #[error(\"Could not migrate blockfile to new version\")]\n    
MigrationError(#[from] MigrationError),\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/blockfile.rs:25-40](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/blockfile.rs)\n\n### Layout Verification\n\nThe system validates Arrow file layouts to ensure data integrity:\n\n```rust\n#[derive(Error, Debug)]\npub enum ArrowLayoutVerificationError {\n    #[error(\"Buffer length is not 64 byte aligned\")]\n    BufferLengthNotAligned,\n    #[error(\"No record batches in footer\")]\n    NoRecordBatches,\n    #[error(\"More than one record batch in IPC file\")]\n    MultipleRecordBatches,\n    #[error(\"Invalid message type\")]\n    InvalidMessageType,\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/block/types.rs:40-60](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/block/types.rs)\n\n## Storage Operations\n\n### Write Flow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant Provider as BlockfileProvider\n    participant Writer as BlockfileWriter\n    participant BM as BlockManager\n    participant RM as RootManager\n    participant Storage\n\n    Client->>Provider: write(options)\n    Provider->>Writer: create_writer()\n    Provider->>RM: create/fork_root()\n    Client->>Writer: set(prefix, key, value)\n    Writer->>BM: create_block()\n    loop Until flush\n        Writer->>Writer: accumulate_data()\n    end\n    Client->>Writer: commit()\n    Writer->>BM: commit_block()\n    Writer->>RM: update_root()\n    RM->>Storage: persist()\n    BM->>Storage: persist()\n```\n\n### Read Flow\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant Reader as BlockfileReader\n    participant RM as RootManager\n    participant BM as BlockManager\n    participant Storage\n\n    Client->>Reader: get(prefix, key)\n    Reader->>RM: get_block_ids()\n    RM->>Reader: block_id_list\n    loop For each block\n        Reader->>BM: get_block(id)\n        BM->>Storage: read()\n        Storage->>Reader: block_data\n    end\n  
  Reader->>Reader: search_blocks()\n    Reader->>Client: value\n```\n\n## Configuration Options\n\n### BlockfileWriterOptions\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `prefix_path` | `String` | Required | Path prefix for storage |\n| `max_block_size_bytes` | `usize` | Provider default | Maximum size per block |\n| `mutation_ordering` | `BlockfileWriterMutationOrdering` | `Ordered` | Write ordering mode |\n| `fork_from` | `Option<Uuid>` | `None` | Source blockfile ID for forking |\n| `cmek` | `Option<Cmek>` | `None` | Customer-managed encryption key |\n\n```rust\nlet mut bf_options = BlockfileWriterOptions::new(prefix_path.to_string())\n    .max_block_size_bytes(pl_block_size);\nbf_options = bf_options.unordered_mutations();\nif let Some(cmek) = cmek {\n    bf_options = bf_options.with_cmek(cmek);\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/provider.rs:90-110](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/provider.rs)\n\n## Memory Blockfile\n\nFor testing and ephemeral use cases, Chroma provides an in-memory blockfile implementation:\n\n```rust\npub fn new_memory() -> Self {\n    BlockfileProvider::HashMapBlockfileProvider(MemoryBlockfileProvider::new())\n}\n```\n\n**Limitations:**\n- No persistence\n- No fork support\n- Limited to unordered mutations\n\n```rust\nif options.fork_from.is_some() {\n    unimplemented!();\n}\n```\n\n资料来源：[rust/blockstore/src/memory/provider.rs:40-55](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/memory/provider.rs)\n\n## Block Reading\n\n### RootReader\n\nThe `RootReader` is responsible for reading block metadata and identifying which blocks contain specific data:\n\n```rust\nimpl RootReader {\n    pub(super) fn get_all_block_ids_from_bytes(\n        bytes: &[u8],\n        id: Uuid,\n    ) -> Result<Vec<Uuid>, FromBytesError> {\n        let mut cursor = std::io::Cursor::new(bytes);\n        let arrow_reader = 
arrow::ipc::reader::FileReader::try_new(&mut cursor, None);\n        \n        let record_batch = match arrow_reader {\n            Ok(mut reader) => match reader.next() {\n                Some(Ok(batch)) => batch,\n                Some(Err(e)) => return Err(FromBytesError::ArrowError(e)),\n                None => return Err(FromBytesError::NoDataError),\n            },\n            Err(e) => return Err(FromBytesError::ArrowError(e)),\n        };\n        \n        let (version, read_id) = Self::version_and_id_from_record_batch(&record_batch, id)?;\n        if read_id != id {\n            return Err(FromBytesError::IdMismatch);\n        }\n        \n        Self::block_ids_from_record_batch(&record_batch, version)\n    }\n}\n```\n\n资料来源：[rust/blockstore/src/arrow/root.rs:20-55](https://github.com/chroma-core/chroma/blob/main/rust/blockstore/src/arrow/root.rs)\n\n## Related Components\n\n### SpannIndex Integration\n\nThe blockstore is used by the Spann (Sparse + ANN) index for storing posting lists:\n\n| Component | Purpose |\n|-----------|---------|\n| `SpannIndexReader` | Reads posting lists and HNSW indices |\n| `SpannIndexWriter` | Creates and manages posting list writers |\n| `SpannPostingList` | Stores document IDs and embeddings |\n\n```rust\npub struct SpannIndexReader<'me> {\n    pub posting_lists: BlockfileReader<'me, u32, SpannPostingList<'me>>,\n    pub hnsw_index: HnswIndexRef,\n    pub versions_map: BlockfileReader<'me, u32, u32>,\n    pub dimensionality: usize,\n}\n```\n\n资料来源：[rust/index/src/spann/types.rs:30-45](https://github.com/chroma-core/chroma/blob/main/rust/index/src/spann/types.rs)\n\n## Summary\n\nThe Chroma blockstore provides a robust, extensible storage layer built on Arrow IPC format. Key architectural decisions include:\n\n1. **Separation of concerns**: BlockManager handles data blocks while RootManager manages metadata and sparse indices\n2. **Dual writer support**: Ordered and unordered writers for different access patterns\n3. 
**Forking capability**: Efficient creation of derived blockfiles without full copies\n4. **Error classification**: Clear mapping from internal errors to error codes for API responses\n5. **Type-safe abstractions**: Generic key-value traits enabling flexible data modeling\n\n---\n\n<a id='embedding-functions'></a>\n\n## Embedding Functions Integration\n\n### 相关页面\n\n相关主题：[Python Client SDK](#python-client-sdk), [Data Storage & Blockstore](#data-storage-blockstore)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n- [clients/new-js/packages/ai-embeddings/ollama/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/ollama/README.md)\n- [rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n- [clients/new-js/packages/ai-embeddings/all/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/README.md)\n- [clients/new-js/packages/chromadb/src/embedding-function.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/embedding-function.ts)\n- [clients/new-js/packages/chromadb/src/collection-configuration.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/collection-configuration.ts)\n- [clients/new-js/packages/ai-embeddings/morph/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/morph/README.md)\n- [clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md)\n</details>\n\n# Embedding Functions Integration\n\n## Overview\n\nEmbedding Functions in Chroma provide a standardized interface for converting text into vector 
embeddings. Chroma supports multiple embedding providers through a plugin architecture that allows developers to use custom embedding functions or leverage hosted services like OpenAI, Cohere, Ollama, and others.\n\nThe embedding function system serves as the bridge between raw text data and the vector representation used for similarity search. Each embedding function implements a consistent interface that handles API communication, request formatting, and response parsing for its respective provider.\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n\n## Architecture\n\n### High-Level Architecture\n\n```mermaid\ngraph TD\n    A[Client Application] --> B[Chroma Collection]\n    B --> C[Embedding Function]\n    C --> D[Embedding Provider API]\n    D --> E[Vector Embeddings]\n    E --> B\n    \n    F[@chroma-core/openai] --> C\n    G[@chroma-core/ollama] --> C\n    H[@chroma-core/cohere] --> C\n    I[@chroma-core/morph] --> C\n    J[@chroma-core/all] --> C\n```\n\n### Embedding Function Package Structure\n\nChroma organizes embedding functions into separate packages under the `@chroma-core` namespace. 
Each package focuses on a specific provider while sharing common utilities.\n\n| Package | Provider | Environment Support |\n|---------|----------|---------------------|\n| `@chroma-core/ai-embeddings-common` | Shared utilities | Node.js + Browser |\n| `@chroma-core/openai` | OpenAI | Node.js + Browser |\n| `@chroma-core/ollama` | Ollama (local) | Node.js + Browser |\n| `@chroma-core/cohere` | Cohere | Node.js + Browser |\n| `@chroma-core/jina` | Jina AI | Node.js + Browser |\n| `@chroma-core/morph` | Morph | Node.js |\n| `@chroma-core/all` | All providers | Node.js + Browser |\n\n资料来源：[clients/new-js/packages/ai-embeddings/all/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/all/README.md)\n\n## Core Components\n\n### Common Utilities Package\n\nThe `@chroma-core/ai-embeddings-common` package provides shared functionality used by all embedding function implementations:\n\n```typescript\nimport { validateConfigSchema, snakeCase, isBrowser } from '@chroma-core/ai-embeddings-common';\n```\n\n**Key Features:**\n\n| Feature | Purpose |\n|---------|---------|\n| `validateConfigSchema` | Validates embedding function configurations using JSON schemas |\n| `snakeCase` | Converts camelCase JavaScript objects to snake_case for API compatibility |\n| `isBrowser` | Detects browser vs Node.js runtime environment |\n\n资料来源：[clients/new-js/packages/ai-embeddings/common/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/common/README.md)\n\n### Dynamic Loading Mechanism\n\nThe embedding function system supports dynamic loading of packages based on configuration:\n\n```typescript\nconst fullPackageName = `@chroma-core/${packageName}`;\nawait import(fullPackageName);\nembeddingFunction = knownEmbeddingFunctions.get(packageName);\n```\n\nThe system maintains mappings for known embedding function names and handles package resolution automatically when a collection is configured with a 
specific embedding provider.\n\n资料来源：[clients/new-js/packages/chromadb/src/embedding-function.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/embedding-function.ts)\n\n### Configuration Schema\n\nEmbedding functions support structured configuration with schema validation. Configuration options vary by provider but typically include:\n\n| Parameter | Description | Provider Support |\n|-----------|-------------|------------------|\n| `apiKey` | API key for authentication | OpenAI, Cohere, Jina, Gemini |\n| `modelName` | Specific model identifier | All providers |\n| `apiBase` | Custom API endpoint URL | Ollama, Morph, Gemini |\n| `encodingFormat` | Output format (float/base64) | OpenAI, Morph |\n\n资料来源：[clients/new-js/packages/ai-embeddings/morph/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/morph/README.md)\n\n## Provider Implementations\n\n### OpenAI Embeddings\n\nThe OpenAI embedding function supports the OpenAI API for generating text embeddings:\n\n```typescript\nimport { OpenAIEmbeddingFunction } from '@chroma-core/openai';\n\nconst openAIEF = new OpenAIEmbeddingFunction({\n  apiKey: 'your-api-key',\n  modelName: 'text-embedding-3-small'\n});\n```\n\n### Ollama (Local Embeddings)\n\nOllama enables local embedding generation without external API calls:\n\n```bash\n# Install Ollama from ollama.ai\n# Start the server\nollama serve\n# Pull an embedding model\nollama pull chroma/all-minilm-l6-v2-f32\n```\n\n**Supported Models:**\n\n| Model | Dimensions |\n|-------|------------|\n| `chroma/all-minilm-l6-v2-f32` (default) | 384 |\n| `nomic-embed-text` | 768 |\n| `mxbai-embed-large` | 1024 |\n| `snowflake-arctic-embed` | Variable |\n\n资料来源：[clients/new-js/packages/ai-embeddings/ollama/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/ollama/README.md)\n\n### Morph Embeddings\n\nMorph provides embeddings optimized for code-related 
content:\n\n```typescript\nconst morphEmbedding = new MorphEmbeddingFunction({\n  api_key: 'your-morph-api-key',\n  model_name: 'morph-embedding-v2',\n  api_base: 'https://api.morphllm.com/v1',\n  encoding_format: 'float'\n});\n```\n\n资料来源：[clients/new-js/packages/ai-embeddings/morph/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/morph/README.md)\n\n### Chroma Cloud Qwen\n\nHosted embedding service using Qwen models:\n\n```typescript\nconst qwenEmbedding = new QwenEmbeddingFunction({\n  model: 'Qwen/Qwen3-Embedding-0.6B',\n  task: 'document' // or 'query'\n});\n```\n\nConfiguration includes:\n- `model`: The Qwen model to use\n- `task`: Task type (document or query embedding)\n- `instruction_dict`: Custom instructions for specific tasks\n- `apiKeyEnvVar`: Environment variable for API key (default: `CHROMA_API_KEY`)\n\n资料来源：[clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/chroma-cloud-qwen/README.md)\n\n## Collection Integration\n\n### Embedding Function in Collections\n\nWhen creating a collection, the embedding function can be specified at multiple levels:\n\n```typescript\nconst collection = await chroma.createCollection({\n  name: \"my-collection\",\n  embeddingFunction: openAIEF  // Specify embedding function\n});\n```\n\n### Space Configuration\n\nEmbedding functions can define supported distance spaces and default configurations:\n\n```typescript\nif (overallEf && overallEf.defaultSpace && overallEf.supportedSpaces) {\n  if (configuration?.hnsw === undefined && configuration?.spann === undefined) {\n    configuration.hnsw = { space: overallEf.defaultSpace() };\n  }\n}\n```\n\nThe system validates that configured spaces are supported by the embedding function and warns if mismatches occur:\n\n```\nSpace 'cosine' is not supported by embedding function 'openai'. 
\nSupported spaces: euclidean, dotproduct\n```\n\n资料来源：[clients/new-js/packages/chromadb/src/collection-configuration.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/collection-configuration.ts)\n\n## Query Response Structure\n\n### Include Parameter\n\nQueries support specifying which data to include in results through the `Include` parameter:\n\n```rust\npub enum Include {\n    Distance,\n    Document,\n    Embedding,\n    Metadata,\n    Uri,\n}\n```\n\n**Default Inclusion Behavior:**\n\n| Operation | Default Includes |\n|-----------|------------------|\n| Query | Document, Metadata, Distance |\n| Get | Document, Metadata |\n\n**Include List Methods:**\n\n| Method | Returns |\n|--------|---------|\n| `IncludeList::empty()` | No includes |\n| `IncludeList::default_query()` | Document, Metadata, Distance |\n| `IncludeList::default_get()` | Document, Metadata |\n| `IncludeList::all()` | All five include types |\n\n资料来源：[rust/types/src/api_types.rs](https://github.com/chroma-core/chroma/blob/main/rust/types/src/api_types.rs)\n\n## Usage Patterns\n\n### Basic Usage with JavaScript Client\n\n```javascript\nimport { ChromaClient } from \"chromadb\";\nimport { OpenAIEmbeddingFunction } from \"@chroma-core/openai\";\n\nconst chroma = new ChromaClient();\nconst embeddingFunction = new OpenAIEmbeddingFunction({\n  apiKey: process.env.OPENAI_API_KEY\n});\n\nconst collection = await chroma.createCollection({\n  name: \"documents\",\n  embeddingFunction: embeddingFunction\n});\n\nawait collection.add({\n  ids: [\"doc-1\", \"doc-2\"],\n  documents: [\"Document content here\", \"Another document\"],\n  metadatas: [{ source: \"notion\" }, { source: \"google-docs\" }]\n});\n\nconst results = await collection.query({\n  queryTexts: [\"Search query\"],\n  nResults: 2\n});\n```\n\n### Python Client Usage\n\n```python\nimport chromadb\n\nclient = chromadb.HttpClient(host=\"localhost\", port=8000)\ncollection = 
client.create_collection(\"documents\")\n\ncollection.add(\n    documents=[\"Document 1\", \"Document 2\"],\n    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],\n    ids=[\"doc1\", \"doc2\"],\n    embeddings=[[1.2, 2.1, ...], [1.2, 2.1, ...]]\n)\n\nresults = collection.query(\n    query_texts=[\"Query document\"],\n    n_results=2\n)\n```\n\n资料来源：[clients/new-js/packages/chromadb/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/README.md)\n\n## Environment Detection\n\nEmbedding functions automatically detect the runtime environment to select the appropriate HTTP client:\n\n```typescript\nimport { isBrowser } from '@chroma-core/ai-embeddings-common';\n\nif (isBrowser()) {\n  // Use browser-compatible fetch\n} else {\n  // Use Node.js HTTP client\n}\n```\n\nThis enables packages like Ollama to work seamlessly in both browser and Node.js environments:\n\n> This package works in both Node.js and browser environments, automatically detecting the runtime and using the appropriate Ollama client.\n\n资料来源：[clients/new-js/packages/ai-embeddings/ollama/README.md](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/ai-embeddings/ollama/README.md)\n\n## Type Safety\n\nThe embedding function system provides TypeScript types and interfaces for:\n\n- Configuration validation\n- Response parsing\n- Error handling\n- Provider-specific options\n\n```typescript\nexport const getSparseEmbeddingFunction = async (\n  client: ChromaClient,\n  efConfig?: EmbeddingFunctionConfiguration\n) => {\n  // Returns SparseEmbeddingFunction instance or undefined\n};\n```\n\n资料来源：[clients/new-js/packages/chromadb/src/embedding-function.ts](https://github.com/chroma-core/chroma/blob/main/clients/new-js/packages/chromadb/src/embedding-function.ts)\n\n## Summary\n\nEmbedding Functions Integration in Chroma provides a unified, extensible system for text vectorization. Key aspects include:\n\n1. 
**Provider Abstraction**: Standardized interface across multiple embedding providers\n2. **Dynamic Loading**: Packages loaded on-demand based on collection configuration\n3. **Schema Validation**: JSON schema-based configuration validation\n4. **Cross-Platform**: Support for both Node.js and browser environments\n5. **Flexible Configuration**: Provider-specific options with sensible defaults\n6. **Space Support**: Distance metric configuration aligned with embedding provider capabilities\n\nThe plugin architecture allows Chroma to integrate new embedding providers while maintaining API consistency across the SDK.\n\n---\n\n---\n\n## Doramagic 踩坑日志\n\n项目：chroma-core/chroma\n\n摘要：发现 6 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：能力坑 - 能力判断依赖假设。\n\n## 1. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | github_repo:546206616 | https://github.com/chroma-core/chroma | README/documentation is current enough for a first validation pass.\n\n## 2. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | last_activity_observed missing\n\n## 3. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n\n## 4. 
安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n\n## 5. 维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | issue_or_pr_quality=unknown\n\n## 6. 维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | release_recency=unknown\n\n<!-- canonical_name: chroma-core/chroma; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki 完整输出，末尾追加 Discovery Agent 踩坑日志。",
      "title": "Human Manual / 人类版说明书"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log / 踩坑日志\n\n项目：chroma-core/chroma\n\n摘要：发现 6 个潜在踩坑项，其中 0 个为 high/blocking；最高优先级：能力坑 - 能力判断依赖假设。\n\n## 1. 能力坑 · 能力判断依赖假设\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：README/documentation is current enough for a first validation pass.\n- 对用户的影响：假设不成立时，用户拿不到承诺的能力。\n- 建议检查：将假设转成下游验证清单。\n- 防护动作：假设必须转成验证项；没有验证结果前不能写成事实。\n- 证据：capability.assumptions | github_repo:546206616 | https://github.com/chroma-core/chroma | README/documentation is current enough for a first validation pass.\n\n## 2. 维护坑 · 维护活跃度未知\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：未记录 last_activity_observed。\n- 对用户的影响：新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- 建议检查：补 GitHub 最近 commit、release、issue/PR 响应信号。\n- 防护动作：维护活跃度未知时，推荐强度不能标为高信任。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | last_activity_observed missing\n\n## 3. 安全/权限坑 · 下游验证发现风险项\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：下游已经要求复核，不能在页面中弱化。\n- 建议检查：进入安全/权限治理复核队列。\n- 防护动作：下游风险存在时必须保持 review/recommendation 降级。\n- 证据：downstream_validation.risk_items | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n\n## 4. 安全/权限坑 · 存在评分风险\n\n- 严重度：medium\n- 证据强度：source_linked\n- 发现：no_demo\n- 对用户的影响：风险会影响是否适合普通用户安装。\n- 建议检查：把风险写入边界卡，并确认是否需要人工复核。\n- 防护动作：评分风险必须进入边界卡，不能只作为内部分数。\n- 证据：risks.scoring_risks | github_repo:546206616 | https://github.com/chroma-core/chroma | no_demo; severity=medium\n\n## 5. 维护坑 · issue/PR 响应质量未知\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：issue_or_pr_quality=unknown。\n- 对用户的影响：用户无法判断遇到问题后是否有人维护。\n- 建议检查：抽样最近 issue/PR，判断是否长期无人处理。\n- 防护动作：issue/PR 响应未知时，必须提示维护风险。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | issue_or_pr_quality=unknown\n\n## 6. 
维护坑 · 发布节奏不明确\n\n- 严重度：low\n- 证据强度：source_linked\n- 发现：release_recency=unknown。\n- 对用户的影响：安装命令和文档可能落后于代码，用户踩坑概率升高。\n- 建议检查：确认最近 release/tag 和 README 安装命令是否一致。\n- 防护动作：发布节奏未知或过期时，安装说明必须标注可能漂移。\n- 证据：evidence.maintainer_signals | github_repo:546206616 | https://github.com/chroma-core/chroma | release_recency=unknown\n",
      "summary": "用户实践前最可能遇到的身份、安装、配置、运行和安全坑。",
      "title": "Pitfall Log / 踩坑日志"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# chroma - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for chroma-core/chroma.\n\nProject:\n- Name: chroma\n- Repository: https://github.com/chroma-core/chroma\n- Summary: Search infrastructure for AI\n- Host target: local_cli\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Search infrastructure for AI\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Search infrastructure for AI\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. chroma-overview: Chroma Overview. Produce one small intermediate artifact and wait for confirmation.\n2. getting-started: Getting Started with Chroma. Produce one small intermediate artifact and wait for confirmation.\n3. architecture-overview: System Architecture Overview. Produce one small intermediate artifact and wait for confirmation.\n4. python-client-sdk: Python Client SDK. Produce one small intermediate artifact and wait for confirmation.\n5. rust-services-architecture: Rust Backend Services Architecture. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/chroma-core/chroma\n- https://github.com/chroma-core/chroma#readme\n- README.md\n- Cargo.toml\n- pyproject.toml\n- chromadb/__init__.py\n- chromadb/api/client.py\n- chromadb/api/models/Collection.py\n- examples/basic_functionality/start_here.ipynb\n- rust/frontend/src/server.rs\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start / 官方入口\n\n项目：chroma-core/chroma\n\n## 官方安装入口\n\n### Python / pip · 官方安装入口\n\n```bash\npip install chromadb\n```\n\n来源：https://github.com/chroma-core/chroma#readme\n\n## 来源\n\n- repo: https://github.com/chroma-core/chroma\n- docs: https://github.com/chroma-core/chroma#readme\n",
      "summary": "从项目官方 README 或安装文档提取的开工入口。",
      "title": "Quick Start / 官方入口"
    }
  },
  "validation_id": "dval_ce6db9c989fb420eb0e8d2f3c80e9153"
}
