{
  "canonical_name": "qdrant/fastembed",
  "compilation_id": "pack_e9f9dd99cfad444f8aeb51e98f74f04f",
  "created_at": "2026-05-22T04:28:00.740736+00:00",
  "created_by": "project-pack-compiler",
  "feedback": {
    "carrier_selection_notes": [
      "viable_asset_types=mcp_config, recipe, host_instruction, eval, preflight",
      "recommended_asset_types=mcp_config, recipe, host_instruction, eval, preflight"
    ],
    "evidence_delta": {
      "confirmed_claims": [
        "identity_anchor_present",
        "capability_and_host_targets_present",
        "install_path_declared_or_better"
      ],
      "missing_required_fields": [],
      "must_verify_forwarded": [
        "Run or inspect `pip install fastembed` in an isolated environment.",
        "Confirm the project exposes the claimed capability to at least one target host."
      ],
      "quickstart_execution_scope": "allowlisted_sandbox_smoke",
      "sandbox_command": "pip install fastembed",
      "sandbox_container_image": "python:3.12-slim",
      "sandbox_execution_backend": "docker",
      "sandbox_planner_decision": "llm_execute_isolated_install",
      "sandbox_validation_id": "sbx_886b3a0fa833474ca981a3d6a7b41c51"
    },
    "feedback_event_type": "project_pack_compilation_feedback",
    "learning_candidate_reasons": [],
    "template_gaps": []
  },
  "identity": {
    "canonical_id": "project_51e28ea3aa9c462047d4d33a22f662df",
    "canonical_name": "qdrant/fastembed",
    "homepage_url": null,
    "license": "unknown",
    "repo_url": "https://github.com/qdrant/fastembed",
    "slug": "fastembed",
    "source_packet_id": "phit_13c1afb3d2eb4a85aa6314d25f8144b5",
    "source_validation_id": "dval_8a3cf855db8e42dd8eae04da90dc0f36"
  },
  "merchandising": {
    "best_for": "需要软件开发与交付能力，并使用 chatgpt的用户",
    "github_forks": 200,
    "github_stars": 2963,
    "one_liner_en": "Fast, Accurate, Lightweight Python library to make State of the Art Embedding",
    "one_liner_zh": "Fast, Accurate, Lightweight Python library to make State of the Art Embedding",
    "primary_category": {
      "category_id": "software-development",
      "confidence": "medium",
      "name_en": "Software Development",
      "name_zh": "软件开发与交付",
      "reason": "matched_keywords:git"
    },
    "target_user": "使用 chatgpt 等宿主 AI 的用户",
    "title_en": "fastembed",
    "title_zh": "fastembed 能力包",
    "visible_tags": [
      {
        "label_en": "Knowledge Retrieval",
        "label_zh": "知识检索",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "product_domain-knowledge-retrieval",
        "type": "product_domain"
      },
      {
        "label_en": "Knowledge Base Q&A",
        "label_zh": "知识库问答",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "user_job-knowledge-base-q-a",
        "type": "user_job"
      },
      {
        "label_en": "Workflow Automation",
        "label_zh": "流程自动化",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "core_capability-workflow-automation",
        "type": "core_capability"
      },
      {
        "label_en": "Automated Workflow",
        "label_zh": "自动化工作流",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "workflow_pattern-automated-workflow",
        "type": "workflow_pattern"
      },
      {
        "label_en": "Evaluation Suite",
        "label_zh": "评测体系",
        "source": "repo_evidence_project_characteristics",
        "tag_id": "selection_signal-evaluation-suite",
        "type": "selection_signal"
      }
    ]
  },
  "packet_id": "phit_13c1afb3d2eb4a85aa6314d25f8144b5",
  "page_model": {
    "artifacts": {
      "artifact_slug": "fastembed",
      "files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json",
        "REPO_INSPECTION.md",
        "CAPABILITY_CONTRACT.json",
        "EVIDENCE_INDEX.json",
        "CLAIM_GRAPH.json"
      ],
      "required_files": [
        "PROJECT_PACK.json",
        "QUICK_START.md",
        "PROMPT_PREVIEW.md",
        "HUMAN_MANUAL.md",
        "AI_CONTEXT_PACK.md",
        "BOUNDARY_RISK_CARD.md",
        "PITFALL_LOG.md",
        "REPO_INSPECTION.json"
      ]
    },
    "detail": {
      "capability_source": "Project Hit Packet + DownstreamValidationResult",
      "commands": [
        {
          "command": "pip install fastembed",
          "label": "Python / pip · 官方安装入口",
          "source": "https://github.com/qdrant/fastembed#readme",
          "verified": true
        }
      ],
      "display_tags": [
        "知识检索",
        "知识库问答",
        "流程自动化",
        "自动化工作流",
        "评测体系"
      ],
      "eyebrow": "软件开发与交付",
      "glance": [
        {
          "body": "判断自己是不是目标用户。",
          "label": "最适合谁",
          "value": "需要软件开发与交付能力，并使用 chatgpt的用户"
        },
        {
          "body": "先理解能力边界，再决定是否继续。",
          "label": "核心价值",
          "value": "Fast, Accurate, Lightweight Python library to make State of the Art Embedding"
        },
        {
          "body": "未完成验证前保持审慎。",
          "label": "继续前",
          "value": "publish to Doramagic.ai project surfaces"
        }
      ],
      "guardrail_source": "Boundary & Risk Card",
      "guardrails": [
        {
          "body": "Prompt Preview 只展示流程，不证明项目已安装或运行。",
          "label": "Check 1",
          "value": "不要把试用当真实运行"
        },
        {
          "body": "chatgpt",
          "label": "Check 2",
          "value": "确认宿主兼容"
        },
        {
          "body": "publish to Doramagic.ai project surfaces",
          "label": "Check 3",
          "value": "先隔离验证"
        }
      ],
      "mode": "mcp_config, recipe, host_instruction, eval, preflight",
      "pitfall_log": {
        "items": [
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_201d4035515846df8830ca0dad6960c5 | https://github.com/qdrant/fastembed/issues/618 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_8147621574b345d7955e79ad98f4ba6f | https://github.com/qdrant/fastembed/issues/630 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "high",
            "suggested_check": "来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。",
            "title": "来源证据：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "Developers should check this security_permissions risk before relying on the project: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory",
            "category": "安全/权限坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_d3890c2b3360ccb937839f70fd4aa584 | https://github.com/qdrant/fastembed/issues/626 | [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory"
            ],
            "severity": "high",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory. Context: Observed when using python",
            "title": "失败模式：security_permissions: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside ca...",
            "user_impact": "Developers may expose sensitive permissions or credentials: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_16e50a8626aff1576adeb1c0baab4785 | https://github.com/qdrant/fastembed/issues/466 | The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment."
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.. Context: Observed when using python, docker",
            "title": "失败模式：installation: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.",
            "user_impact": "Developers may fail before the first successful local run: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment."
          },
          {
            "body": "Developers should check this installation risk before relying on the project: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_04529bc774f1c961d4adeb7190edecd7 | https://github.com/qdrant/fastembed/issues/618 | [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2. Context: Observed when using python, windows, linux",
            "title": "失败模式：installation: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2",
            "user_impact": "Developers may fail before the first successful local run: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_79a43347d96beb6d05eb6bfec2503fb5 | https://github.com/qdrant/fastembed/issues/630 | [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14. Context: Observed when using python, macos, cuda",
            "title": "失败模式：installation: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14",
            "user_impact": "Developers may fail before the first successful local run: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14"
          },
          {
            "body": "Developers should check this installation risk before relying on the project: v0.5.1",
            "category": "安装坑",
            "evidence": [
              "failure_mode_cluster:github_release | fmev_8b37c58c613005c0182d0325aaf032f7 | https://github.com/qdrant/fastembed/releases/tag/v0.5.1 | v0.5.1"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: v0.5.1. Context: Observed when using python",
            "title": "失败模式：installation: v0.5.1",
            "user_impact": "Upgrade or migration may change expected behavior: v0.5.1"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个安装相关的待验证问题：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.",
            "category": "安装坑",
            "evidence": [
              "community_evidence:github | cevd_0f507b37e33e456ea259e82966cecdc5 | https://github.com/qdrant/fastembed/issues/466 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个配置相关的待验证问题：[Bug]: No timeout on model download — requests.get() can hang indefinitely",
            "category": "配置坑",
            "evidence": [
              "community_evidence:github | cevd_6d570ba91cfd414f970a3a8da522be04 | https://github.com/qdrant/fastembed/issues/627 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：[Bug]: No timeout on model download — requests.get() can hang indefinitely",
            "user_impact": "可能阻塞安装或首次运行。"
          },
          {
            "body": "README/documentation is current enough for a first validation pass.",
            "category": "能力坑",
            "evidence": [
              "capability.assumptions | github_repo:666260877 | https://github.com/qdrant/fastembed | README/documentation is current enough for a first validation pass."
            ],
            "severity": "medium",
            "suggested_check": "将假设转成下游验证清单。",
            "title": "能力判断依赖假设",
            "user_impact": "假设不成立时，用户拿不到承诺的能力。"
          },
          {
            "body": "Developers should check this runtime risk before relying on the project: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1",
            "category": "运行坑",
            "evidence": [
              "failure_mode_cluster:github_issue | fmev_17b849ae47ffaf5d18cabbd577f373ca | https://github.com/qdrant/fastembed/issues/603 | [Bug]: Loading models with additional files fails with onnxruntime 1.24.1"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1. Context: Observed when using python, linux",
            "title": "失败模式：runtime: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1",
            "user_impact": "Developers may hit a documented source-backed failure mode: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1"
          },
          {
            "body": "Developers should check this runtime risk before relying on the project: v0.4.2",
            "category": "运行坑",
            "evidence": [
              "failure_mode_cluster:github_release | fmev_607c8ff157108b2b5fb78f55129b60f6 | https://github.com/qdrant/fastembed/releases/tag/v0.4.2 | v0.4.2"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: v0.4.2. Context: Source discussion did not expose a precise runtime context.",
            "title": "失败模式：runtime: v0.4.2",
            "user_impact": "Upgrade or migration may change expected behavior: v0.4.2"
          },
          {
            "body": "Developers should check this runtime risk before relying on the project: v0.7.1",
            "category": "运行坑",
            "evidence": [
              "failure_mode_cluster:github_release | fmev_07583dafa20e640a1720d7a77188475e | https://github.com/qdrant/fastembed/releases/tag/v0.7.1 | v0.7.1"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: v0.7.1. Context: Source discussion did not expose a precise runtime context.",
            "title": "失败模式：runtime: v0.7.1",
            "user_impact": "Upgrade or migration may change expected behavior: v0.7.1"
          },
          {
            "body": "Developers should check this runtime risk before relying on the project: v0.8.0",
            "category": "运行坑",
            "evidence": [
              "failure_mode_cluster:github_release | fmev_054bae0c9c1667cf5568de7ecb7087c9 | https://github.com/qdrant/fastembed/releases/tag/v0.8.0 | v0.8.0"
            ],
            "severity": "medium",
            "suggested_check": "Before packaging this project, run the relevant install/config/quickstart check for: v0.8.0. Context: Observed when using python, cuda",
            "title": "失败模式：runtime: v0.8.0",
            "user_impact": "Upgrade or migration may change expected behavior: v0.8.0"
          },
          {
            "body": "GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[Bug]: license error in pypi metadata",
            "category": "维护坑",
            "evidence": [
              "community_evidence:github | cevd_017eda744ea84e679aeb5d77d41f7541 | https://github.com/qdrant/fastembed/issues/620 | 来源讨论提到 python 相关条件，需在安装/试用前复核。"
            ],
            "severity": "medium",
            "suggested_check": "来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。",
            "title": "来源证据：[Bug]: license error in pypi metadata",
            "user_impact": "可能增加新用户试用和生产接入成本。"
          },
          {
            "body": "未记录 last_activity_observed。",
            "category": "维护坑",
            "evidence": [
              "evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | last_activity_observed missing"
            ],
            "severity": "medium",
            "suggested_check": "补 GitHub 最近 commit、release、issue/PR 响应信号。",
            "title": "维护活跃度未知",
            "user_impact": "新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。"
          }
        ],
        "source": "ProjectPitfallLog + ProjectHitPacket + validation + community signals",
        "summary": "发现 29 个潜在踩坑项，其中 3 个为 high/blocking；最高优先级：安装坑 - 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2。",
        "title": "踩坑日志"
      },
      "snapshot": {
        "contributors": 29,
        "forks": 200,
        "license": "unknown",
        "note": "站点快照，非实时质量证明；用于开工前背景判断。",
        "stars": 2963
      },
      "source_url": "https://github.com/qdrant/fastembed",
      "steps": [
        {
          "body": "不安装项目，先体验能力节奏。",
          "code": "preview",
          "title": "先试 Prompt"
        },
        {
          "body": "理解输入、输出、失败模式和边界。",
          "code": "manual",
          "title": "读说明书"
        },
        {
          "body": "把上下文交给宿主 AI 继续工作。",
          "code": "context",
          "title": "带给 AI"
        },
        {
          "body": "进入主力环境前先完成安装入口与风险边界验证。",
          "code": "verify",
          "title": "沙箱验证"
        }
      ],
      "subtitle": "Fast, Accurate, Lightweight Python library to make State of the Art Embedding",
      "title": "fastembed 能力包",
      "trial_prompt": "# fastembed - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for qdrant/fastembed.\n\nProject:\n- Name: fastembed\n- Repository: https://github.com/qdrant/fastembed\n- Summary: Fast, Accurate, Lightweight Python library to make State of the Art Embedding\n- Host target: chatgpt\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Fast, Accurate, Lightweight Python library to make State of the Art Embedding\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Fast, Accurate, Lightweight Python library to make State of the Art Embedding\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. page-introduction: Introduction to FastEmbed. Produce one small intermediate artifact and wait for confirmation.\n2. page-installation: Installation Guide. Produce one small intermediate artifact and wait for confirmation.\n3. page-quickstart: Quick Start Guide. Produce one small intermediate artifact and wait for confirmation.\n4. page-architecture: System Architecture. 
Produce one small intermediate artifact and wait for confirmation.\n5. page-text-embedding: Text Embedding Module. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/qdrant/fastembed\n- https://github.com/qdrant/fastembed#readme\n- README.md\n- fastembed/__init__.py\n- pyproject.toml\n- fastembed/text/text_embedding.py\n- fastembed/image/image_embedding.py\n- fastembed/text/text_embedding_base.py\n- fastembed/image/image_embedding_base.py\n- fastembed/common/onnx_model.py\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "voices": [
        {
          "body": "来源平台：github。github/github_issue: [Bug]: Segmentation Fault or AssertionError during initialization on Pyt（https://github.com/qdrant/fastembed/issues/618）；github/github_issue: [Bug]: No timeout on model download — requests.get() can hang indefinite（https://github.com/qdrant/fastembed/issues/627）；github/github_issue: [Bug]: license error in pypi metadata（https://github.com/qdrant/fastembed/issues/620）；github/github_issue: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14（https://github.com/qdrant/fastembed/issues/630）；github/github_issue: [Bug]: Loading models with additional files fails with onnxruntime 1.24.（https://github.com/qdrant/fastembed/issues/603）；github/github_issue: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python （https://github.com/qdrant/fastembed/issues/466）；github/github_issue: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary （https://github.com/qdrant/fastembed/issues/626）；github/github_release: v0.8.0（https://github.com/qdrant/fastembed/releases/tag/v0.8.0）；github/github_release: v0.7.4（https://github.com/qdrant/fastembed/releases/tag/v0.7.4）；github/github_release: v0.7.2（https://github.com/qdrant/fastembed/releases/tag/v0.7.2）；github/github_release: v0.7.1（https://github.com/qdrant/fastembed/releases/tag/v0.7.1）；github/github_release: v0.7.0（https://github.com/qdrant/fastembed/releases/tag/v0.7.0）。这些是项目级外部声音，不作为单独质量证明。",
          "items": [
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: Segmentation Fault or AssertionError during initialization on Pyt",
              "url": "https://github.com/qdrant/fastembed/issues/618"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: No timeout on model download — requests.get() can hang indefinite",
              "url": "https://github.com/qdrant/fastembed/issues/627"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: license error in pypi metadata",
              "url": "https://github.com/qdrant/fastembed/issues/620"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14",
              "url": "https://github.com/qdrant/fastembed/issues/630"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: Loading models with additional files fails with onnxruntime 1.24.",
              "url": "https://github.com/qdrant/fastembed/issues/603"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "The dependency `py-rust-stemmers` cannot be downloaded in a pure Python ",
              "url": "https://github.com/qdrant/fastembed/issues/466"
            },
            {
              "kind": "github_issue",
              "source": "github",
              "title": "[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary ",
              "url": "https://github.com/qdrant/fastembed/issues/626"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "v0.8.0",
              "url": "https://github.com/qdrant/fastembed/releases/tag/v0.8.0"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "v0.7.4",
              "url": "https://github.com/qdrant/fastembed/releases/tag/v0.7.4"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "v0.7.2",
              "url": "https://github.com/qdrant/fastembed/releases/tag/v0.7.2"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "v0.7.1",
              "url": "https://github.com/qdrant/fastembed/releases/tag/v0.7.1"
            },
            {
              "kind": "github_release",
              "source": "github",
              "title": "v0.7.0",
              "url": "https://github.com/qdrant/fastembed/releases/tag/v0.7.0"
            }
          ],
          "status": "已收录 12 条来源",
          "title": "社区讨论"
        }
      ]
    },
    "homepage_card": {
      "category": "软件开发与交付",
      "desc": "Fast, Accurate, Lightweight Python library to make State of the Art Embedding",
      "effort": "安装已验证",
      "forks": 200,
      "icon": "code",
      "name": "fastembed 能力包",
      "risk": "可发布",
      "slug": "fastembed",
      "stars": 2963,
      "tags": [
        "知识检索",
        "知识库问答",
        "流程自动化",
        "自动化工作流",
        "评测体系"
      ],
      "thumb": "gray",
      "type": "MCP 配置"
    },
    "manual": {
      "markdown": "# https://github.com/qdrant/fastembed 项目说明书\n\n生成时间：2026-05-21 05:37:43 UTC\n\n## 目录\n\n- [Introduction to FastEmbed](#page-introduction)\n- [Installation Guide](#page-installation)\n- [Quick Start Guide](#page-quickstart)\n- [System Architecture](#page-architecture)\n- [Text Embedding Module](#page-text-embedding)\n- [Image Embedding Module](#page-image-embedding)\n- [Sparse Embedding Models](#page-sparse-embedding)\n- [Late Interaction Models](#page-late-interaction)\n- [ONNX Model Infrastructure](#page-onnx-model)\n- [GPU Support and Acceleration](#page-gpu-support)\n\n<a id='page-introduction'></a>\n\n## Introduction to FastEmbed\n\n### 相关页面\n\n相关主题：[Installation Guide](#page-installation), [System Architecture](#page-architecture), [Quick Start Guide](#page-quickstart)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n</details>\n\n# Introduction to FastEmbed\n\nFastEmbed is a lightweight, high-performance Python library designed for generating text and image embeddings using ONNX-based models. 
It provides a unified API for dense, sparse, and late-interaction embeddings with support for multiple embedding types and cross-encoder re-ranking models.\n\n## Overview\n\nFastEmbed serves as an embedding generation engine optimized for production use cases, particularly in vector search applications. The library emphasizes:\n\n- **Performance**: Leverages ONNX Runtime for efficient CPU inference\n- **Lightweight**: Minimal dependencies and small model sizes\n- **Flexibility**: Supports dense, sparse, and multimodal embeddings\n- **Ease of use**: Simple Python API for common embedding workflows\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Architecture\n\nFastEmbed follows a modular architecture with separate components for different embedding types and processing stages.\n\n```mermaid\ngraph TD\n    A[FastEmbed API] --> B[Text Embedding]\n    A --> C[Image Embedding]\n    A --> D[Sparse Embedding]\n    A --> E[Cross Encoder]\n    \n    B --> B1[OnnxEmbedding]\n    B --> B2[PooledEmbedding]\n    B --> B3[PooledNormalizedEmbedding]\n    \n    C --> C1[OnnxImageModel]\n    \n    D --> D1[BM25]\n    D --> D2[SPLADE++]\n    D --> D3[MiniCOIL]\n    \n    E --> E1[OnnxCrossEncoderModel]\n    \n    B1 & B2 & B3 --> F[ONNX Runtime]\n    C1 --> F\n    E1 --> F\n    D1 & D2 & D3 --> G[Tokenization Engine]\n```\n\n### Core Components\n\n| Component | Purpose | File Location |\n|-----------|---------|---------------|\n| `TextEmbedding` | Dense text embeddings | `fastembed/text/` |\n| `ImageEmbedding` | Image embeddings | `fastembed/image/` |\n| `SparseTextEmbedding` | Sparse embeddings (BM25, SPLADE) | `fastembed/sparse/` |\n| `TextCrossEncoder` | Re-ranking models | `fastembed/rerank/` |\n| `LateInteractionTextEmbedding` | Late interaction embeddings | `fastembed/postprocess/` |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Supported Models\n\nFastEmbed 
supports an extensive collection of pre-converted ONNX models across multiple categories.\n\n### Text Embedding Models\n\nText embeddings are categorized by their pooling strategy and normalization approach.\n\n#### Dense Models (Unimodal)\n\n| Model | Dimension | Languages | Max Tokens | License |\n|-------|-----------|-----------|------------|---------|\n| `BAAI/bge-base-en-v1.5` | 768 | English | 512 | MIT |\n| `BAAI/bge-large-en-v1.5` | 1024 | English | 512 | MIT |\n| `BAAI/bge-small-en-v1.5` | 384 | English | 512 | MIT |\n| `thenlper/gte-base` | 768 | English | 512 | MIT |\n| `thenlper/gte-large` | 1024 | English | 512 | MIT |\n| `snowflake/snowflake-arctic-embed-m` | 768 | English | 512 | Apache-2.0 |\n| `snowflake/snowflake-arctic-embed-l` | 1024 | English | 512 | Apache-2.0 |\n\n资料来源：[fastembed/text/onnx_embedding.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n#### Multilingual Models\n\n| Model | Dimension | Languages | Max Tokens |\n|-------|-----------|-----------|------------|\n| `intfloat/multilingual-e5-small` | 384 | ~100 | 512 |\n| `intfloat/multilingual-e5-large` | 1024 | ~100 | 512 |\n| `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` | 768 | ~50 | 384 |\n| `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` | 384 | ~50 | 512 |\n\n资料来源：[fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n\n#### Jina AI Models\n\n| Model | Dimension | Languages | Max Tokens |\n|-------|-----------|-----------|------------|\n| `jinaai/jina-embeddings-v2-base-en` | 768 | English | 8192 |\n| `jinaai/jina-embeddings-v2-small-en` | 512 | English | 8192 |\n| `jinaai/jina-embeddings-v2-base-zh` | 768 | Chinese/English | 8192 |\n| `jinaai/jina-embeddings-v2-base-de` | 768 | German/English | 8192 |\n| `jinaai/jina-embeddings-v2-base-code` | 768 | 30 languages | 8192 
|\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n\n### Sparse Embedding Models\n\nSparse embeddings provide interpretable vectors with non-zero values only at specific token positions.\n\n| Model | Type | Language | Description |\n|-------|------|----------|-------------|\n| `prithivida/Splade_PP_en_v1` | SPLADE++ | English | Sparse lexical + semantic |\n| `Qdrant/minicoil-v1` | MiniCOIL | English | Semantic + keyword match |\n| `Qdrant/bm25` | BM25 | Multilingual | Traditional BM25 ranking |\n\n资料来源：[fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n\nThe MiniCOIL model combines semantic understanding with exact keyword matching:\n\n```python\nclass MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):\n    \"\"\"\n    MiniCOIL is a sparse embedding model, that resolves semantic meaning of the words,\n    while keeping exact keyword match behavior.\n    \n    Each vocabulary token is converted into 4d component of a sparse vector, \n    which is then weighted by the token frequency in the corpus.\n    If the token is not found in the corpus, it is treated exactly like in BM25.\n    \"\"\"\n```\n\n资料来源：[fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n\n### Image Embedding Models\n\n| Model | Dimension | Type | License |\n|-------|-----------|------|---------|\n| `Qdrant/resnet50-onnx` | 2048 | Image | Apache-2.0 |\n| `Qdrant/Unicom-ViT-B-16` | 768 | Multimodal | Apache-2.0 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Multimodal | Apache-2.0 |\n| `jinaai/jina-clip-v1` | 768 | Multimodal | Apache-2.0 |\n\n资料来源：[fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n### Cross Encoder Reranking Models\n\n| Model | Description | License | Size |\n|-------|-------------|---------|------|\n| 
`Xenova/ms-marco-MiniLM-L-12-v2` | MS MARCO passage ranking | Apache-2.0 | 0.12 GB |\n| `BAAI/bge-reranker-base` | BGE reranker base | MIT | 1.04 GB |\n| `jinaai/jina-reranker-v1-tiny-en` | Fast reranking, 8K context | Apache-2.0 | 0.13 GB |\n| `jinaai/jina-reranker-v1-turbo-en` | Fast reranking, 8K context | Apache-2.0 | 0.15 GB |\n| `jinaai/jina-reranker-v2-base-multilingual` | Multilingual, 1K context | CC-BY-NC-4.0 | 1.11 GB |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## Usage Patterns\n\n### Text Embedding Generation\n\n```python\nfrom fastembed import TextEmbedding\n\n# Initialize with default model\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n\n# Generate embeddings\ndocuments = [\n    \"passage: The capital of France is Paris\",\n    \"query: What is the capital of France?\"\n]\n\nembeddings = list(model.embed(documents))\n# Returns list of numpy arrays\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Sparse Embedding with SPLADE++\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nembeddings = list(model.embed(documents))\n\n# Returns:\n# [\n#   SparseEmbedding(indices=[ 17, 123, 919, ... ], values=[0.71, 0.22, 0.39, ...]),\n#   SparseEmbedding(indices=[ 38,  12,  91, ... 
], values=[0.11, 0.22, 0.39, ...])\n# ]\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Custom Model Configuration\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common.model_description import PoolingType, ModelSource\n\n# Register a custom model with a specific configuration, then load it by name\nTextEmbedding.add_custom_model(\n    model=\"intfloat/multilingual-e5-small\",\n    pooling=PoolingType.MEAN,\n    normalization=True,\n    sources=ModelSource(hf=\"intfloat/multilingual-e5-small\"),\n    dim=384,\n    model_file=\"onnx/model.onnx\",\n)\nmodel = TextEmbedding(model_name=\"intfloat/multilingual-e5-small\")\n\nembeddings = list(model.embed(documents))\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Cross Encoder Reranking\n\n```python\nfrom fastembed import TextCrossEncoder\n\nmodel = TextCrossEncoder(model_name=\"BAAI/bge-reranker-base\")\n\n# Score query-document pairs\nquery = \"What is the capital of France?\"\ndocuments = [\"Paris is the capital of France.\", \"Berlin is the capital of Germany.\"]\n\nscores = list(model.rerank(query, documents))\n```\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## Post-Processing: MUVERA\n\nFastEmbed includes MUVERA (Multi-Vector Retrieval via Fixed Dimensional Encodings) for converting late-interaction embeddings (like ColBERT) to fixed-dimensional encodings.\n\n```python\nimport numpy as np\n\nfrom fastembed import LateInteractionTextEmbedding\nfrom fastembed.postprocess import Muvera\n\nmodel = LateInteractionTextEmbedding(model_name=\"colbert-ir/colbertv2.0\")\nmuvera = Muvera.from_multivector_model(\n    model=model,\n    k_sim=6,\n    dim_proj=32\n)\n\n# Convert late-interaction embeddings to fixed dimension\nembeddings = np.array(list(model.embed([\"sample text\"])))\nfde = muvera.process_document(embeddings[0])\n```\n\n资料来源：[fastembed/postprocess/muvera.py](https://github.com/qdrant/fastembed/blob/main/fastembed/postprocess/muvera.py)\n\n## 
Model Caching\n\nFastEmbed automatically caches downloaded models to avoid repeated downloads.\n\n| Setting | Environment Variable | Default Location |\n|---------|---------------------|-------------------|\n| Cache Path | `FASTEMBED_CACHE_PATH` | System temp directory |\n\nModels are stored in ONNX format after download and converted to optimized formats on first use.\n\n## Prefix Requirements\n\nSome models require specific text prefixes for query and document inputs:\n\n| Prefix Requirement | Models | Example |\n|-------------------|--------|---------|\n| **Necessary** | E5, Nomic, Snowflake Arctic, BGE-small | Query: `query: ...`, Document: `passage: ...` |\n| **Not Necessary** | Jina, BGE (base/large), GTE | Plain text input |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Quick Reference\n\n### Model Selection Guide\n\n| Use Case | Recommended Model |\n|----------|-------------------|\n| English semantic search | `BAAI/bge-small-en-v1.5` |\n| High-quality English | `BAAI/bge-large-en-v1.5` |\n| Multilingual (100+ languages) | `intfloat/multilingual-e5-large` |\n| Long documents (8K tokens) | `jinaai/jina-embeddings-v2-base-en` |\n| Fast re-ranking | `jinaai/jina-reranker-v1-tiny-en` |\n| Sparse lexical + semantic | `prithivida/Splade_PP_en_v1` |\n\n### API Quick Reference\n\n| Class | Import | Primary Method |\n|-------|--------|----------------|\n| Dense Text | `from fastembed import TextEmbedding` | `.embed(documents)` |\n| Sparse Text | `from fastembed import SparseTextEmbedding` | `.embed(documents)` |\n| Image | `from fastembed import ImageEmbedding` | `.embed(images)` |\n| Cross Encoder | `from fastembed import TextCrossEncoder` | `.rerank(query, documents)` |\n| Late Interaction | `from fastembed import LateInteractionTextEmbedding` | `.embed(documents)` |\n\n## Further Documentation\n\n- [FastEmbed GitHub Repository](https://github.com/qdrant/fastembed)\n- [Qdrant 
Documentation](https://qdrant.tech/documentation/)\n- [ONNX Runtime Documentation](https://onnxruntime.ai/docs/)\n\n---\n\n<a id='page-installation'></a>\n\n## Installation Guide\n\n### 相关页面\n\n相关主题：[Introduction to FastEmbed](#page-introduction), [GPU Support and Acceleration](#page-gpu-support)\n\n<details>\n<summary>Relevant Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n- [RELEASE.md](https://github.com/qdrant/fastembed/blob/main/RELEASE.md)\n- [pyproject.toml](https://github.com/qdrant/fastembed/blob/main/pyproject.toml)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [mkdocs.yml](https://github.com/qdrant/fastembed/blob/main/mkdocs.yml)\n</details>\n\n# Installation Guide\n\nFastEmbed is a lightweight, fast, and accurate embedding library developed by Qdrant. 
This guide covers all aspects of installing and configuring FastEmbed for various use cases including CPU inference, GPU acceleration, and integration with Qdrant vector database.\n\n## Prerequisites\n\n### System Requirements\n\n| Requirement | Minimum | Recommended |\n|-------------|---------|-------------|\n| Python Version | 3.9+ | 3.10+ |\n| RAM | 4 GB | 8 GB+ |\n| Disk Space | 2 GB | 5 GB+ |\n| GPU (Optional) | CUDA 11.8+ | CUDA 12.x |\n\nVerify your Python version before installation:\n\n```bash\npython --version\n```\n\n资料来源：[README.md:1-100]()\n\n## Package Variants\n\nFastEmbed is distributed in two package variants:\n\n| Package | Description | Use Case |\n|---------|-------------|----------|\n| `fastembed` | CPU-only version | General purpose embedding generation |\n| `fastembed-gpu` | GPU-accelerated version | High-throughput production workloads |\n\n### CPU Installation\n\nInstall the standard CPU version using pip:\n\n```bash\npip install fastembed\n```\n\n资料来源：[README.md]()\n资料来源：[RELEASE.md:1-15]()\n\n### GPU Installation\n\nFor CUDA-enabled GPU acceleration:\n\n```bash\npip install fastembed-gpu\n```\n\nThe GPU package automatically includes the CUDA Execution Provider for ONNX Runtime, enabling significantly faster inference on NVIDIA GPUs.\n\n资料来源：[RELEASE.md:1-15]()\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Installation Verification\n\nVerify successful installation:\n\n```python\nfrom fastembed import TextEmbedding\n\n# List available models\nprint(TextEmbedding.list_supported_models())\n\n# Initialize a model\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\nprint(\"FastEmbed installed successfully!\")\n```\n\n## Supported Embedding Models\n\nFastEmbed supports multiple embedding modalities organized into the following categories:\n\n### Dense Text Embeddings\n\nDense text embeddings provide fixed-dimensional vector representations for text. 
Supported models include:\n\n| Model | Dimension | Languages | Token Limit | License |\n|-------|-----------|-----------|-------------|---------|\n| `BAAI/bge-small-en-v1.5` | 384 | English | 512 | MIT |\n| `BAAI/bge-base-en-v1.5` | 768 | English | 512 | MIT |\n| `BAAI/bge-large-en-v1.5` | 1024 | English | 512 | MIT |\n| `jinaai/jina-embeddings-v2-base-en` | 768 | English | 8192 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-small-en` | 512 | English | 8192 | Apache 2.0 |\n| `sentence-transformers/all-MiniLM-L6-v2` | 384 | English | 256 | Apache 2.0 |\n| `mixedbread-ai/mxbai-embed-large-v1` | 1024 | English | 512 | Apache 2.0 |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n### Multilingual Models\n\n| Model | Dimension | Languages | Token Limit | License |\n|-------|-----------|-----------|-------------|---------|\n| `intfloat/multilingual-e5-small` | 384 | ~100 | 512 | MIT |\n| `intfloat/multilingual-e5-large` | 1024 | ~100 | 512 | MIT |\n| `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` | 768 | ~50 | 384 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-base-de` | 768 | German, English | 8192 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-base-zh` | 768 | Chinese, English | 8192 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-base-es` | 768 | Spanish, English | 8192 | Apache 2.0 |\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py]()\n资料来源：[fastembed/text/pooled_embedding.py]()\n\n### Sparse Embeddings\n\nSparse embeddings represent text using high-dimensional sparse vectors, useful for keyword-based retrieval.\n\n| Model | Type | License |\n|-------|------|---------|\n| `prithivida/Splade_PP_en_v1` | SPLADE++ | Apache 2.0 |\n| `Qdrant/bm25` | BM25 | Apache 2.0 |\n\n资料来源：[fastembed/sparse/bm25.py]()\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Image Embeddings\n\n| Model | Dimension | Type | License 
|\n|-------|-----------|------|---------|\n| `Qdrant/resnet50-onnx` | 2048 | Image | Apache 2.0 |\n| `Qdrant/Unicom-ViT-B-16` | 768 | Multimodal | Apache 2.0 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Multimodal | Apache 2.0 |\n| `jinaai/jina-clip-v1` | 768 | Multimodal (text&image) | Apache 2.0 |\n\n资料来源：[fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n### Reranking Models\n\nCross-encoder models for re-ranking search results:\n\n| Model | Context Length | License |\n|-------|----------------|---------|\n| `Xenova/ms-marco-MiniLM-L-12-v2` | - | Apache 2.0 |\n| `BAAI/bge-reranker-base` | - | MIT |\n| `jinaai/jina-reranker-v1-tiny-en` | 8K | Apache 2.0 |\n| `jinaai/jina-reranker-v1-turbo-en` | 8K | Apache 2.0 |\n| `jinaai/jina-reranker-v2-base-multilingual` | 1K | CC-BY-NC-4.0 |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py]()\n\n## GPU Configuration\n\n### CUDA Provider Setup\n\nEnable GPU acceleration by specifying the CUDA execution provider:\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common import OnnxProvider\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\"CUDAExecutionProvider\"]\n)\nprint(\"The model BAAI/bge-small-en-v1.5 is ready to use on a GPU.\")\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### GPU Installation Workflow\n\n```mermaid\ngraph TD\n    A[Install fastembed-gpu] --> B{Check CUDA Version}\n    B -->|CUDA 11.8+| C[Install Compatible Driver]\n    B -->|CUDA < 11.8| D[Upgrade CUDA]\n    C --> E[Verify ONNX Runtime GPU Support]\n    E --> F[Import TextEmbedding]\n    F --> G[Configure Providers]\n    G --> H[GPU Inference Ready]\n```\n\n## Qdrant Integration\n\nFastEmbed integrates seamlessly with the Qdrant vector database for production deployments.\n\n### Installation with Qdrant Client\n\n```bash\n# Standard Qdrant with FastEmbed\npip install 
qdrant-client[fastembed]\n\n# GPU-accelerated FastEmbed with Qdrant\npip install qdrant-client[fastembed-gpu]\n```\n\nOn zsh shells, use quotes:\n\n```bash\npip install 'qdrant-client[fastembed]'\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Complete Qdrant Integration Example\n\n```python\nfrom qdrant_client import QdrantClient, models\n\n# Initialize the client\nclient = QdrantClient(\"localhost\", port=6333)  # For production\n# client = QdrantClient(\":memory:\")  # For experimentation\n\nmodel_name = \"sentence-transformers/all-MiniLM-L6-v2\"\npayload = [\n    {\"document\": \"Qdrant has Langchain integrations\", \"source\": \"Langchain-docs\"},\n    {\"document\": \"Qdrant also has Llama Index integrations\", \"source\": \"LlamaIndex-docs\"},\n]\ndocs = [models.Document(text=data[\"document\"], model=model_name) for data in payload]\nids = [42, 2]\n\nclient.create_collection(\n    \"demo_collection\",\n    vectors_config=models.VectorParams(\n        size=client.get_embedding_size(model_name), distance=models.Distance.COSINE\n    )\n)\n\nclient.upload_collection(\n    collection_name=\"demo_collection\",\n    vectors=docs,\n    ids=ids,\n    payload=payload,\n)\n\nsearch_result = client.query_points(\n    collection_name=\"demo_collection\",\n    query=docs[0],\n    limit=5,\n)\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Cache Configuration\n\n### Default Cache Location\n\nModels are cached in `fastembed_cache` within the system's temp directory by default. 
This location can be customized using environment variables.\n\n### FASTEMBED_CACHE_PATH\n\nSet a custom cache directory:\n\n```bash\nexport FASTEMBED_CACHE_PATH=/path/to/custom/cache\n```\n\nThe cache directory structure follows Hugging Face's conventions, with models organized by their source repository.\n\n### Model Loading Behavior\n\n```mermaid\ngraph TD\n    A[Import FastEmbed Module] --> B{Model in Cache?}\n    B -->|Yes| C[Load from Cache]\n    B -->|No| D[Download from Source]\n    D --> E{HuggingFace Available?}\n    E -->|Yes| F[Download from HF Hub]\n    E -->|No| G[Use URL Source]\n    C --> H[Initialize ONNX Session]\n    F --> H\n    G --> H\n    H --> I[Model Ready for Inference]\n```\n\n## Usage Examples\n\n### Dense Text Embedding\n\n```python\nfrom fastembed import TextEmbedding\n\ndocuments = [\n    \"passage: FastEmbed is a fast embedding library\",\n    \"query: What is FastEmbed?\",\n    \"passage: Qdrant is a vector database\",\n]\n\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\nembeddings = list(model.embed(documents))\n\nprint(f\"Generated {len(embeddings)} embeddings\")\nprint(f\"Embedding dimension: {embeddings[0].shape}\")\n```\n\n### Sparse Text Embedding (SPLADE++)\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nembeddings = list(model.embed(documents))\n\n# Output format:\n# [\n#   SparseEmbedding(indices=[ 17, 123, 919, ... ], values=[0.71, 0.22, 0.39, ...]),\n#   SparseEmbedding(indices=[ 38,  12,  91, ... 
], values=[0.11, 0.22, 0.39, ...])\n# ]\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Image Embedding\n\n```python\nfrom fastembed import ImageEmbedding\nfrom PIL import Image\n\nmodel = ImageEmbedding(model_name=\"Qdrant/resnet50-onnx\")\nimages = [Image.open(\"path/to/image.jpg\")]\nembeddings = list(model.embed(images))\n```\n\n### Custom Model Source\n\nRegister a model with a custom configuration, then load it by name:\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common.model_description import PoolingType, ModelSource\n\nTextEmbedding.add_custom_model(\n    model=\"intfloat/multilingual-e5-small\",\n    pooling=PoolingType.MEAN,\n    normalization=True,\n    sources=ModelSource(hf=\"intfloat/multilingual-e5-small\"),\n    dim=384,\n    model_file=\"onnx/model.onnx\",\n)\nmodel = TextEmbedding(model_name=\"intfloat/multilingual-e5-small\")\n\nembeddings = list(model.embed(documents))\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Late Interaction Models\n\nLate interaction models like ColBERT enable more sophisticated similarity matching:\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\n\nmodel = LateInteractionTextEmbedding(model_name=\"colbert-ir/colbertv2.0\")\nembeddings = list(model.embed(documents))\n```\n\n## Post-Processing with Muvera\n\nThe Muvera post-processor enables Fixed Dimensional Encoding (FDE) for multi-vector models:\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\nfrom fastembed.postprocess import Muvera\nimport numpy as np\n\nmodel = LateInteractionTextEmbedding(model_name=\"colbert-ir/colbertv2.0\")\nmuvera = Muvera.from_multivector_model(\n    model=model,\n    k_sim=6,\n    dim_proj=32\n)\n\nembeddings = np.array(list(model.embed([\"sample text\"])))\nfde = muvera.process_document(embeddings[0])\n```\n\n资料来源：[fastembed/postprocess/muvera.py](https://github.com/qdrant/fastembed/blob/main/fastembed/postprocess/muvera.py)\n\n## Troubleshooting\n\n### Common Installation Issues\n\n| Issue | Solution |\n|-------|----------|\n| `ModuleNotFoundError: No module named 'fastembed'` | Run `pip install 
fastembed` |\n| CUDA not available | Install `fastembed-gpu` and verify NVIDIA driver |\n| Model download fails | Check network connectivity and HuggingFace access |\n| Out of memory | Reduce batch size or use smaller model variant |\n\n### Checking Installed Version\n\n```python\nimport fastembed\nprint(fastembed.__version__)\n```\n\n### Verifying GPU Availability\n\n```python\nimport onnxruntime as ort\nprint(f\"Available providers: {ort.get_available_providers()}\")\n```\n\n## Advanced Configuration\n\n### Lazy Loading\n\nEnable lazy model loading for memory-efficient initialization:\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    lazy_load=True  # Model loads on first inference call\n)\n```\n\n### Thread Configuration\n\nOptimize CPU thread usage:\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    threads=4  # Limit to 4 threads\n)\n```\n\n### Device Selection\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common import Device\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    cuda=True  # or Device.AUTO for automatic detection\n)\n```\n\n## Documentation Resources\n\nFor more information, refer to the official documentation:\n\n- [FastEmbed Documentation](https://qdrant.github.io/fastembed/)\n- [FastEmbed GitHub Repository](https://github.com/qdrant/fastembed/)\n- [HuggingFace Model Hub](https://huggingface.co/Qdrant/fastembed)\n\n资料来源：[mkdocs.yml](https://github.com/qdrant/fastembed/blob/main/mkdocs.yml)\n\n---\n\n<a id='page-quickstart'></a>\n\n## Quick Start Guide\n\n### 相关页面\n\n相关主题：[Introduction to FastEmbed](#page-introduction), [Text Embedding Module](#page-text-embedding), [Image Embedding Module](#page-image-embedding)\n\n<details>\n<summary>Related Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n- 
[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n</details>\n\n# Quick Start Guide\n\nFastEmbed is a lightweight, fast, and accurate embedding library developed by Qdrant. It provides text and image embeddings using ONNX-based models optimized for production deployment. This guide covers the essential steps to get started with FastEmbed for your embedding needs.\n\n## Overview\n\nFastEmbed enables developers to generate high-quality vector embeddings for text and images with minimal configuration. 
The library supports multiple embedding types including dense embeddings, sparse embeddings, and cross-encoder reranking models.\n\n```mermaid\ngraph TD\n    A[FastEmbed Library] --> B[Text Embeddings]\n    A --> C[Image Embeddings]\n    A --> D[Sparse Embeddings]\n    A --> E[Reranking Models]\n    \n    B --> B1[Dense Models]\n    B --> B2[Pooled Models]\n    \n    D --> D1[SPLADE++]\n    D --> D2[BM25]\n    D --> D3[MiniCOIL]\n```\n\n## Installation\n\nInstall FastEmbed using pip:\n\n```bash\npip install fastembed\n```\n\nFor CUDA acceleration support:\n\n```bash\npip install fastembed-gpu\n```\n\n## Basic Text Embedding\n\n### Loading a Model\n\nImport and initialize a text embedding model:\n\n```python\nfrom fastembed import TextEmbedding\n\n# Use the default model (BAAI/bge-small-en-v1.5)\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n```\n\n### Generating Embeddings\n\n```python\ndocuments = [\n    \"passage: A man is eating food.\",\n    \"passage: A man is eating a piece of broccoli.\",\n    \"passage: A man is eating pasta.\",\n    \"passage: A woman is cutting vegetables.\",\n]\n\nembeddings = list(model.embed(documents))\n```\n\nThe prefix `\"passage:\"` is required for some models like `BAAI/bge-small-en-v1.5` and `snowflake/snowflake-arctic-embed-xs` to indicate the text type. 
资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Supported Models\n\n### Text Embedding Models\n\n| Model Name | Dimension | Languages | Max Tokens | License | Size (GB) |\n|------------|-----------|-----------|------------|---------|-----------|\n| BAAI/bge-small-en-v1.5 | 384 | English | 512 | MIT | 0.067 |\n| BAAI/bge-base-en-v1.5 | 768 | English | 512 | MIT | 0.21 |\n| BAAI/bge-large-en-v1.5 | 1024 | English | 512 | MIT | 1.20 |\n| intfloat/multilingual-e5-small | 384 | Multilingual (~100) | 512 | MIT | 0.09 |\n| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | 384 | Multilingual (~50) | 512 | Apache 2.0 | 0.22 |\n| jinaai/jina-embeddings-v2-base-en | 768 | English | 8192 | Apache 2.0 | 0.52 |\n| snowflake/snowflake-arctic-embed-xs | 384 | English | 512 | Apache 2.0 | 0.09 |\n| snowflake/snowflake-arctic-embed-m-long | 768 | English | 2048 | Apache 2.0 | 0.54 |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n### Pooled Normalized Embeddings\n\nThe `PooledNormalizedEmbedding` class applies mean pooling to the model output and normalizes the result:\n\n```python\nfrom fastembed.text import PooledNormalizedEmbedding\n\nmodel = PooledNormalizedEmbedding(model_name=\"jinaai/jina-embeddings-v2-base-en\")\n```\n\nThe post-processing combines mean pooling with L2 normalization:\n\n```python\ndef _post_process_onnx_output(self, output: OnnxOutputContext, **kwargs: Any) -> Iterable[NumpyArray]:\n    embeddings = output.model_output\n    attn_mask = output.attention_mask\n    return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py:78-84](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n\n## Image Embeddings\n\nFastEmbed supports multimodal models for image embeddings:\n\n```python\nfrom fastembed.image import OnnxImageEmbedding\n\nmodel = 
OnnxImageEmbedding(model_name=\"Qdrant/Unicom-ViT-B-16\")\n```\n\n### Supported Image Models\n\n| Model Name | Dimension | Type | License | Size (GB) |\n|------------|-----------|------|---------|-----------|\n| Qdrant/Unicom-ViT-B-16 | 768 | Multimodal (text&image) | Apache 2.0 | 0.82 |\n| Qdrant/Unicom-ViT-B-32 | 512 | Multimodal (text&image) | Apache 2.0 | 0.48 |\n| jinaai/jina-clip-v1 | 768 | Multimodal (text&image) | Apache 2.0 | 0.34 |\n\n资料来源：[fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Sparse Embeddings\n\nSparse embeddings provide interpretable, non-dense vector representations useful for keyword-aware semantic search.\n\n### SPLADE++\n\nSPLADE++ is a sparse embedding model that resolves semantic meaning while preserving keyword match behavior:\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nembeddings = list(model.embed(documents))\n```\n\nReturns sparse vectors with indices and values:\n\n```\n[\n  SparseEmbedding(indices=[17, 123, 919, ...], values=[0.71, 0.22, 0.39, ...]),\n  SparseEmbedding(indices=[38, 12, 91, ...], values=[0.11, 0.22, 0.39, ...])\n]\n```\n\n### BM25\n\nTraditional BM25 implemented as sparse embeddings:\n\n```python\nfrom fastembed.sparse import Bm25\n\nmodel = Bm25(language=\"en\")\n```\n\nBM25 formula:\n\n```\nscore(q, d) = SUM[ IDF(q_i) * (f(q_i, d) * (k + 1)) / (f(q_i, d) + k * (1 - b + b * (|d| / avg_len))) ]\n```\n\n资料来源：[fastembed/sparse/bm25.py:47-52](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n\n### MiniCOIL\n\nMiniCOIL combines semantic embeddings with exact keyword matching:\n\n```python\nfrom fastembed.sparse import MiniCOIL\n\nmodel = MiniCOIL(model_name=\"Qdrant/minicoil-v1\")\n```\n\n## Reranking Models\n\nCross-encoder reranking improves search results by re-scoring candidate documents:\n\n```python\nfrom fastembed import 
TextCrossEncoder\n\nmodel = TextCrossEncoder(model_name=\"jinaai/jina-reranker-v1-turbo-en\")\n```\n\n### Supported Reranker Models\n\n| Model Name | License | Size (GB) | Context Length |\n|------------|---------|-----------|----------------|\n| jinaai/jina-reranker-v1-turbo-en | Apache 2.0 | 0.15 | 8K |\n| jinaai/jina-reranker-v2-base-multilingual | CC BY-NC 4.0 | 1.11 | 1K (sliding window) |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## Common Configuration Options\n\n### Initialization Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | \"BAAI/bge-small-en-v1.5\" | Name of the model to use |\n| `cache_dir` | str or None | None | Cache directory path |\n| `threads` | int or None | None | Number of threads for ONNX execution |\n| `providers` | Sequence[OnnxProvider] | None | ONNX execution providers (CPU, CUDA, etc.) |\n| `cuda` | bool or Device | Device.AUTO | Enable CUDA acceleration |\n| `device_ids` | list[int] | None | Specific GPU device IDs |\n| `lazy_load` | bool | False | Defer model loading until first use |\n\n资料来源：[fastembed/text/onnx_embedding.py:123-136](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Complete Example Workflow\n\n```mermaid\ngraph LR\n    A[Input Documents] --> B[TextEmbedding]\n    B --> C[Embedding Vectors]\n    C --> D[Vector Database]\n    D --> E[Similarity Search]\n    E --> F[Candidate Results]\n    F --> G[TextCrossEncoder]\n    G --> H[Reranked Results]\n```\n\n```python\nfrom fastembed import TextEmbedding, SparseTextEmbedding, TextCrossEncoder\n\n# 1. 
Load models\ntext_model = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\nsparse_model = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nreranker = TextCrossEncoder(model_name=\"jinaai/jina-reranker-v1-turbo-en\")\n\n# 2. Generate dense embeddings\ndocuments = [\"query: fastembed is fast\", \"passage: FastEmbed provides efficient embeddings\"]\ndense_embeddings = list(text_model.embed(documents))\n\n# 3. Generate sparse embeddings\nsparse_embeddings = list(sparse_model.embed(documents))\n\n# 4. Rerank results\nquery = \"query: What is FastEmbed?\"\nresults = reranker.rerank(query=query, documents=documents, top_k=2)\n```\n\n## Language-Specific Models\n\nFor non-English content, use multilingual models:\n\n| Language | Recommended Model |\n|----------|-------------------|\n| Chinese | `BAAI/bge-small-zh-v1.5`, `jinaai/jina-embeddings-v2-base-zh` |\n| German | `jinaai/jina-embeddings-v2-base-de` |\n| Spanish | `jinaai/jina-embeddings-v2-base-es` |\n| Code | `jinaai/jina-embeddings-v2-base-code` |\n| Multilingual | `intfloat/multilingual-e5-small`, `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` |\n\n## Prefix Requirements\n\nSome models require specific prefixes to distinguish query and passage text:\n\n| Model | Prefix Required | Example |\n|-------|-----------------|---------|\n| BAAI/bge-small-en-v1.5 | Yes | `\"query: ...\"`, `\"passage: ...\"` |\n| intfloat/multilingual-e5-small | Yes | `\"query: ...\"`, `\"passage: ...\"` |\n| snowflake/snowflake-arctic-embed-xs | Yes | `\"query: ...\"`, `\"passage: ...\"` |\n| jinaai/jina-embeddings-v2-base-en | Not necessary | Plain text |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Next Steps\n\n- Explore the [API Reference](../api_reference/) for detailed method documentation\n- Learn about [Model Selection](../models/) to choose the right embedding model\n- Read [Advanced 
Usage](../guides/advanced_usage/) for performance optimization\n- Check [Examples](../examples/) for real-world use cases\n\n---\n\n<a id='page-architecture'></a>\n\n## System Architecture\n\n### 相关页面\n\n相关主题：[Introduction to FastEmbed](#page-introduction), [ONNX Model Infrastructure](#page-onnx-model)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/text/text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py)\n- [fastembed/image/image_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/image_embedding_base.py)\n- [fastembed/common/onnx_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/onnx_model.py)\n- [fastembed/common/model_management.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/model_management.py)\n- [fastembed/parallel_processor.py](https://github.com/qdrant/fastembed/blob/main/fastembed/parallel_processor.py)\n</details>\n\n# System Architecture\n\nFastEmbed is a lightweight, fast text and image embedding library built on ONNX Runtime. 
The architecture is designed around modularity, enabling multiple embedding types (dense, sparse, late-interaction) through a unified interface while leveraging ONNX for cross-platform inference optimization.\n\n## Architecture Overview\n\nFastEmbed follows a layered architecture with clear separation of concerns:\n\n```mermaid\ngraph TD\n    subgraph \"Public API Layer\"\n        A[\"TextEmbedding<br/>ImageEmbedding<br/>SparseTextEmbedding<br/>LateInteractionTextEmbedding\"]\n    end\n    \n    subgraph \"Embedding Base Classes\"\n        B[\"TextEmbeddingBase\"]\n        C[\"ImageEmbeddingBase\"]\n        D[\"SparseTextEmbeddingBase\"]\n        E[\"LateInteractionTextEmbeddingBase\"]\n    end\n    \n    subgraph \"ONNX Abstraction Layer\"\n        F[\"OnnxTextModel\"]\n        G[\"OnnxImageModel\"]\n    end\n    \n    subgraph \"Model Management\"\n        H[\"ModelManagement\"]\n        I[\"OnnxModel\"]\n    end\n    \n    subgraph \"Parallel Processing\"\n        J[\"ParallelProcessor\"]\n        K[\"EmbeddingWorker<br/>OnnxTextEmbeddingWorker\"]\n    end\n    \n    subgraph \"ONNX Runtime\"\n        L[\"InferenceSession\"]\n    end\n    \n    A --> B\n    A --> C\n    A --> D\n    A --> E\n    B --> F\n    C --> G\n    F --> I\n    G --> I\n    I --> L\n    H --> I\n    J --> K\n    K --> F\n```\n\n## Core Components\n\n### Base Class Hierarchy\n\nFastEmbed implements embedding models through inheritance hierarchies that separate concerns between the embedding interface and the ONNX runtime integration.\n\n#### Text Embedding Base\n\nThe `TextEmbeddingBase` class provides the foundation for all text embedding models. 
It defines the abstract interface that concrete implementations must follow.\n\n| Method | Purpose | Source |\n|--------|---------|--------|\n| `embed()` | Generate embeddings for input documents | [text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py) |\n| `_list_supported_models()` | Return list of supported model descriptions | [text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py) |\n| `_post_process_onnx_output()` | Transform raw ONNX output to final embeddings | [text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py) |\n\n#### Image Embedding Base\n\nThe `ImageEmbeddingBase` class mirrors the text embedding architecture for image inputs, supporting multimodal models like Jina CLIP.\n\n| Property | Type | Description |\n|----------|------|-------------|\n| `model_name` | str | HuggingFace model identifier |\n| `cache_dir` | str \\| None | Local cache directory for model files |\n| `lazy_load` | bool | Defer model loading until first use |\n\n### ONNX Model Abstraction\n\nThe `OnnxModel` class serves as the bridge between the embedding interface and ONNX Runtime execution.\n\n```mermaid\nclassDiagram\n    class OnnxModel~T~ {\n        +str model_name\n        +str cache_dir\n        +InferenceSession inference_session\n        +load_model() Any\n        +run(input_feed)~T~\n    }\n    \n    class OnnxTextModel~T~ {\n        +mean_pooling(output, attention_mask)\n        +encode(input_data) Iterable~T~\n    }\n    \n    class OnnxImageModel~T~ {\n        +preprocess_image(image) Tensor\n        +encode(images) Iterable~T~\n    }\n    \n    OnnxModel <|-- OnnxTextModel\n    OnnxModel <|-- OnnxImageModel\n```\n\nThe ONNX model abstraction provides:\n\n1. **Model Loading**: Downloads and caches ONNX models from HuggingFace or custom URLs\n2. **Session Management**: Creates and configures ONNX Runtime inference sessions with specified providers (CPU, CUDA, TensorRT)\n3. 
**Input/Output Handling**: Manages input preprocessing and output postprocessing\n\n## Embedding Types\n\nFastEmbed supports multiple embedding paradigms through specialized classes.\n\n### Dense Embeddings\n\nDense embeddings convert inputs into fixed-dimensional continuous vectors. The `OnnxTextEmbedding` class provides dense text embeddings using models like BGE, GTE, and Jina embeddings.\n\n```mermaid\ngraph LR\n    A[\"Input Text\"] --> B[\"Tokenization\"]\n    B --> C[\"ONNX Inference\"]\n    C --> D[\"mean_pooling\"]\n    D --> E[\"L2 Normalization\"]\n    E --> F[\"Dense Vector\"]\n```\n\nKey supported models:\n\n| Model | Dimension | Context Length | Prefix Required |\n|-------|-----------|----------------|------------------|\n| BAAI/bge-small-en-v1.5 | 384 | 512 | No |\n| BAAI/bge-base-en-v1.5 | 768 | 512 | No |\n| BAAI/bge-large-en-v1.5 | 1024 | 512 | No |\n| jinaai/jina-embeddings-v2-base-en | 768 | 8192 | No |\n| intfloat/multilingual-e5-small | 384 | 512 | Yes |\n\n### Pooled Normalized Embeddings\n\nThe `PooledNormalizedEmbedding` class extends `PooledEmbedding` with built-in mean pooling and L2 normalization.\n\n```python\n# From pooled_normalized_embedding.py\nclass PooledNormalizedEmbedding(PooledEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        if output.attention_mask is None:\n            raise ValueError(\"attention_mask must be provided for document post-processing\")\n        \n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\n### Sparse Embeddings\n\nSparse embeddings represent documents as sparse vectors with non-zero values only at relevant token positions. FastEmbed provides two sparse embedding approaches:\n\n#### BM25\n\nBM25 is implemented as a traditional sparse embedding model with IDF weighting. 
The formula used:\n\n```\nscore(q, d) = Σ[ IDF(q_i) * (f(q_i, d) * (k + 1)) / (f(q_i, d) + k * (1 - b + b * (|d| / avg_len))) ]\n```\n\nWhere:\n- `IDF(q_i)` is the inverse document frequency\n- `f(q_i, d)` is the term frequency\n- `k`, `b` are hyperparameters controlling saturation and length normalization\n\n#### SPLADE++\n\nSPLADE++ models use neural networks to generate sparse embeddings that combine semantic understanding with exact keyword matching.\n\n### Late Interaction Embeddings\n\nLate interaction models like ColBERT generate multiple token-level embeddings that can be compared efficiently during retrieval. These are often combined with postprocessors like MUVERA for fixed-dimensional encoding.\n\n## Model Management\n\n### Model Source Configuration\n\nModels can be loaded from multiple sources defined through the `ModelSource` class:\n\n```python\nsources=ModelSource(\n    hf=\"jinaai/jina-embeddings-v2-base-en\",      # HuggingFace Hub\n    url=\"https://storage.googleapis.com/...\",      # Direct URL download\n    _deprecated_tar_struct=True                    # Legacy tar format\n)\n```\n\n### Cache Strategy\n\nThe `ModelManagement` class handles model caching and lazy loading:\n\n1. **Cache Directory**: Configurable via `FASTEMBED_CACHE_PATH` environment variable or `cache_dir` parameter\n2. **Lazy Loading**: Models are not loaded until first inference when `lazy_load=True`\n3. 
**Provider Selection**: Automatic provider selection (CUDA > CPU) with fallback\n\n```mermaid\ngraph TD\n    A[\"Model Request\"] --> B{\"Cache Hit?\"}\n    B -->|Yes| C[\"Load from Cache\"]\n    B -->|No| D[\"Download Model\"]\n    D --> E[\"Store in Cache\"]\n    C --> F[\"Initialize ONNX Session\"]\n    E --> F\n    F --> G[\"Ready for Inference\"]\n```\n\n## Parallel Processing\n\nFastEmbed uses a worker-based parallel processing architecture for efficient batch inference.\n\n### ParallelProcessor\n\nThe `ParallelProcessor` class manages a pool of workers for parallel embedding generation:\n\n```mermaid\ngraph TD\n    subgraph \"Main Process\"\n        A[\"ParallelProcessor\"]\n        B[\"Input Queue\"]\n    end\n    \n    subgraph \"Worker Pool\"\n        C[\"Worker 1\"]\n        D[\"Worker 2\"]\n        E[\"Worker N\"]\n    end\n    \n    B --> C\n    B --> D\n    B --> E\n    \n    C --> F[\"Result Queue\"]\n    D --> F\n    E --> F\n```\n\n### Worker Classes\n\nWorkers inherit from `EmbeddingWorker` or specialized variants:\n\n| Worker Class | Purpose |\n|--------------|---------|\n| `EmbeddingWorker` | Base worker for generic embeddings |\n| `OnnxTextEmbeddingWorker` | Text embedding with ONNX inference |\n| `PooledNormalizedEmbeddingWorker` | Worker with pooling and normalization |\n\nWorkers implement the `init_embedding()` method to initialize the embedding model:\n\n```python\ndef init_embedding(\n    self,\n    model_name: str,\n    cache_dir: str | None = None,\n    threads: int | None = None,\n    providers: Sequence[OnnxProvider] | None = None,\n    cuda: bool | Device = Device.AUTO,\n    device_ids: list[int] | None = None,\n    lazy_load: bool = False,\n    device_id: int | None = None,\n    specific_model_path: str | None = None,\n    **kwargs: Any,\n):\n```\n\n## Cross-Encoder Reranking\n\nThe cross-encoder reranking system follows a separate architectural path:\n\n```mermaid\ngraph LR\n    A[\"Query\"] --> D[\"Cross-Encoder\"]\n    
B[\"Candidate Doc\"] --> D\n    D --> E[\"Relevance Scores\"]\n```\n\nThe `OnnxTextCrossEncoder` class provides reranking through:\n\n```python\nclass OnnxTextCrossEncoder(TextCrossEncoderBase, OnnxCrossEncoderModel):\n    @classmethod\n    def _list_supported_models(cls) -> list[BaseModelDescription]:\n        return supported_onnx_models\n```\n\nSupported reranker models:\n\n| Model | Context Length | Languages |\n|-------|----------------|-----------|\n| jinaai/jina-reranker-v1-turbo-en | 1K | English |\n| jinaai/jina-reranker-v2-base-multilingual | 1K (sliding window) | Multilingual |\n\n## Device and Provider Management\n\nFastEmbed supports multiple execution providers with automatic device selection:\n\n```python\n# Device selection logic (simplified)\nif cuda and Device.CUDA available:\n    use CUDA provider\nelif cuda and Device.ONNX_CPU fallback:\n    use CPU provider\nelse:\n    use default provider\n```\n\n### Supported Providers\n\n| Provider | Device | Use Case |\n|----------|--------|----------|\n| CPUExecutionProvider | CPU | General purpose, no GPU |\n| CUDAExecutionProvider | NVIDIA GPU | Fast inference with CUDA |\n| TensorRTExecutionProvider | NVIDIA GPU | Optimized batch inference |\n\n## Configuration Options\n\n### Model Initialization Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | \"BAAI/bge-small-en-v1.5\" | Model identifier |\n| `cache_dir` | str \\| None | None | Cache directory path |\n| `threads` | int \\| None | None | Thread count for CPU execution |\n| `providers` | Sequence[OnnxProvider] \\| None | None | ONNX execution providers |\n| `cuda` | bool \\| Device | Device.AUTO | CUDA device selection |\n| `device_ids` | list[int] \\| None | None | Specific GPU device IDs |\n| `lazy_load` | bool | False | Defer loading until first use |\n| `device_id` | int \\| None | None | Single device ID assignment |\n| `specific_model_path` | str \\| None | None | 
Override model file path |\n\n## Data Flow\n\n### Text Embedding Pipeline\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant TextEmbedding\n    participant OnnxTextModel\n    participant ONNXRuntime\n    \n    Client->>TextEmbedding: embed(documents)\n    TextEmbedding->>TextEmbedding: preprocess(documents)\n    TextEmbedding->>OnnxTextModel: encode(preprocessed)\n    OnnxTextModel->>ONNXRuntime: run(input_feed)\n    ONNXRuntime-->>OnnxTextModel: raw_output\n    OnnxTextModel->>OnnxTextModel: mean_pooling + normalize\n    OnnxTextModel-->>TextEmbedding: embeddings\n    TextEmbedding-->>Client: Iterable[NDArray]\n```\n\n### Model Description Schema\n\nEach model is described by a `DenseModelDescription` or `BaseModelDescription`:\n\n```python\nDenseModelDescription(\n    model=\"BAAI/bge-small-en-v1.5\",\n    dim=384,\n    description=\"Text embeddings, Unimodal (text), English, 512 input tokens truncation\",\n    license=\"mit\",\n    size_in_GB=0.067,\n    sources=ModelSource(hf=\"qdrant/bge-small-en-v1.5-onnx-q\"),\n    model_file=\"model_optimized.onnx\",\n)\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `model` | str | HuggingFace model identifier |\n| `dim` | int | Embedding dimension |\n| `description` | str | Human-readable model description |\n| `license` | str | Model license |\n| `size_in_GB` | float | Model file size |\n| `sources` | ModelSource | Download sources configuration |\n| `model_file` | str | ONNX model file name |\n| `additional_files` | list[str] | Extra files (vocab, configs) |\n| `requires_idf` | bool | IDF file requirement for sparse models |\n\n## Post-Processing\n\n### MUVERA Post-Processor\n\nThe `Muvera` post-processor converts late-interaction (multi-vector) embeddings to fixed-dimensional representations:\n\n```mermaid\ngraph TD\n    A[\"Multi-Vector Embeddings\"] --> B[\"Muvera Processing\"]\n    B --> C[\"Fixed-Dimensional Encoding\"]\n    C --> D[\"L2 Normalized FDE\"]\n    \n    
subgraph \"Parameters\"\n        E[\"k_sim: number of partitions\"]\n        F[\"dim_proj: projection dimension\"]\n        G[\"r_reps: repetitions\"]\n    end\n```\n\nOutput dimension calculation: `r_reps * 2^k_sim * dim_proj`\n\n## Summary\n\nThe FastEmbed architecture demonstrates a well-structured approach to embedding generation:\n\n- **Modularity**: Clear separation between embedding types, ONNX abstraction, and model management\n- **Performance**: ONNX Runtime integration with automatic provider selection\n- **Flexibility**: Support for dense, sparse, and late-interaction embeddings\n- **Extensibility**: Worker-based parallel processing with lazy loading support\n- **Portability**: Cross-platform ONNX execution with CUDA and TensorRT acceleration\n\n---\n\n<a id='page-text-embedding'></a>\n\n## Text Embedding Module\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [GPU Support and Acceleration](#page-gpu-support)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding.py)\n- [fastembed/text/onnx_text_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_text_model.py)\n- [fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/text/clip_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/clip_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n</details>\n\n# Text Embedding Module\n\nThe Text Embedding Module in FastEmbed provides high-performance text vectorization capabilities using ONNX runtime for efficient inference. 
It supports dense embeddings, pooled embeddings, normalized embeddings, and multimodal (CLIP) text embeddings.\n\n## Architecture Overview\n\nThe module follows a layered architecture with base classes providing common functionality and specialized implementations for different embedding strategies.\n\n```mermaid\ngraph TD\n    Base[TextEmbeddingBase] --> OnnxTextEmbedding\n    Base[TextEmbeddingBase] --> PooledEmbedding\n    PooledEmbedding --> PooledNormalizedEmbedding\n    OnnxTextEmbedding --> CLIPOnnxEmbedding\n    OnnxTextEmbedding --> PooledEmbedding\n    \n    Worker[OnnxTextEmbeddingWorker] -.->|init_embedding| OnnxTextEmbedding\n    CLIPWorker[CLIPEmbeddingWorker] -.->|init_embedding| CLIPOnnxEmbedding\n    \n    Models[Supported Models] -->|BGE, Jina, GTE, etc| OnnxTextEmbedding\n```\n\n### Core Components\n\n| Component | File | Purpose |\n|-----------|------|---------|\n| `TextEmbeddingBase` | `text_embedding.py` | Abstract base class defining the embedding interface |\n| `OnnxTextEmbedding` | `onnx_embedding.py` | Main ONNX-based text embedding implementation |\n| `PooledEmbedding` | `pooled_embedding.py` | Mean pooling variant for document embeddings |\n| `PooledNormalizedEmbedding` | `pooled_normalized_embedding.py` | L2-normalized pooled embeddings |\n| `CLIPOnnxEmbedding` | `clip_embedding.py` | CLIP-based multimodal text embeddings |\n\n## Supported Models\n\nFastEmbed's Text Embedding Module supports numerous pre-trained models across different languages and use cases.\n\n### Model Categories\n\n| Category | Models | Dim | License | Description |\n|----------|--------|-----|---------|-------------|\n| BGE (Base) | `BAAI/bge-base-en-v1.5` | 768 | MIT | English text, 512 tokens |\n| BGE Large | `BAAI/bge-large-en-v1.5` | 1024 | MIT | High-quality English embeddings |\n| BGE Small | `BAAI/bge-small-en-v1.5` | 384 | MIT | Lightweight English embeddings |\n| Jina | `jinaai/jina-embeddings-v2-base-en` | 768 | Apache 2.0 | English, 8192 tokens |\n| 
Jina Code | `jinaai/jina-embeddings-v2-base-code` | 768 | Apache 2.0 | 30 programming languages |\n| GTE | `thenlper/gte-base` | 768 | MIT | General text embeddings |\n| Snowflake Arctic | `snowflake/snowflake-arctic-embed-m` | 768 | Apache 2.0 | Query-optimized embeddings |\n| Multilingual E5 | `intfloat/multilingual-e5-large` | 1024 | MIT | 100 languages support |\n\n### Model Selection Criteria\n\nDifferent models have varying requirements for query/document prefixes:\n\n```python\n# Models requiring prefixes\nprefix_required = [\"intfloat/multilingual-e5-small\", \"intfloat/multilingual-e5-large\"]\n\n# Models not requiring prefixes  \nprefix_not_required = [\"BAAI/bge-base-en-v1.5\", \"jinaai/jina-embeddings-v2-base-en\"]\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:1-200]()\n\n## Class Hierarchy\n\n### TextEmbeddingBase\n\nThe abstract base class defining the contract for all text embedding implementations.\n\n```python\nclass TextEmbeddingBase(EmbeddingModel[list[float]]):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        ...\n    \n    def embed(self, documents: Iterable[str]) -> Iterable[list[float]]:\n        ...\n```\n\n资料来源：[fastembed/text/text_embedding.py]()\n\n### OnnxTextEmbedding\n\nThe primary implementation class that leverages ONNX runtime for inference.\n\n```python\nclass OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[NumpyArray]):\n    def __init__(\n        self,\n        model_name: str = \"BAAI/bge-small-en-v1.5\",\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    )\n```\n\n#### Constructor Parameters\n\n| Parameter | Type | Default | Description 
|\n|-----------|------|---------|-------------|\n| `model_name` | `str` | `\"BAAI/bge-small-en-v1.5\"` | Name of the model to use |\n| `cache_dir` | `str \\| None` | `None` | Cache directory path |\n| `threads` | `int \\| None` | `None` | Number of threads for ONNX |\n| `providers` | `Sequence[OnnxProvider]` | `None` | ONNX execution providers |\n| `cuda` | `bool \\| Device` | `Device.AUTO` | CUDA acceleration |\n| `device_ids` | `list[int]` | `None` | GPU device IDs |\n| `lazy_load` | `bool` | `False` | Load model on first use |\n\n资料来源：[fastembed/text/onnx_embedding.py:200-250]()\n\n## Pooling Strategies\n\nThe module implements multiple pooling strategies to aggregate token-level embeddings into sentence-level embeddings.\n\n### Mean Pooling\n\nMean pooling computes the average of all token embeddings weighted by the attention mask.\n\n```python\ndef mean_pooling(self, embeddings: NumpyArray, attention_mask: NumpyArray) -> NumpyArray:\n    # Expand attention mask to broadcast\n    attention_mask_expanded = np.expand_dims(attention_mask, -1)\n    # Sum embeddings where mask is active\n    sum_embeddings = np.sum(embeddings * attention_mask_expanded, axis=1)\n    # Count valid tokens\n    counts = np.sum(attention_mask_expanded, axis=1)\n    # Return mean\n    return sum_embeddings / counts\n```\n\n### PooledEmbedding\n\nApplies mean pooling after ONNX inference to generate document embeddings.\n\n```python\nclass PooledEmbedding(OnnxTextEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return self.mean_pooling(embeddings, attn_mask)\n```\n\n### PooledNormalizedEmbedding\n\nExtends pooling with L2 normalization for cosine similarity optimization.\n\n```python\nclass PooledNormalizedEmbedding(PooledEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, 
**kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\n资料来源：[fastembed/text/pooled_embedding.py]()\n资料来源：[fastembed/text/pooled_normalized_embedding.py]()\n\n## CLIP Text Embeddings\n\nThe CLIP embedding implementation provides multimodal text/image embedding capabilities.\n\n### Supported CLIP Models\n\n| Model | Dimension | License | Description |\n|-------|-----------|---------|-------------|\n| `Qdrant/clip-ViT-B-32-text` | 512 | MIT | CLIP ViT-B/32 text encoder |\n| `jinaai/jina-clip-v1` | 768 | Apache 2.0 | Jina CLIP multimodal |\n\n### CLIPOnnxEmbedding\n\n```python\nclass CLIPOnnxEmbedding(OnnxTextEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        return output.model_output  # Direct passthrough, no pooling\n```\n\n资料来源：[fastembed/text/clip_embedding.py]()\n\n## Inference Workflow\n\n```mermaid\ngraph LR\n    A[Input Text] --> B[Tokenization]\n    B --> C[ONNX Inference]\n    C --> D{Embedding Type?}\n    D -->|Standard| E[Direct Output]\n    D -->|Pooled| F[Mean Pooling]\n    D -->|Pooled Norm| G[Mean Pooling + Normalize]\n    E --> H[Final Embeddings]\n    F --> H\n    G --> H\n```\n\n## Usage Examples\n\n### Basic Dense Embedding\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\ndocuments = [\n    \"The quick brown fox jumps over the lazy dog\",\n    \"A journey of a thousand miles begins with a single step\"\n]\n\nembeddings = list(model.embed(documents))\n# Returns: list of 768-dimensional embedding vectors\n```\n\n### Pooled Normalized Embedding\n\n```python\nfrom fastembed import PooledNormalizedEmbedding\n\nmodel = PooledNormalizedEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\nembeddings = list(model.embed(documents))\n# Returns: L2-normalized 
pooled embeddings\n```\n\n### With Custom ONNX Providers\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common import OnnxProvider\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-large-en-v1.5\",\n    providers=[OnnxProvider.CPUExecutionProvider],\n    threads=8\n)\n```\n\n### Multilingual E5 with Prefix\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(model_name=\"intfloat/multilingual-e5-small\")\n\n# E5 models require query prefix\nquery_text = \"query: \" + user_query\ndocument_text = \"passage: \" + document_text\n\nquery_embedding = list(model.embed([query_text]))\ndoc_embedding = list(model.embed([document_text]))\n```\n\n## Model Sources Configuration\n\nModels can be loaded from multiple sources:\n\n```python\nfrom fastembed.common import ModelSource\n\nsources=ModelSource(\n    hf=\"xenova/jina-embeddings-v2-base-en\",      # HuggingFace Hub\n    url=\"https://storage.googleapis.com/...\",     # Direct URL\n    _deprecated_tar_struct=True                   # Legacy format\n)\n```\n\n| Source Type | Priority | Description |\n|-------------|----------|-------------|\n| `hf` | Primary | HuggingFace Hub repository |\n| `url` | Fallback | Direct download URL |\n| Local cache | Cached | Previously downloaded files |\n\n## Post-Processing Pipeline\n\n```mermaid\ngraph TD\n    subgraph \"ONNX Output\"\n        A[model_output] --> B[attention_mask]\n    end\n    \n    subgraph \"Post-Processing\"\n        B --> C{Masking Required?}\n        A --> C\n        C -->|Yes| D[Apply Attention Mask]\n        D --> E{Normalization?}\n        C -->|No| E\n        E -->|Yes| F[L2 Normalize]\n        E -->|No| G[Return Raw]\n        F --> H[Final Output]\n        G --> H\n    end\n```\n\n## Configuration Constants\n\n| Parameter | Default | Description |\n|-----------|---------|-------------|\n| `default_model` | `\"BAAI/bge-small-en-v1.5\"` | Fallback model |\n| `pooling_type` | `PoolingType.MEAN` | Pooling strategy |\n| 
`normalization` | `True` | L2 normalization flag |\n\n资料来源：[fastembed/text/onnx_text_model.py]()\n\n## Integration with Qdrant\n\nFastEmbed text embeddings are designed for seamless integration with Qdrant vector database:\n\n```python\nfrom fastembed import TextEmbedding\nimport qdrant_client\n\nmodel = TextEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\nembeddings = list(model.embed(documents))\n\n# Upload to Qdrant\nclient = qdrant_client.QdrantClient()\nclient.upsert(\n    collection_name=\"text_embeddings\",\n    points=[...]\n)\n```\n\n## Performance Considerations\n\n### Token Truncation Limits\n\n| Model Type | Max Tokens | Notes |\n|------------|------------|-------|\n| BGE Small/Large | 512 | Standard context |\n| Jina v2 | 8192 | Extended context |\n| Multilingual E5 | 512 | Query-optimized |\n| Arctic Embed | 512-2048 | Variable by model |\n\n### Hardware Acceleration\n\nThe module automatically detects and utilizes available hardware:\n\n1. **CUDA** - GPU acceleration via CUDAExecutionProvider\n2. **CPU** - Multi-threaded via CPUExecutionProvider\n3. 
**CoreML** - Apple Silicon support via CoreMLExecutionProvider\n\n资料来源：[fastembed/text/onnx_embedding.py:250-300]()\n\n## Error Handling\n\n### Common Error Cases\n\n```python\n# ValueError: attention_mask must be provided for pooled embeddings\nmodel = PooledNormalizedEmbedding(...)\n# Must ensure model outputs attention_mask\n\n# ModelNotSupportedError: Unknown model\nmodel = TextEmbedding(model_name=\"unknown/model\")\n# Falls back to default model or raises error\n```\n\n### Validation Requirements\n\n| Check | Condition | Error Type |\n|-------|-----------|------------|\n| `attention_mask` | Required for pooled | `ValueError` |\n| `model_name` | Must be in supported list | `ModelNotSupportedError` |\n| `cache_dir` | Valid path | `OSError` |\n\n## See Also\n\n- [Sparse Text Embeddings](fastembed/sparse/) - SPLADE++ and BM25\n- [Image Embeddings](fastembed/image/) - Vision model embeddings\n- [Reranking Models](fastembed/rerank/) - Cross-encoder reranking\n- [Post-Processors](fastembed/postprocess/) - MUVERA and other processors\n\n---\n\n<a id='page-image-embedding'></a>\n\n## Image Embedding Module\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [Late Interaction Models](#page-late-interaction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/clip_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/clip_embedding.py)\n- [fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/image/image_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/image_embedding.py)\n</details>\n\n# Image Embedding Module\n\nThe Image Embedding Module in FastEmbed provides 
functionality for generating vector representations (embeddings) from images using ONNX-based models. This module enables efficient image similarity search, image clustering, and multimodal retrieval applications.\n\n## Architecture Overview\n\nThe image embedding system follows a layered architecture pattern, separating the model definitions, ONNX inference logic, and embedding base classes.\n\n```mermaid\ngraph TD\n    A[Image Input] --> B[ImageEmbeddingBase]\n    B --> C[OnnxImageModel]\n    C --> D[OnnxImageEmbedding]\n    D --> E[ONNX Runtime]\n    E --> F[Embedding Vectors]\n    \n    G[Supported Models] --> D\n    G -.-> H[ResNet50]\n    G -.-> I[Unicom-ViT-B-16]\n    G -.-> J[Unicom-ViT-B-32]\n    G -.-> K[jinaai/jina-clip-v1]\n```\n\n## Supported Image Models\n\nThe module supports multiple image embedding models with varying dimensions and capabilities.\n\n| Model | Dimension | Type | License | Size (GB) | HF Source |\n|-------|-----------|------|---------|-----------|-----------|\n| `Qdrant/resnet50-onnx` | 512 | Image only | apache-2.0 | 0.10 | [link](https://huggingface.co/Qdrant/resnet50-onnx) |\n| `Qdrant/Unicom-ViT-B-16` | 768 | Multimodal (text&image) | apache-2.0 | 0.82 | [link](https://huggingface.co/Qdrant/Unicom-ViT-B-16) |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Multimodal (text&image) | apache-2.0 | 0.48 | [link](https://huggingface.co/Qdrant/Unicom-ViT-B-32) |\n| `jinaai/jina-clip-v1` | 768 | Multimodal (text&image) | apache-2.0 | 0.34 | [link](https://huggingface.co/jinaai/jina-clip-v1) |\n\n资料来源：[fastembed/image/onnx_embedding.py:1-40](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Core Classes\n\n### OnnxImageEmbedding\n\nThe main class for generating image embeddings extends `ImageEmbeddingBase` and `OnnxImageModel[NumpyArray]`.\n\n```python\nclass OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):\n```\n\n**Class Hierarchy:**\n\n```mermaid\ngraph LR\n    A[TextEmbeddingBase] 
-->|inheritance| B[ImageEmbeddingBase]\n    C[OnnxTextModel] -->|generic| D[OnnxImageModel]\n    E[OnnxTextEmbedding] -->|reuse pattern| F[OnnxImageEmbedding]\n```\n\nThe class follows the same architectural pattern as `OnnxTextEmbedding`, sharing the ONNX inference infrastructure with text embedding models. 资料来源：[fastembed/image/onnx_embedding.py:44-50](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n### Constructor Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | `str` | `\"Qdrant/clip-ViT-B-32\"` | Name of the model to use |\n| `cache_dir` | `str \\| None` | `None` | Path to cache directory |\n| `threads` | `int \\| None` | `None` | Number of threads for inference |\n| `providers` | `Sequence[OnnxProvider] \\| None` | `None` | ONNX execution providers |\n| `cuda` | `bool \\| Device` | `Device.AUTO` | CUDA device configuration |\n| `device_ids` | `list[int] \\| None` | `None` | Specific device IDs |\n| `lazy_load` | `bool` | `False` | Load model lazily |\n| `device_id` | `int \\| None` | `None` | Specific device ID |\n\n## Multimodal Image Models\n\n### CLIP-based Models\n\nThe `Unicom-ViT-B-16` and `Unicom-ViT-B-32` models support both image and text inputs, enabling cross-modal retrieval scenarios.\n\n| Model | Vision Dimension | Description |\n|-------|------------------|-------------|\n| Unicom-ViT-B-16 | 768 | More detailed embeddings (16x16 patches) |\n| Unicom-ViT-B-32 | 512 | Faster processing (32x32 patches) |\n\n### jina-clip-v1\n\nThe `jinaai/jina-clip-v1` model is a 2024 multimodal model supporting both text and image inputs:\n\n```python\nDenseModelDescription(\n    model=\"jinaai/jina-clip-v1\",\n    dim=768,\n    description=\"Image embeddings, Multimodal (text&image), 2024 year\",\n    license=\"apache-2.0\",\n    size_in_GB=0.34,\n    sources=ModelSource(hf=\"jinaai/jina-clip-v1\"),\n    
model_file=\"onnx/vision_model.onnx\",\n),\n```\n\n资料来源：[fastembed/image/onnx_embedding.py:20-28](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Model Loading Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant OnnxImageEmbedding\n    participant OnnxImageModel\n    participant ONNX Runtime\n    participant HuggingFace\n\n    User->>OnnxImageEmbedding: __init__(model_name)\n    OnnxImageEmbedding->>OnnxImageModel: Load model from cache/HF\n    OnnxImageModel->>HuggingFace: Download if needed\n    OnnxImageModel->>ONNX Runtime: Initialize session\n    ONNX Runtime-->>OnnxImageModel: Session ready\n    OnnxImageModel-->>OnnxImageEmbedding: Model loaded\n    User->>OnnxImageEmbedding: embed(image)\n    OnnxImageEmbedding->>ONNX Runtime: Run inference\n    ONNX Runtime-->>User: Embedding vector\n```\n\n## Usage Examples\n\n### Basic Image Embedding\n\n```python\nfrom fastembed import ImageEmbedding\n\nmodel = ImageEmbedding(model_name=\"Qdrant/Unicom-ViT-B-32\")\nembeddings = list(model.embed([\"path/to/image.jpg\"]))\n```\n\n### CLIP Text-Image Retrieval\n\nFor multimodal models like jina-clip-v1, you can perform cross-modal retrieval:\n\n```python\nfrom fastembed import ImageEmbedding, TextEmbedding\n\nimage_model = ImageEmbedding(model_name=\"jinaai/jina-clip-v1\")\ntext_model = TextEmbedding(model_name=\"jinaai/jina-clip-v1\")\n\n# Generate embeddings for both modalities\nimage_emb = list(image_model.embed([\"image_path.jpg\"]))\ntext_emb = list(text_model.embed([\"search query\"]))\n\n# Compute similarity\nfrom numpy import dot\n\nsimilarity = dot(image_emb[0], text_emb[0])\n```\n\n## Integration with Qdrant\n\nImage embeddings generated by this module are designed for use with Qdrant vector database:\n\n```python\n# Creating a Qdrant collection with image embeddings\nfrom qdrant_client import QdrantClient\n\nclient = QdrantClient(\"localhost\", port=6333)\n\nclient.create_collection(\n    
collection_name=\"images\",\n    vectors_config={\n        \"image\": VectorParams(\n            size=768,  # For Unicom-ViT-B-16 or jina-clip-v1\n            distance=Distance.COSINE\n        )\n    }\n)\n```\n\n## Supported Model Files\n\nEach model specifies its ONNX model file location:\n\n| Model | Model File | Additional Files |\n|-------|------------|------------------|\n| ResNet50 | `model.onnx` | None |\n| Unicom-ViT-B-16 | `model.onnx` | None |\n| Unicom-ViT-B-32 | `model.onnx` | None |\n| jina-clip-v1 | `onnx/vision_model.onnx` | `onnx/text_model.onnx` |\n\n资料来源：[fastembed/image/onnx_embedding.py:10-28](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Relationship with Text Embedding Module\n\nThe Image Embedding Module shares significant implementation with the Text Embedding Module:\n\n```mermaid\ngraph TD\n    A[OnnxTextModel] -->|shared base| B[OnnxImageModel]\n    A -->|shared base| C[OnnxTextEmbedding]\n    B -->|shared base| D[OnnxImageEmbedding]\n    \n    E[supported_onnx_models] -->|text models| C\n    F[supported_image_models] -->|image models| D\n    \n    G[CLIPEmbeddingWorker] -.->|reused| H[OnnxTextEmbeddingWorker]\n```\n\nThis design ensures consistent ONNX inference behavior and reduces code duplication across embedding types. 
资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Performance Considerations\n\n| Model | Size (GB) | Embedding Dim | Use Case |\n|-------|-----------|---------------|----------|\n| ResNet50 | 0.10 | 2048 | Fast, lightweight embeddings |\n| Unicom-ViT-B-32 | 0.48 | 512 | Balanced speed/quality |\n| Unicom-ViT-B-16 | 0.82 | 768 | Higher quality, slower |\n| jina-clip-v1 | 0.34 | 768 | Multimodal, 2024 model |\n\n## Configuration Options\n\nThe module inherits configuration capabilities from the base classes:\n\n```python\n# Example with full configuration\nmodel = OnnxImageEmbedding(\n    model_name=\"Qdrant/Unicom-ViT-B-16\",\n    cache_dir=\"~/.cache/fastembed\",\n    providers=[\"CPUExecutionProvider\"],  # or \"CUDAExecutionProvider\"\n    threads=4,\n    lazy_load=True\n)\n```\n\n## Summary\n\nThe Image Embedding Module provides:\n\n- **4 supported image models** ranging from lightweight to high-quality\n- **Multimodal support** via CLIP-based models for text-image retrieval\n- **ONNX runtime optimization** for efficient inference\n- **Qdrant integration** for vector storage and similarity search\n- **Consistent API** with the text embedding module\n\n---\n\n<a id='page-sparse-embedding'></a>\n\n## Sparse Embedding Models\n\n### 相关页面\n\n相关主题：[Text Embedding Module](#page-text-embedding), [System Architecture](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/sparse/splade_pp.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/splade_pp.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [fastembed/sparse/bm42.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm42.py)\n- [fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n- 
[fastembed/sparse/sparse_text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/sparse_text_embedding.py)\n- [fastembed/sparse/utils/sparse_vectors_converter.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/utils/sparse_vectors_converter.py)\n</details>\n\n# Sparse Embedding Models\n\n## Overview\n\nSparse embedding models in FastEmbed represent text as high-dimensional sparse vectors where most dimensions have zero values. Unlike dense embeddings where every dimension contributes to meaning, sparse embeddings only store non-zero values for specific tokens or features. This approach combines semantic understanding with exact keyword matching capabilities.\n\nThe sparse representation consists of:\n- **Indices**: Token identifiers in the vocabulary\n- **Values**: Weight/scores representing token importance\n\n```python\n# Example sparse embedding output\nSparseEmbedding(indices=[17, 123, 919, ...], values=[0.71, 0.22, 0.39, ...])\n```\n\n**资料来源：** [fastembed/sparse/sparse_text_embedding.py:1-50]()\n\n## Architecture\n\n### Class Hierarchy\n\n```mermaid\ngraph TD\n    A[SparseTextEmbeddingBase] --> B[MiniCOIL]\n    A --> C[Bm25]\n    A --> D[BM42]\n    A --> E[SPLADE++]\n    \n    F[OnnxTextModel<br/>SparseEmbedding] --> A\n    \n    G[OnnxTextEmbeddingWorker] --> F\n```\n\nThe sparse embedding system is built on a base class `SparseTextEmbeddingBase` that extends `OnnxTextModel[SparseEmbedding]`, providing a unified interface for all sparse embedding implementations.\n\n**资料来源：** [fastembed/sparse/minicoil.py:30-50]()\n\n### Supported Models\n\n| Model | Type | Language | Size | License | Requires IDF |\n|-------|------|----------|------|---------|--------------|\n| SPLADE++ | Sparse/SPLADE | English | 0.22 GB | apache-2.0 | Yes |\n| BM25 | Traditional BM25 | Multi-language | 0.01 GB | apache-2.0 | Yes |\n| BM42 | Hybrid BM25+Attention | English | 0.04 GB | apache-2.0 | Yes |\n| MiniCOIL | Semantic + Keyword | English | 
0.09 GB | apache-2.0 | Yes |\n\n**资料来源：** [fastembed/sparse/bm25.py:1-30](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n\n## Core Components\n\n### SparseTextEmbeddingBase\n\nThe base class for all sparse embedding models defines the common interface and behavior:\n\n```python\nclass SparseTextEmbeddingBase(OnnxTextModel[SparseEmbedding]):\n    \"\"\"Base class for sparse text embedding models\"\"\"\n    \n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        \"\"\"Returns list of supported sparse models\"\"\"\n        \n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[SparseEmbedding]:\n        \"\"\"Post-process ONNX model output to sparse format\"\"\"\n```\n\n**资料来源：** [fastembed/sparse/sparse_text_embedding.py:1-100](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/sparse_text_embedding.py)\n\n### SparseEmbedding Data Model\n\nThe `SparseEmbedding` class represents sparse vectors with two primary attributes:\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `indices` | list[int] | Vocabulary token IDs with non-zero values |\n| `values` | list[float] | Corresponding importance weights for each index |\n\n**资料来源：** [fastembed/sparse/sparse_text_embedding.py:100-150](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/sparse_text_embedding.py)\n\n## Implementation Details\n\n### BM25\n\nBM25 (Best Matching 25) is a traditional sparse embedding model that evaluates token importance based on term frequency and inverse document frequency.\n\n**Formula:**\n```\nscore(q, d) = SUM[ IDF(q_i) * (f(q_i, d) * (k + 1)) / (f(q_i, d) + k * (1 - b + b * (|d| / avg_len))) ]\n```\n\n| Parameter | Default | Description |\n|-----------|---------|-------------|\n| `k` | 1.2 | Term frequency saturation parameter |\n| `b` | 0.75 | Length normalization parameter |\n| `avg_len` | 256.0 | Average document length in the corpus |\n\n**WARNING:** BM25 is expected to be used with `modifier=\"idf\"` in the sparse vector index of Qdrant.\n\n**资料来源：** [fastembed/sparse/bm25.py:30-80](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n\n### MiniCOIL\n\nMiniCOIL is a sparse embedding model that combines semantic 
meaning resolution with exact keyword matching behavior.\n\n**Key Characteristics:**\n- Converts vocabulary tokens into 4-dimensional components of sparse vectors\n- Weights tokens by their frequency in the corpus\n- Falls back to BM25-like behavior for out-of-vocabulary tokens\n\n```python\nclass MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):\n    \"\"\"\n    MiniCOIL resolves semantic meaning while keeping exact keyword match behavior.\n    Each vocabulary token is converted into 4d component of a sparse vector.\n    \"\"\"\n```\n\n**资料来源：** [fastembed/sparse/minicoil.py:30-55]()\n\n### BM42\n\nBM42 extends traditional BM25 by incorporating attention weights from transformer models, creating a hybrid sparse representation.\n\n**资料来源：** [fastembed/sparse/bm42.py:1-50]()\n\n### SPLADE++\n\nSPLADE++ (SParse Lexical AnD expRessive model) uses a sparse expansion approach where each token can expand to related terms in the vocabulary, enabling semantic matching while maintaining interpretability.\n\n**资料来源：** [fastembed/sparse/splade_pp.py:1-50]()\n\n## Data Flow\n\n```mermaid\ngraph LR\n    A[Input Text] --> B[Tokenization]\n    B --> C[ONNX Model Inference]\n    C --> D[ONNX Output Context]\n    D --> E[Post-Processing]\n    E --> F[SparseEmbedding]\n    \n    G[Vocabulary] --> C\n    G --> H[SparseVectorsConverter]\n    H --> E\n```\n\n## SparseVectorsConverter Utility\n\nThe `SparseVectorsConverter` class handles conversion between different sparse vector formats, particularly for MiniCOIL's word embeddings.\n\n**Key Operations:**\n- Converts sentence embeddings to Qdrant sparse vector format\n- Handles out-of-vocabulary (OOV) words with fallback to BM25\n- Manages vocabulary word embeddings with 4-dimensional components\n\n```python\n# Example input structure\n{\n    \"vector\": WordEmbedding({\n        \"word\": \"vector\",\n        \"forms\": [\"vector\", \"vectors\"],\n        \"count\": 2,\n        \"word_id\": 1231,\n        \"embedding\": 
[0.1, 0.2, 0.3, 0.4]\n    }),\n    \"axiotic\": WordEmbedding({  # OOV word\n        \"word\": \"axiotic\",\n        \"forms\": [\"axiotics\"],\n        \"count\": 1,\n        \"word_id\": -1,\n    })\n}\n```\n\n**资料来源：** [fastembed/sparse/utils/sparse_vectors_converter.py:50-100]()\n\n## Usage Examples\n\n### Basic Usage\n\n```python\nfrom fastembed import SparseTextEmbedding\n\n# Initialize with default SPLADE++ model\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\n\n# Generate sparse embeddings\ndocuments = [\"Example text for embedding\", \"Another document\"]\nembeddings = list(model.embed(documents))\n\n# Output: [SparseEmbedding(indices=[17, 123, 919, ...], values=[0.71, 0.22, 0.39, ...])]\n```\n\n### BM25 Usage\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"Qdrant/bm25\")\nembeddings = list(model.embed(documents))\n```\n\n### With Qdrant\n\nSparse embeddings are designed for use with Qdrant's sparse vector index with `modifier=\"idf\"`:\n\n```python\nfrom qdrant_client import QdrantClient\nfrom fastembed import SparseTextEmbedding\n\nclient = QdrantClient()\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\n\n# Embedding will have indices and values compatible with Qdrant sparse vectors\n```\n\n**资料来源：** [README.md:1-100]()\n\n## Configuration Options\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | \"prithivida/Splade_PP_en_v1\" | Name of the sparse embedding model |\n| `cache_dir` | str \\| None | None | Cache directory path for model files |\n| `threads` | int \\| None | None | Number of threads for inference |\n| `providers` | Sequence[OnnxProvider] \\| None | None | ONNX execution providers |\n| `lazy_load` | bool | False | Whether to load model lazily |\n| `device_id` | int \\| None | None | Specific device ID for execution |\n\n## Language Support\n\n| Model | Languages 
|\n|-------|-----------|\n| SPLADE++ | English only |\n| BM25 | 15+ languages |\n| BM42 | English |\n| MiniCOIL | English |\n\n**资料来源：** [fastembed/sparse/bm25.py:1-25]()\n\n## Requirements\n\nAll sparse embedding models require IDF (Inverse Document Frequency) weighting for optimal performance. This is typically handled by the vector database (e.g., Qdrant) during indexing and search operations.\n\n---\n\n<a id='page-late-interaction'></a>\n\n## Late Interaction Models\n\n### 相关页面\n\n相关主题：[Image Embedding Module](#page-image-embedding), [System Architecture](#page-architecture), [Text Embedding Module](#page-text-embedding)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/late_interaction/colbert.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/colbert.py)\n- [fastembed/late_interaction/jina_colbert.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/jina_colbert.py)\n- [fastembed/late_interaction/late_interaction_text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/late_interaction_text_embedding.py)\n- [fastembed/late_interaction_multimodal/colpali.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colpali.py)\n- [fastembed/late_interaction_multimodal/colmodernvbert.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colmodernvbert.py)\n- [fastembed/late_interaction_multimodal/onnx_multimodal_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/onnx_multimodal_model.py)\n</details>\n\n# Late Interaction Models\n\n## Overview\n\nLate Interaction Models represent an advanced embedding paradigm that departs from traditional single-vector dense representations. 
Unlike conventional dense embedding models that compress entire documents into a single embedding vector, late interaction models preserve token-level embeddings and defer the similarity computation until query time. This approach enables granular token-to-token interactions between queries and documents, significantly improving retrieval precision for complex semantic matching tasks.\n\nThe FastEmbed library implements two categories of late interaction models:\n\n| Category | Scope | Use Case |\n|----------|-------|----------|\n| **Text-based** | Query-document text matching | Semantic search, question answering |\n| **Multimodal** | Text + image joint embedding | Visual document retrieval, image search |\n\n## Architecture\n\n### Late Interaction vs Traditional Dense Retrieval\n\n```mermaid\ngraph TD\n    subgraph \"Traditional Dense Retrieval\"\n        A1[Query Text] --> B1[Encoder]\n        D1[Document] --> E1[Encoder]\n        B1 --> C1[Single Query Vector]\n        E1 --> F1[Single Document Vector]\n        C1 --> G1[Dot Product / Cosine Similarity]\n        F1 --> G1\n    end\n    \n    subgraph \"Late Interaction Retrieval\"\n        A2[Query Text] --> B2[Encoder]\n        D2[Document] --> E2[Encoder]\n        B2 --> C2[Query Token Embeddings]\n        E2 --> F2[Document Token Embeddings]\n        C2 --> G2[Late Interaction Module]\n        F2 --> G2\n        G2 --> H2[Max-Sum Similarity]\n    end\n    \n    style C1 fill:#ffcccc\n    style F1 fill:#ffcccc\n    style C2 fill:#ccffcc\n    style F2 fill:#ccffcc\n```\n\n### Colbert Late Interaction Mechanism\n\nThe Colbert model employs a **max-similarity** strategy where query tokens independently find their most similar document token, and relevance is computed as the sum of these maximum similarities:\n\n```mermaid\ngraph LR\n    Q1[\"Query: q1 q2 q3\"] --> QE[Query Encoder]\n    D1[\"Doc: d1 d2\"] --> DE[Document Encoder]\n    QE --> QT[\"Q_emb: [v₁, v₂, v₃]\"]\n    DE --> DT[\"D_emb: [u₁, u₂]\"]\n    QT 
--> MM[Similarity Matrix]\n    DT --> MM\n    MM --> MS[\"Max Similarity<br/>sim(qᵢ) = maxⱼ S(vᵢ, uⱼ)\"]\n    MS --> SR[\"Score = Σᵢ sim(qᵢ)\"]\n```\n\n## Supported Models\n\n### Text-based Late Interaction Models\n\n| Model | Dimension | Context Length | Multilingual | License | Size |\n|-------|-----------|----------------|---------------|---------|------|\n| `jinaai/jina-colbert-v2` | 128 | 8192 | Yes | cc-by-nc-4.0 | 2.24 GB |\n\n### Multimodal Late Interaction Models\n\n| Model | Modality | Description |\n|-------|----------|-------------|\n| `vidore/colpali` | Text + Image | Vision-Language late interaction |\n| `vidore/colqwen2` | Text + Image | Qwen2-based multimodal |\n| `vidore/colmodernvbert` | Text + Image | Modern vision backbone |\n\n## Base Classes\n\n### LateInteractionTextEmbedding\n\nAbstract base class for text-based late interaction models.\n\n```python\nclass LateInteractionTextEmbedding(TextEmbeddingBase):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        ...\n    \n    @classmethod\n    def _get_worker_class(cls) -> Type[OnnxTextEmbeddingWorker]:\n        ...\n```\n\n资料来源：[late_interaction_text_embedding.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/late_interaction_text_embedding.py)\n\n### Colbert Base Class\n\nThe `Colbert` class implements the core late interaction logic:\n\n```python\nclass Colbert(LateInteractionTextEmbedding):\n    QUERY_MARKER_TOKEN_ID: int =  1  # Default CLS token\n    DOCUMENT_MARKER_TOKEN_ID: int = 1  # Default CLS token\n    MIN_QUERY_LENGTH: int = 32\n    MASK_TOKEN: str = \"[MASK]\"\n```\n\n资料来源：[colbert.py:20-60](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/colbert.py)\n\nKey methods:\n\n| Method | Purpose |\n|--------|---------|\n| `encode_query()` | Encode a single query, returning token embeddings |\n| `encode_document()` | Encode a single document, returning token embeddings |\n| `score()` | Compute late 
interaction similarity between query and document |\n\n## Jina Colbert Implementation\n\nThe `JinaColbert` class extends the base `Colbert` with model-specific configuration:\n\n```python\nclass JinaColbert(Colbert):\n    QUERY_MARKER_TOKEN_ID = 250002\n    DOCUMENT_MARKER_TOKEN_ID = 250003\n    MIN_QUERY_LENGTH = 31  # 32 minus 1 for special token\n    MASK_TOKEN = \"<mask>\"\n```\n\n资料来源：[jina_colbert.py:15-19](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/jina_colbert.py)\n\n#### Model Configuration\n\n| Parameter | Value | Description |\n|-----------|-------|-------------|\n| `model` | `jinaai/jina-colbert-v2` | HuggingFace model identifier |\n| `dim` | 128 | Token embedding dimension |\n| `size_in_GB` | 2.24 | Model size |\n| `context_length` | 8192 | Maximum input tokens |\n| `license` | cc-by-nc-4.0 | Model license |\n\n## Multimodal Late Interaction\n\n### ColPali Model\n\nColPali extends the late interaction paradigm to visual documents by treating images as sequences of patches:\n\n```python\nclass ColPali(MultimodalTextImageBase, OnnxMultimodalModel[MultivectorEmbedding]):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        return supported_colpali_models\n```\n\n资料来源：[colpali.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colpali.py)\n\n### ColModernVBert Model\n\nColModernVBert provides an alternative vision-language architecture with a modern backbone:\n\n```python\nclass ColModernVBert(MultimodalTextImageBase, OnnxMultimodalModel[MultivectorEmbedding]):\n    QUERY_MARKER_TOKEN_ID = 0\n    DOCUMENT_MARKER_TOKEN_ID = 1\n    MIN_QUERY_LENGTH = 32\n    MASK_TOKEN = \"<mask>\"\n```\n\n资料来源：[colmodernvbert.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colmodernvbert.py)\n\n### Architecture: Multimodal Late Interaction\n\n```mermaid\ngraph TD\n    subgraph \"Query Processing\"\n        QT[Text 
Query] --> QE[Text Encoder]\n        QV[Query Image] --> QP[Patch Extraction]\n        QP --> QI[Query Image Embeddings]\n        QE --> QT_emb[Query Token Embeddings]\n    end\n    \n    subgraph \"Document Processing\"\n        DT[Document Text] --> DE[Text Encoder]\n        DV[Document Image] --> DP[Patch Extraction]\n        DP --> DI[Doc Image Embeddings]\n        DE --> DT_emb[Document Token Embeddings]\n    end\n    \n    subgraph \"Late Interaction\"\n        QT_emb --> LI[Interaction Module]\n        DT_emb --> LI\n        QI --> LI\n        DI --> LI\n        LI --> SM[Similarity Matrix]\n        SM --> MS[Max-Sum Pooling]\n    end\n    \n    MS --> SC[Relevance Score]\n```\n\n## Usage Examples\n\n### Text-based Late Interaction\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\n\n# Initialize the model\nmodel = LateInteractionTextEmbedding(\n    model_name=\"jinaai/jina-colbert-v2\"\n)\n\n# Encode query and document separately\nquery_embedding = model.query_embed(\"What is machine learning?\")\ndoc_embedding = model.doc_embed(\"Machine learning is a subset of AI...\")\n\n# Compute late interaction score\nscore = model.score(query_embedding, doc_embedding)\n```\n\n### Multimodal Late Interaction\n\n```python\nfrom fastembed import LateInteractionTextImageEmbedding\n\nmodel = LateInteractionTextImageEmbedding(\n    model_name=\"vidore/colpali\"\n)\n\n# Encode image with optional text\nimage_embedding = model.doc_embed(image=image_bytes)\nquery_embedding = model.query_embed(\"Find charts about revenue\")\n```\n\n## Configuration Options\n\n### Common Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | Required | Model identifier |\n| `cache_dir` | str | None | Local cache directory |\n| `threads` | int | None | CPU threads for inference |\n| `providers` | Sequence[OnnxProvider] | None | ONNX execution providers |\n| `lazy_load` | bool | False | Defer model loading 
until first use |\n| `device_id` | int | None | Specific device index |\n\n### ONNX Providers\n\n| Provider | Description | Priority |\n|----------|-------------|----------|\n| `CPUExecutionProvider` | CPU inference | Default fallback |\n| `CUDAExecutionProvider` | NVIDIA GPU | Preferred for speed |\n| `CoreMLExecutionProvider` | Apple Silicon | Mobile/iOS |\n\n## Post-processing: Muvera\n\nMuvera is a post-processing technique that converts late interaction embeddings (multi-vector) into fixed-dimensional representations:\n\n```python\nfrom fastembed.postprocess import Muvera\n\n# Convert from multi-vector to fixed-dim\nmuvera = Muvera.from_multivector_model(\n    model=late_interaction_model,\n    k_sim=6,\n    dim_proj=32\n)\n\n# Process document\nfde = muvera.process_document(multivector_embedding)\n```\n\n资料来源：[postprocess/muvera.py:1-100](https://github.com/qdrant/fastembed/blob/main/fastembed/postprocess/muvera.py)\n\n### Muvera Configuration\n\n| Parameter | Description | Impact |\n|-----------|-------------|--------|\n| `k_sim` | Log₂ of number of buckets | Memory vs precision |\n| `dim_proj` | Projection dimension | Output size |\n| `r_reps` | Number of repetitions | Robustness |\n| `random_seed` | Random seed | Reproducibility |\n\nOutput dimension formula: `r_reps × 2^k_sim × dim_proj`\n\n## Performance Considerations\n\n### Token Length Limits\n\n| Model | Query Limit | Document Limit |\n|-------|-------------|----------------|\n| jina-colbert-v2 | 31 tokens | 8192 tokens |\n| colpali | Variable | Variable |\n\n### Memory Usage\n\nLate interaction models store per-token embeddings rather than single vectors:\n\n- **Traditional dense**: N × D memory (N documents, D dimension)\n- **Late interaction**: N × T × D memory (T = avg token count)\n\nThis trade-off enables better precision at the cost of increased memory footprint.\n\n## Comparison with Dense Retrieval\n\n| Aspect | Dense Retrieval | Late Interaction 
|\n|--------|-----------------|------------------|\n| Embedding type | Single vector | Token-level vectors |\n| Query speed | O(1) comparison | O(Q × D) interaction |\n| Precision | Good for semantic similarity | Excellent for term matching |\n| Memory | Lower | Higher |\n| Interpretability | Limited | Token-level attribution |\n\n## Related Components\n\n| Component | File | Purpose |\n|-----------|------|---------|\n| `ColbertEmbeddingWorker` | `colbert.py` | Parallel embedding worker |\n| `OnnxMultimodalModel` | `onnx_multimodal_model.py` | Base for ONNX multimodal |\n| `MultivectorEmbedding` | Types | Output type for late interaction |\n| `Muvera` | `muvera.py` | Dimensionality reduction post-process |\n\n## References\n\n- Original Colbert paper: Khattab & Zaharia, \"ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction\"\n- Model source: [jinaai/jina-colbert-v2](https://huggingface.co/jinaai/jina-colbert-v2)\n\n---\n\n<a id='page-onnx-model'></a>\n\n## ONNX Model Infrastructure\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [GPU Support and Acceleration](#page-gpu-support)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/common/onnx_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/onnx_model.py)\n- [fastembed/common/types.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/types.py)\n- [fastembed/common/preprocessor_utils.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/preprocessor_utils.py)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/text/clip_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/clip_embedding.py)\n- 
[fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n</details>\n\n# ONNX Model Infrastructure\n\n## Overview\n\nThe ONNX Model Infrastructure is the core runtime layer in FastEmbed that enables efficient execution of embedding models through the ONNX (Open Neural Network Exchange) format. This infrastructure provides a unified abstraction for loading, executing, and post-processing ONNX models across different embedding modalities including text, images, and sparse embeddings.\n\nFastEmbed leverages ONNX Runtime to achieve cross-platform compatibility and optimized inference performance without requiring PyTorch or TensorFlow dependencies. The architecture separates model execution concerns from embedding-specific logic, enabling a clean separation of concerns between the ONNX runtime layer and higher-level embedding abstractions. 资料来源：[fastembed/text/onnx_embedding.py:1-50]()\n\n## Architecture Overview\n\nThe ONNX infrastructure follows a class hierarchy pattern where base classes define the runtime contract and concrete implementations provide modality-specific behavior. 
The architecture is designed around the following core components:\n\n```mermaid\ngraph TD\n    A[ONNX Runtime] --> B[OnnxModel Base]\n    B --> C[OnnxTextModel]\n    B --> D[OnnxImageModel]\n    B --> E[OnnxCrossEncoderModel]\n    C --> F[OnnxTextEmbedding]\n    C --> G[PooledEmbedding]\n    C --> H[CLIPOnnxEmbedding]\n    C --> I[MiniCOIL]\n    D --> J[OnnxImageEmbedding]\n    E --> K[OnnxTextCrossEncoder]\n    \n    L[TextEmbeddingBase] --> F\n    M[ImageEmbeddingBase] --> J\n    N[SparseTextEmbeddingBase] --> I\n```\n\n### Core Base Classes\n\nThe infrastructure defines three primary base classes that orchestrate ONNX model execution:\n\n| Class | File | Purpose |\n|-------|------|---------|\n| `OnnxModel` | `fastembed/common/onnx_model.py` | Core runtime for ONNX session management |\n| `OnnxTextModel` | `fastembed/text/onnx_text_model.py` | Text-specific ONNX execution |\n| `OnnxImageModel` | `fastembed/image/onnx_image_model.py` | Image-specific ONNX execution |\n\n资料来源：[fastembed/common/onnx_model.py:1-100](https://github.com/qdrant/fastembed/blob/main/fastembed/common/onnx_model.py)\n\n## ONNX Session Management\n\n### Model Loading\n\nThe `OnnxModel` base class handles the lifecycle of ONNX model loading and execution. When a model is initialized, it performs the following operations:\n\n1. Resolves the model file path from cache or downloads from source\n2. Configures ONNX Runtime session options (threads, providers)\n3. Creates an inference session with the specified execution providers\n4. 
Validates model inputs and outputs\n\n```python\nclass OnnxModel(Generic[T]):\n    def __init__(\n        self,\n        model_dir: Path,\n        model_file: str,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_id: int | None = None,\n        **kwargs: Any,\n    ):\n        self._load_onnx_model(\n            model_dir=model_dir,\n            model_file=model_file,\n            threads=threads,\n            providers=providers,\n            cuda=cuda,\n            device_id=device_id,\n        )\n```\n\n资料来源：[fastembed/common/onnx_model.py:50-80]()\n\n### Execution Providers\n\nThe infrastructure supports multiple ONNX Runtime execution providers for hardware acceleration:\n\n| Provider | Priority | Use Case |\n|----------|----------|----------|\n| `CUDAExecutionProvider` | GPU acceleration | NVIDIA GPUs |\n| `CPUExecutionProvider` | Fallback | CPU inference |\n\nThe `Device` enum provides automatic device selection:\n\n```python\nclass Device(Enum):\n    CPU = \"cpu\"\n    CUDA = \"cuda\"\n    AUTO = \"auto\"\n```\n\n资料来源：[fastembed/common/types.py:1-50]()\n\n## Text Embedding Infrastructure\n\n### OnnxTextEmbedding\n\nThe `OnnxTextEmbedding` class is the primary implementation for text embedding generation. 
It inherits from both `TextEmbeddingBase` and `OnnxTextModel`, combining the ONNX runtime with embedding-specific logic.\n\n```python\nclass OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[NumpyArray]):\n    \"\"\"Implementation of the Flag Embedding model.\"\"\"\n    \n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        return supported_onnx_models\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:60-85]()\n\n#### Supported Models\n\nThe text embedding infrastructure includes a comprehensive list of supported models:\n\n| Model | Dimension | License | Size (GB) | Token Limit |\n|-------|-----------|---------|-----------|-------------|\n| `BAAI/bge-base-en` | 768 | mit | 0.42 | 512 |\n| `BAAI/bge-base-en-v1.5` | 768 | mit | 0.21 | 512 |\n| `BAAI/bge-large-en-v1.5` | 1024 | mit | 1.20 | 512 |\n| `BAAI/bge-small-en-v1.5` | 384 | mit | 0.067 | 512 |\n| `snowflake/snowflake-arctic-embed-m` | 768 | apache-2.0 | 0.43 | 512 |\n| `snowflake/snowflake-arctic-embed-m-long` | 768 | apache-2.0 | 0.54 | 2048 |\n| `jinaai/jina-clip-v1` | 768 | apache-2.0 | 0.55 | multimodal |\n| `mixedbread-ai/mxbai-embed-large-v1` | 1024 | apache-2.0 | 0.64 | 512 |\n\n资料来源：[fastembed/text/onnx_embedding.py:30-150]()\n\n### Pooled Embedding Variants\n\nThe infrastructure provides specialized pooling strategies through inheritance:\n\n#### PooledNormalizedEmbedding\n\nApplies mean pooling over token embeddings followed by L2 normalization:\n\n```python\nclass PooledNormalizedEmbedding(PooledEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\nSupported models for pooled normalized embeddings include:\n- `jinaai/jina-embeddings-v2-base-en` (768 dim, 8192 tokens)\n- `jinaai/jina-embeddings-v2-small-en` (512 dim, 8192 
tokens)\n- `thenlper/gte-base` (768 dim, 512 tokens)\n- `thenlper/gte-large` (1024 dim, 512 tokens)\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py:50-100]()\n\n#### PooledEmbedding\n\nStandard pooled embedding with mean pooling over token representations:\n\n```python\nclass PooledEmbedding(OnnxTextEmbedding):\n    @classmethod\n    def _get_worker_class(cls) -> Type[OnnxTextEmbeddingWorker]:\n        return PooledEmbeddingWorker\n```\n\nSupported multilingual and specialized models:\n- `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (384 dim, ~50 languages)\n- `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` (768 dim, ~50 languages)\n- `intfloat/multilingual-e5-large` (1024 dim, ~100 languages)\n\n资料来源：[fastembed/text/pooled_embedding.py:50-150]()\n\n## Image Embedding Infrastructure\n\n### OnnxImageEmbedding\n\nImage embedding models inherit from `OnnxImageModel` and `ImageEmbeddingBase`:\n\n```python\nclass OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):\n    def __init__(self, model_name: str, cache_dir: str | None = None, ...):\n        ...\n```\n\nSupported image models:\n\n| Model | Dimension | License | Size (GB) |\n|-------|-----------|---------|-----------|\n| `Qdrant/resnet50-onnx` | 2048 | apache-2.0 | - |\n| `Qdrant/Unicom-ViT-B-16` | 768 | apache-2.0 | 0.82 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | apache-2.0 | 0.48 |\n| `jinaai/jina-clip-v1` | 768 | apache-2.0 | 0.34 |\n\n资料来源：[fastembed/image/onnx_embedding.py:30-80]()\n\n## Multimodal Embedding\n\n### CLIP Embeddings\n\nThe `CLIPOnnxEmbedding` class provides multimodal (text and image) embedding capabilities:\n\n```python\nclass CLIPOnnxEmbedding(OnnxTextEmbedding):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        return supported_clip_models\n    \n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        return 
output.model_output\n```\n\nCurrently supported CLIP model:\n- `Qdrant/clip-ViT-B-32-text` (512 dim, 77 tokens)\n\n资料来源：[fastembed/text/clip_embedding.py:20-50]()\n\n### Late Interaction Multimodal\n\nThe `ColModernVbert` class implements late interaction models with image processing capabilities:\n\n```python\ndef load_onnx_model(self) -> None:\n    self._load_onnx_model(...)\n    \n    # Load image processing configuration\n    processor_config_path = self._model_dir / \"processor_config.json\"\n    self.image_seq_len = processor_config.get(\"image_seq_len\", 64)\n    self.max_image_size = preprocessor_config.get(\"max_image_size\", {}).get(\"longest_edge\", 512)\n```\n\n资料来源：[fastembed/late_interaction_multimodal/colmodernvbert.py:50-100]()\n\n## Sparse Embedding Infrastructure\n\n### MiniCOIL\n\nThe `MiniCOIL` class implements sparse embedding with semantic resolution:\n\n```python\nclass MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):\n    \"\"\"\n    MiniCOIL is a sparse embedding model, that resolves semantic meaning of the words,\n    while keeping exact keyword match behavior.\n    \"\"\"\n```\n\nEach vocabulary token is converted into a 4-dimensional component of a sparse vector, weighted by token frequency in the corpus. 
If a token is not found in the corpus, it is treated exactly like in BM25.\n\nSupported sparse models:\n- `Qdrant/minicoil-v1` (0.09 GB, requires IDF weighting)\n\n资料来源：[fastembed/sparse/minicoil.py:40-80]()\n\n## Worker Architecture\n\nThe infrastructure uses a worker-based pattern for parallel embedding generation:\n\n```mermaid\ngraph LR\n    A[Main Thread] --> B[OnnxTextEmbeddingWorker]\n    B --> C[ONNX Session]\n    C --> D[Tokenization]\n    D --> E[Model Inference]\n    E --> F[Post-processing]\n    F --> G[Normalized Embeddings]\n```\n\n### Worker Classes\n\n| Worker Class | Parent | Purpose |\n|--------------|--------|---------|\n| `OnnxTextEmbeddingWorker` | Base | Standard text embedding generation |\n| `PooledEmbeddingWorker` | `OnnxTextEmbeddingWorker` | Mean pooling after inference |\n| `PooledNormalizedEmbeddingWorker` | `OnnxTextEmbeddingWorker` | Pooling + L2 normalization |\n| `CLIPEmbeddingWorker` | `OnnxTextEmbeddingWorker` | CLIP-specific processing |\n\n资料来源：[fastembed/text/onnx_text_model.py:1-50]()\n\n## Reranking Infrastructure\n\n### OnnxTextCrossEncoder\n\nThe cross-encoder reranking uses a specialized ONNX model class:\n\n```python\nclass OnnxTextCrossEncoder(TextCrossEncoderBase, OnnxCrossEncoderModel):\n    @classmethod\n    def _list_supported_models(cls) -> list[BaseModelDescription]:\n        return supported_onnx_models\n```\n\nSupported reranker models:\n\n| Model | License | Size (GB) | Context |\n|-------|---------|-----------|---------|\n| `jinaai/jina-reranker-v1-turbo-en` | apache-2.0 | 0.15 | 1K context |\n| `jinaai/jina-reranker-v2-base-multilingual` | cc-by-nc-4.0 | 1.11 | 1K context, sliding window |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py:30-80]()\n\n## Model Source Configuration\n\n### ModelSource\n\nModels can be loaded from multiple sources:\n\n```python\n@dataclass\nclass ModelSource:\n    hf: str | None = None           # HuggingFace Hub\n    url: str | None = None         # Direct URL 
download\n    _deprecated_tar_struct: bool = False  # Legacy tar format\n```\n\n### ModelDescription\n\nThe base model description structure:\n\n```python\n@dataclass\nclass DenseModelDescription:\n    model: str\n    dim: int\n    description: str\n    license: str\n    size_in_GB: float\n    sources: ModelSource\n    model_file: str\n    additional_files: list[str] | None = None\n```\n\n资料来源：[fastembed/common/model_description.py:1-80]()\n\n## Inference Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant EmbeddingClass\n    participant OnnxModel\n    participant ONNXRuntime\n    \n    User->>EmbeddingClass: embed(texts)\n    EmbeddingClass->>OnnxModel: preprocess(texts)\n    OnnxModel->>OnnxModel: tokenize()\n    OnnxModel->>ONNXRuntime: run(session)\n    ONNXRuntime-->>OnnxModel: model_output\n    OnnxModel->>EmbeddingClass: _post_process_onnx_output()\n    EmbeddingClass->>EmbeddingClass: normalize/pool()\n    EmbeddingClass-->>User: numpy arrays\n```\n\n## Configuration Parameters\n\n### Common Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | `str` | model-specific | Name of the model to use |\n| `cache_dir` | `str \\| None` | `None` | Cache directory for model files |\n| `threads` | `int \\| None` | `None` | Number of threads for ONNX |\n| `providers` | `Sequence[OnnxProvider] \\| None` | `None` | Execution providers |\n| `cuda` | `bool \\| Device` | `Device.AUTO` | CUDA device selection |\n| `device_ids` | `list[int] \\| None` | `None` | Multiple GPU device IDs |\n| `lazy_load` | `bool` | `False` | Defer model loading |\n| `device_id` | `int \\| None` | `None` | Specific device ID |\n| `specific_model_path` | `str \\| None` | `None` | Custom model file path |\n\n资料来源：[fastembed/text/onnx_embedding.py:85-120]()\n\n## Lazy Loading\n\nThe infrastructure supports lazy loading for memory-efficient initialization:\n\n```python\ndef __init__(\n    self,\n    
lazy_load: bool = False,\n    ...\n):\n    if not lazy_load:\n        self.load_onnx_model()\n```\n\nWhen `lazy_load=True`, the ONNX model is not loaded until the first inference call, reducing startup memory footprint.\n\n## Type System\n\n### Core Types\n\n| Type | Definition | Usage |\n|------|------------|-------|\n| `NumpyArray` | `np.ndarray[Any, np.dtype[Any]]` | Dense embedding arrays |\n| `SparseEmbedding` | Custom sparse representation | Sparse embedding vectors |\n| `OnnxProvider` | Execution provider type | CPU, CUDA providers |\n| `Device` | Enum | Device selection (CPU/CUDA/AUTO) |\n\n资料来源：[fastembed/common/types.py:1-100]()\n\n## Summary\n\nThe ONNX Model Infrastructure provides a robust, extensible foundation for embedding generation in FastEmbed. Key characteristics include:\n\n- **Unified Runtime**: Single ONNX execution layer across all embedding modalities\n- **Hardware Acceleration**: Support for CUDA and CPU execution providers\n- **Model Flexibility**: Dynamic model loading from HuggingFace, URLs, or local cache\n- **Extensible Architecture**: Clean inheritance hierarchy for adding new embedding types\n- **Memory Efficiency**: Lazy loading and optimized session management\n- **Cross-Modal Support**: Text, image, sparse, and multimodal embeddings\n\nThis infrastructure enables FastEmbed to deliver high-performance embedding generation without external ML framework dependencies, making it suitable for production deployments with varying hardware constraints.\n\n---\n\n<a id='page-gpu-support'></a>\n\n## GPU Support and Acceleration\n\n### 相关页面\n\n相关主题：[Installation Guide](#page-installation), [ONNX Model Infrastructure](#page-onnx-model)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- 
[fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n- [fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n- [fastembed/sparse/splade_pp.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/splade_pp.py)\n- [fastembed/text/text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding.py)\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n</details>\n\n# GPU Support and Acceleration\n\nFastEmbed provides comprehensive GPU acceleration support through ONNX Runtime's execution providers, enabling high-performance inference on NVIDIA GPUs. The library offers flexible device management with automatic detection, multi-GPU support for parallel processing, and lazy loading capabilities for efficient resource utilization.\n\n## Architecture Overview\n\nFastEmbed's GPU acceleration is built on top of ONNX Runtime, which provides hardware-accelerated inference for ONNX models. 
All embedding model classes inherit from base ONNX model classes that handle device initialization and session management.\n\n```mermaid\ngraph TD\n    A[User Code] --> B[TextEmbedding / ImageEmbedding / CrossEncoder]\n    B --> C[ONNX Model Base Classes]\n    C --> D[ONNX Runtime Session]\n    D --> E{Hardware Acceleration}\n    E --> F[CUDA Execution Provider]\n    E --> G[CPU Execution Provider]\n    E --> H[TensorRT Provider]\n    \n    F --> H1[NVIDIA GPU]\n    G --> H2[CPU Fallback]\n    \n    style F fill:#4CAF50,color:#fff\n    style H1 fill:#2196F3,color:#fff\n```\n\n## Supported Model Types\n\nFastEmbed supports GPU acceleration across multiple embedding modalities and processing types.\n\n| Model Type | Class | GPU Support | Description |\n|------------|-------|-------------|-------------|\n| Text Embeddings | `OnnxTextEmbedding` | ✅ | Dense text embeddings via ONNX |\n| Pooled Embeddings | `PooledEmbedding` | ✅ | Pooled representation embeddings |\n| Normalized Pooled | `PooledNormalizedEmbedding` | ✅ | L2-normalized pooled embeddings |\n| Image Embeddings | `OnnxImageEmbedding` | ✅ | Vision model embeddings |\n| Cross Encoders | `OnnxTextCrossEncoder` | ✅ | Reranking and relevance scoring |\n| Sparse Embeddings | SPLADE models | ✅ | Lexical sparse embeddings |\n\n资料来源：[fastembed/text/onnx_embedding.py:1-50]()\n\n## Device Configuration\n\n### Device Enum\n\nThe `Device` enum defines available compute devices with automatic selection capability.\n\n```python\nclass Device(Enum):\n    AUTO = \"auto\"  # Automatically select best available device\n    CPU = \"cpu\"    # Force CPU execution\n    CUDA = \"cuda\"  # NVIDIA GPU acceleration\n```\n\n### Initialization Parameters\n\nAll ONNX embedding classes accept the following GPU-related parameters:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `cuda` | `bool \\| Device` | `Device.AUTO` | Enable CUDA or specify device type |\n| `providers` | 
`Sequence[OnnxProvider]` | `None` | ONNX Runtime providers (mutually exclusive with cuda) |\n| `device_ids` | `list[int]` | `None` | GPU device IDs for multi-GPU data parallelism |\n| `device_id` | `int` | `None` | Specific device ID for single-process loading |\n| `lazy_load` | `bool` | `False` | Defer model loading until first use |\n\n资料来源：[fastembed/text/onnx_embedding.py:47-57]()\n\n### Constructor Signature\n\n```python\ndef __init__(\n    self,\n    model_name: str = \"BAAI/bge-small-en-v1.5\",\n    cache_dir: str | None = None,\n    threads: int | None = None,\n    providers: Sequence[OnnxProvider] | None = None,\n    cuda: bool | Device = Device.AUTO,\n    device_ids: list[int] | None = None,\n    lazy_load: bool = False,\n    device_id: int | None = None,\n    specific_model_path: str | None = None,\n    **kwargs: Any,\n):\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:44-66]()\n\n## GPU Initialization Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Embedding as Embedding Class\n    participant Base as ONNX Model Base\n    participant Runtime as ONNX Runtime\n    participant Device as Compute Device\n    \n    User->>Embedding: Initialize(cuda=True)\n    Embedding->>Base: super().__init__()\n    Base->>Device: Auto-detect device\n    Device-->>Base: Available devices\n    Base->>Runtime: Create InferenceSession\n    Runtime->>Device: Load model to GPU\n    Device-->>Runtime: Model loaded\n    Runtime-->>Base: Session ready\n    Base-->>Embedding: Return session\n    Embedding-->>User: Instance ready\n```\n\n## Multi-GPU Configuration\n\n### Data Parallel Processing\n\nFor scenarios requiring distribution across multiple GPUs, FastEmbed supports device ID specification for data-parallel workloads.\n\n```python\nfrom fastembed import TextEmbedding\n\n# Initialize for multi-GPU data parallelism\nembedding_model = TextEmbedding(\n    model_name=\"BAAI/bge-base-en-v1.5\",\n    cuda=True,\n    device_ids=[0, 1, 2, 3],  # Use 4 
GPUs\n    lazy_load=True  # Required for multi-GPU setup\n)\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:52-55]()\n\n### Lazy Loading for Multi-GPU\n\nWhen using multiple GPUs, `lazy_load=True` defers model loading until first inference, which is essential for avoiding resource conflicts in multi-process scenarios.\n\n```python\nembedding_model = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    cuda=True,\n    device_ids=[0, 1],\n    lazy_load=True  # Load on-demand in worker processes\n)\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:54]()\n\n## ONNX Runtime Providers\n\n### Provider Selection\n\nONNX Runtime supports multiple execution providers. FastEmbed allows explicit provider specification via the `providers` parameter, which is mutually exclusive with the `cuda` parameter.\n\n```python\nfrom fastembed import TextEmbedding\n\n# Using explicit provider specification\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\"CUDAExecutionProvider\", \"CPUExecutionProvider\"]\n)\n```\n\n### Provider Priority\n\nWhen multiple providers are specified, ONNX Runtime attempts to use them in order of preference, falling back to subsequent providers if the preferred one is unavailable.\n\n```mermaid\ngraph LR\n    A[Query] --> B{CUDA Available?}\n    B -->|Yes| C[CUDAExecutionProvider]\n    B -->|No| D{CPU Provider Available?}\n    D -->|Yes| E[CPUExecutionProvider]\n    D -->|No| F[Error]\n    \n    C --> G[GPU Inference]\n    E --> H[CPU Inference]\n    \n    style C fill:#4CAF50,color:#fff\n    style E fill:#FF9800,color:#fff\n```\n\n## GPU Installation\n\n### Package Variants\n\nFastEmbed offers separate packages for CPU and GPU operation.\n\n| Package | Command | Use Case |\n|---------|---------|----------|\n| CPU (default) | `pip install fastembed` | Standard installations |\n| GPU | `pip install fastembed-gpu` | NVIDIA GPU acceleration |\n\n资料来源：[README.md:1-20]()\n\n### Qdrant Integration\n\nFor vector database 
workflows with GPU acceleration:\n\n```bash\npip install qdrant-client[fastembed-gpu]\n```\n\n```python\nfrom fastembed import TextEmbedding\n\n# GPU-accelerated embedding for Qdrant\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\"CUDAExecutionProvider\"]\n)\nprint(\"The model BAAI/bge-small-en-v1.5 is ready to use on a GPU.\")\n```\n\n资料来源：[README.md:1-30]()\n\n## Implementation Across Model Classes\n\n### OnnxTextEmbedding\n\nThe primary text embedding class with full GPU support:\n\n```python\nclass OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[NumpyArray]):\n    \"\"\"Implementation of the Flag Embedding model with ONNX acceleration.\"\"\"\n    \n    def __init__(\n        self,\n        model_name: str = \"BAAI/bge-small-en-v1.5\",\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    ):\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:48-70]()\n\n### OnnxImageEmbedding\n\nImage embeddings also inherit the same GPU acceleration framework:\n\n```python\nclass OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):\n    def __init__(\n        self,\n        model_name: str,\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    ):\n```\n\n资料来源：[fastembed/image/onnx_embedding.py:1-30]()\n\n### OnnxTextCrossEncoder\n\nReranking models support GPU acceleration for cross-encoder 
inference:\n\n```python\nclass OnnxTextCrossEncoder(TextCrossEncoderBase, OnnxCrossEncoderModel):\n    def __init__(\n        self,\n        model_name: str,\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    ):\n```\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py:1-50]()\n\n## Unified TextEmbedding Entry Point\n\nThe `TextEmbedding` class provides a unified interface that automatically selects the appropriate embedding type:\n\n```python\nclass TextEmbedding(TextEmbeddingBase):\n    def __init__(\n        self,\n        model_name: str = \"BAAI/bge-small-en-v1.5\",\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        **kwargs: Any,\n    ):\n        super().__init__(model_name, cache_dir, threads, **kwargs)\n        # Automatically routes to appropriate embedding type\n        for EMBEDDING_MODEL_TYPE in self.EMBEDDINGS_REGISTRY:\n            supported_models = EMBEDDING_MODEL_TYPE._list_supported_models()\n            if any(model_name.lower() == model.model.lower() \n                   for model in supported_models):\n                self.model = EMBEDDING_MODEL_TYPE(\n                    model_name=model_name,\n                    cache_dir=cache_dir,\n                    threads=threads,\n                    providers=providers,\n                    cuda=cuda,\n                    device_ids=device_ids,\n                    lazy_load=lazy_load,\n                )\n```\n\n资料来源：[fastembed/text/text_embedding.py:1-100]()\n\n## Supported 
Models with GPU Acceleration\n\n### Text Embedding Models\n\n| Model | Dimension | License | Size (GB) | Token Limit |\n|-------|-----------|---------|-----------|-------------|\n| `BAAI/bge-small-en-v1.5` | 384 | MIT | 0.067 | 512 |\n| `BAAI/bge-base-en-v1.5` | 768 | MIT | 0.21 | 512 |\n| `BAAI/bge-large-en-v1.5` | 1024 | MIT | 1.20 | 512 |\n| `jinaai/jina-embeddings-v2-base-en` | 768 | Apache 2.0 | 0.52 | 8192 |\n| `sentence-transformers/all-MiniLM-L6-v2` | 384 | Apache 2.0 | 0.09 | 256 |\n| `mixedbread-ai/mxbai-embed-large-v1` | 1024 | Apache 2.0 | 0.64 | 512 |\n| `nomic-ai/nomic-embed-text-v1.5` | 768 | Apache 2.0 | 0.13 | 8192 |\n\n资料来源：[fastembed/text/onnx_embedding.py:1-150](), [fastembed/text/pooled_embedding.py:1-80]()\n\n### Image Embedding Models\n\n| Model | Dimension | License | Size (GB) |\n|-------|-----------|---------|-----------|\n| `Qdrant/Unicom-ViT-B-16` | 768 | Apache 2.0 | 0.82 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Apache 2.0 | 0.48 |\n| `jinaai/jina-clip-v1` | 768 | Apache 2.0 | 0.34 |\n\n资料来源：[fastembed/image/onnx_embedding.py:1-50]()\n\n### Reranking Models\n\n| Model | License | Size (GB) |\n|-------|---------|-----------|\n| `jinaai/jina-reranker-v1-turbo-en` | Apache 2.0 | 0.15 |\n| `jinaai/jina-reranker-v2-base-multilingual` | CC BY-NC 4.0 | 1.11 |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py:1-40]()\n\n## Best Practices\n\n### Device Selection\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common.types import Device\n\n# Recommended: Automatic detection\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\", cuda=Device.AUTO)\n\n# Explicit CUDA\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\", cuda=True)\n\n# Force CPU (for debugging)\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\", cuda=False)\n```\n\n### Multi-GPU Inference\n\n```python\n# For batch processing across multiple GPUs\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-base-en-v1.5\",\n    cuda=True,\n    device_ids=[0, 1],  # Parallel 
GPU usage\n    lazy_load=True\n)\n```\n\n### Provider Fallback\n\n```python\n# Explicit provider chain with fallback\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\n        \"CUDAExecutionProvider\",  # Preferred\n        \"CPUExecutionProvider\"    # Fallback\n    ]\n)\n```\n\n## Limitations and Considerations\n\n| Aspect | Description |\n|--------|-------------|\n| Mutual Exclusivity | `providers` and `cuda` parameters cannot be used together |\n| Device ID Scope | `device_ids` is for data parallelism; `device_id` is for single-process loading |\n| Lazy Loading | Required for multi-GPU setups to avoid resource conflicts |\n| Model Support | All ONNX-exported models support GPU; not all models have ONNX exports |\n\n资料来源：[fastembed/text/onnx_embedding.py:47-57]()\n\n## Summary\n\nFastEmbed's GPU acceleration framework provides:\n\n1. **Automatic device detection** via the `Device.AUTO` enum value\n2. **Flexible provider configuration** through ONNX Runtime's provider system\n3. **Multi-GPU support** with device ID lists for data-parallel workloads\n4. **Lazy loading** for efficient multi-process GPU utilization\n5. **Consistent API** across text, image, and reranking models\n6. **Seamless fallback** to CPU when CUDA is unavailable\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: qdrant/fastembed\n\nSummary: Found 29 potential pitfall items; 3 are high/blocking. Highest priority: installation - 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2.\n\n## 1. 
installation · 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_201d4035515846df8830ca0dad6960c5 | https://github.com/qdrant/fastembed/issues/618 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. installation · 来源证据：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_8147621574b345d7955e79ad98f4ba6f | https://github.com/qdrant/fastembed/issues/630 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 3. security_permissions · 失败模式：security_permissions: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside ca...\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: Developers should check this security_permissions risk before relying on the project: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- User impact: Developers may expose sensitive permissions or credentials: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory. 
Context: Observed when using python\n- Guardrail action: Do not recommend enabling privileged or credential-bearing paths until the source-backed risk is reviewed: https://github.com/qdrant/fastembed/issues/626\n- Evidence: failure_mode_cluster:github_issue | fmev_d3890c2b3360ccb937839f70fd4aa584 | https://github.com/qdrant/fastembed/issues/626 | [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n\n## 4. installation · 失败模式：installation: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- User impact: Developers may fail before the first successful local run: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment. Context: Observed when using python, docker\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_16e50a8626aff1576adeb1c0baab4785 | https://github.com/qdrant/fastembed/issues/466 | The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n## 5. 
installation · 失败模式：installation: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- User impact: Developers may fail before the first successful local run: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2. Context: Observed when using python, windows, linux\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_04529bc774f1c961d4adeb7190edecd7 | https://github.com/qdrant/fastembed/issues/618 | [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n## 6. installation · 失败模式：installation: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- User impact: Developers may fail before the first successful local run: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14. 
Context: Observed when using python, macos, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_79a43347d96beb6d05eb6bfec2503fb5 | https://github.com/qdrant/fastembed/issues/630 | [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n## 7. installation · 失败模式：installation: v0.5.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: v0.5.1\n- User impact: Upgrade or migration may change expected behavior: v0.5.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.5.1. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_8b37c58c613005c0182d0325aaf032f7 | https://github.com/qdrant/fastembed/releases/tag/v0.5.1 | v0.5.1\n\n## 8. installation · 来源证据：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_0f507b37e33e456ea259e82966cecdc5 | https://github.com/qdrant/fastembed/issues/466 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 9. 
configuration · 来源证据：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个配置相关的待验证问题：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_6d570ba91cfd414f970a3a8da522be04 | https://github.com/qdrant/fastembed/issues/627 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 10. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:666260877 | https://github.com/qdrant/fastembed | README/documentation is current enough for a first validation pass.\n\n## 11. runtime · 失败模式：runtime: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1. 
Context: Observed when using python, linux\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_17b849ae47ffaf5d18cabbd577f373ca | https://github.com/qdrant/fastembed/issues/603 | [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n## 12. runtime · 失败模式：runtime: v0.4.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.4.2\n- User impact: Upgrade or migration may change expected behavior: v0.4.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.4.2. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_607c8ff157108b2b5fb78f55129b60f6 | https://github.com/qdrant/fastembed/releases/tag/v0.4.2 | v0.4.2\n\n## 13. runtime · 失败模式：runtime: v0.7.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.7.1\n- User impact: Upgrade or migration may change expected behavior: v0.7.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_07583dafa20e640a1720d7a77188475e | https://github.com/qdrant/fastembed/releases/tag/v0.7.1 | v0.7.1\n\n## 14. 
runtime · 失败模式：runtime: v0.8.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.8.0\n- User impact: Upgrade or migration may change expected behavior: v0.8.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.8.0. Context: Observed when using python, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_054bae0c9c1667cf5568de7ecb7087c9 | https://github.com/qdrant/fastembed/releases/tag/v0.8.0 | v0.8.0\n\n## 15. maintenance · 来源证据：[Bug]: license error in pypi metadata\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[Bug]: license error in pypi metadata\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_017eda744ea84e679aeb5d77d41f7541 | https://github.com/qdrant/fastembed/issues/620 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 16. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | last_activity_observed missing\n\n## 17. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:666260877 | https://github.com/qdrant/fastembed | no_demo; severity=medium\n\n## 18. 
security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:666260877 | https://github.com/qdrant/fastembed | no_demo; severity=medium\n\n## 19. security_permissions · 来源证据：[Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_cfcd79475b1344a0bba508ac4346edf4 | https://github.com/qdrant/fastembed/issues/603 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 20. security_permissions · 来源证据：[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7603e6da390349c9aff6eed6cc5072a9 | https://github.com/qdrant/fastembed/issues/626 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 21. 
capability · 失败模式：capability: [Bug]: license error in pypi metadata\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this capability risk before relying on the project: [Bug]: license error in pypi metadata\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: license error in pypi metadata\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: license error in pypi metadata. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_9e209e0c78e000b92930bccb67db1440 | https://github.com/qdrant/fastembed/issues/620 | [Bug]: license error in pypi metadata\n\n## 22. runtime · 失败模式：performance: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: No timeout on model download — requests.get() can hang indefinitely. Context: Observed when using python, docker\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_69d0e997d6514a5d87d46c30a69795b3 | https://github.com/qdrant/fastembed/issues/627 | [Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n## 23. 
runtime · 失败模式：performance: v0.7.4\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: v0.7.4\n- User impact: Upgrade or migration may change expected behavior: v0.7.4\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.4. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_df69e175afaf53788c9f212e22680b87 | https://github.com/qdrant/fastembed/releases/tag/v0.7.4 | v0.7.4\n\n## 24. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | issue_or_pr_quality=unknown\n\n## 25. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | release_recency=unknown\n\n## 26. maintenance · 失败模式：maintenance: v0.6.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.6.0\n- User impact: Upgrade or migration may change expected behavior: v0.6.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.6.0. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_abc84a0766f60148974f2e932e8dc4f4 | https://github.com/qdrant/fastembed/releases/tag/v0.6.0 | v0.6.0\n\n## 27. maintenance · 失败模式：maintenance: v0.6.1\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.6.1\n- User impact: Upgrade or migration may change expected behavior: v0.6.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.6.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_e909fbcb26efacd532e8194ede2ff4f0 | https://github.com/qdrant/fastembed/releases/tag/v0.6.1 | v0.6.1\n\n## 28. maintenance · 失败模式：maintenance: v0.7.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.7.0\n- User impact: Upgrade or migration may change expected behavior: v0.7.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.0. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_0ccd78cc792f8d6a79212fc6cfa512e4 | https://github.com/qdrant/fastembed/releases/tag/v0.7.0 | v0.7.0\n\n## 29. 
maintenance · 失败模式：maintenance: v0.7.2\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.7.2\n- User impact: Upgrade or migration may change expected behavior: v0.7.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.2. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_28a8ed6e38ab5a351c6a64532998a7df | https://github.com/qdrant/fastembed/releases/tag/v0.7.2 | v0.7.2\n\n<!-- canonical_name: qdrant/fastembed; human_manual_source: deepwiki_human_wiki -->\n",
      "markdown_key": "fastembed",
      "pages": "draft",
      "source_refs": [
        {
          "evidence_id": "github_repo:666260877",
          "kind": "repo",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/qdrant/fastembed"
        },
        {
          "evidence_id": "art_38812aa1eec94c3d93805c4429a0b835",
          "kind": "docs",
          "supports_claim_ids": [
            "claim_identity",
            "claim_distribution",
            "claim_capability"
          ],
          "url": "https://github.com/qdrant/fastembed#readme"
        }
      ],
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "fastembed 说明书",
      "toc": [
        "https://github.com/qdrant/fastembed 项目说明书",
        "目录",
        "Introduction to FastEmbed",
        "Overview",
        "Architecture",
        "Supported Models",
        "Usage Patterns",
        "Initialize with default model",
        "Doramagic 踩坑日志"
      ]
    }
  },
  "quality_gate": {
    "blocking_gaps": [],
    "category_confidence": "medium",
    "compile_status": "ready_for_review",
    "five_assets_present": true,
    "install_sandbox_verified": true,
    "missing_evidence": [],
    "next_action": "publish to Doramagic.ai project surfaces",
    "prompt_preview_boundary_ok": true,
    "publish_status": "publishable",
    "quick_start_verified": true,
    "repo_clone_verified": true,
    "repo_commit": "fde1e0b36138fbea21d36b4b4782695bdf2eeae3",
    "repo_inspection_error": null,
    "repo_inspection_files": [
      "pyproject.toml",
      "README.md",
      "docs/index.md"
    ],
    "repo_inspection_verified": true,
    "review_reasons": [],
    "tag_count_ok": true,
    "unsupported_claims": []
  },
  "schema_version": "0.1",
  "user_assets": {
    "ai_context_pack": {
      "asset_id": "ai_context_pack",
      "filename": "AI_CONTEXT_PACK.md",
      "markdown": "# fastembed - Doramagic AI Context Pack\n\n> 定位：安装前体验与判断资产。它帮助宿主 AI 有一个好的开始，但不代表已经安装、执行或验证目标项目。\n\n## 充分原则\n\n- **充分原则，不是压缩原则**：AI Context Pack 应该充分到让宿主 AI 在开工前理解项目价值、能力边界、使用入口、风险和证据来源；它可以分层组织，但不以最短摘要为目标。\n- **压缩策略**：只压缩噪声和重复内容，不压缩会影响判断和开工质量的上下文。\n\n## 给宿主 AI 的使用方式\n\n你正在读取 Doramagic 为 fastembed 编译的 AI Context Pack。请把它当作开工前上下文：帮助用户理解适合谁、能做什么、如何开始、哪些必须安装后验证、风险在哪里。不要声称你已经安装、运行或执行了目标项目。\n\n## Claim 消费规则\n\n- **事实来源**：Repo Evidence + Claim/Evidence Graph；Human Wiki 只提供显著性、术语和叙事结构。\n- **事实最低状态**：`supported`\n- `supported`：可以作为项目事实使用，但回答中必须引用 claim_id 和证据路径。\n- `weak`：只能作为低置信度线索，必须要求用户继续核实。\n- `inferred`：只能用于风险提示或待确认问题，不能包装成项目事实。\n- `unverified`：不得作为事实使用，应明确说证据不足。\n- `contradicted`：必须展示冲突来源，不得替用户强行选择一个版本。\n\n## 它最适合谁\n\n- **想在安装前理解开源项目价值和边界的用户**：当前证据主要来自项目文档。 证据：`README.md` Claim：`clm_0002` supported 0.86\n\n## 它能做什么\n\n- **命令行启动或安装流程**（需要安装后验证）：项目文档中存在可执行命令，真实使用需要在本地或宿主环境中运行这些命令。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n## 怎么开始\n\n- `pip install fastembed` 证据：`README.md` Claim：`clm_0003` supported 0.86, `clm_0004` supported 0.86\n- `pip install fastembed-gpu` 证据：`README.md` Claim：`clm_0004` supported 0.86\n- `pip install qdrant-client[fastembed]` 证据：`README.md` Claim：`clm_0005` supported 0.86, `clm_0006` supported 0.86\n- `pip install qdrant-client[fastembed-gpu]` 证据：`README.md` Claim：`clm_0006` supported 0.86\n\n## 继续前判断卡\n\n- **当前建议**：仅建议沙盒试装\n- **为什么**：项目存在安装命令、宿主配置或本地写入线索，不建议直接进入主力环境，应先在隔离环境试装。\n\n### 30 秒判断\n\n- **现在怎么做**：仅建议沙盒试装\n- **最小安全下一步**：先跑 Prompt Preview；若仍要安装，只在隔离环境试装\n- **先别相信**：真实输出质量不能在安装前相信。\n- **继续会触碰**：命令执行、本地环境或项目文件、宿主 AI 上下文\n\n### 现在可以相信\n\n- **适合人群线索：想在安装前理解开源项目价值和边界的用户**（supported）：有 supported claim 或项目证据支撑，但仍不等于真实安装效果。 证据：`README.md` Claim：`clm_0002` supported 0.86\n- **能力存在：命令行启动或安装流程**（supported）：可以相信项目包含这类能力线索；是否适合你的具体任务仍要试用或安装后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n- **存在 Quick Start / 安装命令线索**（supported）：可以相信项目文档出现过启动或安装入口；不要因此直接在主力环境运行。 证据：`README.md` Claim：`clm_0003` supported 0.86, `clm_0004` 
supported 0.86\n\n### 现在还不能相信\n\n- **真实输出质量不能在安装前相信。**（unverified）：Prompt Preview 只能展示引导方式，不能证明真实项目中的结果质量。\n- **宿主 AI 版本兼容性不能在安装前相信。**（unverified）：Claude、Cursor、Codex、Gemini 等宿主加载规则和版本差异必须在真实环境验证。\n- **不会污染现有宿主 AI 行为，不能直接相信。**（inferred）：Skill、plugin、AGENTS/CLAUDE/GEMINI 指令可能改变宿主 AI 的默认行为。\n- **可安全回滚不能默认相信。**（unverified）：除非项目明确提供卸载和恢复说明，否则必须先在隔离环境验证。\n- **真实安装后是否与用户当前宿主 AI 版本兼容？**（unverified）：兼容性只能通过实际宿主环境验证。\n- **项目输出质量是否满足用户具体任务？**（unverified）：安装前预览只能展示流程和边界，不能替代真实评测。\n- **安装命令是否需要网络、权限或全局写入？**（unverified）：这影响企业环境和个人环境的安装风险。 证据：`README.md`\n\n### 继续会触碰什么\n\n- **命令执行**：包管理器、网络下载、本地插件目录、项目配置或用户主目录。 原因：运行第一条命令就可能产生环境改动；必须先判断是否值得跑。 证据：`README.md`\n- **本地环境或项目文件**：安装结果、插件缓存、项目配置或本地依赖目录。 原因：安装前无法证明写入范围和回滚方式，需要隔离验证。 证据：`README.md`\n- **宿主 AI 上下文**：AI Context Pack、Prompt Preview、Skill 路由、风险规则和项目事实。 原因：导入上下文会影响宿主 AI 后续判断，必须避免把未验证项包装成事实。\n\n### 最小安全下一步\n\n- **先跑 Prompt Preview**：用安装前交互式试用判断工作方式是否匹配，不需要授权或改环境。（适用：任何项目都适用，尤其是输出质量未知时。）\n- **只在隔离目录或测试账号试装**：避免安装命令污染主力宿主 AI、真实项目或用户主目录。（适用：存在命令执行、插件配置或本地写入线索时。）\n- **安装后只验证一个最小任务**：先验证加载、兼容、输出质量和回滚，再决定是否深用。（适用：准备从试用进入真实工作流时。）\n\n### 退出方式\n\n- **保留安装前状态**：记录原始宿主配置和项目状态，后续才能判断是否可恢复。\n- **记录安装命令和写入路径**：没有明确卸载说明时，至少要知道哪些目录或配置需要手动清理。\n- **如果没有回滚路径，不进入主力环境**：不可回滚是继续前阻断项，不应靠信任或运气继续。\n\n## 哪些只能预览\n\n- 解释项目适合谁和能做什么\n- 基于项目文档演示典型对话流程\n- 帮助用户判断是否值得安装或继续研究\n\n## 哪些必须安装后验证\n\n- 真实安装 Skill、插件或 CLI\n- 执行脚本、修改本地文件或访问外部服务\n- 验证真实输出质量、性能和兼容性\n\n## 边界与风险判断卡\n\n- **把安装前预览误认为真实运行**：用户可能高估项目已经完成的配置、权限和兼容性验证。 处理方式：明确区分 prompt_preview_can_do 与 runtime_required。 Claim：`clm_0007` inferred 0.45\n- **命令执行会修改本地环境**：安装命令可能写入用户主目录、宿主插件目录或项目配置。 处理方式：先在隔离环境或测试账号中运行。 证据：`README.md` Claim：`clm_0008` supported 0.86\n- **待确认**：真实安装后是否与用户当前宿主 AI 版本兼容？原因：兼容性只能通过实际宿主环境验证。\n- **待确认**：项目输出质量是否满足用户具体任务？原因：安装前预览只能展示流程和边界，不能替代真实评测。\n- **待确认**：安装命令是否需要网络、权限或全局写入？原因：这影响企业环境和个人环境的安装风险。\n\n## 开工前工作上下文\n\n### 加载顺序\n\n- 先读取 how_to_use.host_ai_instruction，建立安装前判断资产的边界。\n- 读取 claim_graph_summary，确认事实来自 Claim/Evidence Graph，而不是 Human Wiki 叙事。\n- 再读取 intended_users、capabilities 和 
quick_start_candidates，判断用户是否匹配。\n- 需要执行具体任务时，优先查 role_skill_index，再查 evidence_index。\n- 遇到真实安装、文件修改、网络访问、性能或兼容性问题时，转入 risk_card 和 boundaries.runtime_required。\n\n### 任务路由\n\n- **命令行启动或安装流程**：先说明这是安装后验证能力，再给出安装前检查清单。 边界：必须真实安装或运行后验证。 证据：`README.md` Claim：`clm_0001` supported 0.86\n\n### 上下文规模\n\n- 文件总数：117\n- 重要文件覆盖：27/117\n- 证据索引条目：27\n- 角色 / Skill 条目：5\n\n### 证据不足时的处理\n\n- **missing_evidence**：说明证据不足，要求用户提供目标文件、README 段落或安装后验证记录；不要补全事实。\n- **out_of_scope_request**：说明该任务超出当前 AI Context Pack 证据范围，并建议用户先查看 Human Manual 或真实安装后验证。\n- **runtime_request**：给出安装前检查清单和命令来源，但不要替用户执行命令或声称已执行。\n- **source_conflict**：同时展示冲突来源，标记为待核实，不要强行选择一个版本。\n\n## Prompt Recipes\n\n### 适配判断\n\n- 目标：判断这个项目是否适合用户当前任务。\n- 预期输出：适配结论、关键理由、证据引用、安装前可预览内容、必须安装后验证内容、下一步建议。\n\n```text\n请基于 fastembed 的 AI Context Pack，先问我 3 个必要问题，然后判断它是否适合我的任务。回答必须包含：适合谁、能做什么、不能做什么、是否值得安装、证据来自哪里。所有项目事实必须引用 evidence_refs、source_paths 或 claim_id。\n```\n\n### 安装前体验\n\n- 目标：让用户在安装前感受核心工作流，同时避免把预览包装成真实能力或营销承诺。\n- 预期输出：一段带边界标签的体验剧本、安装后验证清单和谨慎建议；不含真实运行承诺或强营销表述。\n\n```text\n请把 fastembed 当作安装前体验资产，而不是已安装工具或真实运行环境。\n\n请严格输出四段：\n1. 先问我 3 个必要问题。\n2. 给出一段“体验剧本”：用 [安装前可预览]、[必须安装后验证]、[证据不足] 三种标签展示它可能如何引导工作流。\n3. 给出安装后验证清单：列出哪些能力只有真实安装、真实宿主加载、真实项目运行后才能确认。\n4. 
给出谨慎建议：只能说“值得继续研究/试装”“先补充信息后再判断”或“不建议继续”，不得替项目背书。\n\n硬性边界：\n- 不要声称已经安装、运行、执行测试、修改文件或产生真实结果。\n- 不要写“自动适配”“确保通过”“完美适配”“强烈建议安装”等承诺性表达。\n- 如果描述安装后的工作方式，必须使用“如果安装成功且宿主正确加载 Skill，它可能会……”这种条件句。\n- 体验剧本只能写成“示例台词/假设流程”：使用“可能会询问/可能会建议/可能会展示”，不要写“已写入、已生成、已通过、正在运行、正在生成”。\n- Prompt Preview 不负责给安装命令；如用户准备试装，只能提示先阅读 Quick Start 和 Risk Card，并在隔离环境验证。\n- 所有项目事实必须来自 supported claim、evidence_refs 或 source_paths；inferred/unverified 只能作风险或待确认项。\n\n```\n\n### 角色 / Skill 选择\n\n- 目标：从项目里的角色或 Skill 中挑选最匹配的资产。\n- 预期输出：候选角色或 Skill 列表，每项包含适用场景、证据路径、风险边界和是否需要安装后验证。\n\n```text\n请读取 role_skill_index，根据我的目标任务推荐 3-5 个最相关的角色或 Skill。每个推荐都要说明适用场景、可能输出、风险边界和 evidence_refs。\n```\n\n### 风险预检\n\n- 目标：安装或引入前识别环境、权限、规则冲突和质量风险。\n- 预期输出：环境、权限、依赖、许可、宿主冲突、质量风险和未知项的检查清单。\n\n```text\n请基于 risk_card、boundaries 和 quick_start_candidates，给我一份安装前风险预检清单。不要替我执行命令，只说明我应该检查什么、为什么检查、失败会有什么影响。\n```\n\n### 宿主 AI 开工指令\n\n- 目标：把项目上下文转成一次对话开始前的宿主 AI 指令。\n- 预期输出：一段边界明确、证据引用明确、适合复制给宿主 AI 的开工前指令。\n\n```text\n请基于 fastembed 的 AI Context Pack，生成一段我可以粘贴给宿主 AI 的开工前指令。这段指令必须遵守 not_runtime=true，不能声称项目已经安装、运行或产生真实结果。\n```\n\n\n## 角色 / Skill 索引\n\n- 共索引 5 个角色 / Skill / 项目文档条目。\n\n- **⚡️ What is FastEmbed?**（project_doc）：FastEmbed is a lightweight, fast, Python library built for embedding generation. We support popular text models https://qdrant.github.io/fastembed/examples/Supported Models/ . Please open a GitHub issue https://github.com/qdrant/fastembed/issues/new if you want us to add a new model. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`README.md`\n- **Contributing to FastEmbed!**（project_doc）：:+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`CONTRIBUTING.md`\n- **⚡️ What is FastEmbed?**（project_doc）：FastEmbed is a lightweight, fast, Python library built for embedding generation. We support popular text models https://qdrant.github.io/fastembed/examples/Supported Models/ . Please open a Github issue https://github.com/qdrant/fastembed/issues/new if you want us to add a new model. 
激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`docs/index.md`\n- **All Submissions:**（project_doc）：Have you followed the guidelines in our Contributing document? Have you checked to ensure there aren't other open Pull Requests ../../../pulls for the same update/change? 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`.github/PULL_REQUEST_TEMPLATE.md`\n- **Releasing FastEmbed**（project_doc）：This is a guide how to release fastembed and fastembed-gpu packages. 激活提示：当用户需要理解项目结构、安装方式或边界时参考。 证据：`RELEASE.md`\n\n## 证据索引\n\n- 共索引 27 条证据。\n\n- **⚡️ What is FastEmbed?**（documentation）：FastEmbed is a lightweight, fast, Python library built for embedding generation. We support popular text models https://qdrant.github.io/fastembed/examples/Supported Models/ . Please open a GitHub issue https://github.com/qdrant/fastembed/issues/new if you want us to add a new model. 证据：`README.md`\n- **Contributing to FastEmbed!**（documentation）：:+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 证据：`CONTRIBUTING.md`\n- **License**（source_file）：Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ 证据：`LICENSE`\n- **⚡️ What is FastEmbed?**（documentation）：FastEmbed is a lightweight, fast, Python library built for embedding generation. We support popular text models https://qdrant.github.io/fastembed/examples/Supported Models/ . Please open a Github issue https://github.com/qdrant/fastembed/issues/new if you want us to add a new model. 证据：`docs/index.md`\n- **All Submissions:**（documentation）：Have you followed the guidelines in our Contributing document? Have you checked to ensure there aren't other open Pull Requests ../../../pulls for the same update/change? 证据：`.github/PULL_REQUEST_TEMPLATE.md`\n- **Releasing FastEmbed**（documentation）：This is a guide how to release fastembed and fastembed-gpu packages. 
证据：`RELEASE.md`\n- **Byte-compiled / optimized / DLL files**（source_file）：Byte-compiled / optimized / DLL files pycache / .py cod $py.class 证据：`.gitignore`\n- **.Pre Commit Config**（source_file）：repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.3.4 hooks: - id: ruff types or: python, pyi, jupyter args: --fix - id: ruff-format types or: python, pyi, jupyter 证据：`.pre-commit-config.yaml`\n- **Notice**（source_file）：This product includes software developed by Qdrant 证据：`NOTICE`\n- **Getting Started**（source_file）：{ \"cells\": { \"cell type\": \"markdown\", \"id\": \"3f159fb4\", \"metadata\": {}, \"source\": \" 🚶🏻‍♂️ Getting Started\\n\", \"\\n\", \"Here you will learn how to use the fastembed package to embed your data into a vector space. The package is designed to be easy to use and fast. It is built on top of the ONNX https://onnx.ai/ standard, which allows for fast inference on a variety of hardware called Runtimes in ONNX . \\n\", \"\\n\", \" Quick Start\\n\", \"\\n\", \"The fastembed package is designed to be easy to use. We'll be using TextEmbedding class. 
It takes a list of strings as input and returns a generator of vectors.\\n\", \"\\n\", \" 💡 You can learn more about generators from Python Wiki https://wiki.python.org/moin/Gen… 证据：`docs/Getting Started.ipynb`\n- **01 Onnx Port**（source_file）：{ \"cells\": { \"cell type\": \"code\", \"execution count\": null, \"id\": \"0e9dbcde\", \"metadata\": {}, \"outputs\": , \"source\": \"%load ext autoreload\\n\", \"%autoreload 2\" }, { \"cell type\": \"code\", \"execution count\": null, \"id\": \"c37e1fda-c7f1-46e7-a5d4-19fa05c36ac1\", \"metadata\": {}, \"outputs\": , \"source\": \"from pathlib import Path\\n\", \"from typing import Any\\n\", \"\\n\", \"import numpy as np\\n\", \"import time\\n\", \"from torch import Tensor\\n\", \"from transformers import AutoTokenizer, AutoModel\\n\", \"\\n\", \"from optimum.onnxruntime import AutoOptimizationConfig, ORTModelForFeatureExtraction, ORTOptimizer\\n\", \"from optimum.pipelines import pipeline\\n\", \"import torch.nn.functional as F\" }, { \"cell type\": \"code\", \"… 证据：`experiments/01_ONNX_Port.ipynb`\n- **02 Splade To Onnx**（source_file）：{ \"cells\": { \"cell type\": \"code\", \"execution count\": 1, \"metadata\": {}, \"outputs\": , \"source\": \"import numpy as np\\n\", \"import torch\\n\", \"from transformers import AutoModelForMaskedLM, AutoTokenizer\" }, { \"cell type\": \"markdown\", \"metadata\": {}, \"source\": \" Running the model with Transformers and Torch\" }, { \"cell type\": \"code\", \"execution count\": 2, \"metadata\": {}, \"outputs\": , \"source\": \"sentences = \\n\", \" \\\"Hello World\\\",\\n\", \" \\\"Built by Nirant Kasliwal\\\",\\n\", \" \" }, { \"cell type\": \"markdown\", \"metadata\": {}, \"source\": \" PyTorch Code from the SPLADERunner https://github.com/PrithivirajDamodaran/SPLADERunner library\" }, { \"cell type\": \"code\", \"execution count\": 3, \"metadata\": {}, \"output… 证据：`experiments/02_SPLADE_to_ONNX.ipynb`\n- **Example. 
Convert Resnet50 To Onnx**（source_file）：{ \"cells\": { \"cell type\": \"code\", \"execution count\": 1, \"id\": \"4bdb2a91-fa2a-4cee-ad5a-176cc957394d\", \"metadata\": { \"ExecuteTime\": { \"end time\": \"2024-05-23T12:15:28.171586Z\", \"start time\": \"2024-05-23T12:15:28.076314Z\" } }, \"outputs\": { \"ename\": \"ModuleNotFoundError\", \"evalue\": \"No module named 'torch'\", \"output type\": \"error\", \"traceback\": \"\\u001B 0;31m---------------------------------------------------------------------------\\u001B 0m\", \"\\u001B 0;31mModuleNotFoundError\\u001B 0m Traceback most recent call last \", \"Cell \\u001B 0;32mIn 1 , line 1\\u001B 0m\\n\\u001B 0;32m---- 1\\u001B 0m \\u001B 38;5;28;01mimport\\u001B 39;00m \\u001B 38;5;21;01mtorch\\u001B 39;00m\\n\\u001B 1;32m 2\\u001B 0m \\u001B 3… 证据：`experiments/Example. Convert Resnet50 to ONNX.ipynb`\n- **Throughput Across Models**（source_file）：{ \"cells\": { \"cell type\": \"markdown\", \"metadata\": {}, \"source\": \" 🤗 Huggingface vs ⚡ FastEmbed️\\n\", \"\\n\", \"Comparing the performance of Huggingface's 🤗 Transformers and ⚡ FastEmbed️ on a simple task on the following machine: Apple M2 Max, 32 GB RAM\\n\", \"\\n\", \" 📦 Imports\\n\", \"\\n\", \"Importing the necessary libraries for this comparison.\" }, { \"cell type\": \"code\", \"execution count\": 1, \"metadata\": { \"ExecuteTime\": { \"end time\": \"2024-03-30T00:33:35.753669Z\", \"start time\": \"2024-03-30T00:33:34.371658Z\" } }, \"outputs\": { \"name\": \"stderr\", \"output type\": \"stream\", \"text\": \"/Users/joein/work/qdrant/fastembed/venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. 
Pleas… 证据：`experiments/Throughput_Across_Models.ipynb`\n- **export if the output model does not exist**（source_file）：from optimum.exporters.onnx import main export from transformers import AutoTokenizer 证据：`experiments/attention_export.py`\n- **Prepare the input**（source_file）：import numpy as np import onnx import onnxruntime from transformers import AutoTokenizer 证据：`experiments/try_attention_export.py`\n- **Init**（source_file）：from fastembed.image import ImageEmbedding from fastembed.late interaction import LateInteractionTextEmbedding from fastembed.late interaction multimodal import LateInteractionMultimodalEmbedding from fastembed.sparse import SparseEmbedding, SparseTextEmbedding from fastembed.text import TextEmbedding 证据：`fastembed/__init__.py`\n- **Embedding**（source_file）：from fastembed import TextEmbedding 证据：`fastembed/embedding.py`\n- **Single item should be processed in less than:**（source_file）：import logging import os from collections import defaultdict from copy import deepcopy from enum import Enum from multiprocessing import Queue, get context from multiprocessing.context import BaseContext from multiprocessing.process import BaseProcess from multiprocessing.sharedctypes import Synchronized as BaseValue from queue import Empty from typing import Any, Iterable, Type 证据：`fastembed/parallel_processor.py`\n- **Py**（source_file）：partial 证据：`fastembed/py.typed`\n- **Primary color**（source_file）：site name: FastEmbed site url: https://qdrant.github.io/fastembed/ site author: Nirant Kasliwal repo url: https://github.com/qdrant/fastembed/ repo name: qdrant/fastembed 证据：`mkdocs.yml`\n- **Pyproject**（source_file）：tool.poetry name = \"fastembed\" version = \"0.8.0\" description = \"Fast, light, accurate library built for retrieval embedding generation\" authors = \"Qdrant Team \", \"NirantK \" license = \"Apache-2.0\" readme = \"README.md\" packages = {include = \"fastembed\"} homepage = \"https://github.com/qdrant/fastembed\" repository = 
\"https://github.com/qdrant/fastembed\" keywords = \"vector\", \"embedding\", \"neural\", \"search\", \"qdrant\", \"sentence-transformers\" 证据：`pyproject.toml`\n- **disable DeprecationWarning https://github.com/jupyter/jupyter core/issues/398**（source_file）：disable DeprecationWarning https://github.com/jupyter/jupyter core/issues/398 os.environ \"JUPYTER PLATFORM DIRS\" = \"1\" 证据：`tests/__init__.py`\n- **Config**（source_file）：TEST DIR = Path file .parent TEST MISC DIR = TEST DIR / \"misc\" 证据：`tests/config.py`\n- **%% markdown**（source_file）：%% markdown 🤗 Huggingface vs ⚡ FastEmbed️ Comparing the performance of Huggingface's 🤗 Transformers and ⚡ FastEmbed️ on a simple task on the following machine: Apple M2 Max, 32 GB RAM 📦 Imports Importing the necessary libraries for this comparison. 证据：`tests/profiling.py`\n- **Type Stub**（source_file）：from fastembed import TextEmbedding, LateInteractionTextEmbedding, SparseTextEmbedding from fastembed.sparse.bm25 import Bm25 from fastembed.rerank.cross encoder import TextCrossEncoder 证据：`tests/type_stub.py`\n- **todo: PermissionDenied is raised on blobs removal in Windows, with blobs 2GB**（source_file）：from pathlib import Path from types import TracebackType from typing import Callable, Any, Type 证据：`tests/utils.py`\n\n## 宿主 AI 必须遵守的规则\n\n- **把本资产当作开工前上下文，而不是运行环境。**：AI Context Pack 只包含证据化项目理解，不包含目标项目的可执行状态。 证据：`README.md`, `CONTRIBUTING.md`, `LICENSE`\n- **回答用户时区分可预览内容与必须安装后才能验证的内容。**：安装前体验的消费者价值来自降低误装和误判，而不是伪装成真实运行。 证据：`README.md`, `CONTRIBUTING.md`, `LICENSE`\n\n## 用户开工前应该回答的问题\n\n- 你准备在哪个宿主 AI 或本地环境中使用它？\n- 你只是想先体验工作流，还是准备真实安装？\n- 你最在意的是安装成本、输出质量、还是和现有规则的冲突？\n\n## 验收标准\n\n- 所有能力声明都能回指到 evidence_refs 中的文件路径。\n- AI_CONTEXT_PACK.md 没有把预览包装成真实运行。\n- 用户能在 3 分钟内看懂适合谁、能做什么、如何开始和风险边界。\n\n---\n\n## Doramagic Context Augmentation\n\nThe following material strengthens the Repomix/AI Context Pack body. 
Human Manual is only a reading skeleton; pitfall logs become hard operating constraints for the host AI.\n\n## Human Manual Skeleton\n\nUsage rule: this is only a reading path and salience signal, not factual authority. Concrete facts must still come from repo evidence / Claim Graph.\n\nHard rules for the host AI:\n- Do not treat page titles, order, summaries, or importance as project facts.\n- When explaining the Human Manual skeleton, state that it is only a reading path / salience signal.\n- Capability, installation, compatibility, runtime status, and risk judgments must cite repo evidence, source paths, or Claim Graph.\n\n- **Introduction to FastEmbed**：importance `high`\n  - source_paths: README.md, fastembed/__init__.py\n- **Installation Guide**：importance `high`\n  - source_paths: pyproject.toml, README.md\n- **Quick Start Guide**：importance `high`\n  - source_paths: README.md, fastembed/text/text_embedding.py, fastembed/image/image_embedding.py\n- **System Architecture**：importance `high`\n  - source_paths: fastembed/text/text_embedding_base.py, fastembed/image/image_embedding_base.py, fastembed/common/onnx_model.py, fastembed/common/model_management.py, fastembed/parallel_processor.py\n- **Text Embedding Module**：importance `high`\n  - source_paths: fastembed/text/onnx_embedding.py, fastembed/text/text_embedding.py, fastembed/text/onnx_text_model.py, fastembed/text/pooled_embedding.py, fastembed/text/clip_embedding.py\n- **Image Embedding Module**：importance `medium`\n  - source_paths: fastembed/image/onnx_embedding.py, fastembed/image/image_embedding.py, fastembed/image/onnx_image_model.py, fastembed/image/transform/operators.py, fastembed/image/transform/functional.py\n- **Sparse Embedding Models**：importance `medium`\n  - source_paths: fastembed/sparse/splade_pp.py, fastembed/sparse/bm25.py, fastembed/sparse/bm42.py, fastembed/sparse/minicoil.py, fastembed/sparse/sparse_text_embedding.py\n- **Late Interaction Models**：importance `medium`\n  - 
source_paths: fastembed/late_interaction/colbert.py, fastembed/late_interaction/jina_colbert.py, fastembed/late_interaction/late_interaction_text_embedding.py, fastembed/late_interaction_multimodal/colpali.py, fastembed/late_interaction_multimodal/colmodernvbert.py\n\n## Repo Inspection Evidence\n\n- repo_clone_verified: true\n- repo_inspection_verified: true\n- repo_commit: `fde1e0b36138fbea21d36b4b4782695bdf2eeae3`\n- inspected_files: `pyproject.toml`, `README.md`, `docs/index.md`\n\nHard rules for the host AI:\n- Without repo_clone_verified=true, do not claim the source code has been read.\n- Without repo_inspection_verified=true, do not turn README/docs/package observations into facts.\n- Without quick_start_verified=true, do not claim the Quick Start has been successfully run.\n\n## Doramagic Pitfall Constraints\n\nThese rules come from Doramagic discovery, validation, or compilation pitfalls. The host AI must treat them as operating constraints, not general background notes.\n\n### Constraint 1: 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_201d4035515846df8830ca0dad6960c5 | https://github.com/qdrant/fastembed/issues/618 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 2: 来源证据：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Host AI rule: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Why it matters: 可能阻塞安装或首次运行。\n- Evidence: community_evidence:github | cevd_8147621574b345d7955e79ad98f4ba6f | 
https://github.com/qdrant/fastembed/issues/630 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 3: 失败模式：security_permissions: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside ca...\n\n- Trigger: Developers should check this security_permissions risk before relying on the project: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory. Context: Observed when using python\n- Why it matters: Developers may expose sensitive permissions or credentials: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- Evidence: failure_mode_cluster:github_issue | fmev_d3890c2b3360ccb937839f70fd4aa584 | https://github.com/qdrant/fastembed/issues/626 | [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 4: 失败模式：installation: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Trigger: Developers should check this installation risk before relying on the project: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.. 
Context: Observed when using python, docker\n- Why it matters: Developers may fail before the first successful local run: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Evidence: failure_mode_cluster:github_issue | fmev_16e50a8626aff1576adeb1c0baab4785 | https://github.com/qdrant/fastembed/issues/466 | The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 5: 失败模式：installation: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Trigger: Developers should check this installation risk before relying on the project: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2. 
Context: Observed when using python, windows, linux\n- Why it matters: Developers may fail before the first successful local run: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Evidence: failure_mode_cluster:github_issue | fmev_04529bc774f1c961d4adeb7190edecd7 | https://github.com/qdrant/fastembed/issues/618 | [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 6: 失败模式：installation: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Trigger: Developers should check this installation risk before relying on the project: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14. Context: Observed when using python, macos, cuda\n- Why it matters: Developers may fail before the first successful local run: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Evidence: failure_mode_cluster:github_issue | fmev_79a43347d96beb6d05eb6bfec2503fb5 | https://github.com/qdrant/fastembed/issues/630 | [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 7: 失败模式：installation: v0.5.1\n\n- Trigger: Developers should check this installation risk before relying on the project: v0.5.1\n- Host AI rule: Before packaging this project, run the relevant install/config/quickstart check for: v0.5.1. 
Context: Observed when using python\n- Why it matters: Upgrade or migration may change expected behavior: v0.5.1\n- Evidence: failure_mode_cluster:github_release | fmev_8b37c58c613005c0182d0325aaf032f7 | https://github.com/qdrant/fastembed/releases/tag/v0.5.1 | v0.5.1\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 8: 来源证据：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Trigger: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能增加新用户试用和生产接入成本。\n- Evidence: community_evidence:github | cevd_0f507b37e33e456ea259e82966cecdc5 | https://github.com/qdrant/fastembed/issues/466 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 9: 来源证据：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Trigger: GitHub 社区证据显示该项目存在一个配置相关的待验证问题：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n- Host AI rule: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Why it matters: 可能阻塞安装或首次运行。\n- Evidence: community_evidence:github | cevd_6d570ba91cfd414f970a3a8da522be04 | https://github.com/qdrant/fastembed/issues/627 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n- Hard boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n\n### Constraint 10: 能力判断依赖假设\n\n- Trigger: README/documentation is current enough for a first validation pass.\n- Host AI rule: 将假设转成下游验证清单。\n- Why it matters: 假设不成立时，用户拿不到承诺的能力。\n- Evidence: capability.assumptions | github_repo:666260877 | https://github.com/qdrant/fastembed | README/documentation is current enough for a first validation pass.\n- Hard 
boundary: do not present this pitfall as solved, verified, or safe to ignore unless later validation evidence explicitly closes it.\n",
      "summary": "Context and operating boundaries for host AI agents.",
      "title": "AI Context Pack"
    },
    "boundary_risk_card": {
      "asset_id": "boundary_risk_card",
      "filename": "BOUNDARY_RISK_CARD.md",
      "markdown": "# Boundary & Risk Card\n\nProject: qdrant/fastembed\n\n## Doramagic Trial Decision\n\nCurrent decision: it can enter pre-publication recommendation checks. First use should still start with least privilege, a temporary directory, and reversible configuration.\n\n## What The User Can Do Now\n\n- Read the Human Manual first to understand the project purpose and main workflows.\n- Use Prompt Preview for pre-install exploration; it validates interaction shape, not real execution.\n- Run official Quick Start commands only inside an isolated environment, not a primary setup.\n\n## Do Not Do Yet\n\n- Do not treat Prompt Preview as a real project execution result.\n- Do not treat metadata-only validation as sandbox installation validation.\n- Do not describe unverified capabilities as supported, working, or safe to install.\n- Do not provide production data, private files, real secrets, or primary host configuration on first trial.\n\n## Pre-Install Checklist\n\n- Host AI match: chatgpt\n- Official installation entry status: official entry point found\n- Isolated temporary directory, temporary host, or container validation: required\n- Configuration rollback path: required\n- API keys, network access, file access, or host configuration changes: treat as high risk until confirmed\n- Installation command, actual output, and failure logs: must be recorded\n\n## Current Blockers\n\n- No blockers.\n\n## Project-Specific Pitfalls\n\n- 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2 (high): 可能增加新用户试用和生产接入成本。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 来源证据：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14 (high): 可能阻塞安装或首次运行。 Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- 失败模式：security_permissions: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside ca... 
(high): Developers may expose sensitive permissions or credentials: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory. Context: Observed when using python\n- 失败模式：installation: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment. (medium): Developers may fail before the first successful local run: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment. Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.. Context: Observed when using python, docker\n- 失败模式：installation: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2 (medium): Developers may fail before the first successful local run: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2 Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2. Context: Observed when using python, windows, linux\n\n## Risk And Permission Notes\n\n- no_demo: medium\n\n## Evidence Gaps\n\n- No structured evidence gaps are currently visible.\n",
      "summary": "Installation, permission, validation, and pre-recommendation risks.",
      "title": "Boundary & Risk Card"
    },
    "human_manual": {
      "asset_id": "human_manual",
      "filename": "HUMAN_MANUAL.md",
      "markdown": "# https://github.com/qdrant/fastembed 项目说明书\n\n生成时间：2026-05-21 05:37:43 UTC\n\n## 目录\n\n- [Introduction to FastEmbed](#page-introduction)\n- [Installation Guide](#page-installation)\n- [Quick Start Guide](#page-quickstart)\n- [System Architecture](#page-architecture)\n- [Text Embedding Module](#page-text-embedding)\n- [Image Embedding Module](#page-image-embedding)\n- [Sparse Embedding Models](#page-sparse-embedding)\n- [Late Interaction Models](#page-late-interaction)\n- [ONNX Model Infrastructure](#page-onnx-model)\n- [GPU Support and Acceleration](#page-gpu-support)\n\n<a id='page-introduction'></a>\n\n## Introduction to FastEmbed\n\n### 相关页面\n\n相关主题：[Installation Guide](#page-installation), [System Architecture](#page-architecture), [Quick Start Guide](#page-quickstart)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n</details>\n\n# Introduction to FastEmbed\n\nFastEmbed is a lightweight, high-performance Python library designed for generating text and image embeddings using ONNX-based models. 
It provides a unified API for dense, sparse, and late-interaction embeddings with support for multiple embedding types and cross-encoder re-ranking models.\n\n## Overview\n\nFastEmbed serves as an embedding generation engine optimized for production use cases, particularly in vector search applications. The library emphasizes:\n\n- **Performance**: Leverages ONNX Runtime for efficient CPU inference\n- **Lightweight**: Minimal dependencies and small model sizes\n- **Flexibility**: Supports dense, sparse, and multimodal embeddings\n- **Ease of use**: Simple Python API for common embedding workflows\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Architecture\n\nFastEmbed follows a modular architecture with separate components for different embedding types and processing stages.\n\n```mermaid\ngraph TD\n    A[FastEmbed API] --> B[Text Embedding]\n    A --> C[Image Embedding]\n    A --> D[Sparse Embedding]\n    A --> E[Cross Encoder]\n    \n    B --> B1[OnnxEmbedding]\n    B --> B2[PooledEmbedding]\n    B --> B3[PooledNormalizedEmbedding]\n    \n    C --> C1[OnnxImageModel]\n    \n    D --> D1[BM25]\n    D --> D2[SPLADE++]\n    D --> D3[MiniCOIL]\n    \n    E --> E1[OnnxCrossEncoderModel]\n    \n    B1 & B2 & B3 --> F[ONNX Runtime]\n    C1 --> F\n    E1 --> F\n    D1 & D2 & D3 --> G[Tokenization Engine]\n```\n\n### Core Components\n\n| Component | Purpose | File Location |\n|-----------|---------|---------------|\n| `TextEmbedding` | Dense text embeddings | `fastembed/text/` |\n| `ImageEmbedding` | Image embeddings | `fastembed/image/` |\n| `SparseTextEmbedding` | Sparse embeddings (BM25, SPLADE) | `fastembed/sparse/` |\n| `TextCrossEncoder` | Re-ranking models | `fastembed/rerank/` |\n| `LateInteractionTextEmbedding` | Late interaction embeddings | `fastembed/late_interaction/` |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Supported Models\n\nFastEmbed 
supports an extensive collection of pre-converted ONNX models across multiple categories.\n\n### Text Embedding Models\n\nText embeddings are categorized by their pooling strategy and normalization approach.\n\n#### Dense Models (Unimodal)\n\n| Model | Dimension | Languages | Max Tokens | License |\n|-------|-----------|-----------|------------|---------|\n| `BAAI/bge-base-en-v1.5` | 768 | English | 512 | MIT |\n| `BAAI/bge-large-en-v1.5` | 1024 | English | 512 | MIT |\n| `BAAI/bge-small-en-v1.5` | 384 | English | 512 | MIT |\n| `thenlper/gte-base` | 768 | English | 512 | MIT |\n| `thenlper/gte-large` | 1024 | English | 512 | MIT |\n| `snowflake/snowflake-arctic-embed-m` | 768 | English | 512 | Apache-2.0 |\n| `snowflake/snowflake-arctic-embed-l` | 1024 | English | 512 | Apache-2.0 |\n\n资料来源：[fastembed/text/onnx_embedding.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n#### Multilingual Models\n\n| Model | Dimension | Languages | Max Tokens |\n|-------|-----------|-----------|------------|\n| `intfloat/multilingual-e5-small` | 384 | ~100 | 512 |\n| `intfloat/multilingual-e5-large` | 1024 | ~100 | 512 |\n| `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` | 768 | ~50 | 384 |\n| `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` | 384 | ~50 | 512 |\n\n资料来源：[fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n\n#### Jina AI Models\n\n| Model | Dimension | Languages | Max Tokens |\n|-------|-----------|-----------|------------|\n| `jinaai/jina-embeddings-v2-base-en` | 768 | English | 8192 |\n| `jinaai/jina-embeddings-v2-small-en` | 512 | English | 8192 |\n| `jinaai/jina-embeddings-v2-base-zh` | 768 | Chinese/English | 8192 |\n| `jinaai/jina-embeddings-v2-base-de` | 768 | German/English | 8192 |\n| `jinaai/jina-embeddings-v2-base-code` | 768 | 30 languages | 8192 
|\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n\n### Sparse Embedding Models\n\nSparse embeddings provide interpretable vectors with non-zero values only at specific token positions.\n\n| Model | Type | Language | Description |\n|-------|------|----------|-------------|\n| `prithivida/Splade_PP_en_v1` | SPLADE++ | English | Sparse lexical + semantic |\n| `Qdrant/minicoil-v1` | MiniCOIL | English | Semantic + keyword match |\n| `Qdrant/bm25` | BM25 | Multilingual | Traditional BM25 ranking |\n\n资料来源：[fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n\nThe MiniCOIL model combines semantic understanding with exact keyword matching:\n\n```python\nclass MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):\n    \"\"\"\n    MiniCOIL is a sparse embedding model, that resolves semantic meaning of the words,\n    while keeping exact keyword match behavior.\n    \n    Each vocabulary token is converted into 4d component of a sparse vector, \n    which is then weighted by the token frequency in the corpus.\n    If the token is not found in the corpus, it is treated exactly like in BM25.\n    \"\"\"\n```\n\n资料来源：[fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n\n### Image Embedding Models\n\n| Model | Dimension | Type | License |\n|-------|-----------|------|---------|\n| `Qdrant/resnet50-onnx` | 2048 | Image | Apache-2.0 |\n| `Qdrant/Unicom-ViT-B-16` | 768 | Multimodal | Apache-2.0 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Multimodal | Apache-2.0 |\n| `jinaai/jina-clip-v1` | 768 | Multimodal | Apache-2.0 |\n\n资料来源：[fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n### Cross Encoder Reranking Models\n\n| Model | Description | License | Size |\n|-------|-------------|---------|------|\n| 
`Xenova/ms-marco-MiniLM-L-12-v2` | MS MARCO passage ranking | Apache-2.0 | 0.12 GB |\n| `BAAI/bge-reranker-base` | BGE reranker base | MIT | 1.04 GB |\n| `jinaai/jina-reranker-v1-tiny-en` | Fast reranking, 8K context | Apache-2.0 | 0.13 GB |\n| `jinaai/jina-reranker-v1-turbo-en` | Fast reranking, 8K context | Apache-2.0 | 0.15 GB |\n| `jinaai/jina-reranker-v2-base-multilingual` | Multilingual, 1K context | CC-BY-NC-4.0 | 1.11 GB |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## Usage Patterns\n\n### Text Embedding Generation\n\n```python\nfrom fastembed import TextEmbedding\n\n# Initialize with default model\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n\n# Generate embeddings\ndocuments = [\n    \"passage: The capital of France is Paris\",\n    \"query: What is the capital of France?\"\n]\n\nembeddings = list(model.embed(documents))\n# Returns list of numpy arrays\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Sparse Embedding with SPLADE++\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nembeddings = list(model.embed(documents))\n\n# Returns:\n# [\n#   SparseEmbedding(indices=[ 17, 123, 919, ... ], values=[0.71, 0.22, 0.39, ...]),\n#   SparseEmbedding(indices=[ 38,  12,  91, ... 
], values=[0.11, 0.22, 0.39, ...])\n# ]\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Custom Model Configuration\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common.model_description import ModelSource, PoolingType\n\n# Register the custom model configuration, then load it by name\nTextEmbedding.add_custom_model(\n    model=\"intfloat/multilingual-e5-small\",\n    pooling=PoolingType.MEAN,\n    normalization=True,\n    sources=ModelSource(hf=\"intfloat/multilingual-e5-small\"),\n    dim=384,\n    model_file=\"onnx/model.onnx\"\n)\nmodel = TextEmbedding(model_name=\"intfloat/multilingual-e5-small\")\n\nembeddings = list(model.embed(documents))\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Cross Encoder Reranking\n\n```python\nfrom fastembed import TextCrossEncoder\n\nmodel = TextCrossEncoder(model_name=\"BAAI/bge-reranker-base\")\n\n# Score query-document pairs\nquery = \"What is the capital of France?\"\ndocuments = [\"Paris is the capital of France.\", \"Berlin is the capital of Germany.\"]\n\nscores = list(model.rerank(query, documents))\n```\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## Post-Processing: MUVERA\n\nFastEmbed includes MUVERA (Multi-Vector Reduction Algorithm) for converting late-interaction embeddings (like ColBERT) to fixed-dimensional encodings.\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\nfrom fastembed.postprocess import Muvera\nimport numpy as np\n\nmodel = LateInteractionTextEmbedding(model_name=\"colbert-ir/colbertv2.0\")\nmuvera = Muvera.from_multivector_model(\n    model=model,\n    k_sim=6,\n    dim_proj=32\n)\n\n# Convert late-interaction embeddings to fixed dimension\nembeddings = np.array(list(model.embed([\"sample text\"])))\nfde = muvera.process_document(embeddings[0])\n```\n\n资料来源：[fastembed/postprocess/muvera.py](https://github.com/qdrant/fastembed/blob/main/fastembed/postprocess/muvera.py)\n\n## 
Model Caching\n\nFastEmbed automatically caches downloaded models to avoid repeated downloads.\n\n| Setting | Environment Variable | Default Location |\n|---------|---------------------|-------------------|\n| Cache Path | `FASTEMBED_CACHE_PATH` | System temp directory |\n\nModels are stored in ONNX format after download and converted to optimized formats on first use.\n\n## Prefix Requirements\n\nSome models require specific text prefixes for query and document inputs:\n\n| Prefix Requirement | Models | Example |\n|-------------------|--------|---------|\n| **Necessary** | E5, Nomic, Snowflake Arctic, BGE-small | Query: `query: ...`, Document: `passage: ...` |\n| **Not Necessary** | Jina, BGE (base/large), GTE | Plain text input |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Quick Reference\n\n### Model Selection Guide\n\n| Use Case | Recommended Model |\n|----------|-------------------|\n| English semantic search | `BAAI/bge-small-en-v1.5` |\n| High-quality English | `BAAI/bge-large-en-v1.5` |\n| Multilingual (100+ languages) | `intfloat/multilingual-e5-large` |\n| Long documents (8K tokens) | `jinaai/jina-embeddings-v2-base-en` |\n| Fast re-ranking | `jinaai/jina-reranker-v1-tiny-en` |\n| Sparse lexical + semantic | `prithivida/Splade_PP_en_v1` |\n\n### API Quick Reference\n\n| Class | Import | Primary Method |\n|-------|--------|----------------|\n| Dense Text | `from fastembed import TextEmbedding` | `.embed(documents)` |\n| Sparse Text | `from fastembed import SparseTextEmbedding` | `.embed(documents)` |\n| Image | `from fastembed import ImageEmbedding` | `.embed(images)` |\n| Cross Encoder | `from fastembed import TextCrossEncoder` | `.rerank(query, documents)` |\n| Late Interaction | `from fastembed import LateInteractionTextEmbedding` | `.embed(documents)` |\n\n## Further Documentation\n\n- [FastEmbed GitHub Repository](https://github.com/qdrant/fastembed)\n- [Qdrant 
Documentation](https://qdrant.tech/documentation/)\n- [ONNX Runtime Documentation](https://onnxruntime.ai/docs/)\n\n---\n\n<a id='page-installation'></a>\n\n## Installation Guide\n\n### 相关页面\n\n相关主题：[Introduction to FastEmbed](#page-introduction), [GPU Support and Acceleration](#page-gpu-support)\n\n<details>\n<summary>Relevant Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n- [RELEASE.md](https://github.com/qdrant/fastembed/blob/main/RELEASE.md)\n- [pyproject.toml](https://github.com/qdrant/fastembed/blob/main/pyproject.toml)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [mkdocs.yml](https://github.com/qdrant/fastembed/blob/main/mkdocs.yml)\n</details>\n\n# Installation Guide\n\nFastEmbed is a lightweight, fast, and accurate embedding library developed by Qdrant. 
This guide covers all aspects of installing and configuring FastEmbed for various use cases including CPU inference, GPU acceleration, and integration with Qdrant vector database.\n\n## Prerequisites\n\n### System Requirements\n\n| Requirement | Minimum | Recommended |\n|-------------|---------|-------------|\n| Python Version | 3.9+ | 3.10+ |\n| RAM | 4 GB | 8 GB+ |\n| Disk Space | 2 GB | 5 GB+ |\n| GPU (Optional) | CUDA 11.8+ | CUDA 12.x |\n\nVerify your Python version before installation:\n\n```bash\npython --version\n```\n\n资料来源：[README.md:1-100](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Package Variants\n\nFastEmbed is distributed in two package variants:\n\n| Package | Description | Use Case |\n|---------|-------------|----------|\n| `fastembed` | CPU-only version | General purpose embedding generation |\n| `fastembed-gpu` | GPU-accelerated version | High-throughput production workloads |\n\n### CPU Installation\n\nInstall the standard CPU version using pip:\n\n```bash\npip install fastembed\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n资料来源：[RELEASE.md:1-15](https://github.com/qdrant/fastembed/blob/main/RELEASE.md)\n\n### GPU Installation\n\nFor CUDA-enabled GPU acceleration:\n\n```bash\npip install fastembed-gpu\n```\n\nThe GPU package automatically includes the CUDA Execution Provider for ONNX Runtime, enabling significantly faster inference on NVIDIA GPUs.\n\n资料来源：[RELEASE.md:1-15](https://github.com/qdrant/fastembed/blob/main/RELEASE.md)\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Installation Verification\n\nVerify successful installation:\n\n```python\nfrom fastembed import TextEmbedding\n\n# List available models\nprint(TextEmbedding.list_supported_models())\n\n# Initialize a model\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\nprint(\"FastEmbed installed successfully!\")\n```\n\n## Supported Embedding Models\n\nFastEmbed supports multiple embedding modalities organized into the following categories:\n\n### Dense Text Embeddings\n\nDense text embeddings provide fixed-dimensional vector representations for text. 
Supported models include:\n\n| Model | Dimension | Languages | Token Limit | License |\n|-------|-----------|-----------|-------------|---------|\n| `BAAI/bge-small-en-v1.5` | 384 | English | 512 | MIT |\n| `BAAI/bge-base-en-v1.5` | 768 | English | 512 | MIT |\n| `BAAI/bge-large-en-v1.5` | 1024 | English | 512 | MIT |\n| `jinaai/jina-embeddings-v2-base-en` | 768 | English | 8192 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-small-en` | 512 | English | 8192 | Apache 2.0 |\n| `sentence-transformers/all-MiniLM-L6-v2` | 384 | English | 256 | Apache 2.0 |\n| `mixedbread-ai/mxbai-embed-large-v1` | 1024 | English | 512 | Apache 2.0 |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n### Multilingual Models\n\n| Model | Dimension | Languages | Token Limit | License |\n|-------|-----------|-----------|-------------|---------|\n| `intfloat/multilingual-e5-small` | 384 | ~100 | 512 | MIT |\n| `intfloat/multilingual-e5-large` | 1024 | ~100 | 512 | MIT |\n| `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` | 768 | ~50 | 384 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-base-de` | 768 | German, English | 8192 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-base-zh` | 768 | Chinese, English | 8192 | Apache 2.0 |\n| `jinaai/jina-embeddings-v2-base-es` | 768 | Spanish, English | 8192 | Apache 2.0 |\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n资料来源：[fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n\n### Sparse Embeddings\n\nSparse embeddings represent text using high-dimensional sparse vectors, useful for keyword-based retrieval.\n\n| Model | Type | License |\n|-------|------|---------|\n| `prithivida/Splade_PP_en_v1` | SPLADE++ | Apache 2.0 |\n| `Qdrant/bm25` | BM25 | Apache 2.0 |\n\n资料来源：[fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Image Embeddings\n\n| Model | Dimension | Type | License 
|\n|-------|-----------|------|---------|\n| `Qdrant/resnet50-onnx` | 2048 | Image | Apache 2.0 |\n| `Qdrant/Unicom-ViT-B-16` | 768 | Multimodal | Apache 2.0 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Multimodal | Apache 2.0 |\n| `jinaai/jina-clip-v1` | 768 | Multimodal (text&image) | Apache 2.0 |\n\n资料来源：[fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n### Reranking Models\n\nCross-encoder models for re-ranking search results:\n\n| Model | Context Length | License |\n|-------|----------------|---------|\n| `Xenova/ms-marco-MiniLM-L-12-v2` | - | Apache 2.0 |\n| `BAAI/bge-reranker-base` | - | MIT |\n| `jinaai/jina-reranker-v1-tiny-en` | 8K | Apache 2.0 |\n| `jinaai/jina-reranker-v1-turbo-en` | 8K | Apache 2.0 |\n| `jinaai/jina-reranker-v2-base-multilingual` | 1K | CC-BY-NC-4.0 |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## GPU Configuration\n\n### CUDA Provider Setup\n\nEnable GPU acceleration by specifying the CUDA execution provider:\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common import OnnxProvider\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\"CUDAExecutionProvider\"]\n)\nprint(\"The model BAAI/bge-small-en-v1.5 is ready to use on a GPU.\")\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### GPU Installation Workflow\n\n```mermaid\ngraph TD\n    A[Install fastembed-gpu] --> B{Check CUDA Version}\n    B -->|CUDA 11.8+| C[Install Compatible Driver]\n    B -->|CUDA < 11.8| D[Upgrade CUDA]\n    C --> E[Verify ONNX Runtime GPU Support]\n    E --> F[Import TextEmbedding]\n    F --> G[Configure Providers]\n    G --> H[GPU Inference Ready]\n```\n\n## Qdrant Integration\n\nFastEmbed integrates seamlessly with the Qdrant vector database for production deployments.\n\n### Installation with Qdrant Client\n\n```bash\n# Standard Qdrant with FastEmbed\npip install 
qdrant-client[fastembed]\n\n# GPU-accelerated FastEmbed with Qdrant\npip install qdrant-client[fastembed-gpu]\n```\n\nOn zsh shells, use quotes:\n\n```bash\npip install 'qdrant-client[fastembed]'\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Complete Qdrant Integration Example\n\n```python\nfrom qdrant_client import QdrantClient, models\n\n# Initialize the client\nclient = QdrantClient(\"localhost\", port=6333)  # For production\n# client = QdrantClient(\":memory:\")  # For experimentation\n\nmodel_name = \"sentence-transformers/all-MiniLM-L6-v2\"\npayload = [\n    {\"document\": \"Qdrant has Langchain integrations\", \"source\": \"Langchain-docs\"},\n    {\"document\": \"Qdrant also has Llama Index integrations\", \"source\": \"LlamaIndex-docs\"},\n]\ndocs = [models.Document(text=data[\"document\"], model=model_name) for data in payload]\nids = [42, 2]\n\nclient.create_collection(\n    \"demo_collection\",\n    vectors_config=models.VectorParams(\n        size=client.get_embedding_size(model_name), distance=models.Distance.COSINE\n    )\n)\n\nclient.upload_collection(\n    collection_name=\"demo_collection\",\n    vectors=docs,\n    ids=ids,\n    payload=payload,\n)\n\nsearch_result = client.query_points(\n    collection_name=\"demo_collection\",\n    query=docs[0],\n    limit=5,\n)\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Cache Configuration\n\n### Default Cache Location\n\nModels are cached in `fastembed_cache` within the system's temp directory by default. 
This location can be customized using environment variables.\n\n### FASTEMBED_CACHE_PATH\n\nSet a custom cache directory:\n\n```bash\nexport FASTEMBED_CACHE_PATH=/path/to/custom/cache\n```\n\nThe cache directory structure follows Hugging Face's conventions, with models organized by their source repository.\n\n### Model Loading Behavior\n\n```mermaid\ngraph TD\n    A[Import FastEmbed Module] --> B{Model in Cache?}\n    B -->|Yes| C[Load from Cache]\n    B -->|No| D[Download from Source]\n    D --> E{HuggingFace Available?}\n    E -->|Yes| F[Download from HF Hub]\n    E -->|No| G[Use URL Source]\n    C --> H[Initialize ONNX Session]\n    E --> H\n    G --> H\n    H --> I[Model Ready for Inference]\n```\n\n## Usage Examples\n\n### Dense Text Embedding\n\n```python\nfrom fastembed import TextEmbedding\n\ndocuments = [\n    \"passage: FastEmbed is a fast embedding library\",\n    \"query: What is FastEmbed?\",\n    \"passage: Qdrant is a vector database\",\n]\n\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\nembeddings = list(model.embed(documents))\n\nprint(f\"Generated {len(embeddings)} embeddings\")\nprint(f\"Embedding dimension: {embeddings[0].shape}\")\n```\n\n### Sparse Text Embedding (SPLADE++)\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nembeddings = list(model.embed(documents))\n\n# Output format:\n# [\n#   SparseEmbedding(indices=[ 17, 123, 919, ... ], values=[0.71, 0.22, 0.39, ...]),\n#   SparseEmbedding(indices=[ 38,  12,  91, ... 
], values=[0.11, 0.22, 0.39, ...])\n# ]\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n### Image Embedding\n\n```python\nfrom fastembed import ImageEmbedding\nfrom PIL import Image\n\nmodel = ImageEmbedding(model_name=\"Qdrant/resnet50-onnx\")\nimages = [Image.open(\"path/to/image.jpg\")]\nembeddings = list(model.embed(images))\n```\n\n### Custom Model Source\n\nLoad a supported model with custom configuration:\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common import ModelSource, PoolingType\n\nmodel = TextEmbedding(\n    model_name=\"custom-model\",\n    pool_type=PoolingType.MEAN,\n    normalization=True,\n    sources=ModelSource(hf=\"intfloat/multilingual-e5-small\"),\n    dim=384,\n    model_file=\"onnx/model.onnx\"\n)\n\nembeddings = list(model.embed(documents))\n```\n\n资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Late Interaction Models\n\nLate interaction models like ColBERT enable more sophisticated similarity matching:\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\n\nmodel = LateInteractionTextEmbedding(model_name=\"colbert-ir/colbertv2.0\")\nembeddings = list(model.embed(documents))\n```\n\n## Post-Processing with Muvera\n\nThe Muvera post-processor enables Fixed Dimensional Encoding (FDE) for multi-vector models:\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\nfrom fastembed.postprocess import Muvera\nimport numpy as np\n\nmodel = LateInteractionTextEmbedding(model_name=\"colbert-ir/colbertv2.0\")\nmuvera = Muvera.from_multivector_model(\n    model=model,\n    k_sim=6,\n    dim_proj=32\n)\n\nembeddings = np.array(list(model.embed([\"sample text\"])))\nfde = muvera.process_document(embeddings[0])\n```\n\n资料来源：[fastembed/postprocess/muvera.py](https://github.com/qdrant/fastembed/blob/main/fastembed/postprocess/muvera.py)\n\n## Troubleshooting\n\n### Common Installation Issues\n\n| Issue | Solution |\n|-------|----------|\n| `ModuleNotFoundError: No module named 'fastembed'` | Run `pip install 
fastembed` |\n| CUDA not available | Install `fastembed-gpu` and verify NVIDIA driver |\n| Model download fails | Check network connectivity and HuggingFace access |\n| Out of memory | Reduce batch size or use smaller model variant |\n\n### Checking Installed Version\n\n```python\nimport fastembed\nprint(fastembed.__version__)\n```\n\n### Verifying GPU Availability\n\n```python\nimport onnxruntime as ort\nprint(f\"Available providers: {ort.get_available_providers()}\")\n```\n\n## Advanced Configuration\n\n### Lazy Loading\n\nEnable lazy model loading for memory-efficient initialization:\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    lazy_load=True  # Model loads on first inference call\n)\n```\n\n### Thread Configuration\n\nOptimize CPU thread usage:\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    threads=4  # Limit to 4 threads\n)\n```\n\n### Device Selection\n\n```python\nfrom fastembed.common import Device\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    cuda=True  # or Device.AUTO for automatic detection\n)\n```\n\n## Documentation Resources\n\nFor more information, refer to the official documentation:\n\n- [FastEmbed Documentation](https://qdrant.github.io/fastembed/)\n- [Qdrant GitHub Repository](https://github.com/qdrant/fastembed/)\n- [HuggingFace Model Hub](https://huggingface.co/Qdrant/fastembed)\n\n资料来源：[mkdocs.yml](https://github.com/qdrant/fastembed/blob/main/mkdocs.yml)\n\n---\n\n<a id='page-quickstart'></a>\n\n## Quick Start Guide\n\n### 相关页面\n\n相关主题：[Introduction to FastEmbed](#page-introduction), [Text Embedding Module](#page-text-embedding), [Image Embedding Module](#page-image-embedding)\n\n<details>\n<summary>Related Source Files</summary>\n\n以下源码文件用于生成本页说明：\n\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n- 
[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n</details>\n\n# Quick Start Guide\n\nFastEmbed is a lightweight, fast, and accurate embedding library developed by Qdrant. It provides text and image embeddings using ONNX-based models optimized for production deployment. This guide covers the essential steps to get started with FastEmbed for your embedding needs.\n\n## Overview\n\nFastEmbed enables developers to generate high-quality vector embeddings for text and images with minimal configuration. 
The library supports multiple embedding types including dense embeddings, sparse embeddings, and cross-encoder reranking models.\n\n```mermaid\ngraph TD\n    A[FastEmbed Library] --> B[Text Embeddings]\n    A --> C[Image Embeddings]\n    A --> D[Sparse Embeddings]\n    A --> E[Reranking Models]\n    \n    B --> B1[Dense Models]\n    B --> B2[Pooled Models]\n    \n    D --> D1[SPLADE++]\n    D --> D2[BM25]\n    D --> D3[MiniCOIL]\n```\n\n## Installation\n\nInstall FastEmbed using pip:\n\n```bash\npip install fastembed\n```\n\nFor CUDA acceleration support:\n\n```bash\npip install fastembed-gpu\n```\n\n## Basic Text Embedding\n\n### Loading a Model\n\nImport and initialize a text embedding model:\n\n```python\nfrom fastembed import TextEmbedding\n\n# Use the default model (BAAI/bge-small-en-v1.5)\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n```\n\n### Generating Embeddings\n\n```python\ndocuments = [\n    \"passage: A man is eating food.\",\n    \"passage: A man is eating a piece of broccoli.\",\n    \"passage: A man is eating pasta.\",\n    \"passage: A woman is cutting vegetables.\",\n]\n\nembeddings = list(model.embed(documents))\n```\n\nThe prefix `\"passage:\"` is required for some models like `BAAI/bge-small-en-v1.5` and `snowflake/snowflake-arctic-embed-xs` to indicate the text type. 
资料来源：[README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n\n## Supported Models\n\n### Text Embedding Models\n\n| Model Name | Dimension | Languages | Max Tokens | License | Size (GB) |\n|------------|-----------|-----------|------------|---------|-----------|\n| BAAI/bge-small-en-v1.5 | 384 | English | 512 | MIT | 0.067 |\n| BAAI/bge-base-en-v1.5 | 768 | English | 512 | MIT | 0.21 |\n| BAAI/bge-large-en-v1.5 | 1024 | English | 512 | MIT | 1.20 |\n| intfloat/multilingual-e5-small | 384 | Multilingual (~100) | 512 | MIT | 0.09 |\n| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | 384 | Multilingual (~50) | 512 | Apache 2.0 | 0.22 |\n| jinaai/jina-embeddings-v2-base-en | 768 | English | 8192 | Apache 2.0 | 0.52 |\n| snowflake/snowflake-arctic-embed-xs | 384 | English | 512 | Apache 2.0 | 0.09 |\n| snowflake/snowflake-arctic-embed-m-long | 768 | English | 2048 | Apache 2.0 | 0.54 |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n### Pooled Normalized Embeddings\n\nThe `PooledNormalizedEmbedding` class applies mean pooling to the model output and normalizes the result:\n\n```python\nfrom fastembed.text import PooledNormalizedEmbedding\n\nmodel = PooledNormalizedEmbedding(model_name=\"jinaai/jina-embeddings-v2-base-en\")\n```\n\nThe post-processing combines mean pooling with L2 normalization:\n\n```python\ndef _post_process_onnx_output(self, output: OnnxOutputContext, **kwargs: Any) -> Iterable[NumpyArray]:\n    embeddings = output.model_output\n    attn_mask = output.attention_mask\n    return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py:78-84](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n\n## Image Embeddings\n\nFastEmbed supports multimodal models for image embeddings:\n\n```python\nfrom fastembed.image import OnnxImageEmbedding\n\nmodel = 
OnnxImageEmbedding(model_name=\"Qdrant/Unicom-ViT-B-16\")\n```\n\n### Supported Image Models\n\n| Model Name | Dimension | Type | License | Size (GB) |\n|------------|-----------|------|---------|-----------|\n| Qdrant/Unicom-ViT-B-16 | 768 | Multimodal (text&image) | Apache 2.0 | 0.82 |\n| Qdrant/Unicom-ViT-B-32 | 512 | Multimodal (text&image) | Apache 2.0 | 0.48 |\n| jinaai/jina-clip-v1 | 768 | Multimodal (text&image) | Apache 2.0 | 0.34 |\n\n资料来源：[fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Sparse Embeddings\n\nSparse embeddings provide interpretable, non-dense vector representations useful for keyword-aware semantic search.\n\n### SPLADE++\n\nSPLADE++ is a sparse embedding model that resolves semantic meaning while preserving keyword match behavior:\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nembeddings = list(model.embed(documents))\n```\n\nReturns sparse vectors with indices and values:\n\n```\n[\n  SparseEmbedding(indices=[17, 123, 919, ...], values=[0.71, 0.22, 0.39, ...]),\n  SparseEmbedding(indices=[38, 12, 91, ...], values=[0.11, 0.22, 0.39, ...])\n]\n```\n\n### BM25\n\nTraditional BM25 implemented as sparse embeddings:\n\n```python\nfrom fastembed.sparse import Bm25\n\nmodel = Bm25(language=\"en\")\n```\n\nBM25 formula:\n\n```\nscore(q, d) = SUM[ IDF(q_i) * (f(q_i, d) * (k + 1)) / (f(q_i, d) + k * (1 - b + b * (|d| / avg_len))) ]\n```\n\n资料来源：[fastembed/sparse/bm25.py:47-52](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n\n### MiniCOIL\n\nMiniCOIL combines semantic embeddings with exact keyword matching:\n\n```python\nfrom fastembed.sparse import MiniCOIL\n\nmodel = MiniCOIL(model_name=\"Qdrant/minicoil-v1\")\n```\n\n## Reranking Models\n\nCross-encoder reranking improves search results by re-scoring candidate documents:\n\n```python\nfrom fastembed import 
TextCrossEncoder\n\nmodel = TextCrossEncoder(model_name=\"jinaai/jina-reranker-v1-turbo-en\")\n```\n\n### Supported Reranker Models\n\n| Model Name | License | Size (GB) | Context Length |\n|------------|---------|-----------|----------------|\n| jinaai/jina-reranker-v1-turbo-en | Apache 2.0 | 0.15 | 8K |\n| jinaai/jina-reranker-v2-base-multilingual | CC BY-NC 4.0 | 1.11 | 1K (sliding window) |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n\n## Common Configuration Options\n\n### Initialization Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | \"BAAI/bge-small-en-v1.5\" | Name of the model to use |\n| `cache_dir` | str or None | None | Cache directory path |\n| `threads` | int or None | None | Number of threads for ONNX execution |\n| `providers` | Sequence[OnnxProvider] | None | ONNX execution providers (CPU, CUDA, etc.) |\n| `cuda` | bool or Device | Device.AUTO | Enable CUDA acceleration |\n| `device_ids` | list[int] | None | Specific GPU device IDs |\n| `lazy_load` | bool | False | Defer model loading until first use |\n\n资料来源：[fastembed/text/onnx_embedding.py:123-136](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Complete Example Workflow\n\n```mermaid\ngraph LR\n    A[Input Documents] --> B[TextEmbedding]\n    B --> C[Embedding Vectors]\n    C --> D[Vector Database]\n    D --> E[Similarity Search]\n    E --> F[Candidate Results]\n    F --> G[TextCrossEncoder]\n    G --> H[Reranked Results]\n```\n\n```python\nfrom fastembed import TextEmbedding, SparseTextEmbedding, TextCrossEncoder\n\n# 1. 
Load models\ntext_model = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\nsparse_model = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\nreranker = TextCrossEncoder(model_name=\"jinaai/jina-reranker-v1-turbo-en\")\n\n# 2. Generate dense embeddings\ndocuments = [\"query: fastembed is fast\", \"passage: FastEmbed provides efficient embeddings\"]\ndense_embeddings = list(text_model.embed(documents))\n\n# 3. Generate sparse embeddings\nsparse_embeddings = list(sparse_model.embed(documents))\n\n# 4. Rerank results\nquery = \"query: What is FastEmbed?\"\nresults = reranker.rerank(query=query, documents=documents, top_k=2)\n```\n\n## Language-Specific Models\n\nFor non-English content, use multilingual models:\n\n| Language | Recommended Model |\n|----------|-------------------|\n| Chinese | `BAAI/bge-small-zh-v1.5`, `jinaai/jina-embeddings-v2-base-zh` |\n| German | `jinaai/jina-embeddings-v2-base-de` |\n| Spanish | `jinaai/jina-embeddings-v2-base-es` |\n| Code | `jinaai/jina-embeddings-v2-base-code` |\n| Multilingual | `intfloat/multilingual-e5-small`, `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` |\n\n## Prefix Requirements\n\nSome models require specific prefixes to distinguish query and passage text:\n\n| Model | Prefix Required | Example |\n|-------|-----------------|---------|\n| BAAI/bge-small-en-v1.5 | Yes | `\"query: ...\"`, `\"passage: ...\"` |\n| intfloat/multilingual-e5-small | Yes | `\"query: ...\"`, `\"passage: ...\"` |\n| snowflake/snowflake-arctic-embed-xs | Yes | `\"query: ...\"`, `\"passage: ...\"` |\n| jinaai/jina-embeddings-v2-base-en | Not necessary | Plain text |\n\n资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Next Steps\n\n- Explore the [API Reference](../api_reference/) for detailed method documentation\n- Learn about [Model Selection](../models/) to choose the right embedding model\n- Read [Advanced 
Usage](../guides/advanced_usage/) for performance optimization\n- Check [Examples](../examples/) for real-world use cases\n\n---\n\n<a id='page-architecture'></a>\n\n## System Architecture\n\n### 相关页面\n\n相关主题：[Introduction to FastEmbed](#page-introduction), [ONNX Model Infrastructure](#page-onnx-model)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/text/text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py)\n- [fastembed/image/image_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/image_embedding_base.py)\n- [fastembed/common/onnx_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/onnx_model.py)\n- [fastembed/common/model_management.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/model_management.py)\n- [fastembed/parallel_processor.py](https://github.com/qdrant/fastembed/blob/main/fastembed/parallel_processor.py)\n</details>\n\n# System Architecture\n\nFastEmbed is a lightweight, fast text and image embedding library built on ONNX Runtime. 
The architecture is designed around modularity, enabling multiple embedding types (dense, sparse, late-interaction) through a unified interface while leveraging ONNX for cross-platform inference optimization.\n\n## Architecture Overview\n\nFastEmbed follows a layered architecture with clear separation of concerns:\n\n```mermaid\ngraph TD\n    subgraph \"Public API Layer\"\n        A[\"TextEmbedding<br/>ImageEmbedding<br/>SparseTextEmbedding<br/>LateInteractionTextEmbedding\"]\n    end\n    \n    subgraph \"Embedding Base Classes\"\n        B[\"TextEmbeddingBase\"]\n        C[\"ImageEmbeddingBase\"]\n        D[\"SparseTextEmbeddingBase\"]\n        E[\"LateInteractionTextEmbeddingBase\"]\n    end\n    \n    subgraph \"ONNX Abstraction Layer\"\n        F[\"OnnxTextModel\"]\n        G[\"OnnxImageModel\"]\n    end\n    \n    subgraph \"Model Management\"\n        H[\"ModelManagement\"]\n        I[\"OnnxModel\"]\n    end\n    \n    subgraph \"Parallel Processing\"\n        J[\"ParallelProcessor\"]\n        K[\"EmbeddingWorker<br/>OnnxTextEmbeddingWorker\"]\n    end\n    \n    subgraph \"ONNX Runtime\"\n        L[\"InferenceSession\"]\n    end\n    \n    A --> B\n    A --> C\n    A --> D\n    A --> E\n    B --> F\n    C --> G\n    F --> I\n    G --> I\n    I --> L\n    H --> I\n    J --> K\n    K --> F\n```\n\n## Core Components\n\n### Base Class Hierarchy\n\nFastEmbed implements embedding models through inheritance hierarchies that separate concerns between the embedding interface and the ONNX runtime integration.\n\n#### Text Embedding Base\n\nThe `TextEmbeddingBase` class provides the foundation for all text embedding models. 
It defines the abstract interface that concrete implementations must follow.\n\n| Method | Purpose | Source |\n|--------|---------|--------|\n| `embed()` | Generate embeddings for input documents | [text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py) |\n| `_list_supported_models()` | Return list of supported model descriptions | [text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py) |\n| `_post_process_onnx_output()` | Transform raw ONNX output to final embeddings | [text_embedding_base.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding_base.py) |\n\n#### Image Embedding Base\n\nThe `ImageEmbeddingBase` class mirrors the text embedding architecture for image inputs, supporting multimodal models like Jina CLIP.\n\n| Property | Type | Description |\n|----------|------|-------------|\n| `model_name` | str | HuggingFace model identifier |\n| `cache_dir` | str \\| None | Local cache directory for model files |\n| `lazy_load` | bool | Defer model loading until first use |\n\n### ONNX Model Abstraction\n\nThe `OnnxModel` class serves as the bridge between the embedding interface and ONNX Runtime execution.\n\n```mermaid\nclassDiagram\n    class OnnxModel~T~ {\n        +str model_name\n        +str cache_dir\n        +InferenceSession inference_session\n        +load_model() Any\n        +run(input_feed)~T~\n    }\n    \n    class OnnxTextModel~T~ {\n        +mean_pooling(output, attention_mask)\n        +encode(input_data) Iterable~T~\n    }\n    \n    class OnnxImageModel~T~ {\n        +preprocess_image(image) Tensor\n        +encode(images) Iterable~T~\n    }\n    \n    OnnxModel <|-- OnnxTextModel\n    OnnxModel <|-- OnnxImageModel\n```\n\nThe ONNX model abstraction provides:\n\n1. **Model Loading**: Downloads and caches ONNX models from HuggingFace or custom URLs\n2. **Session Management**: Creates and configures ONNX Runtime inference sessions with specified providers (CPU, CUDA, TensorRT)\n3. 
**Input/Output Handling**: Manages input preprocessing and output postprocessing\n\n## Embedding Types\n\nFastEmbed supports multiple embedding paradigms through specialized classes.\n\n### Dense Embeddings\n\nDense embeddings convert inputs into fixed-dimensional continuous vectors. The `OnnxTextEmbedding` class provides dense text embeddings using models like BGE, GTE, and Jina embeddings.\n\n```mermaid\ngraph LR\n    A[\"Input Text\"] --> B[\"Tokenization\"]\n    B --> C[\"ONNX Inference\"]\n    C --> D[\"mean_pooling\"]\n    D --> E[\"L2 Normalization\"]\n    E --> F[\"Dense Vector\"]\n```\n\nKey supported models:\n\n| Model | Dimension | Context Length | Prefix Required |\n|-------|-----------|----------------|------------------|\n| BAAI/bge-small-en-v1.5 | 384 | 512 | No |\n| BAAI/bge-base-en-v1.5 | 768 | 512 | No |\n| BAAI/bge-large-en-v1.5 | 1024 | 512 | No |\n| jinaai/jina-embeddings-v2-base-en | 768 | 8192 | No |\n| intfloat/multilingual-e5-small | 384 | 512 | Yes |\n\n### Pooled Normalized Embeddings\n\nThe `PooledNormalizedEmbedding` class extends `PooledEmbedding` with built-in mean pooling and L2 normalization.\n\n```python\n# From pooled_normalized_embedding.py\nclass PooledNormalizedEmbedding(PooledEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        if output.attention_mask is None:\n            raise ValueError(\"attention_mask must be provided for document post-processing\")\n        \n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\n### Sparse Embeddings\n\nSparse embeddings represent documents as sparse vectors with non-zero values only at relevant token positions. FastEmbed provides two sparse embedding approaches:\n\n#### BM25\n\nBM25 is implemented as a traditional sparse embedding model with IDF weighting. 
The formula used:\n\n```\nscore(q, d) = Σ[ IDF(q_i) * (f(q_i, d) * (k + 1)) / (f(q_i, d) + k * (1 - b + b * (|d| / avg_len))) ]\n```\n\nWhere:\n- `IDF(q_i)` is the inverse document frequency\n- `f(q_i, d)` is the term frequency\n- `k`, `b` are hyperparameters controlling saturation and length normalization\n\n#### SPLADE++\n\nSPLADE++ models use neural networks to generate sparse embeddings that combine semantic understanding with exact keyword matching.\n\n### Late Interaction Embeddings\n\nLate interaction models like ColBERT generate multiple token-level embeddings that can be compared efficiently during retrieval. These are often combined with postprocessors like MUVERA for fixed-dimensional encoding.\n\n## Model Management\n\n### Model Source Configuration\n\nModels can be loaded from multiple sources defined through the `ModelSource` class:\n\n```python\nsources=ModelSource(\n    hf=\"jinaai/jina-embeddings-v2-base-en\",      # HuggingFace Hub\n    url=\"https://storage.googleapis.com/...\",      # Direct URL download\n    _deprecated_tar_struct=True                    # Legacy tar format\n)\n```\n\n### Cache Strategy\n\nThe `ModelManagement` class handles model caching and lazy loading:\n\n1. **Cache Directory**: Configurable via `FASTEMBED_CACHE_PATH` environment variable or `cache_dir` parameter\n2. **Lazy Loading**: Models are not loaded until first inference when `lazy_load=True`\n3. 
**Provider Selection**: Automatic provider selection (CUDA > CPU) with fallback\n\n```mermaid\ngraph TD\n    A[\"Model Request\"] --> B{\"Cache Hit?\"}\n    B -->|Yes| C[\"Load from Cache\"]\n    B -->|No| D[\"Download Model\"]\n    D --> E[\"Store in Cache\"]\n    C --> F[\"Initialize ONNX Session\"]\n    E --> F\n    F --> G[\"Ready for Inference\"]\n```\n\n## Parallel Processing\n\nFastEmbed uses a worker-based parallel processing architecture for efficient batch inference.\n\n### ParallelProcessor\n\nThe `ParallelProcessor` class manages a pool of workers for parallel embedding generation:\n\n```mermaid\ngraph TD\n    subgraph \"Main Process\"\n        A[\"ParallelProcessor\"]\n        B[\"Input Queue\"]\n    end\n    \n    subgraph \"Worker Pool\"\n        C[\"Worker 1\"]\n        D[\"Worker 2\"]\n        E[\"Worker N\"]\n    end\n    \n    B --> C\n    B --> D\n    B --> E\n    \n    C --> F[\"Result Queue\"]\n    D --> F\n    E --> F\n```\n\n### Worker Classes\n\nWorkers inherit from `EmbeddingWorker` or specialized variants:\n\n| Worker Class | Purpose |\n|--------------|---------|\n| `EmbeddingWorker` | Base worker for generic embeddings |\n| `OnnxTextEmbeddingWorker` | Text embedding with ONNX inference |\n| `PooledNormalizedEmbeddingWorker` | Worker with pooling and normalization |\n\nWorkers implement the `init_embedding()` method to initialize the embedding model:\n\n```python\ndef init_embedding(\n    self,\n    model_name: str,\n    cache_dir: str | None = None,\n    threads: int | None = None,\n    providers: Sequence[OnnxProvider] | None = None,\n    cuda: bool | Device = Device.AUTO,\n    device_ids: list[int] | None = None,\n    lazy_load: bool = False,\n    device_id: int | None = None,\n    specific_model_path: str | None = None,\n    **kwargs: Any,\n):\n```\n\n## Cross-Encoder Reranking\n\nThe cross-encoder reranking system follows a separate architectural path:\n\n```mermaid\ngraph LR\n    A[\"Query\"] --> D[\"Cross-Encoder\"]\n    
B[\"Candidate Doc\"] --> D\n    D --> E[\"Relevance Scores\"]\n```\n\nThe `OnnxTextCrossEncoder` class provides reranking through:\n\n```python\nclass OnnxTextCrossEncoder(TextCrossEncoderBase, OnnxCrossEncoderModel):\n    @classmethod\n    def _list_supported_models(cls) -> list[BaseModelDescription]:\n        return supported_onnx_models\n```\n\nSupported reranker models:\n\n| Model | Context Length | Languages |\n|-------|----------------|-----------|\n| jinaai/jina-reranker-v1-turbo-en | 8K | English |\n| jinaai/jina-reranker-v2-base-multilingual | 1K (sliding window) | Multilingual |\n\n## Device and Provider Management\n\nFastEmbed supports multiple execution providers with automatic device selection:\n\n```python\n# Device selection logic (simplified)\nif cuda and Device.CUDA available:\n    use CUDA provider\nelif cuda and Device.ONNX_CPU fallback:\n    use CPU provider\nelse:\n    use default provider\n```\n\n### Supported Providers\n\n| Provider | Device | Use Case |\n|----------|--------|----------|\n| CPUExecutionProvider | CPU | General purpose, no GPU |\n| CUDAExecutionProvider | NVIDIA GPU | Fast inference with CUDA |\n| TensorRTExecutionProvider | NVIDIA GPU | Optimized batch inference |\n\n## Configuration Options\n\n### Model Initialization Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | \"BAAI/bge-small-en-v1.5\" | Model identifier |\n| `cache_dir` | str \\| None | None | Cache directory path |\n| `threads` | int \\| None | None | Thread count for CPU execution |\n| `providers` | Sequence[OnnxProvider] \\| None | None | ONNX execution providers |\n| `cuda` | bool \\| Device | Device.AUTO | CUDA device selection |\n| `device_ids` | list[int] \\| None | None | Specific GPU device IDs |\n| `lazy_load` | bool | False | Defer loading until first use |\n| `device_id` | int \\| None | None | Single device ID assignment |\n| `specific_model_path` | str \\| None | None | 
Override model file path |\n\n## Data Flow\n\n### Text Embedding Pipeline\n\n```mermaid\nsequenceDiagram\n    participant Client\n    participant TextEmbedding\n    participant OnnxTextModel\n    participant ONNXRuntime\n    \n    Client->>TextEmbedding: embed(documents)\n    TextEmbedding->>TextEmbedding: preprocess(documents)\n    TextEmbedding->>OnnxTextModel: encode(preprocessed)\n    OnnxTextModel->>ONNXRuntime: run(input_feed)\n    ONNXRuntime-->>OnnxTextModel: raw_output\n    OnnxTextModel->>OnnxTextModel: mean_pooling + normalize\n    OnnxTextModel-->>TextEmbedding: embeddings\n    TextEmbedding-->>Client: Iterable[NDArray]\n```\n\n### Model Description Schema\n\nEach model is described by a `DenseModelDescription` or `BaseModelDescription`:\n\n```python\nDenseModelDescription(\n    model=\"BAAI/bge-small-en-v1.5\",\n    dim=384,\n    description=\"Text embeddings, Unimodal (text), English, 512 input tokens truncation\",\n    license=\"mit\",\n    size_in_GB=0.067,\n    sources=ModelSource(hf=\"qdrant/bge-small-en-v1.5-onnx-q\"),\n    model_file=\"model_optimized.onnx\",\n)\n```\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `model` | str | HuggingFace model identifier |\n| `dim` | int | Embedding dimension |\n| `description` | str | Human-readable model description |\n| `license` | str | Model license |\n| `size_in_GB` | float | Model file size |\n| `sources` | ModelSource | Download sources configuration |\n| `model_file` | str | ONNX model file name |\n| `additional_files` | list[str] | Extra files (vocab, configs) |\n| `requires_idf` | bool | IDF file requirement for sparse models |\n\n## Post-Processing\n\n### MUVERA Post-Processor\n\nThe `Muvera` post-processor converts late-interaction (multi-vector) embeddings to fixed-dimensional representations:\n\n```mermaid\ngraph TD\n    A[\"Multi-Vector Embeddings\"] --> B[\"Muvera Processing\"]\n    B --> C[\"Fixed-Dimensional Encoding\"]\n    C --> D[\"L2 Normalized FDE\"]\n    \n    
subgraph \"Parameters\"\n        E[\"k_sim: number of partitions\"]\n        F[\"dim_proj: projection dimension\"]\n        G[\"r_reps: repetitions\"]\n    end\n```\n\nOutput dimension calculation: `r_reps * 2^k_sim * dim_proj`\n\n## Summary\n\nThe FastEmbed architecture demonstrates a well-structured approach to embedding generation:\n\n- **Modularity**: Clear separation between embedding types, ONNX abstraction, and model management\n- **Performance**: ONNX Runtime integration with automatic provider selection\n- **Flexibility**: Support for dense, sparse, and late-interaction embeddings\n- **Extensibility**: Worker-based parallel processing with lazy loading support\n- **Portability**: Cross-platform ONNX execution with CUDA and TensorRT acceleration\n\n---\n\n<a id='page-text-embedding'></a>\n\n## Text Embedding Module\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [GPU Support and Acceleration](#page-gpu-support)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding.py)\n- [fastembed/text/onnx_text_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_text_model.py)\n- [fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/text/clip_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/clip_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n</details>\n\n# Text Embedding Module\n\nThe Text Embedding Module in FastEmbed provides high-performance text vectorization capabilities using ONNX runtime for efficient inference. 
It supports dense embeddings, pooled embeddings, normalized embeddings, and multimodal (CLIP) text embeddings.\n\n## Architecture Overview\n\nThe module follows a layered architecture with base classes providing common functionality and specialized implementations for different embedding strategies.\n\n```mermaid\ngraph TD\n    Base[TextEmbeddingBase] --> OnnxTextEmbedding\n    Base[TextEmbeddingBase] --> PooledEmbedding\n    PooledEmbedding --> PooledNormalizedEmbedding\n    OnnxTextEmbedding --> CLIPOnnxEmbedding\n    OnnxTextEmbedding --> PooledEmbedding\n    \n    Worker[OnnxTextEmbeddingWorker] -.->|init_embedding| OnnxTextEmbedding\n    CLIPWorker[CLIPEmbeddingWorker] -.->|init_embedding| CLIPOnnxEmbedding\n    \n    Models[Supported Models] -->|BGE, Jina, GTE, etc| OnnxTextEmbedding\n```\n\n### Core Components\n\n| Component | File | Purpose |\n|-----------|------|---------|\n| `TextEmbeddingBase` | `text_embedding.py` | Abstract base class defining the embedding interface |\n| `OnnxTextEmbedding` | `onnx_embedding.py` | Main ONNX-based text embedding implementation |\n| `PooledEmbedding` | `pooled_embedding.py` | Mean pooling variant for document embeddings |\n| `PooledNormalizedEmbedding` | `pooled_normalized_embedding.py` | L2-normalized pooled embeddings |\n| `CLIPOnnxEmbedding` | `clip_embedding.py` | CLIP-based multimodal text embeddings |\n\n## Supported Models\n\nFastEmbed's Text Embedding Module supports numerous pre-trained models across different languages and use cases.\n\n### Model Categories\n\n| Category | Models | Dim | License | Description |\n|----------|--------|-----|---------|-------------|\n| BGE (Base) | `BAAI/bge-base-en-v1.5` | 768 | MIT | English text, 512 tokens |\n| BGE Large | `BAAI/bge-large-en-v1.5` | 1024 | MIT | High-quality English embeddings |\n| BGE Small | `BAAI/bge-small-en-v1.5` | 384 | MIT | Lightweight English embeddings |\n| Jina | `jinaai/jina-embeddings-v2-base-en` | 768 | Apache 2.0 | English, 8192 tokens |\n| 
Jina Code | `jinaai/jina-embeddings-v2-base-code` | 768 | Apache 2.0 | 30 programming languages |\n| GTE | `thenlper/gte-base` | 768 | MIT | General text embeddings |\n| Snowflake Arctic | `snowflake/snowflake-arctic-embed-m` | 768 | Apache 2.0 | Query-optimized embeddings |\n| Multilingual E5 | `intfloat/multilingual-e5-large` | 1024 | MIT | 100 languages support |\n\n### Model Selection Criteria\n\nDifferent models have varying requirements for query/document prefixes:\n\n```python\n# Models requiring prefixes\nprefix_required = [\"intfloat/multilingual-e5-small\", \"intfloat/multilingual-e5-large\"]\n\n# Models not requiring prefixes  \nprefix_not_required = [\"BAAI/bge-base-en-v1.5\", \"jinaai/jina-embeddings-v2-base-en\"]\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:1-200]()\n\n## Class Hierarchy\n\n### TextEmbeddingBase\n\nThe abstract base class defining the contract for all text embedding implementations.\n\n```python\nclass TextEmbeddingBase(EmbeddingModel[list[float]]):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        ...\n    \n    def embed(self, documents: Iterable[str]) -> Iterable[list[float]]:\n        ...\n```\n\n资料来源：[fastembed/text/text_embedding.py]()\n\n### OnnxTextEmbedding\n\nThe primary implementation class that leverages ONNX runtime for inference.\n\n```python\nclass OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[NumpyArray]):\n    def __init__(\n        self,\n        model_name: str = \"BAAI/bge-small-en-v1.5\",\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    )\n```\n\n#### Constructor Parameters\n\n| Parameter | Type | Default | Description 
|\n|-----------|------|---------|-------------|\n| `model_name` | `str` | `\"BAAI/bge-small-en-v1.5\"` | Name of the model to use |\n| `cache_dir` | `str \\| None` | `None` | Cache directory path |\n| `threads` | `int \\| None` | `None` | Number of threads for ONNX |\n| `providers` | `Sequence[OnnxProvider]` | `None` | ONNX execution providers |\n| `cuda` | `bool \\| Device` | `Device.AUTO` | CUDA acceleration |\n| `device_ids` | `list[int]` | `None` | GPU device IDs |\n| `lazy_load` | `bool` | `False` | Load model on first use |\n\n资料来源：[fastembed/text/onnx_embedding.py:200-250]()\n\n## Pooling Strategies\n\nThe module implements multiple pooling strategies to aggregate token-level embeddings into sentence-level embeddings.\n\n### Mean Pooling\n\nMean pooling computes the average of all token embeddings weighted by the attention mask.\n\n```python\ndef mean_pooling(self, embeddings: NumpyArray, attention_mask: NumpyArray) -> NumpyArray:\n    # Expand attention mask to broadcast\n    attention_mask_expanded = np.expand_dims(attention_mask, -1)\n    # Sum embeddings where mask is active\n    sum_embeddings = np.sum(embeddings * attention_mask_expanded, axis=1)\n    # Count valid tokens\n    counts = np.sum(attention_mask_expanded, axis=1)\n    # Return mean\n    return sum_embeddings / counts\n```\n\n### PooledEmbedding\n\nApplies mean pooling after ONNX inference to generate document embeddings.\n\n```python\nclass PooledEmbedding(OnnxTextEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return self.mean_pooling(embeddings, attn_mask)\n```\n\n### PooledNormalizedEmbedding\n\nExtends pooling with L2 normalization for cosine similarity optimization.\n\n```python\nclass PooledNormalizedEmbedding(PooledEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, 
**kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\n资料来源：[fastembed/text/pooled_embedding.py]()\n资料来源：[fastembed/text/pooled_normalized_embedding.py]()\n\n## CLIP Text Embeddings\n\nThe CLIP embedding implementation provides multimodal text/image embedding capabilities.\n\n### Supported CLIP Models\n\n| Model | Dimension | License | Description |\n|-------|-----------|---------|-------------|\n| `Qdrant/clip-ViT-B-32-text` | 512 | MIT | CLIP ViT-B/32 text encoder |\n| `jinaai/jina-clip-v1` | 768 | Apache 2.0 | Jina CLIP multimodal |\n\n### CLIPOnnxEmbedding\n\n```python\nclass CLIPOnnxEmbedding(OnnxTextEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        return output.model_output  # Direct passthrough, no pooling\n```\n\n资料来源：[fastembed/text/clip_embedding.py]()\n\n## Inference Workflow\n\n```mermaid\ngraph LR\n    A[Input Text] --> B[Tokenization]\n    B --> C[ONNX Inference]\n    C --> D{Embedding Type?}\n    D -->|Standard| E[Direct Output]\n    D -->|Pooled| F[Mean Pooling]\n    D -->|Pooled Norm| G[Mean Pooling + Normalize]\n    E --> H[Final Embeddings]\n    F --> H\n    G --> H\n```\n\n## Usage Examples\n\n### Basic Dense Embedding\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\ndocuments = [\n    \"The quick brown fox jumps over the lazy dog\",\n    \"A journey of a thousand miles begins with a single step\"\n]\n\nembeddings = list(model.embed(documents))\n# Returns: list of 768-dimensional embedding vectors\n```\n\n### Pooled Normalized Embedding\n\n```python\nfrom fastembed import PooledNormalizedEmbedding\n\nmodel = PooledNormalizedEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\nembeddings = list(model.embed(documents))\n# Returns: L2-normalized 
pooled embeddings\n```\n\n### With Custom ONNX Providers\n\n```python\nfrom fastembed import TextEmbedding\nfrom fastembed.common import OnnxProvider\n\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-large-en-v1.5\",\n    providers=[OnnxProvider.CPUExecutionProvider],\n    threads=8\n)\n```\n\n### Multilingual E5 with Prefix\n\n```python\nfrom fastembed import TextEmbedding\n\nmodel = TextEmbedding(model_name=\"intfloat/multilingual-e5-small\")\n\n# E5 models require query prefix\nquery_text = \"query: \" + user_query\ndocument_text = \"passage: \" + document_text\n\nquery_embedding = list(model.embed([query_text]))\ndoc_embedding = list(model.embed([document_text]))\n```\n\n## Model Sources Configuration\n\nModels can be loaded from multiple sources:\n\n```python\nfrom fastembed.common import ModelSource\n\nsources=ModelSource(\n    hf=\"xenova/jina-embeddings-v2-base-en\",      # HuggingFace Hub\n    url=\"https://storage.googleapis.com/...\",     # Direct URL\n    _deprecated_tar_struct=True                   # Legacy format\n)\n```\n\n| Source Type | Priority | Description |\n|-------------|----------|-------------|\n| `hf` | Primary | HuggingFace Hub repository |\n| `url` | Fallback | Direct download URL |\n| Local cache | Cached | Previously downloaded files |\n\n## Post-Processing Pipeline\n\n```mermaid\ngraph TD\n    subgraph \"ONNX Output\"\n        A[model_output] --> B[attention_mask]\n    end\n    \n    subgraph \"Post-Processing\"\n        B --> C{Masking Required?}\n        A --> C\n        C -->|Yes| D[Apply Attention Mask]\n        D --> E{Normalization?}\n        C -->|No| E\n        E -->|Yes| F[L2 Normalize]\n        E -->|No| G[Return Raw]\n        F --> H[Final Output]\n        G --> H\n    end\n```\n\n## Configuration Constants\n\n| Parameter | Default | Description |\n|-----------|---------|-------------|\n| `default_model` | `\"BAAI/bge-small-en-v1.5\"` | Fallback model |\n| `pooling_type` | `PoolingType.MEAN` | Pooling strategy |\n| 
`normalization` | `True` | L2 normalization flag |\n\n资料来源：[fastembed/text/onnx_text_model.py]()\n\n## Integration with Qdrant\n\nFastEmbed text embeddings are designed for seamless integration with Qdrant vector database:\n\n```python\nfrom fastembed import TextEmbedding\nimport qdrant_client\n\nmodel = TextEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\nembeddings = list(model.embed(documents))\n\n# Upload to Qdrant\nclient = qdrant_client.QdrantClient()\nclient.upsert(\n    collection_name=\"text_embeddings\",\n    points=[...]\n)\n```\n\n## Performance Considerations\n\n### Token Truncation Limits\n\n| Model Type | Max Tokens | Notes |\n|------------|------------|-------|\n| BGE Small/Large | 512 | Standard context |\n| Jina v2 | 8192 | Extended context |\n| Multilingual E5 | 512 | Query-optimized |\n| Arctic Embed | 512-2048 | Variable by model |\n\n### Hardware Acceleration\n\nThe module automatically detects and utilizes available hardware:\n\n1. **CUDA** - GPU acceleration via CUDAExecutionProvider\n2. **CPU** - Multi-threaded via CPUExecutionProvider\n3. 
**CoreML** - Apple Silicon support via CoreMLExecutionProvider\n\n资料来源：[fastembed/text/onnx_embedding.py:250-300]()\n\n## Error Handling\n\n### Common Error Cases\n\n```python\n# ValueError: attention_mask must be provided for pooled embeddings\nmodel = PooledNormalizedEmbedding(...)\n# Must ensure model outputs attention_mask\n\n# ModelNotSupportedError: Unknown model\nmodel = TextEmbedding(model_name=\"unknown/model\")\n# Falls back to default model or raises error\n```\n\n### Validation Requirements\n\n| Check | Condition | Error Type |\n|-------|-----------|------------|\n| `attention_mask` | Required for pooled | `ValueError` |\n| `model_name` | Must be in supported list | `ModelNotSupportedError` |\n| `cache_dir` | Valid path | `OSError` |\n\n## See Also\n\n- [Sparse Text Embeddings](fastembed/sparse/) - SPLADE++ and BM25\n- [Image Embeddings](fastembed/image/) - Vision model embeddings\n- [Reranking Models](fastembed/rerank/) - Cross-encoder reranking\n- [Post-Processors](fastembed/postprocess/) - MUVERA and other processors\n\n---\n\n<a id='page-image-embedding'></a>\n\n## Image Embedding Module\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [Late Interaction Models](#page-late-interaction)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/text/clip_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/clip_embedding.py)\n- [fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/image/image_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/image_embedding.py)\n</details>\n\n# Image Embedding Module\n\nThe Image Embedding Module in FastEmbed provides 
functionality for generating vector representations (embeddings) from images using ONNX-based models. This module enables efficient image similarity search, image clustering, and multimodal retrieval applications.\n\n## Architecture Overview\n\nThe image embedding system follows a layered architecture pattern, separating the model definitions, ONNX inference logic, and embedding base classes.\n\n```mermaid\ngraph TD\n    A[Image Input] --> B[ImageEmbeddingBase]\n    B --> C[OnnxImageModel]\n    C --> D[OnnxImageEmbedding]\n    D --> E[ONNX Runtime]\n    E --> F[Embedding Vectors]\n    \n    G[Supported Models] --> D\n    G -.-> H[ResNet50]\n    G -.-> I[Unicom-ViT-B-16]\n    G -.-> J[Unicom-ViT-B-32]\n    G -.-> K[jinaai/jina-clip-v1]\n```\n\n## Supported Image Models\n\nThe module supports multiple image embedding models with varying dimensions and capabilities.\n\n| Model | Dimension | Type | License | Size (GB) | HF Source |\n|-------|-----------|------|---------|-----------|-----------|\n| `Qdrant/resnet50-onnx` | 512 | Image only | apache-2.0 | 0.10 | [link](https://huggingface.co/Qdrant/resnet50-onnx) |\n| `Qdrant/Unicom-ViT-B-16` | 768 | Multimodal (text&image) | apache-2.0 | 0.82 | [link](https://huggingface.co/Qdrant/Unicom-ViT-B-16) |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Multimodal (text&image) | apache-2.0 | 0.48 | [link](https://huggingface.co/Qdrant/Unicom-ViT-B-32) |\n| `jinaai/jina-clip-v1` | 768 | Multimodal (text&image) | apache-2.0 | 0.34 | [link](https://huggingface.co/jinaai/jina-clip-v1) |\n\n资料来源：[fastembed/image/onnx_embedding.py:1-40](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Core Classes\n\n### OnnxImageEmbedding\n\nThe main class for generating image embeddings extends `ImageEmbeddingBase` and `OnnxImageModel[NumpyArray]`.\n\n```python\nclass OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):\n```\n\n**Class Hierarchy:**\n\n```mermaid\ngraph LR\n    A[TextEmbeddingBase] 
-->|inheritance| B[ImageEmbeddingBase]\n    C[OnnxTextModel] -->|generic| D[OnnxImageModel]\n    E[OnnxTextEmbedding] -->|reuse pattern| F[OnnxImageEmbedding]\n```\n\nThe class follows the same architectural pattern as `OnnxTextEmbedding`, sharing the ONNX inference infrastructure with text embedding models. 资料来源：[fastembed/image/onnx_embedding.py:44-50](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n### Constructor Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | `str` | `\"Qdrant/clip-ViT-B-32\"` | Name of the model to use |\n| `cache_dir` | `str \\| None` | `None` | Path to cache directory |\n| `threads` | `int \\| None` | `None` | Number of threads for inference |\n| `providers` | `Sequence[OnnxProvider] \\| None` | `None` | ONNX execution providers |\n| `cuda` | `bool \\| Device` | `Device.AUTO` | CUDA device configuration |\n| `device_ids` | `list[int] \\| None` | `None` | Specific device IDs |\n| `lazy_load` | `bool` | `False` | Load model lazily |\n| `device_id` | `int \\| None` | `None` | Specific device ID |\n\n## Multimodal Image Models\n\n### CLIP-based Models\n\nThe `Unicom-ViT-B-16` and `Unicom-ViT-B-32` models support both image and text inputs, enabling cross-modal retrieval scenarios.\n\n| Model | Vision Dimension | Description |\n|-------|------------------|-------------|\n| Unicom-ViT-B-16 | 768 | More detailed embeddings (16x16 patches) |\n| Unicom-ViT-B-32 | 512 | Faster processing (32x32 patches) |\n\n### jina-clip-v1\n\nThe `jinaai/jina-clip-v1` model is a 2024 multimodal model supporting both text and image inputs:\n\n```python\nDenseModelDescription(\n    model=\"jinaai/jina-clip-v1\",\n    dim=768,\n    description=\"Image embeddings, Multimodal (text&image), 2024 year\",\n    license=\"apache-2.0\",\n    size_in_GB=0.34,\n    sources=ModelSource(hf=\"jinaai/jina-clip-v1\"),\n    
model_file=\"onnx/vision_model.onnx\",\n),\n```\n\n资料来源：[fastembed/image/onnx_embedding.py:20-28](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Model Loading Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant OnnxImageEmbedding\n    participant OnnxImageModel\n    participant ONNX Runtime\n    participant HuggingFace\n\n    User->>OnnxImageEmbedding: __init__(model_name)\n    OnnxImageEmbedding->>OnnxImageModel: Load model from cache/HF\n    OnnxImageModel->>HuggingFace: Download if needed\n    OnnxImageModel->>ONNX Runtime: Initialize session\n    ONNX Runtime-->>OnnxImageModel: Session ready\n    OnnxImageModel-->>OnnxImageEmbedding: Model loaded\n    User->>OnnxImageEmbedding: embed(image)\n    OnnxImageEmbedding->>ONNX Runtime: Run inference\n    ONNX Runtime-->>User: Embedding vector\n```\n\n## Usage Examples\n\n### Basic Image Embedding\n\n```python\nfrom fastembed import ImageEmbedding\n\nmodel = ImageEmbedding(model_name=\"Qdrant/Unicom-ViT-B-32\")\nembeddings = list(model.embed([\"path/to/image.jpg\"]))\n```\n\n### CLIP Text-Image Retrieval\n\nFor multimodal models like jina-clip-v1, you can perform cross-modal retrieval:\n\n```python\nfrom fastembed import ImageEmbedding, TextEmbedding\n\nimage_model = ImageEmbedding(model_name=\"jinaai/jina-clip-v1\")\ntext_model = TextEmbedding(model_name=\"jinaai/jina-clip-v1\")\n\n# Generate embeddings for both modalities\nimage_emb = list(image_model.embed([\"image_path.jpg\"]))\ntext_emb = list(text_model.embed([\"search query\"]))\n\n# Compute similarity\nfrom numpy import dot\n\nsimilarity = dot(image_emb[0], text_emb[0])\n```\n\n## Integration with Qdrant\n\nImage embeddings generated by this module are designed for use with Qdrant vector database:\n\n```python\n# Creating a Qdrant collection with image embeddings\nfrom qdrant_client import QdrantClient\nfrom qdrant_client.models import Distance, VectorParams\n\nclient = QdrantClient(\"localhost\", port=6333)\n\nclient.create_collection(\n    
collection_name=\"images\",\n    vectors_config={\n        \"image\": VectorParams(\n            size=768,  # For Unicom-ViT-B-16 or jina-clip-v1\n            distance=Distance.COSINE\n        )\n    }\n)\n```\n\n## Supported Model Files\n\nEach model specifies its ONNX model file location:\n\n| Model | Model File | Additional Files |\n|-------|------------|------------------|\n| ResNet50 | `model.onnx` | None |\n| Unicom-ViT-B-16 | `model.onnx` | None |\n| Unicom-ViT-B-32 | `model.onnx` | None |\n| jina-clip-v1 | `onnx/vision_model.onnx` | `onnx/text_model.onnx` |\n\n资料来源：[fastembed/image/onnx_embedding.py:10-28](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n\n## Relationship with Text Embedding Module\n\nThe Image Embedding Module shares significant implementation with the Text Embedding Module:\n\n```mermaid\ngraph TD\n    A[OnnxTextModel] -->|shared base| B[OnnxImageModel]\n    A -->|shared base| C[OnnxTextEmbedding]\n    B -->|shared base| D[OnnxImageEmbedding]\n    \n    E[supported_onnx_models] -->|text models| C\n    F[supported_image_models] -->|image models| D\n    \n    G[CLIPEmbeddingWorker] -.->|reused| H[OnnxTextEmbeddingWorker]\n```\n\nThis design ensures consistent ONNX inference behavior and reduces code duplication across embedding types. 
资料来源：[fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n\n## Performance Considerations\n\n| Model | Size (GB) | Embedding Dim | Use Case |\n|-------|-----------|---------------|----------|\n| ResNet50 | 0.10 | 512 | Fast, lightweight embeddings |\n| Unicom-ViT-B-32 | 0.48 | 512 | Balanced speed/quality |\n| Unicom-ViT-B-16 | 0.82 | 768 | Higher quality, slower |\n| jina-clip-v1 | 0.34 | 768 | Multimodal, 2024 model |\n\n## Configuration Options\n\nThe module inherits configuration capabilities from the base classes:\n\n```python\n# Example with full configuration\nmodel = OnnxImageEmbedding(\n    model_name=\"Qdrant/Unicom-ViT-B-16\",\n    cache_dir=\"~/.cache/fastembed\",\n    providers=[\"CPUExecutionProvider\"],  # or \"CUDAExecutionProvider\"\n    threads=4,\n    lazy_load=True\n)\n```\n\n## Summary\n\nThe Image Embedding Module provides:\n\n- **4 supported image models** ranging from lightweight to high-quality\n- **Multimodal support** via CLIP-based models for text-image retrieval\n- **ONNX runtime optimization** for efficient inference\n- **Qdrant integration** for vector storage and similarity search\n- **Consistent API** with the text embedding module\n\n---\n\n<a id='page-sparse-embedding'></a>\n\n## Sparse Embedding Models\n\n### 相关页面\n\n相关主题：[Text Embedding Module](#page-text-embedding), [System Architecture](#page-architecture)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/sparse/splade_pp.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/splade_pp.py)\n- [fastembed/sparse/bm25.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm25.py)\n- [fastembed/sparse/bm42.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/bm42.py)\n- [fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n- 
[fastembed/sparse/sparse_text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/sparse_text_embedding.py)\n- [fastembed/sparse/utils/sparse_vectors_converter.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/utils/sparse_vectors_converter.py)\n</details>\n\n# Sparse Embedding Models\n\n## Overview\n\nSparse embedding models in FastEmbed represent text as high-dimensional sparse vectors where most dimensions have zero values. Unlike dense embeddings where every dimension contributes to meaning, sparse embeddings only store non-zero values for specific tokens or features. This approach combines semantic understanding with exact keyword matching capabilities.\n\nThe sparse representation consists of:\n- **Indices**: Token identifiers in the vocabulary\n- **Values**: Weight/scores representing token importance\n\n```python\n# Example sparse embedding output\nSparseEmbedding(indices=[17, 123, 919, ...], values=[0.71, 0.22, 0.39, ...])\n```\n\n**资料来源：** [fastembed/sparse/sparse_text_embedding.py:1-50]()\n\n## Architecture\n\n### Class Hierarchy\n\n```mermaid\ngraph TD\n    A[SparseTextEmbeddingBase] --> B[MiniCOIL]\n    A --> C[Bm25]\n    A --> D[BM42]\n    A --> E[SPLADE++]\n    \n    F[OnnxTextModel<br/>SparseEmbedding] --> A\n    \n    G[OnnxTextEmbeddingWorker] --> F\n```\n\nThe sparse embedding system is built on a base class `SparseTextEmbeddingBase` that extends `OnnxTextModel[SparseEmbedding]`, providing a unified interface for all sparse embedding implementations.\n\n**资料来源：** [fastembed/sparse/minicoil.py:30-50]()\n\n### Supported Models\n\n| Model | Type | Language | Size | License | Requires IDF |\n|-------|------|----------|------|---------|--------------|\n| SPLADE++ | Sparse/SPLADE | English | 0.22 GB | apache-2.0 | Yes |\n| BM25 | Traditional BM25 | Multi-language | 0.01 GB | apache-2.0 | Yes |\n| BM42 | Hybrid BM25+Attention | English | 0.04 GB | apache-2.0 | Yes |\n| MiniCOIL | Semantic + Keyword | English | 
0.09 GB | apache-2.0 | Yes |\n\n**资料来源：** [fastembed/sparse/bm25.py:1-30]()\n\n## Core Components\n\n### SparseTextEmbeddingBase\n\nThe base class for all sparse embedding models defines the common interface and behavior:\n\n```python\nclass SparseTextEmbeddingBase(OnnxTextModel[SparseEmbedding]):\n    \"\"\"Base class for sparse text embedding models\"\"\"\n    \n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        \"\"\"Returns list of supported sparse models\"\"\"\n        \n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[SparseEmbedding]:\n        \"\"\"Post-process ONNX model output to sparse format\"\"\"\n```\n\n**资料来源：** [fastembed/sparse/sparse_text_embedding.py:1-100]()\n\n### SparseEmbedding Data Model\n\nThe `SparseEmbedding` class represents sparse vectors with two primary attributes:\n\n| Attribute | Type | Description |\n|-----------|------|-------------|\n| `indices` | list[int] | Vocabulary token IDs with non-zero values |\n| `values` | list[float] | Corresponding importance weights for each index |\n\n**资料来源：** [fastembed/sparse/sparse_text_embedding.py:100-150]()\n\n## Implementation Details\n\n### BM25\n\nBM25 (Best Matching 25) is a traditional sparse embedding model that evaluates token importance based on term frequency and inverse document frequency.\n\n**Formula:**\n```\nscore(q, d) = SUM[ IDF(q_i) * (f(q_i, d) * (k + 1)) / (f(q_i, d) + k * (1 - b + b * (|d| / avg_len))) ]\n```\n\n| Parameter | Default | Description |\n|-----------|---------|-------------|\n| `k` | 1.5 | Term frequency saturation parameter |\n| `b` | 0.75 | Length normalization parameter |\n| `avg_len` | Computed | Average document length |\n\n**WARNING:** BM25 is expected to be used with `modifier=\"idf\"` in the sparse vector index of Qdrant.\n\n**资料来源：** [fastembed/sparse/bm25.py:30-80]()\n\n### MiniCOIL\n\nMiniCOIL is a sparse embedding model that combines semantic 
meaning resolution with exact keyword matching behavior.\n\n**Key Characteristics:**\n- Converts vocabulary tokens into 4-dimensional components of sparse vectors\n- Weights tokens by their frequency in the corpus\n- Falls back to BM25-like behavior for out-of-vocabulary tokens\n\n```python\nclass MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):\n    \"\"\"\n    MiniCOIL resolves semantic meaning while keeping exact keyword match behavior.\n    Each vocabulary token is converted into 4d component of a sparse vector.\n    \"\"\"\n```\n\n**资料来源：** [fastembed/sparse/minicoil.py:30-55]()\n\n### BM42\n\nBM42 extends traditional BM25 by incorporating attention weights from transformer models, creating a hybrid sparse representation.\n\n**资料来源：** [fastembed/sparse/bm42.py:1-50]()\n\n### SPLADE++\n\nSPLADE++ (SParse Lexical AnD Expansion model) uses a sparse expansion approach where each token can expand to related terms in the vocabulary, enabling semantic matching while maintaining interpretability.\n\n**资料来源：** [fastembed/sparse/splade_pp.py:1-50]()\n\n## Data Flow\n\n```mermaid\ngraph LR\n    A[Input Text] --> B[Tokenization]\n    B --> C[ONNX Model Inference]\n    C --> D[ONNX Output Context]\n    D --> E[Post-Processing]\n    E --> F[SparseEmbedding]\n    \n    G[Vocabulary] --> C\n    G --> H[SparseVectorsConverter]\n    H --> E\n```\n\n## SparseVectorsConverter Utility\n\nThe `SparseVectorsConverter` class handles conversion between different sparse vector formats, particularly for MiniCOIL's word embeddings.\n\n**Key Operations:**\n- Converts sentence embeddings to Qdrant sparse vector format\n- Handles out-of-vocabulary (OOV) words with fallback to BM25\n- Manages vocabulary word embeddings with 4-dimensional components\n\n```python\n# Example input structure\n{\n    \"vector\": WordEmbedding({\n        \"word\": \"vector\",\n        \"forms\": [\"vector\", \"vectors\"],\n        \"count\": 2,\n        \"word_id\": 1231,\n        \"embedding\": 
[0.1, 0.2, 0.3, 0.4]\n    }),\n    \"axiotic\": WordEmbedding({  # OOV word\n        \"word\": \"axiotic\",\n        \"forms\": [\"axiotics\"],\n        \"count\": 1,\n        \"word_id\": -1,\n    })\n}\n```\n\n**资料来源：** [fastembed/sparse/utils/sparse_vectors_converter.py:50-100]()\n\n## Usage Examples\n\n### Basic Usage\n\n```python\nfrom fastembed import SparseTextEmbedding\n\n# Initialize with default SPLADE++ model\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\n\n# Generate sparse embeddings\ndocuments = [\"Example text for embedding\", \"Another document\"]\nembeddings = list(model.embed(documents))\n\n# Output: [SparseEmbedding(indices=[17, 123, 919, ...], values=[0.71, 0.22, 0.39, ...])]\n```\n\n### BM25 Usage\n\n```python\nfrom fastembed import SparseTextEmbedding\n\nmodel = SparseTextEmbedding(model_name=\"Qdrant/bm25\")\nembeddings = list(model.embed(documents))\n```\n\n### With Qdrant\n\nSparse embeddings are designed for use with Qdrant's sparse vector index with `modifier=\"idf\"`:\n\n```python\nfrom qdrant_client import QdrantClient\nfrom fastembed import SparseTextEmbedding\n\nclient = QdrantClient()\nmodel = SparseTextEmbedding(model_name=\"prithivida/Splade_PP_en_v1\")\n\n# Embedding will have indices and values compatible with Qdrant sparse vectors\n```\n\n**资料来源：** [README.md:1-100]()\n\n## Configuration Options\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | \"prithivida/Splade_PP_en_v1\" | Name of the sparse embedding model |\n| `cache_dir` | str \\| None | None | Cache directory path for model files |\n| `threads` | int \\| None | None | Number of threads for inference |\n| `providers` | Sequence[OnnxProvider] \\| None | None | ONNX execution providers |\n| `lazy_load` | bool | False | Whether to load model lazily |\n| `device_id` | int \\| None | None | Specific device ID for execution |\n\n## Language Support\n\n| Model | Languages 
|\n|-------|-----------|\n| SPLADE++ | English only |\n| BM25 | 15+ languages |\n| BM42 | English |\n| MiniCOIL | English |\n\n**资料来源：** [fastembed/sparse/bm25.py:1-25]()\n\n## Requirements\n\nAll sparse embedding models require IDF (Inverse Document Frequency) weighting for optimal performance. This is typically handled by the vector database (e.g., Qdrant) during indexing and search operations.\n\n---\n\n<a id='page-late-interaction'></a>\n\n## Late Interaction Models\n\n### 相关页面\n\n相关主题：[Image Embedding Module](#page-image-embedding), [System Architecture](#page-architecture), [Text Embedding Module](#page-text-embedding)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/late_interaction/colbert.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/colbert.py)\n- [fastembed/late_interaction/jina_colbert.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/jina_colbert.py)\n- [fastembed/late_interaction/late_interaction_text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/late_interaction_text_embedding.py)\n- [fastembed/late_interaction_multimodal/colpali.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colpali.py)\n- [fastembed/late_interaction_multimodal/colmodernvbert.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colmodernvbert.py)\n- [fastembed/late_interaction_multimodal/onnx_multimodal_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/onnx_multimodal_model.py)\n</details>\n\n# Late Interaction Models\n\n## Overview\n\nLate Interaction Models represent an advanced embedding paradigm that departs from traditional single-vector dense representations. 
Unlike conventional dense embedding models that compress entire documents into a single embedding vector, late interaction models preserve token-level embeddings and defer the similarity computation until query time. This approach enables granular token-to-token interactions between queries and documents, significantly improving retrieval precision for complex semantic matching tasks.\n\nThe FastEmbed library implements two categories of late interaction models:\n\n| Category | Scope | Use Case |\n|----------|-------|----------|\n| **Text-based** | Query-document text matching | Semantic search, question answering |\n| **Multimodal** | Text + image joint embedding | Visual document retrieval, image search |\n\n## Architecture\n\n### Late Interaction vs Traditional Dense Retrieval\n\n```mermaid\ngraph TD\n    subgraph \"Traditional Dense Retrieval\"\n        A1[Query Text] --> B1[Encoder]\n        D1[Document] --> E1[Encoder]\n        B1 --> C1[Single Query Vector]\n        E1 --> F1[Single Document Vector]\n        C1 --> G1[Dot Product / Cosine Similarity]\n        F1 --> G1\n    end\n    \n    subgraph \"Late Interaction Retrieval\"\n        A2[Query Text] --> B2[Encoder]\n        D2[Document] --> E2[Encoder]\n        B2 --> C2[Query Token Embeddings]\n        E2 --> F2[Document Token Embeddings]\n        C2 --> G2[Late Interaction Module]\n        F2 --> G2\n        G2 --> H2[Max-Sum Similarity]\n    end\n    \n    style C1 fill:#ffcccc\n    style F1 fill:#ffcccc\n    style C2 fill:#ccffcc\n    style F2 fill:#ccffcc\n```\n\n### Colbert Late Interaction Mechanism\n\nThe Colbert model employs a **max-similarity** strategy where query tokens independently find their most similar document token, and relevance is computed as the sum of these maximum similarities:\n\n```mermaid\ngraph LR\n    Q1[\"Query: q1 q2 q3\"] --> QE[Query Encoder]\n    D1[\"Doc: d1 d2\"] --> DE[Document Encoder]\n    QE --> QT[\"Q_emb: [v₁, v₂, v₃]\"]\n    DE --> DT[\"D_emb: [u₁, u₂]\"]\n    QT 
--> MM[Similarity Matrix]\n    DT --> MM\n    MM --> MS[Max Similarity<br/>sim(qᵢ) = maxⱼ S(vᵢ, uⱼ)]\n    MS --> SR[Score = Σᵢ sim(qᵢ)]\n```\n\n## Supported Models\n\n### Text-based Late Interaction Models\n\n| Model | Dimension | Context Length | Multilingual | License | Size |\n|-------|-----------|----------------|---------------|---------|------|\n| `jinaai/jina-colbert-v2` | 128 | 8192 | Yes | cc-by-nc-4.0 | 2.24 GB |\n\n### Multimodal Late Interaction Models\n\n| Model | Modality | Description |\n|-------|----------|-------------|\n| `vidore/colpali` | Text + Image | Vision-Language late interaction |\n| `vidore/colqwen2` | Text + Image | Qwen2-based multimodal |\n| `vidore/colmodernvbert` | Text + Image | Modern vision backbone |\n\n## Base Classes\n\n### LateInteractionTextEmbedding\n\nAbstract base class for text-based late interaction models.\n\n```python\nclass LateInteractionTextEmbedding(TextEmbeddingBase):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        ...\n    \n    @classmethod\n    def _get_worker_class(cls) -> Type[OnnxTextEmbeddingWorker]:\n        ...\n```\n\n资料来源：[late_interaction_text_embedding.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/late_interaction_text_embedding.py)\n\n### Colbert Base Class\n\nThe `Colbert` class implements the core late interaction logic:\n\n```python\nclass Colbert(LateInteractionTextEmbedding):\n    QUERY_MARKER_TOKEN_ID: int =  1  # Default CLS token\n    DOCUMENT_MARKER_TOKEN_ID: int = 1  # Default CLS token\n    MIN_QUERY_LENGTH: int = 32\n    MASK_TOKEN: str = \"[MASK]\"\n```\n\n资料来源：[colbert.py:20-60](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/colbert.py)\n\nKey methods:\n\n| Method | Purpose |\n|--------|---------|\n| `encode_query()` | Encode a single query, returning token embeddings |\n| `encode_document()` | Encode a single document, returning token embeddings |\n| `score()` | Compute late 
interaction similarity between query and document |\n\n## Jina Colbert Implementation\n\nThe `JinaColbert` class extends the base `Colbert` with model-specific configuration:\n\n```python\nclass JinaColbert(Colbert):\n    QUERY_MARKER_TOKEN_ID = 250002\n    DOCUMENT_MARKER_TOKEN_ID = 250003\n    MIN_QUERY_LENGTH = 31  # 32 minus 1 for special token\n    MASK_TOKEN = \"<mask>\"\n```\n\n资料来源：[jina_colbert.py:15-19](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction/jina_colbert.py)\n\n#### Model Configuration\n\n| Parameter | Value | Description |\n|-----------|-------|-------------|\n| `model` | `jinaai/jina-colbert-v2` | HuggingFace model identifier |\n| `dim` | 128 | Token embedding dimension |\n| `size_in_GB` | 2.24 | Model size |\n| `context_length` | 8192 | Maximum input tokens |\n| `license` | cc-by-nc-4.0 | Model license |\n\n## Multimodal Late Interaction\n\n### ColPali Model\n\nColPali extends the late interaction paradigm to visual documents by treating images as sequences of patches:\n\n```python\nclass ColPali(MultimodalTextImageBase, OnnxMultimodalModel[MultivectorEmbedding]):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        return supported_colpali_models\n```\n\n资料来源：[colpali.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colpali.py)\n\n### ColModernVBert Model\n\nColModernVBert provides an alternative vision-language architecture with a modern backbone:\n\n```python\nclass ColModernVBert(MultimodalTextImageBase, OnnxMultimodalModel[MultivectorEmbedding]):\n    QUERY_MARKER_TOKEN_ID = 0\n    DOCUMENT_MARKER_TOKEN_ID = 1\n    MIN_QUERY_LENGTH = 32\n    MASK_TOKEN = \"<mask>\"\n```\n\n资料来源：[colmodernvbert.py:1-50](https://github.com/qdrant/fastembed/blob/main/fastembed/late_interaction_multimodal/colmodernvbert.py)\n\n### Architecture: Multimodal Late Interaction\n\n```mermaid\ngraph TD\n    subgraph \"Query Processing\"\n        QT[Text 
Query] --> QE[Text Encoder]\n        QV[Query Image] --> QP[Patch Extraction]\n        QP --> QI[Query Image Embeddings]\n        QE --> QT_emb[Query Token Embeddings]\n    end\n    \n    subgraph \"Document Processing\"\n        DT[Document Text] --> DE[Text Encoder]\n        DV[Document Image] --> DP[Patch Extraction]\n        DP --> DI[Doc Image Embeddings]\n        DE --> DT_emb[Document Token Embeddings]\n    end\n    \n    subgraph \"Late Interaction\"\n        QT_emb --> LI[Interaction Module]\n        DT_emb --> LI\n        QI --> LI\n        DI --> LI\n        LI --> SM[Similarity Matrix]\n        SM --> MS[Max-Sum Pooling]\n    end\n    \n    MS --> SC[Relevance Score]\n```\n\n## Usage Examples\n\n### Text-based Late Interaction\n\n```python\nfrom fastembed import LateInteractionTextEmbedding\n\n# Initialize the model\nmodel = LateInteractionTextEmbedding(\n    model_name=\"jinaai/jina-colbert-v2\"\n)\n\n# Encode query and document separately\nquery_embedding = model.query_embed(\"What is machine learning?\")\ndoc_embedding = model.doc_embed(\"Machine learning is a subset of AI...\")\n\n# Compute late interaction score\nscore = model.score(query_embedding, doc_embedding)\n```\n\n### Multimodal Late Interaction\n\n```python\nfrom fastembed import LateInteractionTextImageEmbedding\n\nmodel = LateInteractionTextImageEmbedding(\n    model_name=\"vidore/colpali\"\n)\n\n# Encode image with optional text\nimage_embedding = model.doc_embed(image=image_bytes)\nquery_embedding = model.query_embed(\"Find charts about revenue\")\n```\n\n## Configuration Options\n\n### Common Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | str | Required | Model identifier |\n| `cache_dir` | str | None | Local cache directory |\n| `threads` | int | None | CPU threads for inference |\n| `providers` | Sequence[OnnxProvider] | None | ONNX execution providers |\n| `lazy_load` | bool | False | Defer model loading 
until first use |\n| `device_id` | int | None | Specific device index |\n\n### ONNX Providers\n\n| Provider | Description | Priority |\n|----------|-------------|----------|\n| `CPUExecutionProvider` | CPU inference | Default fallback |\n| `CUDAExecutionProvider` | NVIDIA GPU | Preferred for speed |\n| `CoreMLExecutionProvider` | Apple Silicon | Mobile/iOS |\n\n## Post-processing: Muvera\n\nMuvera is a post-processing technique that converts late interaction embeddings (multi-vector) into fixed-dimensional representations:\n\n```python\nfrom fastembed.postprocess import Muvera\n\n# Convert from multi-vector to fixed-dim\nmuvera = Muvera.from_multivector_model(\n    model=late_interaction_model,\n    k_sim=6,\n    dim_proj=32\n)\n\n# Process document\nfde = muvera.process_document(multivector_embedding)\n```\n\n资料来源：[postprocess/muvera.py:1-100](https://github.com/qdrant/fastembed/blob/main/fastembed/postprocess/muvera.py)\n\n### Muvera Configuration\n\n| Parameter | Description | Impact |\n|-----------|-------------|--------|\n| `k_sim` | Log₂ of number of buckets | Memory vs precision |\n| `dim_proj` | Projection dimension | Output size |\n| `r_reps` | Number of repetitions | Robustness |\n| `random_seed` | Random seed | Reproducibility |\n\nOutput dimension formula: `r_reps × 2^k_sim × dim_proj`\n\n## Performance Considerations\n\n### Token Length Limits\n\n| Model | Query Limit | Document Limit |\n|-------|-------------|----------------|\n| jina-colbert-v2 | 31 tokens | 8192 tokens |\n| colpali | Variable | Variable |\n\n### Memory Usage\n\nLate interaction models store per-token embeddings rather than single vectors:\n\n- **Traditional dense**: N × D memory (N documents, D dimension)\n- **Late interaction**: N × T × D memory (T = avg token count)\n\nThis trade-off enables better precision at the cost of increased memory footprint.\n\n## Comparison with Dense Retrieval\n\n| Aspect | Dense Retrieval | Late Interaction 
|\n|--------|-----------------|------------------|\n| Embedding type | Single vector | Token-level vectors |\n| Query speed | O(1) comparison | O(Q × D) interaction |\n| Precision | Good for semantic similarity | Excellent for term matching |\n| Memory | Lower | Higher |\n| Interpretability | Limited | Token-level attribution |\n\n## Related Components\n\n| Component | File | Purpose |\n|-----------|------|---------|\n| `ColbertEmbeddingWorker` | `colbert.py` | Parallel embedding worker |\n| `OnnxMultimodalModel` | `onnx_multimodal_model.py` | Base for ONNX multimodal |\n| `MultivectorEmbedding` | Types | Output type for late interaction |\n| `Muvera` | `muvera.py` | Dimensionality reduction post-process |\n\n## References\n\n- Original Colbert paper: Khattab & Zaharia, \"ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction\"\n- Model source: [jinaai/jina-colbert-v2](https://huggingface.co/jinaai/jina-colbert-v2)\n\n---\n\n<a id='page-onnx-model'></a>\n\n## ONNX Model Infrastructure\n\n### 相关页面\n\n相关主题：[System Architecture](#page-architecture), [GPU Support and Acceleration](#page-gpu-support)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/common/onnx_model.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/onnx_model.py)\n- [fastembed/common/types.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/types.py)\n- [fastembed/common/preprocessor_utils.py](https://github.com/qdrant/fastembed/blob/main/fastembed/common/preprocessor_utils.py)\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- [fastembed/text/clip_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/clip_embedding.py)\n- 
[fastembed/sparse/minicoil.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/minicoil.py)\n</details>\n\n# ONNX Model Infrastructure\n\n## Overview\n\nThe ONNX Model Infrastructure is the core runtime layer in FastEmbed that enables efficient execution of embedding models through the ONNX (Open Neural Network Exchange) format. This infrastructure provides a unified abstraction for loading, executing, and post-processing ONNX models across different embedding modalities including text, images, and sparse embeddings.\n\nFastEmbed leverages ONNX Runtime to achieve cross-platform compatibility and optimized inference performance without requiring PyTorch or TensorFlow dependencies. The architecture separates model execution from embedding-specific logic, keeping the ONNX runtime layer independent of the higher-level embedding abstractions. 资料来源：[fastembed/text/onnx_embedding.py:1-50]()\n\n## Architecture Overview\n\nThe ONNX infrastructure follows a class hierarchy pattern where base classes define the runtime contract and concrete implementations provide modality-specific behavior. 
The architecture is designed around the following core components:\n\n```mermaid\ngraph TD\n    A[ONNX Runtime] --> B[OnnxModel Base]\n    B --> C[OnnxTextModel]\n    B --> D[OnnxImageModel]\n    B --> E[OnnxCrossEncoderModel]\n    C --> F[OnnxTextEmbedding]\n    C --> G[PooledEmbedding]\n    C --> H[CLIPOnnxEmbedding]\n    C --> I[MiniCOIL]\n    D --> J[OnnxImageEmbedding]\n    E --> K[OnnxTextCrossEncoder]\n    \n    L[TextEmbeddingBase] --> F\n    M[ImageEmbeddingBase] --> J\n    N[SparseTextEmbeddingBase] --> I\n```\n\n### Core Base Classes\n\nThe infrastructure defines three primary base classes that orchestrate ONNX model execution:\n\n| Class | File | Purpose |\n|-------|------|---------|\n| `OnnxModel` | `fastembed/common/onnx_model.py` | Core runtime for ONNX session management |\n| `OnnxTextModel` | `fastembed/text/onnx_text_model.py` | Text-specific ONNX execution |\n| `OnnxImageModel` | `fastembed/image/onnx_embedding.py` | Image-specific ONNX execution |\n\n资料来源：[fastembed/common/onnx_model.py:1-100]()\n\n## ONNX Session Management\n\n### Model Loading\n\nThe `OnnxModel` base class handles the lifecycle of ONNX model loading and execution. When a model is initialized, it performs the following operations:\n\n1. Resolves the model file path from cache or downloads from source\n2. Configures ONNX Runtime session options (threads, providers)\n3. Creates an inference session with the specified execution providers\n4. 
Validates model inputs and outputs\n\n```python\nclass OnnxModel(Generic[T]):\n    def __init__(\n        self,\n        model_dir: Path,\n        model_file: str,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_id: int | None = None,\n        **kwargs: Any,\n    ):\n        self._load_onnx_model(\n            model_dir=model_dir,\n            model_file=model_file,\n            threads=threads,\n            providers=providers,\n            cuda=cuda,\n            device_id=device_id,\n        )\n```\n\n资料来源：[fastembed/common/onnx_model.py:50-80]()\n\n### Execution Providers\n\nThe infrastructure supports multiple ONNX Runtime execution providers for hardware acceleration:\n\n| Provider | Priority | Use Case |\n|----------|----------|----------|\n| `CUDAExecutionProvider` | GPU acceleration | NVIDIA GPUs |\n| `CPUExecutionProvider` | Fallback | CPU inference |\n\nThe `Device` enum provides automatic device selection:\n\n```python\nclass Device(Enum):\n    CPU = \"cpu\"\n    CUDA = \"cuda\"\n    AUTO = \"auto\"\n```\n\n资料来源：[fastembed/common/types.py:1-50]()\n\n## Text Embedding Infrastructure\n\n### OnnxTextEmbedding\n\nThe `OnnxTextEmbedding` class is the primary implementation for text embedding generation. 
It inherits from both `TextEmbeddingBase` and `OnnxTextModel`, combining the ONNX runtime with embedding-specific logic.\n\n```python\nclass OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[NumpyArray]):\n    \"\"\"Implementation of the Flag Embedding model.\"\"\"\n    \n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        return supported_onnx_models\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:60-85]()\n\n#### Supported Models\n\nThe text embedding infrastructure includes a comprehensive list of supported models:\n\n| Model | Dimension | License | Size (GB) | Token Limit |\n|-------|-----------|---------|-----------|-------------|\n| `BAAI/bge-base-en` | 768 | mit | 0.42 | 512 |\n| `BAAI/bge-base-en-v1.5` | 768 | mit | 0.21 | 512 |\n| `BAAI/bge-large-en-v1.5` | 1024 | mit | 1.20 | 512 |\n| `BAAI/bge-small-en-v1.5` | 384 | mit | 0.067 | 512 |\n| `snowflake/snowflake-arctic-embed-m` | 768 | apache-2.0 | 0.43 | 512 |\n| `snowflake/snowflake-arctic-embed-m-long` | 768 | apache-2.0 | 0.54 | 2048 |\n| `jinaai/jina-clip-v1` | 768 | apache-2.0 | 0.55 | multimodal |\n| `mixedbread-ai/mxbai-embed-large-v1` | 1024 | apache-2.0 | 0.64 | 512 |\n\n资料来源：[fastembed/text/onnx_embedding.py:30-150]()\n\n### Pooled Embedding Variants\n\nThe infrastructure provides specialized pooling strategies through inheritance:\n\n#### PooledNormalizedEmbedding\n\nApplies mean pooling over token embeddings followed by L2 normalization:\n\n```python\nclass PooledNormalizedEmbedding(PooledEmbedding):\n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        embeddings = output.model_output\n        attn_mask = output.attention_mask\n        return normalize(self.mean_pooling(embeddings, attn_mask))\n```\n\nSupported models for pooled normalized embeddings include:\n- `jinaai/jina-embeddings-v2-base-en` (768 dim, 8192 tokens)\n- `jinaai/jina-embeddings-v2-small-en` (512 dim, 8192 
tokens)\n- `thenlper/gte-base` (768 dim, 512 tokens)\n- `thenlper/gte-large` (1024 dim, 512 tokens)\n\n资料来源：[fastembed/text/pooled_normalized_embedding.py:50-100]()\n\n#### PooledEmbedding\n\nStandard pooled embedding with mean pooling over token representations:\n\n```python\nclass PooledEmbedding(OnnxTextEmbedding):\n    @classmethod\n    def _get_worker_class(cls) -> Type[OnnxTextEmbeddingWorker]:\n        return PooledEmbeddingWorker\n```\n\nSupported multilingual and specialized models:\n- `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (384 dim, ~50 languages)\n- `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` (768 dim, ~50 languages)\n- `intfloat/multilingual-e5-large` (1024 dim, ~100 languages)\n\n资料来源：[fastembed/text/pooled_embedding.py:50-150]()\n\n## Image Embedding Infrastructure\n\n### OnnxImageEmbedding\n\nImage embedding models inherit from `OnnxImageModel` and `ImageEmbeddingBase`:\n\n```python\nclass OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):\n    def __init__(self, model_name: str, cache_dir: str | None = None, ...):\n        ...\n```\n\nSupported image models:\n\n| Model | Dimension | License | Size (GB) |\n|-------|-----------|---------|-----------|\n| `Qdrant/resnet50-onnx` | 2048 | apache-2.0 | - |\n| `Qdrant/Unicom-ViT-B-16` | 768 | apache-2.0 | 0.82 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | apache-2.0 | 0.48 |\n| `jinaai/jina-clip-v1` | 768 | apache-2.0 | 0.34 |\n\n资料来源：[fastembed/image/onnx_embedding.py:30-80]()\n\n## Multimodal Embedding\n\n### CLIP Embeddings\n\nThe `CLIPOnnxEmbedding` class provides multimodal (text and image) embedding capabilities:\n\n```python\nclass CLIPOnnxEmbedding(OnnxTextEmbedding):\n    @classmethod\n    def _list_supported_models(cls) -> list[DenseModelDescription]:\n        return supported_clip_models\n    \n    def _post_process_onnx_output(\n        self, output: OnnxOutputContext, **kwargs: Any\n    ) -> Iterable[NumpyArray]:\n        return 
output.model_output\n```\n\nCurrently supported CLIP model:\n- `Qdrant/clip-ViT-B-32-text` (512 dim, 77 tokens)\n\n资料来源：[fastembed/text/clip_embedding.py:20-50]()\n\n### Late Interaction Multimodal\n\nThe `ColModernVBert` class implements late interaction models with image processing capabilities:\n\n```python\ndef load_onnx_model(self) -> None:\n    self._load_onnx_model(...)\n    \n    # Load image processing configuration\n    processor_config_path = self._model_dir / \"processor_config.json\"\n    self.image_seq_len = processor_config.get(\"image_seq_len\", 64)\n    self.max_image_size = preprocessor_config.get(\"max_image_size\", {}).get(\"longest_edge\", 512)\n```\n\n资料来源：[fastembed/late_interaction_multimodal/colmodernvbert.py:50-100]()\n\n## Sparse Embedding Infrastructure\n\n### MiniCOIL\n\nThe `MiniCOIL` class implements sparse embedding with semantic resolution:\n\n```python\nclass MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):\n    \"\"\"\n    MiniCOIL is a sparse embedding model, that resolves semantic meaning of the words,\n    while keeping exact keyword match behavior.\n    \"\"\"\n```\n\nEach vocabulary token is converted into a 4-dimensional component of a sparse vector, weighted by token frequency in the corpus. 
If a token is not found in the corpus, it is treated exactly like in BM25.\n\nSupported sparse models:\n- `Qdrant/minicoil-v1` (0.09 GB, requires IDF weighting)\n\n资料来源：[fastembed/sparse/minicoil.py:40-80]()\n\n## Worker Architecture\n\nThe infrastructure uses a worker-based pattern for parallel embedding generation:\n\n```mermaid\ngraph LR\n    A[Main Thread] --> B[OnnxTextEmbeddingWorker]\n    B --> C[ONNX Session]\n    C --> D[Tokenization]\n    D --> E[Model Inference]\n    E --> F[Post-processing]\n    F --> G[Normalized Embeddings]\n```\n\n### Worker Classes\n\n| Worker Class | Parent | Purpose |\n|--------------|--------|---------|\n| `OnnxTextEmbeddingWorker` | Base | Standard text embedding generation |\n| `PooledEmbeddingWorker` | `OnnxTextEmbeddingWorker` | Mean pooling after inference |\n| `PooledNormalizedEmbeddingWorker` | `OnnxTextEmbeddingWorker` | Pooling + L2 normalization |\n| `CLIPEmbeddingWorker` | `OnnxTextEmbeddingWorker` | CLIP-specific processing |\n\n资料来源：[fastembed/text/onnx_text_model.py:1-50]()\n\n## Reranking Infrastructure\n\n### OnnxTextCrossEncoder\n\nThe cross-encoder reranking uses a specialized ONNX model class:\n\n```python\nclass OnnxTextCrossEncoder(TextCrossEncoderBase, OnnxCrossEncoderModel):\n    @classmethod\n    def _list_supported_models(cls) -> list[BaseModelDescription]:\n        return supported_onnx_models\n```\n\nSupported reranker models:\n\n| Model | License | Size (GB) | Context |\n|-------|---------|-----------|---------|\n| `jinaai/jina-reranker-v1-turbo-en` | apache-2.0 | 0.15 | 1K context |\n| `jinaai/jina-reranker-v2-base-multilingual` | cc-by-nc-4.0 | 1.11 | 1K context, sliding window |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py:30-80]()\n\n## Model Source Configuration\n\n### ModelSource\n\nModels can be loaded from multiple sources:\n\n```python\n@dataclass\nclass ModelSource:\n    hf: str | None = None           # HuggingFace Hub\n    url: str | None = None         # Direct URL 
download\n    _deprecated_tar_struct: bool = False  # Legacy tar format\n```\n\n### ModelDescription\n\nThe base model description structure:\n\n```python\n@dataclass\nclass DenseModelDescription:\n    model: str\n    dim: int\n    description: str\n    license: str\n    size_in_GB: float\n    sources: ModelSource\n    model_file: str\n    additional_files: list[str] | None = None\n```\n\n资料来源：[fastembed/common/model_description.py:1-80]()\n\n## Inference Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant EmbeddingClass\n    participant OnnxModel\n    participant ONNXRuntime\n    \n    User->>EmbeddingClass: embed(texts)\n    EmbeddingClass->>OnnxModel: preprocess(texts)\n    OnnxModel->>OnnxModel: tokenize()\n    OnnxModel->>ONNXRuntime: run(session)\n    ONNXRuntime-->>OnnxModel: model_output\n    OnnxModel->>EmbeddingClass: _post_process_onnx_output()\n    EmbeddingClass->>EmbeddingClass: normalize/pool()\n    EmbeddingClass-->>User: numpy arrays\n```\n\n## Configuration Parameters\n\n### Common Parameters\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `model_name` | `str` | model-specific | Name of the model to use |\n| `cache_dir` | `str \\| None` | `None` | Cache directory for model files |\n| `threads` | `int \\| None` | `None` | Number of threads for ONNX |\n| `providers` | `Sequence[OnnxProvider] \\| None` | `None` | Execution providers |\n| `cuda` | `bool \\| Device` | `Device.AUTO` | CUDA device selection |\n| `device_ids` | `list[int] \\| None` | `None` | Multiple GPU device IDs |\n| `lazy_load` | `bool` | `False` | Defer model loading |\n| `device_id` | `int \\| None` | `None` | Specific device ID |\n| `specific_model_path` | `str \\| None` | `None` | Custom model file path |\n\n资料来源：[fastembed/text/onnx_embedding.py:85-120]()\n\n## Lazy Loading\n\nThe infrastructure supports lazy loading for memory-efficient initialization:\n\n```python\ndef __init__(\n    self,\n    
lazy_load: bool = False,\n    ...\n):\n    if not lazy_load:\n        self.load_onnx_model()\n```\n\nWhen `lazy_load=True`, the ONNX model is not loaded until the first inference call, reducing startup memory footprint.\n\n## Type System\n\n### Core Types\n\n| Type | Definition | Usage |\n|------|------------|-------|\n| `NumpyArray` | `np.ndarray[Any, np.dtype[Any]]` | Dense embedding arrays |\n| `SparseEmbedding` | Custom sparse representation | Sparse embedding vectors |\n| `OnnxProvider` | Execution provider type | CPU, CUDA providers |\n| `Device` | Enum | Device selection (CPU/CUDA/AUTO) |\n\n资料来源：[fastembed/common/types.py:1-100]()\n\n## Summary\n\nThe ONNX Model Infrastructure provides a robust, extensible foundation for embedding generation in FastEmbed. Key characteristics include:\n\n- **Unified Runtime**: Single ONNX execution layer across all embedding modalities\n- **Hardware Acceleration**: Support for CUDA and CPU execution providers\n- **Model Flexibility**: Dynamic model loading from HuggingFace, URLs, or local cache\n- **Extensible Architecture**: Clean inheritance hierarchy for adding new embedding types\n- **Memory Efficiency**: Lazy loading and optimized session management\n- **Cross-Modal Support**: Text, image, sparse, and multimodal embeddings\n\nThis infrastructure enables FastEmbed to deliver high-performance embedding generation without external ML framework dependencies, making it suitable for production deployments with varying hardware constraints.\n\n---\n\n<a id='page-gpu-support'></a>\n\n## GPU Support and Acceleration\n\n### 相关页面\n\n相关主题：[Installation Guide](#page-installation), [ONNX Model Infrastructure](#page-onnx-model)\n\n<details>\n<summary>相关源码文件</summary>\n\n以下源码文件用于生成本页说明：\n\n- [fastembed/text/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/onnx_embedding.py)\n- [fastembed/image/onnx_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/image/onnx_embedding.py)\n- 
[fastembed/text/pooled_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_embedding.py)\n- [fastembed/text/pooled_normalized_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/pooled_normalized_embedding.py)\n- [fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py](https://github.com/qdrant/fastembed/blob/main/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py)\n- [fastembed/sparse/splade_pp.py](https://github.com/qdrant/fastembed/blob/main/fastembed/sparse/splade_pp.py)\n- [fastembed/text/text_embedding.py](https://github.com/qdrant/fastembed/blob/main/fastembed/text/text_embedding.py)\n- [README.md](https://github.com/qdrant/fastembed/blob/main/README.md)\n</details>\n\n# GPU Support and Acceleration\n\nFastEmbed provides comprehensive GPU acceleration support through ONNX Runtime's execution providers, enabling high-performance inference on NVIDIA GPUs. The library offers flexible device management with automatic detection, multi-GPU support for parallel processing, and lazy loading capabilities for efficient resource utilization.\n\n## Architecture Overview\n\nFastEmbed's GPU acceleration is built on top of ONNX Runtime, which provides hardware-accelerated inference for ONNX models. 
All embedding model classes inherit from base ONNX model classes that handle device initialization and session management.\n\n```mermaid\ngraph TD\n    A[User Code] --> B[TextEmbedding / ImageEmbedding / CrossEncoder]\n    B --> C[ONNX Model Base Classes]\n    C --> D[ONNX Runtime Session]\n    D --> E{Hardware Acceleration}\n    E --> F[CUDA Execution Provider]\n    E --> G[CPU Execution Provider]\n    E --> H[TensorRT Provider]\n    \n    F --> H1[NVIDIA GPU]\n    G --> H2[CPU Fallback]\n    \n    style F fill:#4CAF50,color:#fff\n    style H1 fill:#2196F3,color:#fff\n```\n\n## Supported Model Types\n\nFastEmbed supports GPU acceleration across multiple embedding modalities and processing types.\n\n| Model Type | Class | GPU Support | Description |\n|------------|-------|-------------|-------------|\n| Text Embeddings | `OnnxTextEmbedding` | ✅ | Dense text embeddings via ONNX |\n| Pooled Embeddings | `PooledEmbedding` | ✅ | Pooled representation embeddings |\n| Normalized Pooled | `PooledNormalizedEmbedding` | ✅ | L2-normalized pooled embeddings |\n| Image Embeddings | `OnnxImageEmbedding` | ✅ | Vision model embeddings |\n| Cross Encoders | `OnnxTextCrossEncoder` | ✅ | Reranking and relevance scoring |\n| Sparse Embeddings | SPLADE models | ✅ | Lexical sparse embeddings |\n\n资料来源：[fastembed/text/onnx_embedding.py:1-50]()\n\n## Device Configuration\n\n### Device Enum\n\nThe `Device` enum defines available compute devices with automatic selection capability.\n\n```python\nclass Device(Enum):\n    AUTO = \"auto\"  # Automatically select best available device\n    CPU = \"cpu\"    # Force CPU execution\n    CUDA = \"cuda\"  # NVIDIA GPU acceleration\n```\n\n### Initialization Parameters\n\nAll ONNX embedding classes accept the following GPU-related parameters:\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `cuda` | `bool \\| Device` | `Device.AUTO` | Enable CUDA or specify device type |\n| `providers` | 
`Sequence[OnnxProvider]` | `None` | ONNX Runtime providers (mutually exclusive with cuda) |\n| `device_ids` | `list[int]` | `None` | GPU device IDs for multi-GPU data parallelism |\n| `device_id` | `int` | `None` | Specific device ID for single-process loading |\n| `lazy_load` | `bool` | `False` | Defer model loading until first use |\n\n资料来源：[fastembed/text/onnx_embedding.py:47-57]()\n\n### Constructor Signature\n\n```python\ndef __init__(\n    self,\n    model_name: str = \"BAAI/bge-small-en-v1.5\",\n    cache_dir: str | None = None,\n    threads: int | None = None,\n    providers: Sequence[OnnxProvider] | None = None,\n    cuda: bool | Device = Device.AUTO,\n    device_ids: list[int] | None = None,\n    lazy_load: bool = False,\n    device_id: int | None = None,\n    specific_model_path: str | None = None,\n    **kwargs: Any,\n):\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:44-66]()\n\n## GPU Initialization Workflow\n\n```mermaid\nsequenceDiagram\n    participant User\n    participant Embedding as Embedding Class\n    participant Base as ONNX Model Base\n    participant Runtime as ONNX Runtime\n    participant Device as Compute Device\n    \n    User->>Embedding: Initialize(cuda=True)\n    Embedding->>Base: super().__init__()\n    Base->>Device: Auto-detect device\n    Device-->>Base: Available devices\n    Base->>Runtime: Create InferenceSession\n    Runtime->>Device: Load model to GPU\n    Device-->>Runtime: Model loaded\n    Runtime-->>Base: Session ready\n    Base-->>Embedding: Return session\n    Embedding-->>User: Instance ready\n```\n\n## Multi-GPU Configuration\n\n### Data Parallel Processing\n\nFor scenarios requiring distribution across multiple GPUs, FastEmbed supports device ID specification for data-parallel workloads.\n\n```python\nfrom fastembed import TextEmbedding\n\n# Initialize for multi-GPU data parallelism\nembedding_model = TextEmbedding(\n    model_name=\"BAAI/bge-base-en-v1.5\",\n    cuda=True,\n    device_ids=[0, 1, 2, 3],  # Use 4 
GPUs\n    lazy_load=True  # Required for multi-GPU setup\n)\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:52-55]()\n\n### Lazy Loading for Multi-GPU\n\nWhen using multiple GPUs, `lazy_load=True` defers model loading until first inference, which is essential for avoiding resource conflicts in multi-process scenarios.\n\n```python\nembedding_model = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    cuda=True,\n    device_ids=[0, 1],\n    lazy_load=True  # Load on-demand in worker processes\n)\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:54]()\n\n## ONNX Runtime Providers\n\n### Provider Selection\n\nONNX Runtime supports multiple execution providers. FastEmbed allows explicit provider specification via the `providers` parameter, which is mutually exclusive with the `cuda` parameter.\n\n```python\nfrom fastembed import TextEmbedding\n\n# Using explicit provider specification\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\"CUDAExecutionProvider\", \"CPUExecutionProvider\"]\n)\n```\n\n### Provider Priority\n\nWhen multiple providers are specified, ONNX Runtime attempts to use them in order of preference, falling back to subsequent providers if the preferred one is unavailable.\n\n```mermaid\ngraph LR\n    A[Query] --> B{CUDA Available?}\n    B -->|Yes| C[CUDAExecutionProvider]\n    B -->|No| D{CPU Provider Available?}\n    D -->|Yes| E[CPUExecutionProvider]\n    D -->|No| F[Error]\n    \n    C --> G[GPU Inference]\n    E --> H[CPU Inference]\n    \n    style C fill:#4CAF50,color:#fff\n    style E fill:#FF9800,color:#fff\n```\n\n## GPU Installation\n\n### Package Variants\n\nFastEmbed offers separate packages for CPU and GPU operation.\n\n| Package | Command | Use Case |\n|---------|---------|----------|\n| CPU (default) | `pip install fastembed` | Standard installations |\n| GPU | `pip install fastembed-gpu` | NVIDIA GPU acceleration |\n\n资料来源：[README.md:1-20]()\n\n### Qdrant Integration\n\nFor vector database 
workflows with GPU acceleration:\n\n```bash\npip install qdrant-client[fastembed-gpu]\n```\n\n```python\nfrom fastembed import TextEmbedding\n\n# GPU-accelerated embedding for Qdrant\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\"CUDAExecutionProvider\"]\n)\nprint(\"The model BAAI/bge-small-en-v1.5 is ready to use on a GPU.\")\n```\n\n资料来源：[README.md:1-30]()\n\n## Implementation Across Model Classes\n\n### OnnxTextEmbedding\n\nThe primary text embedding class with full GPU support:\n\n```python\nclass OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[NumpyArray]):\n    \"\"\"Implementation of the Flag Embedding model with ONNX acceleration.\"\"\"\n    \n    def __init__(\n        self,\n        model_name: str = \"BAAI/bge-small-en-v1.5\",\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    ):\n```\n\n资料来源：[fastembed/text/onnx_embedding.py:48-70]()\n\n### OnnxImageEmbedding\n\nImage embeddings also inherit the same GPU acceleration framework:\n\n```python\nclass OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):\n    def __init__(\n        self,\n        model_name: str,\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    ):\n```\n\n资料来源：[fastembed/image/onnx_embedding.py:1-30]()\n\n### OnnxTextCrossEncoder\n\nReranking models support GPU acceleration for cross-encoder 
inference:\n\n```python\nclass OnnxTextCrossEncoder(TextCrossEncoderBase, OnnxCrossEncoderModel):\n    def __init__(\n        self,\n        model_name: str,\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        device_id: int | None = None,\n        specific_model_path: str | None = None,\n        **kwargs: Any,\n    ):\n```\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py:1-50]()\n\n## Unified TextEmbedding Entry Point\n\nThe `TextEmbedding` class provides a unified interface that automatically selects the appropriate embedding type:\n\n```python\nclass TextEmbedding(TextEmbeddingBase):\n    def __init__(\n        self,\n        model_name: str = \"BAAI/bge-small-en-v1.5\",\n        cache_dir: str | None = None,\n        threads: int | None = None,\n        providers: Sequence[OnnxProvider] | None = None,\n        cuda: bool | Device = Device.AUTO,\n        device_ids: list[int] | None = None,\n        lazy_load: bool = False,\n        **kwargs: Any,\n    ):\n        super().__init__(model_name, cache_dir, threads, **kwargs)\n        # Automatically routes to appropriate embedding type\n        for EMBEDDING_MODEL_TYPE in self.EMBEDDINGS_REGISTRY:\n            supported_models = EMBEDDING_MODEL_TYPE._list_supported_models()\n            if any(model_name.lower() == model.model.lower() \n                   for model in supported_models):\n                self.model = EMBEDDING_MODEL_TYPE(\n                    model_name=model_name,\n                    cache_dir=cache_dir,\n                    threads=threads,\n                    providers=providers,\n                    cuda=cuda,\n                    device_ids=device_ids,\n                    lazy_load=lazy_load,\n                )\n```\n\n资料来源：[fastembed/text/text_embedding.py:1-100]()\n\n## Supported 
Models with GPU Acceleration\n\n### Text Embedding Models\n\n| Model | Dimension | License | Size (GB) | Token Limit |\n|-------|-----------|---------|-----------|-------------|\n| `BAAI/bge-small-en-v1.5` | 384 | MIT | 0.067 | 512 |\n| `BAAI/bge-base-en-v1.5` | 768 | MIT | 0.21 | 512 |\n| `BAAI/bge-large-en-v1.5` | 1024 | MIT | 1.20 | 512 |\n| `jinaai/jina-embeddings-v2-base-en` | 768 | Apache 2.0 | 0.52 | 8192 |\n| `sentence-transformers/all-MiniLM-L6-v2` | 384 | Apache 2.0 | 0.09 | 256 |\n| `mixedbread-ai/mxbai-embed-large-v1` | 1024 | Apache 2.0 | 0.64 | 512 |\n| `nomic-ai/nomic-embed-text-v1.5` | 768 | Apache 2.0 | 0.13 | 8192 |\n\n资料来源：[fastembed/text/onnx_embedding.py:1-150](), [fastembed/text/pooled_embedding.py:1-80]()\n\n### Image Embedding Models\n\n| Model | Dimension | License | Size (GB) |\n|-------|-----------|---------|-----------|\n| `Qdrant/Unicom-ViT-B-16` | 768 | Apache 2.0 | 0.82 |\n| `Qdrant/Unicom-ViT-B-32` | 512 | Apache 2.0 | 0.48 |\n| `jinaai/jina-clip-v1` | 768 | Apache 2.0 | 0.55 |\n\n资料来源：[fastembed/image/onnx_embedding.py:1-50]()\n\n### Reranking Models\n\n| Model | License | Size (GB) |\n|-------|---------|-----------|\n| `jinaai/jina-reranker-v1-turbo-en` | Apache 2.0 | 0.15 |\n| `jinaai/jina-reranker-v2-base-multilingual` | CC BY-NC 4.0 | 1.11 |\n\n资料来源：[fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py:1-40]()\n\n## Best Practices\n\n### Device Selection\n\n```python\nfrom fastembed.common.types import Device\n\n# Recommended: Automatic detection\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\", cuda=Device.AUTO)\n\n# Explicit CUDA\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\", cuda=True)\n\n# Force CPU (for debugging)\nmodel = TextEmbedding(model_name=\"BAAI/bge-small-en-v1.5\", cuda=False)\n```\n\n### Multi-GPU Inference\n\n```python\n# For batch processing across multiple GPUs\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-base-en-v1.5\",\n    cuda=True,\n    device_ids=[0, 1],  # Parallel 
GPU usage\n    lazy_load=True\n)\n```\n\n### Provider Fallback\n\n```python\n# Explicit provider chain with fallback\nmodel = TextEmbedding(\n    model_name=\"BAAI/bge-small-en-v1.5\",\n    providers=[\n        \"CUDAExecutionProvider\",  # Preferred\n        \"CPUExecutionProvider\"    # Fallback\n    ]\n)\n```\n\n## Limitations and Considerations\n\n| Aspect | Description |\n|--------|-------------|\n| Mutual Exclusivity | `providers` and `cuda` parameters cannot be used together |\n| Device ID Scope | `device_ids` is for data parallelism; `device_id` is for single-process loading |\n| Lazy Loading | Required for multi-GPU setups to avoid resource conflicts |\n| Model Support | All ONNX-exported models support GPU; not all models have ONNX exports |\n\n资料来源：[fastembed/text/onnx_embedding.py:47-57]()\n\n## Summary\n\nFastEmbed's GPU acceleration framework provides:\n\n1. **Automatic device detection** via the `Device.AUTO` enum value\n2. **Flexible provider configuration** through ONNX Runtime's provider system\n3. **Multi-GPU support** with device ID lists for data-parallel workloads\n4. **Lazy loading** for efficient multi-process GPU utilization\n5. **Consistent API** across text, image, and reranking models\n6. **Seamless fallback** to CPU when CUDA is unavailable\n\n---\n\n---\n\n## Doramagic Pitfall Log\n\nProject: qdrant/fastembed\n\nSummary: Found 29 potential pitfall items; 3 are high/blocking. Highest priority: installation - 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2.\n\n## 1. 
installation · 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_201d4035515846df8830ca0dad6960c5 | https://github.com/qdrant/fastembed/issues/618 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. installation · 来源证据：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_8147621574b345d7955e79ad98f4ba6f | https://github.com/qdrant/fastembed/issues/630 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 3. security_permissions · 失败模式：security_permissions: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside ca...\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: Developers should check this security_permissions risk before relying on the project: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- User impact: Developers may expose sensitive permissions or credentials: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory. 
Context: Observed when using python\n- Guardrail action: Do not recommend enabling privileged or credential-bearing paths until the source-backed risk is reviewed: https://github.com/qdrant/fastembed/issues/626\n- Evidence: failure_mode_cluster:github_issue | fmev_d3890c2b3360ccb937839f70fd4aa584 | https://github.com/qdrant/fastembed/issues/626 | [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n\n## 4. installation · 失败模式：installation: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- User impact: Developers may fail before the first successful local run: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment. Context: Observed when using python, docker\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_16e50a8626aff1576adeb1c0baab4785 | https://github.com/qdrant/fastembed/issues/466 | The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n## 5. 
installation · 失败模式：installation: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- User impact: Developers may fail before the first successful local run: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2. Context: Observed when using python, windows, linux\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_04529bc774f1c961d4adeb7190edecd7 | https://github.com/qdrant/fastembed/issues/618 | [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n## 6. installation · 失败模式：installation: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- User impact: Developers may fail before the first successful local run: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14. 
Context: Observed when using python, macos, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_79a43347d96beb6d05eb6bfec2503fb5 | https://github.com/qdrant/fastembed/issues/630 | [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n## 7. installation · 失败模式：installation: v0.5.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: v0.5.1\n- User impact: Upgrade or migration may change expected behavior: v0.5.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.5.1. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_8b37c58c613005c0182d0325aaf032f7 | https://github.com/qdrant/fastembed/releases/tag/v0.5.1 | v0.5.1\n\n## 8. installation · 来源证据：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_0f507b37e33e456ea259e82966cecdc5 | https://github.com/qdrant/fastembed/issues/466 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 9. 
configuration · 来源证据：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个配置相关的待验证问题：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_6d570ba91cfd414f970a3a8da522be04 | https://github.com/qdrant/fastembed/issues/627 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 10. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:666260877 | https://github.com/qdrant/fastembed | README/documentation is current enough for a first validation pass.\n\n## 11. runtime · 失败模式：runtime: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1. 
Context: Observed when using python, linux\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_17b849ae47ffaf5d18cabbd577f373ca | https://github.com/qdrant/fastembed/issues/603 | [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n## 12. runtime · 失败模式：runtime: v0.4.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.4.2\n- User impact: Upgrade or migration may change expected behavior: v0.4.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.4.2. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_607c8ff157108b2b5fb78f55129b60f6 | https://github.com/qdrant/fastembed/releases/tag/v0.4.2 | v0.4.2\n\n## 13. runtime · 失败模式：runtime: v0.7.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.7.1\n- User impact: Upgrade or migration may change expected behavior: v0.7.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_07583dafa20e640a1720d7a77188475e | https://github.com/qdrant/fastembed/releases/tag/v0.7.1 | v0.7.1\n\n## 14. 
runtime · 失败模式：runtime: v0.8.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.8.0\n- User impact: Upgrade or migration may change expected behavior: v0.8.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.8.0. Context: Observed when using python, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_054bae0c9c1667cf5568de7ecb7087c9 | https://github.com/qdrant/fastembed/releases/tag/v0.8.0 | v0.8.0\n\n## 15. maintenance · 来源证据：[Bug]: license error in pypi metadata\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[Bug]: license error in pypi metadata\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_017eda744ea84e679aeb5d77d41f7541 | https://github.com/qdrant/fastembed/issues/620 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 16. maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | last_activity_observed missing\n\n## 17. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:666260877 | https://github.com/qdrant/fastembed | no_demo; severity=medium\n\n## 18. 
security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:666260877 | https://github.com/qdrant/fastembed | no_demo; severity=medium\n\n## 19. security_permissions · 来源证据：[Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_cfcd79475b1344a0bba508ac4346edf4 | https://github.com/qdrant/fastembed/issues/603 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 20. security_permissions · 来源证据：[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7603e6da390349c9aff6eed6cc5072a9 | https://github.com/qdrant/fastembed/issues/626 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 21. 
capability · 失败模式：capability: [Bug]: license error in pypi metadata\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this capability risk before relying on the project: [Bug]: license error in pypi metadata\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: license error in pypi metadata\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: license error in pypi metadata. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_9e209e0c78e000b92930bccb67db1440 | https://github.com/qdrant/fastembed/issues/620 | [Bug]: license error in pypi metadata\n\n## 22. runtime · 失败模式：performance: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: No timeout on model download — requests.get() can hang indefinitely. Context: Observed when using python, docker\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_69d0e997d6514a5d87d46c30a69795b3 | https://github.com/qdrant/fastembed/issues/627 | [Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n## 23. 
runtime · 失败模式：performance: v0.7.4\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: v0.7.4\n- User impact: Upgrade or migration may change expected behavior: v0.7.4\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.4. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_df69e175afaf53788c9f212e22680b87 | https://github.com/qdrant/fastembed/releases/tag/v0.7.4 | v0.7.4\n\n## 24. maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | issue_or_pr_quality=unknown\n\n## 25. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | release_recency=unknown\n\n## 26. maintenance · 失败模式：maintenance: v0.6.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.6.0\n- User impact: Upgrade or migration may change expected behavior: v0.6.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.6.0. 
Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_abc84a0766f60148974f2e932e8dc4f4 | https://github.com/qdrant/fastembed/releases/tag/v0.6.0 | v0.6.0\n\n## 27. maintenance · 失败模式：maintenance: v0.6.1\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.6.1\n- User impact: Upgrade or migration may change expected behavior: v0.6.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.6.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_e909fbcb26efacd532e8194ede2ff4f0 | https://github.com/qdrant/fastembed/releases/tag/v0.6.1 | v0.6.1\n\n## 28. maintenance · 失败模式：maintenance: v0.7.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.7.0\n- User impact: Upgrade or migration may change expected behavior: v0.7.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.0. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_0ccd78cc792f8d6a79212fc6cfa512e4 | https://github.com/qdrant/fastembed/releases/tag/v0.7.0 | v0.7.0\n\n## 29. 
maintenance · 失败模式：maintenance: v0.7.2\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.7.2\n- User impact: Upgrade or migration may change expected behavior: v0.7.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.2. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_28a8ed6e38ab5a351c6a64532998a7df | https://github.com/qdrant/fastembed/releases/tag/v0.7.2 | v0.7.2\n\n<!-- canonical_name: qdrant/fastembed; human_manual_source: deepwiki_human_wiki -->\n",
      "summary": "DeepWiki/Human Wiki output with a Doramagic pitfall appendix.",
      "title": "Human Manual"
    },
    "pitfall_log": {
      "asset_id": "pitfall_log",
      "filename": "PITFALL_LOG.md",
      "markdown": "# Pitfall Log\n\nProject: qdrant/fastembed\n\nSummary: Found 29 potential pitfall items; 3 are high/blocking. Highest priority: installation - 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2.\n\n## 1. installation · 来源证据：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_201d4035515846df8830ca0dad6960c5 | https://github.com/qdrant/fastembed/issues/618 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 2. installation · 来源证据：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：[Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_8147621574b345d7955e79ad98f4ba6f | https://github.com/qdrant/fastembed/issues/630 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 3. 
security_permissions · 失败模式：security_permissions: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside ca...\n\n- Severity: high\n- Evidence strength: source_linked\n- Finding: Developers should check this security_permissions risk before relying on the project: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- User impact: Developers may expose sensitive permissions or credentials: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory. Context: Observed when using python\n- Guardrail action: Do not recommend enabling privileged or credential-bearing paths until the source-backed risk is reviewed: https://github.com/qdrant/fastembed/issues/626\n- Evidence: failure_mode_cluster:github_issue | fmev_d3890c2b3360ccb937839f70fd4aa584 | https://github.com/qdrant/fastembed/issues/626 | [Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n\n## 4. installation · 失败模式：installation: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- User impact: Developers may fail before the first successful local run: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment. 
Context: Observed when using python, docker\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_16e50a8626aff1576adeb1c0baab4785 | https://github.com/qdrant/fastembed/issues/466 | The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n## 5. installation · 失败模式：installation: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- User impact: Developers may fail before the first successful local run: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2. Context: Observed when using python, windows, linux\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_04529bc774f1c961d4adeb7190edecd7 | https://github.com/qdrant/fastembed/issues/618 | [Bug]: Segmentation Fault or AssertionError during initialization on Python 3.14.2\n\n## 6. 
installation · 失败模式：installation: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- User impact: Developers may fail before the first successful local run: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14. Context: Observed when using python, macos, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_79a43347d96beb6d05eb6bfec2503fb5 | https://github.com/qdrant/fastembed/issues/630 | [Bug]: Unable to load 'Qdrant/bm25' on macOS python3.14\n\n## 7. installation · 失败模式：installation: v0.5.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this installation risk before relying on the project: v0.5.1\n- User impact: Upgrade or migration may change expected behavior: v0.5.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.5.1. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_8b37c58c613005c0182d0325aaf032f7 | https://github.com/qdrant/fastembed/releases/tag/v0.5.1 | v0.5.1\n\n## 8. 
installation · 来源证据：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安装相关的待验证问题：The dependency `py-rust-stemmers` cannot be downloaded in a pure Python environment.\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_0f507b37e33e456ea259e82966cecdc5 | https://github.com/qdrant/fastembed/issues/466 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 9. configuration · 来源证据：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个配置相关的待验证问题：[Bug]: No timeout on model download — requests.get() can hang indefinitely\n- User impact: 可能阻塞安装或首次运行。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_6d570ba91cfd414f970a3a8da522be04 | https://github.com/qdrant/fastembed/issues/627 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 10. capability · 能力判断依赖假设\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: README/documentation is current enough for a first validation pass.\n- User impact: 假设不成立时，用户拿不到承诺的能力。\n- Suggested check: 将假设转成下游验证清单。\n- Guardrail action: 假设必须转成验证项；没有验证结果前不能写成事实。\n- Evidence: capability.assumptions | github_repo:666260877 | https://github.com/qdrant/fastembed | README/documentation is current enough for a first validation pass.\n\n## 11. 
runtime · 失败模式：runtime: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: Loading models with additional files fails with onnxruntime 1.24.1. Context: Observed when using python, linux\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_17b849ae47ffaf5d18cabbd577f373ca | https://github.com/qdrant/fastembed/issues/603 | [Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n## 12. runtime · 失败模式：runtime: v0.4.2\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.4.2\n- User impact: Upgrade or migration may change expected behavior: v0.4.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.4.2. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_607c8ff157108b2b5fb78f55129b60f6 | https://github.com/qdrant/fastembed/releases/tag/v0.4.2 | v0.4.2\n\n## 13. 
runtime · 失败模式：runtime: v0.7.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.7.1\n- User impact: Upgrade or migration may change expected behavior: v0.7.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.1. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_07583dafa20e640a1720d7a77188475e | https://github.com/qdrant/fastembed/releases/tag/v0.7.1 | v0.7.1\n\n## 14. runtime · 失败模式：runtime: v0.8.0\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: Developers should check this runtime risk before relying on the project: v0.8.0\n- User impact: Upgrade or migration may change expected behavior: v0.8.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.8.0. Context: Observed when using python, cuda\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_054bae0c9c1667cf5568de7ecb7087c9 | https://github.com/qdrant/fastembed/releases/tag/v0.8.0 | v0.8.0\n\n## 15. maintenance · 来源证据：[Bug]: license error in pypi metadata\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个维护/版本相关的待验证问题：[Bug]: license error in pypi metadata\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_017eda744ea84e679aeb5d77d41f7541 | https://github.com/qdrant/fastembed/issues/620 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 16. 
maintenance · 维护活跃度未知\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: 未记录 last_activity_observed。\n- User impact: 新项目、停更项目和活跃项目会被混在一起，推荐信任度下降。\n- Suggested check: 补 GitHub 最近 commit、release、issue/PR 响应信号。\n- Guardrail action: 维护活跃度未知时，推荐强度不能标为高信任。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | last_activity_observed missing\n\n## 17. security_permissions · 下游验证发现风险项\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 下游已经要求复核，不能在页面中弱化。\n- Suggested check: 进入安全/权限治理复核队列。\n- Guardrail action: 下游风险存在时必须保持 review/recommendation 降级。\n- Evidence: downstream_validation.risk_items | github_repo:666260877 | https://github.com/qdrant/fastembed | no_demo; severity=medium\n\n## 18. security_permissions · 存在评分风险\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: no_demo\n- User impact: 风险会影响是否适合普通用户安装。\n- Suggested check: 把风险写入边界卡，并确认是否需要人工复核。\n- Guardrail action: 评分风险必须进入边界卡，不能只作为内部分数。\n- Evidence: risks.scoring_risks | github_repo:666260877 | https://github.com/qdrant/fastembed | no_demo; severity=medium\n\n## 19. security_permissions · 来源证据：[Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]: Loading models with additional files fails with onnxruntime 1.24.1\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源显示可能已有修复、规避或版本变化，说明书中必须标注适用版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_cfcd79475b1344a0bba508ac4346edf4 | https://github.com/qdrant/fastembed/issues/603 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 20. 
security_permissions · 来源证据：[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n\n- Severity: medium\n- Evidence strength: source_linked\n- Finding: GitHub 社区证据显示该项目存在一个安全/权限相关的待验证问题：[Bug]: Tar path traversal (Zip Slip) in decompress_to_cache — arbitrary file write outside cache directory\n- User impact: 可能增加新用户试用和生产接入成本。\n- Suggested check: 来源问题仍为 open，Pack Agent 需要复核是否仍影响当前版本。\n- Guardrail action: 不得脱离来源链接放大为确定性结论；需要标注适用版本和复核状态。\n- Evidence: community_evidence:github | cevd_7603e6da390349c9aff6eed6cc5072a9 | https://github.com/qdrant/fastembed/issues/626 | 来源讨论提到 python 相关条件，需在安装/试用前复核。\n\n## 21. capability · 失败模式：capability: [Bug]: license error in pypi metadata\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this capability risk before relying on the project: [Bug]: license error in pypi metadata\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: license error in pypi metadata\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: license error in pypi metadata. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_9e209e0c78e000b92930bccb67db1440 | https://github.com/qdrant/fastembed/issues/620 | [Bug]: license error in pypi metadata\n\n## 22. 
runtime · 失败模式：performance: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n- User impact: Developers may hit a documented source-backed failure mode: [Bug]: No timeout on model download — requests.get() can hang indefinitely\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: [Bug]: No timeout on model download — requests.get() can hang indefinitely. Context: Observed when using python, docker\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_issue | fmev_69d0e997d6514a5d87d46c30a69795b3 | https://github.com/qdrant/fastembed/issues/627 | [Bug]: No timeout on model download — requests.get() can hang indefinitely\n\n## 23. runtime · 失败模式：performance: v0.7.4\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this performance risk before relying on the project: v0.7.4\n- User impact: Upgrade or migration may change expected behavior: v0.7.4\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.4. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_df69e175afaf53788c9f212e22680b87 | https://github.com/qdrant/fastembed/releases/tag/v0.7.4 | v0.7.4\n\n## 24. 
maintenance · issue/PR 响应质量未知\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: issue_or_pr_quality=unknown。\n- User impact: 用户无法判断遇到问题后是否有人维护。\n- Suggested check: 抽样最近 issue/PR，判断是否长期无人处理。\n- Guardrail action: issue/PR 响应未知时，必须提示维护风险。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | issue_or_pr_quality=unknown\n\n## 25. maintenance · 发布节奏不明确\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: release_recency=unknown。\n- User impact: 安装命令和文档可能落后于代码，用户踩坑概率升高。\n- Suggested check: 确认最近 release/tag 和 README 安装命令是否一致。\n- Guardrail action: 发布节奏未知或过期时，安装说明必须标注可能漂移。\n- Evidence: evidence.maintainer_signals | github_repo:666260877 | https://github.com/qdrant/fastembed | release_recency=unknown\n\n## 26. maintenance · 失败模式：maintenance: v0.6.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.6.0\n- User impact: Upgrade or migration may change expected behavior: v0.6.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.6.0. Context: Observed when using python\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_abc84a0766f60148974f2e932e8dc4f4 | https://github.com/qdrant/fastembed/releases/tag/v0.6.0 | v0.6.0\n\n## 27. maintenance · 失败模式：maintenance: v0.6.1\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.6.1\n- User impact: Upgrade or migration may change expected behavior: v0.6.1\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.6.1. 
Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_e909fbcb26efacd532e8194ede2ff4f0 | https://github.com/qdrant/fastembed/releases/tag/v0.6.1 | v0.6.1\n\n## 28. maintenance · 失败模式：maintenance: v0.7.0\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.7.0\n- User impact: Upgrade or migration may change expected behavior: v0.7.0\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.0. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_0ccd78cc792f8d6a79212fc6cfa512e4 | https://github.com/qdrant/fastembed/releases/tag/v0.7.0 | v0.7.0\n\n## 29. maintenance · 失败模式：maintenance: v0.7.2\n\n- Severity: low\n- Evidence strength: source_linked\n- Finding: Developers should check this maintenance risk before relying on the project: v0.7.2\n- User impact: Upgrade or migration may change expected behavior: v0.7.2\n- Suggested check: Before packaging this project, run the relevant install/config/quickstart check for: v0.7.2. Context: Source discussion did not expose a precise runtime context.\n- Guardrail action: State this as source-backed community evidence, not as Doramagic reproduction.\n- Evidence: failure_mode_cluster:github_release | fmev_28a8ed6e38ab5a351c6a64532998a7df | https://github.com/qdrant/fastembed/releases/tag/v0.7.2 | v0.7.2\n",
      "summary": "Identity, installation, configuration, runtime, and safety pitfalls before user trial.",
      "title": "Pitfall Log"
    },
    "prompt_preview": {
      "asset_id": "prompt_preview",
      "filename": "PROMPT_PREVIEW.md",
      "markdown": "# fastembed - Prompt Preview\n\n> Copy the prompt below into your AI host before installing anything.\n> Its purpose is to let you safely feel the project's workflow, not to claim the project has already run.\n\n## Copy this prompt\n\n```text\nYou are using an independent Doramagic capability pack for qdrant/fastembed.\n\nProject:\n- Name: fastembed\n- Repository: https://github.com/qdrant/fastembed\n- Summary: Fast, Accurate, Lightweight Python library to make State of the Art Embedding\n- Host target: chatgpt\n\nGoal:\nHelp me evaluate this project for the following task without installing it yet: Fast, Accurate, Lightweight Python library to make State of the Art Embedding\n\nBefore taking action:\n1. Restate my task, success standard, and boundary.\n2. Identify whether the next step requires tools, browser access, network access, filesystem access, credentials, package installation, or host configuration.\n3. Use only the Doramagic Project Pack, the upstream repository, and the source-linked evidence listed below.\n4. If a real command, install step, API call, file write, or host integration is required, mark it as \"requires post-install verification\" and ask for approval first.\n5. If evidence is missing, say \"evidence is missing\" instead of filling the gap.\n\nPreviewable capabilities:\n- Capability 1: Fast, Accurate, Lightweight Python library to make State of the Art Embedding\n\nCapabilities that require post-install verification:\n- Capability 1: Use the source-backed project context to guide one small, checkable workflow step.\n\nCore service flow:\n1. page-introduction: Introduction to FastEmbed. Produce one small intermediate artifact and wait for confirmation.\n2. page-installation: Installation Guide. Produce one small intermediate artifact and wait for confirmation.\n3. page-quickstart: Quick Start Guide. Produce one small intermediate artifact and wait for confirmation.\n4. page-architecture: System Architecture. 
Produce one small intermediate artifact and wait for confirmation.\n5. page-text-embedding: Text Embedding Module. Produce one small intermediate artifact and wait for confirmation.\n\nSource-backed evidence to keep in mind:\n- https://github.com/qdrant/fastembed\n- https://github.com/qdrant/fastembed#readme\n- README.md\n- fastembed/__init__.py\n- pyproject.toml\n- fastembed/text/text_embedding.py\n- fastembed/image/image_embedding.py\n- fastembed/text/text_embedding_base.py\n- fastembed/image/image_embedding_base.py\n- fastembed/common/onnx_model.py\n\nFirst response rules:\n1. Start Step 1 only.\n2. Explain the one service action you will perform first.\n3. Ask exactly three questions about my target workflow, success standard, and sandbox boundary.\n4. Stop and wait for my answers.\n\nStep 1 follow-up protocol:\n- After I answer the first three questions, stay in Step 1.\n- Produce six parts only: clarified task, success standard, boundary conditions, two or three options, tradeoffs for each option, and one recommendation.\n- End by asking whether I confirm the recommendation.\n- Do not move to Step 2 until I explicitly confirm.\n\nConversation rules:\n- Advance one step at a time and wait for confirmation after each small artifact.\n- Write outputs as recommendations or planned checks, not as completed execution.\n- Do not claim tests passed, files changed, commands ran, APIs were called, or the project was installed.\n- If the user asks for execution, first provide the sandbox setup, expected output, rollback, and approval checkpoint.\n```\n",
      "summary": "不安装项目也能感受能力节奏的安全试用 Prompt。",
      "title": "Prompt Preview / 安装前试用 Prompt"
    },
    "quick_start": {
      "asset_id": "quick_start",
      "filename": "QUICK_START.md",
      "markdown": "# Quick Start\n\nProject: qdrant/fastembed\n\n## Official Entry Points\n\n### Python / pip · 官方安装入口\n\n```bash\npip install fastembed\n```\n\nSource: https://github.com/qdrant/fastembed#readme\n\n## Sources\n\n- repo: https://github.com/qdrant/fastembed\n- docs: https://github.com/qdrant/fastembed#readme\n",
      "summary": "Entry points extracted from official README or installation documentation.",
      "title": "Quick Start"
    }
  },
  "validation_id": "dval_8a3cf855db8e42dd8eae04da90dc0f36"
}
