update chromadb import to be lazy #27
@ -17,14 +17,19 @@ journalist draft is chunked, embedded, and stored in a collection; the editor
|
||||
receives the top-N most relevant chunks as context.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import chromadb # noqa: F811
|
||||
|
||||
import chromadb
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
from ollama import Client
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
@ -85,7 +90,21 @@ class BlogFlow(Flow[BlogFlowState]):
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _get_chroma_client() -> chromadb.HttpClient:
|
||||
def _get_chroma_client() -> "chromadb.HttpClient":
|
||||
# Lazily import chromadb here rather than at module level.
|
||||
# chromadb unconditionally loads hnswlib (a native C++ library
|
||||
# compiled with AVX instructions) even when using HttpClient
|
||||
# to talk to an external ChromaDB server. Deferring the import
|
||||
# avoids "Illegal instruction" (SIGILL) crashes in environments
|
||||
# that lack AVX support (e.g. act, older CI runners).
|
||||
try:
|
||||
import chromadb
|
||||
except ImportError as exc:
|
||||
raise RuntimeError(
|
||||
"chromadb is required for the editor phase but could not be "
|
||||
f"imported: {exc}"
|
||||
) from exc
|
||||
|
||||
chroma_port = int(os.environ["CHROMA_PORT"])
|
||||
return chromadb.HttpClient(host=os.environ["CHROMA_HOST"], port=chroma_port)
|
||||
|
||||
@ -127,7 +146,7 @@ class BlogFlow(Flow[BlogFlowState]):
|
||||
print(f"Error generating embeddings: {exc}")
|
||||
return []
|
||||
|
||||
def _load_drafts_to_vector_db(self, drafts: list[str]) -> chromadb.Collection:
|
||||
def _load_drafts_to_vector_db(self, drafts: list[str]) -> "chromadb.Collection":
|
||||
"""Load journalist drafts into a new ChromaDB collection and return it."""
|
||||
chroma = self._get_chroma_client()
|
||||
collection_name = (
|
||||
@ -165,7 +184,7 @@ class BlogFlow(Flow[BlogFlowState]):
|
||||
return collection
|
||||
|
||||
@staticmethod
|
||||
def _query_vector_db(collection: chromadb.Collection, query_text: str) -> str:
|
||||
def _query_vector_db(collection: "chromadb.Collection", query_text: str) -> str:
|
||||
"""Query the ChromaDB collection and return the most relevant
|
||||
document chunks joined as a single string."""
|
||||
ollama_client = BlogFlow._get_ollama_client()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user