diff --git a/src/ai_generators/blog_flow.py b/src/ai_generators/blog_flow.py index d3257d5..aa18110 100644 --- a/src/ai_generators/blog_flow.py +++ b/src/ai_generators/blog_flow.py @@ -17,14 +17,19 @@ journalist draft is chunked, embedded, and stored in a collection; the editor receives the top-N most relevant chunks as context. """ +from __future__ import annotations + import json import os import random import re import string from datetime import datetime +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import chromadb # noqa: F811 -import chromadb from crewai.flow.flow import Flow, listen, start from ollama import Client from pydantic import BaseModel, ConfigDict @@ -85,7 +90,21 @@ class BlogFlow(Flow[BlogFlowState]): ) @staticmethod - def _get_chroma_client() -> chromadb.HttpClient: + def _get_chroma_client() -> "chromadb.HttpClient": + # Lazily import chromadb here rather than at module level. + # chromadb unconditionally loads hnswlib (a native C++ library + # compiled with AVX instructions) even when using HttpClient + # to talk to an external ChromaDB server. Deferring the import + # avoids "Illegal instruction" (SIGILL) crashes in environments + # that lack AVX support (e.g. act, older CI runners). + try: + import chromadb + except ImportError as exc: + raise RuntimeError( + "chromadb is required for the editor phase but could not be " + f"imported: {exc}" + ) from exc + chroma_port = int(os.environ["CHROMA_PORT"]) return chromadb.HttpClient(host=os.environ["CHROMA_HOST"], port=chroma_port) @@ -127,7 +146,7 @@ class BlogFlow(Flow[BlogFlowState]): print(f"Error generating embeddings: {exc}") return [] - def _load_drafts_to_vector_db(self, drafts: list[str]) -> chromadb.Collection: + def _load_drafts_to_vector_db(self, drafts: list[str]) -> "chromadb.Collection": """Load journalist drafts into a new ChromaDB collection and return it.""" chroma = self._get_chroma_client() collection_name = ( @@ -165,7 +184,7 @@ class BlogFlow(Flow[BlogFlowState]): return collection @staticmethod - def _query_vector_db(collection: chromadb.Collection, query_text: str) -> str: + def _query_vector_db(collection: "chromadb.Collection", query_text: str) -> str: """Query the ChromaDB collection and return the most relevant document chunks joined as a single string.""" ollama_client = BlogFlow._get_ollama_client()