Merge pull request 'update chromadb import to be lazy' (#27) from fix-random-illegal-instruction into master
All checks were successful
Create Blog Article if new notes exist / prepare_blog_drafts_and_push (push) Successful in 10m2s

Reviewed-on: #27
This commit is contained in:
armistace 2026-05-21 22:44:37 +10:00
commit 6d2b043eca

View File

@ -17,14 +17,19 @@ journalist draft is chunked, embedded, and stored in a collection; the editor
receives the top-N most relevant chunks as context. receives the top-N most relevant chunks as context.
""" """
from __future__ import annotations
import json import json
import os import os
import random import random
import re import re
import string import string
from datetime import datetime from datetime import datetime
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import chromadb # noqa: F811
import chromadb
from crewai.flow.flow import Flow, listen, start from crewai.flow.flow import Flow, listen, start
from ollama import Client from ollama import Client
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
@ -85,7 +90,21 @@ class BlogFlow(Flow[BlogFlowState]):
) )
@staticmethod @staticmethod
def _get_chroma_client() -> chromadb.HttpClient: def _get_chroma_client() -> "chromadb.HttpClient":
# Lazily import chromadb here rather than at module level.
# chromadb unconditionally loads hnswlib (a native C++ library
# compiled with AVX instructions) even when using HttpClient
# to talk to an external ChromaDB server. Deferring the import
# avoids "Illegal instruction" (SIGILL) crashes in environments
# that lack AVX support (e.g. act, older CI runners).
try:
import chromadb
except ImportError as exc:
raise RuntimeError(
"chromadb is required for the editor phase but could not be "
f"imported: {exc}"
) from exc
chroma_port = int(os.environ["CHROMA_PORT"]) chroma_port = int(os.environ["CHROMA_PORT"])
return chromadb.HttpClient(host=os.environ["CHROMA_HOST"], port=chroma_port) return chromadb.HttpClient(host=os.environ["CHROMA_HOST"], port=chroma_port)
@ -127,7 +146,7 @@ class BlogFlow(Flow[BlogFlowState]):
print(f"Error generating embeddings: {exc}") print(f"Error generating embeddings: {exc}")
return [] return []
def _load_drafts_to_vector_db(self, drafts: list[str]) -> chromadb.Collection: def _load_drafts_to_vector_db(self, drafts: list[str]) -> "chromadb.Collection":
"""Load journalist drafts into a new ChromaDB collection and return it.""" """Load journalist drafts into a new ChromaDB collection and return it."""
chroma = self._get_chroma_client() chroma = self._get_chroma_client()
collection_name = ( collection_name = (
@ -165,7 +184,7 @@ class BlogFlow(Flow[BlogFlowState]):
return collection return collection
@staticmethod @staticmethod
def _query_vector_db(collection: chromadb.Collection, query_text: str) -> str: def _query_vector_db(collection: "chromadb.Collection", query_text: str) -> str:
"""Query the ChromaDB collection and return the most relevant """Query the ChromaDB collection and return the most relevant
document chunks joined as a single string.""" document chunks joined as a single string."""
ollama_client = BlogFlow._get_ollama_client() ollama_client = BlogFlow._get_ollama_client()