Compare commits

..

3 Commits

Author SHA1 Message Date
6d2b043eca Merge pull request 'update chromadb import to be lazy' (#27) from fix-random-illegal-instruction into master
All checks were successful
Create Blog Article if new notes exist / prepare_blog_drafts_and_push (push) Successful in 10m2s
Reviewed-on: #27
2026-05-21 22:44:37 +10:00
fa611192c8
reviewer fix 2026-05-21 22:44:04 +10:00
e69b83694c
update chromadb import to be lazy 2026-05-21 21:28:03 +10:00

View File

@ -17,14 +17,19 @@ journalist draft is chunked, embedded, and stored in a collection; the editor
receives the top-N most relevant chunks as context.
"""
from __future__ import annotations
import json
import os
import random
import re
import string
from datetime import datetime
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import chromadb # noqa: F811
import chromadb
from crewai.flow.flow import Flow, listen, start
from ollama import Client
from pydantic import BaseModel, ConfigDict
@ -85,7 +90,21 @@ class BlogFlow(Flow[BlogFlowState]):
)
@staticmethod
def _get_chroma_client() -> chromadb.HttpClient:
def _get_chroma_client() -> "chromadb.HttpClient":
    """Return a ChromaDB HTTP client configured from the environment.

    The port is read from ``CHROMA_PORT`` (converted to ``int``) and the
    host from ``CHROMA_HOST``.

    Raises:
        RuntimeError: if ``chromadb`` cannot be imported.
        KeyError: if ``CHROMA_PORT`` or ``CHROMA_HOST`` is not set.
        ValueError: if ``CHROMA_PORT`` cannot be parsed as an integer.
    """
    # The import is intentionally local instead of at module level:
    # chromadb pulls in hnswlib (a native C++ library built with AVX
    # instructions) even when only HttpClient is used against an external
    # server. Importing on demand keeps hosts without AVX support
    # (e.g. act, older CI runners) from dying with "Illegal instruction"
    # (SIGILL) just by loading this module.
    try:
        import chromadb
    except ImportError as import_error:
        message = (
            "chromadb is required for the editor phase but could not be "
            f"imported: {import_error}"
        )
        raise RuntimeError(message) from import_error

    # Read the port before the host so a missing/invalid port is reported
    # first, as in the previous implementation.
    port = int(os.environ["CHROMA_PORT"])
    host = os.environ["CHROMA_HOST"]
    return chromadb.HttpClient(host=host, port=port)
@ -127,7 +146,7 @@ class BlogFlow(Flow[BlogFlowState]):
print(f"Error generating embeddings: {exc}")
return []
def _load_drafts_to_vector_db(self, drafts: list[str]) -> chromadb.Collection:
def _load_drafts_to_vector_db(self, drafts: list[str]) -> "chromadb.Collection":
"""Load journalist drafts into a new ChromaDB collection and return it."""
chroma = self._get_chroma_client()
collection_name = (
@ -165,7 +184,7 @@ class BlogFlow(Flow[BlogFlowState]):
return collection
@staticmethod
def _query_vector_db(collection: chromadb.Collection, query_text: str) -> str:
def _query_vector_db(collection: "chromadb.Collection", query_text: str) -> str:
"""Query the ChromaDB collection and return the most relevant
document chunks joined as a single string."""
ollama_client = BlogFlow._get_ollama_client()