# Gitea Actions workflow: on a daily schedule (or on push to master) generate
# blog drafts from Trilium notes with CrewAI agents and push them for review.
name: Create Blog Article if new notes exist
on:
  schedule:
    # Daily at 18:15 UTC.
    - cron: "15 18 * * *"
  push:
    branches:
      - master
jobs:
  prepare_blog_drafts_and_push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install dependencies
        shell: bash
        run: |
          apt update && apt upgrade -y
          apt install rustc cargo python-is-python3 pip python3-venv python3-virtualenv libmagic-dev git -y
          virtualenv .venv
          source .venv/bin/activate
          pip install --upgrade pip
          pip install -r requirements.txt
          # Git identity used for the automated blog commits/pushes.
          git config --global user.name "Blog Creator"
          git config --global user.email "ridgway.infrastructure@gmail.com"
          git config --global push.autoSetupRemote true

      - name: Create .env
        shell: bash
        run: |
          # First write truncates any pre-existing .env; the rest append.
          # FIX: quote TRILIUM_HOST like every other value so hostnames with
          # shell-special characters survive dotenv parsing consistently.
          echo "TRILIUM_HOST='${{ vars.TRILIUM_HOST }}'" > .env
          echo "TRILIUM_PORT='${{ vars.TRILIUM_PORT }}'" >> .env
          echo "TRILIUM_PROTOCOL='${{ vars.TRILIUM_PROTOCOL }}'" >> .env
          echo "TRILIUM_PASS='${{ secrets.TRILIUM_PASS }}'" >> .env
          echo "TRILIUM_TOKEN='${{ secrets.TRILIUM_TOKEN }}'" >> .env
          echo "OLLAMA_PROTOCOL='${{ vars.OLLAMA_PROTOCOL }}'" >> .env
          echo "OLLAMA_HOST='${{ vars.OLLAMA_HOST }}'" >> .env
          echo "OLLAMA_PORT='${{ vars.OLLAMA_PORT }}'" >> .env
          echo "EMBEDDING_MODEL='${{ vars.EMBEDDING_MODEL }}'" >> .env
          echo "EDITOR_MODEL='${{ vars.EDITOR_MODEL }}'" >> .env
          # Join the four individual model variables into one JSON array,
          # which is the format _compute_authors / the writing crew expect.
          export PURE='["${{ vars.CONTENT_CREATOR_MODELS_1 }}", "${{ vars.CONTENT_CREATOR_MODELS_2 }}", "${{ vars.CONTENT_CREATOR_MODELS_3 }}", "${{ vars.CONTENT_CREATOR_MODELS_4 }}"]'
          echo "CONTENT_CREATOR_MODELS='$PURE'" >> .env
          echo "GIT_PROTOCOL='${{ vars.GIT_PROTOCOL }}'" >> .env
          echo "GIT_REMOTE='${{ vars.GIT_REMOTE }}'" >> .env
          echo "GIT_USER='${{ vars.GIT_USER }}'" >> .env
          echo "GIT_PASS='${{ secrets.GIT_PASS }}'" >> .env
          echo "N8N_SECRET='${{ secrets.N8N_SECRET }}'" >> .env
          echo "N8N_WEBHOOK_URL='${{ vars.N8N_WEBHOOK_URL }}'" >> .env
          echo "CHROMA_HOST='${{ vars.CHROMA_HOST }}'" >> .env
          echo "CHROMA_PORT='${{ vars.CHROMA_PORT }}'" >> .env
          # API key for Ollama's hosted web search (used by the researcher agent).
          echo "OLLAMA_API_KEY='${{ secrets.OLLAMA_API_KEY }}'" >> .env

      - name: Create Blogs
        shell: bash
        run: |
          source .venv/bin/activate
          python src/main.py
|
||||||
|
|||||||
312
README.md
312
README.md
@ -1,64 +1,290 @@
|
|||||||
## BLOG CREATOR
|
# Blog Creator
|
||||||
|
|
||||||
This creator requires you to use a working Trilium Instance and create a .env file with the following
|
An automated blog generation system that uses CrewAI agents to research, write, and edit blog posts from Trilium notes.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
The system uses three CrewAI crews orchestrated by a Flow:
|
||||||
|
|
||||||
|
1. **Research Crew** - A critical researcher agent with web search capabilities investigates the topic and produces verified findings
|
||||||
|
2. **Writing Crew** - Four creative journalist agents write draft blog articles in parallel, each with different creative styles
|
||||||
|
3. **Editor Crew** - A critical editor loads the drafts into a vector database, queries for relevant context, and produces the final polished document with metadata
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Python 3.10 or later
|
||||||
|
- Ollama server running with required models
|
||||||
|
- ChromaDB server for vector storage
|
||||||
|
- Trilium notes instance
|
||||||
|
- Gitea instance (for automated workflows)
|
||||||
|
- n8n instance (for notifications)
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Create a `.env` file in the project root with the following variables:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
# Trilium Configuration
|
||||||
TRILIUM_HOST=
|
TRILIUM_HOST=
|
||||||
TRILIUM_PORT=
|
TRILIUM_PORT=
|
||||||
TRILIUM_PROTOCOL=
|
TRILIUM_PROTOCOL=https
|
||||||
TRILIUM_PASS=
|
TRILIUM_PASS=
|
||||||
TRILIUM_TOKEN=
|
TRILIUM_TOKEN=
|
||||||
OLLAMA_PROTOCOL=
|
|
||||||
|
# Ollama Configuration
|
||||||
|
OLLAMA_PROTOCOL=http
|
||||||
OLLAMA_HOST=
|
OLLAMA_HOST=
|
||||||
OLLAMA_PORT=11434
|
OLLAMA_PORT=11434
|
||||||
EMBEDDING_MODEL=
|
EMBEDDING_MODEL=nomic-embed-text
|
||||||
EDITOR_MODEL=
|
EDITOR_MODEL=llama3.1:8b
|
||||||
# This is expected in python list format example `[phi4-mini:latest, qwen3:1.7b, gemma3:latest]`
|
CONTENT_CREATOR_MODELS=["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]
|
||||||
CONTENT_CREATOR_MODELS=
|
|
||||||
CHROMA_SERVER=<IP_ADDRESS>
|
# ChromaDB Configuration
|
||||||
|
CHROMA_HOST=chroma
|
||||||
|
CHROMA_PORT=8000
|
||||||
|
|
||||||
|
# Git Configuration
|
||||||
|
GIT_USER=
|
||||||
|
GIT_PASS=
|
||||||
|
GIT_PROTOCOL=https
|
||||||
|
GIT_REMOTE=git.aridgwayweb.com/armistace/blog.git
|
||||||
|
|
||||||
|
# Notification Configuration
|
||||||
|
N8N_SECRET=
|
||||||
|
N8N_WEBHOOK_URL=
|
||||||
|
|
||||||
|
# Ollama Web Search (required for researcher agent)
|
||||||
|
OLLAMA_API_KEY=
|
||||||
```
|
```
|
||||||
|
|
||||||
This container is going to be what I use to trigger a blog creation event
|
### CONTENT_CREATOR_MODELS Format
|
||||||
|
|
||||||
To do this we will
|
The `CONTENT_CREATOR_MODELS` variable should be a JSON array of Ollama model names. Each model will be used by one of the four journalist agents. Example:
|
||||||
|
|
||||||
1. Download a Note from Trilium (I need to work out how to choose this, maybe something with a tag and then this can add a tag when it's used? each note is a separate post, a tag to indicate if it's ready as well?)
|
|
||||||
|
|
||||||
`SELECT NOTES WHERE blog_tag = true AND used_tag = false AND ready_tag = true?`
|
|
||||||
|
|
||||||
2. Check if the ollama server is available (it's currently on a box that may not be on)
|
|
||||||
|
|
||||||
- If not on stop
|
|
||||||
|
|
||||||
3. `git pull git.aridgwayweb.com/blog`
|
|
||||||
|
|
||||||
- set up git creds: git.name = ai git.email = ridgwayinfrastructure@gmail.com get git password stored (create service user in gitea for this)
|
|
||||||
|
|
||||||
- `git config set upstream Auto true`
|
|
||||||
|
|
||||||
4. cd /src/content
|
|
||||||
|
|
||||||
5. take the information from the trillium note and prepare a 500 word blog post, insert the following at the top
|
|
||||||
|
|
||||||
```
|
```
|
||||||
Title: <title>
|
CONTENT_CREATOR_MODELS=["llama3.1:8b", "qwen2.5:7b", "phi4:latest"]
|
||||||
Date: <date post created>
|
|
||||||
Modified: <date post created>
|
|
||||||
Category: <this will come from a tag on the post (category: <category>)
|
|
||||||
Tags: <ai generated tags>, ai_content, not_human_content
|
|
||||||
Slug: <have ai write slug?>
|
|
||||||
Authors: <model name>.ai
|
|
||||||
Summary: <have ai write a 10 word summary of the post>
|
|
||||||
```
|
```
|
||||||
|
|
||||||
6. write it to `<title>.md`
|
### OLLAMA_API_KEY
|
||||||
|
|
||||||
7. `git checkout -b <title>`
|
The researcher agent uses Ollama's native web search API. Create an API key from your Ollama account (https://ollama.com) and add it to your `.env` file. This uses your existing Ollama subscription for web searches.
|
||||||
|
|
||||||
8. `git add .`
|
## Project Structure
|
||||||
|
|
||||||
9. `git commit -m "<have ai write a git commit about the post>"`
|
```
|
||||||
|
blog_creator/
|
||||||
|
├── .env # Environment variables (create this)
|
||||||
|
├── .gitea/workflows/deploy.yml # Gitea Actions workflow
|
||||||
|
├── docker-compose.yml # Local development setup
|
||||||
|
├── requirements.txt # Python dependencies
|
||||||
|
├── README.md # This file
|
||||||
|
└── src/
|
||||||
|
├── main.py # Entry point
|
||||||
|
└── ai_generators/
|
||||||
|
├── ollama_md_generator.py # Main interface (used by main.py)
|
||||||
|
├── blog_flow.py # CrewAI Flow orchestrator
|
||||||
|
├── crews/
|
||||||
|
│ ├── research_crew/ # Researcher agent with web search
|
||||||
|
│   ├── writing_crew/            # Four journalist agents
|
||||||
|
│ └── editor_crew/ # Editor agent with metadata generation
|
||||||
|
└── tools/
|
||||||
|
```
|
||||||
|
|
||||||
10. `git push`
|
## Local Development Setup
|
||||||
|
|
||||||
11. Send notification via n8n to matrix for me to review?
|
### Using Docker Compose
|
||||||
|
|
||||||
|
1. Clone the repository and navigate to the project directory
|
||||||
|
|
||||||
|
2. Create your `.env` file with all required variables
|
||||||
|
|
||||||
|
3. Start the services:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
This starts:
|
||||||
|
- `blog_creator` - The main application container
|
||||||
|
- `chroma` - ChromaDB vector database
|
||||||
|
|
||||||
|
4. The container will run `main.py` automatically on startup. To run manually:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker-compose exec blog_creator python src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Manual Setup (without Docker)
|
||||||
|
|
||||||
|
1. Install system dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
apt update && apt install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create and activate a virtual environment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Install Python dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Configure Git:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git config --global user.name "Blog Creator"
|
||||||
|
git config --global user.email "your-email@example.com"
|
||||||
|
git config --global push.autoSetupRemote true
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Run the application:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
### Trilium Integration
|
||||||
|
|
||||||
|
The system fetches notes from Trilium that are tagged for blog creation. Each note becomes one blog post. The note content is used as the basis for the AI-generated article.
|
||||||
|
|
||||||
|
### Blog Generation Flow
|
||||||
|
|
||||||
|
1. **Research Phase** - The researcher agent investigates the topic using web search, critically evaluates claims, and produces verified findings
|
||||||
|
|
||||||
|
2. **Writing Phase** - Four journalist agents write creative drafts in parallel, each with different temperature and top_p settings for variety
|
||||||
|
|
||||||
|
3. **Editor Phase** - The editor:
|
||||||
|
- Chunks and embeds all drafts into ChromaDB
|
||||||
|
- Queries the vector database for relevant context
|
||||||
|
- Generates the final polished document with metadata header
|
||||||
|
|
||||||
|
### Output Format
|
||||||
|
|
||||||
|
Each blog post includes a metadata header followed by the markdown body:
|
||||||
|
|
||||||
|
```
|
||||||
|
Title: Designing and Building an AI Enhanced CCTV System
|
||||||
|
Date: 2026-02-02 20:00
|
||||||
|
Modified: 2026-02-02 20:00
|
||||||
|
Category: Homelab
|
||||||
|
Tags: proxmox, hardware, self host, homelab, ai_content, not_human_content
|
||||||
|
Slug: ai-enhanced-cctv
|
||||||
|
Authors: phi4-mini.ai, qwen3.ai, gemma3.ai
|
||||||
|
Summary: Home CCTV Security has become a bastion of cloud subscription awfulness. This blog describes creating your own AI enhanced system.
|
||||||
|
|
||||||
|
<full markdown blog body follows>
|
||||||
|
```
|
||||||
|
|
||||||
|
The metadata fields are generated as follows:
|
||||||
|
- **Title** - From the Trilium note title
|
||||||
|
- **Date/Modified** - Current datetime when generated
|
||||||
|
- **Category** - AI-generated single word (e.g., Homelab, DevOps, Security)
|
||||||
|
- **Tags** - AI-generated relevant tags plus `ai_content, not_human_content`
|
||||||
|
- **Slug** - AI-generated URL-friendly slug
|
||||||
|
- **Authors** - Derived from CONTENT_CREATOR_MODELS (model name + `.ai`)
|
||||||
|
- **Summary** - AI-generated 15-25 word summary
|
||||||
|
|
||||||
|
### Git Workflow
|
||||||
|
|
||||||
|
After generation, the blog post is:
|
||||||
|
1. Committed to a new branch named after the slug
|
||||||
|
2. Pushed to the configured Git remote
|
||||||
|
3. A notification is sent via n8n to Matrix for review
|
||||||
|
|
||||||
|
## Gitea Actions Workflow
|
||||||
|
|
||||||
|
The `.gitea/workflows/deploy.yml` file defines an automated workflow that:
|
||||||
|
|
||||||
|
- Runs on a schedule (daily at 18:15 UTC) or on push to master branch
|
||||||
|
- Installs all dependencies
|
||||||
|
- Creates the `.env` file from Gitea secrets and variables
|
||||||
|
- Runs the blog generation script
|
||||||
|
|
||||||
|
### Setting Up Gitea Variables
|
||||||
|
|
||||||
|
In your Gitea repository settings, configure the following:
|
||||||
|
|
||||||
|
**Variables** (Repository Settings -> Variables):
|
||||||
|
- `TRILIUM_HOST` - Your Trilium server hostname
|
||||||
|
- `TRILIUM_PORT` - Trilium port
|
||||||
|
- `TRILIUM_PROTOCOL` - http or https
|
||||||
|
- `OLLAMA_PROTOCOL` - http or https
|
||||||
|
- `OLLAMA_HOST` - Ollama server hostname
|
||||||
|
- `OLLAMA_PORT` - Ollama port (default 11434)
|
||||||
|
- `EMBEDDING_MODEL` - Embedding model name
|
||||||
|
- `EDITOR_MODEL` - Editor/Researcher model name
|
||||||
|
- `CONTENT_CREATOR_MODELS_1` through `CONTENT_CREATOR_MODELS_4` - Individual model names (the workflow joins these into an array)
|
||||||
|
- `GIT_PROTOCOL` - https or ssh
|
||||||
|
- `GIT_REMOTE` - Git repository URL
|
||||||
|
- `GIT_USER` - Git username for pushing
|
||||||
|
- `N8N_WEBHOOK_URL` - n8n webhook URL for notifications
|
||||||
|
- `CHROMA_HOST` - ChromaDB hostname
|
||||||
|
- `CHROMA_PORT` - ChromaDB port
|
||||||
|
|
||||||
|
**Secrets** (Repository Settings -> Secrets):
|
||||||
|
- `TRILIUM_PASS` - Trilium password
|
||||||
|
- `TRILIUM_TOKEN` - Trilium API token
|
||||||
|
- `GIT_PASS` - Git password or personal access token
|
||||||
|
- `N8N_SECRET` - n8n webhook secret key
|
||||||
|
- `OLLAMA_API_KEY` - Ollama API key for web search
|
||||||
|
|
||||||
|
### Workflow Triggers
|
||||||
|
|
||||||
|
The workflow runs automatically when:
|
||||||
|
- A push is made to the master branch
|
||||||
|
- The scheduled cron time is reached (18:15 UTC daily)
|
||||||
|
|
||||||
|
To trigger manually, push any change to master or modify the cron schedule in `.gitea/workflows/deploy.yml`.
|
||||||
|
|
||||||
|
## Customizing Agent Behavior
|
||||||
|
|
||||||
|
Agent personalities and task instructions are defined in YAML files under `src/ai_generators/crews/*/config/`. You can modify these without changing Python code:
|
||||||
|
|
||||||
|
- `research_crew/config/agents.yaml` - Researcher role, goal, backstory
|
||||||
|
- `research_crew/config/tasks.yaml` - Research task description
|
||||||
|
- `writing_crew/config/agents.yaml` - Four journalist personalities
|
||||||
|
- `writing_crew/config/tasks.yaml` - Writing task descriptions
|
||||||
|
- `editor_crew/config/agents.yaml` - Editor role, goal, backstory
|
||||||
|
- `editor_crew/config/tasks.yaml` - Editing task and metadata format
|
||||||
|
|
||||||
|
After editing YAML files, restart the application or container to apply changes.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Ollama Connection Errors
|
||||||
|
|
||||||
|
Ensure the Ollama server is running and accessible from the blog_creator container. Check `OLLAMA_HOST` and `OLLAMA_PORT` in your `.env` file.
|
||||||
|
|
||||||
|
### ChromaDB Connection Errors
|
||||||
|
|
||||||
|
Verify ChromaDB is running and the `CHROMA_HOST` and `CHROMA_PORT` variables are correct. In Docker Compose, use `chroma` as the host name.
|
||||||
|
|
||||||
|
### Ollama Web Search Errors
|
||||||
|
|
||||||
|
If the researcher agent fails with web search errors, check that `OLLAMA_API_KEY` is set correctly. Verify your Ollama subscription is active and has web search access.
|
||||||
|
|
||||||
|
### Empty Output
|
||||||
|
|
||||||
|
If blog posts are generated but empty, check:
|
||||||
|
- Ollama models are downloaded and available
|
||||||
|
- `CONTENT_CREATOR_MODELS` contains valid model names
|
||||||
|
- Sufficient timeout for model inference (default is 30 minutes per operation)
|
||||||
|
|
||||||
|
### Git Push Failures
|
||||||
|
|
||||||
|
Verify `GIT_USER` and `GIT_PASS` are correct and the user has write access to the remote repository. Check that the remote URL in `GIT_REMOTE` is accessible.
|
||||||
|
|
||||||
|
## Development Notes
|
||||||
|
|
||||||
|
- The `main.py` entry point should not be modified for normal operation
|
||||||
|
- All AI generation logic is in `src/ai_generators/`
|
||||||
|
- The Flow pattern allows easy addition of new crews or steps
|
||||||
|
- Vector database collections are named `blog_{title}_{random_id}` and persist across runs
|
||||||
@ -3,6 +3,7 @@ trilium-py
|
|||||||
gitpython
|
gitpython
|
||||||
PyGithub
|
PyGithub
|
||||||
chromadb
|
chromadb
|
||||||
langchain-ollama
|
crewai
|
||||||
|
crewai-tools
|
||||||
PyJWT
|
PyJWT
|
||||||
dotenv
|
dotenv
|
||||||
|
|||||||
318
src/ai_generators/blog_flow.py
Normal file
318
src/ai_generators/blog_flow.py
Normal file
@ -0,0 +1,318 @@
|
|||||||
|
"""
|
||||||
|
CrewAI Flow that orchestrates the blog-generation pipeline.
|
||||||
|
|
||||||
|
Flow
|
||||||
|
----
|
||||||
|
1. **Research crew** – a critical researcher with web-search investigates the
|
||||||
|
topic and produces verified findings.
|
||||||
|
2. **Writing crew** – four creative journalists write draft blog articles
|
||||||
|
in parallel (async tasks).
|
||||||
|
3. **Editor crew** – a critical editor loads the journalist drafts into
|
||||||
|
ChromaDB, queries for the most relevant context, and produces the final
|
||||||
|
polished markdown document complete with a metadata header (Title, Date,
|
||||||
|
Category, Tags, Slug, Authors, Summary).
|
||||||
|
|
||||||
|
The ChromaDB integration is preserved from the original implementation: each
|
||||||
|
journalist draft is chunked, embedded, and stored in a collection; the editor
|
||||||
|
receives the top-N most relevant chunks as context.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import chromadb
|
||||||
|
from crewai.flow.flow import Flow, listen, start
|
||||||
|
from ollama import Client
|
||||||
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
|
from ai_generators.crews.editor_crew.editor_crew import EditorCrew
|
||||||
|
from ai_generators.crews.research_crew.research_crew import ResearchCrew
|
||||||
|
from ai_generators.crews.writing_crew.writing_crew import WritingCrew
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# State
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class BlogFlowState(BaseModel):
    """Structured state for the blog generation flow.

    Populated progressively by the BlogFlow steps: research fills
    ``research_findings``, the writing step fills ``drafts``, and the
    editor step fills the metadata fields and ``final_document``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Slug-style title (used for filenames / vector-db collection names).
    title: str = ""
    # Human-readable blog title shown in the metadata header.
    inner_title: str = ""
    # Original Trilium note content the article is based on.
    content: str = ""
    # Output of the research crew.
    research_findings: str = ""
    # One raw draft per journalist agent.
    drafts: list[str] = []
    # Final polished markdown document from the editor crew.
    final_document: str = ""
    # Metadata-header fields (Date, Authors, Category).
    date: str = ""
    authors: str = ""
    category: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Flow
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class BlogFlow(Flow[BlogFlowState]):
|
||||||
|
"""Orchestrate researcher → journalists → editor via CrewAI Flows.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
flow = BlogFlow()
|
||||||
|
result = flow.kickoff(inputs={
|
||||||
|
"title": "my_blog_slug",
|
||||||
|
"inner_title": "My Blog Title",
|
||||||
|
"content": "<original content>",
|
||||||
|
})
|
||||||
|
print(result) # final markdown document
|
||||||
|
"""
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Helpers – Ollama / ChromaDB / embedding utilities
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_ollama_url() -> str:
|
||||||
|
return (
|
||||||
|
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
||||||
|
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
    @staticmethod
    def _get_chroma_client() -> chromadb.HttpClient:
        """Build an HTTP client for the ChromaDB server configured via
        CHROMA_HOST / CHROMA_PORT environment variables."""
        # Port arrives as a string from the environment; chromadb wants int.
        chroma_port = int(os.environ["CHROMA_PORT"])
        return chromadb.HttpClient(host=os.environ["CHROMA_HOST"], port=chroma_port)
|
||||||
|
|
||||||
|
    @staticmethod
    def _get_ollama_client() -> Client:
        """Return an Ollama client pointed at the configured server URL."""
        return Client(host=BlogFlow._get_ollama_url())
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _id_generator(size: int = 6) -> str:
|
||||||
|
return "".join(
|
||||||
|
random.choice(string.ascii_uppercase + string.digits) for _ in range(size)
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _split_into_chunks(text: str, chunk_size: int = 100) -> list[str]:
|
||||||
|
words = re.findall(r"\S+", text)
|
||||||
|
chunks: list[str] = []
|
||||||
|
current_chunk: list[str] = []
|
||||||
|
word_count = 0
|
||||||
|
for word in words:
|
||||||
|
current_chunk.append(word)
|
||||||
|
word_count += 1
|
||||||
|
if word_count >= chunk_size:
|
||||||
|
chunks.append(" ".join(current_chunk))
|
||||||
|
current_chunk = []
|
||||||
|
word_count = 0
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(" ".join(current_chunk))
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_embeddings(chunks: list[str]) -> list[list[float]]:
|
||||||
|
ollama_client = BlogFlow._get_ollama_client()
|
||||||
|
embed_model = os.environ["EMBEDDING_MODEL"]
|
||||||
|
try:
|
||||||
|
embeds = ollama_client.embed(model=embed_model, input=chunks)
|
||||||
|
return embeds.get("embeddings", []) # type: ignore[no-any-return]
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"Error generating embeddings: {exc}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
    def _load_drafts_to_vector_db(self, drafts: list[str]) -> chromadb.Collection:
        """Load journalist drafts into a new ChromaDB collection and return it.

        Each draft is split into word chunks, embedded via Ollama, and added
        to a freshly named collection. Drafts with no content or failed
        embeddings are skipped rather than aborting the run.
        """
        chroma = self._get_chroma_client()
        # Random suffix keeps collections unique across runs for the same title.
        collection_name = (
            f"blog_{self.state.title.lower().replace(' ', '_')}_{self._id_generator()}"
        )
        collection = chroma.get_or_create_collection(name=collection_name)

        for i, draft in enumerate(drafts):
            # Synthetic per-draft label used for ids and metadata.
            model_name = f"journalist_{i + 1}"
            chunks = self._split_into_chunks(draft)
            if not chunks or all(chunk.strip() == "" for chunk in chunks):
                print(f"Skipping {model_name} – no content generated")
                continue
            print(f"Generating embeddings for {model_name}")
            embeds = self._get_embeddings(chunks)
            if not embeds:
                print(f"Skipping {model_name} – no embeddings generated")
                continue
            # Defensive: if the embedder returned a different count, truncate
            # both lists to the common prefix so add() gets matched pairs.
            if len(embeds) != len(chunks):
                min_length = min(len(embeds), len(chunks))
                chunks = chunks[:min_length]
                embeds = embeds[:min_length]
                if min_length == 0:
                    print(f"Skipping {model_name} – no valid content/embeddings pairs")
                    continue
            ids = [model_name + str(j) for j in range(len(chunks))]
            metadata = [{"model_agent": model_name} for _ in chunks]
            print(f"Loading into collection for {model_name}")
            collection.add(
                documents=chunks,
                embeddings=embeds,  # type: ignore[arg-type]
                ids=ids,
                metadatas=metadata,  # type: ignore[arg-type]
            )
        return collection
|
||||||
|
|
||||||
|
    @staticmethod
    def _query_vector_db(collection: chromadb.Collection, query_text: str) -> str:
        """Query the ChromaDB collection and return the most relevant
        document chunks joined as a single string.

        Falls back to a placeholder string (rather than raising) when the
        query embedding or the collection query fails, so the editor crew
        always receives some context string.
        """
        ollama_client = BlogFlow._get_ollama_client()
        embed_model = os.environ["EMBEDDING_MODEL"]
        # Embed the query text; on failure substitute an empty embedding so
        # the query below still executes (and its own except handles errors).
        try:
            embed_result = ollama_client.embed(model=embed_model, input=query_text)
            query_embed = embed_result.get("embeddings", [])
            if not query_embed:
                print(
                    "Warning: Failed to generate query embeddings, "
                    "falling back to empty list"
                )
                query_embed = [[]]
        except Exception as exc:
            print(f"Error generating query embeddings: {exc}")
            query_embed = [[]]

        try:
            # Retrieve up to 100 of the most similar chunks.
            query_result = collection.query(
                query_embeddings=query_embed,
                n_results=100,  # type: ignore[arg-type]
            )
            documents = query_result.get("documents", [])
            # `documents` is a list of result lists (one per query embedding);
            # we issued a single query, so join documents[0].
            if documents and len(documents) > 0 and len(documents[0]) > 0:
                return "\n\n".join(documents[0])
            print("Warning: No relevant documents found in collection")
            return "No relevant information found in drafts."
        except Exception as exc:
            print(f"Error querying collection: {exc}")
            return "No relevant information found in drafts due to query error."
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Flow steps
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@start()
|
||||||
|
def research(self) -> str:
|
||||||
|
"""Run the research crew to investigate the blog topic."""
|
||||||
|
print("=" * 60)
|
||||||
|
print("RESEARCH PHASE – investigating topic")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
result = (
|
||||||
|
ResearchCrew()
|
||||||
|
.crew()
|
||||||
|
.kickoff(
|
||||||
|
inputs={
|
||||||
|
"inner_title": self.state.inner_title,
|
||||||
|
"content": self.state.content,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.state.research_findings = result.raw
|
||||||
|
print("Research phase complete")
|
||||||
|
return result.raw
|
||||||
|
|
||||||
|
@listen(research)
|
||||||
|
def write_drafts(self, research_findings: str) -> list[str]:
|
||||||
|
"""Run the writing crew (4 journalists in parallel) and collect
|
||||||
|
their draft outputs."""
|
||||||
|
print("=" * 60)
|
||||||
|
print("WRITING PHASE – 4 journalists drafting in parallel")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
result = (
|
||||||
|
WritingCrew()
|
||||||
|
.crew()
|
||||||
|
.kickoff(
|
||||||
|
inputs={
|
||||||
|
"inner_title": self.state.inner_title,
|
||||||
|
"content": self.state.content,
|
||||||
|
"research_findings": research_findings,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Collect all draft outputs from the crew's task outputs
|
||||||
|
drafts: list[str] = []
|
||||||
|
for task_output in result.tasks_output:
|
||||||
|
drafts.append(task_output.raw)
|
||||||
|
|
||||||
|
self.state.drafts = drafts
|
||||||
|
print(f"Writing phase complete – {len(drafts)} drafts produced")
|
||||||
|
return drafts
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _compute_authors() -> str:
|
||||||
|
"""Build an author string from the CONTENT_CREATOR_MODELS env var.
|
||||||
|
|
||||||
|
Each model name is stripped of any tag suffix (e.g. ``:latest``)
|
||||||
|
and ``.ai`` is appended. Multiple models are joined with ``', '``.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
||||||
|
except (KeyError, json.JSONDecodeError):
|
||||||
|
models = []
|
||||||
|
authors = ", ".join(m.split(":")[0].split("/")[-1] + ".ai" for m in models)
|
||||||
|
return authors or "unknown.ai"
|
||||||
|
|
||||||
|
@listen(write_drafts)
|
||||||
|
def edit_final(self, drafts: list[str]) -> str:
|
||||||
|
"""Load journalist drafts into the vector DB, query for the most
|
||||||
|
relevant context, and run the editor crew to produce the final
|
||||||
|
polished document with a metadata header."""
|
||||||
|
print("=" * 60)
|
||||||
|
print("EDITOR PHASE – producing final document")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
# ---- Compute date and authors for the metadata header ----
|
||||||
|
if not self.state.date:
|
||||||
|
self.state.date = datetime.now().strftime("%Y-%m-%d %H:%M")
|
||||||
|
self.state.authors = self._compute_authors()
|
||||||
|
if not self.state.category:
|
||||||
|
self.state.category = "<pick one word that best describes the topic, e.g. Homelab, DevOps, Security, Networking>"
|
||||||
|
|
||||||
|
# ---- Vector DB integration ----
|
||||||
|
print("Loading drafts into vector database")
|
||||||
|
collection = self._load_drafts_to_vector_db(drafts)
|
||||||
|
|
||||||
|
# Build the editor's brief so we can query the vector DB with it
|
||||||
|
editor_brief = (
|
||||||
|
f"You are an editor taking information from 3 Software "
|
||||||
|
f"Developers and Data experts writing a 5000 word blog article. "
|
||||||
|
f"You like when they use almost no code examples. "
|
||||||
|
f"You are also Australian. The title for the blog is "
|
||||||
|
f"{self.state.inner_title}. "
|
||||||
|
f"The basis for the content of the blog is: "
|
||||||
|
f"<blog>{self.state.content}</blog>"
|
||||||
|
)
|
||||||
|
draft_context = self._query_vector_db(collection, editor_brief)
|
||||||
|
print("Showing pertinent info from drafts used in final edited edition")
|
||||||
|
|
||||||
|
# ---- Editor crew ----
|
||||||
|
result = (
|
||||||
|
EditorCrew()
|
||||||
|
.crew()
|
||||||
|
.kickoff(
|
||||||
|
inputs={
|
||||||
|
"inner_title": self.state.inner_title,
|
||||||
|
"content": self.state.content,
|
||||||
|
"draft_context": draft_context,
|
||||||
|
"date": self.state.date,
|
||||||
|
"authors": self.state.authors,
|
||||||
|
"category": self.state.category,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.state.final_document = result.raw
|
||||||
|
print("Editor phase complete")
|
||||||
|
return result.raw
|
||||||
0
src/ai_generators/crews/__init__.py
Normal file
0
src/ai_generators/crews/__init__.py
Normal file
0
src/ai_generators/crews/editor_crew/__init__.py
Normal file
0
src/ai_generators/crews/editor_crew/__init__.py
Normal file
20
src/ai_generators/crews/editor_crew/config/agents.yaml
Normal file
20
src/ai_generators/crews/editor_crew/config/agents.yaml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
editor:
|
||||||
|
role: >
|
||||||
|
Critical Blog Editor
|
||||||
|
goal: >
|
||||||
|
Produce the final, polished ~5000-word version of a blog about {inner_title},
|
||||||
|
complete with a metadata header (Title, Date, Category, Tags, Slug, Authors,
|
||||||
|
Summary)
|
||||||
|
backstory: >
|
||||||
|
You are an editor taking information from 3 Software Developers and
|
||||||
|
Data experts writing a 5000 word blog article. You like when they use
|
||||||
|
almost no code examples. You are also Australian. The content may have
|
||||||
|
light comedic elements; you are more professional and will attempt to
|
||||||
|
tone these down. You are critical of repeated sentences, inconsistencies,
|
||||||
|
and weak arguments. You ensure the final document is cohesive,
|
||||||
|
well-structured, and publication-ready. You never leave placeholder
|
||||||
|
text — every section must contain finished content. You always begin
|
||||||
|
your output with a plain-text metadata block (Title, Date, Modified,
|
||||||
|
Category, Tags, Slug, Authors, Summary) followed by a blank line and
|
||||||
|
then the full markdown body. You generate sensible Category, Tags,
|
||||||
|
Slug and Summary values based on the blog content.
|
||||||
45
src/ai_generators/crews/editor_crew/config/tasks.yaml
Normal file
45
src/ai_generators/crews/editor_crew/config/tasks.yaml
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
edit_task:
|
||||||
|
description: >
|
||||||
|
Generate the final, 5000 word blog post using this information
|
||||||
|
from the journalist drafts:
|
||||||
|
<context>{draft_context}</context>
|
||||||
|
|
||||||
|
You are an editor taking information from 3 Software Developers and
|
||||||
|
Data experts writing a 5000 word blog article. You like when they use
|
||||||
|
almost no code examples. You are also Australian. The content may have
|
||||||
|
light comedic elements; you are more professional and will attempt to
|
||||||
|
tone these down. As this person produce the final version of this blog
|
||||||
|
as a markdown document keeping in mind the context provided by the
|
||||||
|
previous drafts. You are to produce the content not placeholders for
|
||||||
|
further editors. The title for the blog is {inner_title}. Avoid
|
||||||
|
repeated sentences. The basis for the content of the blog is:
|
||||||
|
<blog>{content}</blog>
|
||||||
|
|
||||||
|
IMPORTANT: The output MUST start with a metadata block in exactly this
|
||||||
|
format, followed by a blank line, then the blog body. Do not wrap the
|
||||||
|
metadata block in code fences or any other markup. Generate sensible
|
||||||
|
values for Category, Tags, Slug and Summary based on the blog content.
|
||||||
|
|
||||||
|
Title: {inner_title}
|
||||||
|
Date: {date}
|
||||||
|
Modified: {date}
|
||||||
|
Category: {category}
|
||||||
|
Tags: <generate 3-5 short lowercase tags relevant to the content>, ai_content, not_human_content
|
||||||
|
Slug: <generate a short URL-friendly slug using lowercase words separated by hyphens>
|
||||||
|
Authors: {authors}
|
||||||
|
Summary: <write a single sentence summary of roughly 15-25 words>
|
||||||
|
|
||||||
|
After the metadata block and blank line, write the full blog body in
|
||||||
|
markdown. Do not repeat the title as a heading in the body.
|
||||||
|
|
||||||
|
- Only output the metadata block and then the markdown body.
|
||||||
|
- Do not wrap in markdown code fences.
|
||||||
|
- Do not provide a commentary on the drafts in the context.
|
||||||
|
- Produce real content, not placeholders for further editors.
|
||||||
|
- Avoid repeated sentences.
|
||||||
|
expected_output: >
|
||||||
|
A metadata block (Title, Date, Modified, Category, Tags, Slug, Authors,
|
||||||
|
Summary) followed by a blank line and then a polished ~5000-word markdown
|
||||||
|
blog article about {inner_title}. No commentary. No placeholders. Cohesive
|
||||||
|
and publication-ready.
|
||||||
|
agent: editor
|
||||||
51
src/ai_generators/crews/editor_crew/editor_crew.py
Normal file
51
src/ai_generators/crews/editor_crew/editor_crew.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
"""Editor crew – produces the final polished blog document."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from crewai import LLM, Agent, Crew, Process, Task
|
||||||
|
from crewai.project import CrewBase, agent, crew, task
|
||||||
|
|
||||||
|
|
||||||
|
def _get_ollama_url() -> str:
|
||||||
|
return (
|
||||||
|
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
||||||
|
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@CrewBase
|
||||||
|
class EditorCrew:
|
||||||
|
"""Crew with a single critical editor who produces the final blog."""
|
||||||
|
|
||||||
|
agents_config = "config/agents.yaml"
|
||||||
|
tasks_config = "config/tasks.yaml"
|
||||||
|
|
||||||
|
@agent
|
||||||
|
def editor(self) -> Agent:
|
||||||
|
return Agent(
|
||||||
|
config=self.agents_config["editor"], # type: ignore[index]
|
||||||
|
llm=LLM(
|
||||||
|
model=f"ollama/{os.environ['EDITOR_MODEL']}",
|
||||||
|
base_url=_get_ollama_url(),
|
||||||
|
temperature=0.6,
|
||||||
|
top_p=0.5,
|
||||||
|
),
|
||||||
|
verbose=True,
|
||||||
|
max_iter=30,
|
||||||
|
respect_context_window=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
@task
|
||||||
|
def edit_task(self) -> Task:
|
||||||
|
return Task(
|
||||||
|
config=self.tasks_config["edit_task"], # type: ignore[index]
|
||||||
|
)
|
||||||
|
|
||||||
|
@crew
|
||||||
|
def crew(self) -> Crew:
|
||||||
|
return Crew(
|
||||||
|
agents=self.agents,
|
||||||
|
tasks=self.tasks,
|
||||||
|
process=Process.sequential,
|
||||||
|
verbose=True,
|
||||||
|
)
|
||||||
0
src/ai_generators/crews/research_crew/__init__.py
Normal file
0
src/ai_generators/crews/research_crew/__init__.py
Normal file
15
src/ai_generators/crews/research_crew/config/agents.yaml
Normal file
15
src/ai_generators/crews/research_crew/config/agents.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
researcher:
|
||||||
|
role: >
|
||||||
|
Critical Technology Researcher
|
||||||
|
goal: >
|
||||||
|
Research and critically evaluate information related to {inner_title}
|
||||||
|
backstory: >
|
||||||
|
You are a skeptical, thorough technology researcher with years of
|
||||||
|
experience in Software Development and DevOps. You never accept
|
||||||
|
information at face value and always cross-reference claims with
|
||||||
|
multiple sources. You are particularly critical of hype, marketing
|
||||||
|
language, and unsubstantiated technical claims. You prefer primary
|
||||||
|
sources, official documentation, and peer-reviewed material over
|
||||||
|
blog posts and opinion pieces. When conflicting information is found
|
||||||
|
you clearly note the discrepancy and provide both viewpoints with
|
||||||
|
credibility assessments.
|
||||||
23
src/ai_generators/crews/research_crew/config/tasks.yaml
Normal file
23
src/ai_generators/crews/research_crew/config/tasks.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
research_task:
|
||||||
|
description: >
|
||||||
|
Research the topic: {inner_title}
|
||||||
|
|
||||||
|
The original content to research and expand upon is:
|
||||||
|
<blog>{content}</blog>
|
||||||
|
|
||||||
|
Your task is to:
|
||||||
|
1. Search the web for current, accurate information related to this topic.
|
||||||
|
2. Critically evaluate the claims made in the original content.
|
||||||
|
3. Find supporting or contradicting evidence from reputable sources.
|
||||||
|
4. Identify any outdated information, common misconceptions, or factual errors.
|
||||||
|
5. Provide a comprehensive research summary with verified facts, clearly
|
||||||
|
distinguishing between confirmed information and areas of uncertainty.
|
||||||
|
|
||||||
|
Be thorough and skeptical. Only include information you can verify from
|
||||||
|
reliable sources. Flag anything that seems exaggerated or unverified.
|
||||||
|
expected_output: >
|
||||||
|
A comprehensive research report with verified facts, source citations,
|
||||||
|
and credibility assessments. Clearly distinguish between confirmed
|
||||||
|
information and areas of uncertainty. Include supporting and
|
||||||
|
contradicting evidence where found.
|
||||||
|
agent: researcher
|
||||||
54
src/ai_generators/crews/research_crew/research_crew.py
Normal file
54
src/ai_generators/crews/research_crew/research_crew.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
"""Research crew – investigates a blog topic using web search."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from crewai import LLM, Agent, Crew, Process, Task
|
||||||
|
from crewai.project import CrewBase, agent, crew, task
|
||||||
|
|
||||||
|
from ai_generators.tools import OllamaWebSearchTool
|
||||||
|
|
||||||
|
|
||||||
|
def _get_ollama_url() -> str:
|
||||||
|
return (
|
||||||
|
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
||||||
|
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@CrewBase
|
||||||
|
class ResearchCrew:
|
||||||
|
"""Crew that researches a blog topic with a critical, web-searching
|
||||||
|
researcher agent."""
|
||||||
|
|
||||||
|
agents_config = "config/agents.yaml"
|
||||||
|
tasks_config = "config/tasks.yaml"
|
||||||
|
|
||||||
|
@agent
|
||||||
|
def researcher(self) -> Agent:
|
||||||
|
return Agent(
|
||||||
|
config=self.agents_config["researcher"], # type: ignore[index]
|
||||||
|
tools=[OllamaWebSearchTool()],
|
||||||
|
llm=LLM(
|
||||||
|
model=f"ollama/{os.environ['EDITOR_MODEL']}",
|
||||||
|
base_url=_get_ollama_url(),
|
||||||
|
temperature=0.3,
|
||||||
|
),
|
||||||
|
verbose=True,
|
||||||
|
max_iter=25,
|
||||||
|
respect_context_window=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
@task
|
||||||
|
def research_task(self) -> Task:
|
||||||
|
return Task(
|
||||||
|
config=self.tasks_config["research_task"], # type: ignore[index]
|
||||||
|
)
|
||||||
|
|
||||||
|
@crew
|
||||||
|
def crew(self) -> Crew:
|
||||||
|
return Crew(
|
||||||
|
agents=self.agents,
|
||||||
|
tasks=self.tasks,
|
||||||
|
process=Process.sequential,
|
||||||
|
verbose=True,
|
||||||
|
)
|
||||||
0
src/ai_generators/crews/writing_crew/__init__.py
Normal file
0
src/ai_generators/crews/writing_crew/__init__.py
Normal file
48
src/ai_generators/crews/writing_crew/config/agents.yaml
Normal file
48
src/ai_generators/crews/writing_crew/config/agents.yaml
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
journalist_one:
|
||||||
|
role: >
|
||||||
|
Creative Technology Journalist
|
||||||
|
goal: >
|
||||||
|
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
||||||
|
backstory: >
|
||||||
|
You are a journalist, Software Developer and DevOps expert writing a
|
||||||
|
draft blog article for other tech enthusiasts. You like to use almost no
|
||||||
|
code examples and prefer to talk in a light comedic tone. You are also
|
||||||
|
Australian. You favour vivid analogies and storytelling to explain
|
||||||
|
technical concepts. Your writing is warm, slightly irreverent, and
|
||||||
|
accessible.
|
||||||
|
|
||||||
|
journalist_two:
|
||||||
|
role: >
|
||||||
|
Creative Technology Journalist
|
||||||
|
goal: >
|
||||||
|
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
||||||
|
backstory: >
|
||||||
|
You are a journalist, Software Developer and DevOps expert writing a
|
||||||
|
draft blog article for other tech enthusiasts. You like to use almost no
|
||||||
|
code examples and prefer to talk in a light comedic tone. You are also
|
||||||
|
Australian. You lean into sharp wit and concise, punchy sentences. You
|
||||||
|
love finding unexpected connections between seemingly unrelated topics.
|
||||||
|
|
||||||
|
journalist_three:
|
||||||
|
role: >
|
||||||
|
Creative Technology Journalist
|
||||||
|
goal: >
|
||||||
|
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
||||||
|
backstory: >
|
||||||
|
You are a journalist, Software Developer and DevOps expert writing a
|
||||||
|
draft blog article for other tech enthusiasts. You like to use almost no
|
||||||
|
code examples and prefer to talk in a light comedic tone. You are also
|
||||||
|
Australian. You prefer a conversational, meandering style that draws the
|
||||||
|
reader in with personal anecdotes and rhetorical questions.
|
||||||
|
|
||||||
|
journalist_four:
|
||||||
|
role: >
|
||||||
|
Creative Technology Journalist
|
||||||
|
goal: >
|
||||||
|
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
||||||
|
backstory: >
|
||||||
|
You are a journalist, Software Developer and DevOps expert writing a
|
||||||
|
draft blog article for other tech enthusiasts. You like to use almost no
|
||||||
|
code examples and prefer to talk in a light comedic tone. You are also
|
||||||
|
Australian. You take a methodical, analytical approach with detailed
|
||||||
|
explanations and systematic breakdowns of complex topics.
|
||||||
79
src/ai_generators/crews/writing_crew/config/tasks.yaml
Normal file
79
src/ai_generators/crews/writing_crew/config/tasks.yaml
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
write_draft_one:
|
||||||
|
description: >
|
||||||
|
Write a 5000 word draft blog article as a markdown document.
|
||||||
|
The title for the blog is {inner_title}.
|
||||||
|
Do not output the title in the markdown.
|
||||||
|
|
||||||
|
The basis for the content of the blog is:
|
||||||
|
<blog>{content}</blog>
|
||||||
|
|
||||||
|
Research findings to incorporate and validate against:
|
||||||
|
<research>{research_findings}</research>
|
||||||
|
|
||||||
|
Write creatively, with a light comedic tone. You are Australian.
|
||||||
|
Use almost no code examples. Make it engaging for tech enthusiasts.
|
||||||
|
Only output the markdown content — no commentary, no meta-description.
|
||||||
|
expected_output: >
|
||||||
|
A ~5000-word markdown draft blog article about {inner_title}.
|
||||||
|
No title in the output. No commentary or meta-description.
|
||||||
|
agent: journalist_one
|
||||||
|
|
||||||
|
write_draft_two:
|
||||||
|
description: >
|
||||||
|
Write a 5000 word draft blog article as a markdown document.
|
||||||
|
The title for the blog is {inner_title}.
|
||||||
|
Do not output the title in the markdown.
|
||||||
|
|
||||||
|
The basis for the content of the blog is:
|
||||||
|
<blog>{content}</blog>
|
||||||
|
|
||||||
|
Research findings to incorporate and validate against:
|
||||||
|
<research>{research_findings}</research>
|
||||||
|
|
||||||
|
Write creatively, with a light comedic tone. You are Australian.
|
||||||
|
Use almost no code examples. Make it engaging for tech enthusiasts.
|
||||||
|
Only output the markdown content — no commentary, no meta-description.
|
||||||
|
expected_output: >
|
||||||
|
A ~5000-word markdown draft blog article about {inner_title}.
|
||||||
|
No title in the output. No commentary or meta-description.
|
||||||
|
agent: journalist_two
|
||||||
|
|
||||||
|
write_draft_three:
|
||||||
|
description: >
|
||||||
|
Write a 5000 word draft blog article as a markdown document.
|
||||||
|
The title for the blog is {inner_title}.
|
||||||
|
Do not output the title in the markdown.
|
||||||
|
|
||||||
|
The basis for the content of the blog is:
|
||||||
|
<blog>{content}</blog>
|
||||||
|
|
||||||
|
Research findings to incorporate and validate against:
|
||||||
|
<research>{research_findings}</research>
|
||||||
|
|
||||||
|
Write creatively, with a light comedic tone. You are Australian.
|
||||||
|
Use almost no code examples. Make it engaging for tech enthusiasts.
|
||||||
|
Only output the markdown content — no commentary, no meta-description.
|
||||||
|
expected_output: >
|
||||||
|
A ~5000-word markdown draft blog article about {inner_title}.
|
||||||
|
No title in the output. No commentary or meta-description.
|
||||||
|
agent: journalist_three
|
||||||
|
|
||||||
|
write_draft_four:
|
||||||
|
description: >
|
||||||
|
Write a 5000 word draft blog article as a markdown document.
|
||||||
|
The title for the blog is {inner_title}.
|
||||||
|
Do not output the title in the markdown.
|
||||||
|
|
||||||
|
The basis for the content of the blog is:
|
||||||
|
<blog>{content}</blog>
|
||||||
|
|
||||||
|
Research findings to incorporate and validate against:
|
||||||
|
<research>{research_findings}</research>
|
||||||
|
|
||||||
|
Write creatively, with a light comedic tone. You are Australian.
|
||||||
|
Use almost no code examples. Make it engaging for tech enthusiasts.
|
||||||
|
Only output the markdown content — no commentary, no meta-description.
|
||||||
|
expected_output: >
|
||||||
|
A ~5000-word markdown draft blog article about {inner_title}.
|
||||||
|
No title in the output. No commentary or meta-description.
|
||||||
|
agent: journalist_four
|
||||||
128
src/ai_generators/crews/writing_crew/writing_crew.py
Normal file
128
src/ai_generators/crews/writing_crew/writing_crew.py
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
"""Writing crew – three journalists who write creative blog drafts in parallel."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from crewai import LLM, Agent, Crew, Process, Task
|
||||||
|
from crewai.project import CrewBase, agent, crew, task
|
||||||
|
|
||||||
|
|
||||||
|
def _get_ollama_url() -> str:
|
||||||
|
return (
|
||||||
|
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
||||||
|
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_agent_models() -> list[str]:
|
||||||
|
return json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
||||||
|
|
||||||
|
|
||||||
|
# Creative-style presets per journalist: (temperature, top_p)
|
||||||
|
_JOURNALIST_PARAMS: dict[int, tuple[float, float]] = {
|
||||||
|
1: (0.70, 0.60), # moderate creativity
|
||||||
|
2: (0.85, 0.50), # high creativity, tighter focus
|
||||||
|
3: (0.60, 0.70), # lower creativity, wider associations
|
||||||
|
4: (0.50, 0.80), # methodical, analytical approach
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@CrewBase
|
||||||
|
class WritingCrew:
|
||||||
|
"""Crew of three creative journalists who write blog drafts in parallel."""
|
||||||
|
|
||||||
|
agents_config = "config/agents.yaml"
|
||||||
|
tasks_config = "config/tasks.yaml"
|
||||||
|
|
||||||
|
# ---- helpers ----
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _journalist_llm(index: int) -> LLM:
|
||||||
|
models = _get_agent_models()
|
||||||
|
model = models[index % len(models)]
|
||||||
|
temp, top_p = _JOURNALIST_PARAMS[index + 1]
|
||||||
|
return LLM(
|
||||||
|
model=f"ollama/{model}",
|
||||||
|
base_url=_get_ollama_url(),
|
||||||
|
temperature=temp,
|
||||||
|
top_p=top_p,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ---- agents ----
|
||||||
|
|
||||||
|
@agent
|
||||||
|
def journalist_one(self) -> Agent:
|
||||||
|
return Agent(
|
||||||
|
config=self.agents_config["journalist_one"], # type: ignore[index]
|
||||||
|
llm=self._journalist_llm(0),
|
||||||
|
verbose=True,
|
||||||
|
max_iter=30,
|
||||||
|
respect_context_window=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
@agent
|
||||||
|
def journalist_two(self) -> Agent:
|
||||||
|
return Agent(
|
||||||
|
config=self.agents_config["journalist_two"], # type: ignore[index]
|
||||||
|
llm=self._journalist_llm(1),
|
||||||
|
verbose=True,
|
||||||
|
max_iter=30,
|
||||||
|
respect_context_window=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
@agent
|
||||||
|
def journalist_three(self) -> Agent:
|
||||||
|
return Agent(
|
||||||
|
config=self.agents_config["journalist_three"], # type: ignore[index]
|
||||||
|
llm=self._journalist_llm(2),
|
||||||
|
verbose=True,
|
||||||
|
max_iter=30,
|
||||||
|
respect_context_window=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
@agent
|
||||||
|
def journalist_four(self) -> Agent:
|
||||||
|
return Agent(
|
||||||
|
config=self.agents_config["journalist_four"], # type: ignore[index]
|
||||||
|
llm=self._journalist_llm(3),
|
||||||
|
verbose=True,
|
||||||
|
max_iter=30,
|
||||||
|
respect_context_window=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ---- tasks ----
|
||||||
|
|
||||||
|
@task
|
||||||
|
def write_draft_one(self) -> Task:
|
||||||
|
return Task(
|
||||||
|
config=self.tasks_config["write_draft_one"], # type: ignore[index]
|
||||||
|
)
|
||||||
|
|
||||||
|
@task
|
||||||
|
def write_draft_two(self) -> Task:
|
||||||
|
return Task(
|
||||||
|
config=self.tasks_config["write_draft_two"], # type: ignore[index]
|
||||||
|
)
|
||||||
|
|
||||||
|
@task
|
||||||
|
def write_draft_three(self) -> Task:
|
||||||
|
return Task(
|
||||||
|
config=self.tasks_config["write_draft_three"], # type: ignore[index]
|
||||||
|
)
|
||||||
|
|
||||||
|
@task
|
||||||
|
def write_draft_four(self) -> Task:
|
||||||
|
return Task(
|
||||||
|
config=self.tasks_config["write_draft_four"], # type: ignore[index]
|
||||||
|
)
|
||||||
|
|
||||||
|
# ---- crew ----
|
||||||
|
|
||||||
|
@crew
|
||||||
|
def crew(self) -> Crew:
|
||||||
|
return Crew(
|
||||||
|
agents=self.agents,
|
||||||
|
tasks=self.tasks,
|
||||||
|
process=Process.sequential,
|
||||||
|
verbose=True,
|
||||||
|
)
|
||||||
@ -1,352 +1,147 @@
|
|||||||
|
"""
|
||||||
|
OllamaGenerator – public interface for blog generation.
|
||||||
|
|
||||||
|
This module preserves the same API that ``main.py`` relies on while
|
||||||
|
delegating the heavy lifting to a CrewAI Flow (``blog_flow.BlogFlow``)
|
||||||
|
that orchestrates a researcher, four journalists, and an editor via
|
||||||
|
YAML-configured crews.
|
||||||
|
|
||||||
|
Breaking changes from the previous implementation
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
* ``langchain-ollama`` is no longer required – the ``generate_system_message``
|
||||||
|
helper now talks directly to the Ollama HTTP API via the ``ollama`` client.
|
||||||
|
* Internally, blog generation is driven by CrewAI agents, crews and a Flow
|
||||||
|
rather than by hand-rolled retry loops and thread-pool executors.
|
||||||
|
|
||||||
|
Public interface (unchanged)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
* ``OllamaGenerator(title, content, inner_title)``
|
||||||
|
* ``save_to_file(filename)`` – generates the blog and writes it to disk
|
||||||
|
* ``generate_system_message(prompt_system, prompt_human)`` – simple LLM call
|
||||||
|
* ``self.response`` – the final markdown text (populated after ``save_to_file``)
|
||||||
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import random
|
|
||||||
import re
|
|
||||||
import string
|
|
||||||
import time
|
import time
|
||||||
from concurrent.futures import ThreadPoolExecutor, TimeoutError
|
from concurrent.futures import ThreadPoolExecutor, TimeoutError
|
||||||
|
|
||||||
import chromadb
|
|
||||||
from langchain_ollama import ChatOllama
|
|
||||||
from ollama import Client
|
from ollama import Client
|
||||||
|
|
||||||
|
from ai_generators.blog_flow import BlogFlow
|
||||||
|
|
||||||
|
|
||||||
class OllamaGenerator:
|
class OllamaGenerator:
|
||||||
def __init__(self, title: str, content: str, inner_title: str):
|
"""Generate a polished blog post from raw content using CrewAI agents.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
title : str
|
||||||
|
An OS-friendly slug used for file names and ChromaDB collection
|
||||||
|
names (e.g. ``"my_blog_title"``).
|
||||||
|
content : str
|
||||||
|
The raw source content that the blog should be based on.
|
||||||
|
inner_title : str
|
||||||
|
The human-readable blog title (used in prompts and output).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
title: str,
|
||||||
|
content: str,
|
||||||
|
inner_title: str,
|
||||||
|
date: str | None = None,
|
||||||
|
category: str | None = None,
|
||||||
|
):
|
||||||
self.title = title
|
self.title = title
|
||||||
self.inner_title = inner_title
|
self.inner_title = inner_title
|
||||||
self.content = content
|
self.content = content
|
||||||
self.response = None
|
self.date = date
|
||||||
print("In Class")
|
self.category = category
|
||||||
print(os.environ["CONTENT_CREATOR_MODELS"])
|
self.response: str | None = None
|
||||||
try:
|
|
||||||
chroma_port = int(os.environ["CHROMA_PORT"])
|
# ---- Ollama connection (used by generate_system_message) ----
|
||||||
except ValueError as e:
|
ollama_url = (
|
||||||
raise Exception(f"CHROMA_PORT is not an integer: {e}")
|
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
||||||
self.chroma = chromadb.HttpClient(
|
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
||||||
host=os.environ["CHROMA_HOST"], port=chroma_port
|
|
||||||
)
|
)
|
||||||
ollama_url = f"{os.environ['OLLAMA_PROTOCOL']}://{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
|
||||||
self.ollama_client = Client(host=ollama_url)
|
self.ollama_client = Client(host=ollama_url)
|
||||||
self.ollama_model = os.environ["EDITOR_MODEL"]
|
self.ollama_model = os.environ["EDITOR_MODEL"]
|
||||||
self.embed_model = os.environ["EMBEDDING_MODEL"]
|
|
||||||
self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
|
||||||
self.llm = ChatOllama(
|
|
||||||
model=self.ollama_model, temperature=0.6, top_p=0.5
|
|
||||||
) # This is the level head in the room
|
|
||||||
self.prompt_inject = f"""
|
|
||||||
You are a journalist, Software Developer and DevOps expert
|
|
||||||
writing a 5000 word draft blog article for other tech enthusiasts.
|
|
||||||
You like to use almost no code examples and prefer to talk
|
|
||||||
in a light comedic tone. You are also Australian
|
|
||||||
As this person write this blog as a markdown document.
|
|
||||||
The title for the blog is {self.inner_title}.
|
|
||||||
Do not output the title in the markdown.
|
|
||||||
The basis for the content of the blog is:
|
|
||||||
<blog>{self.content}</blog>
|
|
||||||
"""
|
|
||||||
|
|
||||||
def split_into_chunks(self, text, chunk_size=100):
|
# ---- Validate required env vars early ----
|
||||||
"""Split text into chunks of size chunk_size"""
|
|
||||||
words = re.findall(r"\S+", text)
|
|
||||||
|
|
||||||
chunks = []
|
|
||||||
current_chunk = []
|
|
||||||
word_count = 0
|
|
||||||
|
|
||||||
for word in words:
|
|
||||||
current_chunk.append(word)
|
|
||||||
word_count += 1
|
|
||||||
|
|
||||||
if word_count >= chunk_size:
|
|
||||||
chunks.append(" ".join(current_chunk))
|
|
||||||
current_chunk = []
|
|
||||||
word_count = 0
|
|
||||||
|
|
||||||
if current_chunk:
|
|
||||||
chunks.append(" ".join(current_chunk))
|
|
||||||
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
def generate_draft(self, model) -> str:
|
|
||||||
"""Generate a draft blog post using the specified model"""
|
|
||||||
|
|
||||||
def _generate():
|
|
||||||
# the idea behind this is to make the "creativity" random amongst the content creators
|
|
||||||
# contorlling temperature will allow cause the output to allow more "random" connections in sentences
|
|
||||||
# Controlling top_p will tighten or loosen the embedding connections made
|
|
||||||
# The result should be varied levels of "creativity" in the writing of the drafts
|
|
||||||
# for more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html
|
|
||||||
temp = random.uniform(0.5, 1.0)
|
|
||||||
top_p = random.uniform(0.4, 0.8)
|
|
||||||
top_k = int(random.uniform(30, 80))
|
|
||||||
agent_llm = ChatOllama(
|
|
||||||
model=model, temperature=temp, top_p=top_p, top_k=top_k
|
|
||||||
)
|
|
||||||
messages = [
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"You are a creative writer specialising in writing about technology",
|
|
||||||
),
|
|
||||||
("human", self.prompt_inject),
|
|
||||||
]
|
|
||||||
response = agent_llm.invoke(messages)
|
|
||||||
return (
|
|
||||||
response.text if hasattr(response, "text") else str(response)
|
|
||||||
) # ['message']['content']
|
|
||||||
|
|
||||||
# Retry mechanism with 30-minute timeout
|
|
||||||
timeout_seconds = 30 * 60 # 30 minutes
|
|
||||||
max_retries = 3
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
|
||||||
future = executor.submit(_generate)
|
|
||||||
result = future.result(timeout=timeout_seconds)
|
|
||||||
return result
|
|
||||||
except TimeoutError:
|
|
||||||
print(
|
|
||||||
f"AI call timed out after {timeout_seconds} seconds on attempt {attempt + 1}"
|
|
||||||
)
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
print("Retrying...")
|
|
||||||
time.sleep(5) # Wait 5 seconds before retrying
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
f"AI call failed to complete after {max_retries} attempts with {timeout_seconds} second timeouts"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
print(f"Attempt {attempt + 1} failed with error: {e}. Retrying...")
|
|
||||||
time.sleep(5) # Wait 5 seconds before retrying
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
f"Failed to generate blog draft after {max_retries} attempts: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_draft_embeddings(self, draft_chunks):
|
|
||||||
"""Get embeddings for the draft chunks"""
|
|
||||||
try:
|
try:
|
||||||
# Handle empty draft chunks
|
_ = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
||||||
if not draft_chunks:
|
except (KeyError, json.JSONDecodeError) as exc:
|
||||||
print("Warning: No draft chunks to embed")
|
raise Exception(
|
||||||
return []
|
f"CONTENT_CREATOR_MODELS env var is missing or invalid: {exc}"
|
||||||
|
|
||||||
embeds = self.ollama_client.embed(
|
|
||||||
model=self.embed_model, input=draft_chunks
|
|
||||||
)
|
)
|
||||||
embeddings = embeds.get("embeddings", [])
|
|
||||||
|
|
||||||
# Check if embeddings were generated successfully
|
|
||||||
if not embeddings:
|
|
||||||
print("Warning: No embeddings generated")
|
|
||||||
return []
|
|
||||||
|
|
||||||
return embeddings
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error generating embeddings: {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits):
|
|
||||||
return "".join(random.choice(chars) for _ in range(size))
|
|
||||||
|
|
||||||
def load_to_vector_db(self):
|
|
||||||
"""Load the generated blog drafts into a vector database"""
|
|
||||||
collection_name = (
|
|
||||||
f"blog_{self.title.lower().replace(' ', '_')}_{self.id_generator()}"
|
|
||||||
)
|
|
||||||
collection = self.chroma.get_or_create_collection(
|
|
||||||
name=collection_name
|
|
||||||
) # , metadata={"hnsw:space": "cosine"})
|
|
||||||
# if any(collection.name == collectionname for collectionname in self.chroma.list_collections()):
|
|
||||||
# self.chroma.delete_collection("blog_creator")
|
|
||||||
for model in self.agent_models:
|
|
||||||
print(f"Generating draft from {model} for load into vector database")
|
|
||||||
try:
|
|
||||||
draft_content = self.generate_draft(model)
|
|
||||||
draft_chunks = self.split_into_chunks(draft_content)
|
|
||||||
|
|
||||||
# Skip if no content was generated
|
|
||||||
if not draft_chunks or all(
|
|
||||||
chunk.strip() == "" for chunk in draft_chunks
|
|
||||||
):
|
|
||||||
print(f"Skipping {model} - no content generated")
|
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"generating embeds for {model}")
|
|
||||||
embeds = self.get_draft_embeddings(draft_chunks)
|
|
||||||
|
|
||||||
# Skip if no embeddings were generated
|
|
||||||
if not embeds:
|
|
||||||
print(f"Skipping {model} - no embeddings generated")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Ensure we have the same number of embeddings as chunks
|
|
||||||
if len(embeds) != len(draft_chunks):
|
|
||||||
print(
|
|
||||||
f"Warning: Mismatch between chunks ({len(draft_chunks)}) and embeddings ({len(embeds)}) for {model}"
|
|
||||||
)
|
|
||||||
# Truncate or pad to match
|
|
||||||
min_length = min(len(embeds), len(draft_chunks))
|
|
||||||
draft_chunks = draft_chunks[:min_length]
|
|
||||||
embeds = embeds[:min_length]
|
|
||||||
if min_length == 0:
|
|
||||||
print(f"Skipping {model} - no valid content/embeddings pairs")
|
|
||||||
continue
|
|
||||||
|
|
||||||
ids = [model + str(i) for i in range(len(draft_chunks))]
|
|
||||||
chunknumber = list(range(len(draft_chunks)))
|
|
||||||
metadata = [{"model_agent": model} for index in chunknumber]
|
|
||||||
print(f"loading into collection for {model}")
|
|
||||||
collection.add(
|
|
||||||
documents=draft_chunks,
|
|
||||||
embeddings=embeds,
|
|
||||||
ids=ids,
|
|
||||||
metadatas=metadata,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error processing model {model}: {e}")
|
|
||||||
# Continue with other models rather than failing completely
|
|
||||||
continue
|
|
||||||
|
|
||||||
return collection
|
|
||||||
|
|
||||||
def generate_markdown(self) -> str:
|
|
||||||
prompt_human = f"""
|
|
||||||
You are an editor taking information from {len(self.agent_models)} Software
|
|
||||||
Developers and Data experts
|
|
||||||
writing a 5000 word blog article. You like when they use almost no code examples.
|
|
||||||
You are also Australian. The content may have light comedic elements,
|
|
||||||
you are more professional and will attempt to tone these down
|
|
||||||
As this person produce the final version of this blog as a markdown document
|
|
||||||
keeping in mind the context provided by the previous drafts.
|
|
||||||
You are to produce the content not placeholders for further editors
|
|
||||||
The title for the blog is {self.inner_title}.
|
|
||||||
Do not output the title in the markdown. Avoid repeated sentences
|
|
||||||
The basis for the content of the blog is:
|
|
||||||
<blog>{self.content}</blog>
|
|
||||||
"""
|
|
||||||
|
|
||||||
def _generate_final_document():
|
|
||||||
try:
|
|
||||||
embed_result = self.ollama_client.embed(
|
|
||||||
model=self.embed_model, input=prompt_human
|
|
||||||
)
|
|
||||||
query_embed = embed_result.get("embeddings", [])
|
|
||||||
if not query_embed:
|
|
||||||
print(
|
|
||||||
"Warning: Failed to generate query embeddings, using empty list"
|
|
||||||
)
|
|
||||||
query_embed = [[]] # Use a single empty embedding as fallback
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error generating query embeddings: {e}")
|
|
||||||
# Generate empty embeddings as fallback
|
|
||||||
query_embed = [[]] # Use a single empty embedding as fallback
|
|
||||||
|
|
||||||
collection = self.load_to_vector_db()
|
|
||||||
|
|
||||||
# Try to query the collection, with fallback for empty collections
|
|
||||||
try:
|
|
||||||
collection_query = collection.query(
|
|
||||||
query_embeddings=query_embed, n_results=100
|
|
||||||
)
|
|
||||||
print("Showing pertinent info from drafts used in final edited edition")
|
|
||||||
|
|
||||||
# Get documents with error handling
|
|
||||||
query_result = collection.query(
|
|
||||||
query_embeddings=query_embed, n_results=100
|
|
||||||
)
|
|
||||||
documents = query_result.get("documents", [])
|
|
||||||
|
|
||||||
if documents and len(documents) > 0 and len(documents[0]) > 0:
|
|
||||||
pertinent_draft_info = "\n\n".join(documents[0])
|
|
||||||
else:
|
|
||||||
print("Warning: No relevant documents found in collection")
|
|
||||||
pertinent_draft_info = "No relevant information found in drafts."
|
|
||||||
|
|
||||||
except Exception as query_error:
|
|
||||||
print(f"Error querying collection: {query_error}")
|
|
||||||
pertinent_draft_info = (
|
|
||||||
"No relevant information found in drafts due to query error."
|
|
||||||
)
|
|
||||||
# print(pertinent_draft_info)
|
|
||||||
prompt_system = f"""Generate the final, 5000 word, draft of the blog using this information from the drafts: <context>{pertinent_draft_info}</context>
|
|
||||||
- Only output in markdown, do not wrap in markdown tags, Only provide the draft not a commentary on the drafts in the context
|
|
||||||
"""
|
|
||||||
print("Generating final document")
|
|
||||||
messages = [
|
|
||||||
("system", prompt_system),
|
|
||||||
("human", prompt_human),
|
|
||||||
]
|
|
||||||
response = self.llm.invoke(messages)
|
|
||||||
return response.text if hasattr(response, "text") else str(response)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Retry mechanism with 30-minute timeout
|
_ = int(os.environ["CHROMA_PORT"])
|
||||||
timeout_seconds = 30 * 60 # 30 minutes
|
except (KeyError, ValueError) as exc:
|
||||||
max_retries = 3
|
raise Exception(f"CHROMA_PORT is not an integer: {exc}")
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
# ------------------------------------------------------------------
|
||||||
try:
|
# Public API
|
||||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
# ------------------------------------------------------------------
|
||||||
future = executor.submit(_generate_final_document)
|
|
||||||
self.response = future.result(timeout=timeout_seconds)
|
|
||||||
break # Success, exit the retry loop
|
|
||||||
except TimeoutError:
|
|
||||||
print(
|
|
||||||
f"AI call timed out after {timeout_seconds} seconds on attempt {attempt + 1}"
|
|
||||||
)
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
print("Retrying...")
|
|
||||||
time.sleep(5) # Wait 5 seconds before retrying
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
f"AI call failed to complete after {max_retries} attempts with {timeout_seconds} second timeouts"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
print(
|
|
||||||
f"Attempt {attempt + 1} failed with error: {e}. Retrying..."
|
|
||||||
)
|
|
||||||
time.sleep(5) # Wait 5 seconds before retrying
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
f"Failed to generate markdown after {max_retries} attempts: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# self.response = self.ollama_client.chat(model=self.ollama_model,
|
|
||||||
# messages=[
|
|
||||||
# 'content': f'{prompt_enhanced}',
|
|
||||||
# },
|
|
||||||
# ])
|
|
||||||
# print ("Markdown Generated")
|
|
||||||
# print (self.response)
|
|
||||||
return self.response # ['message']['content']
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise Exception(f"Failed to generate markdown: {e}")
|
|
||||||
|
|
||||||
def save_to_file(self, filename: str) -> None:
|
def save_to_file(self, filename: str) -> None:
|
||||||
|
"""Run the full CrewAI blog-generation flow and write the result
|
||||||
|
to *filename*.
|
||||||
|
|
||||||
|
After this call ``self.response`` contains the final markdown text.
|
||||||
|
"""
|
||||||
|
self.response = self.generate_markdown()
|
||||||
with open(filename, "w") as f:
|
with open(filename, "w") as f:
|
||||||
f.write(self.generate_markdown())
|
f.write(self.response)
|
||||||
|
|
||||||
def generate_system_message(self, prompt_system, prompt_human):
|
def generate_markdown(self) -> str:
|
||||||
def _generate():
|
"""Execute the CrewAI Flow and return the final markdown document.
|
||||||
messages = [
|
|
||||||
("system", prompt_system),
|
|
||||||
("human", prompt_human),
|
|
||||||
]
|
|
||||||
response = self.llm.invoke(messages)
|
|
||||||
ai_message = response.text if hasattr(response, "text") else str(response)
|
|
||||||
return ai_message
|
|
||||||
|
|
||||||
# Retry mechanism with 30-minute timeout
|
The Flow:
|
||||||
timeout_seconds = 30 * 60 # 30 minutes
|
1. **Research crew** – a critical researcher with web search
|
||||||
|
investigates the topic and produces verified findings.
|
||||||
|
2. **Writing crew** – four creative journalists write draft
|
||||||
|
blog articles in parallel.
|
||||||
|
3. **Editor crew** – a critical editor loads the journalist drafts
|
||||||
|
into the vector DB, queries for relevant context, and produces
|
||||||
|
the polished final document.
|
||||||
|
"""
|
||||||
|
inputs = {
|
||||||
|
"title": self.title,
|
||||||
|
"inner_title": self.inner_title,
|
||||||
|
"content": self.content,
|
||||||
|
}
|
||||||
|
if self.date is not None:
|
||||||
|
inputs["date"] = self.date
|
||||||
|
if self.category is not None:
|
||||||
|
inputs["category"] = self.category
|
||||||
|
|
||||||
|
flow = BlogFlow()
|
||||||
|
result = flow.kickoff(inputs=inputs)
|
||||||
|
return str(result)
|
||||||
|
|
||||||
|
def generate_system_message(self, prompt_system: str, prompt_human: str) -> str:
|
||||||
|
"""Send a system/human message pair to the editor model and return
|
||||||
|
the assistant's response.
|
||||||
|
|
||||||
|
This is a lightweight helper used by ``main.py`` for generating
|
||||||
|
commit messages and notification text – it does **not** invoke the
|
||||||
|
full CrewAI Flow.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _generate() -> str:
|
||||||
|
response = self.ollama_client.chat(
|
||||||
|
model=self.ollama_model,
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": prompt_system},
|
||||||
|
{"role": "user", "content": prompt_human},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
return response["message"]["content"]
|
||||||
|
|
||||||
|
# Retry mechanism with 30-minute timeout (same as the original)
|
||||||
|
timeout_seconds = 30 * 60
|
||||||
max_retries = 3
|
max_retries = 3
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
@ -357,22 +152,30 @@ class OllamaGenerator:
|
|||||||
return result
|
return result
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
print(
|
print(
|
||||||
f"AI call timed out after {timeout_seconds} seconds on attempt {attempt + 1}"
|
f"AI call timed out after {timeout_seconds} seconds "
|
||||||
|
f"on attempt {attempt + 1}"
|
||||||
)
|
)
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
print("Retrying...")
|
print("Retrying...")
|
||||||
time.sleep(5) # Wait 5 seconds before retrying
|
time.sleep(5)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"AI call failed to complete after {max_retries} attempts with {timeout_seconds} second timeouts"
|
f"AI call failed to complete after {max_retries} "
|
||||||
|
f"attempts with {timeout_seconds} second timeouts"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as exc:
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
print(f"Attempt {attempt + 1} failed with error: {e}. Retrying...")
|
print(
|
||||||
time.sleep(5) # Wait 5 seconds before retrying
|
f"Attempt {attempt + 1} failed with error: {exc}. Retrying..."
|
||||||
|
)
|
||||||
|
time.sleep(5)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"Failed to generate system message after {max_retries} attempts: {e}"
|
f"Failed to generate system message after "
|
||||||
|
f"{max_retries} attempts: {exc}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Should never reach here, but satisfy type-checkers
|
||||||
|
raise RuntimeError("Unexpected exit from generate_system_message")
|
||||||
|
|||||||
4
src/ai_generators/tools/__init__.py
Normal file
4
src/ai_generators/tools/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Tools package for the blog generation CrewAI flow.
|
||||||
|
from ai_generators.tools.ollama_web_search_tool import OllamaWebSearchTool
|
||||||
|
|
||||||
|
__all__ = ["OllamaWebSearchTool"]
|
||||||
124
src/ai_generators/tools/ollama_web_search_tool.py
Normal file
124
src/ai_generators/tools/ollama_web_search_tool.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
"""
|
||||||
|
Custom CrewAI tool that wraps Ollama's native web search API.
|
||||||
|
|
||||||
|
This tool allows CrewAI agents to perform web searches using an Ollama
|
||||||
|
subscription instead of third-party services like Serper or EXA.
|
||||||
|
|
||||||
|
Requires:
|
||||||
|
- Ollama Python library: pip install ollama
|
||||||
|
- OLLAMA_API_KEY environment variable set with your Ollama API key
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import ollama
|
||||||
|
from crewai.tools import BaseTool
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaWebSearchInput(BaseModel):
|
||||||
|
"""Input schema for OllamaWebSearchTool."""
|
||||||
|
|
||||||
|
query: str = Field(
|
||||||
|
...,
|
||||||
|
description="The web search query string. Be specific and include relevant keywords.",
|
||||||
|
)
|
||||||
|
max_results: int = Field(
|
||||||
|
default=5,
|
||||||
|
ge=1,
|
||||||
|
le=10,
|
||||||
|
description="Maximum number of search results to return (1-10, default 5).",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaWebSearchTool(BaseTool):
|
||||||
|
"""
|
||||||
|
Web search tool using Ollama's native web search API.
|
||||||
|
|
||||||
|
This tool performs live web searches and returns relevant results with
|
||||||
|
titles, URLs, and content snippets. It's ideal for research tasks that
|
||||||
|
require current, up-to-date information from the internet.
|
||||||
|
|
||||||
|
The tool requires an Ollama subscription and the OLLAMA_API_KEY environment
|
||||||
|
variable to be set.
|
||||||
|
|
||||||
|
Example usage:
|
||||||
|
from ai_generators.tools.ollama_web_search_tool import OllamaWebSearchTool
|
||||||
|
|
||||||
|
researcher = Agent(
|
||||||
|
role="Researcher",
|
||||||
|
goal="Research topics thoroughly",
|
||||||
|
tools=[OllamaWebSearchTool()],
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "ollama_web_search"
|
||||||
|
description: str = (
|
||||||
|
"Search the web for current information using Ollama's web search API. "
|
||||||
|
"Use this tool when you need to find up-to-date information, verify claims, "
|
||||||
|
"find supporting or contradicting evidence, or research topics that require "
|
||||||
|
"current data. Returns search results with titles, URLs, and content snippets."
|
||||||
|
)
|
||||||
|
args_schema: type[BaseModel] = OllamaWebSearchInput
|
||||||
|
|
||||||
|
def _run(self, query: str, max_results: int = 5) -> str:
|
||||||
|
"""
|
||||||
|
Execute a web search and return formatted results.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: The search query string
|
||||||
|
max_results: Maximum number of results to return (1-10)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted string with search results, each containing title, URL, and content
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Ensure API key is set
|
||||||
|
if not os.environ.get("OLLAMA_API_KEY"):
|
||||||
|
return "Error: OLLAMA_API_KEY environment variable is not set. Please set your Ollama API key."
|
||||||
|
|
||||||
|
# Perform the web search
|
||||||
|
response = ollama.web_search(query=query, max_results=max_results)
|
||||||
|
|
||||||
|
# Extract and format results
|
||||||
|
results = response.get("results", [])
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return f"No search results found for query: '{query}'"
|
||||||
|
|
||||||
|
formatted_results = []
|
||||||
|
for i, result in enumerate(results, 1):
|
||||||
|
title = result.get("title", "No title")
|
||||||
|
url = result.get("url", "No URL")
|
||||||
|
content = result.get("content", "No content available")
|
||||||
|
|
||||||
|
formatted_results.append(
|
||||||
|
f"Result {i}:\nTitle: {title}\nURL: {url}\nContent: {content}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
return "\n".join(formatted_results)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
return f"Error performing web search: {exc}"
|
||||||
|
|
||||||
|
def _handle_exception(self, exc: Exception) -> str:
|
||||||
|
"""Handle exceptions gracefully and return a user-friendly error message."""
|
||||||
|
error_message = str(exc)
|
||||||
|
|
||||||
|
# Check for common error types
|
||||||
|
if "authentication" in error_message.lower() or "401" in error_message:
|
||||||
|
return (
|
||||||
|
"Authentication error: Your OLLAMA_API_KEY may be invalid or expired. "
|
||||||
|
"Please check your API key and ensure it's set correctly in the environment."
|
||||||
|
)
|
||||||
|
elif "rate limit" in error_message.lower() or "429" in error_message:
|
||||||
|
return "Rate limit exceeded: Too many search requests. Please wait a moment and try again."
|
||||||
|
elif (
|
||||||
|
"network" in error_message.lower() or "connection" in error_message.lower()
|
||||||
|
):
|
||||||
|
return (
|
||||||
|
"Network error: Unable to connect to Ollama's web search service. "
|
||||||
|
"Please check your internet connection and try again."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return f"Search failed: {error_message}"
|
||||||
Loading…
x
Reference in New Issue
Block a user