Compare commits
13 Commits
master
...
rag_inclus
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4119b2ec41 | ||
|
|
01b7f1cd78 | ||
| c606f72d90 | |||
| 8a64d9c959 | |||
| 0c090c8489 | |||
|
|
e0b2c80bc9 | ||
|
|
44141ab545 | ||
|
|
e57d6eb6b6 | ||
|
|
c80f692cb0 | ||
|
|
bc2f8a8bca | ||
|
|
e7f7a79d86 | ||
|
|
9b11fea0e7 | ||
|
|
6320571528 |
@ -1,57 +0,0 @@
|
|||||||
# Gitea Actions workflow: generate blog drafts from Trilium notes and push
# them to the blog repository for review.
name: Create Blog Article if new notes exist
on:
  # Run daily at 18:15 UTC, and also on every push to master.
  schedule:
    - cron: "15 18 * * *"
  push:
    branches:
      - master
jobs:
  prepare_blog_drafts_and_push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Install system packages (rustc/cargo for building wheels, libmagic for
      # file-type detection), create a venv, install Python deps, and set the
      # git identity used for the automated blog commits.
      - name: Install dependencies
        shell: bash
        run: |
          apt update && apt upgrade -y
          apt install rustc cargo python-is-python3 pip python3-venv python3-virtualenv libmagic-dev git -y
          virtualenv .venv
          source .venv/bin/activate
          pip install --upgrade pip
          pip install -r requirements.txt
          git config --global user.name "Blog Creator"
          git config --global user.email "ridgway.infrastructure@gmail.com"
          git config --global push.autoSetupRemote true

      # Materialize the .env file consumed by the application from Gitea
      # repository variables (non-secret) and secrets. The first echo uses `>`
      # to create/truncate the file; all later lines append with `>>`.
      - name: Create .env
        shell: bash
        run: |
          echo "TRILIUM_HOST=${{ vars.TRILIUM_HOST }}" > .env
          echo "TRILIUM_PORT='${{ vars.TRILIUM_PORT }}'" >> .env
          echo "TRILIUM_PROTOCOL='${{ vars.TRILIUM_PROTOCOL }}'" >> .env
          echo "TRILIUM_PASS='${{ secrets.TRILIUM_PASS }}'" >> .env
          echo "TRILIUM_TOKEN='${{ secrets.TRILIUM_TOKEN }}'" >> .env
          echo "OLLAMA_PROTOCOL='${{ vars.OLLAMA_PROTOCOL }}'" >> .env
          echo "OLLAMA_HOST='${{ vars.OLLAMA_HOST }}'" >> .env
          echo "OLLAMA_PORT='${{ vars.OLLAMA_PORT }}'" >> .env
          echo "EMBEDDING_MODEL='${{ vars.EMBEDDING_MODEL }}'" >> .env
          echo "EDITOR_MODEL='${{ vars.EDITOR_MODEL }}'" >> .env
          # The four individual model variables are joined into one JSON-style
          # array because the app expects CONTENT_CREATOR_MODELS as a list.
          export PURE='["${{ vars.CONTENT_CREATOR_MODELS_1 }}", "${{ vars.CONTENT_CREATOR_MODELS_2 }}", "${{ vars.CONTENT_CREATOR_MODELS_3 }}", "${{ vars.CONTENT_CREATOR_MODELS_4 }}"]'
          echo "CONTENT_CREATOR_MODELS='$PURE'" >> .env
          echo "GIT_PROTOCOL='${{ vars.GIT_PROTOCOL }}'" >> .env
          echo "GIT_REMOTE='${{ vars.GIT_REMOTE }}'" >> .env
          echo "GIT_USER='${{ vars.GIT_USER }}'" >> .env
          echo "GIT_PASS='${{ secrets.GIT_PASS }}'" >> .env
          echo "N8N_SECRET='${{ secrets.N8N_SECRET }}'" >> .env
          echo "N8N_WEBHOOK_URL='${{ vars.N8N_WEBHOOK_URL }}'" >> .env
          echo "CHROMA_HOST='${{ vars.CHROMA_HOST }}'" >> .env
          echo "CHROMA_PORT='${{ vars.CHROMA_PORT }}'" >> .env
          echo "OLLAMA_API_KEY='${{ secrets.OLLAMA_API_KEY }}'" >> .env

      # Run the blog generation entry point inside the venv created above.
      - name: Create Blogs
        shell: bash
        run: |
          source .venv/bin/activate
          python src/main.py
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@ -5,6 +5,3 @@ __pycache__
|
|||||||
.vscode
|
.vscode
|
||||||
.zed
|
.zed
|
||||||
pyproject.toml
|
pyproject.toml
|
||||||
.ropeproject
|
|
||||||
generated_files/*
|
|
||||||
pyright*
|
|
||||||
|
|||||||
@ -7,12 +7,8 @@ ENV PYTHONUNBUFFERED 1
|
|||||||
|
|
||||||
ADD src/ /blog_creator
|
ADD src/ /blog_creator
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git
|
RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git
|
||||||
# Need to set up git here or we get funky errors
|
|
||||||
RUN git config --global user.name "Blog Creator"
|
|
||||||
RUN git config --global user.email "ridgway.infrastructure@gmail.com"
|
|
||||||
RUN git config --global push.autoSetupRemote true
|
|
||||||
#Get a python venv going as well cause safety
|
|
||||||
RUN python -m venv /opt/venv
|
RUN python -m venv /opt/venv
|
||||||
ENV PATH="/opt/venv/bin:$PATH"
|
ENV PATH="/opt/venv/bin:$PATH"
|
||||||
|
|
||||||
|
|||||||
312
README.md
312
README.md
@ -1,290 +1,64 @@
|
|||||||
# Blog Creator
|
## BLOG CREATOR
|
||||||
|
|
||||||
An automated blog generation system that uses CrewAI agents to research, write, and edit blog posts from Trilium notes.
|
This creator requires you to use a working Trilium Instance and create a .env file with the following
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
The system uses three CrewAI crews orchestrated by a Flow:
|
|
||||||
|
|
||||||
1. **Research Crew** - A critical researcher agent with web search capabilities investigates the topic and produces verified findings
|
|
||||||
2. **Writing Crew** - Four creative journalist agents write draft blog articles in parallel, each with different creative styles
|
|
||||||
3. **Editor Crew** - A critical editor loads the drafts into a vector database, queries for relevant context, and produces the final polished document with metadata
|
|
||||||
|
|
||||||
## Requirements
|
|
||||||
|
|
||||||
- Python 3.10 or later
|
|
||||||
- Ollama server running with required models
|
|
||||||
- ChromaDB server for vector storage
|
|
||||||
- Trilium notes instance
|
|
||||||
- Gitea instance (for automated workflows)
|
|
||||||
- n8n instance (for notifications)
|
|
||||||
|
|
||||||
## Environment Variables
|
|
||||||
|
|
||||||
Create a `.env` file in the project root with the following variables:
|
|
||||||
|
|
||||||
```
|
```
|
||||||
# Trilium Configuration
|
|
||||||
TRILIUM_HOST=
|
TRILIUM_HOST=
|
||||||
TRILIUM_PORT=
|
TRILIUM_PORT=
|
||||||
TRILIUM_PROTOCOL=https
|
TRILIUM_PROTOCOL=
|
||||||
TRILIUM_PASS=
|
TRILIUM_PASS=
|
||||||
TRILIUM_TOKEN=
|
TRILIUM_TOKEN=
|
||||||
|
OLLAMA_PROTOCOL=
|
||||||
# Ollama Configuration
|
|
||||||
OLLAMA_PROTOCOL=http
|
|
||||||
OLLAMA_HOST=
|
OLLAMA_HOST=
|
||||||
OLLAMA_PORT=11434
|
OLLAMA_PORT=11434
|
||||||
EMBEDDING_MODEL=nomic-embed-text
|
EMBEDDING_MODEL=
|
||||||
EDITOR_MODEL=llama3.1:8b
|
EDITOR_MODEL=
|
||||||
CONTENT_CREATOR_MODELS=["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]
|
# This is expected in python list format example `[phi4-mini:latest, qwen3:1.7b, gemma3:latest]`
|
||||||
|
CONTENT_CREATOR_MODELS=
|
||||||
# ChromaDB Configuration
|
CHROMA_SERVER=<IP_ADDRESS>
|
||||||
CHROMA_HOST=chroma
|
|
||||||
CHROMA_PORT=8000
|
|
||||||
|
|
||||||
# Git Configuration
|
|
||||||
GIT_USER=
|
|
||||||
GIT_PASS=
|
|
||||||
GIT_PROTOCOL=https
|
|
||||||
GIT_REMOTE=git.aridgwayweb.com/armistace/blog.git
|
|
||||||
|
|
||||||
# Notification Configuration
|
|
||||||
N8N_SECRET=
|
|
||||||
N8N_WEBHOOK_URL=
|
|
||||||
|
|
||||||
# Ollama Web Search (required for researcher agent)
|
|
||||||
OLLAMA_API_KEY=
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### CONTENT_CREATOR_MODELS Format
|
This container is going to be what I use to trigger a blog creation event
|
||||||
|
|
||||||
The `CONTENT_CREATOR_MODELS` variable should be a JSON array of Ollama model names. Each model will be used by one of the four journalist agents. Example:
|
To do this we will
|
||||||
|
|
||||||
|
1. Download a Note from Trilium (I need to work out how to choose this, maybe something with a tag and then this can add a tag when it's used? each note is a separate post, a tag to indicate if it's ready as well?)
|
||||||
|
|
||||||
|
`SELECT NOTES WHERE blog_tag = true AND used_tag = false AND ready_tag = true?`
|
||||||
|
|
||||||
|
2. Check if the ollama server is available (it's currently on a box that may not be on)
|
||||||
|
|
||||||
|
- If not on stop
|
||||||
|
|
||||||
|
3. `git pull git.aridgwayweb.com/blog`
|
||||||
|
|
||||||
|
- set up git creds: git.name = ai git.email = ridgwayinfrastructure@gmail.com get git password stored (create service user in gitea for this)
|
||||||
|
|
||||||
|
- `git config set upstream Auto true`
|
||||||
|
|
||||||
|
4. cd /src/content
|
||||||
|
|
||||||
|
5. take the information from the Trilium note and prepare a 500 word blog post, insert the following at the top
|
||||||
|
|
||||||
```
|
```
|
||||||
CONTENT_CREATOR_MODELS=["llama3.1:8b", "qwen2.5:7b", "phi4:latest"]
|
Title: <title>
|
||||||
|
Date: <date post created>
|
||||||
|
Modified: <date post created>
|
||||||
|
Category: <this will come from a tag on the post (category: <category>)>
|
||||||
|
Tags: <ai generated tags>, ai_content, not_human_content
|
||||||
|
Slug: <have ai write slug?>
|
||||||
|
Authors: <model name>.ai
|
||||||
|
Summary: <have ai write a 10 word summary of the post>
|
||||||
```
|
```
|
||||||
|
|
||||||
### OLLAMA_API_KEY
|
6. write it to `<title>.md`
|
||||||
|
|
||||||
The researcher agent uses Ollama's native web search API. Create an API key from your Ollama account (https://ollama.com) and add it to your `.env` file. This uses your existing Ollama subscription for web searches.
|
7. `git checkout -b <title>`
|
||||||
|
|
||||||
## Project Structure
|
8. `git add .`
|
||||||
|
|
||||||
```
|
9. `git commit -m "<have ai write a git commit about the post>"`
|
||||||
blog_creator/
|
|
||||||
├── .env # Environment variables (create this)
|
|
||||||
├── .gitea/workflows/deploy.yml # Gitea Actions workflow
|
|
||||||
├── docker-compose.yml # Local development setup
|
|
||||||
├── requirements.txt # Python dependencies
|
|
||||||
├── README.md # This file
|
|
||||||
└── src/
|
|
||||||
├── main.py # Entry point
|
|
||||||
└── ai_generators/
|
|
||||||
├── ollama_md_generator.py # Main interface (used by main.py)
|
|
||||||
├── blog_flow.py # CrewAI Flow orchestrator
|
|
||||||
├── crews/
|
|
||||||
│ ├── research_crew/ # Researcher agent with web search
|
|
||||||
│   ├── writing_crew/            # Four journalist agents
|
|
||||||
│ └── editor_crew/ # Editor agent with metadata generation
|
|
||||||
└── tools/
|
|
||||||
```
|
|
||||||
|
|
||||||
## Local Development Setup
|
10. `git push`
|
||||||
|
|
||||||
### Using Docker Compose
|
11. Send notification via n8n to matrix for me to review?
|
||||||
|
|
||||||
1. Clone the repository and navigate to the project directory
|
|
||||||
|
|
||||||
2. Create your `.env` file with all required variables
|
|
||||||
|
|
||||||
3. Start the services:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker-compose up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
This starts:
|
|
||||||
- `blog_creator` - The main application container
|
|
||||||
- `chroma` - ChromaDB vector database
|
|
||||||
|
|
||||||
4. The container will run `main.py` automatically on startup. To run manually:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker-compose exec blog_creator python src/main.py
|
|
||||||
```
|
|
||||||
|
|
||||||
### Manual Setup (without Docker)
|
|
||||||
|
|
||||||
1. Install system dependencies:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
apt update && apt install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Create and activate a virtual environment:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m venv .venv
|
|
||||||
source .venv/bin/activate
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Install Python dependencies:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Configure Git:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git config --global user.name "Blog Creator"
|
|
||||||
git config --global user.email "your-email@example.com"
|
|
||||||
git config --global push.autoSetupRemote true
|
|
||||||
```
|
|
||||||
|
|
||||||
5. Run the application:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python src/main.py
|
|
||||||
```
|
|
||||||
|
|
||||||
## How It Works
|
|
||||||
|
|
||||||
### Trilium Integration
|
|
||||||
|
|
||||||
The system fetches notes from Trilium that are tagged for blog creation. Each note becomes one blog post. The note content is used as the basis for the AI-generated article.
|
|
||||||
|
|
||||||
### Blog Generation Flow
|
|
||||||
|
|
||||||
1. **Research Phase** - The researcher agent investigates the topic using web search, critically evaluates claims, and produces verified findings
|
|
||||||
|
|
||||||
2. **Writing Phase** - Four journalist agents write creative drafts in parallel, each with different temperature and top_p settings for variety
|
|
||||||
|
|
||||||
3. **Editor Phase** - The editor:
|
|
||||||
- Chunks and embeds all drafts into ChromaDB
|
|
||||||
- Queries the vector database for relevant context
|
|
||||||
- Generates the final polished document with metadata header
|
|
||||||
|
|
||||||
### Output Format
|
|
||||||
|
|
||||||
Each blog post includes a metadata header followed by the markdown body:
|
|
||||||
|
|
||||||
```
|
|
||||||
Title: Designing and Building an AI Enhanced CCTV System
|
|
||||||
Date: 2026-02-02 20:00
|
|
||||||
Modified: 2026-02-02 20:00
|
|
||||||
Category: Homelab
|
|
||||||
Tags: proxmox, hardware, self host, homelab, ai_content, not_human_content
|
|
||||||
Slug: ai-enhanced-cctv
|
|
||||||
Authors: phi4-mini.ai, qwen3.ai, gemma3.ai
|
|
||||||
Summary: Home CCTV Security has become a bastion of cloud subscription awfulness. This blog describes creating your own AI enhanced system.
|
|
||||||
|
|
||||||
<full markdown blog body follows>
|
|
||||||
```
|
|
||||||
|
|
||||||
The metadata fields are generated as follows:
|
|
||||||
- **Title** - From the Trilium note title
|
|
||||||
- **Date/Modified** - Current datetime when generated
|
|
||||||
- **Category** - AI-generated single word (e.g., Homelab, DevOps, Security)
|
|
||||||
- **Tags** - AI-generated relevant tags plus `ai_content, not_human_content`
|
|
||||||
- **Slug** - AI-generated URL-friendly slug
|
|
||||||
- **Authors** - Derived from CONTENT_CREATOR_MODELS (model name + `.ai`)
|
|
||||||
- **Summary** - AI-generated 15-25 word summary
|
|
||||||
|
|
||||||
### Git Workflow
|
|
||||||
|
|
||||||
After generation, the blog post is:
|
|
||||||
1. Committed to a new branch named after the slug
|
|
||||||
2. Pushed to the configured Git remote
|
|
||||||
3. A notification is sent via n8n to Matrix for review
|
|
||||||
|
|
||||||
## Gitea Actions Workflow
|
|
||||||
|
|
||||||
The `.gitea/workflows/deploy.yml` file defines an automated workflow that:
|
|
||||||
|
|
||||||
- Runs on a schedule (daily at 18:15 UTC) or on push to master branch
|
|
||||||
- Installs all dependencies
|
|
||||||
- Creates the `.env` file from Gitea secrets and variables
|
|
||||||
- Runs the blog generation script
|
|
||||||
|
|
||||||
### Setting Up Gitea Variables
|
|
||||||
|
|
||||||
In your Gitea repository settings, configure the following:
|
|
||||||
|
|
||||||
**Variables** (Repository Settings -> Variables):
|
|
||||||
- `TRILIUM_HOST` - Your Trilium server hostname
|
|
||||||
- `TRILIUM_PORT` - Trilium port
|
|
||||||
- `TRILIUM_PROTOCOL` - http or https
|
|
||||||
- `OLLAMA_PROTOCOL` - http or https
|
|
||||||
- `OLLAMA_HOST` - Ollama server hostname
|
|
||||||
- `OLLAMA_PORT` - Ollama port (default 11434)
|
|
||||||
- `EMBEDDING_MODEL` - Embedding model name
|
|
||||||
- `EDITOR_MODEL` - Editor/Researcher model name
|
|
||||||
- `CONTENT_CREATOR_MODELS_1` through `CONTENT_CREATOR_MODELS_4` - Individual model names (the workflow joins these into an array)
|
|
||||||
- `GIT_PROTOCOL` - https or ssh
|
|
||||||
- `GIT_REMOTE` - Git repository URL
|
|
||||||
- `GIT_USER` - Git username for pushing
|
|
||||||
- `N8N_WEBHOOK_URL` - n8n webhook URL for notifications
|
|
||||||
- `CHROMA_HOST` - ChromaDB hostname
|
|
||||||
- `CHROMA_PORT` - ChromaDB port
|
|
||||||
|
|
||||||
**Secrets** (Repository Settings -> Secrets):
|
|
||||||
- `TRILIUM_PASS` - Trilium password
|
|
||||||
- `TRILIUM_TOKEN` - Trilium API token
|
|
||||||
- `GIT_PASS` - Git password or personal access token
|
|
||||||
- `N8N_SECRET` - n8n webhook secret key
|
|
||||||
- `OLLAMA_API_KEY` - Ollama API key for web search
|
|
||||||
|
|
||||||
### Workflow Triggers
|
|
||||||
|
|
||||||
The workflow runs automatically when:
|
|
||||||
- A push is made to the master branch
|
|
||||||
- The scheduled cron time is reached (18:15 UTC daily)
|
|
||||||
|
|
||||||
To trigger manually, push any change to master or modify the cron schedule in `.gitea/workflows/deploy.yml`.
|
|
||||||
|
|
||||||
## Customizing Agent Behavior
|
|
||||||
|
|
||||||
Agent personalities and task instructions are defined in YAML files under `src/ai_generators/crews/*/config/`. You can modify these without changing Python code:
|
|
||||||
|
|
||||||
- `research_crew/config/agents.yaml` - Researcher role, goal, backstory
|
|
||||||
- `research_crew/config/tasks.yaml` - Research task description
|
|
||||||
- `writing_crew/config/agents.yaml` - Four journalist personalities
|
|
||||||
- `writing_crew/config/tasks.yaml` - Writing task descriptions
|
|
||||||
- `editor_crew/config/agents.yaml` - Editor role, goal, backstory
|
|
||||||
- `editor_crew/config/tasks.yaml` - Editing task and metadata format
|
|
||||||
|
|
||||||
After editing YAML files, restart the application or container to apply changes.
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### Ollama Connection Errors
|
|
||||||
|
|
||||||
Ensure the Ollama server is running and accessible from the blog_creator container. Check `OLLAMA_HOST` and `OLLAMA_PORT` in your `.env` file.
|
|
||||||
|
|
||||||
### ChromaDB Connection Errors
|
|
||||||
|
|
||||||
Verify ChromaDB is running and the `CHROMA_HOST` and `CHROMA_PORT` variables are correct. In Docker Compose, use `chroma` as the host name.
|
|
||||||
|
|
||||||
### Ollama Web Search Errors
|
|
||||||
|
|
||||||
If the researcher agent fails with web search errors, check that `OLLAMA_API_KEY` is set correctly. Verify your Ollama subscription is active and has web search access.
|
|
||||||
|
|
||||||
### Empty Output
|
|
||||||
|
|
||||||
If blog posts are generated but empty, check:
|
|
||||||
- Ollama models are downloaded and available
|
|
||||||
- `CONTENT_CREATOR_MODELS` contains valid model names
|
|
||||||
- Sufficient timeout for model inference (default is 30 minutes per operation)
|
|
||||||
|
|
||||||
### Git Push Failures
|
|
||||||
|
|
||||||
Verify `GIT_USER` and `GIT_PASS` are correct and the user has write access to the remote repository. Check that the remote URL in `GIT_REMOTE` is accessible.
|
|
||||||
|
|
||||||
## Development Notes
|
|
||||||
|
|
||||||
- The `main.py` entry point should not be modified for normal operation
|
|
||||||
- All AI generation logic is in `src/ai_generators/`
|
|
||||||
- The Flow pattern allows easy addition of new crews or steps
|
|
||||||
- Vector database collections are named `blog_{title}_{random_id}` and persist across runs
|
|
||||||
|
|||||||
2
generated_files/.gitignore
vendored
2
generated_files/.gitignore
vendored
@ -1,2 +0,0 @@
|
|||||||
*
|
|
||||||
!.gitignore
|
|
||||||
29
generated_files/creating_an_ollama_blog_writer.md
Normal file
29
generated_files/creating_an_ollama_blog_writer.md
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
```markdown
|
||||||
|
# Creating an Ollama Blog Writer: A Hilariously Tedious Adventure
|
||||||
|
|
||||||
|
Hey tech enthusiasts! 👋 I’m back with another installment of my tech journey, but this time it’s personal. I decided to create a Python script that not only writes blogs for me (please don’t tell my boss), but also uses Ollama for some AI-assisted content creation and connects with Trilium for structured note-taking. Let’s dive into the details!
|
||||||
|
|
||||||
|
### Step 1: Get Your Ollama On
|
||||||
|
|
||||||
|
First things first, I needed a Python file that could talk to my local Ollama instance. If you haven't heard of Ollama, it's like a tiny llama in your terminal that helps with text generation. It took me a while to figure out how to configure the `.env` file and set up the connection properly. But once I did, I was off to a running start!
|
||||||
|
|
||||||
|
### Step 2: Connecting Trilium for Structured Notes
|
||||||
|
|
||||||
|
For this part, I used a Python library called `trilium-py` (because why not?). It's like having a brain that can store and retrieve information in an organized way. To make sure my notes are super structured, I had to find the right prompts and ensure they were fed into Ollama correctly. This part was more about figuring out how to structure the data than actually coding—but hey, it’s all part of the fun!
|
||||||
|
|
||||||
|
### Step 3: Automating the Blog Creation
|
||||||
|
|
||||||
|
Now that I have my notes and AI-generated content sorted, it was time to automate the blog creation process. Here’s where things got a bit Git-y (yes, I made up that word). I wrote a script that would create a new branch in our company's blog repo, push the changes, and voilà—a PR! Just like that, my humble contributions were ready for review by the big boss.
|
||||||
|
|
||||||
|
### Step 4: Sending Notifications to Matrix
|
||||||
|
|
||||||
|
Finally, as any good DevRel should do, I sent out a notification to our internal Matrix channel. It’s like Slack but with more tech talk and less memes about dogs in hats. The message was short and sweet—just a summary of the blog changes and a request for feedback. Hey, if Elon can tweet at Tesla shareholders, why not send a quick matrix message?
|
||||||
|
|
||||||
|
### Wrapping Up
|
||||||
|
|
||||||
|
Creating this Ollama Blog Writer wasn’t just about writing better blogs (though that would be nice). It was about embracing the joy of automation and the occasional struggle to get things working right. I learned a lot about Python libraries, local server configurations, and how to communicate effectively with my team via Matrix.
|
||||||
|
|
||||||
|
So there you have it—a step-by-step guide on how not to write blogs but definitely how to automate the process. If you’re into tech, automation, or just want to laugh at someone else’s coding mishaps, this blog is for you!
|
||||||
|
|
||||||
|
Keep on hacking (and automating), [Your Name]
|
||||||
|
```
|
||||||
23
generated_files/powerbi_and_api_performance.md
Normal file
23
generated_files/powerbi_and_api_performance.md
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
Title: When Data Visualization Meets Frustration: A Comic Take on PowerBI's API Woes
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
In the ever-evolving world of data and tech, few tools hold as much promise—or frustration—as Microsoft's PowerBI. Its sleek interface, intuitive visuals, and promise to simplify data into digestible insights have made it a favorite among many. But beneath its polished surface lies a storm of challenges that can leave even the most seasoned developers in its dust.
|
||||||
|
|
||||||
|
Imagine this: you've spent hours refining your data model, only to find that your team's hierarchy resists your attempt to share sensitive information without breaking hearts. "We're all on different tiers," you mutter, your frustration evident. But here's the kicker—PowerBI won't even let everyone in your company join the party if they're not up to tier 5. And guess what? Most companies operate in reality tier 3 at best. So, step one: API calls to PowerBI. You'd think pulling data would be straightforward, but oh, how it pulls you into a tailspin.
|
||||||
|
|
||||||
|
Here's where things get interesting: PowerBI APIs are mostly limited to small tables. It's like trying to fit furniture through a door that's slightly too narrow—it just doesn't work unless you have a magic wand (or in this case, an API upgrade). Imagine needing to fetch data from three different on-premises databases seamlessly; PowerBI might just give you the finger.
|
||||||
|
|
||||||
|
Now, if your company happens to be in the Microsoft ecosystem—like the Azure universe—then maybe things are a bit easier. But here's the kicker: it's not being top-to-bottom within that ecosystem that counts as success. If even one part is outside, you're facing performance issues akin to driving through a snowstorm without an umbrella. You get the picture.
|
||||||
|
|
||||||
|
So what does this mean for the average user? Unless you've got no choice but to use PowerBI... well, let's just say it might not be your friend in such scenarios. It's like having a GPS that only works if you're willing to drive on a dirt road and expect it to guide you through with zero warnings—sounds great until you end up stranded.
|
||||||
|
|
||||||
|
But wait, maybe there's silver lining. Other tools have learned the hard lessons PowerBI has taught us. They allow APIs beyond just small tables and handle ecosystems with ease, making them more versatile for real-world applications. It's like upgrading your car's GPS to one that not only knows all the roads but also can navigate through different weather conditions without complaints.
|
||||||
|
|
||||||
|
In conclusion, while PowerBI is undeniably a powerful tool when used correctly—like driving in calm weather on perfectly paved roads—it has its limitations. Its API restrictions and ecosystem integration issues make it less than ideal for many real-world scenarios. So unless you're in a controlled environment where these issues don't arise, maybe it's time to explore other options that can handle the data journey with more grace.
|
||||||
|
|
||||||
|
After all, Data Overload isn't just a Star Trek term—it could be your reality if you're not careful with PowerBI.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*So, is PowerBI still your best friend in this complex tech world? Or are there better tools out there waiting to be discovered? Share your thoughts and experiences below!*
|
||||||
35
generated_files/the_melding_of_data_engineering_and_ai.md
Normal file
35
generated_files/the_melding_of_data_engineering_and_ai.md
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# Wrangling Data: A Reality Check
|
||||||
|
|
||||||
|
Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task.
|
||||||
|
|
||||||
|
**Phase 1: The Hunt**
|
||||||
|
|
||||||
|
First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy.
|
||||||
|
|
||||||
|
**Phase 2: Deciphering the Ancient Texts**
|
||||||
|
|
||||||
|
Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?). One week for a small bakery, six months for a multinational insurance company. It’s a wild ride.
|
||||||
|
|
||||||
|
You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin.
|
||||||
|
|
||||||
|
**Phase 3: The Schema Struggle**
|
||||||
|
|
||||||
|
Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. It’s a puzzle, and a frustrating one at that.
|
||||||
|
|
||||||
|
**Phase 4: The Tooling Tango**
|
||||||
|
|
||||||
|
You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. It's like finding a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy.
|
||||||
|
|
||||||
|
**Phase 5: The Reporting Revelation (and Despair)**
|
||||||
|
|
||||||
|
Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads.
|
||||||
|
|
||||||
|
**The AI Factor – A Realistic Perspective**
|
||||||
|
|
||||||
|
Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. This will for quite some time be the point where we need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. It can’t tell you if the data is actually useful.
|
||||||
|
|
||||||
|
We can use tools like datahub to capture some of this business knowledge but those tools are only as good as the people who use them. We need to make sure AI is used for those uniform parts – schema discovery, finding the tools, ugh reporting. But where the rubber hits the road… that's where we need people, and that we are making sure that there is a person interpreting not only what goes out… but what goes in.
|
||||||
|
|
||||||
|
**The Bottom Line**
|
||||||
|
|
||||||
|
It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it.
|
||||||
@ -1,10 +1,6 @@
|
|||||||
ollama
|
ollama
|
||||||
httpx
|
|
||||||
trilium-py
|
trilium-py
|
||||||
gitpython
|
gitpython
|
||||||
PyGithub
|
PyGithub
|
||||||
chromadb
|
chromadb
|
||||||
crewai
|
langchain-ollama
|
||||||
crewai-tools
|
|
||||||
PyJWT
|
|
||||||
dotenv
|
|
||||||
|
|||||||
@ -1,318 +0,0 @@
|
|||||||
"""
|
|
||||||
CrewAI Flow that orchestrates the blog-generation pipeline.
|
|
||||||
|
|
||||||
Flow
|
|
||||||
----
|
|
||||||
1. **Research crew** – a critical researcher with web-search investigates the
|
|
||||||
topic and produces verified findings.
|
|
||||||
2. **Writing crew** – four creative journalists write draft blog articles
|
|
||||||
in parallel (async tasks).
|
|
||||||
3. **Editor crew** – a critical editor loads the journalist drafts into
|
|
||||||
ChromaDB, queries for the most relevant context, and produces the final
|
|
||||||
polished markdown document complete with a metadata header (Title, Date,
|
|
||||||
Category, Tags, Slug, Authors, Summary).
|
|
||||||
|
|
||||||
The ChromaDB integration is preserved from the original implementation: each
|
|
||||||
journalist draft is chunked, embedded, and stored in a collection; the editor
|
|
||||||
receives the top-N most relevant chunks as context.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
import string
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
import chromadb
|
|
||||||
from crewai.flow.flow import Flow, listen, start
|
|
||||||
from ollama import Client
|
|
||||||
from pydantic import BaseModel, ConfigDict
|
|
||||||
|
|
||||||
from ai_generators.crews.editor_crew.editor_crew import EditorCrew
|
|
||||||
from ai_generators.crews.research_crew.research_crew import ResearchCrew
|
|
||||||
from ai_generators.crews.writing_crew.writing_crew import WritingCrew
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# State
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
class BlogFlowState(BaseModel):
|
|
||||||
"""Structured state for the blog generation flow."""
|
|
||||||
|
|
||||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
||||||
|
|
||||||
title: str = ""
|
|
||||||
inner_title: str = ""
|
|
||||||
content: str = ""
|
|
||||||
research_findings: str = ""
|
|
||||||
drafts: list[str] = []
|
|
||||||
final_document: str = ""
|
|
||||||
date: str = ""
|
|
||||||
authors: str = ""
|
|
||||||
category: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Flow
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
class BlogFlow(Flow[BlogFlowState]):
|
|
||||||
"""Orchestrate researcher → journalists → editor via CrewAI Flows.
|
|
||||||
|
|
||||||
Usage::
|
|
||||||
|
|
||||||
flow = BlogFlow()
|
|
||||||
result = flow.kickoff(inputs={
|
|
||||||
"title": "my_blog_slug",
|
|
||||||
"inner_title": "My Blog Title",
|
|
||||||
"content": "<original content>",
|
|
||||||
})
|
|
||||||
print(result) # final markdown document
|
|
||||||
"""
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Helpers – Ollama / ChromaDB / embedding utilities
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_ollama_url() -> str:
|
|
||||||
return (
|
|
||||||
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
|
||||||
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_chroma_client() -> chromadb.HttpClient:
|
|
||||||
chroma_port = int(os.environ["CHROMA_PORT"])
|
|
||||||
return chromadb.HttpClient(host=os.environ["CHROMA_HOST"], port=chroma_port)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_ollama_client() -> Client:
|
|
||||||
return Client(host=BlogFlow._get_ollama_url())
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _id_generator(size: int = 6) -> str:
|
|
||||||
return "".join(
|
|
||||||
random.choice(string.ascii_uppercase + string.digits) for _ in range(size)
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _split_into_chunks(text: str, chunk_size: int = 100) -> list[str]:
|
|
||||||
words = re.findall(r"\S+", text)
|
|
||||||
chunks: list[str] = []
|
|
||||||
current_chunk: list[str] = []
|
|
||||||
word_count = 0
|
|
||||||
for word in words:
|
|
||||||
current_chunk.append(word)
|
|
||||||
word_count += 1
|
|
||||||
if word_count >= chunk_size:
|
|
||||||
chunks.append(" ".join(current_chunk))
|
|
||||||
current_chunk = []
|
|
||||||
word_count = 0
|
|
||||||
if current_chunk:
|
|
||||||
chunks.append(" ".join(current_chunk))
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_embeddings(chunks: list[str]) -> list[list[float]]:
|
|
||||||
ollama_client = BlogFlow._get_ollama_client()
|
|
||||||
embed_model = os.environ["EMBEDDING_MODEL"]
|
|
||||||
try:
|
|
||||||
embeds = ollama_client.embed(model=embed_model, input=chunks)
|
|
||||||
return embeds.get("embeddings", []) # type: ignore[no-any-return]
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"Error generating embeddings: {exc}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def _load_drafts_to_vector_db(self, drafts: list[str]) -> chromadb.Collection:
|
|
||||||
"""Load journalist drafts into a new ChromaDB collection and return it."""
|
|
||||||
chroma = self._get_chroma_client()
|
|
||||||
collection_name = (
|
|
||||||
f"blog_{self.state.title.lower().replace(' ', '_')}_{self._id_generator()}"
|
|
||||||
)
|
|
||||||
collection = chroma.get_or_create_collection(name=collection_name)
|
|
||||||
|
|
||||||
for i, draft in enumerate(drafts):
|
|
||||||
model_name = f"journalist_{i + 1}"
|
|
||||||
chunks = self._split_into_chunks(draft)
|
|
||||||
if not chunks or all(chunk.strip() == "" for chunk in chunks):
|
|
||||||
print(f"Skipping {model_name} – no content generated")
|
|
||||||
continue
|
|
||||||
print(f"Generating embeddings for {model_name}")
|
|
||||||
embeds = self._get_embeddings(chunks)
|
|
||||||
if not embeds:
|
|
||||||
print(f"Skipping {model_name} – no embeddings generated")
|
|
||||||
continue
|
|
||||||
if len(embeds) != len(chunks):
|
|
||||||
min_length = min(len(embeds), len(chunks))
|
|
||||||
chunks = chunks[:min_length]
|
|
||||||
embeds = embeds[:min_length]
|
|
||||||
if min_length == 0:
|
|
||||||
print(f"Skipping {model_name} – no valid content/embeddings pairs")
|
|
||||||
continue
|
|
||||||
ids = [model_name + str(j) for j in range(len(chunks))]
|
|
||||||
metadata = [{"model_agent": model_name} for _ in chunks]
|
|
||||||
print(f"Loading into collection for {model_name}")
|
|
||||||
collection.add(
|
|
||||||
documents=chunks,
|
|
||||||
embeddings=embeds, # type: ignore[arg-type]
|
|
||||||
ids=ids,
|
|
||||||
metadatas=metadata, # type: ignore[arg-type]
|
|
||||||
)
|
|
||||||
return collection
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _query_vector_db(collection: chromadb.Collection, query_text: str) -> str:
|
|
||||||
"""Query the ChromaDB collection and return the most relevant
|
|
||||||
document chunks joined as a single string."""
|
|
||||||
ollama_client = BlogFlow._get_ollama_client()
|
|
||||||
embed_model = os.environ["EMBEDDING_MODEL"]
|
|
||||||
try:
|
|
||||||
embed_result = ollama_client.embed(model=embed_model, input=query_text)
|
|
||||||
query_embed = embed_result.get("embeddings", [])
|
|
||||||
if not query_embed:
|
|
||||||
print(
|
|
||||||
"Warning: Failed to generate query embeddings, "
|
|
||||||
"falling back to empty list"
|
|
||||||
)
|
|
||||||
query_embed = [[]]
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"Error generating query embeddings: {exc}")
|
|
||||||
query_embed = [[]]
|
|
||||||
|
|
||||||
try:
|
|
||||||
query_result = collection.query(
|
|
||||||
query_embeddings=query_embed,
|
|
||||||
n_results=100, # type: ignore[arg-type]
|
|
||||||
)
|
|
||||||
documents = query_result.get("documents", [])
|
|
||||||
if documents and len(documents) > 0 and len(documents[0]) > 0:
|
|
||||||
return "\n\n".join(documents[0])
|
|
||||||
print("Warning: No relevant documents found in collection")
|
|
||||||
return "No relevant information found in drafts."
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"Error querying collection: {exc}")
|
|
||||||
return "No relevant information found in drafts due to query error."
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Flow steps
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
@start()
|
|
||||||
def research(self) -> str:
|
|
||||||
"""Run the research crew to investigate the blog topic."""
|
|
||||||
print("=" * 60)
|
|
||||||
print("RESEARCH PHASE – investigating topic")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
result = (
|
|
||||||
ResearchCrew()
|
|
||||||
.crew()
|
|
||||||
.kickoff(
|
|
||||||
inputs={
|
|
||||||
"inner_title": self.state.inner_title,
|
|
||||||
"content": self.state.content,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.state.research_findings = result.raw
|
|
||||||
print("Research phase complete")
|
|
||||||
return result.raw
|
|
||||||
|
|
||||||
@listen(research)
|
|
||||||
def write_drafts(self, research_findings: str) -> list[str]:
|
|
||||||
"""Run the writing crew (4 journalists in parallel) and collect
|
|
||||||
their draft outputs."""
|
|
||||||
print("=" * 60)
|
|
||||||
print("WRITING PHASE – 4 journalists drafting in parallel")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
result = (
|
|
||||||
WritingCrew()
|
|
||||||
.crew()
|
|
||||||
.kickoff(
|
|
||||||
inputs={
|
|
||||||
"inner_title": self.state.inner_title,
|
|
||||||
"content": self.state.content,
|
|
||||||
"research_findings": research_findings,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Collect all draft outputs from the crew's task outputs
|
|
||||||
drafts: list[str] = []
|
|
||||||
for task_output in result.tasks_output:
|
|
||||||
drafts.append(task_output.raw)
|
|
||||||
|
|
||||||
self.state.drafts = drafts
|
|
||||||
print(f"Writing phase complete – {len(drafts)} drafts produced")
|
|
||||||
return drafts
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _compute_authors() -> str:
|
|
||||||
"""Build an author string from the CONTENT_CREATOR_MODELS env var.
|
|
||||||
|
|
||||||
Each model name is stripped of any tag suffix (e.g. ``:latest``)
|
|
||||||
and ``.ai`` is appended. Multiple models are joined with ``', '``.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
|
||||||
except (KeyError, json.JSONDecodeError):
|
|
||||||
models = []
|
|
||||||
authors = ", ".join(m.split(":")[0].split("/")[-1] + ".ai" for m in models)
|
|
||||||
return authors or "unknown.ai"
|
|
||||||
|
|
||||||
@listen(write_drafts)
|
|
||||||
def edit_final(self, drafts: list[str]) -> str:
|
|
||||||
"""Load journalist drafts into the vector DB, query for the most
|
|
||||||
relevant context, and run the editor crew to produce the final
|
|
||||||
polished document with a metadata header."""
|
|
||||||
print("=" * 60)
|
|
||||||
print("EDITOR PHASE – producing final document")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
# ---- Compute date and authors for the metadata header ----
|
|
||||||
if not self.state.date:
|
|
||||||
self.state.date = datetime.now().strftime("%Y-%m-%d %H:%M")
|
|
||||||
self.state.authors = self._compute_authors()
|
|
||||||
if not self.state.category:
|
|
||||||
self.state.category = "<pick one word that best describes the topic, e.g. Homelab, DevOps, Security, Networking>"
|
|
||||||
|
|
||||||
# ---- Vector DB integration ----
|
|
||||||
print("Loading drafts into vector database")
|
|
||||||
collection = self._load_drafts_to_vector_db(drafts)
|
|
||||||
|
|
||||||
# Build the editor's brief so we can query the vector DB with it
|
|
||||||
editor_brief = (
|
|
||||||
f"You are an editor taking information from 3 Software "
|
|
||||||
f"Developers and Data experts writing a 5000 word blog article. "
|
|
||||||
f"You like when they use almost no code examples. "
|
|
||||||
f"You are also Australian. The title for the blog is "
|
|
||||||
f"{self.state.inner_title}. "
|
|
||||||
f"The basis for the content of the blog is: "
|
|
||||||
f"<blog>{self.state.content}</blog>"
|
|
||||||
)
|
|
||||||
draft_context = self._query_vector_db(collection, editor_brief)
|
|
||||||
print("Showing pertinent info from drafts used in final edited edition")
|
|
||||||
|
|
||||||
# ---- Editor crew ----
|
|
||||||
result = (
|
|
||||||
EditorCrew()
|
|
||||||
.crew()
|
|
||||||
.kickoff(
|
|
||||||
inputs={
|
|
||||||
"inner_title": self.state.inner_title,
|
|
||||||
"content": self.state.content,
|
|
||||||
"draft_context": draft_context,
|
|
||||||
"date": self.state.date,
|
|
||||||
"authors": self.state.authors,
|
|
||||||
"category": self.state.category,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.state.final_document = result.raw
|
|
||||||
print("Editor phase complete")
|
|
||||||
return result.raw
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
editor:
|
|
||||||
role: >
|
|
||||||
Critical Blog Editor
|
|
||||||
goal: >
|
|
||||||
Produce the final, polished ~5000-word version of a blog about {inner_title},
|
|
||||||
complete with a metadata header (Title, Date, Category, Tags, Slug, Authors,
|
|
||||||
Summary)
|
|
||||||
backstory: >
|
|
||||||
You are an editor taking information from 3 Software Developers and
|
|
||||||
Data experts writing a 5000 word blog article. You like when they use
|
|
||||||
almost no code examples. You are also Australian. The content may have
|
|
||||||
light comedic elements; you are more professional and will attempt to
|
|
||||||
tone these down. You are critical of repeated sentences, inconsistencies,
|
|
||||||
and weak arguments. You ensure the final document is cohesive,
|
|
||||||
well-structured, and publication-ready. You never leave placeholder
|
|
||||||
text — every section must contain finished content. You always begin
|
|
||||||
your output with a plain-text metadata block (Title, Date, Modified,
|
|
||||||
Category, Tags, Slug, Authors, Summary) followed by a blank line and
|
|
||||||
then the full markdown body. You generate sensible Category, Tags,
|
|
||||||
Slug and Summary values based on the blog content.
|
|
||||||
@ -1,45 +0,0 @@
|
|||||||
edit_task:
|
|
||||||
description: >
|
|
||||||
Generate the final, 5000 word blog post using this information
|
|
||||||
from the journalist drafts:
|
|
||||||
<context>{draft_context}</context>
|
|
||||||
|
|
||||||
You are an editor taking information from 3 Software Developers and
|
|
||||||
Data experts writing a 5000 word blog article. You like when they use
|
|
||||||
almost no code examples. You are also Australian. The content may have
|
|
||||||
light comedic elements; you are more professional and will attempt to
|
|
||||||
tone these down. As this person produce the final version of this blog
|
|
||||||
as a markdown document keeping in mind the context provided by the
|
|
||||||
previous drafts. You are to produce the content not placeholders for
|
|
||||||
further editors. The title for the blog is {inner_title}. Avoid
|
|
||||||
repeated sentences. The basis for the content of the blog is:
|
|
||||||
<blog>{content}</blog>
|
|
||||||
|
|
||||||
IMPORTANT: The output MUST start with a metadata block in exactly this
|
|
||||||
format, followed by a blank line, then the blog body. Do not wrap the
|
|
||||||
metadata block in code fences or any other markup. Generate sensible
|
|
||||||
values for Category, Tags, Slug and Summary based on the blog content.
|
|
||||||
|
|
||||||
Title: {inner_title}
|
|
||||||
Date: {date}
|
|
||||||
Modified: {date}
|
|
||||||
Category: {category}
|
|
||||||
Tags: <generate 3-5 short lowercase tags relevant to the content>, ai_content, not_human_content
|
|
||||||
Slug: <generate a short URL-friendly slug using lowercase words separated by hyphens>
|
|
||||||
Authors: {authors}
|
|
||||||
Summary: <write a single sentence summary of roughly 15-25 words>
|
|
||||||
|
|
||||||
After the metadata block and blank line, write the full blog body in
|
|
||||||
markdown. Do not repeat the title as a heading in the body.
|
|
||||||
|
|
||||||
- Only output the metadata block and then the markdown body.
|
|
||||||
- Do not wrap in markdown code fences.
|
|
||||||
- Do not provide a commentary on the drafts in the context.
|
|
||||||
- Produce real content, not placeholders for further editors.
|
|
||||||
- Avoid repeated sentences.
|
|
||||||
expected_output: >
|
|
||||||
A metadata block (Title, Date, Modified, Category, Tags, Slug, Authors,
|
|
||||||
Summary) followed by a blank line and then a polished ~5000-word markdown
|
|
||||||
blog article about {inner_title}. No commentary. No placeholders. Cohesive
|
|
||||||
and publication-ready.
|
|
||||||
agent: editor
|
|
||||||
@ -1,51 +0,0 @@
|
|||||||
"""Editor crew – produces the final polished blog document."""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from crewai import LLM, Agent, Crew, Process, Task
|
|
||||||
from crewai.project import CrewBase, agent, crew, task
|
|
||||||
|
|
||||||
|
|
||||||
def _get_ollama_url() -> str:
|
|
||||||
return (
|
|
||||||
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
|
||||||
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@CrewBase
|
|
||||||
class EditorCrew:
|
|
||||||
"""Crew with a single critical editor who produces the final blog."""
|
|
||||||
|
|
||||||
agents_config = "config/agents.yaml"
|
|
||||||
tasks_config = "config/tasks.yaml"
|
|
||||||
|
|
||||||
@agent
|
|
||||||
def editor(self) -> Agent:
|
|
||||||
return Agent(
|
|
||||||
config=self.agents_config["editor"], # type: ignore[index]
|
|
||||||
llm=LLM(
|
|
||||||
model=f"ollama/{os.environ['EDITOR_MODEL']}",
|
|
||||||
base_url=_get_ollama_url(),
|
|
||||||
temperature=0.6,
|
|
||||||
top_p=0.5,
|
|
||||||
),
|
|
||||||
verbose=True,
|
|
||||||
max_iter=30,
|
|
||||||
respect_context_window=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
@task
|
|
||||||
def edit_task(self) -> Task:
|
|
||||||
return Task(
|
|
||||||
config=self.tasks_config["edit_task"], # type: ignore[index]
|
|
||||||
)
|
|
||||||
|
|
||||||
@crew
|
|
||||||
def crew(self) -> Crew:
|
|
||||||
return Crew(
|
|
||||||
agents=self.agents,
|
|
||||||
tasks=self.tasks,
|
|
||||||
process=Process.sequential,
|
|
||||||
verbose=True,
|
|
||||||
)
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
researcher:
|
|
||||||
role: >
|
|
||||||
Critical Technology Researcher
|
|
||||||
goal: >
|
|
||||||
Research and critically evaluate information related to {inner_title}
|
|
||||||
backstory: >
|
|
||||||
You are a skeptical, thorough technology researcher with years of
|
|
||||||
experience in Software Development and DevOps. You never accept
|
|
||||||
information at face value and always cross-reference claims with
|
|
||||||
multiple sources. You are particularly critical of hype, marketing
|
|
||||||
language, and unsubstantiated technical claims. You prefer primary
|
|
||||||
sources, official documentation, and peer-reviewed material over
|
|
||||||
blog posts and opinion pieces. When conflicting information is found
|
|
||||||
you clearly note the discrepancy and provide both viewpoints with
|
|
||||||
credibility assessments.
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
research_task:
|
|
||||||
description: >
|
|
||||||
Research the topic: {inner_title}
|
|
||||||
|
|
||||||
The original content to research and expand upon is:
|
|
||||||
<blog>{content}</blog>
|
|
||||||
|
|
||||||
Your task is to:
|
|
||||||
1. Search the web for current, accurate information related to this topic.
|
|
||||||
2. Critically evaluate the claims made in the original content.
|
|
||||||
3. Find supporting or contradicting evidence from reputable sources.
|
|
||||||
4. Identify any outdated information, common misconceptions, or factual errors.
|
|
||||||
5. Provide a comprehensive research summary with verified facts, clearly
|
|
||||||
distinguishing between confirmed information and areas of uncertainty.
|
|
||||||
|
|
||||||
Be thorough and skeptical. Only include information you can verify from
|
|
||||||
reliable sources. Flag anything that seems exaggerated or unverified.
|
|
||||||
expected_output: >
|
|
||||||
A comprehensive research report with verified facts, source citations,
|
|
||||||
and credibility assessments. Clearly distinguish between confirmed
|
|
||||||
information and areas of uncertainty. Include supporting and
|
|
||||||
contradicting evidence where found.
|
|
||||||
agent: researcher
|
|
||||||
@ -1,54 +0,0 @@
|
|||||||
"""Research crew – investigates a blog topic using web search."""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from crewai import LLM, Agent, Crew, Process, Task
|
|
||||||
from crewai.project import CrewBase, agent, crew, task
|
|
||||||
|
|
||||||
from ai_generators.tools import OllamaWebSearchTool
|
|
||||||
|
|
||||||
|
|
||||||
def _get_ollama_url() -> str:
|
|
||||||
return (
|
|
||||||
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
|
||||||
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@CrewBase
|
|
||||||
class ResearchCrew:
|
|
||||||
"""Crew that researches a blog topic with a critical, web-searching
|
|
||||||
researcher agent."""
|
|
||||||
|
|
||||||
agents_config = "config/agents.yaml"
|
|
||||||
tasks_config = "config/tasks.yaml"
|
|
||||||
|
|
||||||
@agent
|
|
||||||
def researcher(self) -> Agent:
|
|
||||||
return Agent(
|
|
||||||
config=self.agents_config["researcher"], # type: ignore[index]
|
|
||||||
tools=[OllamaWebSearchTool()],
|
|
||||||
llm=LLM(
|
|
||||||
model=f"ollama/{os.environ['EDITOR_MODEL']}",
|
|
||||||
base_url=_get_ollama_url(),
|
|
||||||
temperature=0.3,
|
|
||||||
),
|
|
||||||
verbose=True,
|
|
||||||
max_iter=25,
|
|
||||||
respect_context_window=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
@task
|
|
||||||
def research_task(self) -> Task:
|
|
||||||
return Task(
|
|
||||||
config=self.tasks_config["research_task"], # type: ignore[index]
|
|
||||||
)
|
|
||||||
|
|
||||||
@crew
|
|
||||||
def crew(self) -> Crew:
|
|
||||||
return Crew(
|
|
||||||
agents=self.agents,
|
|
||||||
tasks=self.tasks,
|
|
||||||
process=Process.sequential,
|
|
||||||
verbose=True,
|
|
||||||
)
|
|
||||||
@ -1,48 +0,0 @@
|
|||||||
journalist_one:
|
|
||||||
role: >
|
|
||||||
Creative Technology Journalist
|
|
||||||
goal: >
|
|
||||||
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
|
||||||
backstory: >
|
|
||||||
You are a journalist, Software Developer and DevOps expert writing a
|
|
||||||
draft blog article for other tech enthusiasts. You like to use almost no
|
|
||||||
code examples and prefer to talk in a light comedic tone. You are also
|
|
||||||
Australian. You favour vivid analogies and storytelling to explain
|
|
||||||
technical concepts. Your writing is warm, slightly irreverent, and
|
|
||||||
accessible.
|
|
||||||
|
|
||||||
journalist_two:
|
|
||||||
role: >
|
|
||||||
Creative Technology Journalist
|
|
||||||
goal: >
|
|
||||||
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
|
||||||
backstory: >
|
|
||||||
You are a journalist, Software Developer and DevOps expert writing a
|
|
||||||
draft blog article for other tech enthusiasts. You like to use almost no
|
|
||||||
code examples and prefer to talk in a light comedic tone. You are also
|
|
||||||
Australian. You lean into sharp wit and concise, punchy sentences. You
|
|
||||||
love finding unexpected connections between seemingly unrelated topics.
|
|
||||||
|
|
||||||
journalist_three:
|
|
||||||
role: >
|
|
||||||
Creative Technology Journalist
|
|
||||||
goal: >
|
|
||||||
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
|
||||||
backstory: >
|
|
||||||
You are a journalist, Software Developer and DevOps expert writing a
|
|
||||||
draft blog article for other tech enthusiasts. You like to use almost no
|
|
||||||
code examples and prefer to talk in a light comedic tone. You are also
|
|
||||||
Australian. You prefer a conversational, meandering style that draws the
|
|
||||||
reader in with personal anecdotes and rhetorical questions.
|
|
||||||
|
|
||||||
journalist_four:
|
|
||||||
role: >
|
|
||||||
Creative Technology Journalist
|
|
||||||
goal: >
|
|
||||||
Write a creative, engaging ~5000-word draft blog article about {inner_title}
|
|
||||||
backstory: >
|
|
||||||
You are a journalist, Software Developer and DevOps expert writing a
|
|
||||||
draft blog article for other tech enthusiasts. You like to use almost no
|
|
||||||
code examples and prefer to talk in a light comedic tone. You are also
|
|
||||||
Australian. You take a methodical, analytical approach with detailed
|
|
||||||
explanations and systematic breakdowns of complex topics.
|
|
||||||
@ -1,79 +0,0 @@
|
|||||||
write_draft_one:
|
|
||||||
description: >
|
|
||||||
Write a 5000 word draft blog article as a markdown document.
|
|
||||||
The title for the blog is {inner_title}.
|
|
||||||
Do not output the title in the markdown.
|
|
||||||
|
|
||||||
The basis for the content of the blog is:
|
|
||||||
<blog>{content}</blog>
|
|
||||||
|
|
||||||
Research findings to incorporate and validate against:
|
|
||||||
<research>{research_findings}</research>
|
|
||||||
|
|
||||||
Write creatively, with a light comedic tone. You are Australian.
|
|
||||||
Use almost no code examples. Make it engaging for tech enthusiasts.
|
|
||||||
Only output the markdown content — no commentary, no meta-description.
|
|
||||||
expected_output: >
|
|
||||||
A ~5000-word markdown draft blog article about {inner_title}.
|
|
||||||
No title in the output. No commentary or meta-description.
|
|
||||||
agent: journalist_one
|
|
||||||
|
|
||||||
write_draft_two:
|
|
||||||
description: >
|
|
||||||
Write a 5000 word draft blog article as a markdown document.
|
|
||||||
The title for the blog is {inner_title}.
|
|
||||||
Do not output the title in the markdown.
|
|
||||||
|
|
||||||
The basis for the content of the blog is:
|
|
||||||
<blog>{content}</blog>
|
|
||||||
|
|
||||||
Research findings to incorporate and validate against:
|
|
||||||
<research>{research_findings}</research>
|
|
||||||
|
|
||||||
Write creatively, with a light comedic tone. You are Australian.
|
|
||||||
Use almost no code examples. Make it engaging for tech enthusiasts.
|
|
||||||
Only output the markdown content — no commentary, no meta-description.
|
|
||||||
expected_output: >
|
|
||||||
A ~5000-word markdown draft blog article about {inner_title}.
|
|
||||||
No title in the output. No commentary or meta-description.
|
|
||||||
agent: journalist_two
|
|
||||||
|
|
||||||
write_draft_three:
|
|
||||||
description: >
|
|
||||||
Write a 5000 word draft blog article as a markdown document.
|
|
||||||
The title for the blog is {inner_title}.
|
|
||||||
Do not output the title in the markdown.
|
|
||||||
|
|
||||||
The basis for the content of the blog is:
|
|
||||||
<blog>{content}</blog>
|
|
||||||
|
|
||||||
Research findings to incorporate and validate against:
|
|
||||||
<research>{research_findings}</research>
|
|
||||||
|
|
||||||
Write creatively, with a light comedic tone. You are Australian.
|
|
||||||
Use almost no code examples. Make it engaging for tech enthusiasts.
|
|
||||||
Only output the markdown content — no commentary, no meta-description.
|
|
||||||
expected_output: >
|
|
||||||
A ~5000-word markdown draft blog article about {inner_title}.
|
|
||||||
No title in the output. No commentary or meta-description.
|
|
||||||
agent: journalist_three
|
|
||||||
|
|
||||||
write_draft_four:
|
|
||||||
description: >
|
|
||||||
Write a 5000 word draft blog article as a markdown document.
|
|
||||||
The title for the blog is {inner_title}.
|
|
||||||
Do not output the title in the markdown.
|
|
||||||
|
|
||||||
The basis for the content of the blog is:
|
|
||||||
<blog>{content}</blog>
|
|
||||||
|
|
||||||
Research findings to incorporate and validate against:
|
|
||||||
<research>{research_findings}</research>
|
|
||||||
|
|
||||||
Write creatively, with a light comedic tone. You are Australian.
|
|
||||||
Use almost no code examples. Make it engaging for tech enthusiasts.
|
|
||||||
Only output the markdown content — no commentary, no meta-description.
|
|
||||||
expected_output: >
|
|
||||||
A ~5000-word markdown draft blog article about {inner_title}.
|
|
||||||
No title in the output. No commentary or meta-description.
|
|
||||||
agent: journalist_four
|
|
||||||
@ -1,128 +0,0 @@
|
|||||||
"""Writing crew – three journalists who write creative blog drafts in parallel."""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
from crewai import LLM, Agent, Crew, Process, Task
|
|
||||||
from crewai.project import CrewBase, agent, crew, task
|
|
||||||
|
|
||||||
|
|
||||||
def _get_ollama_url() -> str:
|
|
||||||
return (
|
|
||||||
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
|
||||||
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_agent_models() -> list[str]:
|
|
||||||
return json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
|
||||||
|
|
||||||
|
|
||||||
# Creative-style presets per journalist: (temperature, top_p)
|
|
||||||
_JOURNALIST_PARAMS: dict[int, tuple[float, float]] = {
|
|
||||||
1: (0.70, 0.60), # moderate creativity
|
|
||||||
2: (0.85, 0.50), # high creativity, tighter focus
|
|
||||||
3: (0.60, 0.70), # lower creativity, wider associations
|
|
||||||
4: (0.50, 0.80), # methodical, analytical approach
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@CrewBase
|
|
||||||
class WritingCrew:
|
|
||||||
"""Crew of three creative journalists who write blog drafts in parallel."""
|
|
||||||
|
|
||||||
agents_config = "config/agents.yaml"
|
|
||||||
tasks_config = "config/tasks.yaml"
|
|
||||||
|
|
||||||
# ---- helpers ----
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _journalist_llm(index: int) -> LLM:
|
|
||||||
models = _get_agent_models()
|
|
||||||
model = models[index % len(models)]
|
|
||||||
temp, top_p = _JOURNALIST_PARAMS[index + 1]
|
|
||||||
return LLM(
|
|
||||||
model=f"ollama/{model}",
|
|
||||||
base_url=_get_ollama_url(),
|
|
||||||
temperature=temp,
|
|
||||||
top_p=top_p,
|
|
||||||
)
|
|
||||||
|
|
||||||
# ---- agents ----
|
|
||||||
|
|
||||||
@agent
|
|
||||||
def journalist_one(self) -> Agent:
|
|
||||||
return Agent(
|
|
||||||
config=self.agents_config["journalist_one"], # type: ignore[index]
|
|
||||||
llm=self._journalist_llm(0),
|
|
||||||
verbose=True,
|
|
||||||
max_iter=30,
|
|
||||||
respect_context_window=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
@agent
|
|
||||||
def journalist_two(self) -> Agent:
|
|
||||||
return Agent(
|
|
||||||
config=self.agents_config["journalist_two"], # type: ignore[index]
|
|
||||||
llm=self._journalist_llm(1),
|
|
||||||
verbose=True,
|
|
||||||
max_iter=30,
|
|
||||||
respect_context_window=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
@agent
|
|
||||||
def journalist_three(self) -> Agent:
|
|
||||||
return Agent(
|
|
||||||
config=self.agents_config["journalist_three"], # type: ignore[index]
|
|
||||||
llm=self._journalist_llm(2),
|
|
||||||
verbose=True,
|
|
||||||
max_iter=30,
|
|
||||||
respect_context_window=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
@agent
|
|
||||||
def journalist_four(self) -> Agent:
|
|
||||||
return Agent(
|
|
||||||
config=self.agents_config["journalist_four"], # type: ignore[index]
|
|
||||||
llm=self._journalist_llm(3),
|
|
||||||
verbose=True,
|
|
||||||
max_iter=30,
|
|
||||||
respect_context_window=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# ---- tasks ----
|
|
||||||
|
|
||||||
@task
|
|
||||||
def write_draft_one(self) -> Task:
|
|
||||||
return Task(
|
|
||||||
config=self.tasks_config["write_draft_one"], # type: ignore[index]
|
|
||||||
)
|
|
||||||
|
|
||||||
@task
|
|
||||||
def write_draft_two(self) -> Task:
|
|
||||||
return Task(
|
|
||||||
config=self.tasks_config["write_draft_two"], # type: ignore[index]
|
|
||||||
)
|
|
||||||
|
|
||||||
@task
|
|
||||||
def write_draft_three(self) -> Task:
|
|
||||||
return Task(
|
|
||||||
config=self.tasks_config["write_draft_three"], # type: ignore[index]
|
|
||||||
)
|
|
||||||
|
|
||||||
@task
|
|
||||||
def write_draft_four(self) -> Task:
|
|
||||||
return Task(
|
|
||||||
config=self.tasks_config["write_draft_four"], # type: ignore[index]
|
|
||||||
)
|
|
||||||
|
|
||||||
# ---- crew ----
|
|
||||||
|
|
||||||
@crew
|
|
||||||
def crew(self) -> Crew:
|
|
||||||
return Crew(
|
|
||||||
agents=self.agents,
|
|
||||||
tasks=self.tasks,
|
|
||||||
process=Process.sequential,
|
|
||||||
verbose=True,
|
|
||||||
)
|
|
||||||
@ -1,181 +1,159 @@
|
|||||||
"""
|
import os, re, json, random, time
|
||||||
OllamaGenerator – public interface for blog generation.
|
|
||||||
|
|
||||||
This module preserves the same API that ``main.py`` relies on while
|
|
||||||
delegating the heavy lifting to a CrewAI Flow (``blog_flow.BlogFlow``)
|
|
||||||
that orchestrates a researcher, four journalists, and an editor via
|
|
||||||
YAML-configured crews.
|
|
||||||
|
|
||||||
Breaking changes from the previous implementation
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
* ``langchain-ollama`` is no longer required – the ``generate_system_message``
|
|
||||||
helper now talks directly to the Ollama HTTP API via the ``ollama`` client.
|
|
||||||
* Internally, blog generation is driven by CrewAI agents, crews and a Flow
|
|
||||||
rather than by hand-rolled retry loops and thread-pool executors.
|
|
||||||
|
|
||||||
Public interface (unchanged)
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
* ``OllamaGenerator(title, content, inner_title)``
|
|
||||||
* ``save_to_file(filename)`` – generates the blog and writes it to disk
|
|
||||||
* ``generate_system_message(prompt_system, prompt_human)`` – simple LLM call
|
|
||||||
* ``self.response`` – the final markdown text (populated after ``save_to_file``)
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, TimeoutError
|
|
||||||
|
|
||||||
from ollama import Client
|
from ollama import Client
|
||||||
|
import chromadb
|
||||||
from ai_generators.blog_flow import BlogFlow
|
from langchain_ollama import ChatOllama
|
||||||
|
|
||||||
|
|
||||||
class OllamaGenerator:
|
class OllamaGenerator:
|
||||||
"""Generate a polished blog post from raw content using CrewAI agents.
|
|
||||||
|
|
||||||
Parameters
|
def __init__(self, title: str, content: str, inner_title: str):
|
||||||
----------
|
|
||||||
title : str
|
|
||||||
An OS-friendly slug used for file names and ChromaDB collection
|
|
||||||
names (e.g. ``"my_blog_title"``).
|
|
||||||
content : str
|
|
||||||
The raw source content that the blog should be based on.
|
|
||||||
inner_title : str
|
|
||||||
The human-readable blog title (used in prompts and output).
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
title: str,
|
|
||||||
content: str,
|
|
||||||
inner_title: str,
|
|
||||||
date: str | None = None,
|
|
||||||
category: str | None = None,
|
|
||||||
):
|
|
||||||
self.title = title
|
self.title = title
|
||||||
self.inner_title = inner_title
|
self.inner_title = inner_title
|
||||||
self.content = content
|
self.content = content
|
||||||
self.date = date
|
self.response = None
|
||||||
self.category = category
|
self.chroma = chromadb.HttpClient(host="172.19.0.2", port=8000)
|
||||||
self.response: str | None = None
|
ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}"
|
||||||
|
|
||||||
# ---- Ollama connection (used by generate_system_message) ----
|
|
||||||
ollama_url = (
|
|
||||||
f"{os.environ['OLLAMA_PROTOCOL']}://"
|
|
||||||
f"{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
|
|
||||||
)
|
|
||||||
self.ollama_client = Client(host=ollama_url)
|
self.ollama_client = Client(host=ollama_url)
|
||||||
self.ollama_model = os.environ["EDITOR_MODEL"]
|
self.ollama_model = os.environ["EDITOR_MODEL"]
|
||||||
|
self.embed_model = os.environ["EMBEDDING_MODEL"]
|
||||||
|
self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
||||||
|
self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5) #This is the level head in the room
|
||||||
|
self.prompt_inject = f"""
|
||||||
|
You are a journalist, Software Developer and DevOps expert
|
||||||
|
writing a 1000 word draft blog for other tech enthusiasts.
|
||||||
|
You like to use almost no code examples and prefer to talk
|
||||||
|
in a light comedic tone. You are also Australian
|
||||||
|
As this person write this blog as a markdown document.
|
||||||
|
The title for the blog is {self.inner_title}.
|
||||||
|
Do not output the title in the markdown.
|
||||||
|
The basis for the content of the blog is:
|
||||||
|
{self.content}
|
||||||
|
"""
|
||||||
|
|
||||||
# ---- Validate required env vars early ----
|
def split_into_chunks(self, text, chunk_size=100):
|
||||||
|
'''Split text into chunks of size chunk_size'''
|
||||||
|
words = re.findall(r'\S+', text)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
current_chunk = []
|
||||||
|
word_count = 0
|
||||||
|
|
||||||
|
for word in words:
|
||||||
|
current_chunk.append(word)
|
||||||
|
word_count += 1
|
||||||
|
|
||||||
|
if word_count >= chunk_size:
|
||||||
|
chunks.append(' '.join(current_chunk))
|
||||||
|
current_chunk = []
|
||||||
|
word_count = 0
|
||||||
|
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(' '.join(current_chunk))
|
||||||
|
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
def generate_draft(self, model) -> str:
|
||||||
|
'''Generate a draft blog post using the specified model'''
|
||||||
try:
|
try:
|
||||||
_ = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
|
# the idea behind this is to make the "creativity" random amongst the content creators
|
||||||
except (KeyError, json.JSONDecodeError) as exc:
|
# contorlling temperature will allow cause the output to allow more "random" connections in sentences
|
||||||
raise Exception(
|
# Controlling top_p will tighten or loosen the embedding connections made
|
||||||
f"CONTENT_CREATOR_MODELS env var is missing or invalid: {exc}"
|
# The result should be varied levels of "creativity" in the writing of the drafts
|
||||||
)
|
# for more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html
|
||||||
|
temp = random.uniform(0.5, 1.0)
|
||||||
|
top_p = random.uniform(0.4, 0.8)
|
||||||
|
top_k = int(random.uniform(30, 80))
|
||||||
|
agent_llm = ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k)
|
||||||
|
messages = [
|
||||||
|
("system", self.prompt_inject),
|
||||||
|
("human", "make the blog post in a format to be edited easily" )
|
||||||
|
]
|
||||||
|
response = agent_llm.invoke(messages)
|
||||||
|
# self.response = self.ollama_client.chat(model=model,
|
||||||
|
# messages=[
|
||||||
|
# {
|
||||||
|
# 'role': 'user',
|
||||||
|
# 'content': f'{self.prompt_inject}',
|
||||||
|
# },
|
||||||
|
# ])
|
||||||
|
#print ("draft")
|
||||||
|
#print (response)
|
||||||
|
return response.text()#['message']['content']
|
||||||
|
|
||||||
try:
|
except Exception as e:
|
||||||
_ = int(os.environ["CHROMA_PORT"])
|
raise Exception(f"Failed to generate blog draft: {e}")
|
||||||
except (KeyError, ValueError) as exc:
|
|
||||||
raise Exception(f"CHROMA_PORT is not an integer: {exc}")
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
def get_draft_embeddings(self, draft_chunks):
|
||||||
# Public API
|
'''Get embeddings for the draft chunks'''
|
||||||
# ------------------------------------------------------------------
|
embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks)
|
||||||
|
return embeds.get('embeddings', [])
|
||||||
|
|
||||||
def save_to_file(self, filename: str) -> None:
|
|
||||||
"""Run the full CrewAI blog-generation flow and write the result
|
|
||||||
to *filename*.
|
|
||||||
|
|
||||||
After this call ``self.response`` contains the final markdown text.
|
def load_to_vector_db(self):
|
||||||
"""
|
'''Load the generated blog drafts into a vector database'''
|
||||||
self.response = self.generate_markdown()
|
collection_name = f"blog_{self.title.lower().replace(" ", "_")}"
|
||||||
with open(filename, "w") as f:
|
collection = self.chroma.get_or_create_collection(name=collection_name, metadata={"hnsw:space": "cosine"})
|
||||||
f.write(self.response)
|
#if any(collection.name == collectionname for collectionname in self.chroma.list_collections()):
|
||||||
|
# self.chroma.delete_collection("blog_creator")
|
||||||
|
for model in self.agent_models:
|
||||||
|
print (f"Generating draft from {model} for load into vector database")
|
||||||
|
draft_chunks = self.split_into_chunks(self.generate_draft(model))
|
||||||
|
print(f"generating embeds")
|
||||||
|
embeds = self.get_draft_embeddings(draft_chunks)
|
||||||
|
ids = [model + str(i) for i in range(len(draft_chunks))]
|
||||||
|
chunknumber = list(range(len(draft_chunks)))
|
||||||
|
metadata = [{"model_agent": model} for index in chunknumber]
|
||||||
|
print(f'loading into collection')
|
||||||
|
collection.add(documents=draft_chunks, embeddings=embeds, ids=ids, metadatas=metadata)
|
||||||
|
|
||||||
|
return collection
|
||||||
|
|
||||||
|
|
||||||
def generate_markdown(self) -> str:
|
def generate_markdown(self) -> str:
|
||||||
"""Execute the CrewAI Flow and return the final markdown document.
|
|
||||||
|
|
||||||
The Flow:
|
prompt_system = f"""
|
||||||
1. **Research crew** – a critical researcher with web search
|
You are an editor taking information from {len(self.agent_models)} Software
|
||||||
investigates the topic and produces verified findings.
|
Developers and Data experts
|
||||||
2. **Writing crew** – four creative journalists write draft
|
writing a 3000 word blog for other tech enthusiasts.
|
||||||
blog articles in parallel.
|
You like when they use almost no code examples and the
|
||||||
3. **Editor crew** – a critical editor loads the journalist drafts
|
voice is in a light comedic tone. You are also Australian
|
||||||
into the vector DB, queries for relevant context, and produces
|
As this person produce and an amalgamtion of this blog as a markdown document.
|
||||||
the polished final document.
|
The title for the blog is {self.inner_title}.
|
||||||
"""
|
Do not output the title in the markdown. Avoid repeated sentences
|
||||||
inputs = {
|
The basis for the content of the blog is:
|
||||||
"title": self.title,
|
{self.content}
|
||||||
"inner_title": self.inner_title,
|
"""
|
||||||
"content": self.content,
|
try:
|
||||||
}
|
query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings']
|
||||||
if self.date is not None:
|
collection = self.load_to_vector_db()
|
||||||
inputs["date"] = self.date
|
collection_query = collection.query(query_embeddings=query_embed, n_results=100)
|
||||||
if self.category is not None:
|
print("Showing pertinent info from drafts used in final edited edition")
|
||||||
inputs["category"] = self.category
|
pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0])
|
||||||
|
#print(pertinent_draft_info)
|
||||||
|
prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
|
||||||
|
print("Generating final document")
|
||||||
|
messages = [("system", prompt_system), ("human", prompt_human),]
|
||||||
|
self.response = self.llm.invoke(messages).text()
|
||||||
|
# self.response = self.ollama_client.chat(model=self.ollama_model,
|
||||||
|
# messages=[
|
||||||
|
# {
|
||||||
|
# 'role': 'user',
|
||||||
|
# 'content': f'{prompt_enhanced}',
|
||||||
|
# },
|
||||||
|
# ])
|
||||||
|
#print ("Markdown Generated")
|
||||||
|
#print (self.response)
|
||||||
|
return self.response#['message']['content']
|
||||||
|
|
||||||
flow = BlogFlow()
|
except Exception as e:
|
||||||
result = flow.kickoff(inputs=inputs)
|
raise Exception(f"Failed to generate markdown: {e}")
|
||||||
return str(result)
|
|
||||||
|
|
||||||
def generate_system_message(self, prompt_system: str, prompt_human: str) -> str:
|
def save_to_file(self, filename: str) -> None:
|
||||||
"""Send a system/human message pair to the editor model and return
|
with open(filename, "w") as f:
|
||||||
the assistant's response.
|
f.write(self.generate_markdown())
|
||||||
|
|
||||||
This is a lightweight helper used by ``main.py`` for generating
|
def generate_commit_message(self):
|
||||||
commit messages and notification text – it does **not** invoke the
|
prompt_system = "You are a blog creator commiting a piece of content to a central git repo"
|
||||||
full CrewAI Flow.
|
prompt_human = f"Generate a 10 word git commit message describing {self.response}"
|
||||||
"""
|
messages = [("system", prompt_system), ("human", prompt_human),]
|
||||||
|
commit_message = self.llm.invoke(messages).text()
|
||||||
def _generate() -> str:
|
return commit_message
|
||||||
response = self.ollama_client.chat(
|
|
||||||
model=self.ollama_model,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": prompt_system},
|
|
||||||
{"role": "user", "content": prompt_human},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
return response["message"]["content"]
|
|
||||||
|
|
||||||
# Retry mechanism with 30-minute timeout (same as the original)
|
|
||||||
timeout_seconds = 30 * 60
|
|
||||||
max_retries = 3
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
|
||||||
future = executor.submit(_generate)
|
|
||||||
result = future.result(timeout=timeout_seconds)
|
|
||||||
return result
|
|
||||||
except TimeoutError:
|
|
||||||
print(
|
|
||||||
f"AI call timed out after {timeout_seconds} seconds "
|
|
||||||
f"on attempt {attempt + 1}"
|
|
||||||
)
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
print("Retrying...")
|
|
||||||
time.sleep(5)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
f"AI call failed to complete after {max_retries} "
|
|
||||||
f"attempts with {timeout_seconds} second timeouts"
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
print(
|
|
||||||
f"Attempt {attempt + 1} failed with error: {exc}. Retrying..."
|
|
||||||
)
|
|
||||||
time.sleep(5)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
f"Failed to generate system message after "
|
|
||||||
f"{max_retries} attempts: {exc}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Should never reach here, but satisfy type-checkers
|
|
||||||
raise RuntimeError("Unexpected exit from generate_system_message")
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
# Tools package for the blog generation CrewAI flow.
|
|
||||||
from ai_generators.tools.ollama_web_search_tool import OllamaWebSearchTool
|
|
||||||
|
|
||||||
__all__ = ["OllamaWebSearchTool"]
|
|
||||||
@ -1,156 +0,0 @@
|
|||||||
"""
|
|
||||||
Custom CrewAI tool that wraps Ollama's native web search API.
|
|
||||||
|
|
||||||
This tool allows CrewAI agents to perform web searches using an Ollama
|
|
||||||
subscription instead of third-party services like Serper or EXA.
|
|
||||||
|
|
||||||
Uses direct HTTP requests via httpx with explicit Authorization: Bearer
|
|
||||||
header to ensure the OLLAMA_API_KEY is properly passed to the Ollama cloud
|
|
||||||
API endpoint (https://ollama.com/api/web_search).
|
|
||||||
|
|
||||||
Requires:
|
|
||||||
- httpx library: pip install httpx (already a transitive dependency of crewai)
|
|
||||||
- OLLAMA_API_KEY environment variable set with your Ollama API key
|
|
||||||
|
|
||||||
Reference: https://docs.ollama.com/capabilities/web-search
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
from crewai.tools import BaseTool
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
OLLAMA_WEB_SEARCH_URL = "https://ollama.com/api/web_search"
|
|
||||||
|
|
||||||
|
|
||||||
class OllamaWebSearchInput(BaseModel):
|
|
||||||
"""Input schema for OllamaWebSearchTool."""
|
|
||||||
|
|
||||||
query: str = Field(
|
|
||||||
...,
|
|
||||||
description="The web search query string. Be specific and include relevant keywords.",
|
|
||||||
)
|
|
||||||
max_results: int = Field(
|
|
||||||
default=5,
|
|
||||||
ge=1,
|
|
||||||
le=10,
|
|
||||||
description="Maximum number of search results to return (1-10, default 5).",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class OllamaWebSearchTool(BaseTool):
|
|
||||||
"""
|
|
||||||
Web search tool using Ollama's native web search API.
|
|
||||||
|
|
||||||
This tool performs live web searches and returns relevant results with
|
|
||||||
titles, URLs, and content snippets. It's ideal for research tasks that
|
|
||||||
require current, up-to-date information from the internet.
|
|
||||||
|
|
||||||
The tool requires an Ollama subscription and the OLLAMA_API_KEY environment
|
|
||||||
variable to be set.
|
|
||||||
|
|
||||||
Authentication is handled by sending the OLLAMA_API_KEY as a Bearer token
|
|
||||||
in the Authorization header, as documented at:
|
|
||||||
https://docs.ollama.com/capabilities/web-search
|
|
||||||
|
|
||||||
Example usage:
|
|
||||||
from ai_generators.tools.ollama_web_search_tool import OllamaWebSearchTool
|
|
||||||
|
|
||||||
researcher = Agent(
|
|
||||||
role="Researcher",
|
|
||||||
goal="Research topics thoroughly",
|
|
||||||
tools=[OllamaWebSearchTool()],
|
|
||||||
)
|
|
||||||
"""
|
|
||||||
|
|
||||||
name: str = "ollama_web_search"
|
|
||||||
description: str = (
|
|
||||||
"Search the web for current information using Ollama's web search API. "
|
|
||||||
"Use this tool when you need to find up-to-date information, verify claims, "
|
|
||||||
"find supporting or contradicting evidence, or research topics that require "
|
|
||||||
"current data. Returns search results with titles, URLs, and content snippets."
|
|
||||||
)
|
|
||||||
args_schema: type[BaseModel] = OllamaWebSearchInput
|
|
||||||
|
|
||||||
def _run(self, query: str, max_results: int = 5) -> str:
|
|
||||||
"""
|
|
||||||
Execute a web search and return formatted results.
|
|
||||||
|
|
||||||
Makes a POST request to https://ollama.com/api/web_search with the
|
|
||||||
OLLAMA_API_KEY as a Bearer token in the Authorization header.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: The search query string
|
|
||||||
max_results: Maximum number of results to return (1-10)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Formatted string with search results, each containing title, URL, and content
|
|
||||||
"""
|
|
||||||
api_key = os.environ.get("OLLAMA_API_KEY")
|
|
||||||
if not api_key:
|
|
||||||
return (
|
|
||||||
"Error: OLLAMA_API_KEY environment variable is not set. "
|
|
||||||
"Please set your Ollama API key."
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = httpx.post(
|
|
||||||
OLLAMA_WEB_SEARCH_URL,
|
|
||||||
json={"query": query, "max_results": max_results},
|
|
||||||
headers={
|
|
||||||
"Authorization": f"Bearer {api_key}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
},
|
|
||||||
timeout=30.0,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Raise for HTTP errors so we can catch them with specific messages
|
|
||||||
if response.status_code == 401:
|
|
||||||
return (
|
|
||||||
"Authentication error: OLLAMA_API_KEY was rejected. "
|
|
||||||
"Your key may be invalid or expired. Please verify it at "
|
|
||||||
"https://ollama.com/settings/keys"
|
|
||||||
)
|
|
||||||
if response.status_code == 429:
|
|
||||||
return (
|
|
||||||
"Rate limit exceeded: Too many search requests. "
|
|
||||||
"Please wait a moment and try again."
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
data = response.json()
|
|
||||||
results = data.get("results", [])
|
|
||||||
|
|
||||||
if not results:
|
|
||||||
return f"No search results found for query: '{query}'"
|
|
||||||
|
|
||||||
formatted_results = []
|
|
||||||
for i, result in enumerate(results, 1):
|
|
||||||
title = result.get("title", "No title")
|
|
||||||
url = result.get("url", "No URL")
|
|
||||||
content = result.get("content", "No content available")
|
|
||||||
|
|
||||||
formatted_results.append(
|
|
||||||
f"Result {i}:\nTitle: {title}\nURL: {url}\nContent: {content}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
return "\n".join(formatted_results)
|
|
||||||
|
|
||||||
except httpx.TimeoutException:
|
|
||||||
return (
|
|
||||||
"Timeout error: The web search request timed out. "
|
|
||||||
"Please try again with a simpler query."
|
|
||||||
)
|
|
||||||
except httpx.ConnectError:
|
|
||||||
return (
|
|
||||||
"Network error: Unable to connect to Ollama's web search service. "
|
|
||||||
"Please check your internet connection and try again."
|
|
||||||
)
|
|
||||||
except httpx.HTTPStatusError as exc:
|
|
||||||
return (
|
|
||||||
f"HTTP error {exc.response.status_code} from Ollama web search API: "
|
|
||||||
f"{exc.response.text}"
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
return f"Error performing web search: {exc}"
|
|
||||||
54
src/main.py
54
src/main.py
@ -1,13 +1,7 @@
|
|||||||
import ai_generators.ollama_md_generator as omg
|
import ai_generators.ollama_md_generator as omg
|
||||||
import trilium.notes as tn
|
import trilium.notes as tn
|
||||||
import repo_management.repo_manager as git_repo
|
import repo_management.repo_manager as git_repo
|
||||||
from notifications.n8n import N8NWebhookJwt
|
|
||||||
import string,os
|
import string,os
|
||||||
from datetime import datetime
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
load_dotenv()
|
|
||||||
print(os.environ["CONTENT_CREATOR_MODELS"])
|
|
||||||
|
|
||||||
|
|
||||||
tril = tn.TrilumNotes()
|
tril = tn.TrilumNotes()
|
||||||
|
|
||||||
@ -30,51 +24,11 @@ for note in tril_notes:
|
|||||||
ai_gen = omg.OllamaGenerator(os_friendly_title,
|
ai_gen = omg.OllamaGenerator(os_friendly_title,
|
||||||
tril_notes[note]['content'],
|
tril_notes[note]['content'],
|
||||||
tril_notes[note]['title'])
|
tril_notes[note]['title'])
|
||||||
blog_path = f"generated_files/{os_friendly_title}.md"
|
blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md"
|
||||||
ai_gen.save_to_file(blog_path)
|
ai_gen.save_to_file(blog_path)
|
||||||
|
|
||||||
|
|
||||||
# Generate commit messages and push to repo
|
# Generate commit messages and push to repo
|
||||||
print("Generating Commit Message")
|
commit_message = ai_gen.generate_commit_message()
|
||||||
git_sytem_prompt = "You are a blog creator commiting a piece of content to a central git repo"
|
git_user = os.environp["GIT_USER"]
|
||||||
git_human_prompt = f"Generate a 5 word git commit message describing {ai_gen.response}. ONLY OUTPUT THE RESPONSE"
|
|
||||||
commit_message = ai_gen.generate_system_message(git_sytem_prompt, git_human_prompt)
|
|
||||||
git_user = os.environ["GIT_USER"]
|
|
||||||
git_pass = os.environ["GIT_PASS"]
|
git_pass = os.environ["GIT_PASS"]
|
||||||
repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
|
repo_manager = git_repo("blog/", git_user, git_pass)
|
||||||
print("Pushing to Repo")
|
|
||||||
repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)
|
repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)
|
||||||
|
|
||||||
# Generate notification for Matrix
|
|
||||||
print("Generating Notification Message")
|
|
||||||
git_branch_url = f'https://git.aridgwayweb.com/armistace/blog/src/branch/{os_friendly_title}/src/content/{os_friendly_title}.md'
|
|
||||||
n8n_system_prompt = f"You are a blog creator notifiying the final editor of the final creation of blog available at {git_branch_url}"
|
|
||||||
n8n_prompt_human = f"""
|
|
||||||
Generate an informal 100 word
|
|
||||||
summary describing {ai_gen.response}.
|
|
||||||
Don't address it or use names. ONLY OUTPUT THE RESPONSE.
|
|
||||||
ONLY OUTPUT IN PLAINTEXT STRIP ALL MARKDOWN
|
|
||||||
"""
|
|
||||||
notification_message = ai_gen.generate_system_message(n8n_system_prompt, n8n_prompt_human)
|
|
||||||
secret_key = os.environ['N8N_SECRET']
|
|
||||||
webhook_url = os.environ['N8N_WEBHOOK_URL']
|
|
||||||
notification_string = f"""
|
|
||||||
<h2>{tril_notes[note]['title']}</h2>
|
|
||||||
<h3>Summary</h3>
|
|
||||||
<p>{notification_message}</p>
|
|
||||||
<h3>Branch</h3>
|
|
||||||
<p>{os_friendly_title}</p>
|
|
||||||
<p><a href="{git_branch_url}">Link to Branch</a></p>
|
|
||||||
"""
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"message": f"{notification_string}",
|
|
||||||
"timestamp": datetime.now().isoformat()
|
|
||||||
}
|
|
||||||
|
|
||||||
webhook_client = N8NWebhookJwt(secret_key, webhook_url)
|
|
||||||
|
|
||||||
print("Notifying")
|
|
||||||
n8n_result = webhook_client.send_webhook(payload)
|
|
||||||
|
|
||||||
print(f"N8N response: {n8n_result['status']}")
|
|
||||||
|
|||||||
@ -1,45 +0,0 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
import jwt
|
|
||||||
import requests
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
class N8NWebhookJwt:
|
|
||||||
def __init__(self, secret_key: str, webhook_url: str):
|
|
||||||
self.secret_key = secret_key
|
|
||||||
self.webhook_url = webhook_url
|
|
||||||
self.token_expiration = datetime.now() + timedelta(hours=1)
|
|
||||||
|
|
||||||
def _generate_jwt_token(self, payload: Dict) -> str:
|
|
||||||
"""Generate JWT token with the given payload."""
|
|
||||||
# Include expiration time (optional)
|
|
||||||
payload["exp"] = self.token_expiration.timestamp()
|
|
||||||
encoded_jwt = jwt.encode(
|
|
||||||
payload,
|
|
||||||
self.secret_key,
|
|
||||||
algorithm="HS256",
|
|
||||||
)
|
|
||||||
return encoded_jwt #jwt.decode(encoded_jwt, self.secret_key, algorithms=['HS256'])
|
|
||||||
|
|
||||||
def send_webhook(self, payload: Dict) -> Dict:
|
|
||||||
"""Send a webhook request with JWT authentication."""
|
|
||||||
# Generate JWT token
|
|
||||||
token = self._generate_jwt_token(payload)
|
|
||||||
|
|
||||||
# Set headers with JWT token
|
|
||||||
headers = {
|
|
||||||
"Authorization": f"Bearer {token}",
|
|
||||||
"Content-Type": "application/json"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Send POST request
|
|
||||||
response = requests.post(
|
|
||||||
self.webhook_url,
|
|
||||||
json=payload,
|
|
||||||
headers=headers
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle response
|
|
||||||
if response.status_code == 200:
|
|
||||||
return {"status": "success", "response": response.json()}
|
|
||||||
else:
|
|
||||||
return {"status": "error", "response": response.status_code, "message": response.text}
|
|
||||||
@ -1,11 +1,7 @@
|
|||||||
import os
|
import os, shutil
|
||||||
import shutil
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
from git import Repo
|
from git import Repo
|
||||||
from git.exc import GitCommandError
|
from git.exc import GitCommandError
|
||||||
|
|
||||||
|
|
||||||
class GitRepository:
|
class GitRepository:
|
||||||
# This is designed to be transitory it will desctruvtively create the repo at repo_path
|
# This is designed to be transitory it will desctruvtively create the repo at repo_path
|
||||||
# if you have uncommited changes you can kiss them goodbye!
|
# if you have uncommited changes you can kiss them goodbye!
|
||||||
@ -14,20 +10,11 @@ class GitRepository:
|
|||||||
def __init__(self, repo_path, username=None, password=None):
|
def __init__(self, repo_path, username=None, password=None):
|
||||||
git_protocol = os.environ["GIT_PROTOCOL"]
|
git_protocol = os.environ["GIT_PROTOCOL"]
|
||||||
git_remote = os.environ["GIT_REMOTE"]
|
git_remote = os.environ["GIT_REMOTE"]
|
||||||
# if username is not set we don't need parse to the url
|
remote = f"{git_protocol}://{username}:{password}@{git_remote}"
|
||||||
if username == None or password == None:
|
|
||||||
remote = f"{git_protocol}://{git_remote}"
|
|
||||||
else:
|
|
||||||
# of course if it is we need to parse and escape it so that it
|
|
||||||
# can generate a url
|
|
||||||
git_user = quote(username)
|
|
||||||
git_password = quote(password)
|
|
||||||
remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}"
|
|
||||||
|
|
||||||
if os.path.exists(repo_path):
|
if os.path.exists(repo_path):
|
||||||
shutil.rmtree(repo_path)
|
shutil.rmtree(repo_path)
|
||||||
self.repo_path = repo_path
|
self.repo_path = repo_path
|
||||||
print("Cloning Repo")
|
|
||||||
Repo.clone_from(remote, repo_path)
|
Repo.clone_from(remote, repo_path)
|
||||||
self.repo = Repo(repo_path)
|
self.repo = Repo(repo_path)
|
||||||
self.username = username
|
self.username = username
|
||||||
@ -42,7 +29,7 @@ class GitRepository:
|
|||||||
print(f"Cloning failed: {e}")
|
print(f"Cloning failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def fetch(self, remote_name="origin", ref_name="master"):
|
def fetch(self, remote_name='origin', ref_name='main'):
|
||||||
"""Fetch updates from a remote repository with authentication"""
|
"""Fetch updates from a remote repository with authentication"""
|
||||||
try:
|
try:
|
||||||
self.repo.remotes[remote_name].fetch(ref_name=ref_name)
|
self.repo.remotes[remote_name].fetch(ref_name=ref_name)
|
||||||
@ -51,11 +38,10 @@ class GitRepository:
|
|||||||
print(f"Fetching failed: {e}")
|
print(f"Fetching failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def pull(self, remote_name="origin", ref_name="master"):
|
def pull(self, remote_name='origin', ref_name='main'):
|
||||||
"""Pull updates from a remote repository with authentication"""
|
"""Pull updates from a remote repository with authentication"""
|
||||||
print("Pulling Latest Updates (if any)")
|
|
||||||
try:
|
try:
|
||||||
self.repo.remotes[remote_name].pull(ref_name)
|
self.repo.remotes[remote_name].pull(ref_name=ref_name)
|
||||||
return True
|
return True
|
||||||
except GitCommandError as e:
|
except GitCommandError as e:
|
||||||
print(f"Pulling failed: {e}")
|
print(f"Pulling failed: {e}")
|
||||||
@ -64,11 +50,21 @@ class GitRepository:
|
|||||||
def get_branches(self):
|
def get_branches(self):
|
||||||
"""List all branches in the repository"""
|
"""List all branches in the repository"""
|
||||||
return [branch.name for branch in self.repo.branches]
|
return [branch.name for branch in self.repo.branches]
|
||||||
|
|
||||||
|
|
||||||
|
def create_branch(self, branch_name, remote_name='origin', ref_name='main'):
|
||||||
|
"""Create a new branch in the repository with authentication."""
|
||||||
|
try:
|
||||||
|
# Use the same remote and ref as before
|
||||||
|
self.repo.git.branch(branch_name, commit=True)
|
||||||
|
return True
|
||||||
|
except GitCommandError as e:
|
||||||
|
print(f"Failed to create branch: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
def add_and_commit(self, message=None):
|
def add_and_commit(self, message=None):
|
||||||
"""Add and commit changes to the repository."""
|
"""Add and commit changes to the repository."""
|
||||||
try:
|
try:
|
||||||
print("Commiting latest draft")
|
|
||||||
# Add all changes
|
# Add all changes
|
||||||
self.repo.git.add(all=True)
|
self.repo.git.add(all=True)
|
||||||
# Commit with the provided message or a default
|
# Commit with the provided message or a default
|
||||||
@ -76,33 +72,20 @@ class GitRepository:
|
|||||||
commit_message = "Added and committed new content"
|
commit_message = "Added and committed new content"
|
||||||
else:
|
else:
|
||||||
commit_message = message
|
commit_message = message
|
||||||
self.repo.git.commit(message=commit_message)
|
self.repo.git.commit(commit_message=commit_message)
|
||||||
return True
|
return True
|
||||||
except GitCommandError as e:
|
except GitCommandError as e:
|
||||||
print(f"Commit failed: {e}")
|
print(f"Commit failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def create_copy_commit_push(self, file_path, title, commit_messge):
|
||||||
|
self.create_branch(title)
|
||||||
|
|
||||||
def create_copy_commit_push(self, file_path, title, commit_message):
|
shutil.copy(f"{file_path}", f"{self.repo_path}src/content/")
|
||||||
# Check if branch exists remotely
|
|
||||||
remote_branches = [
|
|
||||||
ref.name.split("/")[-1] for ref in self.repo.remotes.origin.refs
|
|
||||||
]
|
|
||||||
|
|
||||||
if title in remote_branches:
|
self.add_and_commit(commit_messge)
|
||||||
# Branch exists remotely, checkout and pull
|
|
||||||
self.repo.git.checkout(title)
|
|
||||||
self.pull(ref_name=title)
|
|
||||||
else:
|
|
||||||
# New branch, create from master
|
|
||||||
self.repo.git.checkout("-b", title, "origin/master")
|
|
||||||
|
|
||||||
# Ensure destination directory exists
|
self.repo.git.push(remote_name='origin', ref_name=title, force=True)
|
||||||
dest_dir = f"{self.repo_path}src/content/"
|
|
||||||
os.makedirs(dest_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# Copy file
|
def remove_repo(self):
|
||||||
shutil.copy(f"{file_path}", dest_dir)
|
shutil.rmtree(self.repo_path)
|
||||||
|
|
||||||
# Commit and push
|
|
||||||
self.add_and_commit(commit_message)
|
|
||||||
self.repo.git.push("--set-upstream", "origin", title)
|
|
||||||
Loading…
x
Reference in New Issue
Block a user