Merge pull request 'repo_work_fix' (#2) from repo_work_fix into master

Reviewed-on: #2
armistace 2025-05-30 17:47:31 +10:00
commit 9a9228bc07
10 changed files with 334 additions and 133 deletions

.gitignore

@@ -3,3 +3,7 @@ __pycache__
.venv
.aider*
.vscode
.zed
pyproject.toml
.ropeproject
generated_files/*

Dockerfile

@@ -7,8 +7,12 @@ ENV PYTHONUNBUFFERED 1
ADD src/ /blog_creator
RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3.12-venv libmagic-dev
RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git
# Need to set up git here or we get funky errors
RUN git config --global user.name "Blog Creator"
RUN git config --global user.email "ridgway.infrastructure@gmail.com"
RUN git config --global push.autoSetupRemote true
# Get a python venv going as well 'cause safety
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

README.md

@@ -3,10 +3,19 @@
This creator requires a working Trilium instance and a .env file with the following:
```
TRILIUM_HOST
TRILIUM_PORT
TRILIUM_PROTOCOL
TRILIUM_PASS
TRILIUM_HOST=
TRILIUM_PORT=
TRILIUM_PROTOCOL=
TRILIUM_PASS=
TRILIUM_TOKEN=
OLLAMA_PROTOCOL=
OLLAMA_HOST=
OLLAMA_PORT=11434
EMBEDDING_MODEL=
EDITOR_MODEL=
# This is parsed as a JSON list, for example ["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]
CONTENT_CREATOR_MODELS=
CHROMA_SERVER=<IP_ADDRESS>
```
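For clarity, `CONTENT_CREATOR_MODELS` is read with `json.loads` inside the generator, so the value needs to be a JSON list of quoted model names. A minimal sketch of the expected round-trip (the model names here are placeholders):

```python
import json
import os

# Placeholder value for illustration only; in practice this comes from the .env file.
os.environ["CONTENT_CREATOR_MODELS"] = '["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]'

# This mirrors what the generator does on start-up.
agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
print(agent_models)  # ['phi4-mini:latest', 'qwen3:1.7b', 'gemma3:latest']
```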
This container is going to be what I use to trigger a blog creation event.

docker-compose.yml

@@ -1,3 +1,7 @@
networks:
  net:
    driver: bridge
services:
  blog_creator:
    build:
@@ -8,4 +12,33 @@ services:
      - .env
    volumes:
      - ./generated_files/:/blog_creator/generated_files
    networks:
      - net
  chroma:
    image: chromadb/chroma
    container_name: chroma
    volumes:
      # Be aware that indexed data are located in "/chroma/chroma/"
      # Default configuration for persist_directory in chromadb/config.py
      # Read more about deployments: https://docs.trychroma.com/deployment
      - chroma-data:/chroma/chroma
    #command: "--host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30"
    environment:
      - IS_PERSISTENT=TRUE
    restart: unless-stopped # possible values are: "no", "always", "on-failure", "unless-stopped"
    ports:
      - "8000:8000"
    healthcheck:
      # Adjust below to match your container port
      test:
        ["CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - net
volumes:
  chroma-data:
    driver: local
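A quick way to confirm the chroma service above is reachable before kicking off a run, sketched here assuming the published port 8000 is reachable from the host (inside the compose network the generator connects to the container's address instead):

```python
import chromadb

# Assumes the compose stack is up and port 8000 is mapped as above.
client = chromadb.HttpClient(host="localhost", port=8000)

print(client.heartbeat())         # returns a heartbeat timestamp if the server is healthy
print(client.list_collections())  # the blog_* collections created by the generator appear here
```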

requirements.txt

@@ -2,3 +2,5 @@ ollama
trilium-py
gitpython
PyGithub
chromadb
langchain-ollama

src/ai_generators/ollama_md_generator.py

@@ -1,44 +1,151 @@
import os
import os, re, json, random, time, string
from ollama import Client
import re
import chromadb
from langchain_ollama import ChatOllama

class OllamaGenerator:

    def __init__(self, title: str, content: str, model: str):
    def __init__(self, title: str, content: str, inner_title: str):
        self.title = title
        self.inner_title = inner_title
        self.content = content
        self.response = None
        self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000)
        ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}"
        self.ollama_client = Client(host=ollama_url)
        self.ollama_model = model

    def generate_markdown(self) -> str:
        prompt = f"""
        You are a Software Developer and DevOps expert
        who has transitioned into Developer Relations
        writing a 1000 word blog for other tech enthusiasts.
        self.ollama_model = os.environ["EDITOR_MODEL"]
        self.embed_model = os.environ["EMBEDDING_MODEL"]
        self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
        self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5)  # This is the level head in the room
        self.prompt_inject = f"""
        You are a journalist, Software Developer and DevOps expert
        writing a 1000 word draft blog for other tech enthusiasts.
        You like to use almost no code examples and prefer to talk
        in a light comedic tone. You are also Australian.
        As this person, write this blog as a markdown document.
        The title for the blog is {self.title}.
        The title for the blog is {self.inner_title}.
        Do not output the title in the markdown.
        The basis for the content of the blog is:
        {self.content}
        Only output markdown DO NOT GENERATE AN EXPLANATION
        """
    def split_into_chunks(self, text, chunk_size=100):
        '''Split text into chunks of size chunk_size'''
        words = re.findall(r'\S+', text)
        chunks = []
        current_chunk = []
        word_count = 0
        for word in words:
            current_chunk.append(word)
            word_count += 1
            if word_count >= chunk_size:
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                word_count = 0
        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks
    def generate_draft(self, model) -> str:
        '''Generate a draft blog post using the specified model'''
        try:
            # the idea behind this is to make the "creativity" random amongst the content creators
            # controlling temperature will cause the output to allow more "random" connections in sentences
            # Controlling top_p will tighten or loosen the embedding connections made
            # The result should be varied levels of "creativity" in the writing of the drafts
            # for more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html
            temp = random.uniform(0.5, 1.0)
            top_p = random.uniform(0.4, 0.8)
            top_k = int(random.uniform(30, 80))
            agent_llm = ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k)
            messages = [
                ("system", self.prompt_inject),
                ("human", "make the blog post in a format to be edited easily")
            ]
            response = agent_llm.invoke(messages)
            # self.response = self.ollama_client.chat(model=model,
            #                                         messages=[
            #                                             {
            #                                                 'role': 'user',
            #                                                 'content': f'{self.prompt_inject}',
            #                                             },
            #                                         ])
            # print ("draft")
            # print (response)
            return response.text()  # ['message']['content']
        except Exception as e:
            raise Exception(f"Failed to generate blog draft: {e}")
    def get_draft_embeddings(self, draft_chunks):
        '''Get embeddings for the draft chunks'''
        embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks)
        return embeds.get('embeddings', [])

    def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    def load_to_vector_db(self):
        '''Load the generated blog drafts into a vector database'''
        collection_name = f"blog_{self.title.lower().replace(" ", "_")}_{self.id_generator()}"
        collection = self.chroma.get_or_create_collection(name=collection_name)  # , metadata={"hnsw:space": "cosine"})
        # if any(collection.name == collectionname for collectionname in self.chroma.list_collections()):
        #     self.chroma.delete_collection("blog_creator")
        for model in self.agent_models:
            print(f"Generating draft from {model} for load into vector database")
            draft_chunks = self.split_into_chunks(self.generate_draft(model))
            print("generating embeds")
            embeds = self.get_draft_embeddings(draft_chunks)
            ids = [model + str(i) for i in range(len(draft_chunks))]
            chunknumber = list(range(len(draft_chunks)))
            metadata = [{"model_agent": model} for index in chunknumber]
            print('loading into collection')
            collection.add(documents=draft_chunks, embeddings=embeds, ids=ids, metadatas=metadata)
        return collection
    def generate_markdown(self) -> str:
        prompt_system = f"""
        You are an editor taking information from {len(self.agent_models)} Software
        Developers and Data experts
        writing a 3000 word blog for other tech enthusiasts.
        You like when they use almost no code examples and the
        voice is in a light comedic tone. You are also Australian.
        As this person, produce an amalgamation of this blog as a markdown document.
        The title for the blog is {self.inner_title}.
        Do not output the title in the markdown. Avoid repeated sentences.
        The basis for the content of the blog is:
        {self.content}
        """
        try:
            self.response = self.ollama_client.chat(model=self.ollama_model,
                                                    messages=[
                                                        {
                                                            'role': 'user',
                                                            'content': f'{prompt}',
                                                        },
                                                    ])
            # the deepseek model returns <think> this removes those tags from the output
            # return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content'])
            return self.response['message']['content']
            query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings']
            collection = self.load_to_vector_db()
            collection_query = collection.query(query_embeddings=query_embed, n_results=100)
            print("Showing pertinent info from drafts used in final edited edition")
            pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0])
            # print(pertinent_draft_info)
            prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
            print("Generating final document")
            messages = [("system", prompt_system), ("human", prompt_human),]
            self.response = self.llm.invoke(messages).text()
            # self.response = self.ollama_client.chat(model=self.ollama_model,
            #                                         messages=[
            #                                             {
            #                                                 'role': 'user',
            #                                                 'content': f'{prompt_enhanced}',
            #                                             },
            #                                         ])
            # print ("Markdown Generated")
            # print (self.response)
            return self.response  # ['message']['content']
        except Exception as e:
            raise Exception(f"Failed to generate markdown: {e}")
@@ -47,3 +154,9 @@ class OllamaGenerator:
        with open(filename, "w") as f:
            f.write(self.generate_markdown())

    def generate_commit_message(self):
        prompt_system = "You are a blog creator committing a piece of content to a central git repo"
        prompt_human = f"Generate a 5 word git commit message describing {self.response}"
        messages = [("system", prompt_system), ("human", prompt_human),]
        commit_message = self.llm.invoke(messages).text()
        return commit_message

src/main.py

@@ -1,5 +1,7 @@
import ai_generators.ollama_md_generator as omg
import trilium.notes as tn
import repo_management.repo_manager as git_repo
import string, os

tril = tn.TrilumNotes()
@@ -7,16 +9,26 @@ tril.get_new_notes()
tril_notes = tril.get_notes_content()

def convert_to_lowercase_with_underscores(string):
    return string.lower().replace(" ", "_")

def convert_to_lowercase_with_underscores(s):
    allowed = set(string.ascii_letters + string.digits + ' ')
    filtered_string = ''.join(c for c in s if c in allowed)
    return filtered_string.lower().replace(" ", "_")

for note in tril_notes:
    print(tril_notes[note]['title'])
    # print(tril_notes[note]['content'])
    print("Generating Document")
    ai_gen = omg.OllamaGenerator(tril_notes[note]['title'],
                                 tril_notes[note]['content'],
                                 "deepseek-r1:7b")
    os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title'])
    ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md")
    ai_gen = omg.OllamaGenerator(os_friendly_title,
                                 tril_notes[note]['content'],
                                 tril_notes[note]['title'])
    blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md"
    ai_gen.save_to_file(blog_path)

    # Generate commit messages and push to repo
    commit_message = ai_gen.generate_commit_message()
    git_user = os.environ["GIT_USER"]
    git_pass = os.environ["GIT_PASS"]
    repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
    repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)

push_markdown.py (deleted)

@@ -1,48 +0,0 @@
import os
import sys
from git import Repo

# Set these variables accordingly
REPO_OWNER = "your_repo_owner"
REPO_NAME = "your_repo_name"

def clone_repo(repo_url, branch="main"):
    Repo.clone_from(repo_url, ".", branch=branch)

def create_markdown_file(file_name, content):
    with open(f"{file_name}.md", "w") as f:
        f.write(content)

def commit_and_push(file_name, message):
    repo = Repo(".")
    repo.index.add([f"{file_name}.md"])
    repo.index.commit(message)
    repo.remote().push()

def create_new_branch(branch_name):
    repo = Repo(".")
    repo.create_head(branch_name).checkout()
    repo.head.reference.set_tracking_url(f"https://your_git_server/{REPO_OWNER}/{REPO_NAME}.git/{branch_name}")
    repo.remote().push()

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python push_markdown.py <repo_url> <markdown_file_name>")
        sys.exit(1)

    repo_url = sys.argv[1]
    file_name = sys.argv[2]

    # Clone the repository
    clone_repo(repo_url)

    # Create a new Markdown file with content
    create_markdown_file(file_name, "Hello, World!\n")

    # Commit and push changes to the main branch
    commit_and_push(file_name, f"Add {file_name}.md")

    # Create a new branch named after the Markdown file
    create_new_branch(file_name)

    print(f"Successfully created '{file_name}' branch with '{file_name}.md'.")

src/repo_management/repo_manager.py

@@ -1,35 +1,102 @@
import os
from git import Git
from git.repo import BaseRepository
from git.exc import InvalidGitRepositoryError
from git.remote import RemoteAction
import os, shutil
from urllib.parse import quote
from git import Repo
from git.exc import GitCommandError

# Set the path to your blog repo here
blog_repo = "/path/to/your/blog/repo"

class GitRepository:
    # This is designed to be transitory; it will destructively create the repo at repo_path
    # if you have uncommitted changes you can kiss them goodbye!
    # Don't use the repo created by this function for dev -> it's a tool!
    # It is expected that when used you will add, commit, push, delete

    def __init__(self, repo_path, username=None, password=None):
        git_protocol = os.environ["GIT_PROTOCOL"]
        git_remote = os.environ["GIT_REMOTE"]
        # if username is not set we don't need to parse it into the url
        if username == None or password == None:
            remote = f"{git_protocol}://{git_remote}"
        else:
            # of course if it is we need to parse and escape it so that it
            # can generate a url
            git_user = quote(username)
            git_password = quote(password)
            remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}"
# Checkout a new branch and create a new file for our blog post
branch_name = "new-post"
try:
    repo = Git(blog_repo)
    repo.checkout("-b", branch_name, "origin/main")
    with open("my-blog-post.md", "w") as f:
        f.write(content)
except InvalidGitRepositoryError:
    # Handle repository errors gracefully
    pass
        if os.path.exists(repo_path):
            shutil.rmtree(repo_path)
        self.repo_path = repo_path
        print("Cloning Repo")
        Repo.clone_from(remote, repo_path)
        self.repo = Repo(repo_path)
        self.username = username
        self.password = password
# Add and commit the changes to Git
repo.add("my-blog-post.md")
repo.commit("-m", "Added new blog post about DevOps best practices.")

    def clone(self, remote_url, destination_path):
        """Clone a Git repository with authentication"""
        try:
            self.repo.clone(remote_url, destination_path)
            return True
        except GitCommandError as e:
            print(f"Cloning failed: {e}")
            return False
# Push the changes to Git and create a PR
repo.remote().push("refs/heads/{0}:refs/for/main".format(branch_name), "--set-upstream")
base_branch = "origin/main"
target_branch = "main"
pr_title = "DevOps best practices"
try:
    repo.create_head("{0}-{1}", base=base_branch, message="{}".format(pr_title))
except RemoteAction.GitExitStatus as e:
    # Handle Git exit status errors gracefully
    pass
    def fetch(self, remote_name='origin', ref_name='main'):
        """Fetch updates from a remote repository with authentication"""
        try:
            self.repo.remotes[remote_name].fetch(ref_name=ref_name)
            return True
        except GitCommandError as e:
            print(f"Fetching failed: {e}")
            return False

    def pull(self, remote_name='origin', ref_name='main'):
        """Pull updates from a remote repository with authentication"""
        print("Pulling Latest Updates (if any)")
        try:
            self.repo.remotes[remote_name].pull(ref_name)
            return True
        except GitCommandError as e:
            print(f"Pulling failed: {e}")
            return False

    def get_branches(self):
        """List all branches in the repository"""
        return [branch.name for branch in self.repo.branches]

    def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'):
        """Create a new branch in the repository with authentication."""
        try:
            print(f"Creating Branch {branch_name}")
            # Use the same remote and ref as before
            self.repo.git.branch(branch_name)
        except GitCommandError:
            print("Branch already exists, switching")
        # ensure remote commits are pulled into local
        self.repo.git.checkout(branch_name)

    def add_and_commit(self, message=None):
        """Add and commit changes to the repository."""
        try:
            print("Committing latest draft")
            # Add all changes
            self.repo.git.add(all=True)
            # Commit with the provided message or a default
            if message is None:
                commit_message = "Added and committed new content"
            else:
                commit_message = message
            self.repo.git.commit(message=commit_message)
            return True
        except GitCommandError as e:
            print(f"Commit failed: {e}")
            return False

    def create_copy_commit_push(self, file_path, title, commit_message):
        self.create_and_switch_branch(title)
        self.pull(ref_name=title)
        shutil.copy(f"{file_path}", f"{self.repo_path}src/content/")
        self.add_and_commit(f"'{commit_message}'")
        self.repo.git.push()

src/trilium/notes.py

@@ -18,9 +18,13 @@ class TrilumNotes:
            print("Please run get_token and set your token")
        else:
            self.ea = ETAPI(self.server_url, self.token)
        self.new_notes = None
        self.note_content = None

    def get_token(self):
        ea = ETAPI(self.server_url)
        if self.tril_pass == None:
            raise ValueError("Trilium password cannot be None")
        token = ea.login(self.tril_pass)
        print(token)
        print("I would recommend you update the env file with this tootsweet!")
@@ -40,10 +44,11 @@ class TrilumNotes:
    def get_notes_content(self):
        content_dict = {}
        if self.new_notes is None:
            raise ValueError("How did you do this? new_notes is None!")
        for note in self.new_notes['results']:
            content_dict[note['noteId']] = {"title": f"{note['title']}",
                                            "content": f"{self._get_content(note['noteId'])}"
                                            }
        self.note_content = content_dict
        return content_dict