hard reset for the repo work

This commit is contained in:
armistace 2025-05-30 17:20:58 +10:00
parent 446978704d
commit 0005ad1fd3
5 changed files with 264 additions and 115 deletions

View File

@ -1,44 +1,151 @@
import os import os, re, json, random, time, string
from ollama import Client from ollama import Client
import re import chromadb
from langchain_ollama import ChatOllama
class OllamaGenerator: class OllamaGenerator:
def __init__(self, title: str, content: str, model: str): def __init__(self, title: str, content: str, inner_title: str):
self.title = title self.title = title
self.inner_title = inner_title
self.content = content self.content = content
self.response = None
self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000)
ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}"
self.ollama_client = Client(host=ollama_url) self.ollama_client = Client(host=ollama_url)
self.ollama_model = model self.ollama_model = os.environ["EDITOR_MODEL"]
self.embed_model = os.environ["EMBEDDING_MODEL"]
def generate_markdown(self) -> str: self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5) #This is the level head in the room
prompt = f""" self.prompt_inject = f"""
You are a Software Developer and DevOps expert You are a journalist, Software Developer and DevOps expert
who has transistioned in Developer Relations writing a 1000 word draft blog for other tech enthusiasts.
writing a 1000 word blog for other tech enthusiast.
You like to use almost no code examples and prefer to talk You like to use almost no code examples and prefer to talk
in a light comedic tone. You are also Australian in a light comedic tone. You are also Australian
As this person write this blog as a markdown document. As this person write this blog as a markdown document.
The title for the blog is {self.title}. The title for the blog is {self.inner_title}.
Do not output the title in the markdown. Do not output the title in the markdown.
The basis for the content of the blog is: The basis for the content of the blog is:
{self.content} {self.content}
Only output markdown DO NOT GENERATE AN EXPLANATION """
def split_into_chunks(self, text, chunk_size=100):
    '''Break text into space-joined groups of at most chunk_size words.

    Words are the maximal runs of non-whitespace characters in ``text``.
    Returns a list of strings; the last one may hold fewer words, and an
    input with no words yields an empty list.
    '''
    pieces = []
    pending = []
    for token in re.findall(r'\S+', text):
        pending.append(token)
        # Flush as soon as the pending group has reached the requested size.
        if len(pending) >= chunk_size:
            pieces.append(' '.join(pending))
            pending = []
    # Whatever is left over forms a final, shorter chunk.
    if pending:
        pieces.append(' '.join(pending))
    return pieces
def generate_draft(self, model) -> str:
    '''Generate a draft blog post using the specified model.

    A fresh ChatOllama client is built for ``model`` with randomly drawn
    sampling parameters, then invoked with ``self.prompt_inject`` as the
    system message.

    Returns the text of the model's response.
    Raises Exception (chained to the underlying error) if anything fails.
    '''
    try:
        # The idea is to make the "creativity" random amongst the content creators.
        # Controlling temperature allows more "random" connections in sentences;
        # controlling top_p tightens or loosens the embedding connections made.
        # The result should be varied levels of "creativity" across the drafts.
        # For more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html
        temp = random.uniform(0.5, 1.0)
        top_p = random.uniform(0.4, 0.8)
        top_k = int(random.uniform(30, 80))
        agent_llm = ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k)
        messages = [
            ("system", self.prompt_inject),
            ("human", "make the blog post in a format to be edited easily" )
        ]
        response = agent_llm.invoke(messages)
        return response.text()
    except Exception as e:
        # Chain explicitly so the original failure is reported as the direct cause.
        raise Exception(f"Failed to generate blog draft: {e}") from e
def get_draft_embeddings(self, draft_chunks):
    '''Embed the draft chunks with the configured embedding model.

    Sends ``draft_chunks`` to ``self.ollama_client.embed`` using
    ``self.embed_model`` and returns the reply's 'embeddings' entry,
    or an empty list when that key is absent.
    '''
    reply = self.ollama_client.embed(input=draft_chunks, model=self.embed_model)
    return reply.get('embeddings', [])
def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits):
    '''Return a random string of ``size`` characters, each drawn from ``chars``.'''
    picked = []
    # One random.choice call per character.
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def load_to_vector_db(self):
    '''Generate one draft per agent model and store its chunks in Chroma.

    A collection named from the lower-cased, underscore-joined title plus a
    random suffix is fetched or created. For every model in
    ``self.agent_models`` a draft is generated, chunked and embedded, then
    added with ids of the form ``<model><index>`` and the model name as
    metadata.

    Returns the collection.
    '''
    slug = self.title.lower().replace(" ", "_")
    name = f"blog_{slug}_{self.id_generator()}"
    collection = self.chroma.get_or_create_collection(name=name)
    for agent in self.agent_models:
        print (f"Generating draft from {agent} for load into vector database")
        chunks = self.split_into_chunks(self.generate_draft(agent))
        print("generating embeds")
        vectors = self.get_draft_embeddings(chunks)
        # Build one id and one metadata record per chunk, in chunk order.
        chunk_ids = []
        tags = []
        for position in range(len(chunks)):
            chunk_ids.append(agent + str(position))
            tags.append({"model_agent": agent})
        print('loading into collection')
        collection.add(documents=chunks, embeddings=vectors, ids=chunk_ids, metadatas=tags)
    return collection
def generate_markdown(self) -> str:
    '''Produce the final edited blog post from the stored drafts.

    The editor system prompt is embedded and used to query the collection
    returned by ``load_to_vector_db``. The returned draft chunks are joined
    and handed to ``self.llm`` together with the system prompt.

    Returns the generated text, which is also stored on ``self.response``.
    Raises Exception (chained to the underlying error) if any step fails.
    '''
    prompt_system = f"""
You are an editor taking information from {len(self.agent_models)} Software
Developers and Data experts
writing a 3000 word blog for other tech enthusiasts.
You like when they use almost no code examples and the
voice is in a light comedic tone. You are also Australian
As this person produce and an amalgamtion of this blog as a markdown document.
The title for the blog is {self.inner_title}.
Do not output the title in the markdown. Avoid repeated sentences
The basis for the content of the blog is:
{self.content}
"""
    try:
        query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings']
        collection = self.load_to_vector_db()
        # Query once and reuse the result; the query was previously issued
        # twice with identical arguments and the first result discarded.
        collection_query = collection.query(query_embeddings=query_embed, n_results=100)
        print("Showing pertinent info from drafts used in final edited edition")
        pertinent_draft_info = '\n\n'.join(collection_query['documents'][0])
        prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
        print("Generating final document")
        messages = [("system", prompt_system), ("human", prompt_human),]
        self.response = self.llm.invoke(messages).text()
        return self.response
    except Exception as e:
        # Chain explicitly so the original failure is reported as the direct cause.
        raise Exception(f"Failed to generate markdown: {e}") from e
@ -47,3 +154,9 @@ class OllamaGenerator:
with open(filename, "w") as f: with open(filename, "w") as f:
f.write(self.generate_markdown()) f.write(self.generate_markdown())
def generate_commit_message(self):
    '''Ask ``self.llm`` for a short git commit message about ``self.response``.

    Returns the text of the model's reply.
    '''
    system_text = "You are a blog creator commiting a piece of content to a central git repo"
    human_text = f"Generate a 5 word git commit message describing {self.response}"
    reply = self.llm.invoke([("system", system_text), ("human", human_text)])
    return reply.text()

View File

@ -1,5 +1,7 @@
import ai_generators.ollama_md_generator as omg import ai_generators.ollama_md_generator as omg
import trilium.notes as tn import trilium.notes as tn
import repo_management.repo_manager as git_repo
import string,os
tril = tn.TrilumNotes() tril = tn.TrilumNotes()
@ -7,16 +9,26 @@ tril.get_new_notes()
tril_notes = tril.get_notes_content() tril_notes = tril.get_notes_content()
def convert_to_lowercase_with_underscores(string): def convert_to_lowercase_with_underscores(s):
return string.lower().replace(" ", "_") allowed = set(string.ascii_letters + string.digits + ' ')
filtered_string = ''.join(c for c in s if c in allowed)
return filtered_string.lower().replace(" ", "_")
for note in tril_notes: for note in tril_notes:
print(tril_notes[note]['title']) print(tril_notes[note]['title'])
# print(tril_notes[note]['content']) # print(tril_notes[note]['content'])
print("Generating Document") print("Generating Document")
ai_gen = omg.OllamaGenerator(tril_notes[note]['title'],
tril_notes[note]['content'],
"deepseek-r1:7b")
os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title'])
ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md") ai_gen = omg.OllamaGenerator(os_friendly_title,
tril_notes[note]['content'],
tril_notes[note]['title'])
blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md"
ai_gen.save_to_file(blog_path)
# Generate commit messages and push to repo
commit_message = ai_gen.generate_commit_message()
git_user = os.environ["GIT_USER"]
git_pass = os.environ["GIT_PASS"]
repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)

View File

@ -1,48 +0,0 @@
import os
import sys
from git import Repo
# Set these variables accordingly
REPO_OWNER = "your_repo_owner"
REPO_NAME = "your_repo_name"
def clone_repo(repo_url, branch="main"):
    """Clone ``repo_url`` into the current directory, checking out ``branch``."""
    target_dir = "."
    Repo.clone_from(repo_url, target_dir, branch=branch)
def create_markdown_file(file_name, content):
    """Write ``content`` to ``<file_name>.md``, replacing any existing file."""
    markdown_path = f"{file_name}.md"
    with open(markdown_path, "w") as handle:
        handle.write(content)
def commit_and_push(file_name, message):
    """Stage ``<file_name>.md``, commit it with ``message`` and push.

    Operates on the repository in the current directory and pushes via the
    repository's default remote.
    """
    markdown_path = f"{file_name}.md"
    repo = Repo(".")
    repo.index.add([markdown_path])
    repo.index.commit(message)
    default_remote = repo.remote()
    default_remote.push()
def create_new_branch(branch_name):
    """Create ``branch_name`` in the current-directory repo, check it out and push it.

    The branch is pushed to ``origin`` with upstream tracking configured in
    the same step.
    """
    repo = Repo(".")
    repo.create_head(branch_name).checkout()
    # The previous code called `set_tracking_url` on the head reference, which
    # GitPython references do not provide, and then pushed without an upstream.
    # `git push --set-upstream` both publishes the branch and sets tracking.
    repo.git.push("--set-upstream", "origin", branch_name)
if __name__ == "__main__":
    # Both the repository URL and the markdown file name are required.
    if not len(sys.argv) >= 3:
        print("Usage: python push_markdown.py <repo_url> <markdown_file_name>")
        sys.exit(1)
    repo_url, file_name = sys.argv[1:3]

    # Fetch the repository into the working directory.
    clone_repo(repo_url)

    # Write a placeholder markdown file.
    create_markdown_file(file_name, "Hello, World!\n")

    # Record the new file and publish it.
    commit_and_push(file_name, f"Add {file_name}.md")

    # Branch off under the same name as the markdown file.
    create_new_branch(file_name)
    print(f"Successfully created '{file_name}' branch with '{file_name}.md'.")

View File

@ -1,35 +1,102 @@
import os import os, shutil
from git import Git from urllib.parse import quote
from git.repo import BaseRepository from git import Repo
from git.exc import InvalidGitRepositoryError from git.exc import GitCommandError
from git.remote import RemoteAction
# Set the path to your blog repo here class GitRepository:
blog_repo = "/path/to/your/blog/repo" # This is designed to be transitory it will desctruvtively create the repo at repo_path
# if you have uncommited changes you can kiss them goodbye!
# Don't use the repo created by this function for dev -> its a tool!
# It is expected that when used you will add, commit, push, delete
def __init__(self, repo_path, username=None, password=None):
git_protocol = os.environ["GIT_PROTOCOL"]
git_remote = os.environ["GIT_REMOTE"]
#if username is not set we don't need parse to the url
if username==None or password == None:
remote = f"{git_protocol}://{git_remote}"
else:
# of course if it is we need to parse and escape it so that it
# can generate a url
git_user = quote(username)
git_password = quote(password)
remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}"
# Checkout a new branch and create a new file for our blog post if os.path.exists(repo_path):
branch_name = "new-post" shutil.rmtree(repo_path)
try: self.repo_path = repo_path
repo = Git(blog_repo) print("Cloning Repo")
repo.checkout("-b", branch_name, "origin/main") Repo.clone_from(remote, repo_path)
with open("my-blog-post.md", "w") as f: self.repo = Repo(repo_path)
f.write(content) self.username = username
except InvalidGitRepositoryError: self.password = password
# Handle repository errors gracefully
pass
# Add and commit the changes to Git def clone(self, remote_url, destination_path):
repo.add("my-blog-post.md") """Clone a Git repository with authentication"""
repo.commit("-m", "Added new blog post about DevOps best practices.") try:
self.repo.clone(remote_url, destination_path)
return True
except GitCommandError as e:
print(f"Cloning failed: {e}")
return False
# Push the changes to Git and create a PR def fetch(self, remote_name='origin', ref_name='main'):
repo.remote().push("refs/heads/{0}:refs/for/main".format(branch_name), "--set-upstream") """Fetch updates from a remote repository with authentication"""
base_branch = "origin/main" try:
target_branch = "main" self.repo.remotes[remote_name].fetch(ref_name=ref_name)
pr_title = "DevOps best practices" return True
try: except GitCommandError as e:
repo.create_head("{0}-{1}", base=base_branch, message="{}".format(pr_title)) print(f"Fetching failed: {e}")
except RemoteAction.GitExitStatus as e: return False
# Handle Git exit status errors gracefully
pass
def pull(self, remote_name='origin', ref_name='main'):
    """Pull ``ref_name`` from the remote called ``remote_name``.

    Returns True on success; prints the error and returns False when git
    reports a command failure.
    """
    print("Pulling Latest Updates (if any)")
    try:
        remote = self.repo.remotes[remote_name]
        remote.pull(ref_name)
    except GitCommandError as err:
        print(f"Pulling failed: {err}")
        return False
    return True
def get_branches(self):
    """Return the names of the branches in ``self.repo.branches``."""
    names = []
    for head in self.repo.branches:
        names.append(head.name)
    return names
def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'):
    """Create ``branch_name`` if needed, then check it out.

    ``remote_name`` and ``ref_name`` are accepted but not used by the body.
    """
    print(f"Creating Branch {branch_name}")
    try:
        self.repo.git.branch(branch_name)
    except GitCommandError:
        # Any git failure here is treated as "the branch already exists".
        print("Branch already exists switching")
    # NOTE(review): the original indentation was lost; the checkout is assumed
    # to run on both the create and the already-exists path - confirm.
    self.repo.git.checkout(branch_name)
def add_and_commit(self, message=None):
    """Stage every change and commit it.

    ``message`` is the commit message; a fixed default is used when it is
    None. Returns True on success; prints the error and returns False when
    git reports a command failure.
    """
    commit_message = "Added and committed new content" if message is None else message
    try:
        print("Commiting latest draft")
        # all=True stages every change in the working tree.
        self.repo.git.add(all=True)
        self.repo.git.commit(message=commit_message)
    except GitCommandError as err:
        print(f"Commit failed: {err}")
        return False
    return True
def create_copy_commit_push(self, file_path, title, commit_messge):
    """Publish a generated file on its own branch.

    Switches to (creating if needed) the branch named ``title``, attempts to
    pull that branch, copies ``file_path`` into ``src/content/`` under the
    repo path, commits everything with ``commit_messge`` and pushes the
    branch to ``origin``.
    """
    self.create_and_switch_branch(title)
    self.pull(ref_name=title)
    # Join path components so the result does not depend on repo_path ending
    # in a separator; the trailing "" keeps the directory-style trailing slash.
    content_dir = os.path.join(self.repo_path, "src", "content", "")
    shutil.copy(file_path, content_dir)
    # Pass the message as-is: no shell is involved, so wrapping it in quote
    # characters would put those quotes into the commit message itself.
    self.add_and_commit(commit_messge)
    # A freshly created local branch has no upstream, so name the remote and
    # branch explicitly and set tracking while pushing.
    self.repo.git.push("--set-upstream", "origin", title)

View File

@ -11,16 +11,20 @@ class TrilumNotes:
self.token = os.environ.get('TRILIUM_TOKEN') self.token = os.environ.get('TRILIUM_TOKEN')
if not all([self.protocol, self.host, self.port, self.tril_pass]): if not all([self.protocol, self.host, self.port, self.tril_pass]):
print("One or more required environment variables not found. Have you set a .env?") print("One or more required environment variables not found. Have you set a .env?")
self.server_url = f'{self.protocol}://{self.host}:{self.port}' self.server_url = f'{self.protocol}://{self.host}:{self.port}'
if not self.token: if not self.token:
print("Please run get_token and set your token") print("Please run get_token and set your token")
else: else:
self.ea = ETAPI(self.server_url, self.token) self.ea = ETAPI(self.server_url, self.token)
self.new_notes = None
self.note_content = None
def get_token(self): def get_token(self):
ea = ETAPI(self.server_url) ea = ETAPI(self.server_url)
if self.tril_pass == None:
raise ValueError("Trillium password can not be none")
token = ea.login(self.tril_pass) token = ea.login(self.tril_pass)
print(token) print(token)
print("I would recomend you update the env file with this tootsweet!") print("I would recomend you update the env file with this tootsweet!")
@ -40,10 +44,11 @@ class TrilumNotes:
def get_notes_content(self): def get_notes_content(self):
content_dict = {} content_dict = {}
if self.new_notes is None:
raise ValueError("How did you do this? new_notes is None!")
for note in self.new_notes['results']: for note in self.new_notes['results']:
content_dict[note['noteId']] = {"title" : f"{note['title']}", content_dict[note['noteId']] = {"title" : f"{note['title']}",
"content" : f"{self._get_content(note['noteId'])}" "content" : f"{self._get_content(note['noteId'])}"
} }
self.note_content = content_dict self.note_content = content_dict
return content_dict return content_dict