diff --git a/.gitignore b/.gitignore index 24458be..9ede049 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,7 @@ __pycache__ .venv .aider* .vscode +.zed +pyproject.toml +.ropeproject +generated_files/* diff --git a/Dockerfile b/Dockerfile index a220107..0416791 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,8 +7,12 @@ ENV PYTHONUNBUFFERED 1 ADD src/ /blog_creator -RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3.12-venv libmagic-dev - +RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git +# Need to set up git here or we get funky errors +RUN git config --global user.name "Blog Creator" +RUN git config --global user.email "ridgway.infrastructure@gmail.com" +RUN git config --global push.autoSetupRemote true +#Get a python venv going as well cause safety RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" diff --git a/README.md b/README.md index 833f393..4f284bb 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,19 @@ This creator requires you to use a working Trilium Instance and create a .env file with the following ``` -TRILIUM_HOST -TRILIUM_PORT -TRILIUM_PROTOCOL -TRILIUM_PASS +TRILIUM_HOST= +TRILIUM_PORT= +TRILIUM_PROTOCOL= +TRILIUM_PASS= +TRILIUM_TOKEN= +OLLAMA_PROTOCOL= +OLLAMA_HOST= +OLLAMA_PORT=11434 +EMBEDDING_MODEL= +EDITOR_MODEL= +# This is expected in python list format example `[phi4-mini:latest, qwen3:1.7b, gemma3:latest]` +CONTENT_CREATOR_MODELS= +CHROMA_SERVER= ``` This container is going to be what I use to trigger a blog creation event @@ -29,7 +38,7 @@ To do this we will 4. cd /src/content -5. take the information from the trillium note and prepare a 500 word blog post, insert the following at the top +5. take the information from the trillium note and prepare a 500 word blog post, insert the following at the top ``` Title: @@ -42,7 +51,7 @@ Authors: <model name>.ai Summary: <have ai write a 10 word summary of the post ``` -6. 
write it to `<title>.md` +6. write it to `<title>.md` 7. `git checkout -b <title>` diff --git a/docker-compose.yml b/docker-compose.yml index 0e61a87..2642fe8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,11 +1,44 @@ -services: - blog_creator: - build: - context: . - dockerfile: Dockerfile - container_name: blog_creator - env_file: - - .env - volumes: - - ./generated_files/:/blog_creator/generated_files +networks: + net: + driver: bridge +services: + blog_creator: + build: + context: . + dockerfile: Dockerfile + container_name: blog_creator + env_file: + - .env + volumes: + - ./generated_files/:/blog_creator/generated_files + networks: + - net + + chroma: + image: chromadb/chroma + container_name: chroma + volumes: + # Be aware that indexed data are located in "/chroma/chroma/" + # Default configuration for persist_directory in chromadb/config.py + # Read more about deployments: https://docs.trychroma.com/deployment + - chroma-data:/chroma/chroma + #command: "--host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" + environment: + - IS_PERSISTENT=TRUE + restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" + ports: + - "8000:8000" + healthcheck: + # Adjust below to match your container port + test: + ["CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - net + +volumes: + chroma-data: + driver: local diff --git a/requirements.txt b/requirements.txt index 7ae22b7..116f45e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,5 @@ ollama trilium-py gitpython PyGithub +chromadb +langchain-ollama diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index f54e8f4..58c66ee 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -1,44 +1,151 @@ -import os +import os, re, json, random, time, string 
from ollama import Client
import chromadb
from langchain_ollama import ChatOllama


class OllamaGenerator:
    """Multi-agent blog generator.

    Several "content creator" models each write a draft of the post; the
    drafts are chunked, embedded, and loaded into a Chroma collection; a
    single "editor" model then queries that collection for pertinent
    material and produces the final markdown document.
    """

    def __init__(self, title: str, content: str, inner_title: str):
        # title       -- filesystem-safe slug (used in the Chroma collection name)
        # inner_title -- human-readable title, injected into the prompts
        # content     -- source note text the blog post is based on
        self.title = title
        self.inner_title = inner_title
        self.content = content
        self.response = None
        # FIX: the Chroma host was hard-coded to a docker bridge IP
        # (172.18.0.2). Read CHROMA_SERVER from the environment (the README
        # documents it); keep the old IP as a fallback for compatibility.
        self.chroma = chromadb.HttpClient(
            host=os.environ.get("CHROMA_SERVER", "172.18.0.2"), port=8000)
        # FIX: double quotes nested inside a double-quoted f-string is a
        # SyntaxError on Python < 3.12 -- use single quotes inside.
        ollama_url = f"{os.environ['OLLAMA_PROTOCOL']}://{os.environ['OLLAMA_HOST']}:{os.environ['OLLAMA_PORT']}"
        self.ollama_client = Client(host=ollama_url)
        self.ollama_model = os.environ["EDITOR_MODEL"]
        self.embed_model = os.environ["EMBEDDING_MODEL"]
        # Expected to be a JSON list of model names, e.g. '["phi4-mini", "gemma3"]'
        self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
        # The editor LLM: deliberately conservative sampling -- the level head in the room.
        self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5)
        self.prompt_inject = f"""
        You are a journalist, Software Developer and DevOps expert
        writing a 1000 word draft blog for other tech enthusiasts.
        You like to use almost no code examples and prefer to talk
        in a light comedic tone. You are also Australian
        As this person write this blog as a markdown document.
        The title for the blog is {self.inner_title}.
        Do not output the title in the markdown.
        The basis for the content of the blog is:
        {self.content}
        """

    def split_into_chunks(self, text, chunk_size=100):
        """Split *text* into chunks of at most *chunk_size* whitespace-separated words."""
        words = re.findall(r'\S+', text)
        # Slice the word list in chunk_size strides instead of counting by hand.
        return [' '.join(words[i:i + chunk_size])
                for i in range(0, len(words), chunk_size)]

    def generate_draft(self, model) -> str:
        """Generate one draft blog post using *model*.

        Sampling parameters are randomised per call so each creator agent
        shows a different "level of creativity": temperature loosens token
        choice, top_p/top_k tighten or widen the candidate pool. See the
        langchain-ollama ChatOllama documentation for parameter semantics.

        Raises:
            Exception: wrapping the underlying failure, with the cause chained.
        """
        try:
            temp = random.uniform(0.5, 1.0)
            top_p = random.uniform(0.4, 0.8)
            top_k = int(random.uniform(30, 80))
            agent_llm = ChatOllama(model=model, temperature=temp,
                                   top_p=top_p, top_k=top_k)
            messages = [
                ("system", self.prompt_inject),
                ("human", "make the blog post in a format to be edited easily"),
            ]
            return agent_llm.invoke(messages).text()
        except Exception as e:
            # Chain the original error so the root cause is not lost.
            raise Exception(f"Failed to generate blog draft: {e}") from e

    def get_draft_embeddings(self, draft_chunks):
        """Embed *draft_chunks* with the configured embedding model."""
        embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks)
        return embeds.get('embeddings', [])

    def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits):
        """Random suffix for collection names (not security sensitive)."""
        return ''.join(random.choice(chars) for _ in range(size))

    def load_to_vector_db(self):
        """Generate one draft per agent model and load them all into a fresh Chroma collection."""
        # FIX: nested double quotes inside the f-string (SyntaxError < 3.12).
        collection_name = f"blog_{self.title.lower().replace(' ', '_')}_{self.id_generator()}"
        collection = self.chroma.get_or_create_collection(name=collection_name)
        for model in self.agent_models:
            print(f"Generating draft from {model} for load into vector database")
            draft_chunks = self.split_into_chunks(self.generate_draft(model))
            print("generating embeds")
            embeds = self.get_draft_embeddings(draft_chunks)
            # ids must be unique per chunk: model name + chunk index.
            ids = [model + str(i) for i in range(len(draft_chunks))]
            metadata = [{"model_agent": model} for _ in draft_chunks]
            print('loading into collection')
            collection.add(documents=draft_chunks, embeddings=embeds,
                           ids=ids, metadatas=metadata)
        return collection

    def generate_markdown(self) -> str:
        """Produce the final edited blog post as markdown.

        Raises:
            Exception: wrapping the underlying failure, with the cause chained.
        """
        prompt_system = f"""
        You are an editor taking information from {len(self.agent_models)} Software
        Developers and Data experts
        writing a 3000 word blog for other tech enthusiasts.
        You like when they use almost no code examples and the
        voice is in a light comedic tone. You are also Australian
        As this person produce and an amalgamtion of this blog as a markdown document.
        The title for the blog is {self.inner_title}.
        Do not output the title in the markdown. Avoid repeated sentences
        The basis for the content of the blog is:
        {self.content}
        """
        try:
            query_embed = self.ollama_client.embed(model=self.embed_model,
                                                   input=prompt_system)['embeddings']
            collection = self.load_to_vector_db()
            # FIX: the original ran the same (expensive) query twice and
            # discarded the first result; query once and reuse it.
            results = collection.query(query_embeddings=query_embed, n_results=100)
            print("Showing pertinent info from drafts used in final edited edition")
            pertinent_draft_info = '\n\n'.join(results['documents'][0])
            prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
            print("Generating final document")
            messages = [("system", prompt_system), ("human", prompt_human)]
            self.response = self.llm.invoke(messages).text()
            return self.response
        except Exception as e:
            raise Exception(f"Failed to generate markdown: {e}") from e

    def save_to_file(self, filename: str) -> None:
        """Write the generated markdown to *filename*."""
        # NOTE(review): the def line of this method is hidden inside a patch
        # hunk header -- signature reconstructed from the save_to_file(path)
        # call in src/main.py; confirm against the full file.
        with open(filename, "w") as f:
            f.write(self.generate_markdown())

    def generate_commit_message(self):
        """Ask the editor model for a short commit message describing the last post."""
        prompt_system = "You are a blog creator commiting a piece of content to a central git repo"
        prompt_human = f"Generate a 5 word git commit message describing {self.response}"
        messages = [("system", prompt_system), ("human", prompt_human)]
        commit_message = self.llm.invoke(messages).text()
        return commit_message
import ai_generators.ollama_md_generator as omg
import trilium.notes as tn
import repo_management.repo_manager as git_repo
import os
import string

tril = tn.TrilumNotes()

tril.get_new_notes()
tril_notes = tril.get_notes_content()


def convert_to_lowercase_with_underscores(s):
    """Return a filesystem-safe slug: drop everything outside
    [A-Za-z0-9 ], lowercase, and turn spaces into underscores."""
    keep = set(string.ascii_letters + string.digits + ' ')
    cleaned = ''.join(ch for ch in s if ch in keep)
    return cleaned.lower().replace(" ", "_")


for note_id in tril_notes:
    note = tril_notes[note_id]
    print(note['title'])
    print("Generating Document")

    os_friendly_title = convert_to_lowercase_with_underscores(note['title'])
    ai_gen = omg.OllamaGenerator(os_friendly_title,
                                 note['content'],
                                 note['title'])
    blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md"
    ai_gen.save_to_file(blog_path)

    # Generate a commit message and push the finished post to the blog repo.
    commit_message = ai_gen.generate_commit_message()
    git_user = os.environ["GIT_USER"]
    git_pass = os.environ["GIT_PASS"]
    repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
    repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)
import os
import shutil
from urllib.parse import quote

from git import Repo
from git.exc import GitCommandError


class GitRepository:
    """Transitory clone/commit/push helper for the blog repo.

    DESTRUCTIVE: the constructor removes anything already at ``repo_path``
    and re-clones -- uncommitted changes there are lost. Do not point this
    at a working checkout; it is a tool for add, commit, push, delete.
    """

    def __init__(self, repo_path, username=None, password=None):
        # Remote location comes from the environment so the image stays generic.
        git_protocol = os.environ["GIT_PROTOCOL"]
        git_remote = os.environ["GIT_REMOTE"]
        # FIX: identity comparison with None (PEP 8) instead of ==.
        if username is None or password is None:
            remote = f"{git_protocol}://{git_remote}"
        else:
            # Credentials must be percent-escaped to survive URL embedding.
            git_user = quote(username)
            git_password = quote(password)
            remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}"

        # Destructively recreate the checkout (see class docstring).
        if os.path.exists(repo_path):
            shutil.rmtree(repo_path)
        self.repo_path = repo_path
        print("Cloning Repo")
        Repo.clone_from(remote, repo_path)
        self.repo = Repo(repo_path)
        self.username = username
        self.password = password

    def clone(self, remote_url, destination_path):
        """Clone this repository to *destination_path*; True on success."""
        try:
            self.repo.clone(remote_url, destination_path)
            return True
        except GitCommandError as e:
            print(f"Cloning failed: {e}")
            return False

    def fetch(self, remote_name='origin', ref_name='main'):
        """Fetch *ref_name* from *remote_name*; True on success."""
        try:
            # FIX: GitPython's Remote.fetch takes the refspec positionally;
            # the previous ref_name= keyword raised TypeError.
            self.repo.remotes[remote_name].fetch(ref_name)
            return True
        except GitCommandError as e:
            print(f"Fetching failed: {e}")
            return False

    def pull(self, remote_name='origin', ref_name='main'):
        """Pull *ref_name* from *remote_name*; True on success."""
        print("Pulling Latest Updates (if any)")
        try:
            self.repo.remotes[remote_name].pull(ref_name)
            return True
        except GitCommandError as e:
            print(f"Pulling failed: {e}")
            return False

    def get_branches(self):
        """List all local branch names in the repository."""
        return [branch.name for branch in self.repo.branches]

    def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'):
        """Create *branch_name* (or reuse it if it exists) and check it out."""
        try:
            print(f"Creating Branch {branch_name}")
            self.repo.git.branch(branch_name)
        except GitCommandError:
            # Branch already exists -- just switch to it below.
            print("Branch already exists switching")
        self.repo.git.checkout(branch_name)

    def add_and_commit(self, message=None):
        """Stage all changes and commit them; True on success.

        Falls back to a generic message when *message* is None.
        """
        try:
            print("Commiting latest draft")
            self.repo.git.add(all=True)
            commit_message = "Added and committed new content" if message is None else message
            self.repo.git.commit(message=commit_message)
            return True
        except GitCommandError as e:
            print(f"Commit failed: {e}")
            return False

    def create_copy_commit_push(self, file_path, title, commit_messge):
        """Branch as *title*, copy *file_path* into src/content/, commit, push."""
        self.create_and_switch_branch(title)
        self.pull(ref_name=title)
        # FIX: join paths safely instead of naive string concatenation,
        # which broke when repo_path lacked a trailing slash.
        shutil.copy(file_path, os.path.join(self.repo_path, "src/content/"))
        self.add_and_commit(f"'{commit_messge}'")
        # push.autoSetupRemote is configured in the Dockerfile, so a plain
        # push creates the upstream branch.
        self.repo.git.push()
def get_notes_content(self):
    """Return {noteId: {"title": ..., "content": ...}} for the notes found
    by get_new_notes(), caching the mapping on self.note_content."""
    if self.new_notes is None:
        raise ValueError("How did you do this? new_notes is None!")
    gathered = {}
    for note in self.new_notes['results']:
        note_id = note['noteId']
        gathered[note_id] = {
            "title": f"{note['title']}",
            "content": f"{self._get_content(note_id)}",
        }
    self.note_content = gathered
    return gathered