Merge pull request 'repo_work_fix' (#2) from repo_work_fix into master

Reviewed-on: #2
armistace 2025-05-30 17:47:31 +10:00
commit 9a9228bc07
10 changed files with 334 additions and 133 deletions

.gitignore

@@ -3,3 +3,7 @@ __pycache__
.venv
.aider*
.vscode
.zed
pyproject.toml
.ropeproject
generated_files/*

Dockerfile

@@ -7,8 +7,12 @@ ENV PYTHONUNBUFFERED 1
ADD src/ /blog_creator
RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3.12-venv libmagic-dev
RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git
# Need to set up git here or we get funky errors
RUN git config --global user.name "Blog Creator"
RUN git config --global user.email "ridgway.infrastructure@gmail.com"
RUN git config --global push.autoSetupRemote true
# Get a python venv going as well 'cause safety
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

README.md

@@ -3,10 +3,19 @@
This creator requires a working Trilium instance and a .env file with the following:
```
TRILIUM_HOST
TRILIUM_PORT
TRILIUM_PROTOCOL
TRILIUM_PASS
TRILIUM_HOST=
TRILIUM_PORT=
TRILIUM_PROTOCOL=
TRILIUM_PASS=
TRILIUM_TOKEN=
OLLAMA_PROTOCOL=
OLLAMA_HOST=
OLLAMA_PORT=11434
EMBEDDING_MODEL=
EDITOR_MODEL=
# This is parsed as a JSON list, for example ["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]
CONTENT_CREATOR_MODELS=
CHROMA_SERVER=<IP_ADDRESS>
```
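For clarity, `CONTENT_CREATOR_MODELS` is read with `json.loads` inside the generator, so the value needs to be a JSON list of quoted model names. A minimal sketch of the expected round-trip (the model names here are placeholders):

```python
import json
import os

# Placeholder value for illustration only; in practice this comes from the .env file.
os.environ["CONTENT_CREATOR_MODELS"] = '["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]'

# This mirrors what the generator does on start-up.
agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
print(agent_models)  # ['phi4-mini:latest', 'qwen3:1.7b', 'gemma3:latest']
```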
This container is going to be what I use to trigger a blog creation event.

docker-compose.yml

@@ -1,3 +1,7 @@
networks:
  net:
    driver: bridge
services:
  blog_creator:
    build:
@@ -8,4 +12,33 @@ services:
      - .env
    volumes:
      - ./generated_files/:/blog_creator/generated_files
    networks:
      - net
  chroma:
    image: chromadb/chroma
    container_name: chroma
    volumes:
      # Be aware that indexed data are located in "/chroma/chroma/"
      # Default configuration for persist_directory in chromadb/config.py
      # Read more about deployments: https://docs.trychroma.com/deployment
      - chroma-data:/chroma/chroma
    #command: "--host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30"
    environment:
      - IS_PERSISTENT=TRUE
    restart: unless-stopped # possible values are: "no", "always", "on-failure", "unless-stopped"
    ports:
      - "8000:8000"
    healthcheck:
      # Adjust below to match your container port
      test:
        ["CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - net
volumes:
  chroma-data:
    driver: local
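A quick way to confirm the chroma service above is reachable before kicking off a run, sketched here assuming the published port 8000 is reachable from the host (inside the compose network the generator connects to the container's address instead):

```python
import chromadb

# Assumes the compose stack is up and port 8000 is mapped as above.
client = chromadb.HttpClient(host="localhost", port=8000)

print(client.heartbeat())         # returns a heartbeat timestamp if the server is healthy
print(client.list_collections())  # the blog_* collections created by the generator appear here
```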

requirements.txt

@@ -2,3 +2,5 @@ ollama
trilium-py
gitpython
PyGithub
chromadb
langchain-ollama

src/ai_generators/ollama_md_generator.py

@@ -1,44 +1,151 @@
import os
import os, re, json, random, time, string
from ollama import Client
import re
import chromadb
from langchain_ollama import ChatOllama

class OllamaGenerator:

    def __init__(self, title: str, content: str, model: str):
    def __init__(self, title: str, content: str, inner_title: str):
        self.title = title
        self.inner_title = inner_title
        self.content = content
        self.response = None
        self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000)
        ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}"
        self.ollama_client = Client(host=ollama_url)
        self.ollama_model = model

    def generate_markdown(self) -> str:
        prompt = f"""
        You are a Software Developer and DevOps expert
        who has transitioned into Developer Relations
        writing a 1000 word blog for other tech enthusiasts.
        self.ollama_model = os.environ["EDITOR_MODEL"]
        self.embed_model = os.environ["EMBEDDING_MODEL"]
        self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
        self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5)  # This is the level head in the room
        self.prompt_inject = f"""
        You are a journalist, Software Developer and DevOps expert
        writing a 1000 word draft blog for other tech enthusiasts.
        You like to use almost no code examples and prefer to talk
        in a light comedic tone. You are also Australian.
        As this person, write this blog as a markdown document.
        The title for the blog is {self.title}.
        The title for the blog is {self.inner_title}.
        Do not output the title in the markdown.
        The basis for the content of the blog is:
        {self.content}
        Only output markdown DO NOT GENERATE AN EXPLANATION
        """
    def split_into_chunks(self, text, chunk_size=100):
        '''Split text into chunks of size chunk_size'''
        words = re.findall(r'\S+', text)
        chunks = []
        current_chunk = []
        word_count = 0
        for word in words:
            current_chunk.append(word)
            word_count += 1
            if word_count >= chunk_size:
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                word_count = 0
        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks
    def generate_draft(self, model) -> str:
        '''Generate a draft blog post using the specified model'''
        try:
            # the idea behind this is to make the "creativity" random amongst the content creators
            # controlling temperature will cause the output to allow more "random" connections in sentences
            # Controlling top_p will tighten or loosen the embedding connections made
            # The result should be varied levels of "creativity" in the writing of the drafts
            # for more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html
            temp = random.uniform(0.5, 1.0)
            top_p = random.uniform(0.4, 0.8)
            top_k = int(random.uniform(30, 80))
            agent_llm = ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k)
            messages = [
                ("system", self.prompt_inject),
                ("human", "make the blog post in a format to be edited easily")
            ]
            response = agent_llm.invoke(messages)
            # self.response = self.ollama_client.chat(model=model,
            #                                         messages=[
            #                                             {
            #                                                 'role': 'user',
            #                                                 'content': f'{self.prompt_inject}',
            #                                             },
            #                                         ])
            # print ("draft")
            # print (response)
            return response.text()  # ['message']['content']
        except Exception as e:
            raise Exception(f"Failed to generate blog draft: {e}")
    def get_draft_embeddings(self, draft_chunks):
        '''Get embeddings for the draft chunks'''
        embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks)
        return embeds.get('embeddings', [])

    def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    def load_to_vector_db(self):
        '''Load the generated blog drafts into a vector database'''
        collection_name = f"blog_{self.title.lower().replace(" ", "_")}_{self.id_generator()}"
        collection = self.chroma.get_or_create_collection(name=collection_name)  # , metadata={"hnsw:space": "cosine"})
        # if any(collection.name == collectionname for collectionname in self.chroma.list_collections()):
        #     self.chroma.delete_collection("blog_creator")
        for model in self.agent_models:
            print(f"Generating draft from {model} for load into vector database")
            draft_chunks = self.split_into_chunks(self.generate_draft(model))
            print("generating embeds")
            embeds = self.get_draft_embeddings(draft_chunks)
            ids = [model + str(i) for i in range(len(draft_chunks))]
            chunknumber = list(range(len(draft_chunks)))
            metadata = [{"model_agent": model} for index in chunknumber]
            print('loading into collection')
            collection.add(documents=draft_chunks, embeddings=embeds, ids=ids, metadatas=metadata)
        return collection
    def generate_markdown(self) -> str:
        prompt_system = f"""
        You are an editor taking information from {len(self.agent_models)} Software
        Developers and Data experts
        writing a 3000 word blog for other tech enthusiasts.
        You like when they use almost no code examples and the
        voice is in a light comedic tone. You are also Australian.
        As this person, produce an amalgamation of this blog as a markdown document.
        The title for the blog is {self.inner_title}.
        Do not output the title in the markdown. Avoid repeated sentences.
        The basis for the content of the blog is:
        {self.content}
        """
        try:
            self.response = self.ollama_client.chat(model=self.ollama_model,
                                                    messages=[
                                                        {
                                                            'role': 'user',
                                                            'content': f'{prompt}',
                                                        },
                                                    ])
            # the deepseek model returns <think> this removes those tags from the output
            # return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content'])
            return self.response['message']['content']
            query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings']
            collection = self.load_to_vector_db()
            collection_query = collection.query(query_embeddings=query_embed, n_results=100)
            print("Showing pertinent info from drafts used in final edited edition")
            pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0])
            # print(pertinent_draft_info)
            prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
            print("Generating final document")
            messages = [("system", prompt_system), ("human", prompt_human),]
            self.response = self.llm.invoke(messages).text()
            # self.response = self.ollama_client.chat(model=self.ollama_model,
            #                                         messages=[
            #                                             {
            #                                                 'role': 'user',
            #                                                 'content': f'{prompt_enhanced}',
            #                                             },
            #                                         ])
            # print ("Markdown Generated")
            # print (self.response)
            return self.response  # ['message']['content']
        except Exception as e:
            raise Exception(f"Failed to generate markdown: {e}")
@@ -47,3 +154,9 @@ class OllamaGenerator:
        with open(filename, "w") as f:
            f.write(self.generate_markdown())

    def generate_commit_message(self):
        prompt_system = "You are a blog creator committing a piece of content to a central git repo"
        prompt_human = f"Generate a 5 word git commit message describing {self.response}"
        messages = [("system", prompt_system), ("human", prompt_human),]
        commit_message = self.llm.invoke(messages).text()
        return commit_message

src/main.py

@@ -1,5 +1,7 @@
import ai_generators.ollama_md_generator as omg
import trilium.notes as tn
import repo_management.repo_manager as git_repo
import string, os

tril = tn.TrilumNotes()
@@ -7,16 +9,26 @@ tril.get_new_notes()
tril_notes = tril.get_notes_content()

def convert_to_lowercase_with_underscores(string):
    return string.lower().replace(" ", "_")

def convert_to_lowercase_with_underscores(s):
    allowed = set(string.ascii_letters + string.digits + ' ')
    filtered_string = ''.join(c for c in s if c in allowed)
    return filtered_string.lower().replace(" ", "_")

for note in tril_notes:
    print(tril_notes[note]['title'])
    # print(tril_notes[note]['content'])
    print("Generating Document")
    ai_gen = omg.OllamaGenerator(tril_notes[note]['title'],
                                 tril_notes[note]['content'],
                                 "deepseek-r1:7b")
    os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title'])
    ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md")
    ai_gen = omg.OllamaGenerator(os_friendly_title,
                                 tril_notes[note]['content'],
                                 tril_notes[note]['title'])
    blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md"
    ai_gen.save_to_file(blog_path)

    # Generate commit messages and push to repo
    commit_message = ai_gen.generate_commit_message()
    git_user = os.environ["GIT_USER"]
    git_pass = os.environ["GIT_PASS"]
    repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
    repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)

push_markdown.py (deleted)

@@ -1,48 +0,0 @@
import os
import sys
from git import Repo

# Set these variables accordingly
REPO_OWNER = "your_repo_owner"
REPO_NAME = "your_repo_name"

def clone_repo(repo_url, branch="main"):
    Repo.clone_from(repo_url, ".", branch=branch)

def create_markdown_file(file_name, content):
    with open(f"{file_name}.md", "w") as f:
        f.write(content)

def commit_and_push(file_name, message):
    repo = Repo(".")
    repo.index.add([f"{file_name}.md"])
    repo.index.commit(message)
    repo.remote().push()

def create_new_branch(branch_name):
    repo = Repo(".")
    repo.create_head(branch_name).checkout()
    repo.head.reference.set_tracking_url(f"https://your_git_server/{REPO_OWNER}/{REPO_NAME}.git/{branch_name}")
    repo.remote().push()

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python push_markdown.py <repo_url> <markdown_file_name>")
        sys.exit(1)

    repo_url = sys.argv[1]
    file_name = sys.argv[2]

    # Clone the repository
    clone_repo(repo_url)

    # Create a new Markdown file with content
    create_markdown_file(file_name, "Hello, World!\n")

    # Commit and push changes to the main branch
    commit_and_push(file_name, f"Add {file_name}.md")

    # Create a new branch named after the Markdown file
    create_new_branch(file_name)

    print(f"Successfully created '{file_name}' branch with '{file_name}.md'.")

src/repo_management/repo_manager.py

@@ -1,35 +1,102 @@
import os
from git import Git
from git.repo import BaseRepository
from git.exc import InvalidGitRepositoryError
from git.remote import RemoteAction
import os, shutil
from urllib.parse import quote
from git import Repo
from git.exc import GitCommandError

# Set the path to your blog repo here
blog_repo = "/path/to/your/blog/repo"

class GitRepository:
    # This is designed to be transitory; it will destructively create the repo at repo_path
    # if you have uncommitted changes you can kiss them goodbye!
    # Don't use the repo created by this function for dev -> it's a tool!
    # It is expected that when used you will add, commit, push, delete

    def __init__(self, repo_path, username=None, password=None):
        git_protocol = os.environ["GIT_PROTOCOL"]
        git_remote = os.environ["GIT_REMOTE"]
        # if username is not set we don't need to parse it into the url
        if username == None or password == None:
            remote = f"{git_protocol}://{git_remote}"
        else:
            # of course if it is we need to parse and escape it so that it
            # can generate a url
            git_user = quote(username)
            git_password = quote(password)
            remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}"
# Checkout a new branch and create a new file for our blog post
branch_name = "new-post"
try:
    repo = Git(blog_repo)
    repo.checkout("-b", branch_name, "origin/main")
    with open("my-blog-post.md", "w") as f:
        f.write(content)
except InvalidGitRepositoryError:
    # Handle repository errors gracefully
    pass
        if os.path.exists(repo_path):
            shutil.rmtree(repo_path)
        self.repo_path = repo_path
        print("Cloning Repo")
        Repo.clone_from(remote, repo_path)
        self.repo = Repo(repo_path)
        self.username = username
        self.password = password
# Add and commit the changes to Git
repo.add("my-blog-post.md")
repo.commit("-m", "Added new blog post about DevOps best practices.")

    def clone(self, remote_url, destination_path):
        """Clone a Git repository with authentication"""
        try:
            self.repo.clone(remote_url, destination_path)
            return True
        except GitCommandError as e:
            print(f"Cloning failed: {e}")
            return False
# Push the changes to Git and create a PR
repo.remote().push("refs/heads/{0}:refs/for/main".format(branch_name), "--set-upstream")
base_branch = "origin/main"
target_branch = "main"
pr_title = "DevOps best practices"
try:
    repo.create_head("{0}-{1}", base=base_branch, message="{}".format(pr_title))
except RemoteAction.GitExitStatus as e:
    # Handle Git exit status errors gracefully
    pass
    def fetch(self, remote_name='origin', ref_name='main'):
        """Fetch updates from a remote repository with authentication"""
        try:
            self.repo.remotes[remote_name].fetch(ref_name=ref_name)
            return True
        except GitCommandError as e:
            print(f"Fetching failed: {e}")
            return False

    def pull(self, remote_name='origin', ref_name='main'):
        """Pull updates from a remote repository with authentication"""
        print("Pulling Latest Updates (if any)")
        try:
            self.repo.remotes[remote_name].pull(ref_name)
            return True
        except GitCommandError as e:
            print(f"Pulling failed: {e}")
            return False

    def get_branches(self):
        """List all branches in the repository"""
        return [branch.name for branch in self.repo.branches]

    def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'):
        """Create a new branch in the repository with authentication."""
        try:
            print(f"Creating Branch {branch_name}")
            # Use the same remote and ref as before
            self.repo.git.branch(branch_name)
        except GitCommandError:
            print("Branch already exists, switching")
        # ensure remote commits are pulled into local
        self.repo.git.checkout(branch_name)

    def add_and_commit(self, message=None):
        """Add and commit changes to the repository."""
        try:
            print("Committing latest draft")
            # Add all changes
            self.repo.git.add(all=True)
            # Commit with the provided message or a default
            if message is None:
                commit_message = "Added and committed new content"
            else:
                commit_message = message
            self.repo.git.commit(message=commit_message)
            return True
        except GitCommandError as e:
            print(f"Commit failed: {e}")
            return False

    def create_copy_commit_push(self, file_path, title, commit_message):
        self.create_and_switch_branch(title)
        self.pull(ref_name=title)
        shutil.copy(f"{file_path}", f"{self.repo_path}src/content/")
        self.add_and_commit(f"'{commit_message}'")
        self.repo.git.push()

src/trilium/notes.py

@@ -18,9 +18,13 @@ class TrilumNotes:
            print("Please run get_token and set your token")
        else:
            self.ea = ETAPI(self.server_url, self.token)
        self.new_notes = None
        self.note_content = None

    def get_token(self):
        ea = ETAPI(self.server_url)
        if self.tril_pass == None:
            raise ValueError("Trilium password cannot be None")
        token = ea.login(self.tril_pass)
        print(token)
        print("I would recommend you update the env file with this tootsweet!")
@@ -40,10 +44,11 @@ class TrilumNotes:
    def get_notes_content(self):
        content_dict = {}
        if self.new_notes is None:
            raise ValueError("How did you do this? new_notes is None!")
        for note in self.new_notes['results']:
            content_dict[note['noteId']] = {"title": f"{note['title']}",
                                            "content": f"{self._get_content(note['noteId'])}"
                                            }
        self.note_content = content_dict
        return content_dict