hard reset for the repo work

2025-05-30 17:20:58 +10:00 · 2025-05-30 17:20:58 +10:00 · 0005ad1fd3
commit 0005ad1fd3
parent 446978704d
5 changed files with 264 additions and 115 deletions
--- a/src/ai_generators/ollama_md_generator.py
+++ b/src/ai_generators/ollama_md_generator.py
@ -1,44 +1,151 @@
-import os
+import os, re, json, random, time, string
 from ollama import Client
-import re
+import chromadb
 from langchain_ollama import ChatOllama
 class OllamaGenerator:
-    def __init__(self, title: str, content: str, model: str):
+    def __init__(self, title: str, content: str, inner_title: str):
        self.title = title
        self.inner_title = inner_title
        self.content = content
        self.response = None
        self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000)
        ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}"
        self.ollama_client = Client(host=ollama_url)
-        self.ollama_model = model
+        self.ollama_model = os.environ["EDITOR_MODEL"]
-
+        self.embed_model = os.environ["EMBEDDING_MODEL"]
-    def generate_markdown(self) -> str:
+        self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
-
+        self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5) #This  is the level head in the room
-        prompt = f"""
+        self.prompt_inject = f"""
-            You are a Software Developer and DevOps expert
+            You are a journalist, Software Developer and DevOps expert
-            who has transistioned in Developer Relations 
+            writing a 1000 word draft blog for other tech enthusiasts.
            writing a 1000 word blog for other tech enthusiast.
            You like to use almost no code examples and prefer to talk
-            in a light comedic tone. You are also Australian 
+            in a light comedic tone. You are also Australian
            As this person write this blog as a markdown document.
-            The title for the blog is {self.title}.
+            The title for the blog is {self.inner_title}.
            Do not output the title in the markdown.
            The basis for the content of the blog is:
                {self.content}
-            Only output markdown DO NOT GENERATE AN EXPLANATION
+            """
    def split_into_chunks(self, text, chunk_size=100):
        '''Split text into chunks of at most chunk_size words.

        Args:
            text: The text to split; any run of whitespace separates words.
            chunk_size: Maximum number of words per chunk (must be >= 1).

        Returns:
            A list of strings, each holding up to chunk_size words joined by
            single spaces. The last chunk may be shorter. Empty or
            whitespace-only text yields an empty list.

        Raises:
            ValueError: If chunk_size is less than 1.
        '''
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")
        words = re.findall(r'\S+', text)
        # Walk the word list in strides of chunk_size so a chunk is emitted
        # every chunk_size words, instead of checking the size only once after
        # every word has already been collected.
        return [
            ' '.join(words[start:start + chunk_size])
            for start in range(0, len(words), chunk_size)
        ]
    def generate_draft(self, model) -> str:
        '''Generate a draft blog post using the specified model.

        Args:
            model: Name of the model passed to ChatOllama for this draft.

        Returns:
            The text of the model's reply.

        Raises:
            Exception: If building the chat model or invoking it fails; the
                original error is attached as the cause.
        '''
        try:
            # The idea is to make the "creativity" random amongst the content
            # creators:
            #   - controlling temperature allows more "random" connections
            #     in sentences
            #   - controlling top_p tightens or loosens the connections made
            # The result should be varied levels of "creativity" in the drafts.
            # For more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html
            temp = random.uniform(0.5, 1.0)
            top_p = random.uniform(0.4, 0.8)
            top_k = int(random.uniform(30, 80))
            agent_llm = ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k)
            messages = [
                ("system", self.prompt_inject),
                ("human", "make the blog post in a format to be edited easily"),
            ]
            response = agent_llm.invoke(messages)
            return response.text()
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise Exception(f"Failed to generate blog draft: {e}") from e
    def get_draft_embeddings(self, draft_chunks):
        '''Embed the draft chunks with self.embed_model.

        Returns the 'embeddings' entry of the client's response, or an empty
        list when the response has no such entry.
        '''
        return self.ollama_client.embed(
            model=self.embed_model,
            input=draft_chunks,
        ).get('embeddings', [])
    def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits):
        '''Return a random string of `size` characters drawn from `chars`.'''
        picked = []
        for _ in range(size):
            picked.append(random.choice(chars))
        return ''.join(picked)
    def load_to_vector_db(self):
        '''Generate a draft per agent model and load them into a vector database.

        For every model in self.agent_models a draft is generated, split into
        chunks, embedded, and added to a collection named after self.title
        plus a random suffix from id_generator.

        Returns:
            The collection the draft chunks were added to. Each chunk is
            stored with its embedding, an id of the form "<model><index>" and
            a "model_agent" metadata entry naming the model that wrote it.
        '''
        safe_title = self.title.lower().replace(' ', '_')
        collection_name = f"blog_{safe_title}_{self.id_generator()}"
        collection = self.chroma.get_or_create_collection(name=collection_name)
        for model in self.agent_models:
            print(f"Generating draft from {model} for load into vector database")
            draft_chunks = self.split_into_chunks(self.generate_draft(model))
            if not draft_chunks:
                # An empty draft has nothing to embed or store; skip it rather
                # than handing empty lists to the collection.
                print(f"No draft content returned by {model}, skipping")
                continue
            print("generating embeds")
            embeds = self.get_draft_embeddings(draft_chunks)
            ids = [f"{model}{index}" for index in range(len(draft_chunks))]
            metadata = [{"model_agent": model} for _ in draft_chunks]
            print('loading into collection')
            collection.add(documents=draft_chunks, embeddings=embeds, ids=ids, metadatas=metadata)
        return collection
    def generate_markdown(self) -> str:
        prompt_system = f"""
            You are an editor taking information from {len(self.agent_models)} Software
            Developers and Data experts
            writing a 3000 word blog for other tech enthusiasts.
            You like when they use almost no code examples and the
            voice is in a light comedic tone. You are also Australian
            As this person produce and an amalgamtion of this blog as a markdown document.
            The title for the blog is {self.inner_title}.
            Do not output the title in the markdown. Avoid repeated sentences
            The basis for the content of the blog is:
                {self.content}
            """
        try:
-            self.response = self.ollama_client.chat(model=self.ollama_model,
+            query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings']
-                                                    messages=[
+            collection = self.load_to_vector_db()
-                    {
+            collection_query = collection.query(query_embeddings=query_embed, n_results=100)
-                        'role': 'user',
+            print("Showing pertinent info from drafts used in final edited edition")
-                        'content': f'{prompt}',
+            pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0])
-                    },
+            #print(pertinent_draft_info)
-                ])
+            prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
-            
+            print("Generating final document")
-            # the deepseek model returns <think> this removes those tabs from the output
+            messages = [("system", prompt_system), ("human", prompt_human),]
-            # return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content'])
+            self.response = self.llm.invoke(messages).text()
-            return self.response['message']['content']
+            # self.response = self.ollama_client.chat(model=self.ollama_model,
-
+            #                                         messages=[
            #         {
            #             'role': 'user',
            #             'content': f'{prompt_enhanced}',
            #         },
            #     ])
            #print ("Markdown Generated")
            #print (self.response)
            return self.response#['message']['content']
        except Exception as e:
            raise Exception(f"Failed to generate markdown: {e}")
@ -47,3 +154,9 @@ class OllamaGenerator:
        with open(filename, "w") as f:
            f.write(self.generate_markdown())
    def generate_commit_message(self):
        '''Ask self.llm for a short git commit message describing self.response.'''
        conversation = [
            ("system", "You are a blog creator commiting a piece of content to a central git repo"),
            ("human", f"Generate a 5 word git commit message describing {self.response}"),
        ]
        reply = self.llm.invoke(conversation)
        return reply.text()
--- a/src/main.py
+++ b/src/main.py
@ -1,5 +1,7 @@
 import ai_generators.ollama_md_generator as omg
 import trilium.notes as tn
 import repo_management.repo_manager as git_repo
 import string,os
 tril = tn.TrilumNotes()
@ -7,16 +9,26 @@ tril.get_new_notes()
 tril_notes = tril.get_notes_content()
-def convert_to_lowercase_with_underscores(string):
+def convert_to_lowercase_with_underscores(s):
-    return string.lower().replace(" ", "_")
+    allowed = set(string.ascii_letters + string.digits + ' ')
    filtered_string = ''.join(c for c in s if c in allowed)
    return filtered_string.lower().replace(" ", "_")
 for note in tril_notes:
    print(tril_notes[note]['title'])
    # print(tril_notes[note]['content'])
    print("Generating Document")
-    ai_gen = omg.OllamaGenerator(tril_notes[note]['title'],
+
                                 tril_notes[note]['content'],
                                 "deepseek-r1:7b")
    os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title'])
-    ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md")
+    ai_gen = omg.OllamaGenerator(os_friendly_title,
                                 tril_notes[note]['content'],
                                 tril_notes[note]['title'])
    blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md"
    ai_gen.save_to_file(blog_path)
    # Generate commit messages and push to repo
    commit_message = ai_gen.generate_commit_message()
    git_user = os.environ["GIT_USER"]
    git_pass = os.environ["GIT_PASS"]
    repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
    repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)
--- a/src/repo_management/push_markdown.py
+++ b/src/repo_management/push_markdown.py
@ -1,48 +0,0 @@
 import os
 import sys
 from git import Repo
 # Set these variables accordingly
 REPO_OWNER = "your_repo_owner"
 REPO_NAME = "your_repo_name"
 def clone_repo(repo_url, branch="main"):
    Repo.clone_from(repo_url, ".", branch=branch)
 def create_markdown_file(file_name, content):
    with open(f"{file_name}.md", "w") as f:
        f.write(content)
 def commit_and_push(file_name, message):
    repo = Repo(".")
    repo.index.add([f"{file_name}.md"])
    repo.index.commit(message)
    repo.remote().push()
 def create_new_branch(branch_name):
    repo = Repo(".")
    repo.create_head(branch_name).checkout()
    repo.head.reference.set_tracking_url(f"https://your_git_server/{REPO_OWNER}/{REPO_NAME}.git/{branch_name}")
    repo.remote().push()
 if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python push_markdown.py <repo_url> <markdown_file_name>")
        sys.exit(1)
    repo_url = sys.argv[1]
    file_name = sys.argv[2]
    # Clone the repository
    clone_repo(repo_url)
    # Create a new Markdown file with content
    create_markdown_file(file_name, "Hello, World!\n")
    # Commit and push changes to the main branch
    commit_and_push(file_name, f"Add {file_name}.md")
    # Create a new branch named after the Markdown file
    create_new_branch(file_name)
    print(f"Successfully created '{file_name}' branch with '{file_name}.md'.")
--- a/src/repo_management/repo_manager.py
+++ b/src/repo_management/repo_manager.py
@ -1,35 +1,102 @@
-import os
+import os, shutil
-from git import Git
+from urllib.parse import quote
-from git.repo import BaseRepository
+from git import Repo
-from git.exc import InvalidGitRepositoryError
+from git.exc import GitCommandError
 from git.remote import RemoteAction
-# Set the path to your blog repo here
+class GitRepository:
-blog_repo = "/path/to/your/blog/repo"
+    # This is designed to be transitory: it will destructively create the repo at repo_path.
    # If you have uncommitted changes you can kiss them goodbye!
    # Don't use the repo created by this function for dev -> it's a tool!
    # It is expected that when used you will add, commit, push, delete.
    def __init__(self, repo_path, username=None, password=None):
        git_protocol = os.environ["GIT_PROTOCOL"]
        git_remote = os.environ["GIT_REMOTE"]
        #if username is not set we don't need parse to the url
        if username==None or password == None:
            remote = f"{git_protocol}://{git_remote}"
        else:
            # of course if it is we need to parse and escape it so that it
            # can generate a url
            git_user = quote(username)
            git_password = quote(password)
            remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}"
-# Checkout a new branch and create a new file for our blog post
+        if os.path.exists(repo_path):
-branch_name = "new-post"
+            shutil.rmtree(repo_path)
-try:
+        self.repo_path = repo_path
-    repo = Git(blog_repo)
+        print("Cloning Repo")
-    repo.checkout("-b", branch_name, "origin/main")
+        Repo.clone_from(remote, repo_path)
-    with open("my-blog-post.md", "w") as f:
+        self.repo = Repo(repo_path)
-        f.write(content)
+        self.username = username
-except InvalidGitRepositoryError:
+        self.password = password
    # Handle repository errors gracefully
    pass
-# Add and commit the changes to Git
+    def clone(self, remote_url, destination_path):
-repo.add("my-blog-post.md")
+        """Clone a Git repository with authentication"""
-repo.commit("-m", "Added new blog post about DevOps best practices.")
+        try:
            self.repo.clone(remote_url, destination_path)
            return True
        except GitCommandError as e:
            print(f"Cloning failed: {e}")
            return False
-# Push the changes to Git and create a PR
+    def fetch(self, remote_name='origin', ref_name='main'):
-repo.remote().push("refs/heads/{0}:refs/for/main".format(branch_name), "--set-upstream")
+        """Fetch updates from a remote repository with authentication"""
-base_branch = "origin/main"
+        try:
-target_branch = "main"
+            self.repo.remotes[remote_name].fetch(ref_name=ref_name)
-pr_title = "DevOps best practices"
+            return True
-try:
+        except GitCommandError as e:
-    repo.create_head("{0}-{1}", base=base_branch, message="{}".format(pr_title))
+            print(f"Fetching failed: {e}")
-except RemoteAction.GitExitStatus as e:
+            return False
    # Handle Git exit status errors gracefully
    pass
    def pull(self, remote_name='origin', ref_name='main'):
        """Pull `ref_name` from the remote named `remote_name`.

        Returns True on success; a GitCommandError is printed and reported
        as False.
        """
        print("Pulling Latest Updates (if any)")
        try:
            remote = self.repo.remotes[remote_name]
            remote.pull(ref_name)
        except GitCommandError as err:
            print(f"Pulling failed: {err}")
            return False
        return True
    def get_branches(self):
        """Return the name of each entry in `self.repo.branches`, in order."""
        names = []
        for branch in self.repo.branches:
            names.append(branch.name)
        return names
    def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'):
        """Create `branch_name` if possible, then check it out.

        A GitCommandError from the create step is treated as "the branch
        already exists" and the checkout still happens. `remote_name` and
        `ref_name` are accepted but not used.
        """
        try:
            print(f"Creating Branch {branch_name}")
            self.repo.git.branch(branch_name)
        except GitCommandError:
            # Creation failed; carry on and switch to the branch anyway.
            print("Branch already exists switching")

        self.repo.git.checkout(branch_name)
    def add_and_commit(self, message=None):
        """Stage every change and commit it.

        Uses `message` as the commit message, or a default one when it is
        None. Returns True on success; a GitCommandError is printed and
        reported as False.
        """
        try:
            print("Commiting latest draft")
            # Stage all changes before committing.
            self.repo.git.add(all=True)
            commit_message = "Added and committed new content" if message is None else message
            self.repo.git.commit(message=commit_message)
        except GitCommandError as err:
            print(f"Commit failed: {err}")
            return False
        return True
    def create_copy_commit_push(self, file_path, title, commit_messge):
        """Publish one file on its own branch.

        Switches to the branch named `title` (creating it if needed), pulls
        that branch, copies `file_path` into the repo's src/content
        directory, commits everything and pushes the branch.

        Args:
            file_path: Path of the file to copy into the repository.
            title: Name of the branch to commit and push on.
            commit_messge: Commit message to use.
        """
        self.create_and_switch_branch(title)
        self.pull(ref_name=title)
        # Join path components so repo_path works with or without a trailing
        # slash; the final "" keeps the trailing separator so the destination
        # is always treated as a directory.
        destination = os.path.join(self.repo_path, "src", "content", "")
        shutil.copy(file_path, destination)
        # Pass the message through untouched: wrapping it in quotes here would
        # put literal quote characters into the commit message.
        self.add_and_commit(commit_messge)
        # A freshly created branch has no upstream yet, so name the remote and
        # branch explicitly instead of relying on a bare push.
        self.repo.git.push("--set-upstream", "origin", title)
--- a/src/trilium/notes.py
+++ b/src/trilium/notes.py
@ -11,16 +11,20 @@ class TrilumNotes:
        self.token = os.environ.get('TRILIUM_TOKEN')
        if not all([self.protocol, self.host, self.port, self.tril_pass]):
            print("One or more required environment variables not found. Have you set a .env?")
-        
+
        self.server_url = f'{self.protocol}://{self.host}:{self.port}'
-        
+
        if not self.token:
            print("Please run get_token and set your token")
        else:
            self.ea = ETAPI(self.server_url, self.token)
-    
+        self.new_notes = None
        self.note_content = None
    def get_token(self):
        ea = ETAPI(self.server_url)
        if self.tril_pass == None:
            raise ValueError("Trillium password can not be none")
        token = ea.login(self.tril_pass)
        print(token)
        print("I would recomend you update the env file with this tootsweet!")
@ -40,10 +44,11 @@ class TrilumNotes:
    def get_notes_content(self):
        content_dict = {}
        if self.new_notes is None:
            raise ValueError("How did you do this? new_notes is None!")
        for note in self.new_notes['results']:
-            content_dict[note['noteId']] = {"title" : f"{note['title']}", 
+            content_dict[note['noteId']] = {"title" : f"{note['title']}",
                                            "content" : f"{self._get_content(note['noteId'])}"
                                            }
        self.note_content = content_dict
        return content_dict