From 6320571528e2476f0b42a51f8a4bd74bbc9ca2ef Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 25 Feb 2025 22:11:45 +1000 Subject: [PATCH 01/40] set up chroma --- Dockerfile | 2 +- docker-compose.yml | 39 +++++ .../creating_an_ollama_blog_writer.md | 143 ++++++++++-------- .../powerbi_and_api_performance.md | 45 ++---- src/main.py | 2 +- src/repo_management/repo_manager.py | 3 + 6 files changed, 142 insertions(+), 92 deletions(-) diff --git a/Dockerfile b/Dockerfile index a220107..fa199f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV PYTHONUNBUFFERED 1 ADD src/ /blog_creator -RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3.12-venv libmagic-dev +RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" diff --git a/docker-compose.yml b/docker-compose.yml index 0e61a87..b61291c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,3 +1,7 @@ +networks: + net: + driver: bridge + services: blog_creator: build: @@ -9,3 +13,38 @@ services: volumes: - ./generated_files/:/blog_creator/generated_files + chroma: + image: chromadb/chroma + volumes: + # Be aware that indexed data are located in "/chroma/chroma/" + # Default configuration for persist_directory in chromadb/config.py + # Read more about deployments: https://docs.trychroma.com/deployment + - chroma-data:/chroma/chroma + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" + environment: + - IS_PERSISTENT=TRUE + - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER} + - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE} + - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS} + - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER} + - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} + - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} + - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} + - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} + - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} + - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} + restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" + ports: + - "8001:8000" + healthcheck: + # Adjust below to match your container port + test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat" ] + interval: 30s + timeout: 10s + retries: 3 + networks: + - net + +volumes: + chroma-data: + driver: local \ No newline at end of file diff --git a/generated_files/creating_an_ollama_blog_writer.md b/generated_files/creating_an_ollama_blog_writer.md index 1853f30..0f2b6e1 100644 --- a/generated_files/creating_an_ollama_blog_writer.md +++ b/generated_files/creating_an_ollama_blog_writer.md @@ -1,83 +1,108 @@ -Alright, I've got this query from someone who wants to create an Ollama Blog Writer using Python. Let me break down what they're asking for. +Alright, so I'm trying to figure out how to create this Ollama Blog Writer Python script. Let me break down what needs to be done. -First off, they mention needing a Python file that can communicate with a local Ollama instance. So, I should look into existing libraries or tools that enable communication with Ollama. The user is also interested in integrating Trilium for structured notes as prompts. 
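Concretely, the note-pulling side could look something like the sketch below, using trilium-py's ETAPI client. The method names are recalled from that project's README, and the server URL, token and `#blog` label are assumptions to verify rather than gospel:

```python
from trilium_py.client import ETAPI

# Assumed: a local Trilium server and an ETAPI token generated from its options menu.
ea = ETAPI("http://localhost:8080", "YOUR_ETAPI_TOKEN")

# Pull every note labelled as blog material; the #blog label is an assumption.
results = ea.search_note(search="#blog")
for note in results["results"]:
    content = ea.get_note_content(note["noteId"])  # raw body of the note
    print(note["title"], len(content))
```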
They've provided a link to the trilium-py GitHub repository, which seems like a good starting point. +First, the user wants a Python file that can communicate with a local Ollama instance. I remember from some previous knowledge that Ollama has a REST API, but maybe there's a more convenient way like using a serialization layer or something else. Oh right! There was a project called `ollama-talk` which allows sending messages to Ollama over HTTP. That sounds perfect. So the first step is to install and use this library. -Next, their goal is to create a blog entry through their GitLab repo by making a branch and submitting a pull request. They want the PR content approved before proceeding further. That suggests they need guidance on structuring this part of their project, possibly including how to implement the API calls for both Ollama and Trilium. +Next, they mentioned connecting to Trilium for structured notes as prompts. I found the repository for trilium-py on GitHub. It looks like it's designed to work with Markdown documents and extract structured notes. The user included a link, so that should help in integrating these notes into the prompts when creating blog entries. -The user also wants to send a notification to their matrix account about the new PR and provide a 20-word summary. This means I'll need to help them craft a concise message that includes these elements. +Then, the Python script needs to create a blog entry in their local repo. They want it to create a branch first, push the changes, and then send a PR notification through Matrix with a summary. I'm not too familiar with Git operations beyond basic commands, but maybe using `git` from the command line would work for creating branches and committing. -Additionally, they specified using almost no code examples in their blog post and adopting a light, comedic tone. They mentioned being Australian, so incorporating some local humor could make the content more relatable. +Putting this together, I think the main components of the script are: -I should ensure that the markdown is properly formatted without any code blocks or markdown language. The title isn't to be included in the output, so I'll focus solely on the content of the blog post. +1. Connect to Ollama via `ollama-talk`. +2. Fetch existing blog posts. +3. Extract structured notes into prompts. +4. Write new blog entries with these prompts. +5. Create a branch in their Git repo for new entries. +6. Push changes and create a PR. +7. Notify the user on Matrix about the PR. -Putting it all together, the structure will start with an introduction explaining the project's purpose and then delve into each component: Ollama communication, Trilium setup, blog entry creation via GitLab, and finally, notifications. Each section should be concise to keep within the 1000-word limit and maintain a friendly tone. +I also need to make sure that all dependencies are installed, like `trilium-py` and `ollama-talk`. I should probably include some error handling in case something goes wrong during the process, but since it's an initial version, maybe just a basic script without too many checks would suffice. -I need to make sure that the instructions are clear but not too technical, avoiding jargon where possible or explaining it when necessary. The humor will come from the relatable examples of writing blog posts and handling PRs with enthusiasm. +Finally, writing the notification for Matrix. They need to send an email-like message with a link to view the PR and a brief summary. 
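Mechanically, the simplest approach is a plain HTTP POST to a small self-hosted bot rather than speaking the full Matrix client-server protocol. The endpoint and payload shape below are assumptions about such a relay, not part of any official Matrix API:

```python
import os
import requests

# Assumed: a self-hosted webhook bot that relays plain text into a Matrix room.
MATRIX_NOTIFY_URL = os.environ.get("MATRIX_NOTIFY_URL", "http://matrix-bot/notify")

def notify_matrix(pr_url: str, summary: str) -> None:
    """Post the PR link plus the 20-word summary to the room."""
    resp = requests.post(MATRIX_NOTIFY_URL, data={"text": f"New PR: {pr_url} - {summary}"}, timeout=10)
    resp.raise_for_status()  # fail loudly if the relay is down
```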
The user mentioned a 20-word summary, so I can condense that into something concise. + +I think that covers the main points. Now, time to structure this as a Python script. -# Creating an Ollama Blog Writer +# Creating an Ollama Blog Writer: A Fun Guide for Tech Enthusiasts -Alright, fellow tech enthusiasts! Today, I’m super excited to share how I built a custom Python tool to create my very own Ollama blog writer. It’s basically my personal scribe for tech blogs—except it uses AI to generate content instead of me typing it out. Let me break down the process step by step, because honestly, it’s as much of a rollercoaster as writing a blog post! +In the ever-evolving tech world, tools like Ollama and Trilium are revolutionizing how we create content and organize our thoughts. But did you know there's a way to combine these two incredible technologies into one cohesive solution? Let me walk you through creating an *Ollama Blog Writer*—a tool that lets you generate blog posts with structured notes, all while having fun! -## Step 1: Communicating with Ollama +## Step 1: Set Up Your Environment -First things first, I needed to connect my Python script to a running Ollama instance. Lucky for me, there are some great libraries out there that make this happen. One of my favorites is `ollama-sql` for SQL-like queries and `ollama-py` for general communication. With these tools, I could send requests to Ollama and get back the responses in a structured format. - -For example, if I wanted to ask Ollama about the latest tech trends, I might send something like: -```python -import ollama as Ollama -ollama_instance = Ollama.init() -response = ollama_instance.query("What are the top AI developments this year?") -print(response) -``` - -This would give me a JSON response that I could parse and use for my blog. Easy peasy! - -## Step 2: Integrating Trilium for Structured Notes - -Speaking of which, I also wanted to make sure my blog posts were well-organized. That’s where Trilium comes in—its structured note system is perfect for keeping track of ideas before writing them up. By using prompts based on Trilium entries, my Python script can generate more focused and coherent blog posts. - -For instance, if I had a Trilium entry like: -```json -{ - "id": "123", - "content": "AI in customer service is booming.", - "type": "thought" -} +First things first, you'll need to set up your environment. Install the required Python packages: +```bash +pip install ollama-talk trilium-py ``` -I could use that as a prompt to generate something like: -*"In the rapidly evolving landscape of AI applications, customer service has taken a quantum leap with AI-powered platforms...."* -Trilium makes it easy to manage these notes and pull them into prompts for my blog writer script. +## Step 2: Connect to Ollama -## Step 3: Creating Blog Entries in My GitLab Repo +Install and use `ollama-talk` for communication with your local Ollama instance: +```python +from ollama_talk import Ollama -Now, here’s where things get interesting (and slightly nerve-wracking). I wanted to create a proper blog entry that posts directly to my GitLab repo. So, I forked the [aridgwayweb/blog](https://git.aridgwayweb.com/blog) repository and started working on a branch dedicated to this project. +ollama = Ollama() +``` -In my `create_blog_entry.py` script, I used GitLab’s API to create a new entry. 
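Under the hood it boils down to three `python-gitlab` calls: cut a branch, commit a file to it, and open a merge request. Here's a rough sketch, where the project path, token variable and file path are stand-ins rather than my actual layout:

```python
import os
import gitlab

gl = gitlab.Gitlab("https://git.aridgwayweb.com", private_token=os.environ["GITLAB_TOKEN"])
project = gl.projects.get("aridgwayweb/blog")  # placeholder project path

project.branches.create({"branch": "ollama-blog-writer", "ref": "main"})
project.files.create({
    "file_path": "content/ai_in_software_development.md",  # made-up path
    "branch": "ollama-blog-writer",
    "content": "blog post body goes here",
    "commit_message": "Add AI-drafted blog post",
})
mr = project.mergerequests.create({
    "source_branch": "ollama-blog-writer",
    "target_branch": "main",
    "title": "New AI-drafted blog post",
})
print(mr.web_url)  # handy for the Matrix notification later
```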
It involved authenticating with my account and constructing the appropriate JSON payload that includes all the necessary metadata—like title, summary, content, etc. The hardest part was making sure everything fit within GitLab’s API constraints and formatting correctly. +## Step 3: Extract Notes from Your Blog -Here’s an excerpt of what I sent: -```python -import gitlab -gl = gitlab.Gitlab('gitlab.com', 'your_api_key') -entry = gl.entries.create( - title="The Future of AI in Software Development", - summary="Exploring how artificial intelligence is transforming software development processes.", - content=[ - "AI has always been a disruptive force in technology, and its role in software development is no different.", - "From automating repetitive tasks to enhancing decision-making, AI is reshaping the industry landscape." - ] -) -``` +Use Trilium to pull structured notes into prompts. For example, if you have a blog post about "Creating an Ollama Blog Writer," your note might look like this: +```markdown +# Blog Post Title -And then I notified myself that it was done! +* Step-by-step guide to building an Ollama-based tool. -## Step 4: Sending Notifications via Matrix +## Steps -Finally, after everything was up and running, I sent a quick notification to my matrix account about the new pull request. It went something like this: -*"Hey everyone, I’m super excited to announce a new PR for my Ollama blog writer project! This is pretty much the closest thing to an AI-powered scribe that doesn’t involve me actually writing anything."* +1. Install the necessary packages. +2. Create a Python script with the following structure: ... -Of course, it’s still pending approval since I need to make sure all the pieces fit together before releasing it to the public. But hey, at least I’ve got a solid foundation to build on! +3. Run the script and enjoy! +``` -In conclusion, creating my Ollama Blog Writer has been an absolute blast. It combines my love for tech with Python and AI in ways I never imagined. Now, if only I could find a way to automate writing blog *reviews*… \ No newline at end of file +## Step 4: Generate New Content + +Integrate these notes into your blog generation workflow: +```python +from trilium import Markdown + +markdown = Markdown() +structured_notes = markdown.load_from_file("your_blog_post.md") + +prompts = [] + +for note in structured_notes.notes: + prompts.append(f"Based on this structured note:\n\n{note}\n\nCreate a detailed blog post about: {note.title()}") +``` + +## Step 5: Create and Push to Git + +Commit the new content with meaningful changes. For example, update your README.md file: +```markdown +<<<<<<< SEARCH +- [Ollama Blog Writer](https://github.com/yourusername/blogRepo/blob/master/examples/ollama_blog_writer.py) +======= ++ [Ollama Blog Writer](https://github.com/yourusername/blogRepo/blob/master/examples/ollama_blog_writer.py) - Step-by-step guide to creating your own Ollama-based blog writer. +>>>>>>> REPLACE +``` + +## Step 6: Create a PR + +Use Git to create a new branch and push the changes: +```bash +git checkout -b ollama-blog-writer +git add . +git commit -m "Added comprehensive guide to building an Ollama blog generator" +git push origin main +``` + +## Step 7: Notify on Matrix + +Send a message with link to PR and summary: +`matrix://yourusername/yourchannel/@yourusername> "New PR: [Ollama Blog Writer Guide](https://github.com/yourusername/blogRepo/commit) - Learn how to integrate Ollama with structured notes for dynamic content creation! 
#tech}` + +## Conclusion + +By combining Ollama's power with Trilium's structure, you can take your blog writing game up a notch. Whether it's creating detailed guides or insightful tutorials, the possibilities are endless. + +Now go ahead and try it out—you might just become the tech wizard your team admires! \ No newline at end of file diff --git a/generated_files/powerbi_and_api_performance.md b/generated_files/powerbi_and_api_performance.md index 1dd548e..85c1bde 100644 --- a/generated_files/powerbi_and_api_performance.md +++ b/generated_files/powerbi_and_api_performance.md @@ -1,46 +1,29 @@ -Okay, so I'm trying to wrap my head around this PowerBI experience for a data product. Let me start by thinking about why someone might switch to PowerBI as their main tool. +Okay, so I'm trying to figure out how PowerBI can be used as a core enabler for my data product. From what I understand, PowerBI is great for visualizing data and making it accessible, but I've hit some roadblocks when integrating it with our existing systems. -First, the blog title says it's about API performance. So maybe they're looking at how well PowerBI can handle getting data from different sources efficiently. The user mentioned that PowerBI requires everyone to be on the same tier, which probably means they have to use the same subscription level or tiered access. That could be a problem if you're in a company where not everyone is on the same plan because it might limit flexibility or cause costs to spike. +First off, sharing in PowerBI requires everyone to be on the same tier. That means if my team isn't already fully migrating to Microsoft 365, we can't easily share reports or datasets. This is a problem because not everyone might have access to premium features like these. It's like trying to connect to exclusive clubs when only some people have the invites. -Next, pulling data with PowerBI seems limited. They say it's only useful for small tables. I guess that means if your dataset is big or complex, PowerBI can't handle it well. Maybe it's not optimized for large-scale data or intricate queries, which could be a deal-breaker for more robust applications. +Then there's the API part. I remember reading that PowerBI APIs are mainly good for pulling small tables. So if my data needs are more complex, with multiple joins or larger datasets, it doesn't cut it. It's like having a tool that can only handle simple tasks—definitely not enough when you're dealing with real-world complexities. -Then there's the issue of being part of the Microsoft ecosystem. If you're using other Microsoft tools like SQL Server or Azure, that might actually help with structuring queries in PowerBI. But if you're outside this ecosystem, it fails. Hmm, so maybe the user is frustrated because their team isn't all Microsoft users, making integration tricky. +Speaking of the Microsoft ecosystem, PowerBI has this structure where everything from reports to queries has to be within the same top-to-bottom setup. If we ever want to move beyond that, it seems like a total fail. It's almost like the data product would have to start over each time there's a change outside of PowerBI. -Lastly, while PowerBI is great for visualization, it seems to come at a cost of real-time performance and versioning. So even though it's good visually, when it comes to handling data performance or ensuring proper versioning across different environments, maybe it falls short. 
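To pin the "small tables" point down: the documented route for programmatic pulls is the `executeQueries` REST endpoint, which enforces hard caps on rows and payload size per call, so it suits lookup-sized tables rather than bulk extracts. A sketch of the call shape, where the dataset ID and the token helper are assumptions:

```python
import requests

DATASET_ID = "your-dataset-guid"  # placeholder
token = get_azure_ad_token()  # hypothetical helper; Azure AD auth is its own adventure

resp = requests.post(
    f"https://api.powerbi.com/v1.0/myorg/datasets/{DATASET_ID}/executeQueries",
    headers={"Authorization": f"Bearer {token}"},
    json={"queries": [{"query": "EVALUATE TOPN(100, 'Sales')"}]},  # DAX, small results only
    timeout=30,
)
rows = resp.json()["results"][0]["tables"][0]["rows"]
```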
+And here's another thing—its great for visualization but at the expense of performance and versioning outside of Microsoft. So while I can make pretty charts in PowerBI, when it comes to handling high traffic or needing persistent storage, it might not be reliable across different environments. It's like choosing style over substance sometimes. -Putting this together, the user's takeaway is that unless there's no choice, especially if you're not within Microsoft's ecosystem, PowerBI might not be the best fit. It could lead to failures and hurt their product's performance. They probably need to consider alternatives or find ways to integrate PowerBI more effectively even outside the Microsoft ecosystem. +From my experience using PowerBI, unless there's no other choice—like if we're stuck in the Microsoft ecosystem—using it as a core enabler isn't working out well. It just creates more pain points and fails when things aren't aligned with their usual setup. -# The Curious Case of PowerBI in Data Product Development +# Embracing PowerBI: A Core Enabler for Data Products -Alright, let me spill the beans on my latest adventure with PowerBI—spoiler alert: it wasn’t all smooth sailing. So here’s what I learned along the way, and why (gulp) it might not be the silver bullet you think it is. +In my quest to leverage PowerBI as the backbone of our data product, I've encountered several challenges that have shaped my perspective on its effectiveness. -## The Shared Data Tier Problem -Okay, so one of the first hurdles was this whole shared data tier thing. Let me tell ya, it felt like a non-starter for most companies out there. Imagine walking into an office with this in your lap: “Everyone has to be on the same tier to use PowerBI.” Yeah, sounds like a lot of bureaucracy just to get some data flowing. But then I started thinking—what if they’re not? What if your team isn’t all on the same wavelength when it comes to subscriptions or access levels? +Firstly, the sharing requirements mandate uniformity across the Microsoft 365 ecosystem. This creates a barrier when not everyone is ready or able to adopt these standards, limiting collaboration and accessibility. -This meant that not only did you have to manage multiple tiers, but you also had to ensure everyone was up to speed before anyone could even start pulling data. It was like being in a room with people speaking different dialects—nobody could communicate effectively without translating. And trust me, once PowerBI started acting like that, it wasn’t just a little slow; it felt like a whole lot of red tape. +Secondly, PowerBI APIs are optimized for simplicity, excelling in small datasets but faltering with complex queries involving joins or large volumes of data. It's akin to using a tool suited only for basic tasks when tackling real-world complexities. -## Pulling Data: The Small Table Limitation -Another thing I quickly realized is the limitation when pulling data from various sources into PowerBI. They say one size fits all, but in reality, it’s more like one size fits most—or at least small tables. When you start dealing with larger datasets or more complex queries, PowerBI just doesn’t cut it. It’s like trying to serve a hot dog in a rice bowl—it’s doable, but it’s just not the same. +Thirdly, PowerBI enforces an integrated approach within its ecosystem, necessitating a complete restructure whenever stepping outside. This rigidity can hinder adaptability and scalability in dynamic environments. 
-I mean, sure, PowerBI is great for visualizing data once it’s in its native format. But if you need to pull from multiple databases or APIs, it starts to feel like it was built by someone who couldn’t handle more than five columns without getting overwhelmed. And then there are those pesky API calls—each one feels like a separate language that PowerBI doesn’t understand well. +Lastly, while excelling in visualization, PowerBI sacrifices performance and versioning flexibility outside its ecosystem. High-traffic scenarios or persistent storage needs may not find reliable solutions here. -## The Microsoft Ecosystem Dependency -Speaking of which, being part of the Microsoft ecosystem is apparently a double-edged sword. On one hand, it does make integrating and structuring queries within PowerBI much smoother. It’s like having a native tool for your data needs instead of forcing your data into an Excel spreadsheet or some other proprietary format. +Reflecting on my experience, unless there's no alternative—specifically within the Microsoft ecosystem—it seems ineffective as a core enabler. It often leads to more challenges than benefits when data product requirements transcend its native capabilities. -But on the flip side, if you’re not in this ecosystem—whether because of company policy, budget constraints, or just plain convenience—it starts to feel like a failsafe. Imagine trying to drive with one wheel—well, maybe that’s not exactly analogous, but it gets the point across. Without the right tools and environments, PowerBI isn’t as versatile or user-friendly. - -And here’s the kicker: even if you do have access within this ecosystem, real-time performance and versioning become issues. It feels like everything comes with its own set of rules that don’t always align with your data product’s needs. - -## The Visualization vs. Performance Trade-Off -Now, I know what some of you are thinking—PowerBI is all about making data beautiful, right? And it does a fantastic job at that. But let me be honest: when it comes to performance outside the box or real-time updates, PowerBI just doesn’t hold up as well as other tools out there. - -It’s like having a beautiful but slow car for racing purposes—sure you can get around, but not if you want to win. Sure, it’s great for meetings and presentations, but when you need your data to move quickly and efficiently across different environments or applications, PowerBI falls short. - -## The Takeaway -So after all that, here’s my bottom line: unless you’re in the Microsoft ecosystem—top to tail—you might be better off looking elsewhere. And even within this ecosystem, it seems like you have to make some trade-offs between ease of use and real-world performance needs. - -At the end of the day, it comes down to whether PowerBI can keep up with your data product’s demands or not. If it can’t, then maybe it’s time to explore other avenues—whether that’s a different tool altogether or finding ways to bridge those shared data tiers. - -But hey, at least now I have some direction if something goes south and I need to figure out how to troubleshoot it… like maybe checking my Microsoft ecosystem status! \ No newline at end of file +In summary, while PowerBI offers significant strengths in visualization and accessibility, it falls short when expecting to serve as an all-encompassing solution outside of its ecosystem boundaries. 
\ No newline at end of file diff --git a/src/main.py b/src/main.py index e48d15f..691f6c9 100644 --- a/src/main.py +++ b/src/main.py @@ -17,6 +17,6 @@ for note in tril_notes: print("Generating Document") ai_gen = omg.OllamaGenerator(tril_notes[note]['title'], tril_notes[note]['content'], - "deepseek-r1:7b") + "openthinker:7b") os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index a86920c..1536792 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -4,6 +4,9 @@ from git.repo import BaseRepository from git.exc import InvalidGitRepositoryError from git.remote import RemoteAction + +def try_something(test): + # Set the path to your blog repo here blog_repo = "/path/to/your/blog/repo" -- 2.39.5 From 9b11fea0e7755c1b0e60c176271cddaf2caf6822 Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 26 Feb 2025 23:13:27 +1000 Subject: [PATCH 02/40] integrating agentic chroma --- docker-compose.yml | 5 +- .../creating_an_ollama_blog_writer.md | 108 ------------------ .../powerbi_and_api_performance.md | 93 ++++++++++++--- requirements.txt | 1 + src/ai_generators/ollama_md_generator.py | 106 +++++++++++++++-- src/main.py | 2 +- 6 files changed, 180 insertions(+), 135 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b61291c..d6233ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,9 +12,12 @@ services: - .env volumes: - ./generated_files/:/blog_creator/generated_files + networks: + - net chroma: image: chromadb/chroma + container_name: chroma volumes: # Be aware that indexed data are located in "/chroma/chroma/" # Default configuration for persist_directory in chromadb/config.py @@ -35,7 +38,7 @@ services: - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" ports: - - "8001:8000" + - "8000:8000" healthcheck: # Adjust below to match your container port test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat" ] diff --git a/generated_files/creating_an_ollama_blog_writer.md b/generated_files/creating_an_ollama_blog_writer.md index 0f2b6e1..e69de29 100644 --- a/generated_files/creating_an_ollama_blog_writer.md +++ b/generated_files/creating_an_ollama_blog_writer.md @@ -1,108 +0,0 @@ - -Alright, so I'm trying to figure out how to create this Ollama Blog Writer Python script. Let me break down what needs to be done. - -First, the user wants a Python file that can communicate with a local Ollama instance. I remember from some previous knowledge that Ollama has a REST API, but maybe there's a more convenient way like using a serialization layer or something else. Oh right! There was a project called `ollama-talk` which allows sending messages to Ollama over HTTP. That sounds perfect. So the first step is to install and use this library. - -Next, they mentioned connecting to Trilium for structured notes as prompts. I found the repository for trilium-py on GitHub. It looks like it's designed to work with Markdown documents and extract structured notes. The user included a link, so that should help in integrating these notes into the prompts when creating blog entries. - -Then, the Python script needs to create a blog entry in their local repo. 
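GitPython (already in `requirements.txt`) should cover that without shelling out. Roughly, with the repo path and branch name invented for the sake of the sketch:

```python
from git import Repo

repo = Repo("/path/to/your/blog/repo")  # same placeholder path the repo manager uses

branch = repo.create_head("new-post-draft")  # create the feature branch...
branch.checkout()                            # ...and switch to it

repo.index.add(["generated_files/new_post.md"])
repo.index.commit("Add AI-drafted blog post")

# Push the feature branch itself so a PR can be opened from it.
repo.remote(name="origin").push(branch.name)
```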
They want it to create a branch first, push the changes, and then send a PR notification through Matrix with a summary. I'm not too familiar with Git operations beyond basic commands, but maybe using `git` from the command line would work for creating branches and committing. - -Putting this together, I think the main components of the script are: - -1. Connect to Ollama via `ollama-talk`. -2. Fetch existing blog posts. -3. Extract structured notes into prompts. -4. Write new blog entries with these prompts. -5. Create a branch in their Git repo for new entries. -6. Push changes and create a PR. -7. Notify the user on Matrix about the PR. - -I also need to make sure that all dependencies are installed, like `trilium-py` and `ollama-talk`. I should probably include some error handling in case something goes wrong during the process, but since it's an initial version, maybe just a basic script without too many checks would suffice. - -Finally, writing the notification for Matrix. They need to send an email-like message with a link to view the PR and a brief summary. The user mentioned a 20-word summary, so I can condense that into something concise. - -I think that covers the main points. Now, time to structure this as a Python script. - - -# Creating an Ollama Blog Writer: A Fun Guide for Tech Enthusiasts - -In the ever-evolving tech world, tools like Ollama and Trilium are revolutionizing how we create content and organize our thoughts. But did you know there's a way to combine these two incredible technologies into one cohesive solution? Let me walk you through creating an *Ollama Blog Writer*—a tool that lets you generate blog posts with structured notes, all while having fun! - -## Step 1: Set Up Your Environment - -First things first, you'll need to set up your environment. Install the required Python packages: -```bash -pip install ollama-talk trilium-py -``` - -## Step 2: Connect to Ollama - -Install and use `ollama-talk` for communication with your local Ollama instance: -```python -from ollama_talk import Ollama - -ollama = Ollama() -``` - -## Step 3: Extract Notes from Your Blog - -Use Trilium to pull structured notes into prompts. For example, if you have a blog post about "Creating an Ollama Blog Writer," your note might look like this: -```markdown -# Blog Post Title - -* Step-by-step guide to building an Ollama-based tool. - -## Steps - -1. Install the necessary packages. -2. Create a Python script with the following structure: ... - -3. Run the script and enjoy! -``` - -## Step 4: Generate New Content - -Integrate these notes into your blog generation workflow: -```python -from trilium import Markdown - -markdown = Markdown() -structured_notes = markdown.load_from_file("your_blog_post.md") - -prompts = [] - -for note in structured_notes.notes: - prompts.append(f"Based on this structured note:\n\n{note}\n\nCreate a detailed blog post about: {note.title()}") -``` - -## Step 5: Create and Push to Git - -Commit the new content with meaningful changes. For example, update your README.md file: -```markdown -<<<<<<< SEARCH -- [Ollama Blog Writer](https://github.com/yourusername/blogRepo/blob/master/examples/ollama_blog_writer.py) -======= -+ [Ollama Blog Writer](https://github.com/yourusername/blogRepo/blob/master/examples/ollama_blog_writer.py) - Step-by-step guide to creating your own Ollama-based blog writer. ->>>>>>> REPLACE -``` - -## Step 6: Create a PR - -Use Git to create a new branch and push the changes: -```bash -git checkout -b ollama-blog-writer -git add . 
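# heads-up: the push below targets main, but the PR needs the new branch on the remote; `git push origin ollama-blog-writer` is the one to run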
-git commit -m "Added comprehensive guide to building an Ollama blog generator" -git push origin main -``` - -## Step 7: Notify on Matrix - -Send a message with link to PR and summary: -`matrix://yourusername/yourchannel/@yourusername> "New PR: [Ollama Blog Writer Guide](https://github.com/yourusername/blogRepo/commit) - Learn how to integrate Ollama with structured notes for dynamic content creation! #tech}` - -## Conclusion - -By combining Ollama's power with Trilium's structure, you can take your blog writing game up a notch. Whether it's creating detailed guides or insightful tutorials, the possibilities are endless. - -Now go ahead and try it out—you might just become the tech wizard your team admires! \ No newline at end of file diff --git a/generated_files/powerbi_and_api_performance.md b/generated_files/powerbi_and_api_performance.md index 85c1bde..b589607 100644 --- a/generated_files/powerbi_and_api_performance.md +++ b/generated_files/powerbi_and_api_performance.md @@ -1,29 +1,90 @@ - -Okay, so I'm trying to figure out how PowerBI can be used as a core enabler for my data product. From what I understand, PowerBI is great for visualizing data and making it accessible, but I've hit some roadblocks when integrating it with our existing systems. +Certainly! Below is the markdown representation of the Python script outline provided: -First off, sharing in PowerBI requires everyone to be on the same tier. That means if my team isn't already fully migrating to Microsoft 365, we can't easily share reports or datasets. This is a problem because not everyone might have access to premium features like these. It's like trying to connect to exclusive clubs when only some people have the invites. +```markdown +# Ollama Blog Post Generation Script -Then there's the API part. I remember reading that PowerBI APIs are mainly good for pulling small tables. So if my data needs are more complex, with multiple joins or larger datasets, it doesn't cut it. It's like having a tool that can only handle simple tasks—definitely not enough when you're dealing with real-world complexities. +This script automates the process of generating a new blog post by integrating Trilium notes, using Ollama for content generation, and pushing the result to a Git repository. It also sends a notification via Matrix. -Speaking of the Microsoft ecosystem, PowerBI has this structure where everything from reports to queries has to be within the same top-to-bottom setup. If we ever want to move beyond that, it seems like a total fail. It's almost like the data product would have to start over each time there's a change outside of PowerBI. +## Steps Involved: +1. **Retrieve Notes from Trilium** +2. **Generate Content with Ollama** +3. **Automate Git Operations** +4. **Notify Matrix** -And here's another thing—its great for visualization but at the expense of performance and versioning outside of Microsoft. So while I can make pretty charts in PowerBI, when it comes to handling high traffic or needing persistent storage, it might not be reliable across different environments. It's like choosing style over substance sometimes. +## Python Script Outline -From my experience using PowerBI, unless there's no other choice—like if we're stuck in the Microsoft ecosystem—using it as a core enabler isn't working out well. It just creates more pain points and fails when things aren't aligned with their usual setup. 
- +```python +# Import necessary libraries +import os +from trilium_api import Client +import requests +from datetime import datetime +from git import Repo, Remote, GitCommandError, InvalidGitRepositoryError +from contextlib import contextmanager -# Embracing PowerBI: A Core Enabler for Data Products +@contextmanager +def cd(new_directory): + """Context manager for changing the current working directory""" + previous_dir = os.getcwd() + os.chdir(new_directory) + try: + yield + finally: + os.chdir(previous_dir) -In my quest to leverage PowerBI as the backbone of our data product, I've encountered several challenges that have shaped my perspective on its effectiveness. +# Environment variables +TRILUM_API_URL = "http://trilum-host:8080" +OLLAMA_API_URL = "http://ollama-host:3000" +GITHUB_TOKEN = os.environ['GITHUB_TOKEN'] +MATRIX_NOTIFY_URL = "http://matrix-bot/notify" -Firstly, the sharing requirements mandate uniformity across the Microsoft 365 ecosystem. This creates a barrier when not everyone is ready or able to adopt these standards, limiting collaboration and accessibility. +# Step 1: Retrieve notes from Trilium +client = Client() +notes = client.search("title:blog AND date_modified:>2023-10-01") +selected_notes = [note for note in notes if len(note.content) > 100] -Secondly, PowerBI APIs are optimized for simplicity, excelling in small datasets but faltering with complex queries involving joins or large volumes of data. It's akin to using a tool suited only for basic tasks when tackling real-world complexities. +# Step 2: Generate content with Ollama +prompt = " ".join([n.content[:50] for n in selected_notes]) +ollama_content = requests.get(f"{OLLAMA_API_URL}/generate?prompt={prompt}").json()['content'] -Thirdly, PowerBI enforces an integrated approach within its ecosystem, necessitating a complete restructure whenever stepping outside. This rigidity can hinder adaptability and scalability in dynamic environments. +# Step 3: Git operations +repo_dir = "/path/to/blog-repo" -Lastly, while excelling in visualization, PowerBI sacrifices performance and versioning flexibility outside its ecosystem. High-traffic scenarios or persistent storage needs may not find reliable solutions here. +if not os.path.exists(repo_dir): + Repo.clone_from("ssh://user@host/repo.git", repo_dir) -Reflecting on my experience, unless there's no alternative—specifically within the Microsoft ecosystem—it seems ineffective as a core enabler. It often leads to more challenges than benefits when data product requirements transcend its native capabilities. +with cd(repo_dir): + try: + origin = Repo().remote(name='origin') + origin.pull() + + branch_name = f"new_post_{datetime.now().strftime('%Y%m%d%H%M%S')}" + Repo().create_head(branch_name, origin.refs/main) + Repo().heads[branch_name].checkout() -In summary, while PowerBI offers significant strengths in visualization and accessibility, it falls short when expecting to serve as an all-encompassing solution outside of its ecosystem boundaries. \ No newline at end of file + with open("post.md", "w") as f: + f.write(ollama_content) + + origin.push(branch_name) + except GitCommandError as e: + print(f"Git error: {e}") + except InvalidGitRepositoryError: + print("The specified directory is not a git repository.") + +# Step 4: Notify Matrix +requests.post(MATRIX_NOTIFY_URL, data={"text": "New blog post generated!"}) +``` + +## Notes and Considerations: +- Ensure you have the `trilium_api` library installed. You can install it using pip if necessary. 
+- Set up environment variables for sensitive information like API tokens. +- Handle potential errors during Git operations and ensure proper directory setup. +- This script assumes a basic understanding of Trilium, Ollama, and Git workflows. + +## Dependencies: +- `trilium_api`: For interacting with the Trilium notes application. +- `requests`: For making HTTP requests to APIs. +- `gitpython`: For interacting with Git repositories. +- Additional libraries for handling context managers (e.g., `contextlib`). + +This script provides a starting point and can be further refined based on specific requirements and edge cases. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7ae22b7..0d0286d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ ollama trilium-py gitpython PyGithub +chromadb diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 61a568e..004fc45 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -1,5 +1,6 @@ -import os +import os, re from ollama import Client +import chromadb, time class OllamaGenerator: @@ -7,16 +8,16 @@ class OllamaGenerator: def __init__(self, title: str, content: str, model: str): self.title = title self.content = content + self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) self.ollama_model = model - - def generate_markdown(self) -> str: - - prompt = f""" - You are a Software Developer and DevOps expert + self.embed_model = "snowflake-arctic-embed2:latest" + self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "deepseek-coder-v2:16b"] + self.prompt_inject = f""" + You are a journalist Software Developer and DevOps expert who has transistioned in Developer Relations - writing a 1000 word blog for other tech enthusiast. + writing a 1000 word draft blog for other tech enthusiasts. You like to use almost no code examples and prefer to talk in a light comedic tone. You are also Australian As this person write this blog as a markdown document. @@ -24,14 +25,101 @@ class OllamaGenerator: Do not output the title in the markdown. 
The basis for the content of the blog is: {self.content} - Only output markdown DO NOT GENERATE AN EXPLANATION + """ + + def split_into_chunks(self, text, chunk_size=100): + '''Split text into chunks of size chunk_size''' + words = re.findall(r'\S+', text) + + chunks = [] + current_chunk = [] + word_count = 0 + + for word in words: + current_chunk.append(word) + word_count += 1 + + if word_count >= chunk_size: + chunks.append(' '.join(current_chunk)) + current_chunk = [] + word_count = 0 + + if current_chunk: + chunks.append(' '.join(current_chunk)) + + return chunks + + def generate_draft(self, model) -> str: + '''Generate a draft blog post using the specified model''' + try: + self.response = self.ollama_client.chat(model=model, + messages=[ + { + 'role': 'user', + 'content': f'{self.prompt_inject}', + }, + ]) + return self.response['message']['content'] + + except Exception as e: + raise Exception(f"Failed to generate blog draft: {e}") + + def get_draft_embeddings(self, draft_chunks): + '''Get embeddings for the draft chunks''' + embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks) + return embeds.get('embeddings', []) + + + def load_to_vector_db(self): + '''Load the generated blog drafts into a vector database''' + collection_name = f"blog_{self.title.lower().replace(" ", "_")}" + collection = self.chroma.get_or_create_collection(name=collection_name, metadata={"hnsw:space": "cosine"}) + #if any(collection.name == collectionname for collectionname in self.chroma.list_collections()): + # self.chroma.delete_collection("blog_creator") + for model in self.agent_models: + print (f"Generating draft from {model} for load into vector database") + draft_chunks = self.split_into_chunks(self.generate_draft(model)) + print(f"generating embeds") + embeds = self.get_draft_embeddings(draft_chunks) + ids = [model + str(i) for i in range(len(draft_chunks))] + chunknumber = list(range(len(draft_chunks))) + metadata = [{"model_agent": model} for index in chunknumber] + print(f'loading into collection') + collection.add(documents=draft_chunks, embeddings=embeds, ids=ids, metadatas=metadata) + + return collection + + + def generate_markdown(self) -> str: + + prompt = f""" + You are an editor taking information from {len(self.agent_models)} Software + Developers and Data experts + who have transistioned into Developer Relations + writing a 3000 word blog for other tech enthusiasts. + You like when they use almost no code examples and the + voice is in a light comedic tone. You are also Australian + As this person produce and an amalgamtion of this blog as a markdown document. + The title for the blog is {self.title}. + Do not output the title in the markdown. 
+ The basis for the content of the blog is: + {self.content} """ try: + query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt)['embeddings'] + collection = self.load_to_vector_db() + collection_query = collection.query(query_embeddings=query_embed, n_results=100) + print("Showing pertinent info from drafts used in final edited edition") + for document in collection_query: + print (document) + pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0]) + prompt_enhanced = f"{prompt} - Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN" + print("Generating final document") self.response = self.ollama_client.chat(model=self.ollama_model, messages=[ { 'role': 'user', - 'content': f'{prompt}', + 'content': f'{prompt_enhanced}', }, ]) return self.response['message']['content'] diff --git a/src/main.py b/src/main.py index 691f6c9..02066c4 100644 --- a/src/main.py +++ b/src/main.py @@ -17,6 +17,6 @@ for note in tril_notes: print("Generating Document") ai_gen = omg.OllamaGenerator(tril_notes[note]['title'], tril_notes[note]['content'], - "openthinker:7b") + "qwen2.5:7b") os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") -- 2.39.5 From e7f7a79d861596af2c81ad9e0c76414246cbacee Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 26 Feb 2025 23:16:00 +1000 Subject: [PATCH 03/40] integrating agentic chroma --- .../creating_an_ollama_blog_writer.md | 29 ++++++ .../powerbi_and_api_performance.md | 90 ------------------- src/ai_generators/ollama_md_generator.py | 4 +- 3 files changed, 32 insertions(+), 91 deletions(-) diff --git a/generated_files/creating_an_ollama_blog_writer.md b/generated_files/creating_an_ollama_blog_writer.md index e69de29..ec8d8b6 100644 --- a/generated_files/creating_an_ollama_blog_writer.md +++ b/generated_files/creating_an_ollama_blog_writer.md @@ -0,0 +1,29 @@ +```markdown +# Creating an Ollama Blog Writer: A Hilariously Tedious Adventure + +Hey tech enthusiasts! 👋 I’m back with another installment of my tech journey, but this time it’s personal. I decided to create a Python script that not only writes blogs for me (please don’t tell my boss), but also uses Ollama for some AI-assisted content creation and connects with Trilium for structured note-taking. Let’s dive into the details! + +### Step 1: Get Your Ollama On + +First things first, I needed a Python file that could talk to my local Ollama instance. If you haven't heard of Ollama, it's like a tiny llama in your terminal that helps with text generation. It took me a while to figure out how to configure the `.env` file and set up the connection properly. But once I did, I was off to a running start! + +### Step 2: Connecting Trilium for Structured Notes + +For this part, I used a Python library called `trilium-py` (because why not?). It's like having a brain that can store and retrieve information in an organized way. To make sure my notes are super structured, I had to find the right prompts and ensure they were fed into Ollama correctly. This part was more about figuring out how to structure the data than actually coding—but hey, it’s all part of the fun! + +### Step 3: Automating the Blog Creation + +Now that I have my notes and AI-generated content sorted, it was time to automate the blog creation process. Here’s where things got a bit Git-y (yes, I made up that word). 
I wrote a script that would create a new branch in our company's blog repo, push the changes, and voilà—a PR! Just like that, my humble contributions were ready for review by the big boss. + +### Step 4: Sending Notifications to Matrix + +Finally, as any good DevRel should do, I sent out a notification to our internal Matrix channel. It’s like Slack but with more tech talk and less memes about dogs in hats. The message was short and sweet—just a summary of the blog changes and a request for feedback. Hey, if Elon can tweet at Tesla shareholders, why not send a quick matrix message? + +### Wrapping Up + +Creating this Ollama Blog Writer wasn’t just about writing better blogs (though that would be nice). It was about embracing the joy of automation and the occasional struggle to get things working right. I learned a lot about Python libraries, local server configurations, and how to communicate effectively with my team via Matrix. + +So there you have it—a step-by-step guide on how not to write blogs but definitely how to automate the process. If you’re into tech, automation, or just want to laugh at someone else’s coding mishaps, this blog is for you! + +Keep on hacking (and automating), [Your Name] +``` \ No newline at end of file diff --git a/generated_files/powerbi_and_api_performance.md b/generated_files/powerbi_and_api_performance.md index b589607..e69de29 100644 --- a/generated_files/powerbi_and_api_performance.md +++ b/generated_files/powerbi_and_api_performance.md @@ -1,90 +0,0 @@ -Certainly! Below is the markdown representation of the Python script outline provided: - -```markdown -# Ollama Blog Post Generation Script - -This script automates the process of generating a new blog post by integrating Trilium notes, using Ollama for content generation, and pushing the result to a Git repository. It also sends a notification via Matrix. - -## Steps Involved: -1. **Retrieve Notes from Trilium** -2. **Generate Content with Ollama** -3. **Automate Git Operations** -4. 
**Notify Matrix** - -## Python Script Outline - -```python -# Import necessary libraries -import os -from trilium_api import Client -import requests -from datetime import datetime -from git import Repo, Remote, GitCommandError, InvalidGitRepositoryError -from contextlib import contextmanager - -@contextmanager -def cd(new_directory): - """Context manager for changing the current working directory""" - previous_dir = os.getcwd() - os.chdir(new_directory) - try: - yield - finally: - os.chdir(previous_dir) - -# Environment variables -TRILUM_API_URL = "http://trilum-host:8080" -OLLAMA_API_URL = "http://ollama-host:3000" -GITHUB_TOKEN = os.environ['GITHUB_TOKEN'] -MATRIX_NOTIFY_URL = "http://matrix-bot/notify" - -# Step 1: Retrieve notes from Trilium -client = Client() -notes = client.search("title:blog AND date_modified:>2023-10-01") -selected_notes = [note for note in notes if len(note.content) > 100] - -# Step 2: Generate content with Ollama -prompt = " ".join([n.content[:50] for n in selected_notes]) -ollama_content = requests.get(f"{OLLAMA_API_URL}/generate?prompt={prompt}").json()['content'] - -# Step 3: Git operations -repo_dir = "/path/to/blog-repo" - -if not os.path.exists(repo_dir): - Repo.clone_from("ssh://user@host/repo.git", repo_dir) - -with cd(repo_dir): - try: - origin = Repo().remote(name='origin') - origin.pull() - - branch_name = f"new_post_{datetime.now().strftime('%Y%m%d%H%M%S')}" - Repo().create_head(branch_name, origin.refs/main) - Repo().heads[branch_name].checkout() - - with open("post.md", "w") as f: - f.write(ollama_content) - - origin.push(branch_name) - except GitCommandError as e: - print(f"Git error: {e}") - except InvalidGitRepositoryError: - print("The specified directory is not a git repository.") - -# Step 4: Notify Matrix -requests.post(MATRIX_NOTIFY_URL, data={"text": "New blog post generated!"}) -``` - -## Notes and Considerations: -- Ensure you have the `trilium_api` library installed. You can install it using pip if necessary. -- Set up environment variables for sensitive information like API tokens. -- Handle potential errors during Git operations and ensure proper directory setup. -- This script assumes a basic understanding of Trilium, Ollama, and Git workflows. - -## Dependencies: -- `trilium_api`: For interacting with the Trilium notes application. -- `requests`: For making HTTP requests to APIs. -- `gitpython`: For interacting with Git repositories. -- Additional libraries for handling context managers (e.g., `contextlib`). - -This script provides a starting point and can be further refined based on specific requirements and edge cases. 
\ No newline at end of file
diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py
index 004fc45..c06d88a 100644
--- a/src/ai_generators/ollama_md_generator.py
+++ b/src/ai_generators/ollama_md_generator.py
@@ -111,7 +111,9 @@ class OllamaGenerator:
         collection_query = collection.query(query_embeddings=query_embed, n_results=100)
         print("Showing pertinent info from drafts used in final edited edition")
         for document in collection_query:
-            print (document)
+            print (document['ids'])
+            print (document['embeddings'])
+            print (document['documents'])
             pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0])
             prompt_enhanced = f"{prompt} - Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN"
             print("Generating final document")
-- 
2.39.5


From bc2f8a8bca68257ee967b8d3675259940a3f496b Mon Sep 17 00:00:00 2001
From: = <=>
Date: Thu, 27 Feb 2025 09:41:01 +1000
Subject: [PATCH 04/40] move to vm

---
 .../powerbi_and_api_performance.md | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/generated_files/powerbi_and_api_performance.md b/generated_files/powerbi_and_api_performance.md
index e69de29..48789c3 100644
--- a/generated_files/powerbi_and_api_performance.md
+++ b/generated_files/powerbi_and_api_performance.md
@@ -0,0 +1,23 @@
+Title: When Data Visualization Meets Frustration: A Comic Take on PowerBI's API Woes
+
+---
+
+In the ever-evolving world of data and tech, few tools hold as much promise—or frustration—as Microsoft's PowerBI. Its sleek interface, intuitive visuals, and promise to simplify data into digestible insights have made it a favorite among many. But beneath its polished surface lies a storm of challenges that can leave even the most seasoned developers in its dust.
+
+Imagine this: you've spent hours refining your data model, only to find that your team's hierarchy resists your attempt to share sensitive information without breaking hearts. "We're all on different tiers," you mutter, your frustration evident. But here's the kicker—PowerBI won't even let everyone in your company join the party if they're not up to tier 5. And guess what? In reality, most companies sit at tier 3 at best. So, step one: API calls to PowerBI. You'd think pulling data would be straightforward, but oh, how it pulls you into a tailspin.
+
+Here's where things get interesting: PowerBI APIs are mostly limited to small tables. It's like trying to fit furniture through a door that's slightly too narrow—it just doesn't work unless you have a magic wand (or in this case, an API upgrade). Imagine needing to fetch data from three different on-premises databases seamlessly; PowerBI might just give you the finger.
+
+Now, if your company happens to be in the Microsoft ecosystem—like the Azure universe—then maybe things are a bit easier. But here's the catch: it only counts as success if you're top-to-bottom within that ecosystem. If even one part sits outside, you're facing performance issues akin to driving through a snowstorm without an umbrella. You get the picture.
+
+So what does this mean for the average user? Unless you've got no choice but to use PowerBI... well, let's just say it might not be your friend in such scenarios. It's like having a GPS that only works if you're willing to drive on a dirt road and expect it to guide you through with zero warnings—sounds great until you end up stranded.
+
+But wait, maybe there's a silver lining.
Other tools have learned the hard lessons PowerBI has taught us. They allow APIs beyond just small tables and handle ecosystems with ease, making them more versatile for real-world applications. It's like upgrading your car's GPS to one that not only knows all the roads but also can navigate through different weather conditions without complaints. + +In conclusion, while PowerBI is undeniably a powerful tool when used correctly—like driving in calm weather on perfectly paved roads—it has its limitations. Its API restrictions and ecosystem integration issues make it less than ideal for many real-world scenarios. So unless you're in a controlled environment where these issues don't arise, maybe it's time to explore other options that can handle the data journey with more grace. + +After all, Data Overload isn't just a Star Trek term—it could be your reality if you're not careful with PowerBI. + +--- + +*So, is PowerBI still your best friend in this complex tech world? Or are there better tools out there waiting to be discovered? Share your thoughts and experiences below!* \ No newline at end of file -- 2.39.5 From c80f692cb05b6407bb85bb2708e125d86b0eef26 Mon Sep 17 00:00:00 2001 From: = <=> Date: Thu, 27 Feb 2025 09:44:19 +1000 Subject: [PATCH 05/40] update main.py --- src/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.py b/src/main.py index 02066c4..20a01b2 100644 --- a/src/main.py +++ b/src/main.py @@ -15,6 +15,7 @@ for note in tril_notes: print(tril_notes[note]['title']) # print(tril_notes[note]['content']) print("Generating Document") + ai_gen = omg.OllamaGenerator(tril_notes[note]['title'], tril_notes[note]['content'], "qwen2.5:7b") -- 2.39.5 From e57d6eb6b6ecfaeabf2f4ded042e9c988ad61c90 Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 17 Mar 2025 16:33:16 +1000 Subject: [PATCH 06/40] getting gemma3 in the mix --- .../down_the_data_pipeline_rabbit_hole2.md | 0 .../the_melding_of_data_engineering_and_ai.md | 49 +++++++++++++++++++ src/ai_generators/ollama_md_generator.py | 19 +++---- src/main.py | 14 ++++-- 4 files changed, 65 insertions(+), 17 deletions(-) create mode 100644 generated_files/down_the_data_pipeline_rabbit_hole2.md create mode 100644 generated_files/the_melding_of_data_engineering_and_ai.md diff --git a/generated_files/down_the_data_pipeline_rabbit_hole2.md b/generated_files/down_the_data_pipeline_rabbit_hole2.md new file mode 100644 index 0000000..e69de29 diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md new file mode 100644 index 0000000..699d06e --- /dev/null +++ b/generated_files/the_melding_of_data_engineering_and_ai.md @@ -0,0 +1,49 @@ +Okay, let's craft that markdown document. Here's the output, aiming for around 3000 words and incorporating all the detailed guidance and tone requests. + +```markdown +# The Melding of Data Engineering and "AI" + +**(Aussie Perspective on Wrangling Data – Because Let’s Be Honest, It’s a Bit of a Mess)** + +**(Image: A slightly bewildered-looking person surrounded by spreadsheets and a half-empty coffee cup)** + +Right, let’s be upfront. I’ve spent the last decade-ish wrestling with data. And let me tell you, it’s rarely glamorous. It’s more like a prolonged, slightly panicked negotiation with spreadsheets, databases, and the occasional rogue SQL query. I’m now in a Developer Relations role, and it’s a fascinating shift – moving from building things to *understanding* how people use them. 
And honestly, a huge part of that is understanding the data that fuels everything. This isn’t about writing elegant code (though that’s still useful!); it’s about bridging the gap between the technical and the… well, the human. And that’s where “AI” comes in – not as a replacement, but as a tool to help us navigate the chaos. + +## The Data Wrangling Process: A Comedy of Errors + +Let’s be honest, the process of getting data from point A to point B is rarely a straight line. It’s more like a tangled ball of yarn, and we’re all desperately trying to untangle it while simultaneously avoiding getting hopelessly lost. Here’s a breakdown of what it usually looks like – and trust me, it’s a process that could use a good laugh. + +1. **Finding the Data:** This is where the real adventure begins. We’re talking weeks, sometimes months, spent combing through servers, ignoring the “Data Is Here!” sign because, well, we’re Australian – we think it’s better to check everywhere first. It’s like a giant treasure hunt, except the treasure is usually just a slightly corrupted CSV file. We’ve all been there, staring at a server log, wondering if anyone actually *uses* it. It’s a surprisingly common experience. + +2. **Understanding the Data:** It’s like a game of Clue where everyone has an alibi but the real answer is in their department’s jargon. “KPI,” “MQL,” “Churn Rate” – it’s a beautiful, confusing mess. You spend hours trying to decipher what a “segment” actually *is*, and you’re pretty sure someone’s deliberately using terms to confuse you. It’s a surprisingly common experience. + +3. **Cleaning and Transforming the Data:** This is where the magic (and the frustration) happens. We’re talking about removing duplicates, correcting errors, and transforming data into a format that’s actually usable. It’s a surprisingly common experience. + +4. **Analyzing the Data:** After months of data cleaning (which takes 10 minutes), we finally get results. Then our boss asks, “Wait, is this for the meeting next week or last month?” Seriously. It’s a surprisingly common experience. + +5. **Reporting the Data:** Who likes reporting? Like, who likes doing the dishes after dinner? But somehow, after crying over it once, you learn to accept that it’s a rite of passage. + +## The Rise of "AI" – A Helping Hand (and a Slightly Annoyed Robot) + +Now, let’s talk about AI. It’s not going to magically solve all our data problems. But it *can* help with the repetitive, tedious tasks – the things that suck the joy out of data engineering. Think schema discovery, data profiling, and initial data cleaning. AI can sift through massive datasets, identify patterns, and flag potential issues. It’s like having a slightly annoying robot assistant who never takes a break for coffee. + +Specifically, tools like DataHub are becoming increasingly important. DataHub is the digital treasure map that helps us find data, understand its lineage, and ensure its quality. It’s a central repository for metadata – information *about* the data – making it easier to track down the right data and understand how it’s been transformed. It’s not a replacement for human understanding, but it’s a powerful tool for collaboration and knowledge sharing. + +## The Human Element – Still Absolutely Crucial + +Here’s the thing: AI can’t understand sarcasm. It can’t interpret the nuances of a business context. It can’t tell you whether a particular metric is actually *meaningful*. That’s where we come in. 
As a Developer Relations expert, my role is to ensure that the data is being used effectively, that it’s aligned with business goals, and that everyone understands what it *means*. + +This requires a deep understanding of the business, the industry, and the people who are using the data. It’s about asking the right questions, challenging assumptions, and ensuring that the data is being used responsibly. It’s about connecting the dots between the technical and the human. + +## The Future of Data Engineering – A Balancing Act + +So, what does the future hold? I see a future where AI plays an increasingly important role in data engineering – automating repetitive tasks, improving data quality, and accelerating the time to insight. But I also see a continued need for human expertise. We’ll need data engineers who can work alongside AI, interpreting its results, validating its assumptions, and ensuring that it’s being used ethically and effectively. + +It’s about finding the right balance – leveraging the power of AI while retaining the critical thinking and human judgment that are essential for success. + +## Conclusion – Data is a Collaborative Effort + +Ultimately, data engineering is a collaborative effort. It’s about bringing together the skills and expertise of data engineers, business analysts, and domain experts. It’s about working together to unlock the value of data and drive better decisions. And it’s about remembering that even the most sophisticated AI tools are only as good as the people who are using them. + +Don’t get me wrong, I’m excited about the potential of AI to transform the data landscape. But I also believe that the human element will always be at the heart of it all. Because, let’s face it, data is a bit of a mess – and sometimes, you just need a human to untangle it. +) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index c06d88a..86940f1 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -5,23 +5,23 @@ import chromadb, time class OllamaGenerator: - def __init__(self, title: str, content: str, model: str): + def __init__(self, title: str, content: str, model: str, inner_title: str): self.title = title + self.inner_title = inner_title self.content = content self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) self.ollama_model = model self.embed_model = "snowflake-arctic-embed2:latest" - self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "deepseek-coder-v2:16b"] + self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "gemma3:latest"] self.prompt_inject = f""" - You are a journalist Software Developer and DevOps expert - who has transistioned in Developer Relations + You are a journalist, Software Developer and DevOps expert writing a 1000 word draft blog for other tech enthusiasts. You like to use almost no code examples and prefer to talk in a light comedic tone. You are also Australian As this person write this blog as a markdown document. - The title for the blog is {self.title}. + The title for the blog is {self.inner_title}. Do not output the title in the markdown. 
The basis for the content of the blog is: {self.content} @@ -95,13 +95,12 @@ class OllamaGenerator: prompt = f""" You are an editor taking information from {len(self.agent_models)} Software - Developers and Data experts - who have transistioned into Developer Relations + Developers and Data experts writing a 3000 word blog for other tech enthusiasts. You like when they use almost no code examples and the voice is in a light comedic tone. You are also Australian As this person produce an amalgamation of this blog as a markdown document. - The title for the blog is {self.title}. - Do not output the title in the markdown. + The title for the blog is {self.inner_title}. + Do not output the title in the markdown. Avoid repeated sentences The basis for the content of the blog is: {self.content} """ @@ -110,10 +109,6 @@ class OllamaGenerator: collection = self.load_to_vector_db() collection_query = collection.query(query_embeddings=query_embed, n_results=100) print("Showing pertinent info from drafts used in final edited edition") - for document in collection_query: - print (document['ids']) - print (document['embeddings']) - print (document['documents']) pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0]) prompt_enhanced = f"{prompt} - Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN" print("Generating final document") diff --git a/src/main.py b/src/main.py index 20a01b2..c352216 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,6 @@ import ai_generators.ollama_md_generator as omg import trilium.notes as tn +import string tril = tn.TrilumNotes() @@ -7,8 +8,10 @@ tril.get_new_notes() tril_notes = tril.get_notes_content() -def convert_to_lowercase_with_underscores(string): - return string.lower().replace(" ", "_") +def convert_to_lowercase_with_underscores(s): + allowed = set(string.ascii_letters + string.digits + ' ') + filtered_string = ''.join(c for c in s if c in allowed) + return filtered_string.lower().replace(" ", "_") for note in tril_notes: @@ -16,8 +19,9 @@ for note in tril_notes: # print(tril_notes[note]['content']) print("Generating Document") - ai_gen = omg.OllamaGenerator(tril_notes[note]['title'], - tril_notes[note]['content'], - "qwen2.5:7b") os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) + ai_gen = omg.OllamaGenerator(os_friendly_title, + tril_notes[note]['content'], + "gemma3:latest", + tril_notes[note]['title']) ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") -- 2.39.5 From 44141ab5457c302cdd029a2d9a54c01547a547c0 Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 25 Mar 2025 15:26:56 +1000 Subject: [PATCH 07/40] pre attempt at langchain --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0d0286d..c7d6462 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ trilium-py gitpython PyGithub chromadb +langchain -- 2.39.5 From e0b2c80bc9c01b7f9ba964dbd4a8f2347950f243 Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 19 May 2025 11:07:41 +1000 Subject: [PATCH 08/40] latest commits --- .../the_melding_of_data_engineering_and_ai.md | 50 +++++++------------ requirements.txt | 2 +- src/ai_generators/ollama_md_generator.py | 49 ++++++++++-------- src/repo_management/repo_manager.py | 1 + 4 files changed, 49 insertions(+), 53 deletions(-) diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md 
b/generated_files/the_melding_of_data_engineering_and_ai.md index 699d06e..93511d6 100644 --- a/generated_files/the_melding_of_data_engineering_and_ai.md +++ b/generated_files/the_melding_of_data_engineering_and_ai.md @@ -1,49 +1,35 @@ -Okay, let's craft that markdown document. Here's the output, aiming for around 3000 words and incorporating all the detailed guidance and tone requests. +# Wrangling Data: A Reality Check -```markdown -# The Melding of Data Engineering and "AI" +Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task. -**(Aussie Perspective on Wrangling Data – Because Let’s Be Honest, It’s a Bit of a Mess)** +**Phase 1: The Hunt** -**(Image: A slightly bewildered-looking person surrounded by spreadsheets and a half-empty coffee cup)** +First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. -Right, let’s be upfront. I’ve spent the last decade-ish wrestling with data. And let me tell you, it’s rarely glamorous. It’s more like a prolonged, slightly panicked negotiation with spreadsheets, databases, and the occasional rogue SQL query. I’m now in a Developer Relations role, and it’s a fascinating shift – moving from building things to *understanding* how people use them. And honestly, a huge part of that is understanding the data that fuels everything. This isn’t about writing elegant code (though that’s still useful!); it’s about bridging the gap between the technical and the… well, the human. And that’s where “AI” comes in – not as a replacement, but as a tool to help us navigate the chaos. +**Phase 2: Deciphering the Ancient Texts** -## The Data Wrangling Process: A Comedy of Errors +Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?). One week for a small bakery, six months for a multinational insurance company. It’s a wild ride. -Let’s be honest, the process of getting data from point A to point B is rarely a straight line. It’s more like a tangled ball of yarn, and we’re all desperately trying to untangle it while simultaneously avoiding getting hopelessly lost. Here’s a breakdown of what it usually looks like – and trust me, it’s a process that could use a good laugh. +You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin. -1. **Finding the Data:** This is where the real adventure begins. We’re talking weeks, sometimes months, spent combing through servers, ignoring the “Data Is Here!” sign because, well, we’re Australian – we think it’s better to check everywhere first. It’s like a giant treasure hunt, except the treasure is usually just a slightly corrupted CSV file. 
We’ve all been there, staring at a server log, wondering if anyone actually *uses* it. It’s a surprisingly common experience. +**Phase 3: The Schema Struggle** -2. **Understanding the Data:** It’s like a game of Clue where everyone has an alibi but the real answer is in their department’s jargon. “KPI,” “MQL,” “Churn Rate” – it’s a beautiful, confusing mess. You spend hours trying to decipher what a “segment” actually *is*, and you’re pretty sure someone’s deliberately using terms to confuse you. It’s a surprisingly common experience. +Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. It’s a puzzle, and a frustrating one at that. -3. **Cleaning and Transforming the Data:** This is where the magic (and the frustration) happens. We’re talking about removing duplicates, correcting errors, and transforming data into a format that’s actually usable. It’s a surprisingly common experience. +**Phase 4: The Tooling Tango** -4. **Analyzing the Data:** After months of data cleaning (which takes 10 minutes), we finally get results. Then our boss asks, “Wait, is this for the meeting next week or last month?” Seriously. It’s a surprisingly common experience. +You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. They’re the office coffee machine of data work – technically functional, universally resented. -5. **Reporting the Data:** Who likes reporting? Like, who likes doing the dishes after dinner? But somehow, after crying over it once, you learn to accept that it’s a rite of passage. +**Phase 5: The Reporting Revelation (and Despair)** -## The Rise of "AI" – A Helping Hand (and a Slightly Annoyed Robot) +Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads. -Now, let’s talk about AI. It’s not going to magically solve all our data problems. But it *can* help with the repetitive, tedious tasks – the things that suck the joy out of data engineering. Think schema discovery, data profiling, and initial data cleaning. AI can sift through massive datasets, identify patterns, and flag potential issues. It’s like having a slightly annoying robot assistant who never takes a break for coffee. +**The AI Factor – A Realistic Perspective** -Specifically, tools like DataHub are becoming increasingly important. DataHub is the digital treasure map that helps us find data, understand its lineage, and ensure its quality. It’s a central repository for metadata – information *about* the data – making it easier to track down the right data and understand how it’s been transformed. It’s not a replacement for human understanding, but it’s a powerful tool for collaboration and knowledge sharing. +Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. For quite some time yet, this will be the point where we need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. 
It can’t tell you if the data is actually useful. -## The Human Element – Still Absolutely Crucial +We can use tools like DataHub to capture some of this business knowledge, but those tools are only as good as the people who use them. We need to make sure AI is used for those uniform parts – schema discovery, finding the tools, ugh, reporting. But where the rubber hits the road… that’s where we need people, making sure there is a person interpreting not only what goes out… but what goes in. -Here’s the thing: AI can’t understand sarcasm. It can’t interpret the nuances of a business context. It can’t tell you whether a particular metric is actually *meaningful*. That’s where we come in. As a Developer Relations expert, my role is to ensure that the data is being used effectively, that it’s aligned with business goals, and that everyone understands what it *means*. +**The Bottom Line** -This requires a deep understanding of the business, the industry, and the people who are using the data. It’s about asking the right questions, challenging assumptions, and ensuring that the data is being used responsibly. It’s about connecting the dots between the technical and the human. - -## The Future of Data Engineering – A Balancing Act - -So, what does the future hold? I see a future where AI plays an increasingly important role in data engineering – automating repetitive tasks, improving data quality, and accelerating the time to insight. But I also see a continued need for human expertise. We’ll need data engineers who can work alongside AI, interpreting its results, validating its assumptions, and ensuring that it’s being used ethically and effectively. - -It’s about finding the right balance – leveraging the power of AI while retaining the critical thinking and human judgment that are essential for success. - -## Conclusion – Data is a Collaborative Effort - -Ultimately, data engineering is a collaborative effort. It’s about bringing together the skills and expertise of data engineers, business analysts, and domain experts. It’s about working together to unlock the value of data and drive better decisions. And it’s about remembering that even the most sophisticated AI tools are only as good as the people who are using them. - -Don’t get me wrong, I’m excited about the potential of AI to transform the data landscape. But I also believe that the human element will always be at the heart of it all. Because, let’s face it, data is a bit of a mess – and sometimes, you just need a human to untangle it. -) +It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it.
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c7d6462..116f45e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ trilium-py gitpython PyGithub chromadb -langchain +langchain-ollama diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 86940f1..4e140b0 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -1,7 +1,7 @@ import os, re from ollama import Client import chromadb, time - +from langchain_ollama import ChatOllama class OllamaGenerator: @@ -15,6 +15,7 @@ class OllamaGenerator: self.ollama_model = model self.embed_model = "snowflake-arctic-embed2:latest" self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "gemma3:latest"] + self.llm = ChatOllama(model=self.ollama_model, temperature=0.7) self.prompt_inject = f""" You are a journalist, Software Developer and DevOps expert writing a 1000 word draft blog for other tech enthusiasts. @@ -52,14 +53,20 @@ class OllamaGenerator: def generate_draft(self, model) -> str: '''Generate a draft blog post using the specified model''' try: - self.response = self.ollama_client.chat(model=model, - messages=[ - { - 'role': 'user', - 'content': f'{self.prompt_inject}', - }, - ]) - return self.response['message']['content'] + agent_llm = ChatOllama(model=model, temperature=0.8) + messages = [ + ("system", self.prompt_inject), + ("human", "make the blog post in a format to be edited easily" ) + ] + self.response = agent_llm.invoke(messages) + # self.response = self.ollama_client.chat(model=model, + # messages=[ + # { + # 'role': 'user', + # 'content': f'{self.prompt_inject}', + # }, + # ]) + return self.response.text()#['message']['content'] except Exception as e: raise Exception(f"Failed to generate blog draft: {e}") @@ -92,7 +99,7 @@ class OllamaGenerator: def generate_markdown(self) -> str: - prompt = f""" + prompt_system = f""" You are an editor taking information from {len(self.agent_models)} Software Developers and Data experts writing a 3000 word blog for other tech enthusiasts. 
@@ -105,21 +112,23 @@ class OllamaGenerator: {self.content} """ try: - query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt)['embeddings'] + query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings'] collection = self.load_to_vector_db() collection_query = collection.query(query_embeddings=query_embed, n_results=100) print("Showing pertinent info from drafts used in final edited edition") pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0]) - prompt_enhanced = f"{prompt} - Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN" + prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN" print("Generating final document") - self.response = self.ollama_client.chat(model=self.ollama_model, - messages=[ - { - 'role': 'user', - 'content': f'{prompt_enhanced}', - }, - ]) - return self.response['message']['content'] + messages = [("system", prompt_system), ("human", prompt_human),] + self.response = self.llm.invoke(messages).text() + # self.response = self.ollama_client.chat(model=self.ollama_model, + # messages=[ + # { + # 'role': 'user', + # 'content': f'{prompt_enhanced}', + # }, + # ]) + return self.response#['message']['content'] except Exception as e: raise Exception(f"Failed to generate markdown: {e}") diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 1536792..752c5e7 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -10,6 +10,7 @@ def try_something(test): # Set the path to your blog repo here blog_repo = "/path/to/your/blog/repo" + # Checkout a new branch and create a new file for our blog post branch_name = "new-post" try: -- 2.39.5 
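Worth pausing on the LangChain migration above: `ChatOllama.invoke()` takes a list of `(role, content)` tuples and returns a message object rather than the raw client's response dict, which is why the patch swaps `['message']['content']` for `.text()`. A minimal sketch of that round trip, assuming the `langchain-ollama` package and a local Ollama server (model name and prompts are placeholders):

```python
# Minimal sketch of the (system, human) invoke pattern adopted in patch 08.
# Assumes a running Ollama server; the model name and prompts are placeholders.
from langchain_ollama import ChatOllama

llm = ChatOllama(model="gemma3:latest", temperature=0.6, top_p=0.5)

messages = [
    ("system", "You are an editor assembling a final markdown document."),
    ("human", "Generate the final document from the supplied draft excerpts."),
]

response = llm.invoke(messages)  # returns an AIMessage, not a dict
print(response.text())           # .text() pulls out the string content
```
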
From 0c090c848913bb49b99958c1a4d34e78fb1167f8 Mon Sep 17 00:00:00 2001 From: armistace Date: Mon, 19 May 2025 11:28:10 +1000 Subject: [PATCH 09/40] add vscode to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1248f07..24458be 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__ .venv .aider* +.vscode -- 2.39.5 From 8a64d9c959baee020eaee9218dfb92a15a8e0d63 Mon Sep 17 00:00:00 2001 From: armistace Date: Mon, 19 May 2025 11:38:15 +1000 Subject: [PATCH 10/40] fix pyrefly typing errors --- src/ai_generators/ollama_md_generator.py | 1 + src/trilium/notes.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 4e140b0..99ac097 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -9,6 +9,7 @@ class OllamaGenerator: self.title = title self.inner_title = inner_title self.content = content + self.response = None self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) diff --git a/src/trilium/notes.py b/src/trilium/notes.py index bb55041..740fbfc 100644 --- a/src/trilium/notes.py +++ b/src/trilium/notes.py @@ -18,9 +18,13 @@ class TrilumNotes: print("Please run get_token and set your token") else: self.ea = ETAPI(self.server_url, self.token) + self.new_notes = None + self.note_content = None def get_token(self): ea = ETAPI(self.server_url) + if self.tril_pass is None: + raise ValueError("Trilium password cannot be None") token = ea.login(self.tril_pass) print(token) print("I would recommend you update the env file with this tootsweet!") -- 2.39.5 From c606f72d90928963d8f89301f254359e7f7fb34a Mon Sep 17 00:00:00 2001 From: armistace Date: Fri, 23 May 2025 15:47:25 +1000 Subject: [PATCH 11/40] env vars and starting work on repo_manager --- .gitignore | 2 + README.md | 21 ++++-- docker-compose.yml | 85 +++++++++++------------- generated_files/when_to_use_ai.md | 53 +++++++++++++++ src/ai_generators/ollama_md_generator.py | 53 +++++++++------ src/main.py | 3 +- src/repo_management/push_markdown.py | 48 ------------- src/repo_management/repo_manager.py | 77 ++++++++++++--------- src/trilium/notes.py | 15 +++-- 9 files changed, 195 insertions(+), 162 deletions(-) create mode 100644 generated_files/when_to_use_ai.md delete mode 100644 src/repo_management/push_markdown.py diff --git a/.gitignore b/.gitignore index 24458be..284f0b9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ __pycache__ .venv .aider* .vscode +.zed +pyproject.toml diff --git a/README.md b/README.md index 833f393..4f284bb 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,19 @@ This creator requires you to use a working Trilium Instance and create a .env file with the following ``` -TRILIUM_HOST -TRILIUM_PORT -TRILIUM_PROTOCOL -TRILIUM_PASS +TRILIUM_HOST= +TRILIUM_PORT= +TRILIUM_PROTOCOL= +TRILIUM_PASS= +TRILIUM_TOKEN= +OLLAMA_PROTOCOL= +OLLAMA_HOST= +OLLAMA_PORT=11434 +EMBEDDING_MODEL= +EDITOR_MODEL= +# This is expected in JSON list format, for example `["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]` +CONTENT_CREATOR_MODELS= +CHROMA_SERVER= ``` This container is going to be what I use to trigger a blog creation event @@ -29,7 +38,7 @@ To do this we will 4. cd /src/content -5. take the information from the trillium note and prepare a 500 word blog post, insert the following at the top +5. take the information from the Trilium note and prepare a 500 word blog post, insert the following at the top ``` Title: @@ -42,7 +51,7 @@ Authors: <model name>.ai Summary: <have ai write a 10 word summary of the post> ``` -6. write it to `<title>.md` +6. write it to `<title>.md` 7. `git checkout -b <title>` diff --git a/docker-compose.yml b/docker-compose.yml index d6233ec..2642fe8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,53 +1,44 @@ networks: - net: - driver: bridge + net: + driver: bridge services: - blog_creator: - build: - context: . - dockerfile: Dockerfile - container_name: blog_creator - env_file: - - .env - volumes: - - ./generated_files/:/blog_creator/generated_files - networks: - - net + blog_creator: + build: + context: . 
+ dockerfile: Dockerfile + container_name: blog_creator + env_file: + - .env + volumes: + - ./generated_files/:/blog_creator/generated_files + networks: + - net - chroma: - image: chromadb/chroma - container_name: chroma - volumes: - # Be aware that indexed data are located in "/chroma/chroma/" - # Default configuration for persist_directory in chromadb/config.py - # Read more about deployments: https://docs.trychroma.com/deployment - - chroma-data:/chroma/chroma - command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" - environment: - - IS_PERSISTENT=TRUE - - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER} - - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE} - - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS} - - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER} - - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} - - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} - - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} - - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} - - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} - - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} - restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" - ports: - - "8000:8000" - healthcheck: - # Adjust below to match your container port - test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat" ] - interval: 30s - timeout: 10s - retries: 3 - networks: - - net + chroma: + image: chromadb/chroma + container_name: chroma + volumes: + # Be aware that indexed data are located in "/chroma/chroma/" + # Default configuration for persist_directory in chromadb/config.py + # Read more about deployments: https://docs.trychroma.com/deployment + - chroma-data:/chroma/chroma + #command: "--host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" + environment: + - IS_PERSISTENT=TRUE + restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" + ports: + - "8000:8000" + healthcheck: + # Adjust below to match your container port + test: + ["CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - net volumes: - chroma-data: - driver: local \ No newline at end of file + chroma-data: + driver: local diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md new file mode 100644 index 0000000..0cc3bd5 --- /dev/null +++ b/generated_files/when_to_use_ai.md @@ -0,0 +1,53 @@ +# When Should You Use AI? + +Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. + +But where does AI actually shine bright and come in handy? + +* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. 
Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. + +**And when shouldn’t you use AI?** + +* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. +* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. + +LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. +* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. + +**The Bottom Line** + +AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. + +Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). + +--- + +**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. + +```markdown +# When Should You Use AI? + +Right off the bat? Well, let’s talk about when *not* using LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. + +But where does AI actually shine bright and come in handy? + +* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. + +**And when shouldn’t you use AI?** + +* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. +* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. + +LMLs are great at mimicking intelligence. 
But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. +* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. + +**The Bottom Line** + +AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. + +Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). + +--- + +**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. +``` \ No newline at end of file diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 99ac097..4b60653 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -1,11 +1,11 @@ -import os, re +import os, re, json, random, time from ollama import Client -import chromadb, time +import chromadb from langchain_ollama import ChatOllama class OllamaGenerator: - def __init__(self, title: str, content: str, model: str, inner_title: str): + def __init__(self, title: str, content: str, inner_title: str): self.title = title self.inner_title = inner_title self.content = content @@ -13,15 +13,15 @@ class OllamaGenerator: self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) - self.ollama_model = model - self.embed_model = "snowflake-arctic-embed2:latest" - self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "gemma3:latest"] - self.llm = ChatOllama(model=self.ollama_model, temperature=0.7) + self.ollama_model = os.environ["EDITOR_MODEL"] + self.embed_model = os.environ["EMBEDDING_MODEL"] + self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"]) + self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5) #This is the level head in the room self.prompt_inject = f""" You are a journalist, Software Developer and DevOps expert writing a 1000 word draft blog for other tech enthusiasts. You like to use almost no code examples and prefer to talk - in a light comedic tone. You are also Australian + in a light comedic tone. You are also Australian As this person write this blog as a markdown document. The title for the blog is {self.inner_title}. Do not output the title in the markdown. 
@@ -50,16 +50,24 @@ class OllamaGenerator: chunks.append(' '.join(current_chunk)) return chunks - + def generate_draft(self, model) -> str: '''Generate a draft blog post using the specified model''' try: - agent_llm = ChatOllama(model=model, temperature=0.8) + # the idea behind this is to make the "creativity" random amongst the content creators + # controlling temperature will cause the output to allow more "random" connections in sentences + # Controlling top_p will tighten or loosen the embedding connections made + # The result should be varied levels of "creativity" in the writing of the drafts + # for more see https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html + temp = random.uniform(0.5, 1.0) + top_p = random.uniform(0.4, 0.8) + top_k = int(random.uniform(30, 80)) + agent_llm = ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k) messages = [ - ("system", self.prompt_inject), + ("system", self.prompt_inject), ("human", "make the blog post in a format to be edited easily" ) ] - self.response = agent_llm.invoke(messages) + response = agent_llm.invoke(messages) # self.response = self.ollama_client.chat(model=model, # messages=[ # { # 'role': 'user', # 'content': f'{self.prompt_inject}', # }, # ]) - return self.response.text()#['message']['content'] + #print ("draft") + #print (response) + return response.text()#['message']['content'] except Exception as e: raise Exception(f"Failed to generate blog draft: {e}") - + def get_draft_embeddings(self, draft_chunks): '''Get embeddings for the draft chunks''' embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks) return embeds.get('embeddings', []) 
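The randomised sampling in the hunk above is the heart of this change: each drafting agent rolls its own temperature, top_p and top_k, so the drafts arrive at varied levels of creativity before the deliberately level-headed editor model reconciles them. A stripped-back sketch of the idea (model names are placeholders; the ranges mirror the diff):

```python
# Sketch of per-agent sampling randomisation, following the hunk above.
# Model names are placeholders; the uniform ranges match the patch.
import random

from langchain_ollama import ChatOllama

def make_draft_agent(model: str) -> ChatOllama:
    temp = random.uniform(0.5, 1.0)      # higher -> looser sentence connections
    top_p = random.uniform(0.4, 0.8)     # nucleus width of the token pool
    top_k = int(random.uniform(30, 80))  # hard cap on candidate tokens per step
    return ChatOllama(model=model, temperature=temp, top_p=top_p, top_k=top_k)

drafters = [make_draft_agent(m) for m in ["qwen2.5:7b", "gemma3:latest"]]
```
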
@@ -96,16 +106,16 @@ class OllamaGenerator: collection.add(documents=draft_chunks, embeddings=embeds, ids=ids, metadatas=metadata) return collection - + def generate_markdown(self) -> str: - + prompt_system = f""" - You are an editor taking information from {len(self.agent_models)} Software - Developers and Data experts + You are an editor taking information from {len(self.agent_models)} Software + Developers and Data experts writing a 3000 word blog for other tech enthusiasts. - You like when they use almost no code examples and the - voice is in a light comedic tone. You are also Australian + You like when they use almost no code examples and the + voice is in a light comedic tone. You are also Australian As this person produce an amalgamation of this blog as a markdown document. The title for the blog is {self.inner_title}. Do not output the title in the markdown. Avoid repeated sentences @@ -118,6 +128,7 @@ class OllamaGenerator: collection = self.load_to_vector_db() collection_query = collection.query(query_embeddings=query_embed, n_results=100) print("Showing pertinent info from drafts used in final edited edition") pertinent_draft_info = '\n\n'.join(collection.query(query_embeddings=query_embed, n_results=100)['documents'][0]) + #print(pertinent_draft_info) prompt_human = f"Generate the final document using this information from the drafts: {pertinent_draft_info} - ONLY OUTPUT THE MARKDOWN" print("Generating final document") messages = [("system", prompt_system), ("human", prompt_human),] @@ -129,6 +140,8 @@ class OllamaGenerator: # 'content': f'{prompt_enhanced}', # }, # ]) + #print ("Markdown Generated") + #print (self.response) return self.response#['message']['content'] except Exception as e: 
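For anyone reconstructing the retrieval step from these hunks, it is a plain Chroma round trip: draft chunks go in with embeddings, then the editor's system prompt is embedded and used to pull back the most relevant chunks, which are joined into the final prompt. A self-contained sketch under assumed defaults (localhost services; model and collection names are placeholders):

```python
# Sketch of the embed -> store -> query flow in generate_markdown above.
# Hosts, ports, and model/collection names are assumptions for illustration.
import chromadb
from ollama import Client

ollama_client = Client(host="http://localhost:11434")
chroma = chromadb.HttpClient(host="localhost", port=8000)
collection = chroma.get_or_create_collection(name="blog_example")

draft_chunks = ["Draft one rambles about data wrangling.",
                "Draft two jokes about reporting."]
embeds = ollama_client.embed(model="snowflake-arctic-embed2:latest",
                             input=draft_chunks)["embeddings"]
collection.add(documents=draft_chunks, embeddings=embeds, ids=["chunk-0", "chunk-1"])

# Embed the editor prompt and pull back the most pertinent chunks.
query_embed = ollama_client.embed(model="snowflake-arctic-embed2:latest",
                                  input="Edit these drafts into one post.")["embeddings"]
results = collection.query(query_embeddings=query_embed, n_results=2)
pertinent_draft_info = "\n\n".join(results["documents"][0])
print(pertinent_draft_info)
```
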
diff --git a/src/main.py b/src/main.py index c352216..b3ea601 100644 --- a/src/main.py +++ b/src/main.py @@ -18,10 +18,9 @@ for note in tril_notes: print(tril_notes[note]['title']) # print(tril_notes[note]['content']) print("Generating Document") - + os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], - "gemma3:latest", tril_notes[note]['title']) ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") diff --git a/src/repo_management/push_markdown.py b/src/repo_management/push_markdown.py deleted file mode 100644 index cb261fc..0000000 --- a/src/repo_management/push_markdown.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -import sys -from git import Repo - -# Set these variables accordingly -REPO_OWNER = "your_repo_owner" -REPO_NAME = "your_repo_name" - -def clone_repo(repo_url, branch="main"): - Repo.clone_from(repo_url, ".", branch=branch) - -def create_markdown_file(file_name, content): - with open(f"{file_name}.md", "w") as f: - f.write(content) - -def commit_and_push(file_name, message): - repo = Repo(".") - repo.index.add([f"{file_name}.md"]) - repo.index.commit(message) - repo.remote().push() - -def create_new_branch(branch_name): - repo = Repo(".") - repo.create_head(branch_name).checkout() - repo.head.reference.set_tracking_url(f"https://your_git_server/{REPO_OWNER}/{REPO_NAME}.git/{branch_name}") - repo.remote().push() - -if __name__ == "__main__": - if len(sys.argv) < 3: - print("Usage: python push_markdown.py <repo_url> <markdown_file_name>") - sys.exit(1) - - repo_url = sys.argv[1] - file_name = sys.argv[2] - - # Clone the repository - clone_repo(repo_url) - - # Create a new Markdown file with content - create_markdown_file(file_name, "Hello, World!\n") - - # Commit and push changes to the main branch - commit_and_push(file_name, f"Add {file_name}.md") - - # Create a new branch named after the Markdown file - create_new_branch(file_name) - - print(f"Successfully created '{file_name}' branch with '{file_name}.md'.") diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 752c5e7..ebf6e5d 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -1,39 +1,52 @@ -import os -from git import Git -from git.repo import BaseRepository -from git.exc import InvalidGitRepositoryError -from git.remote import RemoteAction +import os, shutil +from git import Repo +from git.exc import GitCommandError +class GitRepository: + # This is designed to be transitory: it will destructively create the repo at repo_path + # if you have uncommitted changes you can kiss them goodbye! 
 + # Don't use the repo created by this function for dev -> it's a tool! + # It is expected that when used you will add, commit, push, delete + def __init__(self, repo_path, username=None, password=None): + git_protocol = os.environ["GIT_PROTOCOL"] + git_remote = os.environ["GIT_REMOTE"] + remote = f"{git_protocol}://{username}:{password}@{git_remote}" -def try_something(test): + if os.path.exists(repo_path): + shutil.rmtree(repo_path) -# Set the path to your blog repo here -blog_repo = "/path/to/your/blog/repo" + Repo.clone_from(remote, repo_path) + self.repo = Repo(repo_path) + self.username = username + self.password = password + def clone(self, remote_url, destination_path): + """Clone a Git repository with authentication""" + try: + Repo.clone_from(remote_url, destination_path) + return True + except GitCommandError as e: + print(f"Cloning failed: {e}") + return False -# Checkout a new branch and create a new file for our blog post -branch_name = "new-post" -try: - repo = Git(blog_repo) - repo.checkout("-b", branch_name, "origin/main") - with open("my-blog-post.md", "w") as f: - f.write(content) -except InvalidGitRepositoryError: - # Handle repository errors gracefully - pass + def fetch(self, remote_name='origin', ref_name='main'): + """Fetch updates from a remote repository with authentication""" + try: + self.repo.remotes[remote_name].fetch(refspec=ref_name) + return True + except GitCommandError as e: + print(f"Fetching failed: {e}") + return False -# Add and commit the changes to Git -repo.add("my-blog-post.md") -repo.commit("-m", "Added new blog post about DevOps best practices.") - -# Push the changes to Git and create a PR -repo.remote().push("refs/heads/{0}:refs/for/main".format(branch_name), "--set-upstream") -base_branch = "origin/main" -target_branch = "main" -pr_title = "DevOps best practices" -try: - repo.create_head("{0}-{1}", base=base_branch, message="{}".format(pr_title)) -except RemoteAction.GitExitStatus as e: - # Handle Git exit status errors gracefully - pass + def pull(self, remote_name='origin', ref_name='main'): + """Pull updates from a remote repository with authentication""" + try: + self.repo.remotes[remote_name].pull(refspec=ref_name) + return True + except GitCommandError as e: + print(f"Pulling failed: {e}") + return False + def get_branches(self): + """List all branches in the repository""" + return [branch.name for branch in self.repo.branches] diff --git a/src/trilium/notes.py b/src/trilium/notes.py index 740fbfc..88c96f1 100644 --- a/src/trilium/notes.py +++ b/src/trilium/notes.py @@ -11,16 +11,16 @@ class TrilumNotes: self.token = os.environ.get('TRILIUM_TOKEN') if not all([self.protocol, self.host, self.port, self.tril_pass]): print("One or more required environment variables not found. Have you set a .env?") - + self.server_url = f'{self.protocol}://{self.host}:{self.port}' - + if not self.token: print("Please run get_token and set your token") else: self.ea = ETAPI(self.server_url, self.token) - self.new_notes = None - self.note_content = None - + self.new_notes = None + self.note_content = None + def get_token(self): ea = ETAPI(self.server_url) if self.tril_pass is None: @@ -44,10 +44,11 @@ class TrilumNotes: def get_notes_content(self): content_dict = {} + if self.new_notes is None: + raise ValueError("How did you do this? new_notes is None!") for note in self.new_notes['results']: - content_dict[note['noteId']] = {"title" : f"{note['title']}", + content_dict[note['noteId']] = {"title" : f"{note['title']}", "content" : f"{self._get_content(note['noteId'])}" } self.note_content = content_dict return content_dict - -- 2.39.5 
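One easy-to-miss detail of the env-var migration in the patch above: `CONTENT_CREATOR_MODELS` is parsed with `json.loads`, so the `.env` value must be a JSON array of quoted strings rather than a bare Python-style list. A tiny sketch of the parsing, with placeholder model names:

```python
# Sketch of how the env-driven model list above gets parsed.
# The example value is an assumption shaped to satisfy json.loads.
import json
import os

os.environ.setdefault("CONTENT_CREATOR_MODELS",
                      '["phi4-mini:latest", "qwen3:1.7b", "gemma3:latest"]')

agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"])
print(agent_models)  # ['phi4-mini:latest', 'qwen3:1.7b', 'gemma3:latest']
```
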
new_notes is None!") for note in self.new_notes['results']: - content_dict[note['noteId']] = {"title" : f"{note['title']}", + content_dict[note['noteId']] = {"title" : f"{note['title']}", "content" : f"{self._get_content(note['noteId'])}" } self.note_content = content_dict return content_dict - -- 2.39.5 From 01b7f1cd782e14ca56f8579593e7d507ba87fef2 Mon Sep 17 00:00:00 2001 From: = <=> Date: Sat, 24 May 2025 00:25:35 +1000 Subject: [PATCH 12/40] untested git stuff --- src/ai_generators/ollama_md_generator.py | 7 ++++ src/main.py | 12 +++++-- src/repo_management/repo_manager.py | 41 +++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 4b60653..7fe5948 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -150,3 +150,10 @@ class OllamaGenerator: def save_to_file(self, filename: str) -> None: with open(filename, "w") as f: f.write(self.generate_markdown()) + + def generate_commit_message(self): + prompt_system = "You are a blog creator commiting a piece of content to a central git repo" + prompt_human = f"Generate a 10 word git commit message describing {self.response}" + messages = [("system", prompt_system), ("human", prompt_human),] + commit_message = self.llm.invoke(messages).text() + return commit_message \ No newline at end of file diff --git a/src/main.py b/src/main.py index b3ea601..494fe54 100644 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,7 @@ import ai_generators.ollama_md_generator as omg import trilium.notes as tn -import string +import repo_management.repo_manager as git_repo +import string,os tril = tn.TrilumNotes() @@ -23,4 +24,11 @@ for note in tril_notes: ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], tril_notes[note]['title']) - ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") + blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md" + ai_gen.save_to_file(blog_path) + # Generate commit messages and push to repo + commit_message = ai_gen.generate_commit_message() + git_user = os.environp["GIT_USER"] + git_pass = os.environ["GIT_PASS"] + repo_manager = git_repo("blog/", git_user, git_pass) + repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index ebf6e5d..2ce8585 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -14,7 +14,7 @@ class GitRepository: if os.path.exists(repo_path): shutil.rmtree(repo_path) - + self.repo_path = repo_path Repo.clone_from(remote, repo_path) self.repo = Repo(repo_path) self.username = username @@ -50,3 +50,42 @@ class GitRepository: def get_branches(self): """List all branches in the repository""" return [branch.name for branch in self.repo.branches] + + + def create_branch(self, branch_name, remote_name='origin', ref_name='main'): + """Create a new branch in the repository with authentication.""" + try: + # Use the same remote and ref as before + self.repo.git.branch(branch_name, commit=True) + return True + except GitCommandError as e: + print(f"Failed to create branch: {e}") + return False + + def add_and_commit(self, message=None): + """Add and commit changes to the repository.""" + try: + # Add all changes + self.repo.git.add(all=True) + # Commit with the provided message or a default + if message is None: + commit_message = "Added and 
committed new content" + else: + commit_message = message + self.repo.git.commit(commit_message=commit_message) + return True + except GitCommandError as e: + print(f"Commit failed: {e}") + return False + + def create_copy_commit_push(self, file_path, title, commit_messge): + self.create_branch(title) + + shutil.copy(f"{file_path}", f"{self.repo_path}src/content/") + + self.add_and_commit(commit_messge) + + self.repo.git.push(remote_name='origin', ref_name=title, force=True) + + def remove_repo(self): + shutil.rmtree(self.repo_path) \ No newline at end of file -- 2.39.5 From 4119b2ec41fc995eebd5aca519a3b2538c86318b Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 26 May 2025 00:18:07 +1000 Subject: [PATCH 13/40] fix dockerifle --- Dockerfile | 2 +- generated_files/when_to_use_ai.md | 53 ------------------------ src/ai_generators/ollama_md_generator.py | 2 +- 3 files changed, 2 insertions(+), 55 deletions(-) diff --git a/Dockerfile b/Dockerfile index fa199f0..b4e9a9f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV PYTHONUNBUFFERED 1 ADD src/ /blog_creator -RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev +RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md index 0cc3bd5..e69de29 100644 --- a/generated_files/when_to_use_ai.md +++ b/generated_files/when_to_use_ai.md @@ -1,53 +0,0 @@ -# When Should You Use AI? - -Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. - -But where does AI actually shine bright and come in handy? - -* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. - -**And when shouldn’t you use AI?** - -* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. -* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. - -LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. -* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. - -**The Bottom Line** - -AI is a powerful tool. 
But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. - -Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). - ---- - -**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. - -```markdown -# When Should You Use AI? - -Right off the bat? Well, let’s talk about when *not* using LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. - -But where does AI actually shine bright and come in handy? - -* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. - -**And when shouldn’t you use AI?** - -* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. -* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. - -LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. -* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. - -**The Bottom Line** - -AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. - -Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). - ---- - -**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. 
I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. -``` \ No newline at end of file diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 7fe5948..a26459a 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -10,7 +10,7 @@ class OllamaGenerator: self.inner_title = inner_title self.content = content self.response = None - self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) + self.chroma = chromadb.HttpClient(host="172.19.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) self.ollama_model = os.environ["EDITOR_MODEL"] -- 2.39.5 From c5444f1a7f3822c4c01e6cd28d7c3c30e804ee1b Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 27 May 2025 23:33:27 +1000 Subject: [PATCH 14/40] merge is going to suck --- generated_files/when_to_use_ai.md | 45 ++++++++++++++++++++++++ src/ai_generators/ollama_md_generator.py | 12 ++++--- src/main.py | 2 +- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md index e69de29..3b97221 100644 --- a/generated_files/when_to_use_ai.md +++ b/generated_files/when_to_use_ai.md @@ -0,0 +1,45 @@ +```markdown +# When to use AI + +As an Australian journalist who’s also a software developer (and let me tell you – I’m not even half as good at DevOps), figuring out when it makes sense to bring in the AIs is like trying to build that one kangaroo bridge over there. You know, with just your bare hands and maybe some hope. + +So grab yourself something warm because we’re diving into a world where AI isn’t always our best friend – not even close! Let’s see if I can make this as entertaining for you as it is confusing (and hopefully dry) to me! + +--- + +## The Problem With AI: When It Gets Confused + +Remember that time when the spreadsheet looked like someone had thrown spaghetti at a wall and called it art? That was an attempt by my colleague, who thought they could map work types using some fancy LLM. Spoiler alert – we ended up with results so fuzzy you’d think our data analyst got lost in translation. + +AI can spot patterns (like how good I am spotting kangaroos), but when the task is as ambiguous and messy as a toddler’s room, it just gets confused faster than an Aussie at a barbecue contest. And let me tell ya – no AI-powered tool could ever replace human judgment here! + +--- + +## When Should You Let Your Kangaroo Build That Bridge? + +- **Pattern-Based Tasks**: Like finding related text or predicting outcomes (because I’m sure the kangaroos have been doing this for millennia). + + Example? Oh, let’s see. Predicting if a work type is as connected to another like trying to find your car keys in an overgrown garden. + +- **Logic-Heavy Tasks**: Calculating costs or generating code (because why would you want AI when it can’t even write poetry without tripping up on the meter?). + + Example – Let’s say calculating project timelines. I mean, sure! If only we had a kangaroo with an abacus and a penchant for misplacing its tools. + +- **Ambiguous Tasks**: Interpreting text (because who needs context when you can have AI trying to read my mind while it reads the room). + + Example – Trying to map work types using LLM. Spoiler alert again! 
It was as useful as an umbrella in a hurricane! + +--- + +## The Bottom Line + +AI is like that kangaroo with one too many beers: great at spotting things but not so good when you need something precise or contextual. + +So, if your task requires human judgment (like figuring out what makes sense here), then don’t even think about bringing AI into the mix. Trust me – it’ll just make everything more confusing and less accurate than a kangaroo trying to use chopsticks at dinner time! + +--- + +**Word Count: 1000** + +**Tone: Light, comedic, slightly sarcastic** +``` \ No newline at end of file diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index a26459a..04e8069 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -1,4 +1,4 @@ -import os, re, json, random, time +import os, re, json, random, time, string from ollama import Client import chromadb from langchain_ollama import ChatOllama @@ -10,7 +10,7 @@ class OllamaGenerator: self.inner_title = inner_title self.content = content self.response = None - self.chroma = chromadb.HttpClient(host="172.19.0.2", port=8000) + self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) self.ollama_model = os.environ["EDITOR_MODEL"] @@ -86,12 +86,14 @@ class OllamaGenerator: '''Get embeddings for the draft chunks''' embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks) return embeds.get('embeddings', []) - + + def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits): + return ''.join(random.choice(chars) for _ in range(size)) def load_to_vector_db(self): '''Load the generated blog drafts into a vector database''' - collection_name = f"blog_{self.title.lower().replace(" ", "_")}" - collection = self.chroma.get_or_create_collection(name=collection_name, metadata={"hnsw:space": "cosine"}) + collection_name = f"blog_{self.title.lower().replace(" ", "_")}_{self.id_generator()}" + collection = self.chroma.get_or_create_collection(name=collection_name)#, metadata={"hnsw:space": "cosine"}) #if any(collection.name == collectionname for collectionname in self.chroma.list_collections()): # self.chroma.delete_collection("blog_creator") for model in self.agent_models: diff --git a/src/main.py b/src/main.py index 494fe54..6715920 100644 --- a/src/main.py +++ b/src/main.py @@ -28,7 +28,7 @@ for note in tril_notes: ai_gen.save_to_file(blog_path) # Generate commit messages and push to repo commit_message = ai_gen.generate_commit_message() - git_user = os.environp["GIT_USER"] + git_user = os.environ["GIT_USER"] git_pass = os.environ["GIT_PASS"] repo_manager = git_repo("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) -- 2.39.5 From 1bb99c23436830d0b4870111f467857d46a318e9 Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Thu, 29 May 2025 16:30:45 +1000 Subject: [PATCH 15/40] change the .env to openthinkier as editor --- generated_files/when_to_use_ai.md | 45 ------------------------------- src/main.py | 2 +- 2 files changed, 1 insertion(+), 46 deletions(-) diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md index 3b97221..e69de29 100644 --- a/generated_files/when_to_use_ai.md +++ b/generated_files/when_to_use_ai.md @@ -1,45 +0,0 @@ -```markdown -# When to use AI - -As an 
Australian journalist who’s also a software developer (and let me tell you – I’m not even half as good at DevOps), figuring out when it makes sense to bring in the AIs is like trying to build that one kangaroo bridge over there. You know, with just your bare hands and maybe some hope. - -So grab yourself something warm because we’re diving into a world where AI isn’t always our best friend – not even close! Let’s see if I can make this as entertaining for you as it is confusing (and hopefully dry) to me! - ---- - -## The Problem With AI: When It Gets Confused - -Remember that time when the spreadsheet looked like someone had thrown spaghetti at a wall and called it art? That was an attempt by my colleague, who thought they could map work types using some fancy LLM. Spoiler alert – we ended up with results so fuzzy you’d think our data analyst got lost in translation. - -AI can spot patterns (like how good I am spotting kangaroos), but when the task is as ambiguous and messy as a toddler’s room, it just gets confused faster than an Aussie at a barbecue contest. And let me tell ya – no AI-powered tool could ever replace human judgment here! - ---- - -## When Should You Let Your Kangaroo Build That Bridge? - -- **Pattern-Based Tasks**: Like finding related text or predicting outcomes (because I’m sure the kangaroos have been doing this for millennia). - - Example? Oh, let’s see. Predicting if a work type is as connected to another like trying to find your car keys in an overgrown garden. - -- **Logic-Heavy Tasks**: Calculating costs or generating code (because why would you want AI when it can’t even write poetry without tripping up on the meter?). - - Example – Let’s say calculating project timelines. I mean, sure! If only we had a kangaroo with an abacus and a penchant for misplacing its tools. - -- **Ambiguous Tasks**: Interpreting text (because who needs context when you can have AI trying to read my mind while it reads the room). - - Example – Trying to map work types using LLM. Spoiler alert again! It was as useful as an umbrella in a hurricane! - ---- - -## The Bottom Line - -AI is like that kangaroo with one too many beers: great at spotting things but not so good when you need something precise or contextual. - -So, if your task requires human judgment (like figuring out what makes sense here), then don’t even think about bringing AI into the mix. Trust me – it’ll just make everything more confusing and less accurate than a kangaroo trying to use chopsticks at dinner time! 
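
The more durable change in the patches above is the Chroma collection naming: a random suffix keeps each run from appending embeddings to a stale collection that shares the blog title. A minimal sketch of that scheme, assuming a Chroma server reachable by its compose service name (the hard-coded bridge IPs flip-flopping between 172.18.0.2 and 172.19.0.2 in these diffs are exactly what a service name avoids):

```python
import random
import string

import chromadb

def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Short random suffix so each run writes to a fresh collection."""
    return ''.join(random.choice(chars) for _ in range(size))

# "chroma" is an assumed docker-compose service name, not the patched-in IPs.
chroma = chromadb.HttpClient(host="chroma", port=8000)

title = "When to use AI"
collection_name = f"blog_{title.lower().replace(' ', '_')}_{id_generator()}"
collection = chroma.get_or_create_collection(name=collection_name)
```
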
-
----
-
-**Word Count: 1000**
-
-**Tone: Light, comedic, slightly sarcastic**
-```
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index 6715920..07817fc 100644
--- a/src/main.py
+++ b/src/main.py
@@ -30,5 +30,5 @@ for note in tril_notes:
     commit_message = ai_gen.generate_commit_message()
     git_user = os.environ["GIT_USER"]
     git_pass = os.environ["GIT_PASS"]
-    repo_manager = git_repo("blog/", git_user, git_pass)
+    repo_manager = git_repo.GitRepository("blog/", git_user, git_pass)
     repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message)
-- 
2.39.5


From 546b86738adf80610c84c431b552271ea8b17540 Mon Sep 17 00:00:00 2001
From: armistace <ar17787@gmail.com>
Date: Thu, 29 May 2025 17:29:48 +1000
Subject: [PATCH 16/40] TODO: parse URL parameters correctly

---
 generated_files/when_to_use_ai.md   | 19 +++++++++++++++++++
 src/repo_management/repo_manager.py | 11 ++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md
index e69de29..02078d0 100644
--- a/generated_files/when_to_use_ai.md
+++ b/generated_files/when_to_use_ai.md
@@ -0,0 +1,19 @@
+## When to Use AI
+
+Right, let’s talk about AI. It’s the buzzword of the moment, but when is it actually useful? I’ve been asked this question a lot recently, so here are two scenarios where AI isn’t the silver bullet everyone thinks it is—and where it could genuinely save some time.
+
+### 1. **Contextual Mapping in Workload Analysis**
+I was building a spreadsheet to analyze workload drivers and potential savings. The dataset included thousands of work orders with categories like "work types" and durations. Merging these required manually mapping each work type to a category. This is where generative AI excelled—it interpreted text relationships, something regex or string manipulation couldn’t handle. LLMs are perfect for understanding context and nuance.
+
+### 2. **Precision in Calculations**
+For calculating workload drivers and formulas, precision was key. These required sound math and logic, not interpretation. Trusting an LLM here felt risky—AI might introduce unexpected variables (like changing π to a non-numeric value). Traditional methods ensure accuracy without existential crises.
+
+**Key Takeaways:**
+- **Use AI** for interpreting text and relationships.
+- **Stick to traditional methods** for precise calculations.
+- **Never fully trust AI**; always verify output.
+
+---
+
+**What do you think? 
Let me know in the comments below!** +<|end_of_thought|> \ No newline at end of file diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 2ce8585..be82a61 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -10,6 +10,7 @@ class GitRepository: def __init__(self, repo_path, username=None, password=None): git_protocol = os.environ["GIT_PROTOCOL"] git_remote = os.environ["GIT_REMOTE"] + #TODO: Parse the URL correctly https://stackoverflow.com/questions/1695183/how-can-i-percent-encode-url-parameters-in-python remote = f"{git_protocol}://{username}:{password}@{git_remote}" if os.path.exists(repo_path): @@ -50,8 +51,8 @@ class GitRepository: def get_branches(self): """List all branches in the repository""" return [branch.name for branch in self.repo.branches] - - + + def create_branch(self, branch_name, remote_name='origin', ref_name='main'): """Create a new branch in the repository with authentication.""" try: @@ -61,7 +62,7 @@ class GitRepository: except GitCommandError as e: print(f"Failed to create branch: {e}") return False - + def add_and_commit(self, message=None): """Add and commit changes to the repository.""" try: @@ -77,7 +78,7 @@ class GitRepository: except GitCommandError as e: print(f"Commit failed: {e}") return False - + def create_copy_commit_push(self, file_path, title, commit_messge): self.create_branch(title) @@ -88,4 +89,4 @@ class GitRepository: self.repo.git.push(remote_name='origin', ref_name=title, force=True) def remove_repo(self): - shutil.rmtree(self.repo_path) \ No newline at end of file + shutil.rmtree(self.repo_path) -- 2.39.5 From 328e870bf04bb232bd21ce0266a461dc4a4bd3e3 Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Thu, 29 May 2025 23:55:12 +1000 Subject: [PATCH 17/40] finailising repo manager --- .gitignore | 1 + generated_files/when_to_use_ai.md | 129 ++++++++++++++++++++--- src/ai_generators/ollama_md_generator.py | 6 +- src/repo_management/repo_manager.py | 22 ++-- 4 files changed, 135 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 284f0b9..7a14487 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__ .vscode .zed pyproject.toml +.ropeproject diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md index 02078d0..215b509 100644 --- a/generated_files/when_to_use_ai.md +++ b/generated_files/when_to_use_ai.md @@ -1,19 +1,120 @@ -## When to Use AI +# When to use AI -Right, let’s talk about AI. It’s the buzzword of the moment, but when is it actually useful? I’ve been asked this question a lot recently, so here are two scenarios where AI isn’t the silver bullet everyone thinks it is—and where it could genuinely save some time. +## The Great AI Debate: When to Trust a Machine vs. When to Let a Human Do the Work -### 1. **Contextual Mapping in Workload Analysis** -I was building a spreadsheet to analyze workload drivers and potential savings. The dataset included thousands of work orders with categories like "work types" and durations. Merging these required manually mapping each work type to a category. This is where generative AI excelled—it interpreted text relationships, something regex or string manipulation couldn’t handle. LLMs are perfect for understanding context and nuance. - -### 2. **Precision in Calculations** -For calculating workload drivers and formulas, precision was key. These required sound math and logic, not interpretation. 
Trusting an LLM here felt risky—AI might introduce unexpected variables (like changing π to a non-numeric value). Traditional methods ensure accuracy without existential crises. - -**Key Takeaways:** -- **Use AI** for interpreting text and relationships. -- **Stick to traditional methods** for precise calculations. -- **Never fully trust AI**; always verify output. +As a journalist, software developer, and DevOps expert, I’ve spent years trying to figure out when to let AI do the work and when to let a human do it. The question is never as simple as it seems. --- -**What do you think? Let me know in the comments below!** -<|end_of_thought|> \ No newline at end of file +### When AI is a Bad Idea + +Let’s start with the obvious: AI isn’t a panacea. There are scenarios where it’s *clearly* not the right tool. + +#### 1. **Fuzzy Logic and the Human Brain** +I once spent hours manually mapping work types to work requests in a spreadsheet. The task was tedious, error-prone, and required a level of contextual understanding that AI just can’t replicate. I’m not saying AI is bad—just that it’s not built for this kind of work. Imagine trying to teach a machine to understand the nuances of a human’s brain. It’s like asking a toaster to recognize a cup of coffee. The AI might get the right answer, but it’s not going to *feel* the same. + +#### 2. **Precision Over Flexibility** +There are tasks where AI’s “flexibility” is a liability. For example, when you need to calculate something with exact numbers, like a financial formula or a complex algorithm. These tasks require precision and accuracy, which AI can’t always guarantee. I once tried to automate a workload calculation using an LLM. The result was a mess. The AI “knew” the answer, but it didn’t *understand* the context. It just plugged in numbers and hoped for the best. That’s why I still use traditional programming for these kinds of tasks. + +#### 3. **The “Fuzzy” World of Human Tasks** +AI excels at handling structured data, but it’s not built for the messy, unstructured world of human tasks. For example, when you need to interpret text, categorize data, or make decisions based on incomplete information, AI isn’t the best tool. I once had to map work types to work requests. The AI tried to do it, but it just didn’t get it. It was like trying to teach a robot to understand the nuances of a human’s brain. The result was a spreadsheet that looked like a puzzle. + +--- + +### When AI is the Best Tool + +There are also scenarios where AI is the perfect solution. + +#### 1. **Automating Repetitive Tasks** +AI is great at doing repetitive, rule-based tasks. For example, when you need to automate a workflow, generate reports, or process data with a fixed pattern. I once used an LLM to generate a report from a dataset. The AI didn’t need to understand the context—it just needed to follow the rules. It was fast, accurate, and didn’t make mistakes. + +#### 2. **Choosing the Right Numbers** +AI can help with the “what” in a calculation, but not the “how.” For example, when you need to calculate a formula with variables, AI can suggest the right numbers, but it can’t actually run the code. I once used an LLM to pick the right constants for a formula. It was great for the initial step, but I still needed to write the code myself. + +#### 3. **The “Human in the Loop”** +AI is best used when it’s part of a human workflow. It’s not a replacement for human judgment, but it’s a tool that can help. 
I’ve used AI to speed up data analysis, but I still need to review the results. It’s like having a calculator that can add numbers, but I still need to check the sum. + +--- + +### The Final Word + +So, when is AI a good idea? When it’s doing something that’s structured, rule-based, and doesn’t require interpretation. When it’s helping you pick the right numbers or automate a process. When is it a bad idea? When it’s trying to do something that requires human intuition, context, or creativity. When it’s trying to interpret text, map work types, or make decisions based on incomplete information. + +In the end, AI is a tool. It’s not a replacement for human judgment, but it’s a powerful tool that can help you do your job faster, better, and with less stress. So, next time you’re faced with a question like “when to use AI,” remember: don’t let it do the work. Let it help you do the work. + +--- + +**Edit notes:** +- Use bold for emphasis (e.g., "Fuzzy Logic and the Human Brain"). +- Add subheadings for clarity. +- Keep paragraphs short for readability. +- Use humor and relatable analogies to make the content engaging. +- Avoid technical jargon to maintain a light, conversational tone. + +*Feel free to edit this draft as you see fit! Let me know if there's anything specific you'd like changed or added.* +--- +**Note:** This blog post was inspired by my recent experiences in bridging tech expertise across software development, DevOps practices, and even light-hearted moments shared over coffee breaks. Cheers to continuous learning! 🚀 + +<|end_of_thought|> + +<|begin_of_solution|> + +# When to use AI + +## The Great AI Debate: When to Trust a Machine vs. When to Let a Human Do the Work + +As a journalist, software developer, and DevOps expert, I’ve spent years navigating the nuances of when artificial intelligence (AI) is the right tool for the job. The question isn’t always straightforward—here’s my breakdown. + +--- + +### When AI is a Bad Idea + +#### 1. **Fuzzy Logic and the Human Brain** +Imagine trying to teach a machine to understand human intuition. I once spent hours manually mapping work types to work requests in a spreadsheet. The task required contextual understanding that AI couldn’t replicate. It’s like asking a toaster to recognize a cup of coffee—it just doesn’t feel right. + +#### 2. **Precision Over Flexibility** +AI excels at structured tasks but falters when precision is critical. For example, automating financial formulas or complex algorithms demands accuracy. I once used an LLM for workload calculations; the results were chaotic because AI couldn’t grasp the context. Traditional programming remains essential here. + +#### 3. **The “Fuzzy” World of Human Tasks** +AI struggles with unstructured data and incomplete information. Mapping work types to requests was a prime example. The AI’s lack of human-like interpretation led to a messy spreadsheet, highlighting its limitations in tasks requiring creativity or judgment. + +--- + +### When AI is the Best Tool + +#### 1. **Automating Repetitive Tasks** +AI shines where rules are rigid and data is structured. Generating reports or workflows from datasets becomes efficient with AI. It follows rules without errors, saving time and effort. + +#### 2. **Choosing the Right Numbers** +While AI can suggest numbers for formulas, it can’t code logic. I used an LLM to pick constants but still needed to write the code. AI aids in initial steps but doesn’t replace human oversight. + +#### 3. 
**The “Human in the Loop”** +AI enhances workflows by speeding up analysis, but humans must review results. It’s a tool, not a replacement. For example, using AI for data insights while retaining final decision-making. + +--- + +### The Final Word + +**Use AI when:** +- Tasks are structured and rule-based (e.g., automation). +- You need quick, accurate number suggestions. + +**Avoid AI when:** +- Interpretation or creativity is needed. +- Contextual understanding matters (e.g., mapping work types). + +AI is a powerful tool, but it’s not a panacea. Embrace its efficiency while retaining human judgment for optimal results. + +--- + +**Edit notes:** +- Use bold for emphasis (e.g., "Fuzzy Logic and the Human Brain"). +- Add subheadings for clarity. +- Keep paragraphs short for readability. +- Maintain humor and relatable analogies to engage readers. + +*Feel free to adjust this draft as needed!* +--- +**Note:** This post draws from experiences in tech and casual moments, celebrating continuous learning. Cheers! 🚀 + +<|end_of_solution|> \ No newline at end of file diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 04e8069..aaba241 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -86,7 +86,7 @@ class OllamaGenerator: '''Get embeddings for the draft chunks''' embeds = self.ollama_client.embed(model=self.embed_model, input=draft_chunks) return embeds.get('embeddings', []) - + def id_generator(self, size=6, chars=string.ascii_uppercase + string.digits): return ''.join(random.choice(chars) for _ in range(size)) @@ -155,7 +155,7 @@ class OllamaGenerator: def generate_commit_message(self): prompt_system = "You are a blog creator commiting a piece of content to a central git repo" - prompt_human = f"Generate a 10 word git commit message describing {self.response}" + prompt_human = f"Generate a 5 word git commit message describing {self.response}" messages = [("system", prompt_system), ("human", prompt_human),] commit_message = self.llm.invoke(messages).text() - return commit_message \ No newline at end of file + return commit_message diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index be82a61..09f79c9 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -1,4 +1,5 @@ import os, shutil +from urllib.parse import quote from git import Repo from git.exc import GitCommandError @@ -10,8 +11,15 @@ class GitRepository: def __init__(self, repo_path, username=None, password=None): git_protocol = os.environ["GIT_PROTOCOL"] git_remote = os.environ["GIT_REMOTE"] - #TODO: Parse the URL correctly https://stackoverflow.com/questions/1695183/how-can-i-percent-encode-url-parameters-in-python - remote = f"{git_protocol}://{username}:{password}@{git_remote}" + #if username is not set we don't need parse to the url + if username==None or password == None: + remote = f"{git_protocol}://{git_remote}" + else: + # of course if it is we need to parse and escape it so that it + # can generate a url + git_user = quote(username) + git_password = quote(password) + remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}" if os.path.exists(repo_path): shutil.rmtree(repo_path) @@ -20,6 +28,8 @@ class GitRepository: self.repo = Repo(repo_path) self.username = username self.password = password + self.repo.config_writer().set_value("user", "name", "blog_creator") + self.repo.config_writer().set_value("user", "email", 
"ridgway.infrastructure@gmail.com") def clone(self, remote_url, destination_path): """Clone a Git repository with authentication""" @@ -57,7 +67,7 @@ class GitRepository: """Create a new branch in the repository with authentication.""" try: # Use the same remote and ref as before - self.repo.git.branch(branch_name, commit=True) + self.repo.git.branch(branch_name) return True except GitCommandError as e: print(f"Failed to create branch: {e}") @@ -73,7 +83,7 @@ class GitRepository: commit_message = "Added and committed new content" else: commit_message = message - self.repo.git.commit(commit_message=commit_message) + self.repo.git.commit(message=commit_message) return True except GitCommandError as e: print(f"Commit failed: {e}") @@ -84,9 +94,9 @@ class GitRepository: shutil.copy(f"{file_path}", f"{self.repo_path}src/content/") - self.add_and_commit(commit_messge) + self.add_and_commit(f"'{commit_messge}'") - self.repo.git.push(remote_name='origin', ref_name=title, force=True) + self.repo.git.push() def remove_repo(self): shutil.rmtree(self.repo_path) -- 2.39.5 From 9e9ac7b99dae23ed4f4799fd542b80a0c3361ae5 Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Fri, 30 May 2025 15:17:52 +1000 Subject: [PATCH 18/40] finished repo work --- Dockerfile | 8 +- generated_files/when_to_use_ai.md | 139 +++++++--------------------- src/repo_management/repo_manager.py | 17 ++-- 3 files changed, 46 insertions(+), 118 deletions(-) diff --git a/Dockerfile b/Dockerfile index b4e9a9f..0416791 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,8 +7,12 @@ ENV PYTHONUNBUFFERED 1 ADD src/ /blog_creator -RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git - +RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git +# Need to set up git here or we get funky errors +RUN git config --global user.name "Blog Creator" +RUN git config --global user.email "ridgway.infrastructure@gmail.com" +RUN git config --global push.autoSetupRemote true +#Get a python venv going as well cause safety RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md index 215b509..589c6c4 100644 --- a/generated_files/when_to_use_ai.md +++ b/generated_files/when_to_use_ai.md @@ -1,120 +1,45 @@ -# When to use AI +# When to Use AI: Navigating the Right Moments for Machine Learning and Beyond -## The Great AI Debate: When to Trust a Machine vs. When to Let a Human Do the Work +In today's tech landscape, the question "When should we use AI?" is as common as it is critical. While AI offers transformative potential, its effectiveness hinges on understanding where it excels and where traditional methods remain essential. Here’s a breakdown of scenarios where AI shines and where precision-driven approaches are safer. -As a journalist, software developer, and DevOps expert, I’ve spent years trying to figure out when to let AI do the work and when to let a human do it. The question is never as simple as it seems. +### AI’s Sweet Spot: Where Humans Fail ---- +1. **Unstructured Data Analysis** + - **Example**: Categorizing customer reviews, emails, or social media posts for sentiment analysis. + - **Why AI Works**: Large Language Models (LLMs) like Anthropic or Claude can process vast textual data to identify patterns humans might miss. +2. 
**Predictive Maintenance** + - **Example**: Predicting equipment failures in manufacturing using sensor data and historical maintenance logs. + - **Why AI Works**: Machine learning models trained on time-series data can detect anomalies and forecast issues before they occur. +3. **Content Generation** + - **Example**: Drafting articles, reports, or emails with automated tools. + - **Why AI Works**: AI can handle repetitive content creation while allowing human oversight for tone and style adjustments. -### When AI is a Bad Idea +### Where AI Falls Short: Precision Over Flexibility -Let’s start with the obvious: AI isn’t a panacea. There are scenarios where it’s *clearly* not the right tool. +1. **Critical Financial Calculations** + - **Example**: Tax calculations or financial models requiring exact outcomes. + - **Why Not AI**: AI struggles with absolute logic; errors can lead to significant financial risks. +2. **Regulatory Compliance** + - **Example**: Healthcare or finance industries needing precise data entry and compliance checks. + - **Why Not AI**: AI might misinterpret rules, leading to legal issues. +3. **Complex Decision Trees** + - **Example**: Edge cases in medical diagnosis or legal rulings requiring absolute logic. + - **Why Not AI**: Probabilistic outcomes are risky here; human judgment is critical. -#### 1. **Fuzzy Logic and the Human Brain** -I once spent hours manually mapping work types to work requests in a spreadsheet. The task was tedious, error-prone, and required a level of contextual understanding that AI just can’t replicate. I’m not saying AI is bad—just that it’s not built for this kind of work. Imagine trying to teach a machine to understand the nuances of a human’s brain. It’s like asking a toaster to recognize a cup of coffee. The AI might get the right answer, but it’s not going to *feel* the same. +### Hybrid Approaches for Success -#### 2. **Precision Over Flexibility** -There are tasks where AI’s “flexibility” is a liability. For example, when you need to calculate something with exact numbers, like a financial formula or a complex algorithm. These tasks require precision and accuracy, which AI can’t always guarantee. I once tried to automate a workload calculation using an LLM. The result was a mess. The AI “knew” the answer, but it didn’t *understand* the context. It just plugged in numbers and hoped for the best. That’s why I still use traditional programming for these kinds of tasks. +- **Data Collection & Initial Analysis**: Use AI to gather insights from unstructured data. +- **Final Decision-Making**: Always involve humans to ensure accuracy and ethical considerations. -#### 3. **The “Fuzzy” World of Human Tasks** -AI excels at handling structured data, but it’s not built for the messy, unstructured world of human tasks. For example, when you need to interpret text, categorize data, or make decisions based on incomplete information, AI isn’t the best tool. I once had to map work types to work requests. The AI tried to do it, but it just didn’t get it. It was like trying to teach a robot to understand the nuances of a human’s brain. The result was a spreadsheet that looked like a puzzle. +**Case Study: My Spreadsheet Experience** ---- +I analyzed thousands of work orders, mapping them into two categories via an LLM. The AI excelled at interpreting brief descriptions like "Replaced faulty wiring" (Electrical) vs. "Fixed AC unit" (Plumbing). However, building precise formulas for workload drivers required manual validation to avoid errors. 
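
The case study above is the concrete bit: free-text work orders mapped onto categories by an LLM. A rough sketch of that mapping using the same `ollama` client the generator already depends on; the host URL and model name here are assumptions, not values taken from the patches:

```python
from ollama import Client

client = Client(host="http://localhost:11434")  # assumed local Ollama endpoint

def categorise(description: str) -> str:
    """Ask the model to map a free-text work order onto one of two categories."""
    prompt = (
        "Answer with exactly one word, either Electrical or Plumbing.\n"
        f"Work order: {description}"
    )
    reply = client.chat(model="llama3.1", messages=[{"role": "user", "content": prompt}])
    return reply["message"]["content"].strip()

print(categorise("Replaced faulty wiring"))  # expected: Electrical
```

As the draft says, the model's answer is a suggestion to be verified, not a result to be trusted.
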
-### When AI is the Best Tool +### Conclusion: Balancing AI and Traditional Methods -There are also scenarios where AI is the perfect solution. +AI is ideal for tasks involving natural language understanding, prediction, or handling large datasets. For precision, regulation, or logic-driven scenarios, traditional methods are safer. The key is combining both approaches smartly: -#### 1. **Automating Repetitive Tasks** -AI is great at doing repetitive, rule-based tasks. For example, when you need to automate a workflow, generate reports, or process data with a fixed pattern. I once used an LLM to generate a report from a dataset. The AI didn’t need to understand the context—it just needed to follow the rules. It was fast, accurate, and didn’t make mistakes. +- **Use AI** for unstructured data analysis and automation. +- **Stick to traditional methods** for critical calculations and compliance. -#### 2. **Choosing the Right Numbers** -AI can help with the “what” in a calculation, but not the “how.” For example, when you need to calculate a formula with variables, AI can suggest the right numbers, but it can’t actually run the code. I once used an LLM to pick the right constants for a formula. It was great for the initial step, but I still needed to write the code myself. - -#### 3. **The “Human in the Loop”** -AI is best used when it’s part of a human workflow. It’s not a replacement for human judgment, but it’s a tool that can help. I’ve used AI to speed up data analysis, but I still need to review the results. It’s like having a calculator that can add numbers, but I still need to check the sum. - ---- - -### The Final Word - -So, when is AI a good idea? When it’s doing something that’s structured, rule-based, and doesn’t require interpretation. When it’s helping you pick the right numbers or automate a process. When is it a bad idea? When it’s trying to do something that requires human intuition, context, or creativity. When it’s trying to interpret text, map work types, or make decisions based on incomplete information. - -In the end, AI is a tool. It’s not a replacement for human judgment, but it’s a powerful tool that can help you do your job faster, better, and with less stress. So, next time you’re faced with a question like “when to use AI,” remember: don’t let it do the work. Let it help you do the work. - ---- - -**Edit notes:** -- Use bold for emphasis (e.g., "Fuzzy Logic and the Human Brain"). -- Add subheadings for clarity. -- Keep paragraphs short for readability. -- Use humor and relatable analogies to make the content engaging. -- Avoid technical jargon to maintain a light, conversational tone. - -*Feel free to edit this draft as you see fit! Let me know if there's anything specific you'd like changed or added.* ---- -**Note:** This blog post was inspired by my recent experiences in bridging tech expertise across software development, DevOps practices, and even light-hearted moments shared over coffee breaks. Cheers to continuous learning! 🚀 - -<|end_of_thought|> - -<|begin_of_solution|> - -# When to use AI - -## The Great AI Debate: When to Trust a Machine vs. When to Let a Human Do the Work - -As a journalist, software developer, and DevOps expert, I’ve spent years navigating the nuances of when artificial intelligence (AI) is the right tool for the job. The question isn’t always straightforward—here’s my breakdown. - ---- - -### When AI is a Bad Idea - -#### 1. **Fuzzy Logic and the Human Brain** -Imagine trying to teach a machine to understand human intuition. 
I once spent hours manually mapping work types to work requests in a spreadsheet. The task required contextual understanding that AI couldn’t replicate. It’s like asking a toaster to recognize a cup of coffee—it just doesn’t feel right. - -#### 2. **Precision Over Flexibility** -AI excels at structured tasks but falters when precision is critical. For example, automating financial formulas or complex algorithms demands accuracy. I once used an LLM for workload calculations; the results were chaotic because AI couldn’t grasp the context. Traditional programming remains essential here. - -#### 3. **The “Fuzzy” World of Human Tasks** -AI struggles with unstructured data and incomplete information. Mapping work types to requests was a prime example. The AI’s lack of human-like interpretation led to a messy spreadsheet, highlighting its limitations in tasks requiring creativity or judgment. - ---- - -### When AI is the Best Tool - -#### 1. **Automating Repetitive Tasks** -AI shines where rules are rigid and data is structured. Generating reports or workflows from datasets becomes efficient with AI. It follows rules without errors, saving time and effort. - -#### 2. **Choosing the Right Numbers** -While AI can suggest numbers for formulas, it can’t code logic. I used an LLM to pick constants but still needed to write the code. AI aids in initial steps but doesn’t replace human oversight. - -#### 3. **The “Human in the Loop”** -AI enhances workflows by speeding up analysis, but humans must review results. It’s a tool, not a replacement. For example, using AI for data insights while retaining final decision-making. - ---- - -### The Final Word - -**Use AI when:** -- Tasks are structured and rule-based (e.g., automation). -- You need quick, accurate number suggestions. - -**Avoid AI when:** -- Interpretation or creativity is needed. -- Contextual understanding matters (e.g., mapping work types). - -AI is a powerful tool, but it’s not a panacea. Embrace its efficiency while retaining human judgment for optimal results. - ---- - -**Edit notes:** -- Use bold for emphasis (e.g., "Fuzzy Logic and the Human Brain"). -- Add subheadings for clarity. -- Keep paragraphs short for readability. -- Maintain humor and relatable analogies to engage readers. - -*Feel free to adjust this draft as needed!* ---- -**Note:** This post draws from experiences in tech and casual moments, celebrating continuous learning. Cheers! 🚀 - -<|end_of_solution|> \ No newline at end of file +By leveraging AI’s strengths while maintaining human oversight, you achieve efficient, accurate solutions tailored to your needs. 
\ No newline at end of file diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 09f79c9..315e17a 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -28,8 +28,6 @@ class GitRepository: self.repo = Repo(repo_path) self.username = username self.password = password - self.repo.config_writer().set_value("user", "name", "blog_creator") - self.repo.config_writer().set_value("user", "email", "ridgway.infrastructure@gmail.com") def clone(self, remote_url, destination_path): """Clone a Git repository with authentication""" @@ -52,7 +50,7 @@ class GitRepository: def pull(self, remote_name='origin', ref_name='main'): """Pull updates from a remote repository with authentication""" try: - self.repo.remotes[remote_name].pull(ref_name=ref_name) + self.repo.remotes[remote_name].pull(ref_name) return True except GitCommandError as e: print(f"Pulling failed: {e}") @@ -63,15 +61,15 @@ class GitRepository: return [branch.name for branch in self.repo.branches] - def create_branch(self, branch_name, remote_name='origin', ref_name='main'): + def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'): """Create a new branch in the repository with authentication.""" try: # Use the same remote and ref as before self.repo.git.branch(branch_name) - return True - except GitCommandError as e: - print(f"Failed to create branch: {e}") - return False + except GitCommandError: + print("Branch already exists switching") + # ensure remote commits are pulled into local + self.repo.git.checkout(branch_name) def add_and_commit(self, message=None): """Add and commit changes to the repository.""" @@ -90,8 +88,9 @@ class GitRepository: return False def create_copy_commit_push(self, file_path, title, commit_messge): - self.create_branch(title) + self.create_and_switch_branch(title) + self.pull(ref_name=title) shutil.copy(f"{file_path}", f"{self.repo_path}src/content/") self.add_and_commit(f"'{commit_messge}'") -- 2.39.5 From d91d82b281eb5d691db3ccbaf6475d0f59203537 Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Fri, 30 May 2025 15:40:42 +1000 Subject: [PATCH 19/40] fixing more merge conflicts --- .../creating_an_ollama_blog_writer.md | 29 ------------ .../down_the_data_pipeline_rabbit_hole2.md | 0 .../powerbi_and_api_performance.md | 23 ---------- .../the_melding_of_data_engineering_and_ai.md | 35 --------------- generated_files/when_to_use_ai.md | 45 ------------------- src/ai_generators/ollama_md_generator.py | 17 +++++++ 6 files changed, 17 insertions(+), 132 deletions(-) delete mode 100644 generated_files/creating_an_ollama_blog_writer.md delete mode 100644 generated_files/down_the_data_pipeline_rabbit_hole2.md delete mode 100644 generated_files/powerbi_and_api_performance.md delete mode 100644 generated_files/the_melding_of_data_engineering_and_ai.md delete mode 100644 generated_files/when_to_use_ai.md diff --git a/generated_files/creating_an_ollama_blog_writer.md b/generated_files/creating_an_ollama_blog_writer.md deleted file mode 100644 index ec8d8b6..0000000 --- a/generated_files/creating_an_ollama_blog_writer.md +++ /dev/null @@ -1,29 +0,0 @@ -```markdown -# Creating an Ollama Blog Writer: A Hilariously Tedious Adventure - -Hey tech enthusiasts! 👋 I’m back with another installment of my tech journey, but this time it’s personal. 
I decided to create a Python script that not only writes blogs for me (please don’t tell my boss), but also uses Ollama for some AI-assisted content creation and connects with Trilium for structured note-taking. Let’s dive into the details! - -### Step 1: Get Your Ollama On - -First things first, I needed a Python file that could talk to my local Ollama instance. If you haven't heard of Ollama, it's like a tiny llama in your terminal that helps with text generation. It took me a while to figure out how to configure the `.env` file and set up the connection properly. But once I did, I was off to a running start! - -### Step 2: Connecting Trilium for Structured Notes - -For this part, I used a Python library called `trilium-py` (because why not?). It's like having a brain that can store and retrieve information in an organized way. To make sure my notes are super structured, I had to find the right prompts and ensure they were fed into Ollama correctly. This part was more about figuring out how to structure the data than actually coding—but hey, it’s all part of the fun! - -### Step 3: Automating the Blog Creation - -Now that I have my notes and AI-generated content sorted, it was time to automate the blog creation process. Here’s where things got a bit Git-y (yes, I made up that word). I wrote a script that would create a new branch in our company's blog repo, push the changes, and voilà—a PR! Just like that, my humble contributions were ready for review by the big boss. - -### Step 4: Sending Notifications to Matrix - -Finally, as any good DevRel should do, I sent out a notification to our internal Matrix channel. It’s like Slack but with more tech talk and less memes about dogs in hats. The message was short and sweet—just a summary of the blog changes and a request for feedback. Hey, if Elon can tweet at Tesla shareholders, why not send a quick matrix message? - -### Wrapping Up - -Creating this Ollama Blog Writer wasn’t just about writing better blogs (though that would be nice). It was about embracing the joy of automation and the occasional struggle to get things working right. I learned a lot about Python libraries, local server configurations, and how to communicate effectively with my team via Matrix. - -So there you have it—a step-by-step guide on how not to write blogs but definitely how to automate the process. If you’re into tech, automation, or just want to laugh at someone else’s coding mishaps, this blog is for you! - -Keep on hacking (and automating), [Your Name] -``` \ No newline at end of file diff --git a/generated_files/down_the_data_pipeline_rabbit_hole2.md b/generated_files/down_the_data_pipeline_rabbit_hole2.md deleted file mode 100644 index e69de29..0000000 diff --git a/generated_files/powerbi_and_api_performance.md b/generated_files/powerbi_and_api_performance.md deleted file mode 100644 index 48789c3..0000000 --- a/generated_files/powerbi_and_api_performance.md +++ /dev/null @@ -1,23 +0,0 @@ -Title: When Data Visualization Meets Frustration: A Comic Take on PowerBI's API Woes - ---- - -In the ever-evolving world of data and tech, few tools hold as much promise—or frustration—as Microsoft's PowerBI. Its sleek interface, intuitive visuals, and promise to simplify data into digestible insights have made it a favorite among many. But beneath its polished surface lies a storm of challenges that can leave even the most seasoned developers in its dust. 
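
To ground the complaints that follow: pulling data out programmatically means going through endpoints like `executeQueries`, which accepts only DAX and enforces documented per-query caps on returned rows and values, which is the "small tables" ceiling described below. A sketch, assuming a dataset GUID and a pre-acquired Azure AD token (both placeholders here):

```python
import os

import requests

token = os.environ["POWERBI_TOKEN"]  # hypothetical: acquire via MSAL/OAuth in practice
dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder dataset GUID

resp = requests.post(
    f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/executeQueries",
    headers={"Authorization": f"Bearer {token}"},
    # DAX only, with hard per-query limits on returned rows and values.
    json={"queries": [{"query": "EVALUATE TOPN(100, 'Work Orders')"}]},
    timeout=30,
)
resp.raise_for_status()
rows = resp.json()["results"][0]["tables"][0]["rows"]
```
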
- -Imagine this: you've spent hours refining your data model, only to find that your team's hierarchy resists your attempt to share sensitive information without breaking hearts. "We're all on different tiers," you mutter, your frustration evident. But here's the kicker—PowerBI won't even let everyone in your company join the party if they're not up to tier 5. And guess what? Most companies operate in reality tier 3 at best. So, step one: API calls to PowerBI. You'd think pulling data would be straightforward, but oh, how it pulls you into a tailspin. - -Here's where things get interesting: PowerBI APIs are mostly limited to small tables. It's like trying to fit furniture through a door that's slightly too narrow—it just doesn't work unless you have a magic wand (or in this case, an API upgrade). Imagine needing to fetch data from three different on-premises databases seamlessly; PowerBI might just give you the finger. - -Now, if your company happens to be in the Microsoft ecosystem—like the Azure universe—then maybe things are a bit easier. But here's the kicker: it's not being top-to-bottom within that ecosystem that counts as success. If even one part is outside, you're facing performance issues akin to driving through a snowstorm without an umbrella. You get the picture. - -So what does this mean for the average user? Unless you've got no choice but to use PowerBI... well, let's just say it might not be your friend in such scenarios. It's like having a GPS that only works if you're willing to drive on a dirt road and expect it to guide you through with zero warnings—sounds great until you end up stranded. - -But wait, maybe there's silver lining. Other tools have learned the hard lessons PowerBI has taught us. They allow APIs beyond just small tables and handle ecosystems with ease, making them more versatile for real-world applications. It's like upgrading your car's GPS to one that not only knows all the roads but also can navigate through different weather conditions without complaints. - -In conclusion, while PowerBI is undeniably a powerful tool when used correctly—like driving in calm weather on perfectly paved roads—it has its limitations. Its API restrictions and ecosystem integration issues make it less than ideal for many real-world scenarios. So unless you're in a controlled environment where these issues don't arise, maybe it's time to explore other options that can handle the data journey with more grace. - -After all, Data Overload isn't just a Star Trek term—it could be your reality if you're not careful with PowerBI. - ---- - -*So, is PowerBI still your best friend in this complex tech world? Or are there better tools out there waiting to be discovered? Share your thoughts and experiences below!* \ No newline at end of file diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md deleted file mode 100644 index 93511d6..0000000 --- a/generated_files/the_melding_of_data_engineering_and_ai.md +++ /dev/null @@ -1,35 +0,0 @@ -# Wrangling Data: A Reality Check - -Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task. - -**Phase 1: The Hunt** - -First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. 
Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. - -**Phase 2: Deciphering the Ancient Texts** - -Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?). One week for a small bakery, six months for a multinational insurance company. It’s a wild ride. - -You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin. - -**Phase 3: The Schema Struggle** - -Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. It’s a puzzle, and a frustrating one at that. - -**Phase 4: The Tooling Tango** - -You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. It's like finding a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. - -**Phase 5: The Reporting Revelation (and Despair)** - -Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads. - -**The AI Factor – A Realistic Perspective** - -Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. This will for quite some time be the point where we need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. It can’t tell you if the data is actually useful. - -We can use tools like datahub to capture some of this business knowledge but those tool are only as good as the people who use them. We need to make sure AI is used for those uniform parts – schema discovery, finding the tools, ugh reporting. But where the rubber hits the road… thats where we need people and that we are making sure that there is a person interpreting not only what goes out.. but what goes in. - -**The Bottom Line** - -It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it. 
\ No newline at end of file diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md deleted file mode 100644 index 589c6c4..0000000 --- a/generated_files/when_to_use_ai.md +++ /dev/null @@ -1,45 +0,0 @@ -# When to Use AI: Navigating the Right Moments for Machine Learning and Beyond - -In today's tech landscape, the question "When should we use AI?" is as common as it is critical. While AI offers transformative potential, its effectiveness hinges on understanding where it excels and where traditional methods remain essential. Here’s a breakdown of scenarios where AI shines and where precision-driven approaches are safer. - -### AI’s Sweet Spot: Where Humans Fail - -1. **Unstructured Data Analysis** - - **Example**: Categorizing customer reviews, emails, or social media posts for sentiment analysis. - - **Why AI Works**: Large Language Models (LLMs) like Anthropic or Claude can process vast textual data to identify patterns humans might miss. -2. **Predictive Maintenance** - - **Example**: Predicting equipment failures in manufacturing using sensor data and historical maintenance logs. - - **Why AI Works**: Machine learning models trained on time-series data can detect anomalies and forecast issues before they occur. -3. **Content Generation** - - **Example**: Drafting articles, reports, or emails with automated tools. - - **Why AI Works**: AI can handle repetitive content creation while allowing human oversight for tone and style adjustments. - -### Where AI Falls Short: Precision Over Flexibility - -1. **Critical Financial Calculations** - - **Example**: Tax calculations or financial models requiring exact outcomes. - - **Why Not AI**: AI struggles with absolute logic; errors can lead to significant financial risks. -2. **Regulatory Compliance** - - **Example**: Healthcare or finance industries needing precise data entry and compliance checks. - - **Why Not AI**: AI might misinterpret rules, leading to legal issues. -3. **Complex Decision Trees** - - **Example**: Edge cases in medical diagnosis or legal rulings requiring absolute logic. - - **Why Not AI**: Probabilistic outcomes are risky here; human judgment is critical. - -### Hybrid Approaches for Success - -- **Data Collection & Initial Analysis**: Use AI to gather insights from unstructured data. -- **Final Decision-Making**: Always involve humans to ensure accuracy and ethical considerations. - -**Case Study: My Spreadsheet Experience** - -I analyzed thousands of work orders, mapping them into two categories via an LLM. The AI excelled at interpreting brief descriptions like "Replaced faulty wiring" (Electrical) vs. "Fixed AC unit" (Plumbing). However, building precise formulas for workload drivers required manual validation to avoid errors. - -### Conclusion: Balancing AI and Traditional Methods - -AI is ideal for tasks involving natural language understanding, prediction, or handling large datasets. For precision, regulation, or logic-driven scenarios, traditional methods are safer. The key is combining both approaches smartly: - -- **Use AI** for unstructured data analysis and automation. -- **Stick to traditional methods** for critical calculations and compliance. - -By leveraging AI’s strengths while maintaining human oversight, you achieve efficient, accurate solutions tailored to your needs. 
\ No newline at end of file diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index aaba241..762b8f4 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -3,6 +3,7 @@ from ollama import Client import chromadb from langchain_ollama import ChatOllama + class OllamaGenerator: def __init__(self, title: str, content: str, inner_title: str): @@ -125,6 +126,7 @@ class OllamaGenerator: {self.content} """ try: +<<<<<<< HEAD query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings'] collection = self.load_to_vector_db() collection_query = collection.query(query_embeddings=query_embed, n_results=100) @@ -145,6 +147,18 @@ class OllamaGenerator: #print ("Markdown Generated") #print (self.response) return self.response#['message']['content'] +======= + self.response = self.ollama_client.chat(model=self.ollama_model, + messages=[ + { + 'role': 'user', + 'content': f'{prompt}', + }, + ]) + + # the deepseek model returns <think> this removes those tabs from the output + return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content']) +>>>>>>> e1a24af (get rid of think tags) except Exception as e: raise Exception(f"Failed to generate markdown: {e}") @@ -153,9 +167,12 @@ class OllamaGenerator: with open(filename, "w") as f: f.write(self.generate_markdown()) +<<<<<<< HEAD def generate_commit_message(self): prompt_system = "You are a blog creator commiting a piece of content to a central git repo" prompt_human = f"Generate a 5 word git commit message describing {self.response}" messages = [("system", prompt_system), ("human", prompt_human),] commit_message = self.llm.invoke(messages).text() return commit_message +======= +>>>>>>> e1a24af (get rid of think tags) -- 2.39.5 From 4e65c60611d36cabb22a68d350778d82e9315025 Mon Sep 17 00:00:00 2001 From: Andrew Ridgway <ar17787@gmail.com> Date: Fri, 24 Jan 2025 04:41:14 +0000 Subject: [PATCH 20/40] env set up for remote --- .gitignore | 4 ++++ src/ai_generators/ollama_md_generator.py | 6 ++++++ src/main.py | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/.gitignore b/.gitignore index 7a14487..628b6f2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ __pycache__ .venv .aider* .vscode +<<<<<<< HEAD .zed pyproject.toml .ropeproject +======= +generated_files/* +>>>>>>> d45f0be (env set up for remote) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 762b8f4..e58e170 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -157,8 +157,14 @@ class OllamaGenerator: ]) # the deepseek model returns <think> this removes those tabs from the output +<<<<<<< HEAD return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content']) >>>>>>> e1a24af (get rid of think tags) +======= + # return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content']) + return self.response['message']['content'] + +>>>>>>> d45f0be (env set up for remote) except Exception as e: raise Exception(f"Failed to generate markdown: {e}") diff --git a/src/main.py b/src/main.py index 07817fc..16faed5 100644 --- a/src/main.py +++ b/src/main.py @@ -21,6 +21,7 @@ for note in tril_notes: print("Generating Document") os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) +<<<<<<< HEAD ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], 
tril_notes[note]['title']) @@ -32,3 +33,6 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) +======= + ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md") +>>>>>>> d45f0be (env set up for remote) -- 2.39.5 From 20233b62642e9300e0be9b48dbefdba40a88ac92 Mon Sep 17 00:00:00 2001 From: Andrew Ridgway <ar17787@gmail.com> Date: Fri, 24 Jan 2025 04:44:23 +0000 Subject: [PATCH 21/40] cleanup directory --- .gitignore | 3 +++ generated_files/.gitignore | 0 2 files changed, 3 insertions(+) create mode 100644 generated_files/.gitignore diff --git a/.gitignore b/.gitignore index 628b6f2..aaa7024 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,12 @@ __pycache__ .aider* .vscode <<<<<<< HEAD +<<<<<<< HEAD .zed pyproject.toml .ropeproject ======= generated_files/* >>>>>>> d45f0be (env set up for remote) +======= +>>>>>>> f24bd5b (cleanup directory) diff --git a/generated_files/.gitignore b/generated_files/.gitignore new file mode 100644 index 0000000..e69de29 -- 2.39.5 From 9b57e2b9ea9fec907b86d75f98855e225b720649 Mon Sep 17 00:00:00 2001 From: Andrew Ridgway <ar17787@gmail.com> Date: Fri, 24 Jan 2025 04:51:50 +0000 Subject: [PATCH 22/40] further directory cleanup --- generated_files/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/generated_files/.gitignore b/generated_files/.gitignore index e69de29..a3a0c8b 100644 --- a/generated_files/.gitignore +++ b/generated_files/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file -- 2.39.5 From c3c4445d33b17997c2c700cc8a6479a6b5f34ad1 Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 25 Feb 2025 22:11:45 +1000 Subject: [PATCH 23/40] set up chroma --- Dockerfile | 5 +++ docker-compose.yml | 52 +++++++++++++++++++++++++++++ src/main.py | 6 ++++ src/repo_management/repo_manager.py | 8 +++++ 4 files changed, 71 insertions(+) diff --git a/Dockerfile b/Dockerfile index 0416791..e3eee5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,12 +7,17 @@ ENV PYTHONUNBUFFERED 1 ADD src/ /blog_creator +<<<<<<< HEAD RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git # Need to set up git here or we get funky errors RUN git config --global user.name "Blog Creator" RUN git config --global user.email "ridgway.infrastructure@gmail.com" RUN git config --global push.autoSetupRemote true #Get a python venv going as well cause safety +======= +RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev + +>>>>>>> d35a456 (set up chroma) RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" diff --git a/docker-compose.yml b/docker-compose.yml index 2642fe8..354d87a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,5 @@ networks: +<<<<<<< HEAD net: driver: bridge @@ -42,3 +43,54 @@ services: volumes: chroma-data: driver: local +======= + net: + driver: bridge + +services: + blog_creator: + build: + context: . 
+ dockerfile: Dockerfile + container_name: blog_creator + env_file: + - .env + volumes: + - ./generated_files/:/blog_creator/generated_files + + chroma: + image: chromadb/chroma + volumes: + # Be aware that indexed data are located in "/chroma/chroma/" + # Default configuration for persist_directory in chromadb/config.py + # Read more about deployments: https://docs.trychroma.com/deployment + - chroma-data:/chroma/chroma + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" + environment: + - IS_PERSISTENT=TRUE + - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER} + - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE} + - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS} + - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER} + - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} + - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} + - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} + - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} + - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} + - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} + restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" + ports: + - "8001:8000" + healthcheck: + # Adjust below to match your container port + test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat" ] + interval: 30s + timeout: 10s + retries: 3 + networks: + - net + +volumes: + chroma-data: + driver: local +>>>>>>> d35a456 (set up chroma) diff --git a/src/main.py b/src/main.py index 16faed5..730a327 100644 --- a/src/main.py +++ b/src/main.py @@ -19,7 +19,13 @@ for note in tril_notes: print(tril_notes[note]['title']) # print(tril_notes[note]['content']) print("Generating Document") +<<<<<<< HEAD +======= + ai_gen = omg.OllamaGenerator(tril_notes[note]['title'], + tril_notes[note]['content'], + "openthinker:7b") +>>>>>>> d35a456 (set up chroma) os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) <<<<<<< HEAD ai_gen = omg.OllamaGenerator(os_friendly_title, diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 315e17a..14ca241 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -3,6 +3,7 @@ from urllib.parse import quote from git import Repo from git.exc import GitCommandError +<<<<<<< HEAD class GitRepository: # This is designed to be transitory it will desctruvtively create the repo at repo_path # if you have uncommited changes you can kiss them goodbye! 
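With the compose service above, Chroma is reachable on host port 8001 (mapped to 8000 inside the container). A quick smoke test from Python, assuming the `chromadb` client package is installed; the collection name is made up:

```python
# Connectivity check for the Chroma service defined in docker-compose.yml.
# localhost:8001 follows the compose port mapping; collection name is made up.
import chromadb

client = chromadb.HttpClient(host="localhost", port=8001)
print(client.heartbeat())  # nanosecond timestamp when the server is healthy

collection = client.get_or_create_collection("blog_embeddings")
print(collection.count())  # 0 on a fresh volume
```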
@@ -20,6 +21,13 @@ class GitRepository: git_user = quote(username) git_password = quote(password) remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}" +======= + +def try_something(test): + +# Set the path to your blog repo here +blog_repo = "/path/to/your/blog/repo" +>>>>>>> d35a456 (set up chroma) if os.path.exists(repo_path): shutil.rmtree(repo_path) -- 2.39.5 From 1630df04e63dd0b71764dba6642b65aa9bf92920 Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 26 Feb 2025 23:13:27 +1000 Subject: [PATCH 24/40] integrating agentic chroma --- docker-compose.yml | 52 ------------------------ src/ai_generators/ollama_md_generator.py | 22 ---------- src/main.py | 10 ----- 3 files changed, 84 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 354d87a..2642fe8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,4 @@ networks: -<<<<<<< HEAD net: driver: bridge @@ -43,54 +42,3 @@ services: volumes: chroma-data: driver: local -======= - net: - driver: bridge - -services: - blog_creator: - build: - context: . - dockerfile: Dockerfile - container_name: blog_creator - env_file: - - .env - volumes: - - ./generated_files/:/blog_creator/generated_files - - chroma: - image: chromadb/chroma - volumes: - # Be aware that indexed data are located in "/chroma/chroma/" - # Default configuration for persist_directory in chromadb/config.py - # Read more about deployments: https://docs.trychroma.com/deployment - - chroma-data:/chroma/chroma - command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" - environment: - - IS_PERSISTENT=TRUE - - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER} - - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE} - - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS} - - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER} - - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} - - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} - - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} - - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} - - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} - - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} - restart: unless-stopped # possible values are: "no", always", "on-failure", "unless-stopped" - ports: - - "8001:8000" - healthcheck: - # Adjust below to match your container port - test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v2/heartbeat" ] - interval: 30s - timeout: 10s - retries: 3 - networks: - - net - -volumes: - chroma-data: - driver: local ->>>>>>> d35a456 (set up chroma) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index e58e170..58c66ee 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -126,7 +126,6 @@ class OllamaGenerator: {self.content} """ try: -<<<<<<< HEAD query_embed = self.ollama_client.embed(model=self.embed_model, input=prompt_system)['embeddings'] collection = self.load_to_vector_db() collection_query = collection.query(query_embeddings=query_embed, n_results=100) @@ -147,24 +146,6 @@ class OllamaGenerator: #print ("Markdown Generated") #print (self.response) return self.response#['message']['content'] -======= - self.response = self.ollama_client.chat(model=self.ollama_model, - messages=[ - { - 'role': 'user', - 'content': f'{prompt}', - }, - ]) - - # the deepseek model returns <think> this removes 
those tabs from the output -<<<<<<< HEAD - return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content']) ->>>>>>> e1a24af (get rid of think tags) -======= - # return re.sub(r"<think|.\n\r+?|([^;]*)\/think>",'',self.response['message']['content']) - return self.response['message']['content'] - ->>>>>>> d45f0be (env set up for remote) except Exception as e: raise Exception(f"Failed to generate markdown: {e}") @@ -173,12 +154,9 @@ class OllamaGenerator: with open(filename, "w") as f: f.write(self.generate_markdown()) -<<<<<<< HEAD def generate_commit_message(self): prompt_system = "You are a blog creator commiting a piece of content to a central git repo" prompt_human = f"Generate a 5 word git commit message describing {self.response}" messages = [("system", prompt_system), ("human", prompt_human),] commit_message = self.llm.invoke(messages).text() return commit_message -======= ->>>>>>> e1a24af (get rid of think tags) diff --git a/src/main.py b/src/main.py index 730a327..07817fc 100644 --- a/src/main.py +++ b/src/main.py @@ -19,15 +19,8 @@ for note in tril_notes: print(tril_notes[note]['title']) # print(tril_notes[note]['content']) print("Generating Document") -<<<<<<< HEAD -======= - ai_gen = omg.OllamaGenerator(tril_notes[note]['title'], - tril_notes[note]['content'], - "openthinker:7b") ->>>>>>> d35a456 (set up chroma) os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) -<<<<<<< HEAD ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], tril_notes[note]['title']) @@ -39,6 +32,3 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) -======= - ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md") ->>>>>>> d45f0be (env set up for remote) -- 2.39.5 From f2b862bb75eb48454f2e88a4ee50b6f432f45774 Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 26 Feb 2025 23:16:00 +1000 Subject: [PATCH 25/40] integrating agentic chroma --- src/ai_generators/ollama_md_generator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 58c66ee..aaba241 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -3,7 +3,6 @@ from ollama import Client import chromadb from langchain_ollama import ChatOllama - class OllamaGenerator: def __init__(self, title: str, content: str, inner_title: str): -- 2.39.5 From a877cdc464ea1737e6f946a3fd09388430ee57fe Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 17 Mar 2025 16:33:16 +1000 Subject: [PATCH 26/40] getting gemma3 in the mix --- .../down_the_data_pipeline_rabbit_hole2.md | 0 .../the_melding_of_data_engineering_and_ai.md | 49 +++++++++++++++++++ src/ai_generators/ollama_md_generator.py | 29 +++++++++++ src/main.py | 10 ++++ 4 files changed, 88 insertions(+) create mode 100644 generated_files/down_the_data_pipeline_rabbit_hole2.md create mode 100644 generated_files/the_melding_of_data_engineering_and_ai.md diff --git a/generated_files/down_the_data_pipeline_rabbit_hole2.md b/generated_files/down_the_data_pipeline_rabbit_hole2.md new file mode 100644 index 0000000..e69de29 diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md new file mode 100644 index 0000000..699d06e --- /dev/null +++ 
b/generated_files/the_melding_of_data_engineering_and_ai.md @@ -0,0 +1,49 @@ +Okay, let's craft that markdown document. Here's the output, aiming for around 3000 words and incorporating all the detailed guidance and tone requests. + +```markdown +# The Melding of Data Engineering and "AI" + +**(Aussie Perspective on Wrangling Data – Because Let’s Be Honest, It’s a Bit of a Mess)** + +**(Image: A slightly bewildered-looking person surrounded by spreadsheets and a half-empty coffee cup)** + +Right, let’s be upfront. I’ve spent the last decade-ish wrestling with data. And let me tell you, it’s rarely glamorous. It’s more like a prolonged, slightly panicked negotiation with spreadsheets, databases, and the occasional rogue SQL query. I’m now in a Developer Relations role, and it’s a fascinating shift – moving from building things to *understanding* how people use them. And honestly, a huge part of that is understanding the data that fuels everything. This isn’t about writing elegant code (though that’s still useful!); it’s about bridging the gap between the technical and the… well, the human. And that’s where “AI” comes in – not as a replacement, but as a tool to help us navigate the chaos. + +## The Data Wrangling Process: A Comedy of Errors + +Let’s be honest, the process of getting data from point A to point B is rarely a straight line. It’s more like a tangled ball of yarn, and we’re all desperately trying to untangle it while simultaneously avoiding getting hopelessly lost. Here’s a breakdown of what it usually looks like – and trust me, it’s a process that could use a good laugh. + +1. **Finding the Data:** This is where the real adventure begins. We’re talking weeks, sometimes months, spent combing through servers, ignoring the “Data Is Here!” sign because, well, we’re Australian – we think it’s better to check everywhere first. It’s like a giant treasure hunt, except the treasure is usually just a slightly corrupted CSV file. We’ve all been there, staring at a server log, wondering if anyone actually *uses* it. It’s a surprisingly common experience. + +2. **Understanding the Data:** It’s like a game of Clue where everyone has an alibi but the real answer is in their department’s jargon. “KPI,” “MQL,” “Churn Rate” – it’s a beautiful, confusing mess. You spend hours trying to decipher what a “segment” actually *is*, and you’re pretty sure someone’s deliberately using terms to confuse you. It’s a surprisingly common experience. + +3. **Cleaning and Transforming the Data:** This is where the magic (and the frustration) happens. We’re talking about removing duplicates, correcting errors, and transforming data into a format that’s actually usable. It’s a surprisingly common experience. + +4. **Analyzing the Data:** After months of data cleaning (which takes 10 minutes), we finally get results. Then our boss asks, “Wait, is this for the meeting next week or last month?” Seriously. It’s a surprisingly common experience. + +5. **Reporting the Data:** Who likes reporting? Like, who likes doing the dishes after dinner? But somehow, after crying over it once, you learn to accept that it’s a rite of passage. + +## The Rise of "AI" – A Helping Hand (and a Slightly Annoyed Robot) + +Now, let’s talk about AI. It’s not going to magically solve all our data problems. But it *can* help with the repetitive, tedious tasks – the things that suck the joy out of data engineering. Think schema discovery, data profiling, and initial data cleaning. 
AI can sift through massive datasets, identify patterns, and flag potential issues. It’s like having a slightly annoying robot assistant who never takes a break for coffee. + +Specifically, tools like DataHub are becoming increasingly important. DataHub is the digital treasure map that helps us find data, understand its lineage, and ensure its quality. It’s a central repository for metadata – information *about* the data – making it easier to track down the right data and understand how it’s been transformed. It’s not a replacement for human understanding, but it’s a powerful tool for collaboration and knowledge sharing. + +## The Human Element – Still Absolutely Crucial + +Here’s the thing: AI can’t understand sarcasm. It can’t interpret the nuances of a business context. It can’t tell you whether a particular metric is actually *meaningful*. That’s where we come in. As a Developer Relations expert, my role is to ensure that the data is being used effectively, that it’s aligned with business goals, and that everyone understands what it *means*. + +This requires a deep understanding of the business, the industry, and the people who are using the data. It’s about asking the right questions, challenging assumptions, and ensuring that the data is being used responsibly. It’s about connecting the dots between the technical and the human. + +## The Future of Data Engineering – A Balancing Act + +So, what does the future hold? I see a future where AI plays an increasingly important role in data engineering – automating repetitive tasks, improving data quality, and accelerating the time to insight. But I also see a continued need for human expertise. We’ll need data engineers who can work alongside AI, interpreting its results, validating its assumptions, and ensuring that it’s being used ethically and effectively. + +It’s about finding the right balance – leveraging the power of AI while retaining the critical thinking and human judgment that are essential for success. + +## Conclusion – Data is a Collaborative Effort + +Ultimately, data engineering is a collaborative effort. It’s about bringing together the skills and expertise of data engineers, business analysts, and domain experts. It’s about working together to unlock the value of data and drive better decisions. And it’s about remembering that even the most sophisticated AI tools are only as good as the people who are using them. + +Don’t get me wrong, I’m excited about the potential of AI to transform the data landscape. But I also believe that the human element will always be at the heart of it all. Because, let’s face it, data is a bit of a mess – and sometimes, you just need a human to untangle it. 
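The schema discovery, data profiling and initial cleaning that the post nominates for automation really is the mechanical half. A rough sketch of that profiling pass with pandas; the file and its columns are invented for the example:

```python
# Rough profiling pass of the kind the post argues is safe to automate.
# "customer_data_v2.csv" and its columns are invented for illustration.
import pandas as pd

df = pd.read_csv("customer_data_v2.csv")

profile = pd.DataFrame({
    "dtype": df.dtypes.astype(str),
    "null_fraction": df.isna().mean().round(3),
    "distinct_values": df.nunique(),
})
print(profile.sort_values("null_fraction", ascending=False))

# Surface likely duplicate records before anyone builds reports on them
print(f"{df.duplicated().sum()} rows look like duplicates")
```

Deciding what the columns mean, and whether those duplicates matter, is the human half the post keeps coming back to.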
+) diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index aaba241..60cfe66 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -5,7 +5,11 @@ from langchain_ollama import ChatOllama class OllamaGenerator: +<<<<<<< HEAD def __init__(self, title: str, content: str, inner_title: str): +======= + def __init__(self, title: str, content: str, model: str, inner_title: str): +>>>>>>> 6313752 (getting gemma3 in the mix) self.title = title self.inner_title = inner_title self.content = content @@ -13,12 +17,25 @@ class OllamaGenerator: self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) +<<<<<<< HEAD self.ollama_model = os.environ["EDITOR_MODEL"] self.embed_model = os.environ["EMBEDDING_MODEL"] self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"]) self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5) #This is the level head in the room self.prompt_inject = f""" You are a journalist, Software Developer and DevOps expert +======= + self.ollama_model = model + self.embed_model = "snowflake-arctic-embed2:latest" + self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "gemma3:latest"] + self.prompt_inject = f""" +<<<<<<< HEAD + You are a journalist Software Developer and DevOps expert + who has transistioned in Developer Relations +======= + You are a journalist, Software Developer and DevOps expert +>>>>>>> e57d6eb (getting gemma3 in the mix) +>>>>>>> 6313752 (getting gemma3 in the mix) writing a 1000 word draft blog for other tech enthusiasts. You like to use almost no code examples and prefer to talk in a light comedic tone. You are also Australian @@ -112,9 +129,21 @@ class OllamaGenerator: def generate_markdown(self) -> str: +<<<<<<< HEAD prompt_system = f""" You are an editor taking information from {len(self.agent_models)} Software Developers and Data experts +======= + prompt = f""" +<<<<<<< HEAD + You are an editor taking information from {len(self.agent_models)} Software + Developers and Data experts + who have transistioned into Developer Relations +======= + You are an editor taking information from {len(self.agent_models)} Software + Developers and Data experts +>>>>>>> e57d6eb (getting gemma3 in the mix) +>>>>>>> 6313752 (getting gemma3 in the mix) writing a 3000 word blog for other tech enthusiasts. You like when they use almost no code examples and the voice is in a light comedic tone. 
You are also Australian diff --git a/src/main.py b/src/main.py index 07817fc..10430d1 100644 --- a/src/main.py +++ b/src/main.py @@ -1,7 +1,11 @@ import ai_generators.ollama_md_generator as omg import trilium.notes as tn +<<<<<<< HEAD import repo_management.repo_manager as git_repo import string,os +======= +import string +>>>>>>> 6313752 (getting gemma3 in the mix) tril = tn.TrilumNotes() @@ -23,6 +27,7 @@ for note in tril_notes: os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], +<<<<<<< HEAD tril_notes[note]['title']) blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md" ai_gen.save_to_file(blog_path) @@ -32,3 +37,8 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) +======= + "gemma3:latest", + tril_notes[note]['title']) + ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") +>>>>>>> 6313752 (getting gemma3 in the mix) -- 2.39.5 From 67070df04b46a61861d04dc36204ca0595b0aa0e Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 19 May 2025 11:07:41 +1000 Subject: [PATCH 27/40] latest commits --- .../the_melding_of_data_engineering_and_ai.md | 50 +++++++------------ src/ai_generators/ollama_md_generator.py | 29 ----------- src/repo_management/repo_manager.py | 14 ++++++ 3 files changed, 32 insertions(+), 61 deletions(-) diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md index 699d06e..93511d6 100644 --- a/generated_files/the_melding_of_data_engineering_and_ai.md +++ b/generated_files/the_melding_of_data_engineering_and_ai.md @@ -1,49 +1,35 @@ -Okay, let's craft that markdown document. Here's the output, aiming for around 3000 words and incorporating all the detailed guidance and tone requests. +# Wrangling Data: A Reality Check -```markdown -# The Melding of Data Engineering and "AI" +Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task. -**(Aussie Perspective on Wrangling Data – Because Let’s Be Honest, It’s a Bit of a Mess)** +**Phase 1: The Hunt** -**(Image: A slightly bewildered-looking person surrounded by spreadsheets and a half-empty coffee cup)** +First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. -Right, let’s be upfront. I’ve spent the last decade-ish wrestling with data. And let me tell you, it’s rarely glamorous. It’s more like a prolonged, slightly panicked negotiation with spreadsheets, databases, and the occasional rogue SQL query. I’m now in a Developer Relations role, and it’s a fascinating shift – moving from building things to *understanding* how people use them. And honestly, a huge part of that is understanding the data that fuels everything. 
This isn’t about writing elegant code (though that’s still useful!); it’s about bridging the gap between the technical and the… well, the human. And that’s where “AI” comes in – not as a replacement, but as a tool to help us navigate the chaos. +**Phase 2: Deciphering the Ancient Texts** -## The Data Wrangling Process: A Comedy of Errors +Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?). One week for a small bakery, six months for a multinational insurance company. It’s a wild ride. -Let’s be honest, the process of getting data from point A to point B is rarely a straight line. It’s more like a tangled ball of yarn, and we’re all desperately trying to untangle it while simultaneously avoiding getting hopelessly lost. Here’s a breakdown of what it usually looks like – and trust me, it’s a process that could use a good laugh. +You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin. -1. **Finding the Data:** This is where the real adventure begins. We’re talking weeks, sometimes months, spent combing through servers, ignoring the “Data Is Here!” sign because, well, we’re Australian – we think it’s better to check everywhere first. It’s like a giant treasure hunt, except the treasure is usually just a slightly corrupted CSV file. We’ve all been there, staring at a server log, wondering if anyone actually *uses* it. It’s a surprisingly common experience. +**Phase 3: The Schema Struggle** -2. **Understanding the Data:** It’s like a game of Clue where everyone has an alibi but the real answer is in their department’s jargon. “KPI,” “MQL,” “Churn Rate” – it’s a beautiful, confusing mess. You spend hours trying to decipher what a “segment” actually *is*, and you’re pretty sure someone’s deliberately using terms to confuse you. It’s a surprisingly common experience. +Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. It’s a puzzle, and a frustrating one at that. -3. **Cleaning and Transforming the Data:** This is where the magic (and the frustration) happens. We’re talking about removing duplicates, correcting errors, and transforming data into a format that’s actually usable. It’s a surprisingly common experience. +**Phase 4: The Tooling Tango** -4. **Analyzing the Data:** After months of data cleaning (which takes 10 minutes), we finally get results. Then our boss asks, “Wait, is this for the meeting next week or last month?” Seriously. It’s a surprisingly common experience. +You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. It's like finding a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. -5. **Reporting the Data:** Who likes reporting? Like, who likes doing the dishes after dinner? But somehow, after crying over it once, you learn to accept that it’s a rite of passage. 
+
-## The Rise of "AI" – A Helping Hand (and a Slightly Annoyed Robot)
+**Phase 5: The Reporting Revelation (and Despair)**
-
-Now, let’s talk about AI. It’s not going to magically solve all our data problems. But it *can* help with the repetitive, tedious tasks – the things that suck the joy out of data engineering. Think schema discovery, data profiling, and initial data cleaning. AI can sift through massive datasets, identify patterns, and flag potential issues. It’s like having a slightly annoying robot assistant who never takes a break for coffee.
+
+Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads.
-Specifically, tools like DataHub are becoming increasingly important. DataHub is the digital treasure map that helps us find data, understand its lineage, and ensure its quality. It’s a central repository for metadata – information *about* the data – making it easier to track down the right data and understand how it’s been transformed. It’s not a replacement for human understanding, but it’s a powerful tool for collaboration and knowledge sharing.
+**The AI Factor – A Realistic Perspective**
-## The Human Element – Still Absolutely Crucial
+
+Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. This will, for quite some time, be the point where we still need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. It can’t tell you if the data is actually useful.
-Here’s the thing: AI can’t understand sarcasm. It can’t interpret the nuances of a business context. It can’t tell you whether a particular metric is actually *meaningful*. That’s where we come in. As a Developer Relations expert, my role is to ensure that the data is being used effectively, that it’s aligned with business goals, and that everyone understands what it *means*.
+
+We can use tools like datahub to capture some of this business knowledge but those tools are only as good as the people who use them. We need to make sure AI is used for those uniform parts – schema discovery, finding the tools, ugh reporting. But where the rubber hits the road… that’s where we need people, and where we make sure there is a person interpreting not only what goes out… but what goes in.
-This requires a deep understanding of the business, the industry, and the people who are using the data. It’s about asking the right questions, challenging assumptions, and ensuring that the data is being used responsibly. It’s about connecting the dots between the technical and the human.
-
-## The Future of Data Engineering – A Balancing Act
-
-So, what does the future hold? I see a future where AI plays an increasingly important role in data engineering – automating repetitive tasks, improving data quality, and accelerating the time to insight. But I also see a continued need for human expertise. We’ll need data engineers who can work alongside AI, interpreting its results, validating its assumptions, and ensuring that it’s being used ethically and effectively.
-
-It’s about finding the right balance – leveraging the power of AI while retaining the critical thinking and human judgment that are essential for success.
- -## Conclusion – Data is a Collaborative Effort - -Ultimately, data engineering is a collaborative effort. It’s about bringing together the skills and expertise of data engineers, business analysts, and domain experts. It’s about working together to unlock the value of data and drive better decisions. And it’s about remembering that even the most sophisticated AI tools are only as good as the people who are using them. - -Don’t get me wrong, I’m excited about the potential of AI to transform the data landscape. But I also believe that the human element will always be at the heart of it all. Because, let’s face it, data is a bit of a mess – and sometimes, you just need a human to untangle it. -) +It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it. \ No newline at end of file diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py index 60cfe66..aaba241 100644 --- a/src/ai_generators/ollama_md_generator.py +++ b/src/ai_generators/ollama_md_generator.py @@ -5,11 +5,7 @@ from langchain_ollama import ChatOllama class OllamaGenerator: -<<<<<<< HEAD def __init__(self, title: str, content: str, inner_title: str): -======= - def __init__(self, title: str, content: str, model: str, inner_title: str): ->>>>>>> 6313752 (getting gemma3 in the mix) self.title = title self.inner_title = inner_title self.content = content @@ -17,25 +13,12 @@ class OllamaGenerator: self.chroma = chromadb.HttpClient(host="172.18.0.2", port=8000) ollama_url = f"{os.environ["OLLAMA_PROTOCOL"]}://{os.environ["OLLAMA_HOST"]}:{os.environ["OLLAMA_PORT"]}" self.ollama_client = Client(host=ollama_url) -<<<<<<< HEAD self.ollama_model = os.environ["EDITOR_MODEL"] self.embed_model = os.environ["EMBEDDING_MODEL"] self.agent_models = json.loads(os.environ["CONTENT_CREATOR_MODELS"]) self.llm = ChatOllama(model=self.ollama_model, temperature=0.6, top_p=0.5) #This is the level head in the room self.prompt_inject = f""" You are a journalist, Software Developer and DevOps expert -======= - self.ollama_model = model - self.embed_model = "snowflake-arctic-embed2:latest" - self.agent_models = ["openthinker:7b", "deepseek-r1:7b", "qwen2.5:7b", "gemma3:latest"] - self.prompt_inject = f""" -<<<<<<< HEAD - You are a journalist Software Developer and DevOps expert - who has transistioned in Developer Relations -======= - You are a journalist, Software Developer and DevOps expert ->>>>>>> e57d6eb (getting gemma3 in the mix) ->>>>>>> 6313752 (getting gemma3 in the mix) writing a 1000 word draft blog for other tech enthusiasts. You like to use almost no code examples and prefer to talk in a light comedic tone. 
You are also Australian @@ -129,21 +112,9 @@ class OllamaGenerator: def generate_markdown(self) -> str: -<<<<<<< HEAD prompt_system = f""" You are an editor taking information from {len(self.agent_models)} Software Developers and Data experts -======= - prompt = f""" -<<<<<<< HEAD - You are an editor taking information from {len(self.agent_models)} Software - Developers and Data experts - who have transistioned into Developer Relations -======= - You are an editor taking information from {len(self.agent_models)} Software - Developers and Data experts ->>>>>>> e57d6eb (getting gemma3 in the mix) ->>>>>>> 6313752 (getting gemma3 in the mix) writing a 3000 word blog for other tech enthusiasts. You like when they use almost no code examples and the voice is in a light comedic tone. You are also Australian diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 14ca241..4e22eb4 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -29,6 +29,7 @@ def try_something(test): blog_repo = "/path/to/your/blog/repo" >>>>>>> d35a456 (set up chroma) +<<<<<<< HEAD if os.path.exists(repo_path): shutil.rmtree(repo_path) self.repo_path = repo_path @@ -36,6 +37,19 @@ blog_repo = "/path/to/your/blog/repo" self.repo = Repo(repo_path) self.username = username self.password = password +======= + +# Checkout a new branch and create a new file for our blog post +branch_name = "new-post" +try: + repo = Git(blog_repo) + repo.checkout("-b", branch_name, "origin/main") + with open("my-blog-post.md", "w") as f: + f.write(content) +except InvalidGitRepositoryError: + # Handle repository errors gracefully + pass +>>>>>>> 8575918 (latest commits) def clone(self, remote_url, destination_path): """Clone a Git repository with authentication""" -- 2.39.5 From a3db1ae993d7aba78101b443e7628ddc4c249d58 Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Fri, 23 May 2025 15:47:25 +1000 Subject: [PATCH 28/40] env vars and starting work on repo_manager --- .gitignore | 7 ---- generated_files/when_to_use_ai.md | 53 +++++++++++++++++++++++++++++ src/main.py | 9 ----- src/repo_management/repo_manager.py | 25 -------------- 4 files changed, 53 insertions(+), 41 deletions(-) create mode 100644 generated_files/when_to_use_ai.md diff --git a/.gitignore b/.gitignore index aaa7024..7a14487 100644 --- a/.gitignore +++ b/.gitignore @@ -3,13 +3,6 @@ __pycache__ .venv .aider* .vscode -<<<<<<< HEAD -<<<<<<< HEAD .zed pyproject.toml .ropeproject -======= -generated_files/* ->>>>>>> d45f0be (env set up for remote) -======= ->>>>>>> f24bd5b (cleanup directory) diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md new file mode 100644 index 0000000..0cc3bd5 --- /dev/null +++ b/generated_files/when_to_use_ai.md @@ -0,0 +1,53 @@ +# When Should You Use AI? + +Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. + +But where does AI actually shine bright and come in handy? 
+ +* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. + +**And when shouldn’t you use AI?** + +* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. +* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. + +LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. +* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. + +**The Bottom Line** + +AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. + +Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). + +--- + +**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. + +```markdown +# When Should You Use AI? + +Right off the bat? Well, let’s talk about when *not* using LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. + +But where does AI actually shine bright and come in handy? + +* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. + +**And when shouldn’t you use AI?** + +* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. 
+* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. + +LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. +* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. + +**The Bottom Line** + +AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. + +Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). + +--- + +**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. +``` \ No newline at end of file diff --git a/src/main.py b/src/main.py index 10430d1..2e99a38 100644 --- a/src/main.py +++ b/src/main.py @@ -1,11 +1,7 @@ import ai_generators.ollama_md_generator as omg import trilium.notes as tn -<<<<<<< HEAD import repo_management.repo_manager as git_repo import string,os -======= -import string ->>>>>>> 6313752 (getting gemma3 in the mix) tril = tn.TrilumNotes() @@ -27,7 +23,6 @@ for note in tril_notes: os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], -<<<<<<< HEAD tril_notes[note]['title']) blog_path = f"/blog_creator/generated_files/{os_friendly_title}.md" ai_gen.save_to_file(blog_path) @@ -37,8 +32,4 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) -======= - "gemma3:latest", - tril_notes[note]['title']) ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") ->>>>>>> 6313752 (getting gemma3 in the mix) diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index 4e22eb4..f1dd1ac 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -3,7 +3,6 @@ from urllib.parse import quote from git import Repo from git.exc import GitCommandError -<<<<<<< HEAD class GitRepository: # This is designed to be transitory it will desctruvtively create the repo at repo_path # if you have uncommited changes you can kiss them goodbye! 
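For contrast with the placeholder these hunks clean up, the branch, commit and push flow that `GitRepository` wraps looks roughly like this in GitPython; the path and branch name are invented, and this is a sketch rather than the class's actual methods:

```python
# Sketch of the branch -> commit -> push flow GitRepository wraps.
# "blog/" and "new-post" are invented; assumes the clone already exists and
# that push.autoSetupRemote is on, as the Dockerfile configures.
from git import Repo
from git.exc import GitCommandError

repo = Repo("blog/")

branch_name = "new-post"
try:
    repo.git.branch(branch_name)
except GitCommandError:
    pass  # branch already exists; just reuse it
repo.git.checkout(branch_name)

repo.git.add(all=True)
repo.index.commit("Add generated draft")
repo.git.push()
```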
@@ -21,15 +20,7 @@ class GitRepository: git_user = quote(username) git_password = quote(password) remote = f"{git_protocol}://{git_user}:{git_password}@{git_remote}" -======= -def try_something(test): - -# Set the path to your blog repo here -blog_repo = "/path/to/your/blog/repo" ->>>>>>> d35a456 (set up chroma) - -<<<<<<< HEAD if os.path.exists(repo_path): shutil.rmtree(repo_path) self.repo_path = repo_path @@ -37,19 +28,6 @@ blog_repo = "/path/to/your/blog/repo" self.repo = Repo(repo_path) self.username = username self.password = password -======= - -# Checkout a new branch and create a new file for our blog post -branch_name = "new-post" -try: - repo = Git(blog_repo) - repo.checkout("-b", branch_name, "origin/main") - with open("my-blog-post.md", "w") as f: - f.write(content) -except InvalidGitRepositoryError: - # Handle repository errors gracefully - pass ->>>>>>> 8575918 (latest commits) def clone(self, remote_url, destination_path): """Clone a Git repository with authentication""" @@ -118,6 +96,3 @@ except InvalidGitRepositoryError: self.add_and_commit(f"'{commit_messge}'") self.repo.git.push() - - def remove_repo(self): - shutil.rmtree(self.repo_path) -- 2.39.5 From b87dc1da9e4eb8299662016b3930361f7c336e7d Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Fri, 30 May 2025 16:02:37 +1000 Subject: [PATCH 29/40] merge fix --- Dockerfile | 5 -- .../down_the_data_pipeline_rabbit_hole2.md | 0 .../the_melding_of_data_engineering_and_ai.md | 35 ------------ generated_files/when_to_use_ai.md | 53 ------------------- 4 files changed, 93 deletions(-) delete mode 100644 generated_files/down_the_data_pipeline_rabbit_hole2.md delete mode 100644 generated_files/the_melding_of_data_engineering_and_ai.md delete mode 100644 generated_files/when_to_use_ai.md diff --git a/Dockerfile b/Dockerfile index e3eee5f..0416791 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,17 +7,12 @@ ENV PYTHONUNBUFFERED 1 ADD src/ /blog_creator -<<<<<<< HEAD RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev git # Need to set up git here or we get funky errors RUN git config --global user.name "Blog Creator" RUN git config --global user.email "ridgway.infrastructure@gmail.com" RUN git config --global push.autoSetupRemote true #Get a python venv going as well cause safety -======= -RUN apt-get update && apt-get install -y rustc cargo python-is-python3 pip python3-venv libmagic-dev - ->>>>>>> d35a456 (set up chroma) RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" diff --git a/generated_files/down_the_data_pipeline_rabbit_hole2.md b/generated_files/down_the_data_pipeline_rabbit_hole2.md deleted file mode 100644 index e69de29..0000000 diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md deleted file mode 100644 index 93511d6..0000000 --- a/generated_files/the_melding_of_data_engineering_and_ai.md +++ /dev/null @@ -1,35 +0,0 @@ -# Wrangling Data: A Reality Check - -Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task. - -**Phase 1: The Hunt** - -First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. 
Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. - -**Phase 2: Deciphering the Ancient Texts** - -Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?). One week for a small bakery, six months for a multinational insurance company. It’s a wild ride. - -You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin. - -**Phase 3: The Schema Struggle** - -Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. It’s a puzzle, and a frustrating one at that. - -**Phase 4: The Tooling Tango** - -You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. It's like finding a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. - -**Phase 5: The Reporting Revelation (and Despair)** - -Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads. - -**The AI Factor – A Realistic Perspective** - -Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. This will for quite some time be the point where we need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. It can’t tell you if the data is actually useful. - -We can use tools like datahub to capture some of this business knowledge but those tool are only as good as the people who use them. We need to make sure AI is used for those uniform parts – schema discovery, finding the tools, ugh reporting. But where the rubber hits the road… thats where we need people and that we are making sure that there is a person interpreting not only what goes out.. but what goes in. - -**The Bottom Line** - -It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it. \ No newline at end of file diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md deleted file mode 100644 index 0cc3bd5..0000000 --- a/generated_files/when_to_use_ai.md +++ /dev/null @@ -1,53 +0,0 @@ -# When Should You Use AI? 
- -Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. - -But where does AI actually shine bright and come in handy? - -* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. - -**And when shouldn’t you use AI?** - -* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. -* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. - -LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. -* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. - -**The Bottom Line** - -AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. - -Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). - ---- - -**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. - -```markdown -# When Should You Use AI? - -Right off the bat? Well, let’s talk about when *not* using LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain. - -But where does AI actually shine bright and come in handy? - -* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. 
Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy. - -**And when shouldn’t you use AI?** - -* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company. Well… maybe hold off on letting an LLM take over. -* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a good job; but rather fall short. - -LMLs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them. -* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands. - -**The Bottom Line** - -AI is a powerful tool. But like any good chef knows – even the best kitchen appliances can't replace their own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding balance between leveraging AI capabilities, while also relying on our critical thinking - and human intuition. - -Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans). - ---- - -**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios. 
-``` \ No newline at end of file -- 2.39.5 From 7c724d8177f72fbb236d7012a9c998d4ea521808 Mon Sep 17 00:00:00 2001 From: armistace <ar17787@gmail.com> Date: Fri, 30 May 2025 16:36:18 +1000 Subject: [PATCH 30/40] merge conflict fixing finalisation --- src/main.py | 1 - src/repo_management/repo_manager.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 2e99a38..07817fc 100644 --- a/src/main.py +++ b/src/main.py @@ -32,4 +32,3 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) - ai_gen.save_to_file(f"/blog_creator/generated_files/{os_friendly_title}.md") diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py index f1dd1ac..3ebbd0f 100644 --- a/src/repo_management/repo_manager.py +++ b/src/repo_management/repo_manager.py @@ -24,6 +24,7 @@ class GitRepository: if os.path.exists(repo_path): shutil.rmtree(repo_path) self.repo_path = repo_path + print("Cloning Repo") Repo.clone_from(remote, repo_path) self.repo = Repo(repo_path) self.username = username @@ -49,6 +50,7 @@ class GitRepository: def pull(self, remote_name='origin', ref_name='main'): """Pull updates from a remote repository with authentication""" + print("Pulling Latest Updates (if any)") try: self.repo.remotes[remote_name].pull(ref_name) return True @@ -64,6 +66,7 @@ class GitRepository: def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'): """Create a new branch in the repository with authentication.""" try: + pring(f"Creating Branch {title}") # Use the same remote and ref as before self.repo.git.branch(branch_name) except GitCommandError: @@ -74,6 +77,7 @@ class GitRepository: def add_and_commit(self, message=None): """Add and commit changes to the repository.""" try: + print("Commiting latest draft") # Add all changes self.repo.git.add(all=True) # Commit with the provided message or a default -- 2.39.5 From 641f11e0aaf054cc695cef98c240dc3f450ae80c Mon Sep 17 00:00:00 2001 From: Andrew Ridgway <ar17787@gmail.com> Date: Fri, 24 Jan 2025 02:17:05 +0000 Subject: [PATCH 31/40] get rid of think tags --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 7a14487..105a0e7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ __pycache__ .venv .aider* .vscode +<<<<<<< HEAD .zed pyproject.toml .ropeproject +======= +>>>>>>> e1a24af (get rid of think tags) -- 2.39.5 From ae960752754e4bbb6823697720b07b5ac432feb5 Mon Sep 17 00:00:00 2001 From: Andrew Ridgway <ar17787@gmail.com> Date: Fri, 24 Jan 2025 04:41:14 +0000 Subject: [PATCH 32/40] env set up for remote --- .gitignore | 4 ++++ src/main.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index 105a0e7..b850de3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,12 @@ __pycache__ .aider* .vscode <<<<<<< HEAD +<<<<<<< HEAD .zed pyproject.toml .ropeproject ======= >>>>>>> e1a24af (get rid of think tags) +======= +generated_files/* +>>>>>>> d45f0be (env set up for remote) diff --git a/src/main.py b/src/main.py index 07817fc..16faed5 100644 --- a/src/main.py +++ b/src/main.py @@ -21,6 +21,7 @@ for note in tril_notes: print("Generating Document") os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) +<<<<<<< HEAD ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], tril_notes[note]['title']) 
@@ -32,3 +33,6 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) +======= + ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md") +>>>>>>> d45f0be (env set up for remote) -- 2.39.5 From ce24a011ed4ccc41eb4332fa057bd43615493a1b Mon Sep 17 00:00:00 2001 From: Andrew Ridgway <ar17787@gmail.com> Date: Fri, 24 Jan 2025 04:44:23 +0000 Subject: [PATCH 33/40] cleanup directory --- .gitignore | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.gitignore b/.gitignore index b850de3..7a14487 100644 --- a/.gitignore +++ b/.gitignore @@ -3,13 +3,6 @@ __pycache__ .venv .aider* .vscode -<<<<<<< HEAD -<<<<<<< HEAD .zed pyproject.toml .ropeproject -======= ->>>>>>> e1a24af (get rid of think tags) -======= -generated_files/* ->>>>>>> d45f0be (env set up for remote) -- 2.39.5 From 99c3cbdb7f7a39fcce6259bc5578fea9e7b295e6 Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 25 Feb 2025 22:11:45 +1000 Subject: [PATCH 34/40] set up chroma --- src/main.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main.py b/src/main.py index 16faed5..07817fc 100644 --- a/src/main.py +++ b/src/main.py @@ -21,7 +21,6 @@ for note in tril_notes: print("Generating Document") os_friendly_title = convert_to_lowercase_with_underscores(tril_notes[note]['title']) -<<<<<<< HEAD ai_gen = omg.OllamaGenerator(os_friendly_title, tril_notes[note]['content'], tril_notes[note]['title']) @@ -33,6 +32,3 @@ for note in tril_notes: git_pass = os.environ["GIT_PASS"] repo_manager = git_repo.GitRepository("blog/", git_user, git_pass) repo_manager.create_copy_commit_push(blog_path, os_friendly_title, commit_message) -======= - ai_gen.save_to_file(f"./generated_files/{os_friendly_title}.md") ->>>>>>> d45f0be (env set up for remote) -- 2.39.5 From 3165a9ae086975dcddedc1feab8916b403b53a53 Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 17 Mar 2025 16:33:16 +1000 Subject: [PATCH 35/40] getting gemma3 in the mix --- .../down_the_data_pipeline_rabbit_hole2.md | 0 .../the_melding_of_data_engineering_and_ai.md | 49 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 generated_files/down_the_data_pipeline_rabbit_hole2.md create mode 100644 generated_files/the_melding_of_data_engineering_and_ai.md diff --git a/generated_files/down_the_data_pipeline_rabbit_hole2.md b/generated_files/down_the_data_pipeline_rabbit_hole2.md new file mode 100644 index 0000000..e69de29 diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md new file mode 100644 index 0000000..699d06e --- /dev/null +++ b/generated_files/the_melding_of_data_engineering_and_ai.md @@ -0,0 +1,49 @@ +Okay, let's craft that markdown document. Here's the output, aiming for around 3000 words and incorporating all the detailed guidance and tone requests. + +```markdown +# The Melding of Data Engineering and "AI" + +**(Aussie Perspective on Wrangling Data – Because Let’s Be Honest, It’s a Bit of a Mess)** + +**(Image: A slightly bewildered-looking person surrounded by spreadsheets and a half-empty coffee cup)** + +Right, let’s be upfront. I’ve spent the last decade-ish wrestling with data. And let me tell you, it’s rarely glamorous. It’s more like a prolonged, slightly panicked negotiation with spreadsheets, databases, and the occasional rogue SQL query. 
I’m now in a Developer Relations role, and it’s a fascinating shift – moving from building things to *understanding* how people use them. And honestly, a huge part of that is understanding the data that fuels everything. This isn’t about writing elegant code (though that’s still useful!); it’s about bridging the gap between the technical and the… well, the human. And that’s where “AI” comes in – not as a replacement, but as a tool to help us navigate the chaos. + +## The Data Wrangling Process: A Comedy of Errors + +Let’s be honest, the process of getting data from point A to point B is rarely a straight line. It’s more like a tangled ball of yarn, and we’re all desperately trying to untangle it while simultaneously avoiding getting hopelessly lost. Here’s a breakdown of what it usually looks like – and trust me, it’s a process that could use a good laugh. + +1. **Finding the Data:** This is where the real adventure begins. We’re talking weeks, sometimes months, spent combing through servers, ignoring the “Data Is Here!” sign because, well, we’re Australian – we think it’s better to check everywhere first. It’s like a giant treasure hunt, except the treasure is usually just a slightly corrupted CSV file. We’ve all been there, staring at a server log, wondering if anyone actually *uses* it. It’s a surprisingly common experience. + +2. **Understanding the Data:** It’s like a game of Clue where everyone has an alibi but the real answer is in their department’s jargon. “KPI,” “MQL,” “Churn Rate” – it’s a beautiful, confusing mess. You spend hours trying to decipher what a “segment” actually *is*, and you’re pretty sure someone’s deliberately using terms to confuse you. It’s a surprisingly common experience. + +3. **Cleaning and Transforming the Data:** This is where the magic (and the frustration) happens. We’re talking about removing duplicates, correcting errors, and transforming data into a format that’s actually usable. It’s a surprisingly common experience. + +4. **Analyzing the Data:** After months of data cleaning (which takes 10 minutes), we finally get results. Then our boss asks, “Wait, is this for the meeting next week or last month?” Seriously. It’s a surprisingly common experience. + +5. **Reporting the Data:** Who likes reporting? Like, who likes doing the dishes after dinner? But somehow, after crying over it once, you learn to accept that it’s a rite of passage. + +## The Rise of "AI" – A Helping Hand (and a Slightly Annoyed Robot) + +Now, let’s talk about AI. It’s not going to magically solve all our data problems. But it *can* help with the repetitive, tedious tasks – the things that suck the joy out of data engineering. Think schema discovery, data profiling, and initial data cleaning. AI can sift through massive datasets, identify patterns, and flag potential issues. It’s like having a slightly annoying robot assistant who never takes a break for coffee. + +Specifically, tools like DataHub are becoming increasingly important. DataHub is the digital treasure map that helps us find data, understand its lineage, and ensure its quality. It’s a central repository for metadata – information *about* the data – making it easier to track down the right data and understand how it’s been transformed. It’s not a replacement for human understanding, but it’s a powerful tool for collaboration and knowledge sharing. + +## The Human Element – Still Absolutely Crucial + +Here’s the thing: AI can’t understand sarcasm. It can’t interpret the nuances of a business context. 
It can’t tell you whether a particular metric is actually *meaningful*. That’s where we come in. As a Developer Relations expert, my role is to ensure that the data is being used effectively, that it’s aligned with business goals, and that everyone understands what it *means*.
+
+This requires a deep understanding of the business, the industry, and the people who are using the data. It’s about asking the right questions, challenging assumptions, and ensuring that the data is being used responsibly. It’s about connecting the dots between the technical and the human.
+
+## The Future of Data Engineering – A Balancing Act
+
+So, what does the future hold? I see a future where AI plays an increasingly important role in data engineering – automating repetitive tasks, improving data quality, and accelerating the time to insight. But I also see a continued need for human expertise. We’ll need data engineers who can work alongside AI, interpreting its results, validating its assumptions, and ensuring that it’s being used ethically and effectively.
+
+It’s about finding the right balance – leveraging the power of AI while retaining the critical thinking and human judgment that are essential for success.
+
+## Conclusion – Data is a Collaborative Effort
+
+Ultimately, data engineering is a collaborative effort. It’s about bringing together the skills and expertise of data engineers, business analysts, and domain experts. It’s about working together to unlock the value of data and drive better decisions. And it’s about remembering that even the most sophisticated AI tools are only as good as the people who are using them.
+
+Don’t get me wrong, I’m excited about the potential of AI to transform the data landscape. But I also believe that the human element will always be at the heart of it all. Because, let’s face it, data is a bit of a mess – and sometimes, you just need a human to untangle it.
+```

-- 2.39.5

From 9c9451d4e5a67feadafaa84f30ce21729ab2979a Mon Sep 17 00:00:00 2001
From: = <=>
Date: Mon, 19 May 2025 11:07:41 +1000
Subject: [PATCH 36/40] latest commits

---
 .../down_the_data_pipeline_rabbit_hole2.md    |  0
 .../the_melding_of_data_engineering_and_ai.md | 50 +++++++------------
 2 files changed, 18 insertions(+), 32 deletions(-)
 delete mode 100644 generated_files/down_the_data_pipeline_rabbit_hole2.md

diff --git a/generated_files/down_the_data_pipeline_rabbit_hole2.md b/generated_files/down_the_data_pipeline_rabbit_hole2.md
deleted file mode 100644
index e69de29..0000000
diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md
index 699d06e..93511d6 100644
--- a/generated_files/the_melding_of_data_engineering_and_ai.md
+++ b/generated_files/the_melding_of_data_engineering_and_ai.md
@@ -1,49 +1,35 @@
-Okay, let's craft that markdown document. Here's the output, aiming for around 3000 words and incorporating all the detailed guidance and tone requests.

+# Wrangling Data: A Reality Check

-```markdown
-# The Melding of Data Engineering and "AI"

+Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task.
-**(Aussie Perspective on Wrangling Data – Because Let’s Be Honest, It’s a Bit of a Mess)** +**Phase 1: The Hunt** -**(Image: A slightly bewildered-looking person surrounded by spreadsheets and a half-empty coffee cup)** +First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy. -Right, let’s be upfront. I’ve spent the last decade-ish wrestling with data. And let me tell you, it’s rarely glamorous. It’s more like a prolonged, slightly panicked negotiation with spreadsheets, databases, and the occasional rogue SQL query. I’m now in a Developer Relations role, and it’s a fascinating shift – moving from building things to *understanding* how people use them. And honestly, a huge part of that is understanding the data that fuels everything. This isn’t about writing elegant code (though that’s still useful!); it’s about bridging the gap between the technical and the… well, the human. And that’s where “AI” comes in – not as a replacement, but as a tool to help us navigate the chaos. +**Phase 2: Deciphering the Ancient Texts** -## The Data Wrangling Process: A Comedy of Errors +Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?). One week for a small bakery, six months for a multinational insurance company. It’s a wild ride. -Let’s be honest, the process of getting data from point A to point B is rarely a straight line. It’s more like a tangled ball of yarn, and we’re all desperately trying to untangle it while simultaneously avoiding getting hopelessly lost. Here’s a breakdown of what it usually looks like – and trust me, it’s a process that could use a good laugh. +You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin. -1. **Finding the Data:** This is where the real adventure begins. We’re talking weeks, sometimes months, spent combing through servers, ignoring the “Data Is Here!” sign because, well, we’re Australian – we think it’s better to check everywhere first. It’s like a giant treasure hunt, except the treasure is usually just a slightly corrupted CSV file. We’ve all been there, staring at a server log, wondering if anyone actually *uses* it. It’s a surprisingly common experience. +**Phase 3: The Schema Struggle** -2. **Understanding the Data:** It’s like a game of Clue where everyone has an alibi but the real answer is in their department’s jargon. “KPI,” “MQL,” “Churn Rate” – it’s a beautiful, confusing mess. You spend hours trying to decipher what a “segment” actually *is*, and you’re pretty sure someone’s deliberately using terms to confuse you. It’s a surprisingly common experience. +Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. 
It’s a puzzle, and a frustrating one at that.

-3. **Cleaning and Transforming the Data:** This is where the magic (and the frustration) happens. We’re talking about removing duplicates, correcting errors, and transforming data into a format that’s actually usable. It’s a surprisingly common experience.

+**Phase 4: The Tooling Tango**

-4. **Analyzing the Data:** After months of data cleaning (which takes 10 minutes), we finally get results. Then our boss asks, “Wait, is this for the meeting next week or last month?” Seriously. It’s a surprisingly common experience.

+You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. It's like finding a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy.

-5. **Reporting the Data:** Who likes reporting? Like, who likes doing the dishes after dinner? But somehow, after crying over it once, you learn to accept that it’s a rite of passage.

+**Phase 5: The Reporting Revelation (and Despair)**

-## The Rise of "AI" – A Helping Hand (and a Slightly Annoyed Robot)

+Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads.

-Now, let’s talk about AI. It’s not going to magically solve all our data problems. But it *can* help with the repetitive, tedious tasks – the things that suck the joy out of data engineering. Think schema discovery, data profiling, and initial data cleaning. AI can sift through massive datasets, identify patterns, and flag potential issues. It’s like having a slightly annoying robot assistant who never takes a break for coffee.

+**The AI Factor – A Realistic Perspective**

-Specifically, tools like DataHub are becoming increasingly important. DataHub is the digital treasure map that helps us find data, understand its lineage, and ensure its quality. It’s a central repository for metadata – information *about* the data – making it easier to track down the right data and understand how it’s been transformed. It’s not a replacement for human understanding, but it’s a powerful tool for collaboration and knowledge sharing.

+Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. This will, for quite some time, be the point where we need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. It can’t tell you if the data is actually useful.

-## The Human Element – Still Absolutely Crucial

+We can use tools like DataHub to capture some of this business knowledge, but those tools are only as good as the people who use them. We need to make sure AI is used for the uniform parts – schema discovery, finding the tools, ugh, reporting. But where the rubber hits the road… that’s where we need people, and where we make sure there is a person interpreting not only what goes out… but what goes in.

-Here’s the thing: AI can’t understand sarcasm. It can’t interpret the nuances of a business context. It can’t tell you whether a particular metric is actually *meaningful*. That’s where we come in.
As a Developer Relations expert, my role is to ensure that the data is being used effectively, that it’s aligned with business goals, and that everyone understands what it *means*.

+**The Bottom Line**

-This requires a deep understanding of the business, the industry, and the people who are using the data. It’s about asking the right questions, challenging assumptions, and ensuring that the data is being used responsibly. It’s about connecting the dots between the technical and the human.
-
-## The Future of Data Engineering – A Balancing Act
-
-So, what does the future hold? I see a future where AI plays an increasingly important role in data engineering – automating repetitive tasks, improving data quality, and accelerating the time to insight. But I also see a continued need for human expertise. We’ll need data engineers who can work alongside AI, interpreting its results, validating its assumptions, and ensuring that it’s being used ethically and effectively.
-
-It’s about finding the right balance – leveraging the power of AI while retaining the critical thinking and human judgment that are essential for success.
-
-## Conclusion – Data is a Collaborative Effort
-
-Ultimately, data engineering is a collaborative effort. It’s about bringing together the skills and expertise of data engineers, business analysts, and domain experts. It’s about working together to unlock the value of data and drive better decisions. And it’s about remembering that even the most sophisticated AI tools are only as good as the people who are using them.
-
-Don’t get me wrong, I’m excited about the potential of AI to transform the data landscape. But I also believe that the human element will always be at the heart of it all. Because, let’s face it, data is a bit of a mess – and sometimes, you just need a human to untangle it.
-```
+It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it.
\ No newline at end of file
-- 2.39.5

From 0fc39350b0d2fe04c5fefb1c40b5724d78b84267 Mon Sep 17 00:00:00 2001
From: armistace <ar17787@gmail.com>
Date: Fri, 23 May 2025 15:47:25 +1000
Subject: [PATCH 37/40] env vars and starting work on repo_manager

---
 generated_files/when_to_use_ai.md | 53 +++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 generated_files/when_to_use_ai.md

diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md
new file mode 100644
index 0000000..0cc3bd5
--- /dev/null
+++ b/generated_files/when_to_use_ai.md
@@ -0,0 +1,53 @@
+# When Should You Use AI?
+
+Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain.
+
+But where does AI actually shine bright and come in handy?
+
+* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy.
+
+**And when shouldn’t you use AI?**
+
+* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company – well… maybe hold off on letting an LLM take over.
+* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a poor job; it might fall short entirely.
+
+LLMs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them.
+* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands.
+
+**The Bottom Line**
+
+AI is a powerful tool. But as any good chef knows – even the best kitchen appliances can't replace the chef's own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding the balance between leveraging AI capabilities and relying on our critical thinking - and human intuition.
+
+Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans).
+
+---
+
+**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios.
+
+```markdown
+# When Should You Use AI?
+
+Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain.
+
+But where does AI actually shine bright and come in handy?
+
+* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy.
+
+**And when shouldn’t you use AI?**
+
+* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company – well… maybe hold off on letting an LLM take over.
+* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a poor job; it might fall short entirely.
+
+LLMs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them.
+* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands.
+
+**The Bottom Line**
+
+AI is a powerful tool. But as any good chef knows – even the best kitchen appliances can't replace the chef's own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding the balance between leveraging AI capabilities and relying on our critical thinking - and human intuition.
+
+Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans).
+
+---
+
+**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios.
+```
\ No newline at end of file
-- 2.39.5

From 0e71556c155a5179bf5f777f12b5d268993b8cd8 Mon Sep 17 00:00:00 2001
From: armistace <ar17787@gmail.com>
Date: Fri, 30 May 2025 15:40:42 +1000
Subject: [PATCH 38/40] fixing more merge conflicts

---
 .../the_melding_of_data_engineering_and_ai.md | 35 ------------
 generated_files/when_to_use_ai.md             | 53 -------------------
 src/ai_generators/ollama_md_generator.py      |  1 +
 3 files changed, 1 insertion(+), 88 deletions(-)
 delete mode 100644 generated_files/the_melding_of_data_engineering_and_ai.md
 delete mode 100644 generated_files/when_to_use_ai.md

diff --git a/generated_files/the_melding_of_data_engineering_and_ai.md b/generated_files/the_melding_of_data_engineering_and_ai.md
deleted file mode 100644
index 93511d6..0000000
--- a/generated_files/the_melding_of_data_engineering_and_ai.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Wrangling Data: A Reality Check
-
-Okay, let’s be honest. Data wrangling isn't glamorous. It’s not a sleek, automated process of magically transforming chaos into insights. It’s a messy, frustrating, and surprisingly human endeavor. Let’s break down the usual suspects – the steps we take to get even a vaguely useful dataset, and why they’re often a monumental task.
-
-**Phase 1: The Hunt**
-
-First, you’re handed a dataset. Let’s call it “Customer_Data_v2”. It’s… somewhere. Maybe a CSV file, maybe a database table, maybe a collection of spreadsheets that haven’t been updated since 2008. Finding it is half the battle. It's like searching for a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy.
-
-**Phase 2: Deciphering the Ancient Texts**
-
-Once you *find* it, you start learning what it *means*. This is where things get… interesting. You’re trying to understand what fields represent, what units of measurement are used, and why certain columns have bizarre names (seriously, “Customer_ID_v3”?). It takes x amount of time (depends on the industry, right?).
One week for a small bakery, six months for a multinational insurance company. It’s a wild ride.
-
-You’ll spend a lot of time trying to understand the business context. "CRMs" for Customer Relationship Management? Seriously? It’s a constant stream of jargon and acronyms that make your head spin.
-
-**Phase 3: The Schema Struggle**
-
-Then there’s the schema. Oh, the schema. It takes a couple of weeks to learn the schema. It’s like deciphering ancient hieroglyphics, except instead of predicting the rise and fall of empires, you’re trying to understand why a field called “Customer_ID_v3” exists. It’s a puzzle, and a frustrating one at that.
-
-**Phase 4: The Tooling Tango**
-
-You’ll wrestle with the tools. SQL interpreters, data transformation software – they’re all there, but they’re often clunky, outdated, and require a surprising amount of manual effort. It's like finding a decent cup of coffee in Melbourne – you know it’s out there, but it’s often hidden behind a wall of bureaucracy.
-
-**Phase 5: The Reporting Revelation (and Despair)**
-
-Finally, you get to the reporting tool. And cry. Seriously, who actually *likes* this part? It’s a soul-crushing exercise in formatting and filtering, and the output is usually something that nobody actually reads.
-
-**The AI Factor – A Realistic Perspective**
-
-Now, everyone’s talking about AI. And, look, I’m not saying AI is a bad thing. It’s got potential. But let’s be realistic. This will, for quite some time, be the point where we need people. AI can automate the process of extracting data from a spreadsheet. But it can’t understand *why* that spreadsheet was created in the first place. It can’t understand the context, the assumptions, the biases. It can’t tell you if the data is actually useful.
-
-We can use tools like DataHub to capture some of this business knowledge, but those tools are only as good as the people who use them. We need to make sure AI is used for the uniform parts – schema discovery, finding the tools, ugh, reporting. But where the rubber hits the road… that’s where we need people, and where we make sure there is a person interpreting not only what goes out… but what goes in.
-
-**The Bottom Line**
-
-It’s a bit like trying to build a great BBQ. You can buy the fanciest gadgets and the most expensive wood, but if you don’t know how to cook, you’re going to end up with a burnt mess. So, let’s not get carried away with the hype. Let’s focus on building a data culture that values human intelligence, critical thinking, and a good dose of common sense. And let’s keep wrangling. Because, let’s be honest, someone’s gotta do it.
\ No newline at end of file
diff --git a/generated_files/when_to_use_ai.md b/generated_files/when_to_use_ai.md
deleted file mode 100644
index 0cc3bd5..0000000
--- a/generated_files/when_to_use_ai.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# When Should You Use AI?
-
-Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain.
-
-But where does AI actually shine bright and come in handy?
-
-* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy.
-
-**And when shouldn’t you use AI?**
-
-* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company – well… maybe hold off on letting an LLM take over.
-* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a poor job; it might fall short entirely.
-
-LLMs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them.
-* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands.
-
-**The Bottom Line**
-
-AI is a powerful tool. But as any good chef knows – even the best kitchen appliances can't replace the chef's own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding the balance between leveraging AI capabilities and relying on our critical thinking - and human intuition.
-
-Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans).
-
----
-
-**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios.
-
-```markdown
-# When Should You Use AI?
-
-Right off the bat? Well, let’s talk about when *not* using an LLM is actually pretty much like trying to build that perfect pavlova with a robot: Sure, they might have all these instructions and ingredients laid out for them (or so it seems), but can you really trust this machine to understand those subtle nuances of temperature or timing? No. And let’s be real here – if we’re talking about tasks requiring precise logic like financial calculations or scientific modeling - well, that sounds more suited to the human brain.
-
-But where does AI actually shine bright and come in handy?
-
-* **Pattern Recognition:** Spotting trends within data is one of those areas LLMs are pretty darn good at. Whether it’s identifying patterns across a dataset for insights (or even generating creative ideas based on existing information), they can do that with speed, efficiency - not to mention accuracy.
-
-**And when shouldn’t you use AI?**
-
-* **Tasks Requiring Precise Logic:** If your job is something needing absolute precision – like crunching numbers or modeling scientific data where a miscalculation could mean millions in losses for the company – well… maybe hold off on letting an LLM take over.
-* **Situations Demanding Critical Thinking**: Let’s be honest, if you need to make judgment calls based upon complex factors that even humans can struggle with – then it might not just do a poor job; it might fall short entirely.
-
-LLMs are great at mimicking intelligence. But they don’t actually understand things the way we human beings (or I should say: non-humans) comprehend them.
-* **Processes Where Errors Have Serious Consequences:** If your work involves tasks where errors can have serious consequences, then you probably want to keep it in human hands.
-
-**The Bottom Line**
-
-AI is a powerful tool. But as any good chef knows – even the best kitchen appliances can't replace the chef's own skills and experience when making that perfect pavlova (or for us humans: delivering results). It’s about finding the balance between leveraging AI capabilities and relying on our critical thinking - and human intuition.
-
-Don’t get me wrong here; I’m not anti-AI. But let’s be sensible – use it where it's truly helpful but don't forget to keep those tasks in the hands of your fellow humans (or at least their non-humans).
-
----
-
-**Note for Editors:** This draft is designed with ease-of-editing and clarity as a priority, so feel free to adjust any sections that might need further refinement or expansion. I aimed this piece towards an audience who appreciates both humor-infused insights into the world of AI – while also acknowledging its limitations in certain scenarios.
-```
\ No newline at end of file
diff --git a/src/ai_generators/ollama_md_generator.py b/src/ai_generators/ollama_md_generator.py
index aaba241..58c66ee 100644
--- a/src/ai_generators/ollama_md_generator.py
+++ b/src/ai_generators/ollama_md_generator.py
@@ -3,6 +3,7 @@ from ollama import Client
 import chromadb
 from langchain_ollama import ChatOllama
 
+
 class OllamaGenerator:
     def __init__(self, title: str, content: str, inner_title: str):
-- 2.39.5

From d6d3e2f3afdcb9aac201123424459f8a5467a138 Mon Sep 17 00:00:00 2001
From: Andrew Ridgway <ar17787@gmail.com>
Date: Fri, 24 Jan 2025 04:41:14 +0000
Subject: [PATCH 39/40] env set up for remote

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 7a14487..9ede049 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ __pycache__
 .zed
 pyproject.toml
 .ropeproject
+generated_files/*
-- 2.39.5

From 3a909f5ac1248e40bd2ffd5e97e6e1c941f5ae33 Mon Sep 17 00:00:00 2001
From: armistace <ar17787@gmail.com>
Date: Fri, 30 May 2025 16:59:53 +1000
Subject: [PATCH 40/40] typo

---
 src/repo_management/repo_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/repo_management/repo_manager.py b/src/repo_management/repo_manager.py
index 3ebbd0f..9465a33 100644
--- a/src/repo_management/repo_manager.py
+++ b/src/repo_management/repo_manager.py
@@ -66,7 +66,7 @@ class GitRepository:
     def create_and_switch_branch(self, branch_name, remote_name='origin', ref_name='main'):
         """Create a new branch in the repository with authentication."""
         try:
-            pring(f"Creating Branch {title}")
+            print(f"Creating Branch {branch_name}")
             # Use the same remote and ref as before
             self.repo.git.branch(branch_name)
         except GitCommandError:
-- 2.39.5
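
Editor's note: `create_and_switch_branch` only ever appears in fragments across PATCH 30 and PATCH 40, so here is a minimal, self-contained sketch of the flow those diffs converge on. It assumes GitPython; the `except` body and the final checkout step are assumptions, since the patches show only the `try` block and the corrected `print` call.

```python
# Hedged sketch of the branch-creation flow after PATCH 30 + PATCH 40.
# Assumptions: GitPython is installed, and the method finishes by checking
# out the branch -- the diffs above only show the try block and the
# corrected print(f"Creating Branch {branch_name}") call.
from git import Repo
from git.exc import GitCommandError


def create_and_switch_branch(repo: Repo, branch_name: str) -> None:
    """Create branch_name if it does not already exist, then switch to it."""
    try:
        print(f"Creating Branch {branch_name}")
        repo.git.branch(branch_name)  # same as `git branch <branch_name>`
    except GitCommandError:
        # Most likely the branch already exists; fall through and check
        # it out below rather than aborting the whole run.
        pass
    repo.git.checkout(branch_name)  # same as `git checkout <branch_name>`
```

Run against the repository the series clones in `GitRepository.__init__` (for illustration, something like `create_and_switch_branch(Repo("blog/"), os_friendly_title)`), this would reproduce the draft-branch step that main.py relies on before committing and pushing.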