From 77c335a4e94a9400d0f9b2d26e127fdb14cb3a67 Mon Sep 17 00:00:00 2001 From: Andrew Ridgway Date: Thu, 21 May 2026 21:06:42 +1000 Subject: [PATCH 1/2] updated readme --- README.md | 110 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index b199dd3..2ba251c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Automated pull request review system using [CrewAI](https://crewai.com) Flows and MCP (Model Context Protocol) tools. -Performs three parallel reviews — code quality, security, and infrastructure — then synthesizes a consolidated report via a REST API. +Performs three parallel reviews — code quality, security, and infrastructure — then synthesizes a consolidated report via a REST API. Supports both a direct API and a Gitea webhook integration that fetches diffs automatically and posts the review as a PR comment. ## Features @@ -10,32 +10,10 @@ Performs three parallel reviews — code quality, security, and infrastructure - **Security Review** — vulnerabilities, injection risks, auth issues (powered by Trivy) - **Infrastructure Review** — Dockerfiles, Kubernetes manifests, IaC (powered by Hadolint + Checkov) - **Summarisation** — merges all three reviews into a single actionable report -- **REST API** — FastAPI endpoints for health check and review trigger +- **REST API** — FastAPI endpoints for health check, manual review trigger, and Gitea webhook +- **Gitea Webhook** — process PR events directly; fetches diffs, runs reviews, posts results as a PR comment - **Dockerized** — multi-stage build with all tools bundled -## Architecture - -``` -POST /api/v1/review - │ - ▼ - CodeReviewFlow (CrewAI Flow) - │ - ┌────┼──────────────┐ - ▼ ▼ ▼ - Code Security Infra -Review Review Review - │ │ │ - └─────┼────────────┘ - ▼ - Summariser - │ - ▼ - JSON Response -``` - -LLM-agnostic via CrewAI's LLM abstraction — works with OpenAI, Anthropic, or Ollama. 
- ## Quick Start ### Prerequisites @@ -85,6 +63,34 @@ curl -X POST http://localhost:8000/api/v1/review \ }' ``` +## Architecture + +``` +POST /api/v1/review POST /api/v1/gitea-webhook + │ │ + │ Gitea webhook payload + │ │ + │ fetch diffs from + │ Gitea API + │ │ + ▼ ▼ + CodeReviewFlow (CrewAI Flow) + │ + ┌────┼──────────────┐ + ▼ ▼ ▼ + Code Security Infra +Review Review Review + │ │ │ + └─────┼────────────┘ + ▼ + Summariser + │ + ▼ + JSON Response / PR Comment +``` + +LLM-agnostic via CrewAI's LLM abstraction — works with OpenAI, Anthropic, or Ollama. + ## API ### `GET /api/v1/health` @@ -97,7 +103,7 @@ Returns service status. ### `POST /api/v1/review` -Triggers a full PR review. +Triggers a full PR review. Provide file contents and diffs directly in the request body. **Request body:** @@ -144,6 +150,42 @@ Triggers a full PR review. } ``` +### `POST /api/v1/gitea-webhook` + +Receives Gitea webhook events. Only processes `pull_request` events with actions `opened`, `synchronized`, or `reopened`. All other events and actions are ignored. + +The endpoint: + +1. Validates the `X-Gitea-Signature` header using HMAC-SHA256 (if `ACCESS_GITEA_SECRET` is configured) +2. Fetches changed files and their contents from the Gitea API +3. Runs the full review pipeline (code, security, infrastructure, summariser) +4. Posts the review summary as a comment on the PR via the Gitea API + +## Gitea Webhook Setup + +### 1. Create an access token + +In your Gitea instance, go to **Settings → Applications → Generate New Token** and create a token with `read:repository` and `write:issue` scopes (the same token is used to post the review comment on the PR). + +### 2. Add the webhook + +In your Gitea repository, go to **Settings → Webhooks → Add Webhook → Gitea**: + +- **Target URL**: `http://<node-ip>:30001/api/v1/gitea-webhook` +- **HTTP Method**: `POST` +- **Secret**: a random string (optional but recommended) +- **Trigger On**: Pull Request + +### 3. 
Configure environment variables + +Set the following in the container (or k8s secret): + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `ACCESS_GITEA_URL` | yes | `http://192.168.178.160:3000` | Gitea instance base URL | +| `ACCESS_GITEA_TOKEN` | yes | — | Gitea personal access token with `read:repository` and `write:issue` scopes | +| `ACCESS_GITEA_SECRET` | no | `""` | Webhook secret; if set, signatures are validated | + ## Configuration All configuration via environment variables in `.env`: @@ -154,10 +196,23 @@ All configuration via environment variables in `.env`: | `LLM_PROVIDER` | (required) | `openai`, `anthropic`, or `ollama` | | `LLM_BASE_URL` | — | API base URL | | `LLM_API_KEY` | — | API key (not needed for Ollama) | +| `ACCESS_GITEA_URL` | `http://192.168.178.160:3000` | Gitea instance base URL | +| `ACCESS_GITEA_TOKEN` | — | Gitea personal access token with `read:repository` and `write:issue` scopes | +| `ACCESS_GITEA_SECRET` | — | Webhook secret for HMAC-SHA256 signature verification | | `TOTAL_FLOW_TIMEOUT` | `600` | Max seconds for full review | | `PER_CREW_TIMEOUT` | `300` | Max seconds per crew | | `LOG_LEVEL` | `INFO` | Logging level | +## Deployment + +### Kubernetes + +The repo includes a CI pipeline (`.gitea/workflows/build_push.yml`) that builds a multi-arch Docker image, pushes it to the registry, and deploys to Kubernetes. + +The k8s deployment uses a NodePort service exposing port 30001, which maps to the container's port 8000. + +Environment variables are stored in a k8s secret (`pr-reviewer-env`). The CI pipeline creates this secret automatically — add `ACCESS_GITEA_URL`, `ACCESS_GITEA_TOKEN`, and `ACCESS_GITEA_SECRET` as Gitea repo variables/secrets. 
+ ## Development ```bash @@ -179,12 +234,13 @@ uvicorn src.pr_reviewer.main:app --reload ├── crews/ # Crew definitions (code, security, infra, summariser) ├── mcp_servers/ # MCP tool wrappers (Hadolint, Checkov) ├── src/pr_reviewer/ # Core application code -│ ├── main.py # FastAPI app +│ ├── main.py # FastAPI app, endpoints, webhook handler │ ├── flow.py # CrewAI Flow orchestration │ ├── state.py # Pydantic state models │ ├── llm.py # LLM factory │ └── context.py # Context resolution ├── tests/ # Unit and integration tests +├── kube/ # Kubernetes manifests ├── docker-compose.yaml ├── Dockerfile └── pyproject.toml From bc7ddd29d4cfa0b8514273186805ac314fb8ee5f Mon Sep 17 00:00:00 2001 From: Andrew Ridgway Date: Thu, 21 May 2026 21:16:41 +1000 Subject: [PATCH 2/2] update to proper gitea webhook syntax and provide immediate onit --- src/pr_reviewer/main.py | 124 ++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 56 deletions(-) diff --git a/src/pr_reviewer/main.py b/src/pr_reviewer/main.py index 53203ad..6ce05ec 100644 --- a/src/pr_reviewer/main.py +++ b/src/pr_reviewer/main.py @@ -3,7 +3,7 @@ import os import hmac import hashlib import base64 -from fastapi import FastAPI, HTTPException, Request +from fastapi import FastAPI, HTTPException, Request, BackgroundTasks from fastapi.responses import JSONResponse import uvicorn from typing import Dict, Any, List, Optional @@ -89,8 +89,58 @@ def post_pr_comment(repo_full: str, pr_number: int, comment: str) -> None: logger.info(f"Posted review comment to PR #{pr_number} in {repo_full}") +def _run_review_background(repo_full: str, pr_number: int, pr_title: str, + pr_description: str, repo_url: str, branch: str, + base_branch: str, files: List[Dict[str, Any]]) -> None: + converted_files = [] + for f in files: + converted_files.append(FileInfo( + path=f["filename"], + content=f.get("content"), + status=f.get("status", "modified"), + additions=f.get("additions", 0), + deletions=f.get("deletions", 0), + 
patch=f.get("patch"), + )) + + flow_inputs = { + "pr_id": str(pr_number), + "pr_title": pr_title, + "pr_description": pr_description, + "pr_url": f"{repo_url}/pull/{pr_number}", + "repo_name": repo_full, + "repo_url": repo_url, + "branch": branch, + "base_branch": base_branch, + "files": [f.dict() for f in converted_files], + "context_overrides": None, + } + + flow = CodeReviewFlow() + try: + flow_result = flow.kickoff(inputs=flow_inputs) + except Exception as e: + logger.error(f"Background review failed for PR #{pr_number}: {e}") + try: + post_pr_comment(repo_full, pr_number, f"**PR Review failed:** {e}") + except Exception: + pass + return + + if flow_result.get("error"): + logger.error(f"PR review failed for PR #{pr_number}: {flow_result['error']}") + + try: + summary = flow_result.get("review_summary", "") + if summary: + comment = f"## PR Review Results\n\n{summary}" + post_pr_comment(repo_full, pr_number, comment) + except Exception as e: + logger.warning(f"Failed to post review comment: {e}") + + @app.post("/api/v1/gitea-webhook") -async def gitea_webhook(request: Request) -> Dict[str, Any]: +async def gitea_webhook(request: Request, background_tasks: BackgroundTasks) -> Dict[str, Any]: body = await request.body() sig = request.headers.get("X-Gitea-Signature", "") if not verify_signature(body, sig): @@ -107,7 +157,7 @@ async def gitea_webhook(request: Request) -> Dict[str, Any]: repo_full = repo["full_name"] repo_url = repo.get("html_url", f"{ACCESS_GITEA_URL}/{repo_full}") - if action not in ("opened", "synchronize", "reopened"): + if action not in ("opened", "synchronized", "reopened"): logger.info(f"Ignoring PR action: {action}") return {"status": "ignored", "reason": f"action '{action}' not processed"} @@ -119,62 +169,24 @@ async def gitea_webhook(request: Request) -> Dict[str, Any]: except Exception as e: raise HTTPException(status_code=500, detail=f"Error fetching PR files: {e}") - converted_files = [] - for f in files: - 
converted_files.append(FileInfo( - path=f["filename"], - content=f.get("content"), - status=f.get("status", "modified"), - additions=f.get("additions", 0), - deletions=f.get("deletions", 0), - patch=f.get("patch"), - )) - - flow_inputs = { - "pr_id": str(pr_number), - "pr_title": pr["title"], - "pr_description": pr.get("body", ""), - "pr_url": f"{repo_url}/pull/{pr_number}", - "repo_name": repo_full, - "repo_url": repo_url, - "branch": pr["head"]["label"], - "base_branch": pr["base"]["label"], - "files": [f.dict() for f in converted_files], - "context_overrides": None, - } - - flow = CodeReviewFlow() - loop = asyncio.get_event_loop() - with ThreadPoolExecutor() as pool: - try: - flow_result = await asyncio.wait_for( - loop.run_in_executor(pool, lambda: flow.kickoff(inputs=flow_inputs)), - timeout=TOTAL_FLOW_TIMEOUT, - ) - except asyncio.TimeoutError: - logger.error(f"PR review timed out for PR #{pr_number}") - raise HTTPException( - status_code=504, - detail=f"PR review timed out after {TOTAL_FLOW_TIMEOUT} seconds", - ) - - if flow_result.get("error"): - logger.error(f"PR review failed for PR #{pr_number}: {flow_result['error']}") - try: - summary = flow_result.get("review_summary", "") - if summary: - comment = f"## PR Review Results\n\n{summary}" - post_pr_comment(repo_full, pr_number, comment) + post_pr_comment(repo_full, pr_number, "PR received — starting review, sit tight :saluting_face:") except Exception as e: - logger.warning(f"Failed to post review comment: {e}") + logger.warning(f"Failed to post initial comment: {e}") - return { - "status": "completed" if not flow_result.get("error") else "failed", - "pr_number": pr_number, - "review_summary": flow_result.get("review_summary"), - "error": flow_result.get("error"), - } + background_tasks.add_task( + _run_review_background, + repo_full=repo_full, + pr_number=pr_number, + pr_title=pr["title"], + pr_description=pr.get("body", ""), + repo_url=repo_url, + branch=pr["head"]["label"], + 
base_branch=pr["base"]["label"], + files=files, + ) + + return {"status": "accepted", "pr_number": pr_number} return {"status": "ignored"}