first build attempt
Some checks failed
Build and Push Image / Build and push image (push) Failing after 1m58s

This commit is contained in:
Andrew Ridgway 2026-05-19 23:24:27 +10:00
parent b436a81300
commit bf6fe21ea6
Signed by: armistace
GPG Key ID: C8D9EAC514B47EF1
27 changed files with 449 additions and 634 deletions

View File

@ -1,36 +1,25 @@
# LLM Configuration
# Choose one of the following LLM providers:
# For OpenAI:
# Provider options: openai, anthropic, ollama
# Required
LLM_MODEL=gpt-4
LLM_BASE_URL=https://api.openai.com/v1
LLM_API_KEY=your_openai_api_key_here
LLM_PROVIDER=openai
# For Anthropic:
# LLM_MODEL=claude-3-opus-20240229
# LLM_BASE_URL=https://api.anthropic.com
# LLM_API_KEY=your_anthropic_api_key_here
# LLM_PROVIDER=anthropic
# Required for OpenAI/Anthropic
LLM_BASE_URL=https://api.openai.com/v1
LLM_API_KEY=your_api_key_here
# For Ollama (local):
# For Ollama (local or network):
# LLM_MODEL=llama2
# LLM_BASE_URL=http://localhost:11434
# LLM_API_KEY=ollama # Ollama doesn't require a real API key
# LLM_PROVIDER=ollama
# MCP Server Configuration
# Hadolint MCP Server (installed via pip in Docker)
# Checkov MCP Server (installed via pip in Docker)
# Semgrep MCP Server (native, no configuration needed)
# Trivy MCP Server (native, no configuration needed)
# Optional: Semgrep App URL and Token for Semgrep App functionality
# Optional: Semgrep App URL and Token
SEMGRAPH_APP_URL=
SEMGRAPH_API_TOKEN=
# Timeout Configuration (in seconds)
# Timeout Configuration (seconds)
TOTAL_FLOW_TIMEOUT=600
PER_CREW_TIMEOUT=300
# Other Configuration
LOG_LEVEL=INFO

View File

@ -66,7 +66,15 @@ jobs:
chmod 644 /etc/apt/sources.list.d/kubernetes.list
apt-get update
apt-get install kubectl
kubectl delete namespace pr-reviewer
kubectl delete namespace pr-reviewer --ignore-not-found
kubectl create namespace pr-reviewer
kubectl create secret docker-registry regcred --docker-server=${{ vars.DOCKER_SERVER }} --docker-username=${{ vars.DOCKER_USERNAME }} --docker-password='${{ secrets.DOCKER_PASSWORD }}' --docker-email=${{ vars.DOCKER_EMAIL }} --namespace=pr-reviewer
kubectl apply -f kube/pr-reviewer_pod.yaml && kubectl apply -f kube/pr-reviewer_deployment.yaml && kubectl apply -f kube/pr-reviewer_service.yaml
kubectl create secret generic pr-reviewer-env \
--from-literal=LLM_PROVIDER=ollama \
--from-literal=LLM_MODEL=${{ vars.OLLAMA_MODEL }} \
--from-literal=LLM_BASE_URL=http://${{ vars.OLLAMA_SERVER }} \
--from-literal=LOG_LEVEL=INFO \
--from-literal=TOTAL_FLOW_TIMEOUT=600 \
--from-literal=PER_CREW_TIMEOUT=300 \
--namespace=pr-reviewer
kubectl apply -f kube/pr-reviewer_deployment.yaml && kubectl apply -f kube/pr-reviewer_service.yaml

5
.gitignore vendored
View File

@ -1 +1,6 @@
__pycache__/
.pytest_cache/
.benchmarks/
.spec/
.env
.venv/

View File

@ -1,5 +1,5 @@
# Stage 1: Base with system dependencies and tool installations
FROM python:3.12-slim as builder
# Stage 1: Builder
FROM python:3.12-slim AS builder
# Install system dependencies
RUN apt-get update && apt-get install -y \
@ -7,58 +7,52 @@ RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/*
# Install Hadolint (for Dockerfile linting)
RUN curl -Lo /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64 && \
chmod +x /bin/hadolint
# Install Checkov (for Kubernetes security scanning)
RUN pip install checkov
# Install Trivy (for container and IaC scanning) - Native MCP server
# Install Tools
RUN curl -Lo /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64 && chmod +x /bin/hadolint
RUN pip install checkov semgrep
RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
# Install Semgrep (for code scanning) - Will use native MCP server
RUN pip install semgrep
# Install UV package manager
# Install UV
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Stage 2: App with source code and UV sync
WORKDIR /app
COPY pyproject.toml .
# Create virtual environment and install dependencies
RUN uv venv /opt/venv
RUN uv pip install --python /opt/venv/bin/python .
# Stage 2: Final
FROM python:3.12-slim
# Create non-root user
RUN useradd --create-home --shell /bin/bash app
WORKDIR /app
USER app
# Install runtime dependencies
# Install system dependencies needed at runtime
RUN apt-get update && apt-get install -y \
git \
&& rm -rf /var/lib/apt/lists/*
# Copy UV from builder stage
COPY --from=builder /bin/uv /bin/uv
COPY --from=builder /bin/uvx /bin/uvx
# Create non-root user
RUN useradd --create-home --shell /bin/bash app
WORKDIR /app
# Copy virtual environment and tools from builder
COPY --from=builder /opt/venv /opt/venv
COPY --from=builder /bin/hadolint /bin/hadolint
# Copy other tools if needed (Trivy, etc.)
COPY --from=builder /usr/local/bin/trivy /usr/local/bin/trivy
# Copy application code
COPY --chown=app:app pyproject.toml .
COPY --chown=app:app README.md .
COPY --chown=app:app src/ ./src/
COPY --chown=app:app mcp_servers/ ./mcp_servers/
COPY --chown=app:app crews/ ./crews/
COPY --chown=app:app tools/ ./tools/
COPY --chown=app:app config/ ./config/
COPY --chown=app:app contexts/ ./contexts/
COPY src/ ./src/
COPY mcp_servers/ ./mcp_servers/
COPY crews/ ./crews/
COPY tools/ ./tools/
COPY config/ ./config/
COPY contexts/ ./contexts/
COPY README.md .
# Install Python dependencies using UV
RUN uv sync --frozen --no-dev
# Set the environment variables to use the venv
ENV PATH="/opt/venv/bin:$PATH"
ENV PYTHONPATH="/app/src"
USER app
# Set environment variables
ENV PYTHONPATH=/app/src
ENV PATH="/app/.venv/bin:$PATH"
# Expose port
EXPOSE 8000
# Set entrypoint
ENTRYPOINT ["uvicorn", "src.pr_reviewer.main:app", "--host", "0.0.0.0", "--port", "8000"]

270
README.md
View File

@ -1,185 +1,191 @@
# PR Reviewer
An automated pull request review system using CrewAI and MCP (Model Context Protocol).
Automated pull request review system using [CrewAI](https://crewai.com) Flows and MCP (Model Context Protocol) tools.
## Overview
This system provides automated code, security, and infrastructure reviews for pull requests using a multi-agent approach. It leverages CrewAI for orchestrating specialized review agents and MCP (Model Context Protocol) for integrating with various static analysis tools.
Performs three parallel reviews — code quality, security, and infrastructure — then synthesizes a consolidated report via a REST API.
## Features
- **Code Review**: Uses Semgrep (via MCP) to check code quality, best practices, and maintainability
- **Security Review**: Uses Trivy (native MCP) to identify security vulnerabilities
- **Infrastructure Review**: Uses Hadolint and Checkov (via MCP wrappers) to review Dockerfiles and Kubernetes manifests
- **Contextual Review**: Incorporates customizable guidelines for code, security, and infrastructure reviews
- **Automated Orchestration**: Uses CrewAI Flows to manage the review process
- **REST API**: FastAPI endpoint for triggering reviews
- **Containerized**: Docker support for easy deployment
- **Code Review** — style, best practices, maintainability (powered by Semgrep)
- **Security Review** — vulnerabilities, injection risks, auth issues (powered by Trivy)
- **Infrastructure Review** — Dockerfiles, Kubernetes manifests, IaC (powered by Hadolint + Checkov)
- **Summarisation** — merges all three reviews into a single actionable report
- **REST API** — FastAPI endpoints for health check and review trigger
- **Dockerized** — multi-stage build with all tools bundled
## Architecture
The system follows a modular architecture with:
- State management using Pydantic models
- LLM factory for flexible provider support (OpenAI, Anthropic, Ollama)
- Context resolution system for incorporating review guidelines
- Crew-based implementation for each review type (code, security, infrastructure)
- MCP server integrations for static analysis tools
- Flow-based orchestration for managing the review process
- RESTful API for integration with CI/CD systems
```
POST /api/v1/review
CodeReviewFlow (CrewAI Flow)
┌────┼──────────────┐
▼ ▼ ▼
Code Security Infra
Review Review Review
│ │ │
└─────┼────────────┘
Summariser
JSON Response
```
## Installation
LLM-agnostic via CrewAI's LLM abstraction — works with OpenAI, Anthropic, or Ollama.
## Quick Start
### Prerequisites
- Python 3.10-3.13
- UV package manager
- Git
- Docker (optional, for containerized deployment)
### Local Development
1. Clone the repository
2. Install UV package manager: `curl -LsSf https://astral.sh/uv/install.sh | sh`
3. Activate UV environment: `source $HOME/.local/bin/env`
4. Create virtual environment: `uv venv .venv`
5. Activate virtual environment: `source .venv/bin/activate`
6. Install dependencies: `uv pip install -e .`
7. Configure environment variables (see `.env.example`)
- Docker
- An LLM provider (OpenAI API key, Anthropic key, or a running Ollama instance)
### Docker Deployment
1. Build the Docker image: `docker build -t pr-reviewer .`
2. Run the container: `docker run -p 8000:8000 --env-file .env pr-reviewer`
### Setup
## Usage
### API Endpoints
#### Health Check
```bash
GET /api/v1/health
cp .env.example .env
# Edit .env with your LLM provider details
```
Returns the health status of the service.
#### Trigger PR Review
### Run
```bash
POST /api/v1/review
docker compose up
```
Initiates a pull request review.
Request Body:
```json
{
Server starts at `http://localhost:8000`.
### Test
```bash
# Health check
curl http://localhost:8000/api/v1/health
# Trigger a review
curl -X POST http://localhost:8000/api/v1/review \
-H "Content-Type: application/json" \
-d '{
"pr_id": "123",
"title": "Add new feature",
"description": "This PR adds a new feature to the application",
"repo": {
"name": "my-repo",
"url": "https://github.com/user/my-repo"
},
"source": {
"branch": "feature/new-feature",
"commit": "abc123"
},
"target": {
"branch": "main",
"commit": "def456"
},
"title": "Add user authentication",
"repo": {"name": "myapp/backend", "url": "https://github.com/myapp/backend"},
"source": {"branch": "feature/auth"},
"target": {"branch": "main"},
"files": [
{
"path": "src/main.py",
"content": "print('Hello World')",
"status": "modified",
"additions": 1,
"path": "auth.py",
"status": "added",
"content": "def login(user, pwd):\n if user == \"admin\" and pwd == \"admin\":\n return True",
"additions": 3,
"deletions": 0
}
],
"context": {
"code_review": "Follow PEP8 guidelines",
"security_review": "Check for SQL injection vulnerabilities",
"infra_review": "Ensure Dockerfile follows best practices"
}
}
]
}'
```
Response:
## API
### `GET /api/v1/health`
Returns service status.
```json
{"status": "healthy", "service": "pr-reviewer"}
```
### `POST /api/v1/review`
Triggers a full PR review.
**Request body:**
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `pr_id` | string | yes | PR identifier |
| `title` | string | yes | PR title |
| `description` | string | no | PR description |
| `repo.name` | string | yes | Repository name |
| `repo.url` | string | yes | Repository URL |
| `source.branch` | string | yes | Source branch |
| `source.commit` | string | no | Source commit SHA |
| `target.branch` | string | yes | Target branch |
| `target.commit` | string | no | Target commit SHA |
| `files[]` | array | no | Changed files |
| `files[].path` | string | yes | File path |
| `files[].content` | string | no | File contents |
| `files[].status` | string | yes | `added`, `modified`, `removed` |
| `files[].additions` | int | no | Lines added |
| `files[].deletions` | int | no | Lines removed |
| `files[].patch` | string | no | Unified diff |
| `context.code_review` | string | no | Code review guidelines override |
| `context.security_review` | string | no | Security review guidelines override |
| `context.infra_review` | string | no | Infrastructure review guidelines override |
**Response:**
```json
{
"review_id": "uuid-string",
"review_id": "uuid",
"status": "completed",
"timestamp": "2023-05-08T10:00:00Z",
"timestamp": "2024-01-01T00:00:00Z",
"results": {
"code_review": "Code review results...",
"security_review": "Security review results...",
"infra_review": "Infrastructure review results...",
"summary": "Synthesized review summary..."
"code_review": "...",
"security_review": "...",
"infra_review": "...",
"summary": "..."
},
"metadata": {
"processing_time_seconds": 45.2,
"processing_time_seconds": 290.22,
"pr_id": "123",
"repo": {
"name": "my-repo",
"url": "https://github.com/user/my-repo"
}
"repo": {"name": "myapp/backend", "url": "https://github.com/myapp/backend"}
}
}
```
## Configuration
### Environment Variables
See `.env.example` for detailed configuration options.
All configuration via environment variables in `.env`:
### Context Files
Default review guidelines are located in `contexts/defaults/`:
- `code_review.md`: Coding practice guidelines
- `security_review.md`: Security guidelines
- `infra_review.md`: Infrastructure guidelines
These can be overridden via the API context parameter.
| Variable | Default | Description |
|----------|---------|-------------|
| `LLM_MODEL` | (required) | Model name (e.g. `gpt-4`, `gemma4:31b-cloud`) |
| `LLM_PROVIDER` | (required) | `openai`, `anthropic`, or `ollama` |
| `LLM_BASE_URL` | — | API base URL |
| `LLM_API_KEY` | — | API key (not needed for Ollama) |
| `TOTAL_FLOW_TIMEOUT` | `600` | Max seconds for full review |
| `PER_CREW_TIMEOUT` | `300` | Max seconds per crew |
| `LOG_LEVEL` | `INFO` | Logging level |
## Development
### Running Tests
```bash
# Run unit tests
pytest
# Install deps
uv pip install -e ".[dev]"
# Run tests with coverage
pytest --cov=src.pr_reviewer
# Run tests
pytest tests/
# Run specific test categories
pytest tests/unit/
pytest tests/integration/
# Run server locally
uvicorn src.pr_reviewer.main:app --reload
```
### Code Style
The project uses Black for code formatting and Flake8 for linting.
## Project Structure
Run formatting:
```bash
black src/
```
Run linting:
```bash
flake8 src/
├── config/ # Shared agent/task YAML configs
├── contexts/ # Default review guidelines (markdown)
├── crews/ # Crew definitions (code, security, infra, summariser)
├── mcp_servers/ # MCP tool wrappers (Hadolint, Checkov)
├── src/pr_reviewer/ # Core application code
│ ├── main.py # FastAPI app
│ ├── flow.py # CrewAI Flow orchestration
│ ├── state.py # Pydantic state models
│ ├── llm.py # LLM factory
│ └── context.py # Context resolution
├── tests/ # Unit and integration tests
├── docker-compose.yaml
├── Dockerfile
└── pyproject.toml
```
## Deployment
### Kubernetes
Kubernetes manifests are available in the `kube/` directory:
- Secret for LLM configuration
- Deployment for the PR Reviewer service
- Service for exposing the API
### Gitea Actions
Gitea Actions workflow for CI/CD is available in `.gitea/workflows/deploy.yaml`.
## License
MIT
## Contributing
1. Fork the repository
2. Create a feature branch
3. Commit your changes
4. Push to the branch
5. Open a pull request

View File

View File

@ -1,7 +0,0 @@
# Summarizer Agent Configuration
summariser:
role: Senior Code Review Coordinator
goal: Synthesize individual review results into a cohesive, actionable review report
backstory: You are a senior technical lead with extensive experience in code review practices across multiple domains. You excel at combining feedback from different reviewers into a clear, prioritized, and actionable summary that helps development teams improve their code efficiently.
verbose: true
allow_delegation: false

View File

@ -1,16 +0,0 @@
# Summarizer Task Configuration
summarise_task:
description: |
Synthesize the results from code, security, and infrastructure reviews into a cohesive review report.
Code Review Results: {code_review_results}
Security Review Results: {security_review_results}
Infrastructure Review Results: {infra_review_results}
Context: {context}
expected_output: |
A comprehensive review report that includes:
- Executive summary of all findings
- Prioritized list of issues (critical, high, medium, low)
- Specific recommendations for each domain (code, security, infrastructure)
- Overall assessment and recommendation (e.g., Approved, Approved with Minor Changes, Significant Changes Needed)
- Summary of positive aspects of the PR
agent: summariser

View File

@ -1,19 +1,21 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai import Agent, Task, Crew
from crewai.project import CrewBase, agent, task, crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
from pr_reviewer.llm import get_llm
class CodeReviewCrew(CrewBase):
@CrewBase
class CodeReviewCrew:
"""Code Review Crew for conducting code quality reviews."""
agents_config = "config/agents.yaml"
tasks_config = "config/tasks.yaml"
def __init__(self):
super().__init__()
# Configure Semgrep MCP server connection
self.llm = get_llm()
self.semgrep_server_params = StdioServerParameters(
command="semgrep",
args=["--metrics=off", "--json", "--stdin-display-name", "scanned_code", "--"],
@ -24,32 +26,38 @@ class CodeReviewCrew(CrewBase):
}
)
@Agent
@agent
def code_reviewer(self) -> Agent:
"""Senior Software Engineer agent for code review."""
return Agent(
config=self.agents_config["code_reviewer"],
tools=[], # Tools will be added via MCP adapter in the task
llm=self.llm,
tools=[],
verbose=True
)
@Task
@task
def code_review_task(self) -> Task:
"""Task for conducting code review."""
return Task(
config=self.tasks_config["code_review_task"],
)
@Crew
@crew
def crew(self) -> Crew:
"""Create the Code Review crew."""
# Create MCP server adapter for Semgrep
tools = []
try:
semgrep_adapter = MCPServerAdapter(self.semgrep_server_params)
if hasattr(semgrep_adapter, 'tools'):
tools = semgrep_adapter.tools
except Exception as e:
print(f"MCP adapter not available: {e}")
return Crew(
agents=[self.code_reviewer()],
tasks=[self.code_review_task()],
process="sequential",
verbose=True,
tools=semgrep_adapter.tools if hasattr(semgrep_adapter, 'tools') else [],
tools=tools,
)

View File

@ -1,60 +1,65 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai import Agent, Task, Crew
from crewai.project import CrewBase, agent, task, crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
from pr_reviewer.llm import get_llm
class InfraReviewCrew(CrewBase):
@CrewBase
class InfraReviewCrew:
"""Infrastructure Review Crew for conducting infrastructure reviews."""
agents_config = "config/agents.yaml"
tasks_config = "config/tasks.yaml"
def __init__(self):
super().__init__()
# Configure Hadolint MCP server connection
self.llm = get_llm()
self.hadolint_server_params = StdioServerParameters(
command="python",
args=["/home/armistace/dev/pr_reviewer/mcp_servers/hadolint_mcp.py"],
args=["/app/mcp_servers/hadolint_mcp.py"],
env=os.environ
)
# Configure Checkov MCP server connection
self.checkov_server_params = StdioServerParameters(
command="python",
args=["/home/armistace/dev/pr_reviewer/mcp_servers/checkov_mcp.py"],
args=["/app/mcp_servers/checkov_mcp.py"],
env=os.environ
)
@Agent
@agent
def infra_reviewer(self) -> Agent:
"""DevOps/Platform Engineer agent for infrastructure review."""
return Agent(
config=self.agents_config["infra_reviewer"],
tools=[], # Tools will be added via MCP adapter in the task
llm=self.llm,
tools=[],
verbose=True
)
@Task
@task
def infra_review_task(self) -> Task:
"""Task for conducting infrastructure review."""
return Task(
config=self.tasks_config["infra_review_task"],
)
@Crew
@crew
def crew(self) -> Crew:
"""Create the Infrastructure Review crew."""
# Create MCP server adapters for Hadolint and Checkov
hadolint_adapter = MCPServerAdapter(self.hadolint_server_params)
checkov_adapter = MCPServerAdapter(self.checkov_server_params)
# Combine tools from both adapters
all_tools = []
try:
hadolint_adapter = MCPServerAdapter(self.hadolint_server_params)
if hasattr(hadolint_adapter, 'tools'):
all_tools.extend(hadolint_adapter.tools)
except Exception as e:
print(f"Hadolint MCP adapter not available: {e}")
try:
checkov_adapter = MCPServerAdapter(self.checkov_server_params)
if hasattr(checkov_adapter, 'tools'):
all_tools.extend(checkov_adapter.tools)
except Exception as e:
print(f"Checkov MCP adapter not available: {e}")
return Crew(
agents=[self.infra_reviewer()],

View File

@ -1,41 +1,41 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai import Agent, Task, Crew
from crewai.project import CrewBase, agent, task, crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
from pr_reviewer.llm import get_llm
class SecurityReviewCrew(CrewBase):
@CrewBase
class SecurityReviewCrew:
"""Security Review Crew for conducting security reviews."""
agents_config = "config/agents.yaml"
tasks_config = "config/tasks.yaml"
def __init__(self):
super().__init__()
# Trivy uses native MCP server, so we don't need to configure a wrapper.
# However, we might need to set up connection parameters if required by the native server.
# For now, we assume the native Trivy MCP server is available at a known address or via stdio.
# We'll leave the MCP server configuration empty and rely on the native server being available.
self.trivy_server_params = None # Placeholder for if we need to configure stdio parameters
self.llm = get_llm()
self.trivy_server_params = None
@Agent
@agent
def security_reviewer(self) -> Agent:
"""Application Security Engineer agent for security review."""
return Agent(
config=self.agents_config["security_reviewer"],
tools=[], # Tools will be added via MCP adapter in the task
llm=self.llm,
tools=[],
verbose=True
)
@Task
@task
def security_review_task(self) -> Task:
"""Task for conducting security review."""
return Task(
config=self.tasks_config["security_review_task"],
)
@Crew
@crew
def crew(self) -> Crew:
"""Create the Security Review crew."""
# If we had an MCP server to wrap, we would create an adapter here.

View File

@ -1,37 +1,40 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai import Agent, Task, Crew
from crewai.project import CrewBase, agent, task, crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
from pr_reviewer.llm import get_llm
class SummariserCrew(CrewBase):
@CrewBase
class SummariserCrew:
"""Summariser Crew for synthesizing review results."""
agents_config = "config/agents.yaml"
tasks_config = "config/tasks.yaml"
def __init__(self):
super().__init__()
# The summarizer doesn't need MCP server connections as it works with text results
self.llm = get_llm()
@Agent
@agent
def summariser(self) -> Agent:
"""Senior Code Review Coordinator agent for summarizing reviews."""
return Agent(
config=self.agents_config["summariser"],
tools=[], # No tools needed for summarization
llm=self.llm,
tools=[],
verbose=True
)
@Task
@task
def summarise_task(self) -> Task:
"""Task for synthesizing review results."""
return Task(
config=self.tasks_config["summarise_task"],
)
@Crew
@crew
def crew(self) -> Crew:
"""Create the Summariser crew."""
return Crew(

16
docker-compose.yaml Normal file
View File

@ -0,0 +1,16 @@
services:
pr-reviewer:
build:
context: .
dockerfile: Dockerfile
image: pr-reviewer:latest
ports:
- "8000:8000"
env_file:
- .env
volumes:
- ./src:/app/src
- ./config:/app/config
restart: always
extra_hosts:
- "host.docker.internal:host-gateway"

View File

@ -6,7 +6,7 @@ metadata:
app: pr-reviewer
namespace: pr-reviewer
spec:
replicas: 3
replicas: 1
selector:
matchLabels:
app: pr-reviewer
@ -20,5 +20,15 @@ spec:
image: git.aridgwayweb.com/armistace/pr-reviewer:latest
ports:
- containerPort: 8000
envFrom:
- secretRef:
name: pr-reviewer-env
resources:
requests:
memory: "512Mi"
cpu: "250m"
limits:
memory: "2Gi"
cpu: "1000m"
imagePullSecrets:
- name: regcred

View File

@ -1,13 +0,0 @@
apiVersion: v1
kind: Pod
metadata:
name: pr-reviewer
namespace: pr-reviewer
spec:
containers:
- name: pr-reviewer
image: git.aridgwayweb.com/armistace/pr-reviewer:latest
ports:
- containerPort: 8000
imagePullSecrets:
- name: regcred

View File

@ -19,10 +19,11 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"crewai>=0.28.0",
"crewai[tools]>=0.80.0",
"fastapi>=0.104.0",
"uvicorn>=0.24.0",
"mcp>=0.1.0",
"mcpadapt",
"pydantic>=2.5.0",
"python-dotenv>=1.0.0",
"gitpython>=3.1.0"

View File

@ -1,118 +0,0 @@
#!/usr/bin/env python3
"""
Simple test to verify the basic components work without Docker.
This tests the core components without requiring Docker build.
"""
import sys
import os
# Add the project root to the path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_imports():
    """Smoke-test the core modules by importing them and building sample objects.

    All steps run inside a single ``try`` so that any import or construction
    error is reported on the console instead of propagating to the caller.

    Returns:
        bool: ``True`` when every step completed, ``False`` when any step
        raised (the error message and traceback are printed first).
    """
    try:
        # Imports are deliberately local: a failure here is part of the check.
        from src.pr_reviewer.state import FileInfo, ContextOverrides, PRReviewState
        from src.pr_reviewer.llm import create_llm
        from src.pr_reviewer.context import resolve_context
        print("✓ Core modules imported successfully")

        # Build a minimal review state.
        state_fields = {
            "pr_id": "123",
            "pr_title": "Test PR",
            "repo_name": "test-repo",
            "repo_url": "https://github.com/test/repo",
            "branch": "feature",
            "base_branch": "main",
        }
        pr_state = PRReviewState(**state_fields)
        print("✓ State creation works")

        # Resolve the review context for that state and report its keys.
        resolved = resolve_context(pr_state)
        resolved_keys = list(resolved.keys())
        print(f"✓ Context resolution works: {resolved_keys}")

        # Only construction is being checked, so the instances are not kept.
        FileInfo(
            path="test.py",
            content="print('hello')",
            status="added",
            additions=1,
            deletions=0,
        )
        print("✓ FileInfo creation works")

        ContextOverrides(
            code_review="Custom code review",
            security_review="Custom security review",
        )
        print("✓ ContextOverrides creation works")

        print("\n✓ All basic component tests passed!")
    except Exception as err:
        import traceback

        print(f"✗ Test failed with error: {err}")
        traceback.print_exc()
        return False
    return True
def test_crew_imports():
    """Best-effort check that the four crew classes import and instantiate.

    Any exception is reported as a warning only, so a failure here never
    fails the overall run.

    Returns:
        bool: always ``True``.
    """
    try:
        from crews.code_review_crew.code_review_crew import CodeReviewCrew
        from crews.security_review_crew.security_review_crew import SecurityReviewCrew
        from crews.infra_review_crew.infra_review_crew import InfraReviewCrew
        from crews.summariser_crew.summariser_crew import SummariserCrew
        print("✓ Crew modules imported successfully")

        # Instantiate in the same order as the imports; the instances are
        # held until the function returns.
        crew_classes = (CodeReviewCrew, SecurityReviewCrew, InfraReviewCrew, SummariserCrew)
        instances = []
        for crew_cls in crew_classes:
            instances.append(crew_cls())
        print("✓ Crew instantiation works")
    except Exception as err:
        # Deliberately non-fatal: report the problem and fall through.
        print(f"⚠ Crew test warning (may be expected if dependencies missing): {err}")
    return True
def test_api_imports():
    """Check that the FastAPI app and the review flow can be imported.

    Returns:
        bool: ``True`` when both imports succeed, ``False`` when either
        raises (the error is printed first).
    """
    try:
        from src.pr_reviewer.main import app
        from src.pr_reviewer.flow import CodeReviewFlow
    except Exception as err:
        print(f"✗ API import failed: {err}")
        return False
    print("✓ API modules imported successfully")
    return True
if __name__ == "__main__":
    # Script entry point: run every check (no short-circuiting), then report.
    print("Running simple component tests...\n")
    checks = (test_imports, test_crew_imports, test_api_imports)
    outcomes = [check() for check in checks]

    if not all(outcomes):
        print("\n❌ Some tests failed. Please check the errors above.")
        sys.exit(1)

    print("\n🎉 All tests passed! The basic components are working.")
    print("\nTo test with Docker:")
    print("1. Fix any Docker build issues if needed")
    print("2. Run: ./start.sh")
    print("3. Or manually: docker build -t pr-reviewer . && docker run -p 8000:8000 pr-reviewer")

View File

@ -6,7 +6,6 @@ from .context import resolve_context
import os
from datetime import datetime
# Import the crews
from crews.code_review_crew.code_review_crew import CodeReviewCrew
from crews.security_review_crew.security_review_crew import SecurityReviewCrew
from crews.infra_review_crew.infra_review_crew import InfraReviewCrew
@ -16,51 +15,27 @@ from crews.summariser_crew.summariser_crew import SummariserCrew
class CodeReviewFlow(Flow[PRReviewState]):
@start()
def receive_pr(self, inputs):
"""Initialize the PR review state with input data."""
print(f"Received PR review request for PR #{inputs.get('pr_id')}")
def receive_pr(self):
print(f"Received PR review request for PR #{self.state.pr_id}")
# Initialize the state
self.state.pr_id = inputs.get("pr_id", "")
self.state.pr_title = inputs.get("pr_title", "")
self.state.pr_description = inputs.get("pr_description", "")
self.state.pr_url = inputs.get("pr_url", "")
self.state.repo_name = inputs.get("repo_name", "")
self.state.repo_url = inputs.get("repo_url", "")
self.state.branch = inputs.get("branch", "")
self.state.base_branch = inputs.get("base_branch", "")
# Convert files from list of dicts to list of FileInfo objects if needed
files_input = inputs.get("files", [])
if files_input and isinstance(files_input[0], dict):
# Convert dicts to FileInfo objects
if isinstance(self.state.files, list) and self.state.files and isinstance(self.state.files[0], dict):
from .state import FileInfo
self.state.files = [FileInfo(**file_dict) for file_dict in files_input]
else:
self.state.files = files_input
self.state.files = [FileInfo(**file_dict) for file_dict in self.state.files]
# Handle context_overrides
context_overrides_input = inputs.get("context_overrides")
if context_overrides_input and isinstance(context_overrides_input, dict):
context_input = self.state.context_overrides
if isinstance(context_input, dict):
from .state import ContextOverrides
self.state.context_overrides = ContextOverrides(**context_overrides_input)
else:
self.state.context_overrides = context_overrides_input
self.state.context_overrides = ContextOverrides(**context_input)
self.state.started_at = datetime.now()
# Resolve context
self.state.resolved_context = resolve_context(self.state)
return self.state
@listen(receive_pr)
def run_code_review(self):
"""Run the code review crew."""
print("Starting code review...")
# Instantiate and run the code review crew
code_crew = CodeReviewCrew()
# The crew's kickoff method expects inputs matching the task template variables
inputs = {
"pr_title": self.state.pr_title,
"pr_description": self.state.pr_description,
@ -70,15 +45,11 @@ class CodeReviewFlow(Flow[PRReviewState]):
result = code_crew.crew().kickoff(inputs=inputs)
self.state.code_review_results = str(result)
print("Code review completed.")
return self.state
@listen(receive_pr)
def run_security_review(self):
"""Run the security review crew."""
print("Starting security review...")
# Instantiate and run the security review crew
security_crew = SecurityReviewCrew()
inputs = {
"pr_title": self.state.pr_title,
@ -89,15 +60,11 @@ class CodeReviewFlow(Flow[PRReviewState]):
result = security_crew.crew().kickoff(inputs=inputs)
self.state.security_review_results = str(result)
print("Security review completed.")
return self.state
@listen(receive_pr)
def run_infra_review(self):
"""Run the infrastructure review crew."""
print("Starting infrastructure review...")
# Instantiate and run the infrastructure review crew
infra_crew = InfraReviewCrew()
inputs = {
"pr_title": self.state.pr_title,
@ -108,15 +75,11 @@ class CodeReviewFlow(Flow[PRReviewState]):
result = infra_crew.crew().kickoff(inputs=inputs)
self.state.infra_review_results = str(result)
print("Infrastructure review completed.")
return self.state
@listen(and_(run_code_review, run_security_review, run_infra_review))
def summarise(self):
"""Summarize the review results."""
print("Starting summarisation...")
# Instantiate and run the summariser crew
summariser_crew = SummariserCrew()
inputs = {
"code_review_results": self.state.code_review_results,
@ -128,15 +91,11 @@ class CodeReviewFlow(Flow[PRReviewState]):
self.state.review_summary = str(result)
self.state.completed_at = datetime.now()
print("Summarisation completed.")
return self.state
@listen(summarise)
def format_response(self):
"""Format the final response."""
print("Formatting final response...")
# Return the final state as the response
return {
"pr_id": self.state.pr_id,
"pr_title": self.state.pr_title,

View File

@ -1,4 +1,5 @@
import logging
import os
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
import uvicorn
@ -54,6 +55,8 @@ async def review_pr(request: Request) -> Dict[str, Any]:
except Exception:
raise HTTPException(status_code=422, detail="Invalid JSON payload")
logger.info(f"Payload keys: {payload.keys() if isinstance(payload, dict) else 'Not a dict'}")
# Validate and extract required fields according to the API specification
# Request schema:
# {
@ -94,11 +97,16 @@ async def review_pr(request: Request) -> Dict[str, Any]:
title = payload.get("title")
description = payload.get("description")
logger.info(f"pr_id from payload: {pr_id}")
logger.info(f"title from payload: {title}")
# Extract repo information
repo_data = payload.get("repo", {})
repo_name = repo_data.get("name")
repo_url = repo_data.get("url")
logger.info(f"repo_name from payload: {repo_name}")
# Extract source information
source_data = payload.get("source", {})
source_branch = source_data.get("branch")
@ -158,6 +166,22 @@ async def review_pr(request: Request) -> Dict[str, Any]:
infra_review=context_data.get("infra_review")
)
# Debug: Print the inputs being passed to the flow
flow_inputs = {
"pr_id": pr_id,
"pr_title": title,
"pr_description": description,
"pr_url": f"{repo_url}/pull/{pr_id}", # Construct PR URL
"repo_name": repo_name,
"repo_url": repo_url,
"branch": source_branch, # Using source branch as the active branch
"base_branch": target_branch, # Using target branch as base
"files": [file.dict() for file in files], # Convert to dict for flow
"context_overrides": context_overrides.dict() if context_overrides else None
}
logger.info(f"Flow inputs: {flow_inputs}")
logger.info(f"Flow inputs keys: {flow_inputs.keys()}")
# Initialize and run the flow with timeout
flow = CodeReviewFlow()
@ -169,18 +193,7 @@ async def review_pr(request: Request) -> Dict[str, Any]:
flow_result = await asyncio.wait_for(
loop.run_in_executor(
pool,
lambda: flow.kickoff(inputs={
"pr_id": pr_id,
"pr_title": title,
"pr_description": description,
"pr_url": f"{repo_url}/pull/{pr_id}", # Construct PR URL
"repo_name": repo_name,
"repo_url": repo_url,
"branch": source_branch, # Using source branch as the active branch
"base_branch": target_branch, # Using target branch as base
"files": [file.dict() for file in files], # Convert to dict for flow
"context_overrides": context_overrides.dict() if context_overrides else None
})
lambda: flow.kickoff(inputs=flow_inputs)
),
timeout=TOTAL_FLOW_TIMEOUT
)

View File

@ -23,14 +23,14 @@ class ContextOverrides(BaseModel):
class PRReviewState(BaseModel):
"""State of the PR review process."""
# Input fields
pr_id: str
pr_title: str
pr_id: str = ""
pr_title: str = ""
pr_description: Optional[str] = None
pr_url: Optional[str] = None
repo_name: str
repo_url: str
branch: str
base_branch: str
repo_name: str = ""
repo_url: str = ""
branch: str = ""
base_branch: str = ""
files: List[FileInfo] = Field(default_factory=list)
context_overrides: Optional[ContextOverrides] = None
# Internal fields

View File

@ -1,12 +0,0 @@
#!/bin/bash
# Build the pr-reviewer test image, then run the Docker smoke test against it.
# The script aborts on the first command that fails.
set -o errexit

printf '%s\n' "Building Docker image..."
docker build --tag pr-reviewer-test:latest .

printf '%s\n' "Running tests..."
python test_docker.py

printf '%s\n' "All tests completed!"

View File

@ -1,142 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify the Dockerized PR Reviewer application works correctly.
This script builds the Docker image, runs it, and tests the API endpoints.
"""
import time
import requests
import docker
import json
import sys
from typing import Dict, Any
def test_dockerized_app():
    """Build the PR Reviewer image, run it, and smoke-test its HTTP API.

    Builds ``pr-reviewer-test:latest`` from the current directory, starts a
    container publishing port 8000, waits up to 30 seconds for the health
    endpoint to answer 200, then exercises the health and review endpoints.
    The container and image are removed afterwards whether the test passed
    or failed; cleanup problems are reported but never mask the test result.

    Raises:
        TimeoutError: If the health endpoint does not return 200 in time.
        AssertionError: If the health endpoint returns an unexpected response.
        Exception: Any other failure is printed and re-raised unchanged.
    """
    image_tag = "pr-reviewer-test:latest"
    base_url = "http://localhost:8000/api/v1"
    client = docker.from_env()

    # Track what has actually been created so cleanup only touches resources
    # that exist, instead of probing locals().
    container = None
    image_built = False

    try:
        # Build the Docker image
        print("Building Docker image...")
        client.images.build(
            path=".",
            tag=image_tag,
            rm=True,
            forcerm=True,
        )
        image_built = True
        print("Docker image built successfully.")

        # Run the container
        print("Starting container...")
        container = client.containers.run(
            image=image_tag,
            detach=True,
            ports={'8000/tcp': 8000},
            environment={
                "LLM_MODEL": "test-model",
                "LLM_BASE_URL": "http://localhost:11434",  # Using Ollama as example
                "LLM_API_KEY": "ollama",  # Ollama doesn't need a real key
                "LLM_PROVIDER": "ollama",
            },
        )
        print(f"Container started with ID: {container.id}")

        # Wait for the container to be ready
        print("Waiting for container to be ready...")
        max_wait = 30  # seconds
        start_time = time.time()
        while time.time() - start_time < max_wait:
            try:
                response = requests.get(f"{base_url}/health", timeout=5)
                if response.status_code == 200:
                    print("Container is ready!")
                    break
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                # A slow first response is as much "not ready yet" as a
                # refused connection, so both are retried.
                print("Waiting for container to start...")
            # Pause after every unsuccessful probe (including non-200
            # answers) so the loop never spins without a delay.
            time.sleep(2)
        else:
            raise TimeoutError("Container did not become ready within the timeout period")

        # Test the health endpoint
        print("Testing health endpoint...")
        health_response = requests.get(f"{base_url}/health", timeout=5)
        assert health_response.status_code == 200, f"Health check failed: {health_response.status_code}"
        health_data = health_response.json()
        assert health_data["status"] == "healthy", f"Unexpected health status: {health_data['status']}"
        print("Health endpoint test passed.")

        # Test the review endpoint with minimal valid data
        print("Testing review endpoint...")
        test_payload = {
            "pr_id": "123",
            "title": "Test PR",
            "description": "This is a test PR",
            "repo": {
                "name": "test-repo",
                "url": "https://github.com/test/test-repo"
            },
            "source": {
                "branch": "feature/test",
                "commit": "abc123"
            },
            "target": {
                "branch": "main",
                "commit": "def456"
            },
            "files": [
                {
                    "path": "src/main.py",
                    "content": "print('Hello World')",
                    "status": "modified",
                    "additions": 1,
                    "deletions": 0
                }
            ],
            "context": {
                "code_review": "Follow basic coding standards",
                "security_review": "Check for obvious security issues",
                "infra_review": "Ensure basic infrastructure practices"
            }
        }
        review_response = requests.post(
            f"{base_url}/review",
            json=test_payload,
            timeout=30  # Longer timeout for the review process
        )

        # We expect this to either succeed (200) or fail with a 500 due to LLM issues
        # Since we're not actually connecting to a real LLM, we expect a 500
        print(f"Review endpoint responded with status: {review_response.status_code}")
        if review_response.status_code == 200:
            review_data = review_response.json()
            print("Review endpoint test passed.")
            print(f"Review ID: {review_data.get('review_id')}")
            print(f"Status: {review_data.get('status')}")
        else:
            print(f"Review endpoint returned error status {review_response.status_code} (expected due to lack of real LLM)")
            print(f"Response: {review_response.text}")
    except Exception as e:
        print(f"Test failed with error: {e}")
        raise
    finally:
        # Single cleanup path for success and failure. Each resource is
        # released independently so a failure to stop the container does not
        # leave the image behind, and cleanup errors are reported rather than
        # silently swallowed.
        print("Cleaning up...")
        cleanup_errors = (docker.errors.DockerException, requests.exceptions.RequestException)
        if container is not None:
            try:
                container.stop()
                container.remove()
            except cleanup_errors as cleanup_error:
                print(f"Warning: could not remove container: {cleanup_error}")
        if image_built:
            try:
                client.images.remove(image=image_tag, force=True)
            except cleanup_errors as cleanup_error:
                print(f"Warning: could not remove image: {cleanup_error}")

    print("Test completed successfully.")


if __name__ == "__main__":
    test_dockerized_app()

View File

@ -0,0 +1,31 @@
import pytest
import requests
import os
from dotenv import load_dotenv
load_dotenv()
BASE_URL = "http://localhost:8000/api/v1"
def test_health_endpoint():
    """Check that GET /health answers 200 with the expected status body."""
    # Bound the wait so an unresponsive service fails the test instead of
    # hanging the whole test run (requests has no default timeout).
    response = requests.get(f"{BASE_URL}/health", timeout=10)
    assert response.status_code == 200
    assert response.json() == {"status": "healthy", "service": "pr-reviewer"}
def test_trigger_review_invalid_pr():
    """Post a review request that carries only a pr_id and check the status code."""
    payload = {"pr_id": "invalid-id"}
    # A 200 is an accepted outcome, so the service may run a full review here.
    # NOTE(review): 660 s assumes the service's flow timeout is the 600 s from
    # the example env file - confirm against the deployed configuration.
    response = requests.post(f"{BASE_URL}/review", json=payload, timeout=660)
    # Accept either a validation rejection (400/422) or a processed request (200).
    assert response.status_code in [200, 400, 422]
def test_trigger_review_missing_payload():
    """Post an empty JSON object to /review and expect a 422 rejection."""
    # Bound the wait so an unresponsive service fails the test instead of
    # hanging it; a validation rejection is expected to come back quickly.
    response = requests.post(f"{BASE_URL}/review", json={}, timeout=30)
    assert response.status_code == 422  # FastAPI default for missing required body fields
def test_get_status_nonexistent():
    """Request the status of an unknown review id and expect a 404."""
    # Bound the wait so an unresponsive service fails the test instead of
    # hanging the whole test run (requests has no default timeout).
    response = requests.get(f"{BASE_URL}/status/non-existent-id", timeout=10)
    assert response.status_code == 404

View File

@ -0,0 +1,75 @@
import pytest
import requests
import json
from dotenv import load_dotenv
load_dotenv()
BASE_URL = "http://localhost:8000/api/v1"
# Mock PR payload posted to the /review endpoint by test_full_review_workflow.
# It fills in every section of the request (PR identity, repo, source and
# target refs, one changed file with its patch, and per-review context).
# The test also compares "pr_id" and repo "name" against the response metadata,
# so keep those two values in sync with any assertions that reference them.
MOCK_PR_DATA = {
"pr_id": "123",
"title": "Fix authentication vulnerability",
"description": "This PR addresses a critical authentication bypass vulnerability",
"repo": {
"name": "secure-app",
"url": "https://github.com/example/secure-app"
},
"source": {
"branch": "fix-auth-bypass",
"commit": "a1b2c3d4e5f6"
},
"target": {
"branch": "main",
"commit": "f6e5d4c3b2a1"
},
"files": [
{
"path": "src/auth.py",
"content": "def authenticate_user(username, password):\n # Vulnerable authentication implementation\n if username == 'admin' and password == 'password123':\n return True\n return False",
"status": "modified",
"additions": 5,
"deletions": 3,
"patch": "@@ -1,5 +1,5 @@\n def authenticate_user(username, password):\n- # Simple authentication\n- if username == 'admin' and password == 'password123':\n+ # Fixed authentication with proper validation\n+ if validate_credentials(username, password):\n return True\n return False"
}
],
"context": {
"code_review": "Focus on security best practices and authentication logic",
"security_review": "Identify potential vulnerabilities in authentication flow",
"infra_review": "Verify secure deployment configurations"
}
}
def test_full_review_workflow():
    """Run the full PR review workflow against the running service with mock data.

    Posts MOCK_PR_DATA to the /review endpoint and checks the shape of the
    response: the top-level keys, the presence of each review section in the
    results, and the PR metadata echoed back by the service.
    """
    # Trigger a review. Bound the wait so a hung service fails the test
    # instead of blocking forever (requests has no default timeout).
    # NOTE(review): 660 s assumes the service's flow timeout is the 600 s from
    # the example env file - confirm against the deployed configuration.
    response = requests.post(f"{BASE_URL}/review", json=MOCK_PR_DATA, timeout=660)

    # Print response for debugging
    print(f"Status Code: {response.status_code}")
    print(f"Response: {response.text}")

    # Validate response
    assert response.status_code == 200
    data = response.json()
    assert "review_id" in data
    assert data["status"] in ["completed", "failed"]  # Allow either status
    assert "results" in data
    assert "metadata" in data

    # Validate results structure. Each section must be present either as a
    # member of the results object or somewhere in its string rendering.
    results = data["results"]
    for section in ("code_review", "security_review", "infra_review", "summary"):
        assert section in results or section in str(results), (
            f"'{section}' missing from review results"
        )

    # Validate metadata
    metadata = data["metadata"]
    assert "processing_time_seconds" in metadata
    assert metadata["pr_id"] == MOCK_PR_DATA["pr_id"]
    assert metadata["repo"]["name"] == MOCK_PR_DATA["repo"]["name"]
    print("Full review workflow test passed!")