This commit is contained in:
Andrew Ridgway 2026-05-08 23:46:17 +10:00
commit 10c8cfaa0f
Signed by: armistace
GPG Key ID: C8D9EAC514B47EF1
71 changed files with 2139 additions and 0 deletions

36
.env.example Normal file
View File

@ -0,0 +1,36 @@
# LLM Configuration
# Choose one of the following LLM providers:
# For OpenAI:
LLM_MODEL=gpt-4
LLM_BASE_URL=https://api.openai.com/v1
LLM_API_KEY=your_openai_api_key_here
LLM_PROVIDER=openai
# For Anthropic:
# LLM_MODEL=claude-3-opus-20240229
# LLM_BASE_URL=https://api.anthropic.com
# LLM_API_KEY=your_anthropic_api_key_here
# LLM_PROVIDER=anthropic
# For Ollama (local):
# LLM_MODEL=llama2
# LLM_BASE_URL=http://localhost:11434
# LLM_API_KEY=ollama # Ollama doesn't require a real API key
# LLM_PROVIDER=ollama
# MCP Server Configuration
# Hadolint MCP Server (installed via pip in Docker)
# Checkov MCP Server (installed via pip in Docker)
# Semgrep MCP Server (native, no configuration needed)
# Trivy MCP Server (native, no configuration needed)
# Optional: Semgrep App URL and Token for Semgrep App functionality
SEMGRAPH_APP_URL=
SEMGRAPH_API_TOKEN=
# Timeout Configuration (in seconds)
TOTAL_FLOW_TIMEOUT=600
PER_CREW_TIMEOUT=300
# Other Configuration
LOG_LEVEL=INFO

View File

@ -0,0 +1,72 @@
name: Build and Push Image
on:
  push:
    branches:
      - master
jobs:
  build:
    name: Build and push image
    runs-on: ubuntu-latest
    container: catthehacker/ubuntu:act-latest
    if: gitea.ref == 'refs/heads/master'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Create Kubeconfig
        run: |
          # -p: do not fail if the directory already exists (e.g. cached runner)
          mkdir -p $HOME/.kube
          echo "${{ secrets.KUBEC_CONFIG_BUILDX_NEW }}" > $HOME/.kube/config
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver: kubernetes
          driver-opts: |
            namespace=gitea-runner
            qemu.install=true
      - name: Login to Docker Registry
        uses: docker/login-action@v3
        with:
          registry: git.aridgwayweb.com
          username: armistace
          password: ${{ secrets.REG_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          platforms: linux/amd64,linux/arm64
          tags: |
            git.aridgwayweb.com/armistace/pr-reviewer:latest
      - name: Trivy Scan
        run: |
          echo "Installing Trivy "
          sudo apt-get update
          sudo apt-get install -y wget apt-transport-https gnupg lsb-release
          wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
          echo deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main | sudo tee -a /etc/apt/sources.list.d/trivy.list
          sudo apt-get update
          sudo apt-get install -y trivy
          trivy image --format table --exit-code 1 --ignore-unfixed --vuln-type os,library --severity HIGH,CRITICAL git.aridgwayweb.com/armistace/pr-reviewer:latest
      - name: Deploy
        run: |
          echo "Installing Kubectl"
          apt-get update
          apt-get install -y apt-transport-https ca-certificates curl gnupg
          # /etc/apt/keyrings is not guaranteed to exist on this base image
          mkdir -p /etc/apt/keyrings
          curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.33/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
          chmod 644 /etc/apt/keyrings/kubernetes-apt-keyring.gpg
          echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.33/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list
          chmod 644 /etc/apt/sources.list.d/kubernetes.list
          apt-get update
          # -y: apt would otherwise prompt and hang the non-interactive runner
          apt-get install -y kubectl
          # --ignore-not-found: first deploy has no namespace to delete yet
          kubectl delete namespace pr-reviewer --ignore-not-found=true
          kubectl create namespace pr-reviewer
          kubectl create secret docker-registry regcred --docker-server=${{ vars.DOCKER_SERVER }} --docker-username=${{ vars.DOCKER_USERNAME }} --docker-password='${{ secrets.DOCKER_PASSWORD }}' --docker-email=${{ vars.DOCKER_EMAIL }} --namespace=pr-reviewer
          kubectl apply -f kube/pr-reviewer_pod.yaml && kubectl apply -f kube/pr-reviewer_deployment.yaml && kubectl apply -f kube/pr-reviewer_service.yaml

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.spec/

64
Dockerfile Normal file
View File

@ -0,0 +1,64 @@
# Stage 1: Base with system dependencies and tool installations
FROM python:3.12-slim AS builder

# Build-time system dependencies
RUN apt-get update && apt-get install -y \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Hadolint (for Dockerfile linting)
RUN curl -Lo /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64 && \
    chmod +x /bin/hadolint

# Install Checkov (for Kubernetes security scanning)
RUN pip install checkov==3.1.123

# Install Trivy (for container and IaC scanning) - Native MCP server
RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.47.0

# Install Semgrep (for code scanning) - Will use native MCP server
RUN pip install semgrep==1.76.0

# Install UV package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Stage 2: App with source code and UV sync
FROM python:3.12-slim

# Install runtime dependencies while still root: apt-get cannot run as a
# non-root user (previously this ran after USER app and would fail).
RUN apt-get update && apt-get install -y \
    git \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd --create-home --shell /bin/bash app

# Carry the scanner binaries into the runtime image; they were installed
# only in the builder stage and would otherwise be absent at runtime.
COPY --from=builder /bin/hadolint /bin/hadolint
COPY --from=builder /usr/local/bin/trivy /usr/local/bin/trivy
# NOTE(review): checkov and semgrep are pip-installed in the builder stage
# only; confirm `uv sync` provides them in this stage, or install them here.

# Copy UV from builder stage
COPY --from=builder /bin/uv /bin/uv
COPY --from=builder /bin/uvx /bin/uvx

WORKDIR /app
# WORKDIR creates /app as root; hand it to the app user so `uv sync` can
# create the .venv directory.
RUN chown app:app /app
USER app

# Copy application code
COPY --chown=app:app pyproject.toml .
COPY --chown=app:app README.md .
COPY --chown=app:app src/ ./src/
COPY --chown=app:app mcp_servers/ ./mcp_servers/
COPY --chown=app:app crews/ ./crews/
COPY --chown=app:app tools/ ./tools/
COPY --chown=app:app config/ ./config/
COPY --chown=app:app contexts/ ./contexts/

# Install Python dependencies using UV
RUN uv sync --frozen --no-dev

# Set environment variables
ENV PYTHONPATH=/app/src
ENV PATH="/app/.venv/bin:$PATH"

# Expose port
EXPOSE 8000

# Set entrypoint
ENTRYPOINT ["uvicorn", "src.pr_reviewer.main:app", "--host", "0.0.0.0", "--port", "8000"]

185
README.md Normal file
View File

@ -0,0 +1,185 @@
# PR Reviewer
An automated pull request review system using CrewAI and MCP (Model Context Protocol).
## Overview
This system provides automated code, security, and infrastructure reviews for pull requests using a multi-agent approach. It leverages CrewAI for orchestrating specialized review agents and MCP (Model Context Protocol) for integrating with various static analysis tools.
## Features
- **Code Review**: Uses Semgrep (via MCP) to check code quality, best practices, and maintainability
- **Security Review**: Uses Trivy (native MCP) to identify security vulnerabilities
- **Infrastructure Review**: Uses Hadolint and Checkov (via MCP wrappers) to review Dockerfiles and Kubernetes manifests
- **Contextual Review**: Incorporates customizable guidelines for code, security, and infrastructure reviews
- **Automated Orchestration**: Uses CrewAI Flows to manage the review process
- **REST API**: FastAPI endpoint for triggering reviews
- **Containerized**: Docker support for easy deployment
## Architecture
The system follows a modular architecture with:
- State management using Pydantic models
- LLM factory for flexible provider support (OpenAI, Anthropic, Ollama)
- Context resolution system for incorporating review guidelines
- Crew-based implementation for each review type (code, security, infrastructure)
- MCP server integrations for static analysis tools
- Flow-based orchestration for managing the review process
- RESTful API for integration with CI/CD systems
## Installation
### Prerequisites
- Python 3.10-3.13
- UV package manager
- Git
- Docker (optional, for containerized deployment)
### Local Development
1. Clone the repository
2. Install UV package manager: `curl -LsSf https://astral.sh/uv/install.sh | sh`
3. Activate UV environment: `source $HOME/.local/bin/env`
4. Create virtual environment: `uv venv .venv`
5. Activate virtual environment: `source .venv/bin/activate`
6. Install dependencies: `uv pip install -e .`
7. Configure environment variables (see `.env.example`)
### Docker Deployment
1. Build the Docker image: `docker build -t pr-reviewer .`
2. Run the container: `docker run -p 8000:8000 --env-file .env pr-reviewer`
## Usage
### API Endpoints
#### Health Check
```bash
GET /api/v1/health
```
Returns the health status of the service.
#### Trigger PR Review
```bash
POST /api/v1/review
```
Initiates a pull request review.
Request Body:
```json
{
"pr_id": "123",
"title": "Add new feature",
"description": "This PR adds a new feature to the application",
"repo": {
"name": "my-repo",
"url": "https://github.com/user/my-repo"
},
"source": {
"branch": "feature/new-feature",
"commit": "abc123"
},
"target": {
"branch": "main",
"commit": "def456"
},
"files": [
{
"path": "src/main.py",
"content": "print('Hello World')",
"status": "modified",
"additions": 1,
"deletions": 0
}
],
"context": {
"code_review": "Follow PEP8 guidelines",
"security_review": "Check for SQL injection vulnerabilities",
"infra_review": "Ensure Dockerfile follows best practices"
}
}
```
Response:
```json
{
"review_id": "uuid-string",
"status": "completed",
"timestamp": "2023-05-08T10:00:00Z",
"results": {
"code_review": "Code review results...",
"security_review": "Security review results...",
"infra_review": "Infrastructure review results...",
"summary": "Synthesized review summary..."
},
"metadata": {
"processing_time_seconds": 45.2,
"pr_id": "123",
"repo": {
"name": "my-repo",
"url": "https://github.com/user/my-repo"
}
}
}
```
## Configuration
### Environment Variables
See `.env.example` for detailed configuration options.
### Context Files
Default review guidelines are located in `contexts/defaults/`:
- `code_review.md`: Coding practice guidelines
- `security_review.md`: Security guidelines
- `infra_review.md`: Infrastructure guidelines
These can be overridden via the API context parameter.
## Development
### Running Tests
```bash
# Run unit tests
pytest
# Run tests with coverage
pytest --cov=src.pr_reviewer
# Run specific test categories
pytest tests/unit/
pytest tests/integration/
```
### Code Style
The project uses Black for code formatting and Flake8 for linting.
Run formatting:
```bash
black src/
```
Run linting:
```bash
flake8 src/
```
## Deployment
### Kubernetes
Kubernetes manifests are available in the `kube/` directory:
- Secret for LLM configuration
- Deployment for the PR Reviewer service
- Service for exposing the API
### Gitea Actions
A Gitea Actions workflow for CI/CD is available in `.gitea/workflows/deploy.yaml`.
## License
MIT
## Contributing
1. Fork the repository
2. Create a feature branch
3. Commit your changes
4. Push to the branch
5. Open a pull request

0
config/__init__.py Normal file
View File

7
config/agents.yaml Normal file
View File

@ -0,0 +1,7 @@
# Summarizer Agent Configuration
summariser:
role: Senior Code Review Coordinator
goal: Synthesize individual review results into a cohesive, actionable review report
backstory: You are a senior technical lead with extensive experience in code review practices across multiple domains. You excel at combining feedback from different reviewers into a clear, prioritized, and actionable summary that helps development teams improve their code efficiently.
verbose: true
allow_delegation: false

16
config/tasks.yaml Normal file
View File

@ -0,0 +1,16 @@
# Summarizer Task Configuration
summarise_task:
description: |
Synthesize the results from code, security, and infrastructure reviews into a cohesive review report.
Code Review Results: {code_review_results}
Security Review Results: {security_review_results}
Infrastructure Review Results: {infra_review_results}
Context: {context}
expected_output: |
A comprehensive review report that includes:
- Executive summary of all findings
- Prioritized list of issues (critical, high, medium, low)
- Specific recommendations for each domain (code, security, infrastructure)
- Overall assessment and recommendation (e.g., Approved, Approved with Minor Changes, Significant Changes Needed)
- Summary of positive aspects of the PR
agent: summariser

View File

View File

@ -0,0 +1,19 @@
# Code Review Guidelines
## General Principles
- Write clean, readable, and maintainable code.
- Follow the project's coding standards and style guides.
- Ensure code is well-tested and documented.
- Avoid code duplication; refactor when necessary.
- Use meaningful names for variables, functions, and classes.
- Keep functions and classes focused on a single responsibility.
## Specific Checks
- [ ] Code follows the project's style guide (e.g., PEP8 for Python).
- [ ] No commented-out code or debug prints in production code.
- [ ] Proper error handling and logging.
- [ ] Resource management (e.g., closing files, releasing network connections).
- [ ] Security best practices (input validation, output encoding, etc.).
- [ ] Performance considerations (avoid unnecessary loops, optimize database queries).
- [ ] Unit tests are present and passing for new code.
- [ ] Changes are backward compatible or have a migration plan.

View File

@ -0,0 +1,22 @@
# Security Review Guidelines
## General Principles
- Follow the principle of least privilege.
- Validate and sanitize all user inputs.
- Use secure coding practices to prevent common vulnerabilities.
- Keep dependencies up to date and monitor for known security issues.
- Implement proper authentication and authorization mechanisms.
- Encrypt sensitive data at rest and in transit.
- Log security-relevant events and monitor for suspicious activities.
## Specific Checks
- [ ] Input validation and sanitization (SQL injection, XSS, command injection, etc.).
- [ ] Proper authentication and session management.
- [ ] Authorization checks (users can only access resources they are permitted to).
- [ ] Secure handling of sensitive data (passwords, tokens, PII).
- [ ] Use of up-to-date and secure dependencies (no known vulnerabilities).
- [ ] Proper error handling that does not leak sensitive information.
- [ ] Secure configuration (e.g., not using default passwords, disabling unnecessary services).
- [ ] Communication security (use of HTTPS, proper certificate validation).
- [ ] Protection against CSRF, clickjacking, and other web vulnerabilities.
- [ ] Secure file uploads (if applicable).

View File

View File

@ -0,0 +1,55 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
class CodeReviewCrew(CrewBase):
    """Code Review Crew for conducting code quality reviews."""

    agents_config = "config/agents.yaml"
    tasks_config = "config/tasks.yaml"

    def __init__(self):
        super().__init__()
        # Semgrep runs as a stdio MCP subprocess; inherit the current
        # environment and add the Semgrep App settings (empty when unset).
        semgrep_env = dict(os.environ)
        semgrep_env["SEMGRAPH_APP_URL"] = os.getenv("SEMGRAPH_APP_URL", "")
        semgrep_env["SEMGRAPH_API_TOKEN"] = os.getenv("SEMGRAPH_API_TOKEN", "")
        self.semgrep_server_params = StdioServerParameters(
            command="semgrep",
            args=["--metrics=off", "--json", "--stdin-display-name", "scanned_code", "--"],
            env=semgrep_env,
        )

    @Agent
    def code_reviewer(self) -> Agent:
        """Senior Software Engineer agent for code review."""
        reviewer_cfg = self.agents_config["code_reviewer"]
        # Tools are supplied through the MCP adapter when the crew is built.
        return Agent(config=reviewer_cfg, tools=[], verbose=True)

    @Task
    def code_review_task(self) -> Task:
        """Task for conducting code review."""
        return Task(config=self.tasks_config["code_review_task"])

    @Crew
    def crew(self) -> Crew:
        """Create the Code Review crew."""
        # Adapter exposing the Semgrep MCP server's tools (empty if the
        # adapter implementation does not publish a `tools` attribute).
        adapter = MCPServerAdapter(self.semgrep_server_params)
        return Crew(
            agents=[self.code_reviewer()],
            tasks=[self.code_review_task()],
            process="sequential",
            verbose=True,
            tools=getattr(adapter, "tools", []),
        )

View File

@ -0,0 +1,7 @@
# Code Review Crew Agents Configuration
code_reviewer:
role: Senior Software Engineer
goal: Conduct thorough code reviews focusing on code quality, best practices, and maintainability
backstory: You are an experienced software engineer with a keen eye for detail and a passion for clean code. You have reviewed thousands of pull requests and helped teams improve their code quality.
verbose: true
allow_delegation: false

View File

@ -0,0 +1,16 @@
# Code Review Crew Tasks Configuration
code_review_task:
description: |
Review the code changes in the pull request for quality, best practices, and maintainability.
PR Title: {pr_title}
PR Description: {pr_description}
Files to review: {files}
Context: {context}
expected_output: |
A detailed code review report including:
- Summary of changes
- Code quality issues (if any)
- Best practices violations (if any)
- Suggestions for improvement
- Overall rating (e.g., Approved, Changes Needed)
agent: code_reviewer

View File

View File

@ -0,0 +1,7 @@
# Infrastructure Review Crew Agents Configuration
infra_reviewer:
role: DevOps/Platform Engineer
goal: Review infrastructure as code for correctness, security, and best practices
backstory: You are an experienced DevOps engineer with expertise in infrastructure as code, Kubernetes, Docker, and cloud platforms. You help teams ensure their infrastructure is secure, scalable, and follows best practices.
verbose: true
allow_delegation: false

View File

@ -0,0 +1,16 @@
# Infrastructure Review Crew Tasks Configuration
infra_review_task:
description: |
Review the infrastructure as code (IaC) in the pull request for correctness, security, and best practices.
PR Title: {pr_title}
PR Description: {pr_description}
Files to review: {files}
Context: {context}
expected_output: |
A detailed infrastructure review report including:
- Summary of infrastructure changes
- Issues found (misconfigurations, security vulnerabilities, etc.)
- Best practices violations (if any)
- Suggestions for improvement
- Overall rating (e.g., Approved, Needs Changes)
agent: infra_reviewer

View File

@ -0,0 +1,65 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
class InfraReviewCrew(CrewBase):
    """Infrastructure Review Crew for conducting infrastructure reviews."""

    agents_config = "config/agents.yaml"
    tasks_config = "config/tasks.yaml"

    def __init__(self):
        super().__init__()
        # The MCP wrapper scripts were previously referenced via a hard-coded
        # developer home directory (/home/armistace/dev/pr_reviewer/...),
        # which does not exist inside the Docker image (code lives in /app).
        # Resolve them relative to this file instead, with an env-var
        # override for non-standard layouts.
        # NOTE(review): assumes this module sits two levels below the repo
        # root (crews/<crew_name>/) with mcp_servers/ at the root — confirm.
        mcp_dir = os.getenv("MCP_SERVERS_DIR") or os.path.normpath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "mcp_servers")
        )
        # Hadolint MCP server connection (Dockerfile linting)
        self.hadolint_server_params = StdioServerParameters(
            command="python",
            args=[os.path.join(mcp_dir, "hadolint_mcp.py")],
            env=os.environ,
        )
        # Checkov MCP server connection (Kubernetes manifest scanning)
        self.checkov_server_params = StdioServerParameters(
            command="python",
            args=[os.path.join(mcp_dir, "checkov_mcp.py")],
            env=os.environ,
        )

    @Agent
    def infra_reviewer(self) -> Agent:
        """DevOps/Platform Engineer agent for infrastructure review."""
        return Agent(
            config=self.agents_config["infra_reviewer"],
            tools=[],  # Tools are attached via the MCP adapters in crew()
            verbose=True,
        )

    @Task
    def infra_review_task(self) -> Task:
        """Task for conducting infrastructure review."""
        return Task(
            config=self.tasks_config["infra_review_task"],
        )

    @Crew
    def crew(self) -> Crew:
        """Create the Infrastructure Review crew."""
        # Adapters for the Hadolint and Checkov MCP wrappers.
        hadolint_adapter = MCPServerAdapter(self.hadolint_server_params)
        checkov_adapter = MCPServerAdapter(self.checkov_server_params)
        # Combine tools from both adapters; an adapter without a `tools`
        # attribute contributes nothing.
        all_tools = list(getattr(hadolint_adapter, "tools", []))
        all_tools.extend(getattr(checkov_adapter, "tools", []))
        return Crew(
            agents=[self.infra_reviewer()],
            tasks=[self.infra_review_task()],
            process="sequential",
            verbose=True,
            tools=all_tools,
        )

View File

View File

@ -0,0 +1,7 @@
# Security Review Crew Agents Configuration
security_reviewer:
role: Application Security Engineer
goal: Identify security vulnerabilities and ensure security best practices are followed
backstory: You are an experienced security engineer specialized in application security. You have extensive experience in penetration testing, code security analysis, and helping organizations build secure software.
verbose: true
allow_delegation: false

View File

@ -0,0 +1,16 @@
# Security Review Crew Tasks Configuration
security_review_task:
description: |
Review the code changes in the pull request for security vulnerabilities and compliance with security best practices.
PR Title: {pr_title}
PR Description: {pr_description}
Files to review: {files}
Context: {context}
expected_output: |
A detailed security review report including:
- Summary of security-related changes
- Identified vulnerabilities (if any)
- Security best practices violations (if any)
- Suggestions for improving security posture
- Overall security rating (e.g., Secure, Needs Improvement)
agent: security_reviewer

View File

@ -0,0 +1,51 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
class SecurityReviewCrew(CrewBase):
    """Security Review Crew for conducting security reviews."""

    agents_config = "config/agents.yaml"
    tasks_config = "config/tasks.yaml"

    def __init__(self):
        super().__init__()
        # Trivy ships a native MCP server, so no stdio wrapper is configured
        # here. The attribute is kept as a placeholder in case stdio
        # connection parameters are needed later.
        self.trivy_server_params = None

    @Agent
    def security_reviewer(self) -> Agent:
        """Application Security Engineer agent for security review."""
        reviewer_cfg = self.agents_config["security_reviewer"]
        # No tools attached directly; see crew() for the MCP rationale.
        return Agent(config=reviewer_cfg, tools=[], verbose=True)

    @Task
    def security_review_task(self) -> Task:
        """Task for conducting security review."""
        return Task(config=self.tasks_config["security_review_task"])

    @Crew
    def crew(self) -> Crew:
        """Create the Security Review crew."""
        # Trivy is a native MCP server, so no MCPServerAdapter wrapper is
        # created and no extra tools are passed to the crew; the native
        # server is expected to be available in the MCP ecosystem.
        return Crew(
            agents=[self.security_reviewer()],
            tasks=[self.security_review_task()],
            process="sequential",
            verbose=True,
        )

View File

View File

@ -0,0 +1,7 @@
# Summarizer Crew Agents Configuration
summariser:
role: Senior Code Review Coordinator
goal: Synthesize individual review results into a cohesive, actionable review report
backstory: You are a senior technical lead with extensive experience in code review practices across multiple domains. You excel at combining feedback from different reviewers into a clear, prioritized, and actionable summary that helps development teams improve their code efficiently.
verbose: true
allow_delegation: false

View File

@ -0,0 +1,16 @@
# Summarizer Crew Tasks Configuration
summarise_task:
description: |
Synthesize the results from code, security, and infrastructure reviews into a cohesive review report.
Code Review Results: {code_review_results}
Security Review Results: {security_review_results}
Infrastructure Review Results: {infra_review_results}
Context: {context}
expected_output: |
A comprehensive review report that includes:
- Executive summary of all findings
- Prioritized list of issues (critical, high, medium, low)
- Specific recommendations for each domain (code, security, infrastructure)
- Overall assessment and recommendation (e.g., Approved, Approved with Minor Changes, Significant Changes Needed)
- Summary of positive aspects of the PR
agent: summariser

View File

@ -0,0 +1,43 @@
from crewai import CrewBase, Agent, Task, Crew
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters
import os
from typing import Dict, Any
class SummariserCrew(CrewBase):
    """Summariser Crew for synthesizing review results."""

    agents_config = "config/agents.yaml"
    tasks_config = "config/tasks.yaml"

    def __init__(self):
        super().__init__()
        # Works purely on text produced by the other crews, so no MCP
        # server connections are configured.

    @Agent
    def summariser(self) -> Agent:
        """Senior Code Review Coordinator agent for summarizing reviews."""
        summariser_cfg = self.agents_config["summariser"]
        # Summarization needs no external tools.
        return Agent(config=summariser_cfg, tools=[], verbose=True)

    @Task
    def summarise_task(self) -> Task:
        """Task for synthesizing review results."""
        return Task(config=self.tasks_config["summarise_task"])

    @Crew
    def crew(self) -> Crew:
        """Create the Summariser crew."""
        return Crew(
            agents=[self.summariser()],
            tasks=[self.summarise_task()],
            process="sequential",
            verbose=True,
        )

View File

@ -0,0 +1,72 @@
name: Build and Push Image
on:
  push:
    branches:
      - master
jobs:
  build:
    name: Build and push image
    runs-on: ubuntu-latest
    container: catthehacker/ubuntu:act-latest
    if: gitea.ref == 'refs/heads/master'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Create Kubeconfig
        run: |
          # -p: do not fail if the directory already exists (e.g. cached runner)
          mkdir -p $HOME/.kube
          echo "${{ secrets.KUBEC_CONFIG_BUILDX_NEW }}" > $HOME/.kube/config
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver: kubernetes
          driver-opts: |
            namespace=gitea-runner
            qemu.install=true
      - name: Login to Docker Registry
        uses: docker/login-action@v3
        with:
          registry: git.aridgwayweb.com
          username: armistace
          password: ${{ secrets.REG_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          platforms: linux/amd64,linux/arm64
          tags: |
            git.aridgwayweb.com/armistace/blog:latest
      - name: Trivy Scan
        run: |
          echo "Installing Trivy "
          sudo apt-get update
          sudo apt-get install -y wget apt-transport-https gnupg lsb-release
          wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
          echo deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main | sudo tee -a /etc/apt/sources.list.d/trivy.list
          sudo apt-get update
          sudo apt-get install -y trivy
          trivy image --format table --exit-code 1 --ignore-unfixed --vuln-type os,library --severity HIGH,CRITICAL git.aridgwayweb.com/armistace/blog:latest
      - name: Deploy
        run: |
          echo "Installing Kubectl"
          apt-get update
          apt-get install -y apt-transport-https ca-certificates curl gnupg
          # /etc/apt/keyrings is not guaranteed to exist on this base image
          mkdir -p /etc/apt/keyrings
          curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.33/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
          chmod 644 /etc/apt/keyrings/kubernetes-apt-keyring.gpg
          echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.33/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list
          chmod 644 /etc/apt/sources.list.d/kubernetes.list
          apt-get update
          # -y: apt would otherwise prompt and hang the non-interactive runner
          apt-get install -y kubectl
          # --ignore-not-found: first deploy has no namespace to delete yet
          kubectl delete namespace blog --ignore-not-found=true
          kubectl create namespace blog
          kubectl create secret docker-registry regcred --docker-server=${{ vars.DOCKER_SERVER }} --docker-username=${{ vars.DOCKER_USERNAME }} --docker-password='${{ secrets.DOCKER_PASSWORD }}' --docker-email=${{ vars.DOCKER_EMAIL }} --namespace=blog
          kubectl apply -f kube/blog_pod.yaml && kubectl apply -f kube/blog_deployment.yaml && kubectl apply -f kube/blog_service.yaml

View File

@ -0,0 +1,24 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: blog-deployment
labels:
app: blog
namespace: blog
spec:
replicas: 3
selector:
matchLabels:
app: blog
template:
metadata:
labels:
app: blog
spec:
containers:
- name: blog
image: git.aridgwayweb.com/armistace/blog:latest
ports:
- containerPort: 8000
imagePullSecrets:
- name: regcred

View File

@ -0,0 +1,13 @@
apiVersion: v1
kind: Pod
metadata:
name: blog
namespace: blog
spec:
containers:
- name: blog
image: git.aridgwayweb.com/armistace/blog:latest
ports:
- containerPort: 8000
imagePullSecrets:
- name: regcred

View File

@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
name: blog-service
namespace: blog
spec:
type: NodePort
selector:
app: blog
ports:
- port: 80
targetPort: 8000
nodePort: 30009

View File

@ -0,0 +1,24 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: pr-reviewer-deployment
labels:
app: pr-reviewer
namespace: pr-reviewer
spec:
replicas: 3
selector:
matchLabels:
app: pr-reviewer
template:
metadata:
labels:
app: pr-reviewer
spec:
containers:
- name: pr-reviewer
image: git.aridgwayweb.com/armistace/pr-reviewer:latest
ports:
- containerPort: 8000
imagePullSecrets:
- name: regcred

13
kube/pr-reviewer_pod.yaml Normal file
View File

@ -0,0 +1,13 @@
apiVersion: v1
kind: Pod
metadata:
name: pr-reviewer
namespace: pr-reviewer
spec:
containers:
- name: pr-reviewer
image: git.aridgwayweb.com/armistace/pr-reviewer:latest
ports:
- containerPort: 8000
imagePullSecrets:
- name: regcred

View File

@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
  name: pr-reviewer-service
  namespace: pr-reviewer
spec:
  type: NodePort
  selector:
    app: pr-reviewer
  ports:
    - port: 80
      targetPort: 8000
      # NodePorts are allocated cluster-wide; 30009 is already claimed by
      # blog-service in this repo's other manifest, so use the next free port.
      nodePort: 30010

0
mcp_servers/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

146
mcp_servers/checkov_mcp.py Normal file
View File

@ -0,0 +1,146 @@
#!/usr/bin/env python3
"""
MCP server for Checkov Kubernetes security scanner.
"""
import asyncio
import json
import logging
import subprocess
import sys
import tempfile
import os
from typing import Any, Dict, List
import mcp.server.stdio
import mcp.types as types
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create server instance
server = Server("checkov-mcp")
@server.list_tools()
async def handle_list_tools() -> List[types.Tool]:
    """Advertise the tools exposed by this MCP server."""
    # JSON schema describing the single required string argument.
    manifest_schema = {
        "type": "object",
        "properties": {
            "manifest_content": {
                "type": "string",
                "description": "The content of the Kubernetes manifest(s) to scan",
            },
        },
        "required": ["manifest_content"],
    }
    scan_tool = types.Tool(
        name="scan_kubernetes_manifests",
        description="Scan Kubernetes manifests for security issues using Checkov",
        inputSchema=manifest_schema,
    )
    return [scan_tool]
@server.call_tool()
async def handle_call_tool(
    name: str, arguments: Dict[str, Any] | None
) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """
    Handle tool calls.

    Writes the manifest content to a temporary YAML file, runs Checkov on
    it, and returns the JSON scan output (plus any stderr) as text content.

    Raises:
        ValueError: if the tool name is unknown or required arguments are
            missing.
    """
    if name != "scan_kubernetes_manifests":
        raise ValueError(f"Unknown tool: {name}")
    if not arguments:
        raise ValueError("Missing arguments")
    manifest_content = arguments.get("manifest_content")
    if not manifest_content:
        raise ValueError("Missing manifest_content argument")
    try:
        # Create a temporary file to hold the manifest content
        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as temp_file:
            temp_file.write(manifest_content)
            temp_file_path = temp_file.name
        try:
            # Run checkov on the manifest file
            process = await asyncio.create_subprocess_exec(
                "checkov",
                "-f", temp_file_path,
                "--quiet",  # Reduce verbosity
                "--output", "json",  # Get JSON output for easier parsing
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout, stderr = await process.communicate()
            # Checkov exits non-zero when it has findings, so the return code
            # is not treated as an error here; the output is reported as-is.
            # (Replaced a dead-code membership test over codes 0-25 that had
            # no effect.)
            result = stdout.decode()
            if stderr:
                result += "\nSTDERR:\n" + stderr.decode()
            # 127 is the shell's "command not found" code; surface a clearer
            # message when checkov produced no output at all.
            if not result.strip() and process.returncode == 127:
                result = "Error: Checkov command not found. Please install checkov."
            return [
                types.TextContent(
                    type="text",
                    text=result
                )
            ]
        finally:
            # Clean up the temporary file
            os.unlink(temp_file_path)
    except FileNotFoundError:
        logger.error("Checkov command not found. Please ensure checkov is installed and in PATH.")
        return [
            types.TextContent(
                type="text",
                text="Error: Checkov command not found. Please install checkov."
            )
        ]
    except Exception as e:
        logger.exception("Error running checkov")
        return [
            types.TextContent(
                type="text",
                text=f"Error running checkov: {str(e)}"
            )
        ]
async def main():
    """
    Run the MCP server.

    Serves the checkov-mcp server over stdio until the streams close.
    """
    # Run the server using stdio; stdio_server yields the paired read/write
    # streams for the lifetime of the connection.
    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="checkov-mcp",
                server_version="0.1.0",
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )


# Script entry point: the crews launch this wrapper as a subprocess
# (`python .../checkov_mcp.py`).
if __name__ == "__main__":
    asyncio.run(main())

133
mcp_servers/hadolint_mcp.py Normal file
View File

@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
MCP server for Hadolint Dockerfile linter.
"""
import asyncio
import json
import logging
import subprocess
import sys
from typing import Any, Dict, List
import mcp.server.stdio
import mcp.types as types
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create the module-level MCP server; the decorated handlers below register against it.
server = Server("hadolint-mcp")
@server.list_tools()
async def handle_list_tools() -> List[types.Tool]:
    """Advertise the tools exposed by this server.

    A single tool is offered: ``lint_dockerfile``, which takes the raw text
    of a Dockerfile and runs Hadolint over it.
    """
    schema = {
        "type": "object",
        "properties": {
            "dockerfile_content": {
                "type": "string",
                "description": "The content of the Dockerfile to lint"
            }
        },
        "required": ["dockerfile_content"]
    }
    lint_tool = types.Tool(
        name="lint_dockerfile",
        description="Lint a Dockerfile using Hadolint",
        inputSchema=schema,
    )
    return [lint_tool]
@server.call_tool()
async def handle_call_tool(
    name: str, arguments: Dict[str, Any] | None
) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Dispatch a tool call.

    Only ``lint_dockerfile`` is supported: the Dockerfile text is piped to
    hadolint on stdin and the linter's output is returned as text content.

    Raises:
        ValueError: for an unknown tool name or missing arguments.
    """
    if name != "lint_dockerfile":
        raise ValueError(f"Unknown tool: {name}")
    if not arguments:
        raise ValueError("Missing arguments")
    dockerfile_content = arguments.get("dockerfile_content")
    if not dockerfile_content:
        raise ValueError("Missing dockerfile_content argument")
    try:
        # "-" tells hadolint to read the Dockerfile from stdin.
        process = await asyncio.create_subprocess_exec(
            "hadolint",
            "-",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = await process.communicate(input=dockerfile_content.encode())
        if process.returncode == 0:
            result = stdout.decode()
        else:
            # Non-zero exit means hadolint reported findings; keep stderr too
            # so nothing is lost.
            result = stdout.decode() + stderr.decode()
        # No output at all means a clean pass.
        if not result.strip():
            result = "Hadolint: No issues found."
        return [types.TextContent(type="text", text=result)]
    except FileNotFoundError:
        logger.error("Hadolint command not found. Please ensure hadolint is installed and in PATH.")
        return [
            types.TextContent(
                type="text",
                text="Error: Hadolint command not found. Please install hadolint."
            )
        ]
    except Exception as e:
        logger.exception("Error running hadolint")
        return [
            types.TextContent(
                type="text",
                text=f"Error running hadolint: {str(e)}"
            )
        ]
async def main():
    """Serve the hadolint MCP server over a stdio transport."""
    init_options = InitializationOptions(
        server_name="hadolint-mcp",
        server_version="0.1.0",
        capabilities=server.get_capabilities(
            notification_options=NotificationOptions(),
            experimental_capabilities={},
        ),
    )
    # stdio_server yields the read/write stream pair for the transport.
    async with mcp.server.stdio.stdio_server() as (reader, writer):
        await server.run(reader, writer, init_options)


if __name__ == "__main__":
    asyncio.run(main())

71
pyproject.toml Normal file
View File

@ -0,0 +1,71 @@
[project]
name = "pr-reviewer"
version = "0.1.0"
description = "A PR Reviewer system using CrewAI and MCP"
readme = "README.md"
requires-python = ">=3.10,<3.14"
authors = [
{name = "Developer", email = "dev@example.com"}
]
keywords = ["pull-request", "code-review", "security", "infrastructure", "crewai", "mcp"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
dependencies = [
"crewai>=0.28.0",
"fastapi>=0.104.0",
"uvicorn>=0.24.0",
"mcp>=0.1.0",
"pydantic>=2.5.0",
"python-dotenv>=1.0.0",
"gitpython>=3.1.0"
]
[project.optional-dependencies]
anthropic = ["anthropic>=0.7.0"]
openai = ["openai>=1.0.0"]
ollama = []
dev = [
"pytest>=7.0.0",
"black>=22.0.0",
"flake8>=4.0.0",
"mypy>=0.9.0",
]
[project.urls]
Homepage = "https://github.com/your-org/pr-reviewer"
Documentation = "https://github.com/your-org/pr-reviewer#readme"
Repository = "https://github.com/your-org/pr-reviewer.git"
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[tool.black]
line-length = 88
target-version = ['py310']
include = '\.py$'
exclude = '''
/(\.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''
[tool.mypy]
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true

View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,45 @@
import os
from pathlib import Path
from typing import Dict, Optional
from .state import PRReviewState, ContextOverrides
def resolve_context(state: PRReviewState) -> Dict[str, str]:
    """
    Resolve the context for each review type based on overrides and default files.

    Args:
        state: The PR review state containing potential context overrides

    Returns:
        A dictionary with keys 'code_review', 'security_review', 'infra_review'
        and their resolved context strings. A non-empty override wins; otherwise
        the default file (relative to the working directory) is read, and a
        missing file yields an empty string.
    """
    defaults = {
        'code_review': 'contexts/defaults/code_review.md',
        'security_review': 'contexts/defaults/security_review.md',
        'infra_review': 'contexts/defaults/infra_review.md'
    }
    overrides = state.context_overrides

    def _load_default(path: str) -> str:
        # A missing default file is treated as an empty context.
        try:
            with open(path, 'r') as handle:
                return handle.read().strip()
        except FileNotFoundError:
            return ''

    resolved: Dict[str, str] = {}
    for context_type, default_path in defaults.items():
        candidate = getattr(overrides, context_type, None) if overrides else None
        if candidate is not None and candidate.strip():
            resolved[context_type] = candidate.strip()
        else:
            resolved[context_type] = _load_default(default_path)
    return resolved

150
src/pr_reviewer/flow.py Normal file
View File

@ -0,0 +1,150 @@
from crewai.flow import Flow, listen, start, and_
from crewai import Crew
from .state import PRReviewState
from .llm import get_llm
from .context import resolve_context
import os
from datetime import datetime
# Import the crews
from crews.code_review_crew.code_review_crew import CodeReviewCrew
from crews.security_review_crew.security_review_crew import SecurityReviewCrew
from crews.infra_review_crew.infra_review_crew import InfraReviewCrew
from crews.summariser_crew.summariser_crew import SummariserCrew
class CodeReviewFlow(Flow[PRReviewState]):
    """CrewAI flow orchestrating a full PR review.

    Pipeline (from the decorators below): ``receive_pr`` seeds the state; the
    three review steps each listen on ``receive_pr``; ``summarise`` waits, via
    ``and_``, for all three to finish; ``format_response`` produces the dict
    returned to the caller.
    """
    @start()
    def receive_pr(self, inputs):
        """Initialize the PR review state with input data."""
        print(f"Received PR review request for PR #{inputs.get('pr_id')}")
        # Initialize the state
        self.state.pr_id = inputs.get("pr_id", "")
        self.state.pr_title = inputs.get("pr_title", "")
        self.state.pr_description = inputs.get("pr_description", "")
        self.state.pr_url = inputs.get("pr_url", "")
        self.state.repo_name = inputs.get("repo_name", "")
        self.state.repo_url = inputs.get("repo_url", "")
        self.state.branch = inputs.get("branch", "")
        self.state.base_branch = inputs.get("base_branch", "")
        # Convert files from list of dicts to list of FileInfo objects if needed
        # NOTE(review): only the first element's type is inspected — assumes a
        # homogeneous list; confirm callers never mix dicts and FileInfo.
        files_input = inputs.get("files", [])
        if files_input and isinstance(files_input[0], dict):
            # Convert dicts to FileInfo objects
            from .state import FileInfo
            self.state.files = [FileInfo(**file_dict) for file_dict in files_input]
        else:
            self.state.files = files_input
        # Handle context_overrides
        context_overrides_input = inputs.get("context_overrides")
        if context_overrides_input and isinstance(context_overrides_input, dict):
            from .state import ContextOverrides
            self.state.context_overrides = ContextOverrides(**context_overrides_input)
        else:
            self.state.context_overrides = context_overrides_input
        self.state.started_at = datetime.now()
        # Resolve context
        self.state.resolved_context = resolve_context(self.state)
        return self.state
    @listen(receive_pr)
    def run_code_review(self):
        """Run the code review crew."""
        print("Starting code review...")
        # Instantiate and run the code review crew
        code_crew = CodeReviewCrew()
        # The crew's kickoff method expects inputs matching the task template variables.
        # `.dict()` is the pydantic v1-style serializer; plain dicts pass through as-is.
        inputs = {
            "pr_title": self.state.pr_title,
            "pr_description": self.state.pr_description,
            "files": [file.dict() if hasattr(file, 'dict') else file for file in self.state.files],
            "context": self.state.resolved_context.get("code_review", "")
        }
        result = code_crew.crew().kickoff(inputs=inputs)
        self.state.code_review_results = str(result)
        print("Code review completed.")
        return self.state
    @listen(receive_pr)
    def run_security_review(self):
        """Run the security review crew."""
        print("Starting security review...")
        # Instantiate and run the security review crew
        security_crew = SecurityReviewCrew()
        inputs = {
            "pr_title": self.state.pr_title,
            "pr_description": self.state.pr_description,
            "files": [file.dict() if hasattr(file, 'dict') else file for file in self.state.files],
            "context": self.state.resolved_context.get("security_review", "")
        }
        result = security_crew.crew().kickoff(inputs=inputs)
        self.state.security_review_results = str(result)
        print("Security review completed.")
        return self.state
    @listen(receive_pr)
    def run_infra_review(self):
        """Run the infrastructure review crew."""
        print("Starting infrastructure review...")
        # Instantiate and run the infrastructure review crew
        infra_crew = InfraReviewCrew()
        inputs = {
            "pr_title": self.state.pr_title,
            "pr_description": self.state.pr_description,
            "files": [file.dict() if hasattr(file, 'dict') else file for file in self.state.files],
            "context": self.state.resolved_context.get("infra_review", "")
        }
        result = infra_crew.crew().kickoff(inputs=inputs)
        self.state.infra_review_results = str(result)
        print("Infrastructure review completed.")
        return self.state
    @listen(and_(run_code_review, run_security_review, run_infra_review))
    def summarise(self):
        """Summarize the review results."""
        print("Starting summarisation...")
        # Instantiate and run the summariser crew
        summariser_crew = SummariserCrew()
        # NOTE(review): unlike the review crews (which each get one context
        # string), the whole resolved_context dict is passed here — confirm
        # the summariser task template expects a dict.
        inputs = {
            "code_review_results": self.state.code_review_results,
            "security_review_results": self.state.security_review_results,
            "infra_review_results": self.state.infra_review_results,
            "context": self.state.resolved_context
        }
        result = summariser_crew.crew().kickoff(inputs=inputs)
        self.state.review_summary = str(result)
        self.state.completed_at = datetime.now()
        print("Summarisation completed.")
        return self.state
    @listen(summarise)
    def format_response(self):
        """Format the final response."""
        print("Formatting final response...")
        # Return the final state as the response
        return {
            "pr_id": self.state.pr_id,
            "pr_title": self.state.pr_title,
            "review_summary": self.state.review_summary,
            "code_review_results": self.state.code_review_results,
            "security_review_results": self.state.security_review_results,
            "infra_review_results": self.state.infra_review_results,
            "started_at": self.state.started_at.isoformat() if self.state.started_at else None,
            "completed_at": self.state.completed_at.isoformat() if self.state.completed_at else None,
            "error": self.state.error
        }

56
src/pr_reviewer/llm.py Normal file
View File

@ -0,0 +1,56 @@
import os
from crewai import LLM
from typing import Optional
def create_llm() -> LLM:
    """
    Build a CrewAI LLM instance from environment variables.

    Environment variables:
        LLM_MODEL: the model name to use (required)
        LLM_BASE_URL: the base URL for the LLM API (optional)
        LLM_API_KEY: the API key for the LLM service (optional)
        LLM_PROVIDER: the provider, e.g. 'openai', 'anthropic', 'ollama' (optional)

    Returns:
        LLM: A CrewAI LLM instance

    Raises:
        ValueError: if LLM_MODEL is not set
    """
    model = os.getenv("LLM_MODEL")
    if not model:
        raise ValueError("LLM_MODEL environment variable is required")
    # Forward only the optional settings that are actually configured.
    optional = {
        "base_url": os.getenv("LLM_BASE_URL"),
        "api_key": os.getenv("LLM_API_KEY"),
        "provider": os.getenv("LLM_PROVIDER"),
    }
    extra_kwargs = {key: value for key, value in optional.items() if value}
    return LLM(model=model, **extra_kwargs)
# Shared LLM singleton, created lazily by get_llm().
_shared_llm: Optional[LLM] = None
def get_llm() -> LLM:
    """
    Return the process-wide shared LLM, creating it on first use.

    Returns:
        LLM: The shared LLM instance
    """
    global _shared_llm
    if _shared_llm is not None:
        return _shared_llm
    _shared_llm = create_llm()
    return _shared_llm

297
src/pr_reviewer/main.py Normal file
View File

@ -0,0 +1,297 @@
import asyncio
import logging
import os
import time
import uuid
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from typing import Any, Dict, List, Optional

from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
import uvicorn

from .flow import CodeReviewFlow
from .state import PRReviewState, FileInfo, ContextOverrides
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = FastAPI(
    title="PR Reviewer API",
    description="API for conducting automated pull request reviews",
    version="0.1.0"
)
# Timeout configuration (seconds), overridable via environment variables.
# NOTE(review): PER_CREW_TIMEOUT is defined but never referenced in this
# module — confirm it is consumed elsewhere or wire it into the crews.
TOTAL_FLOW_TIMEOUT = int(os.getenv("TOTAL_FLOW_TIMEOUT", "600")) # Default 10 minutes
PER_CREW_TIMEOUT = int(os.getenv("PER_CREW_TIMEOUT", "300")) # Default 5 minutes
@app.get("/api/v1/health")
async def health_check() -> Dict[str, str]:
    """
    Liveness probe: report that the service is up.
    """
    payload = {"status": "healthy", "service": "pr-reviewer"}
    return payload
@app.post("/api/v1/review")
async def review_pr(request: Request) -> Dict[str, Any]:
    """
    Endpoint to trigger a PR review (request/response schema per Section 2.2.1).

    Parses and validates the JSON payload, runs the CodeReviewFlow in a worker
    thread bounded by TOTAL_FLOW_TIMEOUT, and returns the review results plus
    metadata.

    Raises:
        HTTPException: 422 for invalid or incomplete payloads, 504 when the
            flow exceeds TOTAL_FLOW_TIMEOUT, 500 for unexpected failures.
    """
    start_time = time.time()
    review_id = str(uuid.uuid4())
    # Log the incoming request (excluding sensitive data)
    logger.info(f"Received PR review request: {review_id}")
    try:
        # Parse the JSON payload
        try:
            payload = await request.json()
        except Exception:
            raise HTTPException(status_code=422, detail="Invalid JSON payload")
        # Request schema (Section 2.2.1):
        #   pr_id (req), title (req), description (opt)
        #   repo: {name (req), url (req)}
        #   source: {branch (req), commit (opt)}, target: {branch (req), commit (opt)}
        #   files: [{path (req), status (req); content, additions, deletions, patch (opt)}]
        #   context: {code_review, security_review, infra_review} (all opt)
        pr_id = payload.get("pr_id")
        title = payload.get("title")
        description = payload.get("description")
        repo_data = payload.get("repo", {})
        repo_name = repo_data.get("name")
        repo_url = repo_data.get("url")
        source_branch = payload.get("source", {}).get("branch")
        target_branch = payload.get("target", {}).get("branch")
        files_data = payload.get("files", [])
        context_data = payload.get("context", {})
        # Validate required fields
        if not pr_id:
            raise HTTPException(status_code=422, detail="Missing required field: pr_id")
        if not title:
            raise HTTPException(status_code=422, detail="Missing required field: title")
        if not repo_name:
            raise HTTPException(status_code=422, detail="Missing required field: repo.name")
        if not repo_url:
            raise HTTPException(status_code=422, detail="Missing required field: repo.url")
        if not source_branch:
            raise HTTPException(status_code=422, detail="Missing required field: source.branch")
        if not target_branch:
            raise HTTPException(status_code=422, detail="Missing required field: target.branch")
        # Convert files data to FileInfo objects, validating per-file fields
        files = []
        for file_data in files_data:
            if not file_data.get("path"):
                raise HTTPException(status_code=422, detail="Missing required field: files[].path")
            if not file_data.get("status"):
                raise HTTPException(status_code=422, detail="Missing required field: files[].status")
            file_info = FileInfo(
                path=file_data.get("path", ""),
                content=file_data.get("content"),
                status=file_data.get("status", "modified"),
                additions=file_data.get("additions", 0),
                deletions=file_data.get("deletions", 0),
                patch=file_data.get("patch")
            )
            files.append(file_info)
        # Create context overrides object (only if at least one field is provided)
        context_overrides = None
        if any([context_data.get("code_review"),
                context_data.get("security_review"),
                context_data.get("infra_review")]):
            context_overrides = ContextOverrides(
                code_review=context_data.get("code_review"),
                security_review=context_data.get("security_review"),
                infra_review=context_data.get("infra_review")
            )
        # Initialize and run the flow with timeout
        flow = CodeReviewFlow()
        # Run the synchronous kickoff in a thread pool so it does not block the
        # event loop. get_running_loop() is the correct (non-deprecated) way to
        # obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        with ThreadPoolExecutor() as pool:
            try:
                # Wait for the flow to complete with a timeout
                flow_result = await asyncio.wait_for(
                    loop.run_in_executor(
                        pool,
                        lambda: flow.kickoff(inputs={
                            "pr_id": pr_id,
                            "pr_title": title,
                            "pr_description": description,
                            "pr_url": f"{repo_url}/pull/{pr_id}",  # Construct PR URL
                            "repo_name": repo_name,
                            "repo_url": repo_url,
                            "branch": source_branch,  # Using source branch as the active branch
                            "base_branch": target_branch,  # Using target branch as base
                            "files": [file.dict() for file in files],  # Convert to dict for flow
                            "context_overrides": context_overrides.dict() if context_overrides else None
                        })
                    ),
                    timeout=TOTAL_FLOW_TIMEOUT
                )
            except asyncio.TimeoutError:
                logger.error(f"PR review timed out: {review_id}")
                raise HTTPException(
                    status_code=504,
                    detail=f"PR review timed out after {TOTAL_FLOW_TIMEOUT} seconds"
                )
        # Calculate processing time
        processing_time = time.time() - start_time
        # Response schema: review_id, status, timestamp, results{code_review,
        # security_review, infra_review, summary}, metadata{processing time,
        # pr_id, repo{name, url}}
        response = {
            "review_id": review_id,
            "status": "completed" if not flow_result.get("error") else "failed",
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "results": {
                "code_review": flow_result.get("code_review_results"),
                "security_review": flow_result.get("security_review_results"),
                "infra_review": flow_result.get("infra_review_results"),
                "summary": flow_result.get("review_summary")
            },
            "metadata": {
                "processing_time_seconds": round(processing_time, 2),
                "pr_id": pr_id,
                "repo": {
                    "name": repo_name,
                    "url": repo_url
                }
            }
        }
        # Include error information if present
        if flow_result.get("error"):
            response["metadata"]["error"] = flow_result["error"]
            logger.error(f"PR review failed: {review_id} - {flow_result['error']}")
        else:
            logger.info(f"PR review completed successfully: {review_id} in {processing_time:.2f}s")
        return response
    except HTTPException:
        # Re-raise HTTP exceptions as they are already properly formatted
        raise
    except asyncio.TimeoutError:
        # Defensive: the wait_for above should already have converted this
        logger.error(f"PR review timed out: {review_id}")
        raise HTTPException(
            status_code=504,
            detail=f"PR review timed out after {TOTAL_FLOW_TIMEOUT} seconds"
        )
    except Exception as e:
        # Log the error for debugging
        logger.error(f"Error in PR review: {review_id} - {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
# Error handlers
@app.exception_handler(404)
async def not_found_handler(request: Request, exc: HTTPException):
    """Return a uniform JSON body for 404 responses."""
    body = {"message": "Endpoint not found"}
    return JSONResponse(content=body, status_code=404)
@app.exception_handler(422)
async def request_validation_exception_handler(request: Request, exc: HTTPException):
    """Return a uniform JSON body for 422 responses, echoing the failure detail."""
    body = {"message": "Invalid request payload", "details": exc.detail}
    return JSONResponse(content=body, status_code=422)
@app.exception_handler(500)
async def internal_error_handler(request: Request, exc: HTTPException):
    """Return a generic JSON body for 500 responses (no exception detail leaked)."""
    body = {"message": "Internal server error"}
    return JSONResponse(content=body, status_code=500)
@app.exception_handler(504)
async def timeout_handler(request: Request, exc: HTTPException):
    """Return a uniform JSON body for 504 responses, including the timeout detail."""
    body = {"message": "Request timeout", "details": exc.detail}
    return JSONResponse(content=body, status_code=504)
# Start the API with uvicorn when this module is executed directly.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

45
src/pr_reviewer/state.py Normal file
View File

@ -0,0 +1,45 @@
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class FileInfo(BaseModel):
    """Information about a file in the PR."""
    path: str  # repository-relative path of the file
    content: Optional[str] = None  # full file content, when supplied by the caller
    status: str  # added, modified, removed, etc.
    additions: int = 0  # number of added lines
    deletions: int = 0  # number of deleted lines
    patch: Optional[str] = None  # diff hunk for this file, if provided
class ContextOverrides(BaseModel):
    """Overrides for the default context."""
    # Each field, when set and non-empty, replaces the corresponding default
    # context file for that review type.
    code_review: Optional[str] = None
    security_review: Optional[str] = None
    infra_review: Optional[str] = None
class PRReviewState(BaseModel):
    """State of the PR review process."""
    # Input fields
    pr_id: str  # identifier of the pull request under review
    pr_title: str
    pr_description: Optional[str] = None
    pr_url: Optional[str] = None
    repo_name: str
    repo_url: str
    branch: str  # source (head) branch of the PR
    base_branch: str  # target (base) branch the PR merges into
    files: List[FileInfo] = Field(default_factory=list)  # changed files in the PR
    context_overrides: Optional[ContextOverrides] = None  # caller-supplied context overrides
    # Internal fields
    resolved_context: Optional[Dict[str, str]] = None  # per-review-type context after override/default resolution
    code_review_results: Optional[str] = None
    security_review_results: Optional[str] = None
    infra_review_results: Optional[str] = None
    review_summary: Optional[str] = None  # synthesised summary of the three reviews
    # Metadata
    started_at: Optional[datetime] = None  # set when the flow receives the PR
    completed_at: Optional[datetime] = None  # set after summarisation finishes
    error: Optional[str] = None  # populated when the review fails

View File

@ -0,0 +1,70 @@
"""
Integration tests for MCP servers.
"""
import pytest
import sys
import os
# Add the project root to the path so we can import the MCP servers
# (two levels up from this file; assumes tests live at <root>/tests/<subdir>).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
def test_hadolint_mcp_import():
    """Test that the Hadolint MCP server can be imported."""
    try:
        from mcp_servers.hadolint_mcp import server
    except ImportError as e:
        pytest.fail(f"Failed to import Hadolint MCP server: {e}")
    else:
        assert server is not None
def test_checkov_mcp_import():
    """Test that the Checkov MCP server can be imported."""
    try:
        from mcp_servers.checkov_mcp import server
    except ImportError as e:
        pytest.fail(f"Failed to import Checkov MCP server: {e}")
    else:
        assert server is not None
def test_crew_imports():
    """Test that all crew modules can be imported and instantiated."""
    try:
        from crews.code_review_crew.code_review_crew import CodeReviewCrew
        from crews.security_review_crew.security_review_crew import SecurityReviewCrew
        from crews.infra_review_crew.infra_review_crew import InfraReviewCrew
        from crews.summariser_crew.summariser_crew import SummariserCrew

        # Construction failures are distinguished from import failures below.
        instances = (
            CodeReviewCrew(),
            SecurityReviewCrew(),
            InfraReviewCrew(),
            SummariserCrew(),
        )
        for instance in instances:
            assert instance is not None
    except ImportError as e:
        pytest.fail(f"Failed to import crew modules: {e}")
    except Exception as e:
        pytest.fail(f"Failed to instantiate crews: {e}")
def test_flow_import():
    """Test that the flow module can be imported."""
    try:
        from src.pr_reviewer.flow import CodeReviewFlow
        assert CodeReviewFlow() is not None
    except ImportError as e:
        pytest.fail(f"Failed to import flow module: {e}")
def test_main_import():
    """Test that the main module can be imported."""
    try:
        from src.pr_reviewer.main import app
    except ImportError as e:
        pytest.fail(f"Failed to import main module: {e}")
    else:
        assert app is not None

100
tests/unit/test_context.py Normal file
View File

@ -0,0 +1,100 @@
"""
Unit tests for the context resolution system.
"""
import pytest
import tempfile
import os
from src.pr_reviewer.context import resolve_context
from src.pr_reviewer.state import PRReviewState, ContextOverrides
def test_resolve_context_with_defaults():
    """Test resolving context when default files exist."""
    with tempfile.TemporaryDirectory() as temp_dir:
        # resolve_context reads defaults relative to the working directory,
        # so build contexts/defaults inside a temp dir and chdir into it.
        defaults_dir = os.path.join(temp_dir, "contexts", "defaults")
        os.makedirs(defaults_dir)
        expected = {
            "code_review": "# Code Review\n\nFollow PEP8.",
            "security_review": "# Security Review\n\nCheck for SQL injection.",
            "infra_review": "# Infra Review\n\nValidate Dockerfile.",
        }
        for context_name, text in expected.items():
            with open(os.path.join(defaults_dir, f"{context_name}.md"), "w") as f:
                f.write(text)
        original_cwd = os.getcwd()
        os.chdir(temp_dir)
        try:
            state = PRReviewState(
                pr_id="123",
                pr_title="Test PR",
                repo_name="test-repo",
                repo_url="https://github.com/user/test-repo",
                branch="main",
                base_branch="main",
            )
            context = resolve_context(state)
            assert context == expected
        finally:
            os.chdir(original_cwd)
def test_resolve_context_with_overrides():
    """Test resolving context when overrides are provided."""
    overrides = ContextOverrides(
        code_review="Custom code review guidelines",
        security_review="Custom security guidelines",
        # infra_review is not provided, should use default if file exists
    )
    state = PRReviewState(
        pr_id="123",
        pr_title="Test PR",
        repo_name="test-repo",
        repo_url="https://github.com/user/test-repo",
        branch="main",
        base_branch="main",
        context_overrides=overrides,
    )
    context = resolve_context(state)
    assert context["code_review"] == "Custom code review guidelines"
    assert context["security_review"] == "Custom security guidelines"
    # NOTE(review): this assumes contexts/defaults/infra_review.md is absent
    # from the current working directory — verify when running from repo root.
    assert context["infra_review"] == ""  # Empty because no default file and no override
def test_resolve_context_empty_state():
    """Test resolving context with minimal state."""
    state = PRReviewState(
        pr_id="123",
        pr_title="Test PR",
        repo_name="test-repo",
        repo_url="https://github.com/user/test-repo",
        branch="main",
        base_branch="main",
    )
    # Default files may or may not exist in the working directory, so only
    # assert that each resolved value is a string (possibly empty).
    context = resolve_context(state)
    for context_name in ("code_review", "security_review", "infra_review"):
        assert isinstance(context[context_name], str)

69
tests/unit/test_state.py Normal file
View File

@ -0,0 +1,69 @@
"""
Unit tests for the state management module.
"""
import pytest
from src.pr_reviewer.state import FileInfo, ContextOverrides, PRReviewState
from datetime import datetime
def test_file_info_creation():
    """Test creating a FileInfo instance."""
    expected = {
        "path": "src/main.py",
        "content": "print('hello')",
        "status": "modified",
        "additions": 5,
        "deletions": 2,
    }
    file_info = FileInfo(**expected)
    for field_name, field_value in expected.items():
        assert getattr(file_info, field_name) == field_value
def test_context_overrides_creation():
    """Test creating a ContextOverrides instance."""
    expected = {
        "code_review": "Follow PEP8",
        "security_review": "Check for SQL injection",
        "infra_review": "Validate Dockerfile",
    }
    context_overrides = ContextOverrides(**expected)
    for field_name, field_value in expected.items():
        assert getattr(context_overrides, field_name) == field_value
def test_context_overrides_partial():
    """Test creating a ContextOverrides instance with partial fields."""
    context_overrides = ContextOverrides(code_review="Follow PEP8")
    assert context_overrides.code_review == "Follow PEP8"
    # Unset fields keep their declared None defaults.
    assert context_overrides.security_review is None
    assert context_overrides.infra_review is None
def test_pr_review_state_creation():
    """Test creating a PRReviewState instance."""
    expected = {
        "pr_id": "123",
        "pr_title": "Add feature",
        "repo_name": "test-repo",
        "repo_url": "https://github.com/user/test-repo",
        "branch": "feature",
        "base_branch": "main",
    }
    state = PRReviewState(**expected)
    for field_name, field_value in expected.items():
        assert getattr(state, field_name) == field_value
    # Unset fields fall back to their declared defaults.
    assert state.files == []
    assert state.started_at is None
    assert state.completed_at is None
0
tools/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

57
tools/git_tool.py Normal file
View File

@ -0,0 +1,57 @@
from pydantic import BaseModel, Field
from crewai_tools import BaseTool
import git
import os
import shutil
from typing import Type
class GitCloneInput(BaseModel):
    """Input schema for GitTool.

    All fields except ``repo_url`` are optional; their annotations are
    Optional (``str | None``) to match the ``None`` defaults — a bare ``str``
    annotation with a ``None`` default is rejected by pydantic v2 validation.
    """
    repo_url: str = Field(..., description="URL of the Git repository to clone")
    branch: str | None = Field(None, description="Branch to checkout (optional)")
    commit: str | None = Field(None, description="Commit hash to checkout (optional)")
    target_dir: str | None = Field(None, description="Target directory to clone into (optional)")
class GitTool(BaseTool):
    """Tool that clones a Git repository and checks out a branch or commit.

    Arguments are validated against GitCloneInput before _run is invoked.
    """
    name: str = "GitTool"
    description: str = "Clones a Git repository and checks out a specific branch or commit"
    args_schema: Type[BaseModel] = GitCloneInput

    def _run(self, repo_url: str, branch: str | None = None, commit: str | None = None,
             target_dir: str | None = None) -> str:
        """
        Clone a Git repository and checkout a specific branch or commit.

        Args:
            repo_url: URL of the Git repository to clone
            branch: Branch to checkout (optional; takes precedence over commit)
            commit: Commit hash to checkout (optional)
            target_dir: Target directory to clone into (optional; derived from
                the repository name when omitted)

        Returns:
            A message indicating the result of the operation.
        """
        # Derive the directory name from the URL when none is given.
        # rstrip("/") guards against URLs with a trailing slash, which would
        # otherwise yield an empty directory name.
        if target_dir is None:
            target_dir = repo_url.rstrip("/").split("/")[-1]
            if target_dir.endswith(".git"):
                target_dir = target_dir[:-4]
        # Remove any pre-existing directory so the clone starts clean.
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
        try:
            # Clone the repository
            repo = git.Repo.clone_from(repo_url, target_dir)
            # Checkout the specified branch or commit (branch wins if both given)
            if branch:
                repo.git.checkout(branch)
                checked_out = f"branch: {branch}"
            elif commit:
                repo.git.checkout(commit)
                checked_out = f"commit: {commit}"
            else:
                checked_out = "default branch"
            return f"Successfully cloned {repo_url} into {target_dir} and checked out {checked_out}"
        except Exception as e:
            return f"Error cloning repository: {str(e)}"