commit 10c8cfaa0fd4a4e86b487e19e8baee4ccd858841 Author: Andrew Ridgway Date: Fri May 8 23:46:17 2026 +1000 initial diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..6f6b5f7 --- /dev/null +++ b/.env.example @@ -0,0 +1,36 @@ +# LLM Configuration +# Choose one of the following LLM providers: +# For OpenAI: +LLM_MODEL=gpt-4 +LLM_BASE_URL=https://api.openai.com/v1 +LLM_API_KEY=your_openai_api_key_here +LLM_PROVIDER=openai + +# For Anthropic: +# LLM_MODEL=claude-3-opus-20240229 +# LLM_BASE_URL=https://api.anthropic.com +# LLM_API_KEY=your_anthropic_api_key_here +# LLM_PROVIDER=anthropic + +# For Ollama (local): +# LLM_MODEL=llama2 +# LLM_BASE_URL=http://localhost:11434 +# LLM_API_KEY=ollama # Ollama doesn't require a real API key +# LLM_PROVIDER=ollama + +# MCP Server Configuration +# Hadolint MCP Server (installed via pip in Docker) +# Checkov MCP Server (installed via pip in Docker) +# Semgrep MCP Server (native, no configuration needed) +# Trivy MCP Server (native, no configuration needed) + +# Optional: Semgrep App URL and Token for SEMgrep App functionality +SEMGRAPH_APP_URL= +SEMGRAPH_API_TOKEN= + +# Timeout Configuration (in seconds) +TOTAL_FLOW_TIMEOUT=600 +PER_CREW_TIMEOUT=300 + +# Other Configuration +LOG_LEVEL=INFO \ No newline at end of file diff --git a/.gitea_soon/workflows/build_push.yml b/.gitea_soon/workflows/build_push.yml new file mode 100644 index 0000000..0116b1f --- /dev/null +++ b/.gitea_soon/workflows/build_push.yml @@ -0,0 +1,72 @@ +name: Build and Push Image +on: + push: + branches: + - master + +jobs: + build: + name: Build and push image + runs-on: ubuntu-latest + container: catthehacker/ubuntu:act-latest + if: gitea.ref == 'refs/heads/master' + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Create Kubeconfig + run: | + mkdir $HOME/.kube + echo "${{ secrets.KUBEC_CONFIG_BUILDX_NEW }}" > $HOME/.kube/config + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver: 
kubernetes + driver-opts: | + namespace=gitea-runner + qemu.install=true + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + registry: git.aridgwayweb.com + username: armistace + password: ${{ secrets.REG_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: | + git.aridgwayweb.com/armistace/pr-reviewer:latest + + - name: Trivy Scan + run: | + echo "Installing Trivy " + sudo apt-get update + sudo apt-get install -y wget apt-transport-https gnupg lsb-release + wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add - + echo deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main | sudo tee -a /etc/apt/sources.list.d/trivy.list + sudo apt-get update + sudo apt-get install -y trivy + trivy image --format table --exit-code 1 --ignore-unfixed --vuln-type os,library --severity HIGH,CRITICAL git.aridgwayweb.com/armistace/pr-reviewer:latest + + - name: Deploy + run: | + echo "Installing Kubectl" + apt-get update + apt-get install -y apt-transport-https ca-certificates curl gnupg + curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.33/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + chmod 644 /etc/apt/keyrings/kubernetes-apt-keyring.gpg + echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.33/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list + chmod 644 /etc/apt/sources.list.d/kubernetes.list + apt-get update + apt-get install kubectl + kubectl delete namespace pr-reviewer + kubectl create namespace pr-reviewer + kubectl create secret docker-registry regcred --docker-server=${{ vars.DOCKER_SERVER }} --docker-username=${{ vars.DOCKER_USERNAME }} --docker-password='${{ secrets.DOCKER_PASSWORD }}' --docker-email=${{ vars.DOCKER_EMAIL }} --namespace=pr-reviewer + kubectl apply -f kube/pr-reviewer_pod.yaml && kubectl apply 
-f kube/pr-reviewer_deployment.yaml && kubectl apply -f kube/pr-reviewer_service.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d60172d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.spec/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3035931 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,64 @@ +# Stage 1: Base with system dependencies and tool installations +FROM python:3.12-slim as builder + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install Hadolint (for Dockerfile linting) +RUN curl -Lo /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64 && \ + chmod +x /bin/hadolint + +# Install Checkov (for Kubernetes security scanning) +RUN pip install checkov==3.1.123 + +# Install Trivy (for container and IaC scanning) - Native MCP server +RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.47.0 + +# Install Semgrep (for code scanning) - Will use native MCP server +RUN pip install semgrep==1.76.0 + +# Install UV package manager +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +# Stage 2: App with source code and UV sync +FROM python:3.12-slim + +# Create non-root user +RUN useradd --create-home --shell /bin/bash app +WORKDIR /app +USER app + +# Install runtime dependencies +RUN apt-get update && apt-get install -y \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Copy UV from builder stage +COPY --from=builder /bin/uv /bin/uv +COPY --from=builder /bin/uvx /bin/uvx + +# Copy application code +COPY --chown=app:app pyproject.toml . +COPY --chown=app:app README.md . 
+COPY --chown=app:app src/ ./src/ +COPY --chown=app:app mcp_servers/ ./mcp_servers/ +COPY --chown=app:app crews/ ./crews/ +COPY --chown=app:app tools/ ./tools/ +COPY --chown=app:app config/ ./config/ +COPY --chown=app:app contexts/ ./contexts/ + +# Install Python dependencies using UV +RUN uv sync --frozen --no-dev + +# Set environment variables +ENV PYTHONPATH=/app/src +ENV PATH="/app/.venv/bin:$PATH" + +# Expose port +EXPOSE 8000 + +# Set entrypoint +ENTRYPOINT ["uvicorn", "src.pr_reviewer.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5805519 --- /dev/null +++ b/README.md @@ -0,0 +1,185 @@ +# PR Reviewer + +An automated pull request review system using CrewAI and MCP (Model Context Protocol). + +## Overview + +This system provides automated code, security, and infrastructure reviews for pull requests using a multi-agent approach. It leverages CrewAI for orchestrating specialized review agents and MCP (Model Context Protocol) for integrating with various static analysis tools. 
+ +## Features + +- **Code Review**: Uses Semgrep (via MCP) to check code quality, best practices, and maintainability +- **Security Review**: Uses Trivy (native MCP) to identify security vulnerabilities +- **Infrastructure Review**: Uses Hadolint and Checkov (via MCP wrappers) to review Dockerfiles and Kubernetes manifests +- **Contextual Review**: Incorporates customizable guidelines for code, security, and infrastructure reviews +- **Automated Orchestration**: Uses CrewAI Flows to manage the review process +- **REST API**: FastAPI endpoint for triggering reviews +- **Containerized**: Docker support for easy deployment + +## Architecture + +The system follows a modular architecture with: +- State management using Pydantic models +- LLM factory for flexible provider support (OpenAI, Anthropic, Ollama) +- Context resolution system for incorporating review guidelines +- Crew-based implementation for each review type (code, security, infrastructure) +- MCP server integrations for static analysis tools +- Flow-based orchestration for managing the review process +- RESTful API for integration with CI/CD systems + +## Installation + +### Prerequisites +- Python 3.10-3.13 +- UV package manager +- Git +- Docker (optional, for containerized deployment) + +### Local Development +1. Clone the repository +2. Install UV package manager: `curl -LsSf https://astral.sh/uv/install.sh | sh` +3. Activate UV environment: `source $HOME/.local/bin/env` +4. Create virtual environment: `uv venv .venv` +5. Activate virtual environment: `source .venv/bin/activate` +6. Install dependencies: `uv pip install -e .` +7. Configure environment variables (see `.env.example`) + +### Docker Deployment +1. Build the Docker image: `docker build -t pr-reviewer .` +2. Run the container: `docker run -p 8000:8000 --env-file .env pr-reviewer` + +## Usage + +### API Endpoints + +#### Health Check +```bash +GET /api/v1/health +``` +Returns the health status of the service. 
+ +#### Trigger PR Review +```bash +POST /api/v1/review +``` +Initiates a pull request review. + +Request Body: +```json +{ + "pr_id": "123", + "title": "Add new feature", + "description": "This PR adds a new feature to the application", + "repo": { + "name": "my-repo", + "url": "https://github.com/user/my-repo" + }, + "source": { + "branch": "feature/new-feature", + "commit": "abc123" + }, + "target": { + "branch": "main", + "commit": "def456" + }, + "files": [ + { + "path": "src/main.py", + "content": "print('Hello World')", + "status": "modified", + "additions": 1, + "deletions": 0 + } + ], + "context": { + "code_review": "Follow PEP8 guidelines", + "security_review": "Check for SQL injection vulnerabilities", + "infra_review": "Ensure Dockerfile follows best practices" + } +} +``` + +Response: +```json +{ + "review_id": "uuid-string", + "status": "completed", + "timestamp": "2023-05-08T10:00:00Z", + "results": { + "code_review": "Code review results...", + "security_review": "Security review results...", + "infra_review": "Infrastructure review results...", + "summary": "Synthesized review summary..." + }, + "metadata": { + "processing_time_seconds": 45.2, + "pr_id": "123", + "repo": { + "name": "my-repo", + "url": "https://github.com/user/my-repo" + } + } +} +``` + +## Configuration + +### Environment Variables +See `.env.example` for detailed configuration options. + +### Context Files +Default review guidelines are located in `contexts/defaults/`: +- `code_review.md`: Coding practice guidelines +- `security_review.md`: Security guidelines +- `infra_review.md`: Infrastructure guidelines + +These can be overridden via the API context parameter. + +## Development + +### Running Tests +```bash +# Run unit tests +pytest + +# Run tests with coverage +pytest --cov=src.pr_reviewer + +# Run specific test categories +pytest tests/unit/ +pytest tests/integration/ +``` + +### Code Style +The project uses Black for code formatting and Flake8 for linting. 
+ +Run formatting: +```bash +black src/ +``` + +Run linting: +```bash +flake8 src/ +``` + +## Deployment + +### Kubernetes +Kubernetes manifests are available in the `k8s/` directory: +- Secret for LLM configuration +- Deployment for the PR Reviewer service +- Service for exposing the API + +### Gitea Actions +GitHub Actions workflow for CI/CD is available in `.gitea/workflows/deploy.yaml`. + +## License +MIT + +## Contributing +1. Fork the repository +2. Create a feature branch +3. Commit your changes +4. Push to the branch +5. Open a pull request \ No newline at end of file diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config/agents.yaml b/config/agents.yaml new file mode 100644 index 0000000..13be677 --- /dev/null +++ b/config/agents.yaml @@ -0,0 +1,7 @@ +# Summarizer Agent Configuration +summariser: + role: Senior Code Review Coordinator + goal: Synthesize individual review results into a cohesive, actionable review report + backstory: You are a senior technical lead with extensive experience in code review practices across multiple domains. You excel at combining feedback from different reviewers into a clear, prioritized, and actionable summary that helps development teams improve their code efficiently. + verbose: true + allow_delegation: false \ No newline at end of file diff --git a/config/tasks.yaml b/config/tasks.yaml new file mode 100644 index 0000000..ce72ea6 --- /dev/null +++ b/config/tasks.yaml @@ -0,0 +1,16 @@ +# Summarizer Task Configuration +summarise_task: + description: | + Synthesize the results from code, security, and infrastructure reviews into a cohesive review report. 
+ Code Review Results: {code_review_results} + Security Review Results: {security_review_results} + Infrastructure Review Results: {infra_review_results} + Context: {context} + expected_output: | + A comprehensive review report that includes: + - Executive summary of all findings + - Prioritized list of issues (critical, high, medium, low) + - Specific recommendations for each domain (code, security, infrastructure) + - Overall assessment and recommendation (e.g., Approved, Approved with Minor Changes, Significant Changes Needed) + - Summary of positive aspects of the PR + agent: summariser \ No newline at end of file diff --git a/contexts/defaults/__init__.py b/contexts/defaults/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/contexts/defaults/code_review.md b/contexts/defaults/code_review.md new file mode 100644 index 0000000..6da6f6a --- /dev/null +++ b/contexts/defaults/code_review.md @@ -0,0 +1,19 @@ +# Code Review Guidelines + +## General Principles +- Write clean, readable, and maintainable code. +- Follow the project's coding standards and style guides. +- Ensure code is well-tested and documented. +- Avoid code duplication; refactor when necessary. +- Use meaningful names for variables, functions, and classes. +- Keep functions and classes focused on a single responsibility. + +## Specific Checks +- [ ] Code follows the project's style guide (e.g., PEP8 for Python). +- [ ] No commented-out code or debug prints in production code. +- [ ] Proper error handling and logging. +- [ ] Resource management (e.g., closing files, releasing network connections). +- [ ] Security best practices (input validation, output encoding, etc.). +- [ ] Performance considerations (avoid unnecessary loops, optimize database queries). +- [ ] Unit tests are present and passing for new code. +- [ ] Changes are backward compatible or have a migration plan. 
\ No newline at end of file diff --git a/contexts/defaults/security_review.md b/contexts/defaults/security_review.md new file mode 100644 index 0000000..228f334 --- /dev/null +++ b/contexts/defaults/security_review.md @@ -0,0 +1,22 @@ +# Security Review Guidelines + +## General Principles +- Follow the principle of least privilege. +- Validate and sanitize all user inputs. +- Use secure coding practices to prevent common vulnerabilities. +- Keep dependencies up to date and monitor for known security issues. +- Implement proper authentication and authorization mechanisms. +- Encrypt sensitive data at rest and in transit. +- Log security-relevant events and monitor for suspicious activities. + +## Specific Checks +- [ ] Input validation and sanitization (SQL injection, XSS, command injection, etc.). +- [ ] Proper authentication and session management. +- [ ] Authorization checks (users can only access resources they are permitted to). +- [ ] Secure handling of sensitive data (passwords, tokens, PII). +- [ ] Use of up-to-date and secure dependencies (no known vulnerabilities). +- [ ] Proper error handling that does not leak sensitive information. +- [ ] Secure configuration (e.g., not using default passwords, disabling unnecessary services). +- [ ] Communication security (use of HTTPS, proper certificate validation). +- [ ] Protection against CSRF, clickjacking, and other web vulnerabilities. +- [ ] Secure file uploads (if applicable). 
\ No newline at end of file diff --git a/crews/code_review_crew/__init__.py b/crews/code_review_crew/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/crews/code_review_crew/__pycache__/__init__.cpython-314.pyc b/crews/code_review_crew/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..c805a48 Binary files /dev/null and b/crews/code_review_crew/__pycache__/__init__.cpython-314.pyc differ diff --git a/crews/code_review_crew/__pycache__/code_review_crew.cpython-314.pyc b/crews/code_review_crew/__pycache__/code_review_crew.cpython-314.pyc new file mode 100644 index 0000000..f8a68df Binary files /dev/null and b/crews/code_review_crew/__pycache__/code_review_crew.cpython-314.pyc differ diff --git a/crews/code_review_crew/code_review_crew.py b/crews/code_review_crew/code_review_crew.py new file mode 100644 index 0000000..4411fb2 --- /dev/null +++ b/crews/code_review_crew/code_review_crew.py @@ -0,0 +1,55 @@ +from crewai import CrewBase, Agent, Task, Crew +from crewai_tools import MCPServerAdapter +from mcp import StdioServerParameters +import os +from typing import Dict, Any + + +class CodeReviewCrew(CrewBase): + """Code Review Crew for conducting code quality reviews.""" + + agents_config = "config/agents.yaml" + tasks_config = "config/tasks.yaml" + + def __init__(self): + super().__init__() + # Configure Semgrep MCP server connection + self.semgrep_server_params = StdioServerParameters( + command="semgrep", + args=["--metrics=off", "--json", "--stdin-display-name", "scanned_code", "--"], + env={ + **os.environ, + "SEMGRAPH_APP_URL": os.getenv("SEMGRAPH_APP_URL", ""), + "SEMGRAPH_API_TOKEN": os.getenv("SEMGRAPH_API_TOKEN", ""), + } + ) + + @Agent + def code_reviewer(self) -> Agent: + """Senior Software Engineer agent for code review.""" + return Agent( + config=self.agents_config["code_reviewer"], + tools=[], # Tools will be added via MCP adapter in the task + verbose=True + ) + + @Task + def code_review_task(self) -> Task: + 
"""Task for conducting code review.""" + return Task( + config=self.tasks_config["code_review_task"], + ) + + @Crew + def crew(self) -> Crew: + """Create the Code Review crew.""" + # Create MCP server adapter for Semgrep + semgrep_adapter = MCPServerAdapter(self.semgrep_server_params) + + return Crew( + agents=[self.code_reviewer()], + tasks=[self.code_review_task()], + process="sequential", + verbose=True, + tools=semgrep_adapter.tools if hasattr(semgrep_adapter, 'tools') else [], + ) \ No newline at end of file diff --git a/crews/code_review_crew/config/agents.yaml b/crews/code_review_crew/config/agents.yaml new file mode 100644 index 0000000..dcc4082 --- /dev/null +++ b/crews/code_review_crew/config/agents.yaml @@ -0,0 +1,7 @@ +# Code Review Crew Agents Configuration +code_reviewer: + role: Senior Software Engineer + goal: Conduct thorough code reviews focusing on code quality, best practices, and maintainability + backstory: You are an experienced software engineer with a keen eye for detail and a passion for clean code. You have reviewed thousands of pull requests and helped teams improve their code quality. + verbose: true + allow_delegation: false \ No newline at end of file diff --git a/crews/code_review_crew/config/tasks.yaml b/crews/code_review_crew/config/tasks.yaml new file mode 100644 index 0000000..ca96fb8 --- /dev/null +++ b/crews/code_review_crew/config/tasks.yaml @@ -0,0 +1,16 @@ +# Code Review Crew Tasks Configuration +code_review_task: + description: | + Review the code changes in the pull request for quality, best practices, and maintainability. 
+ PR Title: {pr_title} + PR Description: {pr_description} + Files to review: {files} + Context: {context} + expected_output: | + A detailed code review report including: + - Summary of changes + - Code quality issues (if any) + - Best practices violations (if any) + - Suggestions for improvement + - Overall rating (e.g., Approved, Changes Needed) + agent: code_reviewer \ No newline at end of file diff --git a/crews/infra_review_crew/__init__.py b/crews/infra_review_crew/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/crews/infra_review_crew/__pycache__/__init__.cpython-314.pyc b/crews/infra_review_crew/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..5d90bd4 Binary files /dev/null and b/crews/infra_review_crew/__pycache__/__init__.cpython-314.pyc differ diff --git a/crews/infra_review_crew/__pycache__/infra_review_crew.cpython-314.pyc b/crews/infra_review_crew/__pycache__/infra_review_crew.cpython-314.pyc new file mode 100644 index 0000000..b79cada Binary files /dev/null and b/crews/infra_review_crew/__pycache__/infra_review_crew.cpython-314.pyc differ diff --git a/crews/infra_review_crew/config/agents.yaml b/crews/infra_review_crew/config/agents.yaml new file mode 100644 index 0000000..7559cbe --- /dev/null +++ b/crews/infra_review_crew/config/agents.yaml @@ -0,0 +1,7 @@ +# Infrastructure Review Crew Agents Configuration +infra_reviewer: + role: DevOps/Platform Engineer + goal: Review infrastructure as code for correctness, security, and best practices + backstory: You are an experienced DevOps engineer with expertise in infrastructure as code, Kubernetes, Docker, and cloud platforms. You help teams ensure their infrastructure is secure, scalable, and follows best practices. 
+ verbose: true + allow_delegation: false \ No newline at end of file diff --git a/crews/infra_review_crew/config/tasks.yaml b/crews/infra_review_crew/config/tasks.yaml new file mode 100644 index 0000000..1cc930b --- /dev/null +++ b/crews/infra_review_crew/config/tasks.yaml @@ -0,0 +1,16 @@ +# Infrastructure Review Crew Tasks Configuration +infra_review_task: + description: | + Review the infrastructure as code (IaC) in the pull request for correctness, security, and best practices. + PR Title: {pr_title} + PR Description: {pr_description} + Files to review: {files} + Context: {context} + expected_output: | + A detailed infrastructure review report including: + - Summary of infrastructure changes + - Issues found (misconfigurations, security vulnerabilities, etc.) + - Best practices violations (if any) + - Suggestions for improvement + - Overall rating (e.g., Approved, Needs Changes) + agent: infra_reviewer \ No newline at end of file diff --git a/crews/infra_review_crew/infra_review_crew.py b/crews/infra_review_crew/infra_review_crew.py new file mode 100644 index 0000000..049be39 --- /dev/null +++ b/crews/infra_review_crew/infra_review_crew.py @@ -0,0 +1,65 @@ +from crewai import CrewBase, Agent, Task, Crew +from crewai_tools import MCPServerAdapter +from mcp import StdioServerParameters +import os +from typing import Dict, Any + + +class InfraReviewCrew(CrewBase): + """Infrastructure Review Crew for conducting infrastructure reviews.""" + + agents_config = "config/agents.yaml" + tasks_config = "config/tasks.yaml" + + def __init__(self): + super().__init__() + # Configure Hadolint MCP server connection + self.hadolint_server_params = StdioServerParameters( + command="python", + args=["/home/armistace/dev/pr_reviewer/mcp_servers/hadolint_mcp.py"], + env=os.environ + ) + # Configure Checkov MCP server connection + self.checkov_server_params = StdioServerParameters( + command="python", + args=["/home/armistace/dev/pr_reviewer/mcp_servers/checkov_mcp.py"], + 
env=os.environ + ) + + @Agent + def infra_reviewer(self) -> Agent: + """DevOps/Platform Engineer agent for infrastructure review.""" + return Agent( + config=self.agents_config["infra_reviewer"], + tools=[], # Tools will be added via MCP adapter in the task + verbose=True + ) + + @Task + def infra_review_task(self) -> Task: + """Task for conducting infrastructure review.""" + return Task( + config=self.tasks_config["infra_review_task"], + ) + + @Crew + def crew(self) -> Crew: + """Create the Infrastructure Review crew.""" + # Create MCP server adapters for Hadolint and Checkov + hadolint_adapter = MCPServerAdapter(self.hadolint_server_params) + checkov_adapter = MCPServerAdapter(self.checkov_server_params) + + # Combine tools from both adapters + all_tools = [] + if hasattr(hadolint_adapter, 'tools'): + all_tools.extend(hadolint_adapter.tools) + if hasattr(checkov_adapter, 'tools'): + all_tools.extend(checkov_adapter.tools) + + return Crew( + agents=[self.infra_reviewer()], + tasks=[self.infra_review_task()], + process="sequential", + verbose=True, + tools=all_tools, + ) \ No newline at end of file diff --git a/crews/security_review_crew/__init__.py b/crews/security_review_crew/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/crews/security_review_crew/__pycache__/__init__.cpython-314.pyc b/crews/security_review_crew/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..ac739b0 Binary files /dev/null and b/crews/security_review_crew/__pycache__/__init__.cpython-314.pyc differ diff --git a/crews/security_review_crew/__pycache__/security_review_crew.cpython-314.pyc b/crews/security_review_crew/__pycache__/security_review_crew.cpython-314.pyc new file mode 100644 index 0000000..4977505 Binary files /dev/null and b/crews/security_review_crew/__pycache__/security_review_crew.cpython-314.pyc differ diff --git a/crews/security_review_crew/config/agents.yaml b/crews/security_review_crew/config/agents.yaml new file mode 100644 index 
0000000..5bcbbb3 --- /dev/null +++ b/crews/security_review_crew/config/agents.yaml @@ -0,0 +1,7 @@ +# Security Review Crew Agents Configuration +security_reviewer: + role: Application Security Engineer + goal: Identify security vulnerabilities and ensure security best practices are followed + backstory: You are an experienced security engineer specialized in application security. You have extensive experience in penetration testing, code security analysis, and helping organizations build secure software. + verbose: true + allow_delegation: false \ No newline at end of file diff --git a/crews/security_review_crew/config/tasks.yaml b/crews/security_review_crew/config/tasks.yaml new file mode 100644 index 0000000..02170ce --- /dev/null +++ b/crews/security_review_crew/config/tasks.yaml @@ -0,0 +1,16 @@ +# Security Review Crew Tasks Configuration +security_review_task: + description: | + Review the code changes in the pull request for security vulnerabilities and compliance with security best practices. 
+ PR Title: {pr_title} + PR Description: {pr_description} + Files to review: {files} + Context: {context} + expected_output: | + A detailed security review report including: + - Summary of security-related changes + - Identified vulnerabilities (if any) + - Security best practices violations (if any) + - Suggestions for improving security posture + - Overall security rating (e.g., Secure, Needs Improvement) + agent: security_reviewer \ No newline at end of file diff --git a/crews/security_review_crew/security_review_crew.py b/crews/security_review_crew/security_review_crew.py new file mode 100644 index 0000000..861f63f --- /dev/null +++ b/crews/security_review_crew/security_review_crew.py @@ -0,0 +1,51 @@ +from crewai import CrewBase, Agent, Task, Crew +from crewai_tools import MCPServerAdapter +from mcp import StdioServerParameters +import os +from typing import Dict, Any + + +class SecurityReviewCrew(CrewBase): + """Security Review Crew for conducting security reviews.""" + + agents_config = "config/agents.yaml" + tasks_config = "config/tasks.yaml" + + def __init__(self): + super().__init__() + # Trivy uses native MCP server, so we don't need to configure a wrapper. + # However, we might need to set up connection parameters if required by the native server. + # For now, we assume the native Trivy MCP server is available at a known address or via stdio. + # We'll leave the MCP server configuration empty and rely on the native server being available. 
+ self.trivy_server_params = None # Placeholder for if we need to configure stdio parameters + + @Agent + def security_reviewer(self) -> Agent: + """Application Security Engineer agent for security review.""" + return Agent( + config=self.agents_config["security_reviewer"], + tools=[], # Tools will be added via MCP adapter in the task + verbose=True + ) + + @Task + def security_review_task(self) -> Task: + """Task for conducting security review.""" + return Task( + config=self.tasks_config["security_review_task"], + ) + + @Crew + def crew(self) -> Crew: + """Create the Security Review crew.""" + # If we had an MCP server to wrap, we would create an adapter here. + # Since Trivy is native, we don't add any tools via MCPServerAdapter. + # However, the native server should be available in the MCP ecosystem. + # We'll assume the tools are automatically available or will be handled differently. + return Crew( + agents=[self.security_reviewer()], + tasks=[self.security_review_task()], + process="sequential", + verbose=True, + # No additional tools from MCP wrapper for Trivy (native) + ) \ No newline at end of file diff --git a/crews/summariser_crew/__init__.py b/crews/summariser_crew/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/crews/summariser_crew/__pycache__/__init__.cpython-314.pyc b/crews/summariser_crew/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..d3e183a Binary files /dev/null and b/crews/summariser_crew/__pycache__/__init__.cpython-314.pyc differ diff --git a/crews/summariser_crew/__pycache__/summariser_crew.cpython-314.pyc b/crews/summariser_crew/__pycache__/summariser_crew.cpython-314.pyc new file mode 100644 index 0000000..364e6e4 Binary files /dev/null and b/crews/summariser_crew/__pycache__/summariser_crew.cpython-314.pyc differ diff --git a/crews/summariser_crew/config/agents.yaml b/crews/summariser_crew/config/agents.yaml new file mode 100644 index 0000000..cfb8775 --- /dev/null +++ 
b/crews/summariser_crew/config/agents.yaml @@ -0,0 +1,7 @@ +# Summarizer Crew Agents Configuration +summariser: + role: Senior Code Review Coordinator + goal: Synthesize individual review results into a cohesive, actionable review report + backstory: You are a senior technical lead with extensive experience in code review practices across multiple domains. You excel at combining feedback from different reviewers into a clear, prioritized, and actionable summary that helps development teams improve their code efficiently. + verbose: true + allow_delegation: false \ No newline at end of file diff --git a/crews/summariser_crew/config/tasks.yaml b/crews/summariser_crew/config/tasks.yaml new file mode 100644 index 0000000..648fd2b --- /dev/null +++ b/crews/summariser_crew/config/tasks.yaml @@ -0,0 +1,16 @@ +# Summarizer Crew Tasks Configuration +summarise_task: + description: | + Synthesize the results from code, security, and infrastructure reviews into a cohesive review report. + Code Review Results: {code_review_results} + Security Review Results: {security_review_results} + Infrastructure Review Results: {infra_review_results} + Context: {context} + expected_output: | + A comprehensive review report that includes: + - Executive summary of all findings + - Prioritized list of issues (critical, high, medium, low) + - Specific recommendations for each domain (code, security, infrastructure) + - Overall assessment and recommendation (e.g., Approved, Approved with Minor Changes, Significant Changes Needed) + - Summary of positive aspects of the PR + agent: summariser \ No newline at end of file diff --git a/crews/summariser_crew/summariser_crew.py b/crews/summariser_crew/summariser_crew.py new file mode 100644 index 0000000..2ea33c5 --- /dev/null +++ b/crews/summariser_crew/summariser_crew.py @@ -0,0 +1,43 @@ +from crewai import CrewBase, Agent, Task, Crew +from crewai_tools import MCPServerAdapter +from mcp import StdioServerParameters +import os +from typing import 
Dict, Any + + +class SummariserCrew(CrewBase): + """Summariser Crew for synthesizing review results.""" + + agents_config = "config/agents.yaml" + tasks_config = "config/tasks.yaml" + + def __init__(self): + super().__init__() + # The summarizer doesn't need MCP server connections as it works with text results + + @Agent + def summariser(self) -> Agent: + """Senior Code Review Coordinator agent for summarizing reviews.""" + return Agent( + config=self.agents_config["summariser"], + tools=[], # No tools needed for summarization + verbose=True + ) + + @Task + def summarise_task(self) -> Task: + """Task for synthesizing review results.""" + return Task( + config=self.tasks_config["summarise_task"], + ) + + @Crew + def crew(self) -> Crew: + """Create the Summariser crew.""" + return Crew( + agents=[self.summariser()], + tasks=[self.summarise_task()], + process="sequential", + verbose=True, + # No additional tools needed + ) \ No newline at end of file diff --git a/example_pipelines/.gitea/workflows/build_push.yml b/example_pipelines/.gitea/workflows/build_push.yml new file mode 100644 index 0000000..4f8a462 --- /dev/null +++ b/example_pipelines/.gitea/workflows/build_push.yml @@ -0,0 +1,72 @@ +name: Build and Push Image +on: + push: + branches: + - master + +jobs: + build: + name: Build and push image + runs-on: ubuntu-latest + container: catthehacker/ubuntu:act-latest + if: gitea.ref == 'refs/heads/master' + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Create Kubeconfig + run: | + mkdir $HOME/.kube + echo "${{ secrets.KUBEC_CONFIG_BUILDX_NEW }}" > $HOME/.kube/config + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver: kubernetes + driver-opts: | + namespace=gitea-runner + qemu.install=true + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + registry: git.aridgwayweb.com + username: armistace + password: ${{ secrets.REG_PASSWORD }} + + - name: Build and push + uses: 
docker/build-push-action@v5 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: | + git.aridgwayweb.com/armistace/blog:latest + + - name: Trivy Scan + run: | + echo "Installing Trivy " + sudo apt-get update + sudo apt-get install -y wget apt-transport-https gnupg lsb-release + wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add - + echo deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main | sudo tee -a /etc/apt/sources.list.d/trivy.list + sudo apt-get update + sudo apt-get install -y trivy + trivy image --format table --exit-code 1 --ignore-unfixed --vuln-type os,library --severity HIGH,CRITICAL git.aridgwayweb.com/armistace/blog:latest + + - name: Deploy + run: | + echo "Installing Kubectl" + apt-get update + apt-get install -y apt-transport-https ca-certificates curl gnupg + curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.33/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + chmod 644 /etc/apt/keyrings/kubernetes-apt-keyring.gpg + echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.33/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list + chmod 644 /etc/apt/sources.list.d/kubernetes.list + apt-get update + apt-get install kubectl + kubectl delete namespace blog + kubectl create namespace blog + kubectl create secret docker-registry regcred --docker-server=${{ vars.DOCKER_SERVER }} --docker-username=${{ vars.DOCKER_USERNAME }} --docker-password='${{ secrets.DOCKER_PASSWORD }}' --docker-email=${{ vars.DOCKER_EMAIL }} --namespace=blog + kubectl apply -f kube/blog_pod.yaml && kubectl apply -f kube/blog_deployment.yaml && kubectl apply -f kube/blog_service.yaml diff --git a/example_pipelines/kube/blog_deployment.yaml b/example_pipelines/kube/blog_deployment.yaml new file mode 100644 index 0000000..8acae4e --- /dev/null +++ b/example_pipelines/kube/blog_deployment.yaml @@ -0,0 +1,24 @@ +apiVersion: 
apps/v1 +kind: Deployment +metadata: + name: blog-deployment + labels: + app: blog + namespace: blog +spec: + replicas: 3 + selector: + matchLabels: + app: blog + template: + metadata: + labels: + app: blog + spec: + containers: + - name: blog + image: git.aridgwayweb.com/armistace/blog:latest + ports: + - containerPort: 8000 + imagePullSecrets: + - name: regcred diff --git a/example_pipelines/kube/blog_pod.yaml b/example_pipelines/kube/blog_pod.yaml new file mode 100644 index 0000000..5ee6366 --- /dev/null +++ b/example_pipelines/kube/blog_pod.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: blog + namespace: blog +spec: + containers: + - name: blog + image: git.aridgwayweb.com/armistace/blog:latest + ports: + - containerPort: 8000 + imagePullSecrets: + - name: regcred diff --git a/example_pipelines/kube/blog_service.yaml b/example_pipelines/kube/blog_service.yaml new file mode 100644 index 0000000..3af5257 --- /dev/null +++ b/example_pipelines/kube/blog_service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: blog-service + namespace: blog +spec: + type: NodePort + selector: + app: blog + ports: + - port: 80 + targetPort: 8000 + nodePort: 30009 diff --git a/kube/pr-reviewer_deployment.yaml b/kube/pr-reviewer_deployment.yaml new file mode 100644 index 0000000..3c0bcba --- /dev/null +++ b/kube/pr-reviewer_deployment.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pr-reviewer-deployment + labels: + app: pr-reviewer + namespace: pr-reviewer +spec: + replicas: 3 + selector: + matchLabels: + app: pr-reviewer + template: + metadata: + labels: + app: pr-reviewer + spec: + containers: + - name: pr-reviewer + image: git.aridgwayweb.com/armistace/pr-reviewer:latest + ports: + - containerPort: 8000 + imagePullSecrets: + - name: regcred diff --git a/kube/pr-reviewer_pod.yaml b/kube/pr-reviewer_pod.yaml new file mode 100644 index 0000000..774610c --- /dev/null +++ b/kube/pr-reviewer_pod.yaml @@ -0,0 
+1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: pr-reviewer + namespace: pr-reviewer +spec: + containers: + - name: pr-reviewer + image: git.aridgwayweb.com/armistace/pr-reviewer:latest + ports: + - containerPort: 8000 + imagePullSecrets: + - name: regcred diff --git a/kube/pr-reviewer_service.yaml b/kube/pr-reviewer_service.yaml new file mode 100644 index 0000000..9acd7a0 --- /dev/null +++ b/kube/pr-reviewer_service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: pr-reviewer-service + namespace: pr-reviewer +spec: + type: NodePort + selector: + app: pr-reviewer + ports: + - port: 80 + targetPort: 8000 + nodePort: 30009 diff --git a/mcp_servers/__init__.py b/mcp_servers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mcp_servers/__pycache__/__init__.cpython-314.pyc b/mcp_servers/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..07fa215 Binary files /dev/null and b/mcp_servers/__pycache__/__init__.cpython-314.pyc differ diff --git a/mcp_servers/__pycache__/checkov_mcp.cpython-314.pyc b/mcp_servers/__pycache__/checkov_mcp.cpython-314.pyc new file mode 100644 index 0000000..e721c0e Binary files /dev/null and b/mcp_servers/__pycache__/checkov_mcp.cpython-314.pyc differ diff --git a/mcp_servers/__pycache__/hadolint_mcp.cpython-314.pyc b/mcp_servers/__pycache__/hadolint_mcp.cpython-314.pyc new file mode 100644 index 0000000..0334ee1 Binary files /dev/null and b/mcp_servers/__pycache__/hadolint_mcp.cpython-314.pyc differ diff --git a/mcp_servers/checkov_mcp.py b/mcp_servers/checkov_mcp.py new file mode 100644 index 0000000..66d2032 --- /dev/null +++ b/mcp_servers/checkov_mcp.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +""" +MCP server for Checkov Kubernetes security scanner. 
+""" +import asyncio +import json +import logging +import subprocess +import sys +import tempfile +import os +from typing import Any, Dict, List + +import mcp.server.stdio +import mcp.types as types +from mcp.server import NotificationOptions, Server +from mcp.server.models import InitializationOptions + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Create server instance +server = Server("checkov-mcp") + + +@server.list_tools() +async def handle_list_tools() -> List[types.Tool]: + """ + List available tools. + """ + return [ + types.Tool( + name="scan_kubernetes_manifests", + description="Scan Kubernetes manifests for security issues using Checkov", + inputSchema={ + "type": "object", + "properties": { + "manifest_content": { + "type": "string", + "description": "The content of the Kubernetes manifest(s) to scan" + } + }, + "required": ["manifest_content"] + } + ) + ] + + +@server.call_tool() +async def handle_call_tool( + name: str, arguments: Dict[str, Any] | None +) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]: + """ + Handle tool calls. 
+ """ + if name != "scan_kubernetes_manifests": + raise ValueError(f"Unknown tool: {name}") + + if not arguments: + raise ValueError("Missing arguments") + + manifest_content = arguments.get("manifest_content") + if not manifest_content: + raise ValueError("Missing manifest_content argument") + + try: + # Create a temporary file to hold the manifest content + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as temp_file: + temp_file.write(manifest_content) + temp_file_path = temp_file.name + + try: + # Run checkov on the manifest file + process = await asyncio.create_subprocess_exec( + "checkov", + "-f", temp_file_path, + "--quiet", # Reduce verbosity + "--output", "json", # Get JSON output for easier parsing + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout, stderr = await process.communicate() + + if process.returncode not in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]: # Checkov returns various codes + # Some non-zero codes are expected (findings, etc.) + pass + + result = stdout.decode() + if stderr: + result += "\nSTDERR:\n" + stderr.decode() + + # If checkov is not found, we'll get an error from the subprocess + if not result.strip() and process.returncode == 127: # command not found typically returns 127 + result = "Error: Checkov command not found. Please install checkov." + + return [ + types.TextContent( + type="text", + text=result + ) + ] + finally: + # Clean up the temporary file + os.unlink(temp_file_path) + except FileNotFoundError: + logger.error("Checkov command not found. Please ensure checkov is installed and in PATH.") + return [ + types.TextContent( + type="text", + text="Error: Checkov command not found. Please install checkov." + ) + ] + except Exception as e: + logger.exception("Error running checkov") + return [ + types.TextContent( + type="text", + text=f"Error running checkov: {str(e)}" + ) + ] + + +async def main(): + """ + Run the MCP server. 
+ """ + # Run the server using stdio + async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): + await server.run( + read_stream, + write_stream, + InitializationOptions( + server_name="checkov-mcp", + server_version="0.1.0", + capabilities=server.get_capabilities( + notification_options=NotificationOptions(), + experimental_capabilities={}, + ), + ), + ) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/mcp_servers/hadolint_mcp.py b/mcp_servers/hadolint_mcp.py new file mode 100644 index 0000000..1ef9bca --- /dev/null +++ b/mcp_servers/hadolint_mcp.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +""" +MCP server for Hadolint Dockerfile linter. +""" +import asyncio +import json +import logging +import subprocess +import sys +from typing import Any, Dict, List + +import mcp.server.stdio +import mcp.types as types +from mcp.server import NotificationOptions, Server +from mcp.server.models import InitializationOptions + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Create server instance +server = Server("hadolint-mcp") + + +@server.list_tools() +async def handle_list_tools() -> List[types.Tool]: + """ + List available tools. + """ + return [ + types.Tool( + name="lint_dockerfile", + description="Lint a Dockerfile using Hadolint", + inputSchema={ + "type": "object", + "properties": { + "dockerfile_content": { + "type": "string", + "description": "The content of the Dockerfile to lint" + } + }, + "required": ["dockerfile_content"] + } + ) + ] + + +@server.call_tool() +async def handle_call_tool( + name: str, arguments: Dict[str, Any] | None +) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]: + """ + Handle tool calls. 
+ """ + if name != "lint_dockerfile": + raise ValueError(f"Unknown tool: {name}") + + if not arguments: + raise ValueError("Missing arguments") + + dockerfile_content = arguments.get("dockerfile_content") + if not dockerfile_content: + raise ValueError("Missing dockerfile_content argument") + + try: + # Run hadolint on the Dockerfile content + process = await asyncio.create_subprocess_exec( + "hadolint", + "-", # Read from stdin + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout, stderr = await process.communicate(input=dockerfile_content.encode()) + + if process.returncode != 0: + # Hadolint returns non-zero for linting errors, which is expected + # We still return the output as the result + result = stdout.decode() + stderr.decode() + else: + result = stdout.decode() + + # If no output, hadolint passed with no issues + if not result.strip(): + result = "Hadolint: No issues found." + + return [ + types.TextContent( + type="text", + text=result + ) + ] + except FileNotFoundError: + logger.error("Hadolint command not found. Please ensure hadolint is installed and in PATH.") + return [ + types.TextContent( + type="text", + text="Error: Hadolint command not found. Please install hadolint." + ) + ] + except Exception as e: + logger.exception("Error running hadolint") + return [ + types.TextContent( + type="text", + text=f"Error running hadolint: {str(e)}" + ) + ] + + +async def main(): + """ + Run the MCP server. 
+ """ + # Run the server using stdio + async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): + await server.run( + read_stream, + write_stream, + InitializationOptions( + server_name="hadolint-mcp", + server_version="0.1.0", + capabilities=server.get_capabilities( + notification_options=NotificationOptions(), + experimental_capabilities={}, + ), + ), + ) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e77fd6f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,71 @@ +[project] +name = "pr-reviewer" +version = "0.1.0" +description = "A PR Reviewer system using CrewAI and MCP" +readme = "README.md" +requires-python = ">=3.10,<3.14" +authors = [ + {name = "Developer", email = "dev@example.com"} +] +keywords = ["pull-request", "code-review", "security", "infrastructure", "crewai", "mcp"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "crewai>=0.28.0", + "fastapi>=0.104.0", + "uvicorn>=0.24.0", + "mcp>=0.1.0", + "pydantic>=2.5.0", + "python-dotenv>=1.0.0", + "gitpython>=3.1.0" +] + +[project.optional-dependencies] +anthropic = ["anthropic>=0.7.0"] +openai = ["openai>=1.0.0"] +ollama = [] +dev = [ + "pytest>=7.0.0", + "black>=22.0.0", + "flake8>=4.0.0", + "mypy>=0.9.0", +] + +[project.urls] +Homepage = "https://github.com/your-org/pr-reviewer" +Documentation = "https://github.com/your-org/pr-reviewer#readme" +Repository = "https://github.com/your-org/pr-reviewer.git" + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 88 +target-version = ['py310'] 
+include = '\.py$' +exclude = ''' +/(\.git +| \.hg +| \.mypy_cache +| \.tox +| \.venv +| _build +| buck-out +| build +| dist +)/ +''' + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_configs = true diff --git a/src/pr_reviewer/__init__.py b/src/pr_reviewer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pr_reviewer/__pycache__/__init__.cpython-314.pyc b/src/pr_reviewer/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..72c67ff Binary files /dev/null and b/src/pr_reviewer/__pycache__/__init__.cpython-314.pyc differ diff --git a/src/pr_reviewer/__pycache__/context.cpython-314.pyc b/src/pr_reviewer/__pycache__/context.cpython-314.pyc new file mode 100644 index 0000000..2ddf2d8 Binary files /dev/null and b/src/pr_reviewer/__pycache__/context.cpython-314.pyc differ diff --git a/src/pr_reviewer/__pycache__/flow.cpython-314.pyc b/src/pr_reviewer/__pycache__/flow.cpython-314.pyc new file mode 100644 index 0000000..66212c3 Binary files /dev/null and b/src/pr_reviewer/__pycache__/flow.cpython-314.pyc differ diff --git a/src/pr_reviewer/__pycache__/llm.cpython-314.pyc b/src/pr_reviewer/__pycache__/llm.cpython-314.pyc new file mode 100644 index 0000000..82ee3b7 Binary files /dev/null and b/src/pr_reviewer/__pycache__/llm.cpython-314.pyc differ diff --git a/src/pr_reviewer/__pycache__/main.cpython-314.pyc b/src/pr_reviewer/__pycache__/main.cpython-314.pyc new file mode 100644 index 0000000..00db7c2 Binary files /dev/null and b/src/pr_reviewer/__pycache__/main.cpython-314.pyc differ diff --git a/src/pr_reviewer/__pycache__/state.cpython-314.pyc b/src/pr_reviewer/__pycache__/state.cpython-314.pyc new file mode 100644 index 0000000..c2ff1bc Binary files /dev/null and b/src/pr_reviewer/__pycache__/state.cpython-314.pyc differ diff --git a/src/pr_reviewer/context.py b/src/pr_reviewer/context.py new file mode 100644 index 0000000..d303370 --- /dev/null +++ b/src/pr_reviewer/context.py @@ -0,0 +1,45 @@ +import 
os +from pathlib import Path +from typing import Dict, Optional +from .state import PRReviewState, ContextOverrides + + +def resolve_context(state: PRReviewState) -> Dict[str, str]: + """ + Resolve the context for each review type based on overrides and default files. + + Args: + state: The PR review state containing potential context overrides + + Returns: + A dictionary with keys 'code_review', 'security_review', 'infra_review' + and their resolved context strings. + """ + # Define the mapping of context types to their default file paths + context_mapping = { + 'code_review': 'contexts/defaults/code_review.md', + 'security_review': 'contexts/defaults/security_review.md', + 'infra_review': 'contexts/defaults/infra_review.md' + } + + resolved = {} + + for context_type, default_path in context_mapping.items(): + # Check if there's an override in the state + override_value = None + if state.context_overrides: + override_value = getattr(state.context_overrides, context_type, None) + + if override_value is not None and override_value.strip() != '': + # Use the override if provided and not empty + resolved[context_type] = override_value.strip() + else: + # Use the default file + try: + with open(default_path, 'r') as f: + resolved[context_type] = f.read().strip() + except FileNotFoundError: + # If the default file doesn't exist, use an empty string + resolved[context_type] = '' + + return resolved diff --git a/src/pr_reviewer/flow.py b/src/pr_reviewer/flow.py new file mode 100644 index 0000000..c7b975b --- /dev/null +++ b/src/pr_reviewer/flow.py @@ -0,0 +1,150 @@ +from crewai.flow import Flow, listen, start, and_ +from crewai import Crew +from .state import PRReviewState +from .llm import get_llm +from .context import resolve_context +import os +from datetime import datetime + +# Import the crews +from crews.code_review_crew.code_review_crew import CodeReviewCrew +from crews.security_review_crew.security_review_crew import SecurityReviewCrew +from 
class CodeReviewFlow(Flow[PRReviewState]):
    """CrewAI flow: fan out code/security/infra reviews, then summarise.

    receive_pr seeds the state; the three run_*_review listeners fire in
    parallel off it; summarise waits (and_) for all three; format_response
    shapes the final dict.
    """

    def _crew_inputs(self, context_key: str) -> dict:
        """Build the kickoff inputs shared by all three review crews."""
        return {
            "pr_title": self.state.pr_title,
            "pr_description": self.state.pr_description,
            # FileInfo models become plain dicts for the crew templates.
            "files": [f.dict() if hasattr(f, 'dict') else f for f in self.state.files],
            "context": self.state.resolved_context.get(context_key, ""),
        }

    def _run_review(self, crew_cls, context_key: str, label: str) -> str:
        """Instantiate *crew_cls*, run its crew, and return the result text.

        REFACTOR: the three review runners below were triplicated copies of
        the same body; this helper deduplicates them.
        """
        print(f"Starting {label}...")
        result = crew_cls().crew().kickoff(inputs=self._crew_inputs(context_key))
        print(f"{label.capitalize()} completed.")
        return str(result)

    @start()
    def receive_pr(self, inputs):
        """Initialize the PR review state with input data."""
        print(f"Received PR review request for PR #{inputs.get('pr_id')}")

        self.state.pr_id = inputs.get("pr_id", "")
        self.state.pr_title = inputs.get("pr_title", "")
        self.state.pr_description = inputs.get("pr_description", "")
        self.state.pr_url = inputs.get("pr_url", "")
        self.state.repo_name = inputs.get("repo_name", "")
        self.state.repo_url = inputs.get("repo_url", "")
        self.state.branch = inputs.get("branch", "")
        self.state.base_branch = inputs.get("base_branch", "")

        # Files may arrive as plain dicts (from the API layer) or already
        # as FileInfo models; normalise to FileInfo.
        files_input = inputs.get("files", [])
        if files_input and isinstance(files_input[0], dict):
            from .state import FileInfo
            self.state.files = [FileInfo(**file_dict) for file_dict in files_input]
        else:
            self.state.files = files_input

        # Same normalisation for the optional context overrides.
        context_overrides_input = inputs.get("context_overrides")
        if context_overrides_input and isinstance(context_overrides_input, dict):
            from .state import ContextOverrides
            self.state.context_overrides = ContextOverrides(**context_overrides_input)
        else:
            self.state.context_overrides = context_overrides_input

        self.state.started_at = datetime.now()
        self.state.resolved_context = resolve_context(self.state)
        return self.state

    @listen(receive_pr)
    def run_code_review(self):
        """Run the code review crew."""
        self.state.code_review_results = self._run_review(
            CodeReviewCrew, "code_review", "code review")
        return self.state

    @listen(receive_pr)
    def run_security_review(self):
        """Run the security review crew."""
        self.state.security_review_results = self._run_review(
            SecurityReviewCrew, "security_review", "security review")
        return self.state

    @listen(receive_pr)
    def run_infra_review(self):
        """Run the infrastructure review crew."""
        self.state.infra_review_results = self._run_review(
            InfraReviewCrew, "infra_review", "infrastructure review")
        return self.state

    @listen(and_(run_code_review, run_security_review, run_infra_review))
    def summarise(self):
        """Synthesize the three review results into a single summary."""
        print("Starting summarisation...")
        summariser_crew = SummariserCrew()
        result = summariser_crew.crew().kickoff(inputs={
            "code_review_results": self.state.code_review_results,
            "security_review_results": self.state.security_review_results,
            "infra_review_results": self.state.infra_review_results,
            "context": self.state.resolved_context,
        })
        self.state.review_summary = str(result)
        self.state.completed_at = datetime.now()
        print("Summarisation completed.")
        return self.state

    @listen(summarise)
    def format_response(self):
        """Shape the final JSON-serialisable response from the state."""
        print("Formatting final response...")
        return {
            "pr_id": self.state.pr_id,
            "pr_title": self.state.pr_title,
            "review_summary": self.state.review_summary,
            "code_review_results": self.state.code_review_results,
            "security_review_results": self.state.security_review_results,
            "infra_review_results": self.state.infra_review_results,
            "started_at": self.state.started_at.isoformat() if self.state.started_at else None,
            "completed_at": self.state.completed_at.isoformat() if self.state.completed_at else None,
            "error": self.state.error,
        }
# --- src/pr_reviewer/llm.py ---

def create_llm() -> LLM:
    """
    Create an LLM instance based on environment variables.

    Expected environment variables:
    - LLM_MODEL: The model name to use (required)
    - LLM_BASE_URL: The base URL for the LLM API (optional)
    - LLM_API_KEY: The API key for the LLM service (optional)
    - LLM_PROVIDER: The provider (e.g., 'openai', 'anthropic', 'ollama') (optional)

    Returns:
        LLM: A CrewAI LLM instance

    Raises:
        ValueError: if LLM_MODEL is not set.
    """
    model = os.getenv("LLM_MODEL")
    if not model:
        raise ValueError("LLM_MODEL environment variable is required")

    # Only pass optional settings that are actually set, so CrewAI's own
    # defaults apply otherwise.
    llm_config = {"model": model}
    base_url = os.getenv("LLM_BASE_URL")
    api_key = os.getenv("LLM_API_KEY")
    provider = os.getenv("LLM_PROVIDER")
    if base_url:
        llm_config["base_url"] = base_url
    if api_key:
        llm_config["api_key"] = api_key
    if provider:
        llm_config["provider"] = provider

    return LLM(**llm_config)


# Shared LLM singleton (lazily created by get_llm).
_shared_llm: Optional[LLM] = None


def get_llm() -> LLM:
    """
    Get the shared LLM singleton instance, creating it on first use.

    Returns:
        LLM: The shared LLM instance
    """
    global _shared_llm
    if _shared_llm is None:
        _shared_llm = create_llm()
    return _shared_llm


# --- src/pr_reviewer/main.py (module prologue) ---
import logging
# BUG FIX: `os` was never imported, but os.getenv() is called below for the
# timeout constants -- the module raised NameError at import time.
import os
import asyncio
import time
import uuid
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from typing import Dict, Any, List, Optional

import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse

from .flow import CodeReviewFlow
from .state import PRReviewState, FileInfo, ContextOverrides

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="PR Reviewer API",
    description="API for conducting automated pull request reviews",
    version="0.1.0"
)

# Timeout configuration (seconds), overridable via environment.
TOTAL_FLOW_TIMEOUT = int(os.getenv("TOTAL_FLOW_TIMEOUT", "600"))  # default 10 minutes
PER_CREW_TIMEOUT = int(os.getenv("PER_CREW_TIMEOUT", "300"))      # default 5 minutes


@app.get("/api/v1/health")
async def health_check() -> Dict[str, str]:
    """
    Health check endpoint to verify the service is running.
    """
    return {"status": "healthy", "service": "pr-reviewer"}
payload.get("repo", {}) + repo_name = repo_data.get("name") + repo_url = repo_data.get("url") + + # Extract source information + source_data = payload.get("source", {}) + source_branch = source_data.get("branch") + source_commit = source_data.get("commit") + + # Extract target information + target_data = payload.get("target", {}) + target_branch = target_data.get("branch") + target_commit = target_data.get("commit") + + # Extract files + files_data = payload.get("files", []) + + # Extract context overrides + context_data = payload.get("context", {}) + + # Validate required fields + if not pr_id: + raise HTTPException(status_code=422, detail="Missing required field: pr_id") + if not title: + raise HTTPException(status_code=422, detail="Missing required field: title") + if not repo_name: + raise HTTPException(status_code=422, detail="Missing required field: repo.name") + if not repo_url: + raise HTTPException(status_code=422, detail="Missing required field: repo.url") + if not source_branch: + raise HTTPException(status_code=422, detail="Missing required field: source.branch") + if not target_branch: + raise HTTPException(status_code=422, detail="Missing required field: target.branch") + + # Convert files data to FileInfo objects + files = [] + for file_data in files_data: + if not file_data.get("path"): + raise HTTPException(status_code=422, detail="Missing required field: files[].path") + if not file_data.get("status"): + raise HTTPException(status_code=422, detail="Missing required field: files[].status") + + file_info = FileInfo( + path=file_data.get("path", ""), + content=file_data.get("content"), + status=file_data.get("status", "modified"), + additions=file_data.get("additions", 0), + deletions=file_data.get("deletions", 0), + patch=file_data.get("patch") + ) + files.append(file_info) + + # Create context overrides object (only if at least one field is provided) + context_overrides = None + if any([context_data.get("code_review"), + 
context_data.get("security_review"), + context_data.get("infra_review")]): + context_overrides = ContextOverrides( + code_review=context_data.get("code_review"), + security_review=context_data.get("security_review"), + infra_review=context_data.get("infra_review") + ) + + # Initialize and run the flow with timeout + flow = CodeReviewFlow() + + # Run the flow in a thread pool with timeout to avoid blocking the event loop + loop = asyncio.get_event_loop() + with ThreadPoolExecutor() as pool: + try: + # Wait for the flow to complete with a timeout + flow_result = await asyncio.wait_for( + loop.run_in_executor( + pool, + lambda: flow.kickoff(inputs={ + "pr_id": pr_id, + "pr_title": title, + "pr_description": description, + "pr_url": f"{repo_url}/pull/{pr_id}", # Construct PR URL + "repo_name": repo_name, + "repo_url": repo_url, + "branch": source_branch, # Using source branch as the active branch + "base_branch": target_branch, # Using target branch as base + "files": [file.dict() for file in files], # Convert to dict for flow + "context_overrides": context_overrides.dict() if context_overrides else None + }) + ), + timeout=TOTAL_FLOW_TIMEOUT + ) + except asyncio.TimeoutError: + logger.error(f"PR review timed out: {review_id}") + raise HTTPException( + status_code=504, + detail=f"PR review timed out after {TOTAL_FLOW_TIMEOUT} seconds" + ) + + # Calculate processing time + processing_time = time.time() - start_time + + # Prepare response according to the API specification: + # { + # "review_id": "string (unique identifier for this review)", + # "status": "string (\"completed\", \"failed\", etc.)", + # "timestamp": "string (ISO 8601 timestamp)", + # "results": { + # "code_review": "string (results from code review)", + # "security_review": "string (results from security review)", + # "infra_review": "string (results from infrastructure review)", + # "summary": "string (synthesized review summary)" + # }, + # "metadata": { + # "processing_time_seconds": "number", + # 
"pr_id": "string", + # "repo": { + # "name": "string", + # "url": "string" + # } + # } + # } + + response = { + "review_id": review_id, + "status": "completed" if not flow_result.get("error") else "failed", + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "results": { + "code_review": flow_result.get("code_review_results"), + "security_review": flow_result.get("security_review_results"), + "infra_review": flow_result.get("infra_review_results"), + "summary": flow_result.get("review_summary") + }, + "metadata": { + "processing_time_seconds": round(processing_time, 2), + "pr_id": pr_id, + "repo": { + "name": repo_name, + "url": repo_url + } + } + } + + # Include error information if present + if flow_result.get("error"): + response["metadata"]["error"] = flow_result["error"] + logger.error(f"PR review failed: {review_id} - {flow_result['error']}") + else: + logger.info(f"PR review completed successfully: {review_id} in {processing_time:.2f}s") + + return response + + except HTTPException: + # Re-raise HTTP exceptions as they are already properly formatted + raise + except asyncio.TimeoutError: + # This should be caught by the wait_for above, but just in case + logger.error(f"PR review timed out: {review_id}") + raise HTTPException( + status_code=504, + detail=f"PR review timed out after {TOTAL_FLOW_TIMEOUT} seconds" + ) + except Exception as e: + # Log the error for debugging + logger.error(f"Error in PR review: {review_id} - {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + + +# Error handlers +@app.exception_handler(404) +async def not_found_handler(request: Request, exc: HTTPException): + return JSONResponse( + status_code=404, + content={"message": "Endpoint not found"} + ) + + +@app.exception_handler(422) +async def request_validation_exception_handler(request: Request, exc: HTTPException): + return JSONResponse( + status_code=422, + content={"message": "Invalid request payload", "details": 
exc.detail} + ) + + +@app.exception_handler(500) +async def internal_error_handler(request: Request, exc: HTTPException): + return JSONResponse( + status_code=500, + content={"message": "Internal server error"} + ) + + +@app.exception_handler(504) +async def timeout_handler(request: Request, exc: HTTPException): + return JSONResponse( + status_code=504, + content={"message": "Request timeout", "details": exc.detail} + ) + + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/src/pr_reviewer/state.py b/src/pr_reviewer/state.py new file mode 100644 index 0000000..17f5469 --- /dev/null +++ b/src/pr_reviewer/state.py @@ -0,0 +1,45 @@ +from pydantic import BaseModel, Field +from typing import List, Optional, Dict, Any +from datetime import datetime + + +class FileInfo(BaseModel): + """Information about a file in the PR.""" + path: str + content: Optional[str] = None + status: str # added, modified, removed, etc. + additions: int = 0 + deletions: int = 0 + patch: Optional[str] = None + + +class ContextOverrides(BaseModel): + """Overrides for the default context.""" + code_review: Optional[str] = None + security_review: Optional[str] = None + infra_review: Optional[str] = None + + +class PRReviewState(BaseModel): + """State of the PR review process.""" + # Input fields + pr_id: str + pr_title: str + pr_description: Optional[str] = None + pr_url: Optional[str] = None + repo_name: str + repo_url: str + branch: str + base_branch: str + files: List[FileInfo] = Field(default_factory=list) + context_overrides: Optional[ContextOverrides] = None + # Internal fields + resolved_context: Optional[Dict[str, str]] = None + code_review_results: Optional[str] = None + security_review_results: Optional[str] = None + infra_review_results: Optional[str] = None + review_summary: Optional[str] = None + # Metadata + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + error: Optional[str] = None \ No 
# --- tests/integration/test_mcp_servers.py ---
"""
Integration smoke tests for MCP servers and project modules.

Each test only verifies that a module can be imported (and, for the
crews, instantiated) without raising; no server is actually started.
"""
import pytest
import sys
import os

# Make the project root importable so the mcp_servers/crews packages
# resolve regardless of the directory pytest is invoked from.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))


def test_hadolint_mcp_import():
    """Test that the Hadolint MCP server can be imported."""
    try:
        from mcp_servers.hadolint_mcp import server
        assert server is not None
    except ImportError as e:
        pytest.fail(f"Failed to import Hadolint MCP server: {e}")


def test_checkov_mcp_import():
    """Test that the Checkov MCP server can be imported."""
    try:
        from mcp_servers.checkov_mcp import server
        assert server is not None
    except ImportError as e:
        pytest.fail(f"Failed to import Checkov MCP server: {e}")


def test_crew_imports():
    """Test that all crew modules can be imported and instantiated."""
    try:
        from crews.code_review_crew.code_review_crew import CodeReviewCrew
        from crews.security_review_crew.security_review_crew import SecurityReviewCrew
        from crews.infra_review_crew.infra_review_crew import InfraReviewCrew
        from crews.summariser_crew.summariser_crew import SummariserCrew

        # Instantiation is part of the smoke test: crew __init__ methods
        # load agent/task config, so a bad config fails here, not at runtime.
        code_crew = CodeReviewCrew()
        security_crew = SecurityReviewCrew()
        infra_crew = InfraReviewCrew()
        summariser_crew = SummariserCrew()

        assert code_crew is not None
        assert security_crew is not None
        assert infra_crew is not None
        assert summariser_crew is not None
    except ImportError as e:
        pytest.fail(f"Failed to import crew modules: {e}")
    except Exception as e:
        pytest.fail(f"Failed to instantiate crews: {e}")


def test_flow_import():
    """Test that the flow module can be imported."""
    try:
        from src.pr_reviewer.flow import CodeReviewFlow
        flow = CodeReviewFlow()
        assert flow is not None
    except ImportError as e:
        pytest.fail(f"Failed to import flow module: {e}")


def test_main_import():
    """Test that the main module can be imported."""
    try:
        from src.pr_reviewer.main import app
        assert app is not None
    except ImportError as e:
        pytest.fail(f"Failed to import main module: {e}")


# --- tests/unit/test_context.py ---
"""
Unit tests for the context resolution system.
"""
import tempfile
from src.pr_reviewer.context import resolve_context
from src.pr_reviewer.state import PRReviewState, ContextOverrides


def test_resolve_context_with_defaults():
    """Test resolving context when default files exist."""
    # Build a throwaway contexts/defaults tree so resolve_context picks
    # the files up from the current working directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        defaults_dir = os.path.join(temp_dir, "contexts", "defaults")
        os.makedirs(defaults_dir)

        with open(os.path.join(defaults_dir, "code_review.md"), "w") as f:
            f.write("# Code Review\n\nFollow PEP8.")

        with open(os.path.join(defaults_dir, "security_review.md"), "w") as f:
            f.write("# Security Review\n\nCheck for SQL injection.")

        with open(os.path.join(defaults_dir, "infra_review.md"), "w") as f:
            f.write("# Infra Review\n\nValidate Dockerfile.")

        # resolve_context resolves default files relative to the cwd,
        # so chdir into the temp tree for the duration of the call.
        original_cwd = os.getcwd()
        os.chdir(temp_dir)

        try:
            # State without overrides: every section must fall back to
            # the default files created above.
            state = PRReviewState(
                pr_id="123",
                pr_title="Test PR",
                repo_name="test-repo",
                repo_url="https://github.com/user/test-repo",
                branch="main",
                base_branch="main"
            )

            context = resolve_context(state)

            assert context["code_review"] == "# Code Review\n\nFollow PEP8."
            assert context["security_review"] == "# Security Review\n\nCheck for SQL injection."
            assert context["infra_review"] == "# Infra Review\n\nValidate Dockerfile."
        finally:
            os.chdir(original_cwd)


def test_resolve_context_with_overrides():
    """Test resolving context when overrides are provided."""
    # Overrides for two sections only; infra_review is deliberately
    # omitted so it falls through to the default-file lookup.
    state = PRReviewState(
        pr_id="123",
        pr_title="Test PR",
        repo_name="test-repo",
        repo_url="https://github.com/user/test-repo",
        branch="main",
        base_branch="main",
        context_overrides=ContextOverrides(
            code_review="Custom code review guidelines",
            security_review="Custom security guidelines"
        )
    )

    # No contexts/defaults tree exists for this test, so the un-overridden
    # section resolves to the empty string.
    context = resolve_context(state)

    assert context["code_review"] == "Custom code review guidelines"
    assert context["security_review"] == "Custom security guidelines"
    assert context["infra_review"] == ""  # no override and no default file


def test_resolve_context_empty_state():
    """Test resolving context with minimal state."""
    state = PRReviewState(
        pr_id="123",
        pr_title="Test PR",
        repo_name="test-repo",
        repo_url="https://github.com/user/test-repo",
        branch="main",
        base_branch="main"
    )

    # Whether the sections are empty or populated depends on whether
    # default context files exist in the current working directory, so
    # this test only pins the return types, not the contents.
    context = resolve_context(state)

    assert isinstance(context["code_review"], str)
    assert isinstance(context["security_review"], str)
    assert isinstance(context["infra_review"], str)


# --- tests/unit/test_state.py ---
"""
Unit tests for the state management module.
"""
from src.pr_reviewer.state import FileInfo, ContextOverrides, PRReviewState


def test_file_info_creation():
    """Test creating a FileInfo instance."""
    file_info = FileInfo(
        path="src/main.py",
        content="print('hello')",
        status="modified",
        additions=5,
        deletions=2
    )

    assert file_info.path == "src/main.py"
    assert file_info.content == "print('hello')"
    assert file_info.status == "modified"
    assert file_info.additions == 5
    assert file_info.deletions == 2


def test_context_overrides_creation():
    """Test creating a ContextOverrides instance with all fields."""
    context_overrides = ContextOverrides(
        code_review="Follow PEP8",
        security_review="Check for SQL injection",
        infra_review="Validate Dockerfile"
    )

    assert context_overrides.code_review == "Follow PEP8"
    assert context_overrides.security_review == "Check for SQL injection"
    assert context_overrides.infra_review == "Validate Dockerfile"


def test_context_overrides_partial():
    """Test that unspecified ContextOverrides fields default to None."""
    context_overrides = ContextOverrides(
        code_review="Follow PEP8"
    )

    assert context_overrides.code_review == "Follow PEP8"
    assert context_overrides.security_review is None
    assert context_overrides.infra_review is None


def test_pr_review_state_creation():
    """Test creating a PRReviewState instance and its field defaults."""
    state = PRReviewState(
        pr_id="123",
        pr_title="Add feature",
        repo_name="test-repo",
        repo_url="https://github.com/user/test-repo",
        branch="feature",
        base_branch="main"
    )

    assert state.pr_id == "123"
    assert state.pr_title == "Add feature"
    assert state.repo_name == "test-repo"
    assert state.repo_url == "https://github.com/user/test-repo"
    assert state.branch == "feature"
    assert state.base_branch == "main"
    assert state.files == []
    assert state.started_at is None
    assert state.completed_at is None
# --- tools/git_tool.py ---
from typing import Optional, Type

from pydantic import BaseModel, Field
from crewai_tools import BaseTool
import git
import os
import shutil


class GitCloneInput(BaseModel):
    """Input schema for GitTool."""
    repo_url: str = Field(..., description="URL of the Git repository to clone")
    # These fields were previously annotated as plain `str` with a None
    # default, which pydantic rejects under validation; Optional[str] is
    # the correct declaration for nullable fields.
    branch: Optional[str] = Field(None, description="Branch to checkout (optional)")
    commit: Optional[str] = Field(None, description="Commit hash to checkout (optional)")
    target_dir: Optional[str] = Field(None, description="Target directory to clone into (optional)")


class GitTool(BaseTool):
    """Tool that clones a Git repository and checks out a branch or commit."""
    name: str = "GitTool"
    description: str = "Clones a Git repository and checks out a specific branch or commit"
    args_schema: Type[BaseModel] = GitCloneInput

    def _run(self, repo_url: str, branch: Optional[str] = None,
             commit: Optional[str] = None, target_dir: Optional[str] = None) -> str:
        """
        Clone a Git repository and checkout a specific branch or commit.

        Args:
            repo_url: URL of the Git repository to clone.
            branch: Branch to checkout (optional; takes precedence over commit).
            commit: Commit hash to checkout (optional).
            target_dir: Target directory to clone into (optional; derived
                from the repository name when omitted).

        Returns:
            A message describing the result; errors are reported in the
            returned string rather than raised, so the calling agent can
            read them.
        """
        # Derive the target directory from the repo name when not given.
        # rstrip("/") guards against URLs with a trailing slash, which
        # previously produced an empty directory name.
        if target_dir is None:
            target_dir = repo_url.rstrip("/").split("/")[-1]
            if target_dir.endswith(".git"):
                target_dir = target_dir[:-4]

        # WARNING: destructive — any existing directory at target_dir is
        # removed so the clone starts from a clean slate.
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)

        try:
            repo = git.Repo.clone_from(repo_url, target_dir)

            # Branch wins over commit when both are supplied, matching the
            # original precedence.
            if branch:
                repo.git.checkout(branch)
                checked_out = f"branch: {branch}"
            elif commit:
                repo.git.checkout(commit)
                checked_out = f"commit: {commit}"
            else:
                checked_out = "default branch"

            return (f"Successfully cloned {repo_url} into {target_dir} "
                    f"and checked out {checked_out}")
        except Exception as e:
            return f"Error cloning repository: {str(e)}"