From bf6fe21ea601147d0969a28fcb4ecb33384f76e9 Mon Sep 17 00:00:00 2001 From: Andrew Ridgway Date: Tue, 19 May 2026 23:24:27 +1000 Subject: [PATCH] first build attempt --- .env.example | 29 +- .gitea/workflows/build_push.yml | 12 +- .gitignore | 5 + Dockerfile | 74 +++-- README.md | 284 +++++++++--------- config/__init__.py | 0 config/agents.yaml | 7 - config/tasks.yaml | 16 - .../code_review_crew.cpython-314.pyc | Bin 3232 -> 3360 bytes crews/code_review_crew/code_review_crew.py | 30 +- crews/infra_review_crew/infra_review_crew.py | 45 +-- .../security_review_crew.py | 24 +- crews/summariser_crew/summariser_crew.py | 19 +- docker-compose.yaml | 16 + kube/pr-reviewer_deployment.yaml | 14 +- kube/pr-reviewer_pod.yaml | 13 - pyproject.toml | 3 +- simple_test.py | 118 -------- .../__pycache__/flow.cpython-314.pyc | Bin 9040 -> 13393 bytes .../__pycache__/main.cpython-314.pyc | Bin 11201 -> 11851 bytes src/pr_reviewer/flow.py | 65 +--- src/pr_reviewer/main.py | 37 ++- src/pr_reviewer/state.py | 12 +- start.sh | 12 - test_docker.py | 142 --------- tests/integration/test_api.py | 31 ++ tests/integration/test_full_review.py | 75 +++++ 27 files changed, 449 insertions(+), 634 deletions(-) delete mode 100644 config/__init__.py delete mode 100644 config/agents.yaml delete mode 100644 config/tasks.yaml create mode 100644 docker-compose.yaml delete mode 100644 kube/pr-reviewer_pod.yaml delete mode 100644 simple_test.py delete mode 100755 start.sh delete mode 100644 test_docker.py create mode 100644 tests/integration/test_api.py create mode 100644 tests/integration/test_full_review.py diff --git a/.env.example b/.env.example index 6f6b5f7..52046b7 100644 --- a/.env.example +++ b/.env.example @@ -1,36 +1,25 @@ # LLM Configuration -# Choose one of the following LLM providers: -# For OpenAI: +# Provider options: openai, anthropic, ollama + +# Required LLM_MODEL=gpt-4 -LLM_BASE_URL=https://api.openai.com/v1 -LLM_API_KEY=your_openai_api_key_here LLM_PROVIDER=openai -# For Anthropic: -# LLM_MODEL=claude-3-opus-20240229 -# LLM_BASE_URL=https://api.anthropic.com -# LLM_API_KEY=your_anthropic_api_key_here -# LLM_PROVIDER=anthropic +# Required for OpenAI/Anthropic +LLM_BASE_URL=https://api.openai.com/v1 +LLM_API_KEY=your_api_key_here -# For Ollama (local): +# For Ollama (local or network): # LLM_MODEL=llama2 # LLM_BASE_URL=http://localhost:11434 -# LLM_API_KEY=ollama # Ollama doesn't require a real API key # LLM_PROVIDER=ollama -# MCP Server Configuration -# Hadolint MCP Server (installed via pip in Docker) -# Checkov MCP Server (installed via pip in Docker) -# Semgrep MCP Server (native, no configuration needed) -# Trivy MCP Server (native, no configuration needed) - -# Optional: Semgrep App URL and Token for SEMgrep App functionality +# Optional: Semgrep App URL and Token SEMGRAPH_APP_URL= SEMGRAPH_API_TOKEN= -# Timeout Configuration (in seconds) +# Timeout Configuration (seconds) TOTAL_FLOW_TIMEOUT=600 PER_CREW_TIMEOUT=300 -# Other Configuration LOG_LEVEL=INFO \ No newline at end of file diff --git a/.gitea/workflows/build_push.yml b/.gitea/workflows/build_push.yml index 0116b1f..3abed7a 100644 --- a/.gitea/workflows/build_push.yml +++ b/.gitea/workflows/build_push.yml @@ -66,7 +66,15 @@ jobs: chmod 644 /etc/apt/sources.list.d/kubernetes.list apt-get update apt-get install kubectl - kubectl delete namespace pr-reviewer + kubectl delete namespace pr-reviewer --ignore-not-found kubectl create namespace pr-reviewer kubectl create secret docker-registry regcred --docker-server=${{ vars.DOCKER_SERVER }} --docker-username=${{ vars.DOCKER_USERNAME }} --docker-password='${{ secrets.DOCKER_PASSWORD }}' --docker-email=${{ vars.DOCKER_EMAIL }} --namespace=pr-reviewer - kubectl apply -f kube/pr-reviewer_pod.yaml && kubectl apply -f kube/pr-reviewer_deployment.yaml && kubectl apply -f kube/pr-reviewer_service.yaml + kubectl create secret generic pr-reviewer-env \ + --from-literal=LLM_PROVIDER=ollama \ + --from-literal=LLM_MODEL=${{ vars.OLLAMA_MODEL }} \ + --from-literal=LLM_BASE_URL=http://${{ vars.OLLAMA_SERVER }} \ + --from-literal=LOG_LEVEL=INFO \ + --from-literal=TOTAL_FLOW_TIMEOUT=600 \ + --from-literal=PER_CREW_TIMEOUT=300 \ + --namespace=pr-reviewer + kubectl apply -f kube/pr-reviewer_deployment.yaml && kubectl apply -f kube/pr-reviewer_service.yaml \ No newline at end of file diff --git a/.gitignore b/.gitignore index d60172d..a8b4a3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,6 @@ +__pycache__/ +.pytest_cache/ +.benchmarks/ .spec/ +.env +.venv/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 4f33255..2ae6638 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -# Stage 1: Base with system dependencies and tool installations -FROM python:3.12-slim as builder +# Stage 1: Builder +FROM python:3.12-slim AS builder # Install system dependencies RUN apt-get update && apt-get install -y \ @@ -7,58 +7,52 @@ RUN apt-get update && apt-get install -y \ curl \ && rm -rf /var/lib/apt/lists/* -# Install Hadolint (for Dockerfile linting) -RUN curl -Lo /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64 && \ - chmod +x /bin/hadolint - -# Install Checkov (for Kubernetes security scanning) -RUN pip install checkov - -# Install Trivy (for container and IaC scanning) - Native MCP server +# Install Tools +RUN curl -Lo /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64 && chmod +x /bin/hadolint +RUN pip install checkov semgrep RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin -# Install Semgrep (for code scanning) - Will use native MCP server -RUN pip install semgrep - -# Install UV package manager +# Install UV COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -# Stage 2: App with source code and UV sync +WORKDIR /app +COPY pyproject.toml . +# Create virtual environment and install dependencies +RUN uv venv /opt/venv +RUN uv pip install --python /opt/venv/bin/python . + +# Stage 2: Final FROM python:3.12-slim -# Create non-root user -RUN useradd --create-home --shell /bin/bash app -WORKDIR /app -USER app - -# Install runtime dependencies +# Install system dependencies needed at runtime RUN apt-get update && apt-get install -y \ git \ && rm -rf /var/lib/apt/lists/* -# Copy UV from builder stage -COPY --from=builder /bin/uv /bin/uv -COPY --from=builder /bin/uvx /bin/uvx +# Create non-root user +RUN useradd --create-home --shell /bin/bash app +WORKDIR /app + +# Copy virtual environment and tools from builder +COPY --from=builder /opt/venv /opt/venv +COPY --from=builder /bin/hadolint /bin/hadolint +# Copy other tools if needed (Trivy, etc.) +COPY --from=builder /usr/local/bin/trivy /usr/local/bin/trivy # Copy application code -COPY --chown=app:app pyproject.toml . -COPY --chown=app:app README.md . -COPY --chown=app:app src/ ./src/ -COPY --chown=app:app mcp_servers/ ./mcp_servers/ -COPY --chown=app:app crews/ ./crews/ -COPY --chown=app:app tools/ ./tools/ -COPY --chown=app:app config/ ./config/ -COPY --chown=app:app contexts/ ./contexts/ +COPY src/ ./src/ +COPY mcp_servers/ ./mcp_servers/ +COPY crews/ ./crews/ +COPY tools/ ./tools/ +COPY config/ ./config/ +COPY contexts/ ./contexts/ +COPY README.md . -# Install Python dependencies using UV -RUN uv sync --frozen --no-dev +# Set the environment variables to use the venv +ENV PATH="/opt/venv/bin:$PATH" +ENV PYTHONPATH="/app/src" +USER app -# Set environment variables -ENV PYTHONPATH=/app/src -ENV PATH="/app/.venv/bin:$PATH" - -# Expose port EXPOSE 8000 -# Set entrypoint ENTRYPOINT ["uvicorn", "src.pr_reviewer.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/README.md b/README.md index 5805519..b199dd3 100644 --- a/README.md +++ b/README.md @@ -1,185 +1,191 @@ # PR Reviewer -An automated pull request review system using CrewAI and MCP (Model Context Protocol). +Automated pull request review system using [CrewAI](https://crewai.com) Flows and MCP (Model Context Protocol) tools. -## Overview - -This system provides automated code, security, and infrastructure reviews for pull requests using a multi-agent approach. It leverages CrewAI for orchestrating specialized review agents and MCP (Model Context Protocol) for integrating with various static analysis tools. +Performs three parallel reviews — code quality, security, and infrastructure — then synthesizes a consolidated report via a REST API. ## Features -- **Code Review**: Uses Semgrep (via MCP) to check code quality, best practices, and maintainability -- **Security Review**: Uses Trivy (native MCP) to identify security vulnerabilities -- **Infrastructure Review**: Uses Hadolint and Checkov (via MCP wrappers) to review Dockerfiles and Kubernetes manifests -- **Contextual Review**: Incorporates customizable guidelines for code, security, and infrastructure reviews -- **Automated Orchestration**: Uses CrewAI Flows to manage the review process -- **REST API**: FastAPI endpoint for triggering reviews -- **Containerized**: Docker support for easy deployment +- **Code Review** — style, best practices, maintainability (powered by Semgrep) +- **Security Review** — vulnerabilities, injection risks, auth issues (powered by Trivy) +- **Infrastructure Review** — Dockerfiles, Kubernetes manifests, IaC (powered by Hadolint + Checkov) +- **Summarisation** — merges all three reviews into a single actionable report +- **REST API** — FastAPI endpoints for health check and review trigger +- **Dockerized** — multi-stage build with all tools bundled ## Architecture -The system follows a modular architecture with: -- State management using Pydantic models -- LLM factory for flexible provider support (OpenAI, Anthropic, Ollama) -- Context resolution system for incorporating review guidelines -- Crew-based implementation for each review type (code, security, infrastructure) -- MCP server integrations for static analysis tools -- Flow-based orchestration for managing the review process -- RESTful API for integration with CI/CD systems +``` +POST /api/v1/review + │ + ▼ + CodeReviewFlow (CrewAI Flow) + │ + ┌────┼──────────────┐ + ▼ ▼ ▼ + Code Security Infra +Review Review Review + │ │ │ + └─────┼────────────┘ + ▼ + Summariser + │ + ▼ + JSON Response +``` -## Installation +LLM-agnostic via CrewAI's LLM abstraction — works with OpenAI, Anthropic, or Ollama. + +## Quick Start ### Prerequisites -- Python 3.10-3.13 -- UV package manager -- Git -- Docker (optional, for containerized deployment) -### Local Development -1. Clone the repository -2. Install UV package manager: `curl -LsSf https://astral.sh/uv/install.sh | sh` -3. Activate UV environment: `source $HOME/.local/bin/env` -4. Create virtual environment: `uv venv .venv` -5. Activate virtual environment: `source .venv/bin/activate` -6. Install dependencies: `uv pip install -e .` -7. Configure environment variables (see `.env.example`) +- Docker +- An LLM provider (OpenAI API key, Anthropic key, or a running Ollama instance) -### Docker Deployment -1. Build the Docker image: `docker build -t pr-reviewer .` -2. Run the container: `docker run -p 8000:8000 --env-file .env pr-reviewer` +### Setup -## Usage - -### API Endpoints - -#### Health Check ```bash -GET /api/v1/health +cp .env.example .env +# Edit .env with your LLM provider details ``` -Returns the health status of the service. -#### Trigger PR Review +### Run + ```bash -POST /api/v1/review +docker compose up ``` -Initiates a pull request review. -Request Body: +Server starts at `http://localhost:8000`. + +### Test + +```bash +# Health check +curl http://localhost:8000/api/v1/health + +# Trigger a review +curl -X POST http://localhost:8000/api/v1/review \ + -H "Content-Type: application/json" \ + -d '{ + "pr_id": "123", + "title": "Add user authentication", + "repo": {"name": "myapp/backend", "url": "https://github.com/myapp/backend"}, + "source": {"branch": "feature/auth"}, + "target": {"branch": "main"}, + "files": [ + { + "path": "auth.py", + "status": "added", + "content": "def login(user, pwd):\n if user == \"admin\" and pwd == \"admin\":\n return True", + "additions": 3, + "deletions": 0 + } + ] + }' +``` + +## API + +### `GET /api/v1/health` + +Returns service status. + +```json +{"status": "healthy", "service": "pr-reviewer"} +``` + +### `POST /api/v1/review` + +Triggers a full PR review. + +**Request body:** + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `pr_id` | string | yes | PR identifier | +| `title` | string | yes | PR title | +| `description` | string | no | PR description | +| `repo.name` | string | yes | Repository name | +| `repo.url` | string | yes | Repository URL | +| `source.branch` | string | yes | Source branch | +| `source.commit` | string | no | Source commit SHA | +| `target.branch` | string | yes | Target branch | +| `target.commit` | string | no | Target commit SHA | +| `files[]` | array | no | Changed files | +| `files[].path` | string | yes | File path | +| `files[].content` | string | no | File contents | +| `files[].status` | string | yes | `added`, `modified`, `removed` | +| `files[].additions` | int | no | Lines added | +| `files[].deletions` | int | no | Lines removed | +| `files[].patch` | string | no | Unified diff | +| `context.code_review` | string | no | Code review guidelines override | +| `context.security_review` | string | no | Security review guidelines override | +| `context.infra_review` | string | no | Infrastructure review guidelines override | + +**Response:** + ```json { - "pr_id": "123", - "title": "Add new feature", - "description": "This PR adds a new feature to the application", - "repo": { - "name": "my-repo", - "url": "https://github.com/user/my-repo" - }, - "source": { - "branch": "feature/new-feature", - "commit": "abc123" - }, - "target": { - "branch": "main", - "commit": "def456" - }, - "files": [ - { - "path": "src/main.py", - "content": "print('Hello World')", - "status": "modified", - "additions": 1, - "deletions": 0 - } - ], - "context": { - "code_review": "Follow PEP8 guidelines", - "security_review": "Check for SQL injection vulnerabilities", - "infra_review": "Ensure Dockerfile follows best practices" - } -} -``` - -Response: -```json -{ - "review_id": "uuid-string", + "review_id": "uuid", "status": "completed", - "timestamp": "2023-05-08T10:00:00Z", + "timestamp": "2024-01-01T00:00:00Z", "results": { - "code_review": "Code review results...", - "security_review": "Security review results...", - "infra_review": "Infrastructure review results...", - "summary": "Synthesized review summary..." + "code_review": "...", + "security_review": "...", + "infra_review": "...", + "summary": "..." }, "metadata": { - "processing_time_seconds": 45.2, + "processing_time_seconds": 290.22, "pr_id": "123", - "repo": { - "name": "my-repo", - "url": "https://github.com/user/my-repo" - } + "repo": {"name": "myapp/backend", "url": "https://github.com/myapp/backend"} } } ``` ## Configuration -### Environment Variables -See `.env.example` for detailed configuration options. +All configuration via environment variables in `.env`: -### Context Files -Default review guidelines are located in `contexts/defaults/`: -- `code_review.md`: Coding practice guidelines -- `security_review.md`: Security guidelines -- `infra_review.md`: Infrastructure guidelines - -These can be overridden via the API context parameter. +| Variable | Default | Description | +|----------|---------|-------------| +| `LLM_MODEL` | (required) | Model name (e.g. `gpt-4`, `gemma4:31b-cloud`) | +| `LLM_PROVIDER` | (required) | `openai`, `anthropic`, or `ollama` | +| `LLM_BASE_URL` | — | API base URL | +| `LLM_API_KEY` | — | API key (not needed for Ollama) | +| `TOTAL_FLOW_TIMEOUT` | `600` | Max seconds for full review | +| `PER_CREW_TIMEOUT` | `300` | Max seconds per crew | +| `LOG_LEVEL` | `INFO` | Logging level | ## Development -### Running Tests ```bash -# Run unit tests -pytest +# Install deps +uv pip install -e ".[dev]" -# Run tests with coverage -pytest --cov=src.pr_reviewer +# Run tests +pytest tests/ -# Run specific test categories -pytest tests/unit/ -pytest tests/integration/ +# Run server locally +uvicorn src.pr_reviewer.main:app --reload ``` -### Code Style -The project uses Black for code formatting and Flake8 for linting. +## Project Structure -Run formatting: -```bash -black src/ ``` - -Run linting: -```bash -flake8 src/ +├── config/ # Shared agent/task YAML configs +├── contexts/ # Default review guidelines (markdown) +├── crews/ # Crew definitions (code, security, infra, summariser) +├── mcp_servers/ # MCP tool wrappers (Hadolint, Checkov) +├── src/pr_reviewer/ # Core application code +│ ├── main.py # FastAPI app +│ ├── flow.py # CrewAI Flow orchestration +│ ├── state.py # Pydantic state models +│ ├── llm.py # LLM factory +│ └── context.py # Context resolution +├── tests/ # Unit and integration tests +├── docker-compose.yaml +├── Dockerfile +└── pyproject.toml ``` - -## Deployment - -### Kubernetes -Kubernetes manifests are available in the `k8s/` directory: -- Secret for LLM configuration -- Deployment for the PR Reviewer service -- Service for exposing the API - -### Gitea Actions -GitHub Actions workflow for CI/CD is available in `.gitea/workflows/deploy.yaml`. - -## License -MIT - -## Contributing -1. Fork the repository -2. Create a feature branch -3. Commit your changes -4. Push to the branch -5. Open a pull request \ No newline at end of file diff --git a/config/__init__.py b/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/config/agents.yaml b/config/agents.yaml deleted file mode 100644 index 13be677..0000000 --- a/config/agents.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Summarizer Agent Configuration -summariser: - role: Senior Code Review Coordinator - goal: Synthesize individual review results into a cohesive, actionable review report - backstory: You are a senior technical lead with extensive experience in code review practices across multiple domains. You excel at combining feedback from different reviewers into a clear, prioritized, and actionable summary that helps development teams improve their code efficiently. - verbose: true - allow_delegation: false \ No newline at end of file diff --git a/config/tasks.yaml b/config/tasks.yaml deleted file mode 100644 index ce72ea6..0000000 --- a/config/tasks.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Summarizer Task Configuration -summarise_task: - description: | - Synthesize the results from code, security, and infrastructure reviews into a cohesive review report. - Code Review Results: {code_review_results} - Security Review Results: {security_review_results} - Infrastructure Review Results: {infra_review_results} - Context: {context} - expected_output: | - A comprehensive review report that includes: - - Executive summary of all findings - - Prioritized list of issues (critical, high, medium, low) - - Specific recommendations for each domain (code, security, infrastructure) - - Overall assessment and recommendation (e.g., Approved, Approved with Minor Changes, Significant Changes Needed) - - Summary of positive aspects of the PR - agent: summariser \ No newline at end of file diff --git a/crews/code_review_crew/__pycache__/code_review_crew.cpython-314.pyc b/crews/code_review_crew/__pycache__/code_review_crew.cpython-314.pyc index f8a68dfe975140c6b1bbd3399905c080146feb19..09b379af5fa0207118025ab96b5fa3914b8f10ea 100644 GIT binary patch delta 966 zcmah{zi-n(6uygp#daJQD5}z?O+(Ta6KWS!1tCyd(ZzudC_}{pl}SwM(!}9>v@o^& z2ZWp0+OZ>aMC=SKh|Q>UW~c-b;>T3(Ie^%(?C;+D{NDGS-+3puGi+3{hKBg{RnylN zdPZ*R5l^DpYaPG@bB5O?5S@C9zo3-3-kn zu~ajt!TaP|)-|c=4$y%DI*p2`t_-1i`nEJHfH^|+L|rN&OZ_B6W(mFB!4WY|FA{(A zniu)8{~Z6sSZ7SQe)+Dm8n%7M zBkdrfUc+~q{)W>fE5zRj{7s)Y)Q@Q71Rd(Ph)094m3N{}b$=0HkM%cL*wDvG5a5lmmCP|SanPxL`5$9Q5wt3$Z zdCs2Tw1tTXONyL@B8x+m;&5iW-*pby3WrH{B!i)@Jge9|=N-@(XJ8h8Y{^AV{cBof zZ%6TgB*Trqwh$Uv@Za?X0njd{%$&lvBO@eW7Qd6MI2C7 z2>eQ93;=8hsE{g$SXuQVk5bYvOpyXJwR|BNj|%TkErw0Mo*WFEx!vG!p54%>QR1)p ztU%NnU~jeg@&z7}O8~ OgSDr(z9GIOTE75jJkcru delta 874 zcmah|y-yTT5P!4Zw|jeN&>*tF^6heP6c8H}5Sl#`EG~xFC~mm*yzp`O_9?M~(hkq} z56}{0Bds?63&v&*!Pb}s~K@{*_uiBv_oUka0 z{p>s&U^SzQ)y&&4qmmF~!mVjTEU-S36KJWm#N(aCD2Y@3TB8_miB!88CF`lS)J_{< z$whe3;no~cS^?3q!e&97IeG)X0Iy!D*I``FYh~z?>zW58`9ymOnw-^#p-LVrD`p%fK3jVR^#} za@FNiRU@zQAJqw}cMjp_9{a%@-Tq3@u4f1LkCZ|;z=ZVdzRQys#VI~8xk+{p0m<-j zf@y@*ydNcz5Ihf5xF@%5&!+*U+8XXF?ObZr<68Wv8Sl{kDnyp$b6dDR2H~`*YH(Ig zIP*aTO+G`=ML>JVrD!mjUM@c;_nmpam&m{KU0BHHTO^jilAoPu*FSZGQfiQP)a3NO z^5R)t>L@MdcdMe++DNi?Q+0D~n_Confg%j?c)PjP;00pnU&vGr8TZM~I<~HCw(1Wz t;yJ#I9KDsvWC-z#3IHdp@|IPOS#Zoo-m}69^S&8VU_ZU}o#ByH`2*h`v<3hG diff --git a/crews/code_review_crew/code_review_crew.py b/crews/code_review_crew/code_review_crew.py index 4411fb2..ecda520 100644 --- a/crews/code_review_crew/code_review_crew.py +++ b/crews/code_review_crew/code_review_crew.py @@ -1,19 +1,21 @@ -from crewai import CrewBase, Agent, Task, Crew +from crewai import Agent, Task, Crew +from crewai.project import CrewBase, agent, task, crew from crewai_tools import MCPServerAdapter from mcp import StdioServerParameters import os from typing import Dict, Any +from pr_reviewer.llm import get_llm -class CodeReviewCrew(CrewBase): +@CrewBase +class CodeReviewCrew: """Code Review Crew for conducting code quality reviews.""" agents_config = "config/agents.yaml" tasks_config = "config/tasks.yaml" def __init__(self): - super().__init__() - # Configure Semgrep MCP server connection + self.llm = get_llm() self.semgrep_server_params = StdioServerParameters( command="semgrep", args=["--metrics=off", "--json", "--stdin-display-name", "scanned_code", "--"], @@ -24,32 +26,38 @@ class CodeReviewCrew(CrewBase): } ) - @Agent + @agent def code_reviewer(self) -> Agent: """Senior Software Engineer agent for code review.""" return Agent( config=self.agents_config["code_reviewer"], - tools=[], # Tools will be added via MCP adapter in the task + llm=self.llm, + tools=[], verbose=True ) - @Task + @task def code_review_task(self) -> Task: """Task for conducting code review.""" return Task( config=self.tasks_config["code_review_task"], ) - @Crew + @crew def crew(self) -> Crew: """Create the Code Review crew.""" - # Create MCP server adapter for Semgrep - semgrep_adapter = MCPServerAdapter(self.semgrep_server_params) + tools = [] + try: + semgrep_adapter = MCPServerAdapter(self.semgrep_server_params) + if hasattr(semgrep_adapter, 'tools'): + tools = semgrep_adapter.tools + except Exception as e: + print(f"MCP adapter not available: {e}") return Crew( agents=[self.code_reviewer()], tasks=[self.code_review_task()], process="sequential", verbose=True, - tools=semgrep_adapter.tools if hasattr(semgrep_adapter, 'tools') else [], + tools=tools, ) \ No newline at end of file diff --git a/crews/infra_review_crew/infra_review_crew.py b/crews/infra_review_crew/infra_review_crew.py index 049be39..7b422c4 100644 --- a/crews/infra_review_crew/infra_review_crew.py +++ b/crews/infra_review_crew/infra_review_crew.py @@ -1,60 +1,65 @@ -from crewai import CrewBase, Agent, Task, Crew +from crewai import Agent, Task, Crew +from crewai.project import CrewBase, agent, task, crew from crewai_tools import MCPServerAdapter from mcp import StdioServerParameters import os from typing import Dict, Any +from pr_reviewer.llm import get_llm -class InfraReviewCrew(CrewBase): +@CrewBase +class InfraReviewCrew: """Infrastructure Review Crew for conducting infrastructure reviews.""" agents_config = "config/agents.yaml" tasks_config = "config/tasks.yaml" def __init__(self): - super().__init__() - # Configure Hadolint MCP server connection + self.llm = get_llm() self.hadolint_server_params = StdioServerParameters( command="python", - args=["/home/armistace/dev/pr_reviewer/mcp_servers/hadolint_mcp.py"], + args=["/app/mcp_servers/hadolint_mcp.py"], env=os.environ ) - # Configure Checkov MCP server connection self.checkov_server_params = StdioServerParameters( command="python", - args=["/home/armistace/dev/pr_reviewer/mcp_servers/checkov_mcp.py"], + args=["/app/mcp_servers/checkov_mcp.py"], env=os.environ ) - @Agent + @agent def infra_reviewer(self) -> Agent: """DevOps/Platform Engineer agent for infrastructure review.""" return Agent( config=self.agents_config["infra_reviewer"], - tools=[], # Tools will be added via MCP adapter in the task + llm=self.llm, + tools=[], verbose=True ) - @Task + @task def infra_review_task(self) -> Task: """Task for conducting infrastructure review.""" return Task( config=self.tasks_config["infra_review_task"], ) - @Crew + @crew def crew(self) -> Crew: """Create the Infrastructure Review crew.""" - # Create MCP server adapters for Hadolint and Checkov - hadolint_adapter = MCPServerAdapter(self.hadolint_server_params) - checkov_adapter = MCPServerAdapter(self.checkov_server_params) - - # Combine tools from both adapters all_tools = [] - if hasattr(hadolint_adapter, 'tools'): - all_tools.extend(hadolint_adapter.tools) - if hasattr(checkov_adapter, 'tools'): - all_tools.extend(checkov_adapter.tools) + try: + hadolint_adapter = MCPServerAdapter(self.hadolint_server_params) + if hasattr(hadolint_adapter, 'tools'): + all_tools.extend(hadolint_adapter.tools) + except Exception as e: + print(f"Hadolint MCP adapter not available: {e}") + try: + checkov_adapter = MCPServerAdapter(self.checkov_server_params) + if hasattr(checkov_adapter, 'tools'): + all_tools.extend(checkov_adapter.tools) + except Exception as e: + print(f"Checkov MCP adapter not available: {e}") return Crew( agents=[self.infra_reviewer()], diff --git a/crews/security_review_crew/security_review_crew.py b/crews/security_review_crew/security_review_crew.py index 861f63f..b970a2c 100644 --- a/crews/security_review_crew/security_review_crew.py +++ b/crews/security_review_crew/security_review_crew.py @@ -1,41 +1,41 @@ -from crewai import CrewBase, Agent, Task, Crew +from crewai import Agent, Task, Crew +from crewai.project import CrewBase, agent, task, crew from crewai_tools import MCPServerAdapter from mcp import StdioServerParameters import os from typing import Dict, Any +from pr_reviewer.llm import get_llm -class SecurityReviewCrew(CrewBase): +@CrewBase +class SecurityReviewCrew: """Security Review Crew for conducting security reviews.""" agents_config = "config/agents.yaml" tasks_config = "config/tasks.yaml" def __init__(self): - super().__init__() - # Trivy uses native MCP server, so we don't need to configure a wrapper. - # However, we might need to set up connection parameters if required by the native server. - # For now, we assume the native Trivy MCP server is available at a known address or via stdio. - # We'll leave the MCP server configuration empty and rely on the native server being available. - self.trivy_server_params = None # Placeholder for if we need to configure stdio parameters + self.llm = get_llm() + self.trivy_server_params = None - @Agent + @agent def security_reviewer(self) -> Agent: """Application Security Engineer agent for security review.""" return Agent( config=self.agents_config["security_reviewer"], - tools=[], # Tools will be added via MCP adapter in the task + llm=self.llm, + tools=[], verbose=True ) - @Task + @task def security_review_task(self) -> Task: """Task for conducting security review.""" return Task( config=self.tasks_config["security_review_task"], ) - @Crew + @crew def crew(self) -> Crew: """Create the Security Review crew.""" # If we had an MCP server to wrap, we would create an adapter here. diff --git a/crews/summariser_crew/summariser_crew.py b/crews/summariser_crew/summariser_crew.py index 2ea33c5..61a6557 100644 --- a/crews/summariser_crew/summariser_crew.py +++ b/crews/summariser_crew/summariser_crew.py @@ -1,37 +1,40 @@ -from crewai import CrewBase, Agent, Task, Crew +from crewai import Agent, Task, Crew +from crewai.project import CrewBase, agent, task, crew from crewai_tools import MCPServerAdapter from mcp import StdioServerParameters import os from typing import Dict, Any +from pr_reviewer.llm import get_llm -class SummariserCrew(CrewBase): +@CrewBase +class SummariserCrew: """Summariser Crew for synthesizing review results.""" agents_config = "config/agents.yaml" tasks_config = "config/tasks.yaml" def __init__(self): - super().__init__() - # The summarizer doesn't need MCP server connections as it works with text results + self.llm = get_llm() - @Agent + @agent def summariser(self) -> Agent: """Senior Code Review Coordinator agent for summarizing reviews.""" return Agent( config=self.agents_config["summariser"], - tools=[], # No tools needed for summarization + llm=self.llm, + tools=[], verbose=True ) - @Task + @task def summarise_task(self) -> Task: """Task for synthesizing review results.""" return Task( config=self.tasks_config["summarise_task"], ) - @Crew + @crew def crew(self) -> Crew: """Create the Summariser crew.""" return Crew( diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..1557020 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,16 @@ +services: + pr-reviewer: + build: + context: . + dockerfile: Dockerfile + image: pr-reviewer:latest + ports: + - "8000:8000" + env_file: + - .env + volumes: + - ./src:/app/src + - ./config:/app/config + restart: always + extra_hosts: + - "host.docker.internal:host-gateway" diff --git a/kube/pr-reviewer_deployment.yaml b/kube/pr-reviewer_deployment.yaml index 3c0bcba..8aa9c14 100644 --- a/kube/pr-reviewer_deployment.yaml +++ b/kube/pr-reviewer_deployment.yaml @@ -6,7 +6,7 @@ metadata: app: pr-reviewer namespace: pr-reviewer spec: - replicas: 3 + replicas: 1 selector: matchLabels: app: pr-reviewer @@ -20,5 +20,15 @@ spec: image: git.aridgwayweb.com/armistace/pr-reviewer:latest ports: - containerPort: 8000 + envFrom: + - secretRef: + name: pr-reviewer-env + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "2Gi" + cpu: "1000m" imagePullSecrets: - - name: regcred + - name: regcred \ No newline at end of file diff --git a/kube/pr-reviewer_pod.yaml b/kube/pr-reviewer_pod.yaml deleted file mode 100644 index 774610c..0000000 --- a/kube/pr-reviewer_pod.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: pr-reviewer - namespace: pr-reviewer -spec: - containers: - - name: pr-reviewer - image: git.aridgwayweb.com/armistace/pr-reviewer:latest - ports: - - containerPort: 8000 - imagePullSecrets: - - name: regcred diff --git a/pyproject.toml b/pyproject.toml index e77fd6f..437702e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,10 +19,11 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "crewai>=0.28.0", + "crewai[tools]>=0.80.0", "fastapi>=0.104.0", "uvicorn>=0.24.0", "mcp>=0.1.0", + "mcpadapt", "pydantic>=2.5.0", "python-dotenv>=1.0.0", "gitpython>=3.1.0" diff --git a/simple_test.py b/simple_test.py deleted file mode 100644 index 4d074ac..0000000 --- a/simple_test.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple test to verify the basic components work without Docker. -This tests the core components without requiring Docker build. -""" - -import sys -import os - -# Add the project root to the path -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -def test_imports(): - """Test that all modules can be imported.""" - try: - # Test core modules - from src.pr_reviewer.state import FileInfo, ContextOverrides, PRReviewState - from src.pr_reviewer.llm import create_llm - from src.pr_reviewer.context import resolve_context - - print("✓ Core modules imported successfully") - - # Test state creation - state = PRReviewState( - pr_id="123", - pr_title="Test PR", - repo_name="test-repo", - repo_url="https://github.com/test/repo", - branch="feature", - base_branch="main" - ) - - print("✓ State creation works") - - # Test context resolution (will use default files if they exist) - context = resolve_context(state) - print(f"✓ Context resolution works: {list(context.keys())}") - - # Test file info - file_info = FileInfo( - path="test.py", - content="print('hello')", - status="added", - additions=1, - deletions=0 - ) - print("✓ FileInfo creation works") - - # Test context overrides - context_overrides = ContextOverrides( - code_review="Custom code review", - security_review="Custom security review" - ) - print("✓ ContextOverrides creation works") - - print("\n✓ All basic component tests passed!") - return True - - except Exception as e: - print(f"✗ Test failed with error: {e}") - import traceback - traceback.print_exc() - return False - -def test_crew_imports(): - """Test that crew modules can be imported.""" - try: - from crews.code_review_crew.code_review_crew import CodeReviewCrew - from crews.security_review_crew.security_review_crew import SecurityReviewCrew - from crews.infra_review_crew.infra_review_crew import InfraReviewCrew - from crews.summariser_crew.summariser_crew import SummariserCrew - - print("✓ Crew modules imported successfully") - - # Try to instantiate (might fail due to missing dependencies, but that's ok for import test) - code_crew = CodeReviewCrew() - security_crew = SecurityReviewCrew() - infra_crew = InfraReviewCrew() - summariser_crew = SummariserCrew() - - print("✓ Crew instantiation works") - return True - - except Exception as e: - print(f"⚠ Crew test warning (may be expected if dependencies missing): {e}") - # This might fail due to missing crewai or other dependencies, which is ok for this test - return True # Don't fail the overall test for this - -def test_api_imports(): - """Test that API modules can be imported.""" - try: - from src.pr_reviewer.main import app - from src.pr_reviewer.flow import CodeReviewFlow - - print("✓ API modules imported successfully") - return True - - except Exception as e: - print(f"✗ API import failed: {e}") - return False - -if __name__ == "__main__": - print("Running simple component tests...\n") - - success = True - success &= test_imports() - success &= test_crew_imports() - success &= test_api_imports() - - if success: - print("\n🎉 All tests passed! The basic components are working.") - print("\nTo test with Docker:") - print("1. Fix any Docker build issues if needed") - print("2. Run: ./start.sh") - print("3. Or manually: docker build -t pr-reviewer . && docker run -p 8000:8000 pr-reviewer") - else: - print("\n❌ Some tests failed. Please check the errors above.") - sys.exit(1) \ No newline at end of file diff --git a/src/pr_reviewer/__pycache__/flow.cpython-314.pyc b/src/pr_reviewer/__pycache__/flow.cpython-314.pyc index 66212c3651f64ad8313ec87e805481972c7ffa0c..4cfa52ff54df665c065e46c14a4ab774ddc01ca0 100644 GIT binary patch delta 3386 zcmbVOZ%kX)6@T~P_joq%J=+-D7*lu-gyLdgP-yrU+9p5(NeD5G(=8;-#Sd_Tv3vcT z2&9%oozzv+x~#XII%(7RHdr&Q9h&xKky>ffKCB3xASs~EZG?Pbb2bT*_%I*zmM1mB0okSJFf5n<_gz^ zK+7=Ua$;ruNFYbq7GxzCSij%qWx5O{qlw`PO^Ps@92OgxphRA0o36ddG0ZJyD(`^0 zKoKM|ovi7yzF5ajmF0ag5oB5%YF#!(eH%{%P4VO$=d(H)rk0+au#(%3+NrQ*|KSLF zGJjys3x-8$74AqAsxaM#c{7Q5RAgID14v|tb@>HTUmC~I+l8>Ti_NFVOvvO@i;JWz zBGOOWZ3nfI)r_sjKwOo4z^FcdTs)w0NrlwS6z$UguoCwLp%<|qjN&5y_E{|jH}->W z$Tnx*#Boq7*-L};I@=hNjT9SU&056H=(X4@CVQ0$)tILlMlWrkzH+quv?Enk&qP=U zbJ3Dv+n5YHLi_gRlL02|Cg0?INN3nio4|O^IW8#%>rz2x-UzwKBhD}FMdgeZB7?l( zucEOkQ#_4D`#nTV6lhcaQJ)};ys-Y zs3ua{Qa)^8`HA78t%A+0URu)AAzv&1d8aSOrEq*v%UK|eoGqS?FQ&EV;*6TJK)jp< za>&^t(S?jUjByf^lR_7HL-=msSo?f3p|(eHB2G`tsO>TJMtcfRWA#Q{y{Y1M(qKKA zl<0`vV!q%RtDJRozd!r_i&=SaOL%r$kg|^8-Nv;W?}gUtvU1Os&}+zk(f8r($FV>3 zPyVida;@nnE$c0JTeDwQv%%S{Jhvsx|C47No^8>YtvtRZ4sKW0W}g_?tUR6-kAEge z8!uabi1=UEInJ;avSRn3(8hmUbEvlH4PlLx?SCVXM~-e}Bj0n6q6+egvlWSCOd3Ls zCA$bhF)$`=z}}RRk@3Qp6=71lXvCsxW5o}avE17tEaPa>z2@V-&sSRwhz!@@ywuy zJT?8u-#X+ZuiCwjb0JpTC3l^5XqWuP8RGS1yU8P`Pg$;>RA(|cPJ5>0g7_E>x=U&+ z{VVG-4$uvj0AP}^8vv#U*8=na6j1=%UVuJ;ehN8nIv*$B9rT~h$~uQbpqC2z8LZyK zgK!Y&!_P7@<*AswUsewA5vof6jqt8_O!Au7JZnN$?${DK?>pp5%6aQdbv@SMph zz1(AQO8)NgwVCAEj06raS3=w~-e>r0G+z3z1bg2$i95PFyGlp0ZA)ms@AQ+3>Vfh+ zab&#O$NmMqRrOQi_kBqznjk*H?w=s~-nJ7&k6-h7P&v8n`!@X@@{03FSwh(30>O zGg`95i9U-ZJt!M3K5ZnhgyD0HgA{V&Ofr#LP&G9+9o5z?MoZewL{&VkQ=9KcN}$a` zPr)$LS#I?=^RLov`X0ILzhbi5usX8Zla)hTLhF5}vT@J}<{&PbX)SI1$x!FL(})90%CrKu^CPzGFdFzagD1 znl}6bYJ!VAU*E+Mx_L;J>;Gtp(bdL^J;YwdLv)FU0ZQh+Famf4w9xr_jqoX0ody`A zP&~aLI}31*!jA}Y{dq&PAEQCegKz@i&>$B;yB8!3+ev_n6iS0!0>z6I?vaG|Pbjf5 zUi~>bgTDge%K#-M=mr_Q^Y9eF6@af&Nbi*zL@xo%P{@*q|7R?YZAi5lb_LH;QBeH9 ziiYRG7ZhDDR}kq%Ny871CfotyH~>uSXRx>q5Cc#sj0f#GVS0Konoy^w$#tcpu@h8~ zEq)F_XIWc3uRZZV&7w%0E0upzGIVHv|q4`8)J zC!ZI;KDNnuci2ipQ~dx|JI$mg&_GTED*N9Ux>KJ$(w#kaYEwMD$&D5)$57rBe2i{# zo*lNz@Zo&`tDO+J5J;jJ`6$rq*85$c8Rg0r77`eKZ{u!aKeFxCFAkaKgn>jdmRV2- c@vXNS`aH2RWV`diyRE->H~)iyjn2`30nn1xr~m)} delta 1665 zcmbVMZ)jUp6u6j~)^<(f+%&E_XIbYunrviW3Lz$`d)Or9 zzLyP$u!`Wm4ds9+BCPn2!4xFu7e8$vD)>P`1>-(AKTQ1Ap@>ox?|Es`Ke8|04?pg? z=bqm==bm%VHy6I1R}ZL)i|{2j|JF-=AE|Fq;d=rf(67|2-BfbKRI`p4(VSqnC4$#Q zLe|MLMFU(}nxjiL*bsXLgw*z_nW4N^i%#w2TxXxM_C;RZinsllhy zDGs&4S*52fFKdMGl7!)wCt?e1@AC<#B=dk6Lb_V zc!$Irluf{O?>LRYm)>yKD36DYBaFbNcbaMt^#%GS_(4-P#CB~7K{tjM^l zNAU(`!_pk~r1DoFO;5uYiJt{F*EklR#_%tO;vC-$CF48cyY=b{EBrV1=mM;Vf-uzY zywc7qzSx?{2%GFXnASFkqJRkNSrg=zD{6yy(g9m=4JefO#Sdp#4 zf#Ok&?l{6p1guS~3Vmnml}dr>M#0o;)o8I+Sz9fe<8yTg(%n3~usj>p(`RAzZAmz~Ps^uZ?MJ$7$soHEEve%$yra!KGFd3t32$|8oa z&pUqmGwckylgfQTlwRh70nfyKq*?go(0qKF%M1f&k0G2uIE8>Sku4$QAT#sSEV3|P8*pA2Z*%zb zD*v!|CD%*m?+Y=h|2G?vl}+5;KSg0{CJg_~*rA^6hHCOGcX>N`G+y>k_Pkt8||AU~w@b15H243v| diff --git a/src/pr_reviewer/__pycache__/main.cpython-314.pyc b/src/pr_reviewer/__pycache__/main.cpython-314.pyc index 00db7c2f8e7d7668bfb991264223e5a4a1aed072..a22dd501980accb6f8851413fbcbee452ebc4c85 100644 GIT binary patch delta 4070 zcmb7Hdu&_P89(Pf{JMU`_O)ZjjvsOC*hw3gG;NZ$X;Pr6LqlkQ$T>jK(3BZ zhImd5Qwdt86O7CxSef+`89l5?Xl1R!!(pAQ!x%HHPZ(rFf|I#~Q8p$_vZ*FF%VsQR zhb@T)xglYdZ8h93+c6G@d6_>If`Vq)k#NdREQEq~xG~|9T?x1BPBh6)elQM_d(ro` zJcF5D1oigI|1!gU1Zf}lbwiR10J49a4gx_h7z9o*_RuvnFdhtoahQCNsQUzyVD84r zlBaGM+N#ndScHaqR9eRa$)jXt^cM8df#f3!e@o#<72ZM-yvjer>Cp(~sL#)Dk-bg% ztUe*sI|e|;1%MZf>yjxF8#NxdxyS|!4 z3S#xiTInzgfD`|I2mo_iV+J7Ioc5s!Z3%s(_1~vS*%b{;L^{Q*yjrBT8j!J)bVNGY zh@NpaIUmLwX#`!AT4p~Hxk;@oIu$E0bxdE!SQSGhot`RaNDNj^WVSmasKY3-+fmij zYKj9jzJgZNRu@K(>kX_l6u8kgQ)v%Z%Ah_F`}Usy{&G>)iHY=GuA`kw}fC4 zP4`%v>)0J@OWoShc_)De6}33G__T@)I%M!z8U?ne_DY|FBCBAoZJ3T==yA>r^@_A< zo0L;9&o<0VFu!*z)(HwWv7rGBeFQ+kF50X4z-F|-h434_H3lLJ8hIk|1hz_29`)r> z?JdnhP~9jMGqtGIR1oaREQIb>vb3J67f3aA+{PIq0ZnHb)hl!fw%V*NJr~T1`59VyU*MRbYHqHi}P>}>?oI(4s8 zovH_+2R&0;H$aj(WYT5y;hSwI$RuWyd>9;Iy_YoD<1&d%Y5_oLrid*D0hTdSY@f2>}x1x?<& zld&hO$)su*JT8J4eQFC@jtjyz%xd@8y#wAH%z^!MGswX`bPmQw(SZH^g?V!_ePKGE z&IG2i7mE7=^M+%2Igk!ya+7j}#dmEkBau5#BA4AfuKAh`|`FXsU}~PCJ#GoXVd)8>1@BS-gCOBXpuFFOmxf{iCtd_Xo@r z8!-9iB3NeLGQMg2_0eVHT3}H5!nk_X^%}%?#IdISMnkTco5f!=vy<5hqg0w9)kl|_ zvTzh_G&P_0OEK)IV!%q0JR#?1vXVdmZLR7O=?8Z#*xrtbZC5tm z=YBeMCA;eAE*t&p{l`kduJvHgYOrT5*jEzAHXK}|;W=*8rKUKud~~_L#NW53>k8CE zj;?stV|}Z!z8kUqrRaeY-@m3ixT!%_f4J9dvVGRtvEJH!y|uf{yVm*WDjzMoynl5K z-ezfs6IZV}zR79Awy%N4Z@bKWtp(4I`O1qGzS%UR z>wzOC2i96Q1xu#jx0@p<5?osN0&mxk4jEGrUbERxw1I13c%qfL7GVhAJ_JsN!5=$# zO#cKF>QHFALI()_fIno$!-t^8{5Z!guzc3@?BiJ05Uht#t^?TmT2rGpIDBlYhY0pCz2h{|UBqPGF^oH*G@G6d z6tfcUh5_6V^O8gyOZX%akL5^|{W)gCIGRS}ZEMiHOa#a8cHOB2_q(=&<~WfrqFqr( z!z+YUye$##CG==C$Sz}cC;CA&4qroWNB7WFzKH%7eSu38l{18lp!2aK<_u;lbh=Qe z(79O|m13bNIp-B?_UXxNLC)o8C(fm3Gt)R`QZ9+)Es;>AW#nmZas39fq}atdOgCr< z;Vodk1@uU0f711uyAcD!;D%$c1Z+3$&PC~!3ooJ7_Jh3g7wS zbs&_$5L;PC7cNj%nA-9OO{Er1$|TyqphB>rsU;AL$?R@je4CLka72;&(hnP=Jn3m4;cXFzz9IF4@h)o`e2Na zm>4UuF;3!q6l44Nm_kw%c-W_ulmz4YDq<>06;n&}{%{34*2Ofc8 z79NBDC{8~dTq`oyt=Gri*|u=@+GU-lw@RyZKoWRB{g-R>r>SP>cOj*V}GKN2_P$@E2d1+Ebfn2;SCvYWa>awA; zjA_c4t8A^^V9g7B!P=@XD^`{ui=m8V6=h??24kh5+-mGDK^FX;O3TtTq1tMUJPubS zd&0ZbIws-3Cav3GMtpnex}1awPXV)R!kh)nx(Ra?Fxw`qvVhq)VQz}y=QKKV1Y~T2 z$_Ng<4?tEUXo`h{PCTj&;GbypYze_f85N$?y4~)Lj)rW7BPK+)G&qq*J_s4ZDN=(K+Tvj&} zz&nirgWe4`2y5wgdqU8~=?A)H2ztRHSOuG47aW39a0!)yTkvdCqdEtes*o!`R4<#M zdikF9lKOm+^sX~-jy=MwLDqop8T&Lw)GRQl@|dAmt;UF)jlB6JvGD*DeHx;tP|J|) z7{ntqOb)dU0X%Q<;7dw%xxt}rC?J$P8ro!!sJ4u4FJr+nwxf)-%UD;3j72%L6YUz> zCT}koM;JLdkVUH1taRx0kp4`->4A)O3=d{5pj>D zLrqwCM(43h=q5KMM+g*CO0sv_?uV)xv!9f~?bz%fxbMBuN z>1E4nP9$@w(Gy~FdhDdQgCeG}%(Rp`BaLQWNQq*4JeBJY!@MFrot=?#VJ5FUl^#2l znV86P8X_1M{ElIpCGAttv{%fW-UQg4r6O_|&b`z2L!BPjuz! zk)@XXD=oeET6&jT`qpfk?Uk3c>p>r0(X1$bgIbVZP9g>P{L_!gA6M&IV76ayeH;I} zs@aWL{uo^2SXCU7l3zOV=8+%tuh<)2?OluG-QGrg-WyrG=XEg5x*osc4?-8d;dj(M zVl*!IKLH^RdA-fP?gdrRb-yWSegsTD^StJf66kBzcP;+K{}hDX*cPZ!|Leg7OJtv{ z*WyEgUR@(mSkd{HbpC&@pTHLb-=AF}QJ(A84z|PFm6k*exZMmBKJIpeqkQXuT?20L z%VsN~zhVm1QJ_tfz6%S~-mNE#_kuluOm~8CK*`^!+buxuZU83sR@|-SC?DQKq`#>< z;3U&;yCB(b8F&ZW$HB6_65&E6}}dng0EnEXeaz8j)wyL z8zkcjd?qxjI7>M3HT+q~2Ti!D?ks#AU#$z7zeV6{jHsm2W=hWEKiBQxzeiXjuB`8X zWB6LAf1|Yvzgs`0q5V#LoscitaK>%Ziqw&)pyWIr4cGG*3G2k?!gY#y3JP1@_;GW? z*Bb>YE4VSB)blf`oK#IefXs{}eus(*UcXDZ^Z0`XKmQhCt1;6Ug%@yZ<92+i@tkIy zic^&IW2ULsJW1F*o6KhOYL+wG{rPc*p|Av&Izowf*>d^QHd+`w;)# diff --git a/src/pr_reviewer/flow.py b/src/pr_reviewer/flow.py index c7b975b..07e3138 100644 --- a/src/pr_reviewer/flow.py +++ b/src/pr_reviewer/flow.py @@ -6,7 +6,6 @@ from .context import resolve_context import os from datetime import datetime -# Import the crews from crews.code_review_crew.code_review_crew import CodeReviewCrew from crews.security_review_crew.security_review_crew import SecurityReviewCrew from crews.infra_review_crew.infra_review_crew import InfraReviewCrew @@ -16,51 +15,27 @@ from crews.summariser_crew.summariser_crew import SummariserCrew class CodeReviewFlow(Flow[PRReviewState]): @start() - def receive_pr(self, inputs): - """Initialize the PR review state with input data.""" - print(f"Received PR review request for PR #{inputs.get('pr_id')}") - - # Initialize the state - self.state.pr_id = inputs.get("pr_id", "") - self.state.pr_title = inputs.get("pr_title", "") - self.state.pr_description = inputs.get("pr_description", "") - self.state.pr_url = inputs.get("pr_url", "") - self.state.repo_name = inputs.get("repo_name", "") - self.state.repo_url = inputs.get("repo_url", "") - self.state.branch = inputs.get("branch", "") - self.state.base_branch = inputs.get("base_branch", "") - # Convert files from list of dicts to list of FileInfo objects if needed - files_input = inputs.get("files", []) - if files_input and isinstance(files_input[0], dict): - # Convert dicts to FileInfo objects + def receive_pr(self): + print(f"Received PR review request for PR #{self.state.pr_id}") + + if isinstance(self.state.files, list) and self.state.files and isinstance(self.state.files[0], dict): from .state import FileInfo - self.state.files = [FileInfo(**file_dict) for file_dict in files_input] - else: - self.state.files = files_input - - # Handle context_overrides - context_overrides_input = inputs.get("context_overrides") - if context_overrides_input and isinstance(context_overrides_input, dict): + self.state.files = [FileInfo(**file_dict) for file_dict in self.state.files] + + context_input = self.state.context_overrides + if isinstance(context_input, dict): from .state import ContextOverrides - self.state.context_overrides = ContextOverrides(**context_overrides_input) - else: - self.state.context_overrides = context_overrides_input - + self.state.context_overrides = ContextOverrides(**context_input) + self.state.started_at = datetime.now() - - # Resolve context self.state.resolved_context = resolve_context(self.state) - + return self.state @listen(receive_pr) def run_code_review(self): - """Run the code review crew.""" print("Starting code review...") - - # Instantiate and run the code review crew code_crew = CodeReviewCrew() - # The crew's kickoff method expects inputs matching the task template variables inputs = { "pr_title": self.state.pr_title, "pr_description": self.state.pr_description, @@ -70,15 +45,11 @@ class CodeReviewFlow(Flow[PRReviewState]): result = code_crew.crew().kickoff(inputs=inputs) self.state.code_review_results = str(result) print("Code review completed.") - return self.state @listen(receive_pr) def run_security_review(self): - """Run the security review crew.""" print("Starting security review...") - - # Instantiate and run the security review crew security_crew = SecurityReviewCrew() inputs = { "pr_title": self.state.pr_title, @@ -89,15 +60,11 @@ class CodeReviewFlow(Flow[PRReviewState]): result = security_crew.crew().kickoff(inputs=inputs) self.state.security_review_results = str(result) print("Security review completed.") - return self.state @listen(receive_pr) def run_infra_review(self): - """Run the infrastructure review crew.""" print("Starting infrastructure review...") - - # Instantiate and run the infrastructure review crew infra_crew = InfraReviewCrew() inputs = { "pr_title": self.state.pr_title, @@ -108,15 +75,11 @@ class CodeReviewFlow(Flow[PRReviewState]): result = infra_crew.crew().kickoff(inputs=inputs) self.state.infra_review_results = str(result) print("Infrastructure review completed.") - return self.state @listen(and_(run_code_review, run_security_review, run_infra_review)) def summarise(self): - """Summarize the review results.""" print("Starting summarisation...") - - # Instantiate and run the summariser crew summariser_crew = SummariserCrew() inputs = { "code_review_results": self.state.code_review_results, @@ -128,15 +91,11 @@ class CodeReviewFlow(Flow[PRReviewState]): self.state.review_summary = str(result) self.state.completed_at = datetime.now() print("Summarisation completed.") - return self.state @listen(summarise) def format_response(self): - """Format the final response.""" print("Formatting final response...") - - # Return the final state as the response return { "pr_id": self.state.pr_id, "pr_title": self.state.pr_title, @@ -147,4 +106,4 @@ class CodeReviewFlow(Flow[PRReviewState]): "started_at": self.state.started_at.isoformat() if self.state.started_at else None, "completed_at": self.state.completed_at.isoformat() if self.state.completed_at else None, "error": self.state.error - } \ No newline at end of file + } diff --git a/src/pr_reviewer/main.py b/src/pr_reviewer/main.py index f1082a6..f206d97 100644 --- a/src/pr_reviewer/main.py +++ b/src/pr_reviewer/main.py @@ -1,4 +1,5 @@ import logging +import os from fastapi import FastAPI, HTTPException, Request from fastapi.responses import JSONResponse import uvicorn @@ -54,6 +55,8 @@ async def review_pr(request: Request) -> Dict[str, Any]: except Exception: raise HTTPException(status_code=422, detail="Invalid JSON payload") + logger.info(f"Payload keys: {payload.keys() if isinstance(payload, dict) else 'Not a dict'}") + # Validate and extract required fields according to the API specification # Request schema: # { @@ -94,11 +97,16 @@ async def review_pr(request: Request) -> Dict[str, Any]: title = payload.get("title") description = payload.get("description") + logger.info(f"pr_id from payload: {pr_id}") + logger.info(f"title from payload: {title}") + # Extract repo information repo_data = payload.get("repo", {}) repo_name = repo_data.get("name") repo_url = repo_data.get("url") + logger.info(f"repo_name from payload: {repo_name}") + # Extract source information source_data = payload.get("source", {}) source_branch = source_data.get("branch") @@ -158,6 +166,22 @@ async def review_pr(request: Request) -> Dict[str, Any]: infra_review=context_data.get("infra_review") ) + # Debug: Print the inputs being passed to the flow + flow_inputs = { + "pr_id": pr_id, + "pr_title": title, + "pr_description": description, + "pr_url": f"{repo_url}/pull/{pr_id}", # Construct PR URL + "repo_name": repo_name, + "repo_url": repo_url, + "branch": source_branch, # Using source branch as the active branch + "base_branch": target_branch, # Using target branch as base + "files": [file.dict() for file in files], # Convert to dict for flow + "context_overrides": context_overrides.dict() if context_overrides else None + } + logger.info(f"Flow inputs: {flow_inputs}") + logger.info(f"Flow inputs keys: {flow_inputs.keys()}") + # Initialize and run the flow with timeout flow = CodeReviewFlow() @@ -169,18 +193,7 @@ async def review_pr(request: Request) -> Dict[str, Any]: flow_result = await asyncio.wait_for( loop.run_in_executor( pool, - lambda: flow.kickoff(inputs={ - "pr_id": pr_id, - "pr_title": title, - "pr_description": description, - "pr_url": f"{repo_url}/pull/{pr_id}", # Construct PR URL - "repo_name": repo_name, - "repo_url": repo_url, - "branch": source_branch, # Using source branch as the active branch - "base_branch": target_branch, # Using target branch as base - "files": [file.dict() for file in files], # Convert to dict for flow - "context_overrides": context_overrides.dict() if context_overrides else None - }) + lambda: flow.kickoff(inputs=flow_inputs) ), timeout=TOTAL_FLOW_TIMEOUT ) diff --git a/src/pr_reviewer/state.py b/src/pr_reviewer/state.py index 17f5469..11111df 100644 --- a/src/pr_reviewer/state.py +++ b/src/pr_reviewer/state.py @@ -23,14 +23,14 @@ class ContextOverrides(BaseModel): class PRReviewState(BaseModel): """State of the PR review process.""" # Input fields - pr_id: str - pr_title: str + pr_id: str = "" + pr_title: str = "" pr_description: Optional[str] = None pr_url: Optional[str] = None - repo_name: str - repo_url: str - branch: str - base_branch: str + repo_name: str = "" + repo_url: str = "" + branch: str = "" + base_branch: str = "" files: List[FileInfo] = Field(default_factory=list) context_overrides: Optional[ContextOverrides] = None # Internal fields diff --git a/start.sh b/start.sh deleted file mode 100755 index 09a15b5..0000000 --- a/start.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -# Simple start script to build Docker image and run tests - -set -e # Exit on any error - -echo "Building Docker image..." -docker build -t pr-reviewer-test:latest . - -echo "Running tests..." -python test_docker.py - -echo "All tests completed!" \ No newline at end of file diff --git a/test_docker.py b/test_docker.py deleted file mode 100644 index 7db890a..0000000 --- a/test_docker.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to verify the Dockerized PR Reviewer application works correctly. -This script builds the Docker image, runs it, and tests the API endpoints. -""" - -import time -import requests -import docker -import json -import sys -from typing import Dict, Any - -def test_dockerized_app(): - """Test the Dockerized PR Reviewer application.""" - client = docker.from_env() - - try: - # Build the Docker image - print("Building Docker image...") - image, build_logs = client.images.build( - path=".", - tag="pr-reviewer-test:latest", - rm=True, - forcerm=True - ) - print("Docker image built successfully.") - - # Run the container - print("Starting container...") - container = client.containers.run( - image="pr-reviewer-test:latest", - detach=True, - ports={'8000/tcp': 8000}, - environment={ - "LLM_MODEL": "test-model", - "LLM_BASE_URL": "http://localhost:11434", # Using Ollama as example - "LLM_API_KEY": "ollama", # Ollama doesn't need a real key - "LLM_PROVIDER": "ollama" - } - ) - print(f"Container started with ID: {container.id}") - - # Wait for the container to be ready - print("Waiting for container to be ready...") - max_wait = 30 # seconds - start_time = time.time() - while time.time() - start_time < max_wait: - try: - response = requests.get("http://localhost:8000/api/v1/health", timeout=5) - if response.status_code == 200: - print("Container is ready!") - break - except requests.exceptions.ConnectionError: - print("Waiting for container to start...") - time.sleep(2) - else: - raise TimeoutError("Container did not become ready within the timeout period") - - # Test the health endpoint - print("Testing health endpoint...") - health_response = requests.get("http://localhost:8000/api/v1/health") - assert health_response.status_code == 200, f"Health check failed: {health_response.status_code}" - health_data = health_response.json() - assert health_data["status"] == "healthy", f"Unexpected health status: {health_data['status']}" - print("Health endpoint test passed.") - - # Test the review endpoint with minimal valid data - print("Testing review endpoint...") - test_payload = { - "pr_id": "123", - "title": "Test PR", - "description": "This is a test PR", - "repo": { - "name": "test-repo", - "url": "https://github.com/test/test-repo" - }, - "source": { - "branch": "feature/test", - "commit": "abc123" - }, - "target": { - "branch": "main", - "commit": "def456" - }, - "files": [ - { - "path": "src/main.py", - "content": "print('Hello World')", - "status": "modified", - "additions": 1, - "deletions": 0 - } - ], - "context": { - "code_review": "Follow basic coding standards", - "security_review": "Check for obvious security issues", - "infra_review": "Ensure basic infrastructure practices" - } - } - - review_response = requests.post( - "http://localhost:8000/api/v1/review", - json=test_payload, - timeout=30 # Longer timeout for the review process - ) - - # We expect this to either succeed (200) or fail with a 500 due to LLM issues - # Since we're not actually connecting to a real LLM, we expect a 500 - print(f"Review endpoint responded with status: {review_response.status_code}") - - if review_response.status_code == 200: - review_data = review_response.json() - print("Review endpoint test passed.") - print(f"Review ID: {review_data.get('review_id')}") - print(f"Status: {review_data.get('status')}") - else: - print(f"Review endpoint returned error status {review_response.status_code} (expected due to lack of real LLM)") - print(f"Response: {review_response.text}") - - # Clean up - print("Cleaning up...") - container.stop() - container.remove() - client.images.remove(image="pr-reviewer-test:latest", force=True) - print("Test completed successfully.") - - except Exception as e: - print(f"Test failed with error: {e}") - # Try to clean up if possible - try: - if 'container' in locals(): - container.stop() - container.remove() - if 'image' in locals(): - client.images.remove(image="pr-reviewer-test:latest", force=True) - except: - pass - raise - -if __name__ == "__main__": - test_dockerized_app() \ No newline at end of file diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py new file mode 100644 index 0000000..ccc935b --- /dev/null +++ b/tests/integration/test_api.py @@ -0,0 +1,31 @@ +import pytest +import requests +import os +from dotenv import load_dotenv + +load_dotenv() + +BASE_URL = "http://localhost:8000/api/v1" + +def test_health_endpoint(): + """Test the health check endpoint.""" + response = requests.get(f"{BASE_URL}/health") + assert response.status_code == 200 + assert response.json() == {"status": "healthy", "service": "pr-reviewer"} + +def test_trigger_review_invalid_pr(): + """Test triggering a review with an invalid PR payload.""" + payload = {"pr_id": "invalid-id"} + response = requests.post(f"{BASE_URL}/review", json=payload) + # Depending on implementation, this might be 400 or 202 (async) + assert response.status_code in [200, 400, 422] + +def test_trigger_review_missing_payload(): + """Test triggering a review with no payload.""" + response = requests.post(f"{BASE_URL}/review", json={}) + assert response.status_code == 422 # FastAPI default for missing required body fields + +def test_get_status_nonexistent(): + """Test getting status for a non-existent review.""" + response = requests.get(f"{BASE_URL}/status/non-existent-id") + assert response.status_code == 404 diff --git a/tests/integration/test_full_review.py b/tests/integration/test_full_review.py new file mode 100644 index 0000000..072c09f --- /dev/null +++ b/tests/integration/test_full_review.py @@ -0,0 +1,75 @@ +import pytest +import requests +import json +from dotenv import load_dotenv + +load_dotenv() + +BASE_URL = "http://localhost:8000/api/v1" + +# Mock PR data for testing - comprehensive payload +MOCK_PR_DATA = { + "pr_id": "123", + "title": "Fix authentication vulnerability", + "description": "This PR addresses a critical authentication bypass vulnerability", + "repo": { + "name": "secure-app", + "url": "https://github.com/example/secure-app" + }, + "source": { + "branch": "fix-auth-bypass", + "commit": "a1b2c3d4e5f6" + }, + "target": { + "branch": "main", + "commit": "f6e5d4c3b2a1" + }, + "files": [ + { + "path": "src/auth.py", + "content": "def authenticate_user(username, password):\n # Vulnerable authentication implementation\n if username == 'admin' and password == 'password123':\n return True\n return False", + "status": "modified", + "additions": 5, + "deletions": 3, + "patch": "@@ -1,5 +1,5 @@\n def authenticate_user(username, password):\n- # Simple authentication\n- if username == 'admin' and password == 'password123':\n+ # Fixed authentication with proper validation\n+ if validate_credentials(username, password):\n return True\n return False" + } + ], + "context": { + "code_review": "Focus on security best practices and authentication logic", + "security_review": "Identify potential vulnerabilities in authentication flow", + "infra_review": "Verify secure deployment configurations" + } +} + +def test_full_review_workflow(): + """Test the full PR review workflow with mock data.""" + # Trigger a review + response = requests.post(f"{BASE_URL}/review", json=MOCK_PR_DATA) + + # Print response for debugging + print(f"Status Code: {response.status_code}") + print(f"Response: {response.text}") + + # Validate response + assert response.status_code == 200 + + data = response.json() + assert "review_id" in data + assert data["status"] in ["completed", "failed"] # Allow either status + assert "results" in data + assert "metadata" in data + + # Validate results structure + results = data["results"] + assert "code_review" in results or "code_review" in str(results) # At least present in the response + assert "security_review" in results or "security_review" in str(results) + assert "infra_review" in results or "infra_review" in str(results) + assert "summary" in results or "summary" in str(results) + + # Validate metadata + metadata = data["metadata"] + assert "processing_time_seconds" in metadata + assert metadata["pr_id"] == MOCK_PR_DATA["pr_id"] + assert metadata["repo"]["name"] == MOCK_PR_DATA["repo"]["name"] + + print("Full review workflow test passed!")