Capstone: Building a Code Review AI System with GitHub Integration

System Design

An AI code review system acts as an automated reviewer on every pull request. It receives a webhook when a PR is opened or updated, fetches the diff, analyzes each changed file for bugs, security issues, style violations, and improvement opportunities, then posts inline comments on the PR and assigns an overall quality score.

The architecture has four parts: a webhook receiver that handles GitHub events, a diff analyzer that breaks the PR into reviewable units, a review agent that generates comments using GPT-4o, and a quality tracker that stores scores and trends over time.

Data Model

# models.py
import uuid

from sqlalchemy import (
    Column,
    DateTime,
    Float,
    ForeignKey,
    Integer,
    String,
    Text,
    func,
)
from sqlalchemy.dialects.postgresql import UUID, JSONB

class Repository(Base):
    """A GitHub repository that pull-request reviews are recorded against.

    Stores the repository's GitHub id and "org/repo" name, the GitHub App
    installation id used when requesting an access token for it, and an
    optional JSON document of per-repository review rules.
    """

    __tablename__ = "repositories"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # NOTE(review): GitHub ids are stored as 32-bit Integer here; confirm the
    # range is sufficient or migrate to BigInteger.
    github_id = Column(Integer, unique=True)
    full_name = Column(String(300))  # "org/repo"
    installation_id = Column(Integer)
    # Pass the callable rather than a literal ``{}`` so each insert gets its
    # own empty dict instead of every row sharing one mutable object.
    review_config = Column(JSONB, default=dict)  # custom review rules
    # A plain-string server_default is rendered as a quoted literal
    # (DEFAULT 'now()'); func.now() renders the SQL function call, so the
    # timestamp is evaluated for each inserted row.
    created_at = Column(DateTime, server_default=func.now())

class PullRequestReview(Base):
    """One automated review run for a pull request.

    Created with status "pending"; the score, comment count and critical
    issue count are filled in once the review has been completed.
    """

    __tablename__ = "pr_reviews"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    repo_id = Column(UUID(as_uuid=True), ForeignKey("repositories.id"))
    pr_number = Column(Integer)
    pr_title = Column(String(500))
    author = Column(String(100))
    # Left NULL until a review finishes and a score has been computed.
    overall_score = Column(Float, nullable=True)  # 0-10
    total_comments = Column(Integer, default=0)
    critical_issues = Column(Integer, default=0)
    status = Column(String(20), default="pending")  # pending, reviewed, error
    # A plain-string server_default is rendered as a quoted literal
    # (DEFAULT 'now()'); func.now() renders the SQL function call, so the
    # timestamp is evaluated for each inserted row.
    created_at = Column(DateTime, server_default=func.now())

class ReviewComment(Base):
    """A single finding attached to a pull-request review."""

    __tablename__ = "review_comments"

    # Identity and owning review.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    review_id = Column(
        UUID(as_uuid=True),
        ForeignKey("pr_reviews.id"),
    )

    # Where the finding points.
    file_path = Column(String(500))
    line_number = Column(Integer)

    # Classification of the finding.
    # severity: "critical", "warning", "suggestion", "praise"
    severity = Column(String(20))
    # category: "bug", "security", "style", "performance"
    category = Column(String(50))

    # Reviewer text and the code it refers to.
    comment = Column(Text)
    code_snippet = Column(Text)

GitHub Webhook Handler

Configure a GitHub App that sends pull_request events to your endpoint.

# routes/webhooks.py
from fastapi import APIRouter, Request, HTTPException
import hmac, hashlib

router = APIRouter()

def verify_signature(payload: bytes, signature: str, secret: str) -> bool:
    """Check a webhook signature header against the raw request body.

    Args:
        payload: The exact request body bytes that were signed.
        signature: The header value, expected in the form "sha256=<hex>".
        secret: The shared webhook secret.

    Returns:
        True when the header matches the HMAC-SHA256 of the payload,
        False otherwise (including empty or malformed header values).
    """
    expected = "sha256=" + hmac.new(
        secret.encode(), payload, hashlib.sha256
    ).hexdigest()
    # compare_digest raises TypeError for str arguments containing non-ASCII
    # characters. The header is caller-controlled, so compare bytes instead;
    # backslashreplace keeps the encode total for any str and can never
    # produce a value equal to the all-ASCII hex digest.
    return hmac.compare_digest(
        expected.encode("utf-8"),
        signature.encode("utf-8", "backslashreplace"),
    )

# Strong references to in-flight review tasks. The event loop keeps only weak
# references to tasks, so a task whose result is discarded can be
# garbage-collected before it finishes.
_background_review_tasks = set()


@router.post("/webhooks/github")
async def github_webhook(request: Request, db=Depends(get_db)):
    """Receive a GitHub webhook and start a review for opened/updated PRs.

    The raw body is verified against the X-Hub-Signature-256 header before it
    is parsed. Only "pull_request" events with action "opened" or
    "synchronize" for a repository already present in the database start a
    review; every other verified event is acknowledged and ignored.

    Returns:
        {"ok": True} for every request that passes signature verification.

    Raises:
        HTTPException: 403 when the signature does not match.
    """
    payload = await request.body()
    signature = request.headers.get("X-Hub-Signature-256", "")

    if not verify_signature(payload, signature, os.environ["GITHUB_WEBHOOK_SECRET"]):
        raise HTTPException(403, "Invalid signature")

    event = request.headers.get("X-GitHub-Event")
    data = json.loads(payload)

    if event == "pull_request" and data["action"] in ("opened", "synchronize"):
        pr = data["pull_request"]
        repo = db.query(Repository).filter(
            Repository.github_id == data["repository"]["id"]
        ).first()
        if repo:
            # NOTE(review): `db` comes from the request-scoped dependency. If
            # get_db closes the session when the request ends, the background
            # review will be using a closed session; confirm, or open a
            # dedicated session inside the task.
            task = asyncio.create_task(review_pull_request(
                repo, pr["number"], pr["title"], pr["user"]["login"], db
            ))
            # Hold the task until it completes, then drop the reference.
            _background_review_tasks.add(task)
            task.add_done_callback(_background_review_tasks.discard)

    return {"ok": True}

Diff Analysis and Review Agent

Fetch the PR diff from GitHub, split it by file, and analyze each file with the review agent.

See AI Voice Agents Handle Real Calls

Book a free demo or calculate how much you can save with AI voice automation.

Book a Demo | ROI Calculator

# services/reviewer.py
import httpx
from agents import Agent, function_tool

@function_tool
def post_review_comment(
    file_path: str, line: int, severity: str, category: str, comment: str
) -> str:
    """Record a review comment for a specific file and line."""
    # NOTE(review): the docstring is kept verbatim because function_tool
    # presumably exposes it to the model as the tool description; confirm
    # before rewording it.
    #
    # This function itself persists nothing: it only builds and returns an
    # acknowledgement string. `category` and `comment` are accepted but do not
    # appear in that string. The original intent is that findings are kept in
    # context and posted to GitHub after all files are reviewed.
    acknowledgement = f"Comment recorded: [{severity}] {file_path}:{line}"
    return acknowledgement

# System prompt for the review agent. Kept as a named constant so the agent
# definition below stays short; the text is passed to the agent unchanged.
_REVIEW_AGENT_INSTRUCTIONS = """You are an expert code reviewer. Analyze the diff and:
    1. Find bugs, logic errors, and edge cases
    2. Identify security vulnerabilities (SQL injection, XSS, hardcoded secrets)
    3. Flag performance issues (N+1 queries, unnecessary allocations)
    4. Suggest readability improvements
    Use post_review_comment for each finding. Be specific about the line number.
    Severity levels: critical (must fix), warning (should fix), suggestion (nice to have).
    Only comment when genuinely useful. Avoid trivial nitpicks."""

# The agent reports each finding through the post_review_comment tool.
review_agent = Agent(
    name="Code Review Agent",
    tools=[post_review_comment],
    instructions=_REVIEW_AGENT_INSTRUCTIONS,
)

async def review_pull_request(repo, pr_number, pr_title, author, db):
    """Review one pull request end to end and persist the outcome.

    Creates a PullRequestReview row, fetches the PR diff, runs the review
    agent on each non-skipped file, posts the collected comments to GitHub,
    then stores the score and counts with status "reviewed".

    Args:
        repo: Repository row; `id`, `full_name` and `installation_id` are read.
        pr_number: Pull request number within the repository.
        pr_title: Pull request title, stored on the review row.
        author: Login of the pull request author, stored on the review row.
        db: Database session used to persist the review.

    Raises:
        Exception: Any failure while fetching, reviewing or posting is
            re-raised after the review row has been marked "error".
    """
    # Create the review record first so a failure in any later step is
    # recorded as "error" instead of leaving no trace.
    review = PullRequestReview(
        repo_id=repo.id, pr_number=pr_number,
        pr_title=pr_title, author=author,
    )
    db.add(review)
    db.commit()

    try:
        # The context manager closes the client's connections on every path.
        async with httpx.AsyncClient(headers={
            "Authorization": f"Bearer {get_installation_token(repo.installation_id)}",
            "Accept": "application/vnd.github.v3.diff",
        }) as github:
            # Fetch the diff
            resp = await github.get(
                f"https://api.github.com/repos/{repo.full_name}/pulls/{pr_number}"
            )
            # Without this, an error response body would be parsed as a diff.
            resp.raise_for_status()
            diff_text = resp.text

            # Split diff by file and review each
            file_diffs = parse_diff_by_file(diff_text)
            all_comments = []

            for file_path, diff_content in file_diffs.items():
                if should_skip_file(file_path):  # skip lock files, binaries
                    continue
                result = await Runner.run(
                    review_agent,
                    f"Review this diff for {file_path}:\n\n{diff_content}"
                )
                all_comments.extend(extract_comments_from_result(result))

            # Post comments to GitHub while the client is still open
            await post_github_review(repo, pr_number, all_comments, github)

        # Calculate quality score: start at 10, subtract 2 per critical and
        # 0.5 per warning, never going below 0.
        critical = sum(1 for c in all_comments if c["severity"] == "critical")
        warnings = sum(1 for c in all_comments if c["severity"] == "warning")
        score = max(0, 10 - (critical * 2) - (warnings * 0.5))

        review.overall_score = score
        review.total_comments = len(all_comments)
        review.critical_issues = critical
        review.status = "reviewed"
        db.commit()
    except Exception:
        # Boundary of the review job: discard any half-applied changes, record
        # the failure on the review row, then re-raise so the error surfaces.
        db.rollback()
        review.status = "error"
        db.commit()
        raise

Posting Review Comments to GitHub

# services/github_api.py
async def post_github_review(repo, pr_number, comments, github, score=None):
    """Post a PR review with inline comments.

    Args:
        repo: Repository row; only `full_name` is read.
        pr_number: Pull request number within the repository.
        comments: List of dicts with "file_path", "line_number", "severity",
            "category" and "comment" keys.
        github: Async HTTP client already carrying the authorization header.
        score: Overall score shown in the review summary. When omitted it is
            derived from `comments` with the same formula the reviewer uses
            (10, minus 2 per critical, minus 0.5 per warning, floored at 0).

    Raises:
        httpx.HTTPStatusError: When GitHub answers either request with an
            error status.
    """
    # The summary line needs a score; previously it referenced a name that
    # was never defined in this function.
    if score is None:
        critical = sum(1 for c in comments if c["severity"] == "critical")
        warnings = sum(1 for c in comments if c["severity"] == "warning")
        score = max(0, 10 - (critical * 2) - (warnings * 0.5))

    # Get the latest commit SHA; the per-request Accept header asks for JSON
    # regardless of the client's default.
    pr_resp = await github.get(
        f"https://api.github.com/repos/{repo.full_name}/pulls/{pr_number}",
        headers={"Accept": "application/vnd.github.v3+json"},
    )
    pr_resp.raise_for_status()
    commit_sha = pr_resp.json()["head"]["sha"]

    # Format comments for GitHub API
    gh_comments = [
        {
            "path": c["file_path"],
            "line": c["line_number"],
            "body": f"**[{c['severity'].upper()}] {c['category']}**\n\n{c['comment']}",
        }
        for c in comments
    ]

    # Submit the review
    review_resp = await github.post(
        f"https://api.github.com/repos/{repo.full_name}/pulls/{pr_number}/reviews",
        json={
            "commit_id": commit_sha,
            "body": f"AI Code Review: Score {score}/10 | {len(comments)} findings",
            "event": "COMMENT",
            "comments": gh_comments,
        },
    )
    # Surface a rejected review instead of silently dropping it.
    review_resp.raise_for_status()

Quality Tracking Dashboard

# routes/quality.py
@router.get("/repos/{repo_id}/quality-trends")
async def quality_trends(repo_id: str, days: int = 30, db=Depends(get_db)):
    # Summarise completed reviews for one repository over the last `days`
    # days: average score, number of reviews, total critical issues, and a
    # per-review (date, score) series ordered by creation time.
    window_start = datetime.utcnow() - timedelta(days=days)

    query = db.query(PullRequestReview).filter(
        PullRequestReview.repo_id == repo_id,
        PullRequestReview.created_at >= window_start,
        PullRequestReview.status == "reviewed",
    )
    reviews = query.order_by(PullRequestReview.created_at).all()

    # Dividing by at least 1 keeps the average at 0 when nothing matched.
    score_total = sum(row.overall_score for row in reviews)
    review_count = len(reviews)
    average = score_total / max(review_count, 1)

    critical_total = sum(row.critical_issues for row in reviews)

    trend_points = []
    for row in reviews:
        trend_points.append(
            {"date": row.created_at.isoformat(), "score": row.overall_score}
        )

    return {
        "avg_score": average,
        "total_reviews": review_count,
        "total_critical": critical_total,
        "trend": trend_points,
    }

FAQ

How do I avoid noisy reviews that developers ignore?

Tune the agent instructions to only comment on findings that are genuinely actionable. Set a minimum severity threshold — for example, only post comments with severity "warning" or higher. Track which comments developers resolve versus dismiss, and use that signal to refine the review criteria.

How do I handle large PRs with hundreds of changed files?

Set a file limit (for example, 30 files) and prioritize files by risk. Review source code files before test files, and skip auto-generated files, lock files, and binaries. For PRs exceeding the limit, post a summary comment explaining that only the most critical files were reviewed.

How do I customize review rules per repository?

Store custom review instructions in the review_config JSONB field on the repository record. Merge these instructions into the agent's system prompt before each review. This lets teams configure language-specific rules, ignored patterns, and severity thresholds without changing code.

#CapstoneProject #CodeReview #GitHub #DeveloperTools #Webhooks #FullStackAI #AgenticAI #LearnAI #AIEngineering

Capstone: Building a Code Review AI System with GitHub Integration

System Design

Data Model

GitHub Webhook Handler

Diff Analysis and Review Agent

Posting Review Comments to GitHub

Quality Tracking Dashboard

FAQ

How do I avoid noisy reviews that developers ignore?

How do I handle large PRs with hundreds of changed files?

How do I customize review rules per repository?

Try CallSphere AI Voice Agents

Related Articles

WebArena and Real-World Web Agent Benchmarks: How We Measure Browser Agent Performance

Taking Screenshots and Recording Videos with Playwright for AI Analysis

Playwright Selectors Deep Dive: CSS, XPath, Text, and Role-Based Element Finding