Building an AI Agent Cost Dashboard: Real-Time Spend Tracking and Budget Alerts

Why You Need a Cost Dashboard

Checking your OpenAI billing page once a month is not cost management — it is cost discovery. By the time you notice a spike, you have already overspent. A purpose-built cost dashboard gives you real-time visibility into spend, automatic alerts before budgets are exceeded, and trend data for capacity planning.

Data Collection Layer

Every LLM call, embedding request, and tool invocation must emit a cost event. Build a lightweight collector that sits between your agent and the LLM provider.

import calendar
import json
import time
import uuid
from collections import defaultdict
from dataclasses import dataclass, field, asdict
from typing import List, Optional

@dataclass
class CostEvent:
    """One billable operation recorded for an agent.

    Instances are normally created by ``CostCollector.record``, which fills
    in ``event_id``, ``timestamp`` and ``cost_usd``.
    """

    event_id: str  # identifier built by the collector
    timestamp: float  # seconds since the epoch, as returned by time.time()
    agent_id: str
    model: str
    event_type: str  # "llm_call", "embedding", "tool_call"
    input_tokens: int = 0
    output_tokens: int = 0
    cost_usd: float = 0.0
    user_id: Optional[str] = None
    metadata: dict = field(default_factory=dict)  # free-form extra keyword data


class CostCollector:
    """Price agent operations and keep the resulting events in memory.

    Events are appended to ``self.events`` (a plain list) in the order they
    are recorded.
    """

    # Price in USD per 1,000,000 tokens, keyed by model name.
    MODEL_PRICING = {
        "gpt-4o": {"input": 2.50, "output": 10.00},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "text-embedding-3-small": {"input": 0.02, "output": 0.0},
        "text-embedding-3-large": {"input": 0.13, "output": 0.0},
    }

    # Price applied when a model is missing from MODEL_PRICING, so that an
    # unlisted model is still charged something instead of being free.
    DEFAULT_PRICING = {"input": 5.0, "output": 15.0}

    def __init__(self):
        self.events: List[CostEvent] = []

    def calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Return the USD cost of a call, rounded to 6 decimal places.

        Args:
            model: Model name looked up in ``MODEL_PRICING``; unknown names
                are priced with ``DEFAULT_PRICING``.
            input_tokens: Number of input tokens consumed.
            output_tokens: Number of output tokens produced.
        """
        pricing = self.MODEL_PRICING.get(model, self.DEFAULT_PRICING)
        input_cost = (input_tokens / 1_000_000) * pricing["input"]
        output_cost = (output_tokens / 1_000_000) * pricing["output"]
        return round(input_cost + output_cost, 6)

    def record(
        self,
        agent_id: str,
        model: str,
        event_type: str,
        input_tokens: int,
        output_tokens: int = 0,
        user_id: Optional[str] = None,
        **metadata,
    ) -> CostEvent:
        """Price one operation, store it, and return the stored event.

        Args:
            agent_id: Identifier of the agent that performed the operation.
            model: Model name used for pricing.
            event_type: Kind of operation, e.g. "llm_call".
            input_tokens: Number of input tokens consumed.
            output_tokens: Number of output tokens produced.
            user_id: Optional identifier of the end user being served.
            **metadata: Extra keyword data stored on the event unchanged.

        Returns:
            The ``CostEvent`` that was appended to ``self.events``.
        """
        cost = self.calculate_cost(model, input_tokens, output_tokens)
        # Read the clock once so the id and the timestamp always agree.
        now = time.time()
        # A millisecond stamp alone repeats when one agent records several
        # events within the same millisecond; the random suffix keeps ids
        # distinct in that case.
        event_id = f"{agent_id}-{int(now * 1000)}-{uuid.uuid4().hex[:12]}"
        event = CostEvent(
            event_id=event_id,
            timestamp=now,
            agent_id=agent_id,
            model=model,
            event_type=event_type,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=cost,
            user_id=user_id,
            metadata=metadata,
        )
        self.events.append(event)
        return event

Aggregation Engine

Raw events must be aggregated into useful views: by time period, model, agent, and user.

from datetime import datetime, timedelta

class CostAggregator:
    """Summarise cost events over a trailing time window.

    Every public method looks only at events whose ``timestamp`` is newer
    than ``time.time() - window``. The event list is held by reference, not
    copied, so events appended later are seen by later calls.
    """

    def __init__(self, events: "List[CostEvent]"):
        self.events = events

    def _filter_window(self, window_seconds: int) -> "List[CostEvent]":
        """Return the events recorded within the last ``window_seconds``."""
        cutoff = time.time() - window_seconds
        return [e for e in self.events if e.timestamp > cutoff]

    def _cost_by(self, attribute: str, window_seconds: int) -> dict:
        """Sum ``cost_usd`` per distinct value of an event attribute.

        Shared implementation of the per-model and per-agent breakdowns.
        The result is ordered from most to least expensive.
        """
        breakdown = defaultdict(float)
        for event in self._filter_window(window_seconds):
            breakdown[getattr(event, attribute)] += event.cost_usd
        return dict(sorted(breakdown.items(), key=lambda x: -x[1]))

    def total_cost(self, window_seconds: int = 86400) -> float:
        """Return the summed ``cost_usd`` of all events in the window."""
        return sum(e.cost_usd for e in self._filter_window(window_seconds))

    def cost_by_model(self, window_seconds: int = 86400) -> dict:
        """Return ``{model: cost}`` for the window, most expensive first."""
        return self._cost_by("model", window_seconds)

    def cost_by_agent(self, window_seconds: int = 86400) -> dict:
        """Return ``{agent_id: cost}`` for the window, most expensive first."""
        return self._cost_by("agent_id", window_seconds)

    def cost_by_hour(self, window_hours: int = 24) -> dict:
        """Return ``{"YYYY-MM-DD HH:00": cost}`` in chronological order.

        Bucket labels come from ``datetime.fromtimestamp`` without a
        timezone argument, i.e. they are in the local time of the process.
        Hours with no events are absent from the result.
        """
        hourly = defaultdict(float)
        for event in self._filter_window(window_hours * 3600):
            hour = datetime.fromtimestamp(event.timestamp).strftime("%Y-%m-%d %H:00")
            hourly[hour] += event.cost_usd
        # The label format sorts lexicographically in time order.
        return dict(sorted(hourly.items()))

    def top_users(self, window_seconds: int = 86400, limit: int = 10) -> list:
        """Return the ``limit`` most expensive users in the window.

        Each entry is ``{"user_id": ..., "cost": ..., "requests": ...}``.
        Events with a falsy ``user_id`` are grouped under "anonymous".
        """
        user_costs = defaultdict(lambda: {"cost": 0.0, "requests": 0})
        for event in self._filter_window(window_seconds):
            uid = event.user_id or "anonymous"
            user_costs[uid]["cost"] += event.cost_usd
            user_costs[uid]["requests"] += 1
        sorted_users = sorted(user_costs.items(), key=lambda x: -x[1]["cost"])
        return [{"user_id": uid, **data} for uid, data in sorted_users[:limit]]

Budget Alert System

from enum import Enum

class AlertSeverity(Enum):
    """Urgency levels attached to a budget alert."""

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"


@dataclass
class BudgetAlert:
    """Notification that spend has crossed one budget threshold."""

    severity: AlertSeverity
    message: str
    current_spend: float
    budget_limit: float
    usage_percent: float  # spend as a percentage of the budget, e.g. 80.0
    timestamp: float = field(default_factory=time.time)  # creation time


class BudgetAlertManager:
    """Emit each budget-threshold alert at most once until reset."""

    def __init__(self, monthly_budget: float):
        self.monthly_budget = monthly_budget
        # Fraction of the budget -> severity raised when that fraction is hit.
        self.thresholds = {
            0.50: AlertSeverity.INFO,
            0.75: AlertSeverity.WARNING,
            0.90: AlertSeverity.CRITICAL,
            1.00: AlertSeverity.CRITICAL,
        }
        # Thresholds that have already produced an alert.
        self.sent_alerts: set = set()

    def check(self, current_monthly_spend: float) -> List[BudgetAlert]:
        """Return alerts for thresholds newly reached by the given spend.

        A threshold that has fired is remembered and will not fire again
        until ``reset_monthly`` is called. A falsy budget (e.g. 0) is
        treated as 0% usage, so no alert is produced.
        """
        if self.monthly_budget:
            fraction_used = current_monthly_spend / self.monthly_budget
        else:
            fraction_used = 0

        triggered = []
        for level, level_severity in self.thresholds.items():
            if level in self.sent_alerts or not fraction_used >= level:
                continue
            self.sent_alerts.add(level)
            text = (
                f"Budget {level:.0%} reached: "
                f"${current_monthly_spend:,.2f} of "
                f"${self.monthly_budget:,.2f}"
            )
            triggered.append(
                BudgetAlert(
                    severity=level_severity,
                    message=text,
                    current_spend=current_monthly_spend,
                    budget_limit=self.monthly_budget,
                    usage_percent=round(fraction_used * 100, 1),
                )
            )
        return triggered

    def reset_monthly(self):
        """Forget which thresholds have fired so they can fire again."""
        self.sent_alerts.clear()

Cost Forecasting

Predict end-of-month spend based on current trends.

See AI Voice Agents Handle Real Calls

Book a free demo or calculate how much you can save with AI voice automation.

Book a Demo ROI Calculator

class CostForecaster:
    """Project end-of-month spend from the spend recorded so far this month."""

    def __init__(self, aggregator: "CostAggregator"):
        self.aggregator = aggregator

    def forecast_monthly(self, now: Optional[datetime] = None) -> dict:
        """Return month-to-date spend and two end-of-month projections.

        Args:
            now: Moment to forecast from. Defaults to ``datetime.now()``.
                The aggregator measures its windows back from the real
                clock, so a custom value is only meaningful when it is
                close to the current time or the aggregator is a test
                double.

        Returns:
            A dict with ``spend_to_date``, ``daily_average``,
            ``recent_daily_average`` (last 3 days), ``projected_total``
            (month-to-date average extrapolated), ``trend_adjusted_total``
            (3-day average extrapolated), ``trend`` ("increasing",
            "decreasing" or "stable") and ``day_of_month``. Money values
            are rounded to 2 decimal places.
        """
        if now is None:
            now = datetime.now()

        # Real length of the current month; a fixed 30 gives a negative
        # remainder on the 31st and over-projects February.
        days_in_month = calendar.monthrange(now.year, now.month)[1]

        # Measure from the first instant of this month so that no spend
        # from the previous month is counted as month-to-date.
        month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        elapsed_seconds = max(now.timestamp() - month_start.timestamp(), 0.0)
        elapsed_days = elapsed_seconds / 86400
        remaining_days = max(days_in_month - elapsed_days, 0.0)

        # The window is passed as fractional seconds; it is only subtracted
        # from time.time(), so it does not need to be an integer.
        spend_so_far = self.aggregator.total_cost(window_seconds=elapsed_seconds)
        # At the very first instant of the month there is nothing to average.
        daily_average = spend_so_far / elapsed_days if elapsed_days > 0 else 0.0
        projected_total = spend_so_far + (daily_average * remaining_days)

        recent_daily = self.aggregator.total_cost(window_seconds=3 * 86400) / 3
        # A 10% band around the month-to-date average counts as "stable".
        trend = "increasing" if recent_daily > daily_average * 1.1 else (
            "decreasing" if recent_daily < daily_average * 0.9 else "stable"
        )
        trend_adjusted = spend_so_far + (recent_daily * remaining_days)

        return {
            "spend_to_date": round(spend_so_far, 2),
            "daily_average": round(daily_average, 2),
            "recent_daily_average": round(recent_daily, 2),
            "projected_total": round(projected_total, 2),
            "trend_adjusted_total": round(trend_adjusted, 2),
            "trend": trend,
            "day_of_month": now.day,
        }

FastAPI Dashboard Endpoints

from fastapi import FastAPI, Query

# Application object on which the route decorators below register handlers.
app = FastAPI(title="AI Agent Cost Dashboard")

# Module-level instances shared by every request handler. The collector keeps
# its events in an in-memory list, so they are not persisted by this code.
collector = CostCollector()
# Budget against which /api/costs/alerts compares the current spend.
alert_manager = BudgetAlertManager(monthly_budget=10000)

@app.get("/api/costs/summary")
def cost_summary(window_hours: int = Query(24, ge=1, le=720)):
    """Return total cost, breakdowns, top users and event count for a window.

    ``window_hours`` is the size of the trailing window (1 to 720 hours,
    default 24).
    """
    window_sec = 3600 * window_hours
    agg = CostAggregator(collector.events)

    summary = {}
    summary["total_cost"] = round(agg.total_cost(window_sec), 4)
    summary["by_model"] = agg.cost_by_model(window_sec)
    summary["by_agent"] = agg.cost_by_agent(window_sec)
    summary["top_users"] = agg.top_users(window_sec)
    # Number of events that fall inside the same window.
    summary["total_events"] = len(agg._filter_window(window_sec))
    return summary

@app.get("/api/costs/hourly")
def hourly_costs(hours: int = Query(24, ge=1, le=168)):
    """Return cost per hour bucket for the last ``hours`` hours (1 to 168)."""
    per_hour = CostAggregator(collector.events).cost_by_hour(hours)
    return {"hourly_costs": per_hour}

@app.get("/api/costs/forecast")
def cost_forecast():
    """Return the monthly spend forecast built from the collected events."""
    return CostForecaster(CostAggregator(collector.events)).forecast_monthly()

@app.get("/api/costs/alerts")
def check_alerts():
    """Return budget alerts newly triggered by the current spend.

    "Monthly" spend here is the total over the trailing 30 days, not the
    calendar month. The alert manager remembers which thresholds it has
    reported, so an alert is returned by one call only.
    """
    trailing_month_sec = 30 * 86400
    spend = CostAggregator(collector.events).total_cost(window_seconds=trailing_month_sec)
    fired = alert_manager.check(spend)

    response = {"alerts": [asdict(alert) for alert in fired]}
    response["current_monthly_spend"] = round(spend, 2)
    response["budget"] = alert_manager.monthly_budget
    return response

Putting It All Together

The complete cost dashboard architecture has four components working together: the collector captures every cost event at the point of API invocation, the aggregator transforms raw events into time-windowed summaries, the alert manager monitors spend against budgets and emits notifications, and the forecaster projects future spend from historical trends. This gives engineering and finance teams a shared source of truth for AI agent economics.

FAQ

How should I store cost events in production?

For small scale (under 1 million events/month), PostgreSQL with time-based partitioning works well. For larger volumes, use a time-series database like TimescaleDB or InfluxDB. Always write events asynchronously so cost tracking does not add latency to agent responses. Keep raw events for 90 days and aggregate older data into hourly/daily summaries.

How accurate are the cost forecasts?

Linear forecasts based on daily averages are accurate within 10–15% for workloads with stable patterns. The trend-adjusted forecast (using the most recent 3-day average) reacts faster to recent growth or decline and is typically more accurate mid-month, but a 3-day window is too short to capture weekly or seasonal cycles. For early-month forecasts (days 1–5), accuracy is lower because the sample size is small — consider using the previous month’s data as a baseline.

Should I build this or use a third-party cost monitoring tool?

Tools like Helicone, LangSmith, and Portkey provide excellent cost tracking out of the box. Build your own only if you need custom aggregation logic, tight integration with internal billing systems, or multi-provider normalization that existing tools do not support. For most teams, starting with a third-party tool and migrating to a custom solution as needs grow is the pragmatic choice.

#CostDashboard #Monitoring #BudgetAlerts #Forecasting #Observability #AgenticAI #LearnAI #AIEngineering

Building an AI Agent Cost Dashboard: Real-Time Spend Tracking and Budget Alerts

Why You Need a Cost Dashboard

Data Collection Layer

Aggregation Engine

Budget Alert System

Cost Forecasting

FastAPI Dashboard Endpoints

Putting It All Together

FAQ

How should I store cost events in production?

How accurate are the cost forecasts?

Should I build this or use a third-party cost monitoring tool?

Try CallSphere AI Voice Agents

Related Articles

WebArena and Real-World Web Agent Benchmarks: How We Measure Browser Agent Performance

Taking Screenshots and Recording Videos with Playwright for AI Analysis

Playwright Selectors Deep Dive: CSS, XPath, Text, and Role-Based Element Finding