Building an AI Agent Cost Dashboard: Real-Time Spend Tracking and Budget Alerts
Build a production-ready cost dashboard for AI agents with real-time spend tracking, budget alerts, cost forecasting, and per-model breakdowns. Complete Python implementation with FastAPI and data aggregation.
Why You Need a Cost Dashboard
Checking your OpenAI billing page once a month is not cost management — it is cost discovery. By the time you notice a spike, you have already overspent. A purpose-built cost dashboard gives you real-time visibility into spend, automatic alerts before budgets are exceeded, and trend data for capacity planning.
Data Collection Layer
Every LLM call, embedding request, and tool invocation must emit a cost event. Build a lightweight collector that sits between your agent and the LLM provider.
import time
import json
from dataclasses import dataclass, field, asdict
from typing import List, Optional
from collections import defaultdict
@dataclass
class CostEvent:
    """One recorded cost-bearing operation performed by an agent.

    The first five fields are required; usage, cost, attribution and
    metadata all have defaults so a minimal event can be built from
    identity information alone.
    """

    # --- identity / classification (required) ---
    event_id: str
    timestamp: float
    agent_id: str
    model: str
    event_type: str  # "llm_call", "embedding", "tool_call"

    # --- usage and resulting cost ---
    input_tokens: int = 0
    output_tokens: int = 0
    cost_usd: float = 0.0

    # --- optional attribution and free-form extras ---
    user_id: Optional[str] = None
    # default_factory gives every event its own dict rather than a shared one
    metadata: dict = field(default_factory=dict)
class CostCollector:
    """Prices agent operations and keeps them in an in-memory event list.

    Each call to :meth:`record` computes the cost of the operation from
    ``MODEL_PRICING`` and appends a ``CostEvent`` to ``self.events``.
    """

    # Price table in USD per 1,000,000 tokens (calculate_cost divides the
    # token counts by 1_000_000 before multiplying).
    MODEL_PRICING = {
        "gpt-4o": {"input": 2.50, "output": 10.00},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "text-embedding-3-small": {"input": 0.02, "output": 0.0},
        "text-embedding-3-large": {"input": 0.13, "output": 0.0},
    }

    # Rates applied to any model that is missing from MODEL_PRICING, so an
    # unrecognised model is still charged something instead of nothing.
    _FALLBACK_PRICING = {"input": 5.0, "output": 15.0}

    def __init__(self):
        self.events: List[CostEvent] = []

    def calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Return the USD cost of one call, rounded to 6 decimal places.

        Args:
            model: Key into ``MODEL_PRICING``; unknown models use the
                fallback rates.
            input_tokens: Number of input tokens (must be >= 0).
            output_tokens: Number of output tokens (must be >= 0).

        Raises:
            ValueError: If either token count is negative; a negative count
                would yield a negative cost and silently reduce totals.
        """
        if input_tokens < 0 or output_tokens < 0:
            raise ValueError(
                f"token counts must be non-negative, got "
                f"input_tokens={input_tokens}, output_tokens={output_tokens}"
            )
        pricing = self.MODEL_PRICING.get(model, self._FALLBACK_PRICING)
        input_cost = (input_tokens / 1_000_000) * pricing["input"]
        output_cost = (output_tokens / 1_000_000) * pricing["output"]
        return round(input_cost + output_cost, 6)

    def record(
        self,
        agent_id: str,
        model: str,
        event_type: str,
        input_tokens: int,
        output_tokens: int = 0,
        user_id: Optional[str] = None,
        **metadata,
    ) -> "CostEvent":
        """Price one operation, store it, and return the stored event.

        Args:
            agent_id: Identifier of the agent that performed the operation.
            model: Model name used for pricing.
            event_type: Event category, e.g. "llm_call", "embedding",
                "tool_call".
            input_tokens: Input tokens consumed.
            output_tokens: Output tokens produced.
            user_id: Optional user the cost is attributed to.
            **metadata: Extra keyword arguments stored on the event as-is.

        Returns:
            The ``CostEvent`` that was appended to ``self.events``.

        Raises:
            ValueError: If a token count is negative (see calculate_cost).
        """
        import uuid  # local import: only needed for the id suffix

        cost = self.calculate_cost(model, input_tokens, output_tokens)
        # Read the clock once so the id and the timestamp always agree.
        now = time.time()
        # The random suffix keeps ids unique when one agent records several
        # events within the same millisecond.
        event_id = f"{agent_id}-{int(now * 1000)}-{uuid.uuid4().hex[:8]}"
        event = CostEvent(
            event_id=event_id,
            timestamp=now,
            agent_id=agent_id,
            model=model,
            event_type=event_type,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=cost,
            user_id=user_id,
            metadata=metadata,
        )
        self.events.append(event)
        return event
Aggregation Engine
Raw events must be aggregated into useful views: by time period, model, agent, and user.
from datetime import datetime, timedelta
class CostAggregator:
    """Summarises a list of cost events over a trailing time window.

    Every public method looks only at events whose ``timestamp`` is newer
    than ``time.time() - window``. The event list is held by reference, not
    copied.
    """

    def __init__(self, events: List["CostEvent"]):
        self.events = events

    def _filter_window(self, window_seconds: int) -> List["CostEvent"]:
        """Return the events newer than ``window_seconds`` ago."""
        cutoff = time.time() - window_seconds
        return [e for e in self.events if e.timestamp > cutoff]

    def _breakdown(self, attribute: str, window_seconds: int) -> dict:
        """Sum ``cost_usd`` per value of ``attribute``, most expensive first."""
        totals = defaultdict(float)
        for event in self._filter_window(window_seconds):
            totals[getattr(event, attribute)] += event.cost_usd
        # sorted() is stable, so equal costs keep first-seen order.
        return dict(sorted(totals.items(), key=lambda item: -item[1]))

    def total_cost(self, window_seconds: int = 86400) -> float:
        """Return the summed cost of all events inside the window."""
        return sum(e.cost_usd for e in self._filter_window(window_seconds))

    def cost_by_model(self, window_seconds: int = 86400) -> dict:
        """Return ``{model: cost}`` for the window, highest cost first."""
        return self._breakdown("model", window_seconds)

    def cost_by_agent(self, window_seconds: int = 86400) -> dict:
        """Return ``{agent_id: cost}`` for the window, highest cost first."""
        return self._breakdown("agent_id", window_seconds)

    def cost_by_hour(self, window_hours: int = 24, tz=None) -> dict:
        """Return ``{"YYYY-MM-DD HH:00": cost}`` sorted by hour label.

        Args:
            window_hours: Size of the trailing window in hours.
            tz: Optional ``datetime.tzinfo`` used to bucket timestamps.
                The default ``None`` keeps the original behaviour of
                bucketing in the server's local time; pass
                ``timezone.utc`` for labels that do not depend on the host
                time zone or its DST changes.
        """
        hourly = defaultdict(float)
        for event in self._filter_window(window_hours * 3600):
            moment = datetime.fromtimestamp(event.timestamp, tz=tz)
            hourly[moment.strftime("%Y-%m-%d %H:00")] += event.cost_usd
        return dict(sorted(hourly.items()))

    def top_users(self, window_seconds: int = 86400, limit: int = 10) -> list:
        """Return up to ``limit`` users ranked by cost within the window.

        Events with a falsy ``user_id`` are grouped under "anonymous".
        Each entry is ``{"user_id": ..., "cost": ..., "requests": ...}``.
        """
        user_costs = defaultdict(lambda: {"cost": 0.0, "requests": 0})
        for event in self._filter_window(window_seconds):
            uid = event.user_id or "anonymous"
            user_costs[uid]["cost"] += event.cost_usd
            user_costs[uid]["requests"] += 1
        ranked = sorted(user_costs.items(), key=lambda item: -item[1]["cost"])
        return [{"user_id": uid, **data} for uid, data in ranked[:limit]]
Budget Alert System
from enum import Enum
class AlertSeverity(Enum):
    """Severity label carried by a budget alert.

    Each member's value is the lowercase form of its name.
    """

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"
@dataclass
class BudgetAlert:
    """A single budget notification together with the figures behind it."""

    # What happened and how serious it is
    severity: AlertSeverity
    message: str

    # Numbers the alert was computed from
    current_spend: float
    budget_limit: float
    usage_percent: float

    # Filled in with time.time() at construction unless given explicitly
    timestamp: float = field(default_factory=time.time)
class BudgetAlertManager:
    """Emits each budget-threshold alert at most once per calendar month.

    Thresholds are fractions of ``monthly_budget`` (0.50, 0.75, 0.90, 1.00).
    A threshold that has fired is remembered in ``sent_alerts`` so repeated
    calls to :meth:`check` do not re-send it.

    Args:
        monthly_budget: Budget the spend is compared against. A falsy
            budget (0 / None) disables alerting: usage is treated as 0.
        auto_reset: When true (default), ``sent_alerts`` is cleared
            automatically the first time :meth:`check` runs in a new
            local calendar month. Without this, thresholds would only ever
            fire once unless the caller remembered to call
            :meth:`reset_monthly`. Pass ``False`` to manage the reset
            yourself (e.g. for a billing cycle that is not the calendar
            month).
    """

    def __init__(self, monthly_budget: float, auto_reset: bool = True):
        self.monthly_budget = monthly_budget
        self.auto_reset = auto_reset
        self.thresholds = {
            0.50: AlertSeverity.INFO,
            0.75: AlertSeverity.WARNING,
            0.90: AlertSeverity.CRITICAL,
            1.00: AlertSeverity.CRITICAL,
        }
        self.sent_alerts: set = set()
        # (year, month) seen by the most recent check(); None before the first.
        self._period = None

    def _roll_period(self, now: float) -> None:
        """Clear sent alerts when ``now`` falls in a new local calendar month."""
        local = time.localtime(now)
        period = (local.tm_year, local.tm_mon)
        if self.auto_reset and self._period is not None and period != self._period:
            self.reset_monthly()
        self._period = period

    def check(
        self,
        current_monthly_spend: float,
        now: Optional[float] = None,
    ) -> List["BudgetAlert"]:
        """Return alerts for thresholds newly reached by the given spend.

        Args:
            current_monthly_spend: Spend to compare against the budget.
            now: Epoch seconds used to detect a month rollover; defaults to
                ``time.time()``. Mainly useful for tests and backfills.

        Returns:
            One ``BudgetAlert`` per threshold that is reached and has not
            been sent yet this period, in ascending threshold order. Each
            returned threshold is recorded in ``sent_alerts``.
        """
        self._roll_period(time.time() if now is None else now)
        usage_pct = current_monthly_spend / self.monthly_budget if self.monthly_budget else 0
        alerts = []
        # sorted() keeps ascending order even if thresholds was edited later.
        for threshold, severity in sorted(self.thresholds.items()):
            if usage_pct < threshold or threshold in self.sent_alerts:
                continue
            self.sent_alerts.add(threshold)
            alerts.append(BudgetAlert(
                severity=severity,
                message=f"Budget {threshold:.0%} reached: "
                        f"${current_monthly_spend:,.2f} of "
                        f"${self.monthly_budget:,.2f}",
                current_spend=current_monthly_spend,
                budget_limit=self.monthly_budget,
                usage_percent=round(usage_pct * 100, 1),
            ))
        return alerts

    def reset_monthly(self):
        """Forget which thresholds have fired so they can fire again."""
        self.sent_alerts.clear()
Cost Forecasting
Predict end-of-month spend based on current trends.
See AI Voice Agents Handle Real Calls
Book a free demo or calculate how much you can save with AI voice automation.
class CostForecaster:
    """Projects end-of-month spend from month-to-date and recent run rates."""

    def __init__(self, aggregator: "CostAggregator"):
        self.aggregator = aggregator

    def forecast_monthly(self, now: Optional[datetime] = None) -> dict:
        """Return a linear and a trend-adjusted projection for this month.

        The month-to-date window starts at local midnight on the 1st, so it
        never includes spend from the previous month, and the month length
        comes from the calendar rather than an assumed 30 days.

        Args:
            now: Moment to forecast from; defaults to ``datetime.now()``.
                Only the calendar arithmetic uses this value — the
                aggregator measures its windows however it is implemented,
                so pass a non-default value only for tests or with an
                aggregator that agrees with it.

        Returns:
            A dict with keys ``spend_to_date``, ``daily_average``,
            ``recent_daily_average``, ``projected_total``,
            ``trend_adjusted_total`` (all rounded to 2 decimals), ``trend``
            ("increasing" / "decreasing" / "stable") and ``day_of_month``.
            Very early in the month the daily average is extrapolated from
            little data and is correspondingly noisy.
        """
        import calendar  # local import: only needed for the month length

        now = datetime.now() if now is None else now
        month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        elapsed_seconds = int((now - month_start).total_seconds())
        elapsed_days = elapsed_seconds / 86400
        # monthrange() returns (weekday_of_first_day, days_in_month).
        days_in_month = calendar.monthrange(now.year, now.month)[1]

        spend_so_far = self.aggregator.total_cost(window_seconds=elapsed_seconds)
        daily_average = spend_so_far / elapsed_days if elapsed_days > 0 else 0
        remaining_days = days_in_month - elapsed_days
        projected_total = spend_so_far + (daily_average * remaining_days)

        # The last three days may reach into the previous month; that is
        # intentional, it measures the current run rate, not month-to-date.
        recent_daily = self.aggregator.total_cost(window_seconds=3 * 86400) / 3
        trend = "increasing" if recent_daily > daily_average * 1.1 else (
            "decreasing" if recent_daily < daily_average * 0.9 else "stable"
        )
        trend_adjusted = spend_so_far + (recent_daily * remaining_days)

        return {
            "spend_to_date": round(spend_so_far, 2),
            "daily_average": round(daily_average, 2),
            "recent_daily_average": round(recent_daily, 2),
            "projected_total": round(projected_total, 2),
            "trend_adjusted_total": round(trend_adjusted, 2),
            "trend": trend,
            "day_of_month": now.day,
        }
FastAPI Dashboard Endpoints
from fastapi import FastAPI, Query
# Module-level singletons: one event store and one alert manager are shared
# by every request handled by this process.
collector = CostCollector()
alert_manager = BudgetAlertManager(monthly_budget=10_000)

# ASGI application object the route decorators attach to.
app = FastAPI(title="AI Agent Cost Dashboard")
@app.get("/api/costs/summary")
def cost_summary(window_hours: int = Query(24, ge=1, le=720)):
    """Return total cost, per-model/per-agent breakdowns, top users and the
    event count for the trailing ``window_hours`` (1-720, default 24)."""
    seconds = window_hours * 3600
    agg = CostAggregator(collector.events)

    # Built key by key in the same order the response has always used.
    summary = {}
    summary["total_cost"] = round(agg.total_cost(seconds), 4)
    summary["by_model"] = agg.cost_by_model(seconds)
    summary["by_agent"] = agg.cost_by_agent(seconds)
    summary["top_users"] = agg.top_users(seconds)
    summary["total_events"] = len(agg._filter_window(seconds))
    return summary
@app.get("/api/costs/hourly")
def hourly_costs(hours: int = Query(24, ge=1, le=168)):
    """Return cost per hour bucket for the trailing ``hours`` (1-168, default 24)."""
    per_hour = CostAggregator(collector.events).cost_by_hour(hours)
    return {"hourly_costs": per_hour}
@app.get("/api/costs/forecast")
def cost_forecast():
    """Return the forecaster's monthly projection for the recorded events."""
    return CostForecaster(CostAggregator(collector.events)).forecast_monthly()
@app.get("/api/costs/alerts")
def check_alerts():
    """Return budget alerts newly triggered by this month's spend.

    Spend is measured from local midnight on the 1st of the current month,
    so it is compared like-for-like with the *monthly* budget. A trailing
    30-day window would mix in last month's spend and over-report usage
    at the start of every month.

    Note: ``alert_manager.check`` records the thresholds it returns, so each
    alert appears in the response of only one call.
    """
    now = datetime.now()
    month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    month_to_date_seconds = int((now - month_start).total_seconds())

    aggregator = CostAggregator(collector.events)
    current_spend = aggregator.total_cost(window_seconds=month_to_date_seconds)
    alerts = alert_manager.check(current_spend)
    return {
        "alerts": [asdict(a) for a in alerts],
        "current_monthly_spend": round(current_spend, 2),
        "budget": alert_manager.monthly_budget,
    }
Putting It All Together
The complete cost dashboard architecture has four components working together: the collector captures every cost event at the point of API invocation, the aggregator transforms raw events into time-windowed summaries, the alert manager monitors spend against budgets and emits notifications, and the forecaster projects future spend from historical trends. This gives engineering and finance teams a shared source of truth for AI agent economics.
FAQ
How should I store cost events in production?
For small scale (under 1 million events/month), PostgreSQL with time-based partitioning works well. For larger volumes, use a time-series database like TimescaleDB or InfluxDB. Always write events asynchronously so cost tracking does not add latency to agent responses. Keep raw events for 90 days and aggregate older data into hourly/daily summaries.
How accurate are the cost forecasts?
Linear forecasts based on daily averages are accurate within 10–15% for workloads with stable patterns. The trend-adjusted forecast (using the most recent 3-day average) reacts to recent growth or decline in usage and is typically more accurate mid-month; a 3-day window is too short to capture weekly or seasonal cycles. For early-month forecasts (days 1–5), accuracy is lower because the sample size is small — consider using the previous month’s data as a baseline.
Should I build this or use a third-party cost monitoring tool?
Tools like Helicone, LangSmith, and Portkey provide excellent cost tracking out of the box. Build your own only if you need custom aggregation logic, tight integration with internal billing systems, or multi-provider normalization that existing tools do not support. For most teams, starting with a third-party tool and migrating to a custom solution as needs grow is the pragmatic choice.
#CostDashboard #Monitoring #BudgetAlerts #Forecasting #Observability #AgenticAI #LearnAI #AIEngineering
CallSphere Team
Expert insights on AI voice agents and customer communication automation.
Try CallSphere AI Voice Agents
See how AI voice agents work for your industry. Live demo available — no signup required.