Capstone: Building a Real-Time Voice AI Call Center with Analytics Dashboard
Build a production voice AI call center featuring WebRTC-based agent pools, real-time call monitoring, concurrent call handling, and a post-call analytics dashboard with sentiment and intent scoring.
Call Center Architecture
A real-time voice AI call center handles multiple simultaneous phone calls, each serviced by an AI agent with access to business tools. This capstone goes beyond a single-call booking system to build a full call center with call routing, concurrent session management, real-time supervisor monitoring, and post-call analytics.
The architecture has five layers: telephony (Twilio for inbound/outbound calls), media (WebSocket streams for audio), agent pool (concurrent AI agent instances), monitoring (real-time dashboard via Server-Sent Events), and analytics (post-call analysis with GPT-4o).
Data Model
# models.py
import uuid

from sqlalchemy import Column, DateTime, Float, ForeignKey, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import JSONB, UUID
class CallLog(Base):
    """One row per phone call handled by the call center.

    Created when a call starts; ``ended_at``, ``duration_seconds``,
    ``transcript`` and the final ``status`` are filled in when the call ends.
    """

    __tablename__ = "call_logs"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Twilio call SID -- the handle used to correlate media streams,
    # agent sessions and monitoring events with this row.
    call_sid = Column(String(100), unique=True, index=True)
    direction = Column(String(10))  # "inbound", "outbound"
    caller_number = Column(String(20))
    agent_instance_id = Column(String(100))
    status = Column(String(20), default="active")  # active, completed, failed
    # func.now() renders as an unquoted DEFAULT now() in the DDL. A plain
    # string here would be emitted as the quoted literal 'now()', which is
    # not evaluated at insert time.
    started_at = Column(DateTime, server_default=func.now())
    ended_at = Column(DateTime, nullable=True)
    duration_seconds = Column(Integer, nullable=True)
    transcript = Column(Text, nullable=True)
class CallAnalytics(Base):
    """Post-call analysis results produced by GPT-4o, one row per call."""

    __tablename__ = "call_analytics"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    call_id = Column(UUID(as_uuid=True), ForeignKey("call_logs.id"))
    sentiment_score = Column(Float)  # -1.0 (negative) to 1.0 (positive)
    intent = Column(String(100))
    resolution = Column(String(50))  # "resolved", "escalated", "dropped"
    topics = Column(JSONB)  # list of discussed topics
    satisfaction_estimate = Column(Float)  # 0.0 to 1.0
    summary = Column(Text)
    # func.now() renders as an unquoted DEFAULT now() expression; a plain
    # string would be emitted as the quoted literal 'now()' instead of an
    # insert-time timestamp.
    analyzed_at = Column(DateTime, server_default=func.now())
Concurrent Agent Pool
The agent pool manages multiple simultaneous AI agent sessions. Each inbound call gets its own agent instance with isolated conversation state.
# core/agent_pool.py
import asyncio
from dataclasses import dataclass, field
from agents import Agent, Runner
@dataclass
class AgentSession:
    """Per-call conversation state for one isolated AI agent instance."""

    call_sid: str  # Twilio call SID this session belongs to
    agent: Agent  # dedicated Agent instance, created per call
    history: list = field(default_factory=list)  # alternating user/assistant message dicts
    active: bool = True  # flipped to False when the call ends
class AgentPool:
    """Registry of live AgentSession objects keyed by Twilio call SID.

    Bounds concurrency at ``max_concurrent``; session creation and teardown
    are serialized with an asyncio lock so the capacity check and the
    registry mutation happen atomically.
    """

    def __init__(self, max_concurrent: int = 50):
        self.max_concurrent = max_concurrent
        self.sessions: dict[str, AgentSession] = {}
        self._lock = asyncio.Lock()

    async def create_session(self, call_sid: str) -> AgentSession:
        """Create and register a dedicated agent for a new call.

        Raises:
            RuntimeError: if the pool is already at capacity.
        """
        async with self._lock:
            if len(self.sessions) >= self.max_concurrent:
                raise RuntimeError("Agent pool at capacity")
            agent = Agent(
                name=f"Call Agent ({call_sid[:8]})",
                instructions=CALL_CENTER_INSTRUCTIONS,
                tools=[lookup_account, check_balance, create_ticket, transfer_call],
            )
            session = AgentSession(call_sid=call_sid, agent=agent)
            self.sessions[call_sid] = session
            return session

    async def process_utterance(self, call_sid: str, text: str) -> str:
        """Run one user utterance through the call's agent and return the reply.

        The accumulated conversation history is passed to the runner so the
        agent keeps context across turns; previously each utterance was sent
        in isolation and the recorded history was never used.

        Raises:
            ValueError: if there is no active session for ``call_sid``.
        """
        session = self.sessions.get(call_sid)
        if not session or not session.active:
            raise ValueError(f"No active session for {call_sid}")
        # Build the input before mutating history so a failed run does not
        # leave an orphaned user message in the transcript.
        turn_input = session.history + [{"role": "user", "content": text}]
        result = await Runner.run(session.agent, turn_input)
        session.history.append({"role": "user", "content": text})
        session.history.append({"role": "assistant", "content": result.final_output})
        return result.final_output

    async def end_session(self, call_sid: str) -> list:
        """Deactivate and unregister a session; return its transcript history."""
        async with self._lock:
            session = self.sessions.pop(call_sid, None)
            if session:
                session.active = False
                return session.history
            return []


# Module-level singleton shared by the telephony and monitoring routes.
agent_pool = AgentPool(max_concurrent=50)
Real-Time Monitoring with Server-Sent Events
Supervisors need a live view of all active calls. Use Server-Sent Events (SSE) to push real-time updates to the monitoring dashboard.
# routes/monitoring.py
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
import asyncio, json
router = APIRouter()

# One queue per connected dashboard client. A single shared queue would make
# concurrent subscribers *steal* events from each other -- each queued event
# is consumed by exactly one reader -- so events are fanned out to every
# per-client queue instead.
_subscribers: set[asyncio.Queue] = set()

# Retained for backward compatibility with code that imported the old
# module-level queue; new events are delivered via _subscribers.
event_queue: asyncio.Queue = asyncio.Queue()


async def publish_event(event_type: str, data: dict):
    """Broadcast one event to every connected monitoring client."""
    event = {"type": event_type, "data": data}
    for queue in list(_subscribers):
        # put_nowait: queues are unbounded, and a slow dashboard client must
        # never block the call-handling path.
        queue.put_nowait(event)


async def event_stream():
    """Yield SSE-formatted frames for one subscriber until it disconnects."""
    queue: asyncio.Queue = asyncio.Queue()
    _subscribers.add(queue)
    try:
        while True:
            event = await queue.get()
            yield f"event: {event['type']}\ndata: {json.dumps(event['data'])}\n\n"
    finally:
        # The generator is closed when the client disconnects; drop its
        # queue so publish_event stops filling it.
        _subscribers.discard(queue)


@router.get("/monitor/stream")
async def monitor_stream():
    """SSE endpoint consumed by the supervisor dashboard."""
    return StreamingResponse(event_stream(), media_type="text/event-stream")
@router.get("/monitor/active-calls")
async def get_active_calls():
    """Return a snapshot of every live call session plus pool utilization."""
    pool_sessions = agent_pool.sessions
    call_summaries = []
    for sid, sess in pool_sessions.items():
        call_summaries.append(
            {
                "call_sid": sid,
                "turn_count": len(sess.history),
                "active": sess.active,
            }
        )
    return {
        "active_count": len(pool_sessions),
        "capacity": agent_pool.max_concurrent,
        "calls": call_summaries,
    }
Emit events at key moments in the call lifecycle.
See AI Voice Agents Handle Real Calls
Book a free demo or calculate how much you can save with AI voice automation.
# In the WebSocket handler:
async def handle_call_start(call_sid: str, caller: str):
    """Provision an agent for a new call and notify the monitoring dashboard.

    Raises:
        RuntimeError: propagated from the pool when it is at capacity; the
        telephony layer should play a hold/queue message in that case.
    """
    # The session is registered in the pool under call_sid and looked up
    # there on each utterance, so the return value was an unused local.
    await agent_pool.create_session(call_sid)
    await publish_event("call_started", {
        "call_sid": call_sid, "caller": caller, "timestamp": utcnow_iso()
    })
async def handle_utterance(call_sid: str, text: str):
    """Run one user turn through the call's agent and mirror it to monitors."""
    agent_reply = await agent_pool.process_utterance(call_sid, text)
    event_payload = {"call_sid": call_sid, "user": text, "agent": agent_reply}
    await publish_event("utterance", event_payload)
    return agent_reply
# Strong references to in-flight analysis tasks: the event loop keeps only
# weak references to tasks, so a bare create_task() result can be garbage
# collected before the analysis finishes.
_analysis_tasks: set = set()


async def handle_call_end(call_sid: str):
    """Tear down the call's agent session and kick off post-call analysis."""
    history = await agent_pool.end_session(call_sid)
    await publish_event("call_ended", {"call_sid": call_sid})
    # Analysis runs in the background so hangup handling stays fast.
    task = asyncio.create_task(analyze_call(call_sid, history))
    _analysis_tasks.add(task)
    task.add_done_callback(_analysis_tasks.discard)
Post-Call Analytics with GPT-4o
After each call ends, analyze the transcript to extract sentiment, intent, resolution status, and a summary.
# services/post_call_analysis.py
import openai, json
async def analyze_call(call_sid: str, history: list):
    """Analyze a finished call's transcript and persist the results.

    Runs as a fire-and-forget background task after hangup. ``history`` is
    the list of {"role", "content"} dicts recorded during the call.
    """
    import asyncio  # local import: this module only imports openai/json at top level

    if not history:
        # Immediate hangup -- nothing was said, so there is no transcript
        # worth an API call.
        return

    transcript = "\n".join(
        f"{m['role'].upper()}: {m['content']}" for m in history
    )
    # The openai client call is synchronous; run it in a worker thread so it
    # does not block the event loop that is servicing live calls.
    response = await asyncio.to_thread(
        openai.chat.completions.create,
        model="gpt-4o",
        messages=[
            {"role": "system", "content": """Analyze this call transcript.
Return JSON with: sentiment_score (-1 to 1), intent (string),
resolution (resolved/escalated/dropped), topics (list of strings),
satisfaction_estimate (0 to 1), summary (2 sentences)."""},
            {"role": "user", "content": transcript},
        ],
        response_format={"type": "json_object"},
    )
    analysis = json.loads(response.choices[0].message.content)

    call = db.query(CallLog).filter(CallLog.call_sid == call_sid).first()
    if call is None:
        # The call row was never logged (or was deleted): nothing to attach
        # the analytics to. Previously this raised AttributeError.
        return
    call.transcript = transcript
    call.status = "completed"
    analytics = CallAnalytics(
        call_id=call.id,
        sentiment_score=analysis["sentiment_score"],
        intent=analysis["intent"],
        resolution=analysis["resolution"],
        topics=analysis["topics"],
        satisfaction_estimate=analysis["satisfaction_estimate"],
        summary=analysis["summary"],
    )
    db.add(analytics)
    db.commit()
Analytics Dashboard API
# routes/analytics.py
@router.get("/analytics/overview")
async def analytics_overview(days: int = 7, db=Depends(get_db)):
    """Aggregate call volume, duration, sentiment and resolution counts.

    Args:
        days: size of the trailing window in days (default one week).
    """
    since = datetime.utcnow() - timedelta(days=days)
    calls = db.query(CallLog).filter(CallLog.started_at >= since).all()
    analytics = db.query(CallAnalytics).join(CallLog).filter(
        CallLog.started_at >= since
    ).all()
    # sentiment_score is a nullable column (analysis may fail or still be
    # running); skip missing scores instead of letting sum() raise on None.
    sentiments = [a.sentiment_score for a in analytics if a.sentiment_score is not None]
    return {
        "total_calls": len(calls),
        "avg_duration": sum(c.duration_seconds or 0 for c in calls) / max(len(calls), 1),
        "avg_sentiment": sum(sentiments) / max(len(sentiments), 1),
        "resolution_rates": {
            "resolved": sum(1 for a in analytics if a.resolution == "resolved"),
            "escalated": sum(1 for a in analytics if a.resolution == "escalated"),
            "dropped": sum(1 for a in analytics if a.resolution == "dropped"),
        },
    }
FAQ
How do I handle call spikes beyond the agent pool capacity?
Implement a queue system with estimated wait times. When the pool is at capacity, new callers hear a hold message with their position in the queue. Use a priority queue so returning callers or VIP numbers get faster service. Monitor queue depth as a key metric for scaling decisions.
How do I ensure call audio quality over WebSocket?
Use Twilio's mulaw encoding at 8kHz for telephony-grade audio. For the WebSocket connection, ensure your server is geographically close to Twilio's media servers. Monitor WebSocket latency and implement audio buffering to smooth out network jitter.
How accurate is the post-call sentiment analysis?
GPT-4o achieves approximately 85-90% agreement with human raters on sentiment scoring for call transcripts. For critical decisions like customer churn prediction, combine the AI sentiment score with structured signals like resolution status and call duration. Periodically sample calls for human review to calibrate the model.
#CapstoneProject #VoiceAI #CallCenter #WebRTC #RealTimeAnalytics #FullStackAI #AgenticAI #LearnAI #AIEngineering
CallSphere Team
Expert insights on AI voice agents and customer communication automation.
Try CallSphere AI Voice Agents
See how AI voice agents work for your industry. Live demo available -- no signup required.