Conversation Summarization: Generating Concise Summaries of Long Agent Interactions
Build conversation summarization systems that generate concise, actionable summaries of long AI agent interactions with key point extraction, decision tracking, and follow-up items.
Why Summarize Conversations?
Long conversations with AI agents accumulate context that becomes unwieldy. A 30-message support interaction buries the actual decisions and next steps under layers of troubleshooting dialog. Conversation summarization extracts the essential information — what was discussed, what was decided, what actions remain — and presents it in a form that humans and other agents can use efficiently.
Summaries serve multiple purposes: handoff context when transferring to a human agent, session continuity when a user returns later, audit trails for compliance, and analytics data for improving agent performance.
Modeling Conversation Turns
Start by structuring raw conversation data into a form suitable for summarization.
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
class TurnType(Enum):
    """Category label for a single conversation turn.

    Every member's value is the lowercase string form of its name.
    """

    # Conversational framing.
    GREETING = "greeting"

    # Information exchange.
    QUESTION = "question"
    ANSWER = "answer"

    # Turns that change the state of the interaction.
    ACTION = "action"
    DECISION = "decision"
    COMPLAINT = "complaint"
    RESOLUTION = "resolution"

    # Chatter.
    SMALL_TALK = "small_talk"
@dataclass
class ConversationTurn:
    """One utterance in a conversation together with its labels.

    ``speaker``, ``content`` and ``timestamp`` are required; every other
    field has a default and can be filled in later.
    """

    # --- required -------------------------------------------------------
    speaker: str  # "user" or "agent"
    content: str
    timestamp: datetime

    # --- labels, all defaulted ------------------------------------------
    turn_type: TurnType = TurnType.ANSWER
    importance: float = 0.5  # 0.0-1.0
    # default_factory gives each instance its own dict.
    entities: dict = field(default_factory=dict)
    is_key_point: bool = False
class TurnClassifier:
    """Keyword-based classifier that labels a turn and scores its importance.

    Each turn type owns a list of indicator phrases. A turn is assigned the
    type with the most indicators present in its lowercased content.
    """

    def __init__(self):
        # Lowercase indicator phrases per type. Insertion order matters:
        # on a tie, the type listed first wins (see ``classify``).
        self.type_indicators = {
            TurnType.QUESTION: ["?", "how", "what", "when", "can you"],
            TurnType.COMPLAINT: [
                "problem", "issue", "broken", "wrong", "not working",
            ],
            TurnType.DECISION: [
                "let's go with", "i'll take", "yes proceed",
                "confirmed", "agreed",
            ],
            TurnType.ACTION: [
                "i've initiated", "done", "completed",
                "processed", "updated", "created",
            ],
            TurnType.RESOLUTION: [
                "resolved", "fixed", "that works", "thank you",
                "all set", "that solves",
            ],
            TurnType.GREETING: [
                "hello", "hi ", "hey", "good morning", "good afternoon",
            ],
        }
        # Types that receive the high importance score.
        self.high_importance_types = {
            TurnType.DECISION, TurnType.ACTION,
            TurnType.RESOLUTION, TurnType.COMPLAINT,
        }

    @staticmethod
    def _indicator_pattern(indicator: str) -> str:
        """Return a regex source that matches *indicator* as a whole phrase.

        Surrounding whitespace in the indicator is dropped and replaced by
        word-boundary lookarounds, so "hi " matches "hi," and a bare "hi"
        but not the "hi" inside "delhi". A boundary is only added on a side
        that starts/ends with an alphanumeric character, which keeps
        punctuation indicators such as "?" working as plain substrings.
        """
        phrase = indicator.strip()
        pattern = re.escape(phrase)
        if phrase[:1].isalnum():
            pattern = r"(?<!\w)" + pattern
        if phrase[-1:].isalnum():
            pattern += r"(?!\w)"
        return pattern

    @classmethod
    def _count_hits(cls, indicators, text: str) -> int:
        """Count how many of *indicators* occur in *text* as whole phrases.

        Blank indicators are ignored; they would otherwise match every text.
        """
        return sum(
            1
            for indicator in indicators
            if indicator.strip()
            and re.search(cls._indicator_pattern(indicator), text)
        )

    def classify(self, turn: "ConversationTurn") -> "ConversationTurn":
        """Label *turn* in place and return it.

        Overwrites ``turn.turn_type`` (any previously set value is lost),
        ``turn.importance`` and ``turn.is_key_point``.

        Args:
            turn: The turn to classify; only ``content`` is read.

        Returns:
            The same ``turn`` object, mutated.
        """
        content_lower = turn.content.lower()

        # No indicator hit at all leaves the turn as a plain ANSWER.
        best_type = TurnType.ANSWER
        best_score = 0
        for turn_type, indicators in self.type_indicators.items():
            hits = self._count_hits(indicators, content_lower)
            # Strict ">" keeps the earliest type when scores are tied.
            if hits > best_score:
                best_score = hits
                best_type = turn_type

        turn.turn_type = best_type
        turn.importance = (
            0.8 if best_type in self.high_importance_types else 0.4
        )
        turn.is_key_point = turn.importance >= 0.7
        return turn
Key Point Extraction
Not every turn matters for the summary. Extract key points — decisions, actions, complaints, and resolutions — while filtering noise.
@dataclass
class KeyPoint:
    """A single statement retained for the conversation summary."""

    content: str  # text of the statement
    category: str  # string label for the kind of statement
    timestamp: datetime  # when the statement was made
    speaker: str  # who made the statement
class KeyPointExtractor:
    """Pick the important turns of a conversation and emit them as key points."""

    # Filler phrases removed from key-point text. Each one is matched as a
    # whole phrase followed by a single space, ignoring case.
    _FILLERS = ("um", "uh", "well", "so basically", "i mean", "you know")
    _FILLER_RE = re.compile(
        r"(?<!\w)(?:" + "|".join(map(re.escape, _FILLERS)) + r") ",
        re.IGNORECASE,
    )

    def __init__(self, importance_threshold: float = 0.6):
        """Create an extractor.

        Args:
            importance_threshold: Turns whose importance is below this
                value are left out of the result.
        """
        self.threshold = importance_threshold
        self.classifier = TurnClassifier()

    def extract(
        self, turns: "list[ConversationTurn]"
    ) -> "list[KeyPoint]":
        """Classify *turns* and return the ones worth summarizing.

        The turns are classified in place by ``self.classifier``. A turn is
        dropped when its importance is below the threshold or when it is
        redundant with the most recently kept key point.

        Args:
            turns: Conversation turns in the order they should be scanned.

        Returns:
            Key points in the same relative order as the input turns.
        """
        classified = [self.classifier.classify(t) for t in turns]
        key_points = []
        for turn in classified:
            if turn.importance < self.threshold:
                continue
            # Skip near-duplicate key points (only the previous one is checked).
            if key_points and self._is_redundant(
                turn.content, key_points[-1].content
            ):
                continue
            key_points.append(KeyPoint(
                content=self._clean_content(turn.content),
                category=turn.turn_type.value,
                timestamp=turn.timestamp,
                speaker=turn.speaker,
            ))
        return key_points

    def _is_redundant(self, new: str, existing: str) -> bool:
        """Return True when over 70% of *new*'s distinct words occur in *existing*.

        Words are lowercased, whitespace-separated tokens. If either text
        has no words the result is False.
        """
        new_words = set(new.lower().split())
        existing_words = set(existing.lower().split())
        if not new_words or not existing_words:
            return False
        overlap = len(new_words & existing_words)
        return overlap / len(new_words) > 0.7

    def _clean_content(self, content: str) -> str:
        """Return *content* with filler phrases removed and ends stripped.

        Fillers are matched on word boundaries, so the "um " inside
        "maximum " or the "well " inside "farewell " is left alone, and
        case-insensitively, so a sentence-initial "Um " is removed too.
        """
        return self._FILLER_RE.sub("", content).strip()
The Summarization Engine
Combine key points into structured, actionable summaries with distinct sections.
See AI Voice Agents Handle Real Calls
Book a free demo or calculate how much you can save with AI voice automation.
@dataclass
class ConversationSummary:
    """Structured result of summarizing one conversation."""

    # --- header ---------------------------------------------------------
    topic: str
    duration_minutes: float
    total_turns: int

    # --- extracted content ----------------------------------------------
    key_points: list[KeyPoint]
    decisions: list[str]
    actions_taken: list[str]
    pending_items: list[str]
    outcome: str

    # --- rendered text; empty string unless supplied or assigned ---------
    formatted: str = ""
class SummarizationEngine:
    """Build a structured and a formatted summary from conversation turns."""

    # Lowercase phrases that mark a turn as describing outstanding work.
    _PENDING_PHRASES = (
        "will follow up", "i'll check", "get back to",
        "pending", "waiting for",
    )

    def __init__(self):
        self.extractor = KeyPointExtractor()

    def summarize(
        self, turns: "list[ConversationTurn]", topic: str = "Support Interaction"
    ) -> "ConversationSummary":
        """Summarize *turns* into a ``ConversationSummary``.

        Args:
            turns: The conversation turns. They are passed to the key-point
                extractor, which classifies them in place.
            topic: Heading used for the summary.

        Returns:
            A summary with ``formatted`` filled in. For an empty ``turns``
            list a placeholder summary is returned and ``formatted`` stays
            empty.
        """
        if not turns:
            return ConversationSummary(
                topic=topic, duration_minutes=0,
                total_turns=0, key_points=[],
                decisions=[], actions_taken=[],
                pending_items=[], outcome="No conversation data.",
            )

        key_points = self.extractor.extract(turns)
        duration = self._duration_minutes(turns)

        decisions = self._contents(key_points, "decision")
        actions = self._contents(key_points, "action")
        complaints = self._contents(key_points, "complaint")

        outcome = self._determine_outcome(key_points)
        pending = self._find_pending_items(turns, actions)

        summary = ConversationSummary(
            topic=topic,
            duration_minutes=round(duration, 1),
            total_turns=len(turns),
            key_points=key_points,
            decisions=decisions,
            actions_taken=actions,
            pending_items=pending,
            outcome=outcome,
        )
        summary.formatted = self._format(summary, complaints)
        return summary

    @staticmethod
    def _duration_minutes(turns) -> float:
        """Return the minutes between the earliest and latest turn timestamp.

        Uses min/max rather than first/last so that turns supplied out of
        chronological order cannot produce a negative duration.
        """
        stamps = [turn.timestamp for turn in turns]
        return (max(stamps) - min(stamps)).total_seconds() / 60

    @staticmethod
    def _contents(key_points, category: str) -> "list[str]":
        """Return the content of every key point whose category matches."""
        return [kp.content for kp in key_points if kp.category == category]

    def _determine_outcome(self, key_points: "list[KeyPoint]") -> str:
        """Return the outcome label implied by the key-point categories.

        Any resolution yields "Resolved"; otherwise any complaint yields
        "Unresolved - requires follow-up"; otherwise "Completed".
        """
        categories = {kp.category for kp in key_points}
        if "resolution" in categories:
            return "Resolved"
        if "complaint" in categories:
            return "Unresolved - requires follow-up"
        return "Completed"

    def _find_pending_items(
        self, turns: "list[ConversationTurn]", completed_actions: "list[str]"
    ) -> "list[str]":
        """Return the content of every turn that mentions outstanding work.

        Args:
            turns: Turns to scan; matching is case-insensitive on content.
            completed_actions: Accepted for interface compatibility and
                currently not consulted.

        Returns:
            The original (unmodified) content of each matching turn, in
            input order.
        """
        return [
            turn.content
            for turn in turns
            if any(
                phrase in turn.content.lower()
                for phrase in self._PENDING_PHRASES
            )
        ]

    def _format(
        self, summary: "ConversationSummary", complaints: "list[str]"
    ) -> str:
        """Render *summary* as Markdown-style text.

        Args:
            summary: Supplies the header values and the decision, action
                and pending lists.
            complaints: Items for the "Issues Reported" section.

        Returns:
            Newline-joined text. Sections with no items are omitted.
        """
        lines = [
            f"## {summary.topic}",
            f"Duration: {summary.duration_minutes} min | "
            f"Turns: {summary.total_turns} | "
            f"Outcome: {summary.outcome}",
            "",
        ]
        # (heading, items, blank line after). The final section has no
        # trailing blank line.
        sections = (
            ("### Issues Reported", complaints, True),
            ("### Decisions Made", summary.decisions, True),
            ("### Actions Taken", summary.actions_taken, True),
            ("### Pending Follow-Up", summary.pending_items, False),
        )
        for heading, items, blank_after in sections:
            if not items:
                continue
            lines.append(heading)
            lines.extend(f"- {item}" for item in items)
            if blank_after:
                lines.append("")
        return "\n".join(lines)
Using the Engine
from datetime import datetime, timedelta
# All timestamps in the sample conversation are offsets from this start time.
base = datetime(2026, 3, 17, 10, 0)

# A short billing-support exchange, one ConversationTurn per message.
turns = [
    ConversationTurn(
        speaker="user",
        content="Hi, I have a billing problem",
        timestamp=base,
        turn_type=TurnType.COMPLAINT,
    ),
    ConversationTurn(
        speaker="agent",
        content="I'm sorry to hear that. What's the issue?",
        timestamp=base + timedelta(seconds=15),
    ),
    ConversationTurn(
        speaker="user",
        content="I was charged twice for order ORD-9921",
        timestamp=base + timedelta(seconds=45),
        turn_type=TurnType.COMPLAINT,
    ),
    ConversationTurn(
        speaker="agent",
        content="I've found the duplicate charge and processed a refund of $49.99.",
        timestamp=base + timedelta(minutes=2),
        turn_type=TurnType.ACTION,
    ),
    ConversationTurn(
        speaker="user",
        content="Yes proceed with the refund, confirmed.",
        timestamp=base + timedelta(minutes=3),
        turn_type=TurnType.DECISION,
    ),
    ConversationTurn(
        speaker="agent",
        content="Refund completed. It will appear in 3-5 business days.",
        timestamp=base + timedelta(minutes=4),
        turn_type=TurnType.RESOLUTION,
    ),
    ConversationTurn(
        speaker="user",
        content="Thank you, that solves my issue.",
        timestamp=base + timedelta(minutes=5),
        turn_type=TurnType.RESOLUTION,
    ),
]

# Summarize the exchange and print the rendered text.
engine = SummarizationEngine()
summary = engine.summarize(turns, topic="Billing: Duplicate Charge")
print(summary.formatted)
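This produces a clean summary with issues, decisions, actions, and outcome — ready for agent handoff or session records. Note that KeyPointExtractor re-classifies every turn from its text, so the turn_type values passed to ConversationTurn above are overwritten before the summary is built.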
FAQ
When should summarization be triggered?
Trigger summarization at three points: at conversation end for archival and analytics, at agent handoff so the receiving agent has full context, and at session timeout so returning users can review what happened. For long conversations (over 20 turns), also generate running summaries every 10 turns to keep the active context window manageable.
How do you handle multi-topic conversations in a single summary?
Detect topic shifts using intent classification and segment the conversation into topic blocks before summarizing. Generate a per-topic summary and a brief overall summary. This prevents important details from one topic being buried by the volume of another. Use headings in the formatted output to visually separate topics.
What makes a summary actionable versus just informative?
An actionable summary includes three elements: what happened (key points), what was decided (decisions), and what still needs to happen (pending items with owners and deadlines where available). Summaries that only list what was discussed without extracting decisions and next steps force the reader to re-read the full conversation anyway, defeating the purpose.
#Summarization #ConversationAnalytics #NLP #AgentMemory #Python #AgenticAI #LearnAI #AIEngineering
CallSphere Team
Expert insights on AI voice agents and customer communication automation.
Try CallSphere AI Voice Agents
See how AI voice agents work for your industry. Live demo available -- no signup required.