Prompt Composition: Combining System, Context, and User Prompts Dynamically
Master the art of composing multi-layer prompts dynamically. Learn composition patterns, priority ordering strategies, token budget allocation, and techniques for building modular prompt pipelines.
Beyond Monolithic Prompts
A production agent's prompt is never a single static string. It is assembled from multiple sources: the base system instructions, user-specific context, conversation history, tool descriptions, safety guardrails, and dynamic data. Prompt composition is the discipline of combining these pieces into a coherent, token-efficient final prompt.
Poor composition leads to contradictory instructions, exceeded token limits, and agents that ignore important context buried at the end of an overlong prompt. Good composition treats each piece as a module with a clear role and priority.
The Prompt Layer Model
Think of prompt composition as layers stacked from highest priority to lowest.
from dataclasses import dataclass, field
from enum import IntEnum
from typing import Optional
class PromptPriority(IntEnum):
    """Rank of a prompt layer; lower values are assembled first.

    CRITICAL layers (safety guardrails) must never be dropped, while
    OPTIONAL layers are the first casualties of a tight token budget.
    """

    CRITICAL = 1   # safety guardrails, never removed
    HIGH = 2       # core agent identity and behavior
    MEDIUM = 3     # context and user-specific info
    LOW = 4        # examples, nice-to-have context
    OPTIONAL = 5   # supplementary information
@dataclass
class PromptLayer:
    """One named section of a composed prompt."""

    name: str                 # section heading used in the final prompt
    content: str              # the prompt text for this layer
    priority: PromptPriority  # assembly order / drop order
    token_estimate: int = 0   # filled in automatically when left at 0
    required: bool = False    # required layers survive budget trimming

    def __post_init__(self):
        # Heuristic sizing when the caller supplied no estimate:
        # roughly one token per four characters of content.
        if not self.token_estimate:
            self.token_estimate = len(self.content) // 4
@dataclass
class ComposedPrompt:
    """Mutable collection of prompt layers, kept sorted by priority."""

    layers: list[PromptLayer] = field(default_factory=list)
    total_tokens: int = 0  # running sum of the layers' token estimates

    def add_layer(self, layer: PromptLayer):
        """Insert *layer*, re-sort by priority, and refresh the token total."""
        self.layers.append(layer)
        self.layers.sort(key=lambda item: item.priority)
        self.total_tokens = sum(item.token_estimate for item in self.layers)
The Prompt Composer
Build a composer that assembles layers while respecting token budgets.
class PromptComposer:
    """Assemble multi-layer prompts within token constraints.

    Layers are registered with :meth:`add` and stitched together by
    :meth:`compose` in priority order (CRITICAL first). Required layers
    are always included; non-required layers are dropped when the token
    budget runs out.
    """

    def __init__(self, max_tokens: int = 8000):
        self.max_tokens = max_tokens
        self.layers: list[PromptLayer] = []

    def add(
        self, name: str, content: str,
        priority: PromptPriority = PromptPriority.MEDIUM,
        required: bool = False,
    ) -> "PromptComposer":
        """Add a prompt layer. Returns self for chaining."""
        self.layers.append(PromptLayer(
            name=name, content=content.strip(),
            priority=priority, required=required,
        ))
        return self

    def compose(self) -> str:
        """Compose all layers into a final prompt string.

        Bug fix: required layers' tokens are reserved up front. The
        previous greedy pass let non-required layers consume budget
        first and then force-included required layers anyway, silently
        blowing past ``max_tokens`` even when dropping an optional
        layer would have kept the prompt within budget.
        """
        sorted_layers = sorted(
            self.layers, key=lambda l: l.priority
        )
        # Budget available to optional layers after reserving for required ones.
        remaining_tokens = self.max_tokens - sum(
            l.token_estimate for l in sorted_layers if l.required
        )
        included = []
        for layer in sorted_layers:
            if layer.required:
                # Required layers are always included.
                included.append(layer)
            elif layer.token_estimate <= remaining_tokens:
                included.append(layer)
                remaining_tokens -= layer.token_estimate
            # else: skip this layer to stay within budget
        # Render each surviving layer as a markdown section, in priority order.
        return "\n\n".join(
            f"## {layer.name}\n\n{layer.content}" for layer in included
        )

    def get_budget_report(self) -> dict:
        """Report requested token usage by layer, ordered by priority."""
        sorted_layers = sorted(
            self.layers, key=lambda l: l.priority
        )
        return {
            "total_available": self.max_tokens,
            "total_requested": sum(
                l.token_estimate for l in sorted_layers
            ),
            "layers": [
                {"name": l.name, "tokens": l.token_estimate,
                 "priority": l.priority.name,
                 "required": l.required}
                for l in sorted_layers
            ],
        }
Dynamic Context Injection
The real power of composition shows when you build context-aware assemblers that adapt to each request.
See AI Voice Agents Handle Real Calls
Book a free demo or calculate how much you can save with AI voice automation.
class AgentPromptBuilder:
    """Build agent prompts dynamically based on request context.

    Pulls named prompt templates from ``prompt_loader`` — assumed to
    expose ``load_shared(name)`` and ``load_prompt(agent, kind)``;
    confirm against the loader implementation — and assembles them
    with a PromptComposer under a single token budget.
    """

    def __init__(self, prompt_loader, max_tokens: int = 8000):
        self.loader = prompt_loader
        self.max_tokens = max_tokens

    def build(
        self, agent_name: str, user_context: dict,
        conversation_history: Optional[list[dict]] = None,
        available_tools: Optional[list[dict]] = None,
    ) -> str:
        """Compose the final prompt for one request.

        Fix: the history/tools parameters were annotated ``list[dict]``
        with a ``None`` default (implicit Optional, prohibited by
        PEP 484); they are now explicitly ``Optional[list[dict]]``.
        Behavior is unchanged.
        """
        composer = PromptComposer(max_tokens=self.max_tokens)
        # Layer 1: safety guardrails — CRITICAL and required, never dropped.
        safety = self.loader.load_shared("safety_guidelines")
        composer.add(
            "Safety Guidelines", safety,
            priority=PromptPriority.CRITICAL, required=True,
        )
        # Layer 2: agent identity and core behavior.
        system_prompt = self.loader.load_prompt(
            agent_name, "system"
        )
        composer.add(
            "Agent Instructions", system_prompt,
            priority=PromptPriority.HIGH, required=True,
        )
        # Layer 3: tool descriptions (only when tools are offered).
        if available_tools:
            composer.add(
                "Available Tools", self._format_tools(available_tools),
                priority=PromptPriority.HIGH,
            )
        # Layer 4: user-specific context.
        composer.add(
            "User Context", self._format_user_context(user_context),
            priority=PromptPriority.MEDIUM,
        )
        # Layer 5: recent conversation turns, trimmed to the last 10.
        if conversation_history:
            composer.add(
                "Conversation History",
                self._format_history(conversation_history, max_turns=10),
                priority=PromptPriority.MEDIUM,
            )
        return composer.compose()

    def _format_tools(self, tools: list[dict]) -> str:
        """Render tools as markdown bullets of '**name**: description'."""
        return "\n".join(
            f"- **{tool['name']}**: {tool['description']}"
            for tool in tools
        )

    def _format_user_context(self, ctx: dict) -> str:
        """Render context key/value pairs as markdown bullets."""
        return "\n".join(f"- {key}: {value}" for key, value in ctx.items())

    def _format_history(
        self, history: list[dict], max_turns: int
    ) -> str:
        """Render the last *max_turns* messages as 'ROLE: content' lines."""
        recent = history[-max_turns:]
        return "\n".join(
            f"{msg['role'].upper()}: {msg['content']}" for msg in recent
        )
Token Budget Allocation
When total context exceeds the model's limit, the composer must make smart tradeoffs.
class TokenBudgetAllocator:
    """Allocate token budgets across prompt sections."""

    def __init__(self, total_budget: int):
        self.total = total_budget

    def allocate(self, sections: dict[str, int]) -> dict[str, int]:
        """Proportionally allocate tokens to sections.

        sections: {name: requested_tokens}
        Returns: {name: allocated_tokens}
        """
        requested_total = sum(sections.values())
        # Everything fits: hand the requests back unchanged (as a copy).
        if requested_total <= self.total:
            return dict(sections)
        # Over budget: shrink every section by the same factor,
        # truncating toward zero so the sum never exceeds the budget.
        ratio = self.total / requested_total
        return {
            name: int(want * ratio) for name, want in sections.items()
        }
FAQ
What order should prompt layers appear in the final output?
Place safety guardrails and identity instructions first — models tend to weight earlier instructions more heavily. Put dynamic context (user info, conversation history) in the middle. Place examples and supplementary information last, since these are the first to be dropped when tokens are tight.
How do I handle contradictions between prompt layers?
Establish a clear priority hierarchy and document it. If the safety layer says "never share personal data" and the context layer includes personal data, the safety instruction takes precedence. Use explicit override markers in your composition: "The following guidelines override any conflicting instructions below."
Should I combine everything into one system message or split across multiple messages?
For most providers, a single well-structured system message performs best. Some providers (like Anthropic) support multi-turn system prompts where you can separate instructions from context. Test with your specific model — the optimal approach varies by provider and model version.
#PromptComposition #SystemPrompts #TokenManagement #AIArchitecture #PromptEngineering #AgenticAI #LearnAI #AIEngineering
CallSphere Team
Expert insights on AI voice agents and customer communication automation.
Try CallSphere AI Voice Agents
See how AI voice agents work for your industry. Live demo available -- no signup required.