# Multi-Agent Workflow
This example demonstrates a multi-agent workflow with intelligent model routing: cheaper models handle simple tasks, while more capable models handle complex reasoning.
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│                      MULTI-AGENT WORKFLOW                       │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  User Query                                                     │
│      │                                                          │
│      ▼                                                          │
│  ┌─────────────────────┐                                        │
│  │  Intent Classifier  │ ◄── GPT-4o-mini (fast, cheap)          │
│  │  (Low Complexity)   │                                        │
│  └──────────┬──────────┘                                        │
│             │                                                   │
│             ▼                                                   │
│  ┌─────────────────────┐                                        │
│  │       Router        │                                        │
│  └──────────┬──────────┘                                        │
│             │                                                   │
│   ┌─────────┼─────────┐                                         │
│   │         │         │                                         │
│   ▼         ▼         ▼                                         │
│ Simple   Moderate  Complex                                      │
│   │         │         │                                         │
│   ▼         ▼         ▼                                         │
│ GPT-3.5 GPT-4o-mini GPT-4o                                      │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘
```

## Implementation
**`src/agent.ts`**

```typescript
import 'dotenv/config';
import { LadgerTracer } from '@ladger/sdk';
import OpenAI from 'openai';

const tracer = new LadgerTracer({
  apiKey: process.env.LADGER_API_KEY!,
  flowName: 'multi-agent-workflow',
  debug: true,
});

const openai = new OpenAI();

// Pricing for cost calculation (USD per token)
const PRICING = {
  'gpt-4o': { input: 0.005 / 1000, output: 0.015 / 1000 },
  'gpt-4o-mini': { input: 0.00015 / 1000, output: 0.0006 / 1000 },
  'gpt-3.5-turbo': { input: 0.0005 / 1000, output: 0.0015 / 1000 },
};

function getCost(model: string, input: number, output: number): number {
  const p = PRICING[model as keyof typeof PRICING] || PRICING['gpt-4o'];
  return input * p.input + output * p.output;
}
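// Example: a GPT-4o call with 1,000 input and 500 output tokens costs
// 1,000 * (0.005 / 1000) + 500 * (0.015 / 1000) = $0.0125:
// getCost('gpt-4o', 1000, 500); // => 0.0125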
type Intent = 'question' | 'request' | 'greeting' | 'complaint' | 'unknown';
type Complexity = 'simple' | 'moderate' | 'complex';

/**
 * Step 1: Classify user intent.
 * Uses a fast, cheap model for quick classification.
 */
async function classifyIntent(
  message: string,
  parentSpan: any
): Promise<{ intent: Intent; complexity: Complexity }> {
  return tracer.trace('classify-intent', async (span) => {
    const completion = await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [
        {
          role: 'system',
          content: `Classify the user message. Return JSON:
{
  "intent": "question" | "request" | "greeting" | "complaint" | "unknown",
  "complexity": "simple" | "moderate" | "complex"
}

Simple: factual, short answer
Moderate: requires explanation
Complex: multi-step reasoning, analysis`,
        },
        { role: 'user', content: message },
      ],
      response_format: { type: 'json_object' },
      max_tokens: 50,
    });

    const usage = completion.usage!;
    span.recordCost({
      provider: 'openai',
      model: 'gpt-4o-mini',
      inputTokens: usage.prompt_tokens,
      outputTokens: usage.completion_tokens,
      costUsd: getCost('gpt-4o-mini', usage.prompt_tokens, usage.completion_tokens),
    });

    const result = JSON.parse(completion.choices[0].message.content || '{}');

    span.setAttributes({
      'classification.intent': result.intent,
      'classification.complexity': result.complexity,
    });

    return {
      intent: result.intent || 'unknown',
      complexity: result.complexity || 'moderate',
    };
  }, { parent: parentSpan });
}

/**
 * Step 2: Route to the appropriate model based on complexity.
 */
function selectModel(complexity: Complexity): string {
  switch (complexity) {
    case 'simple':
      return 'gpt-3.5-turbo';
    case 'moderate':
      return 'gpt-4o-mini';
    case 'complex':
      return 'gpt-4o';
  }
}
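// Note: the switch is exhaustive over Complexity, so TypeScript
// guarantees a return value. An alternative (illustrative, not part of
// the example above) is a table-driven map, which makes it easy to add
// tiers without touching control flow:
//
//   const MODEL_BY_COMPLEXITY: Record<Complexity, string> = {
//     simple: 'gpt-3.5-turbo',
//     moderate: 'gpt-4o-mini',
//     complex: 'gpt-4o',
//   };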
/**
 * Step 3: Generate a response with the selected model.
 */
async function generateResponse(
  message: string,
  intent: Intent,
  model: string,
  parentSpan: any
): Promise<string> {
  return tracer.trace('generate-response', async (span) => {
    span.setAttributes({
      'response.model': model,
      'response.intent': intent,
    });

    const systemPrompt = getSystemPrompt(intent);

    const completion = await openai.chat.completions.create({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: message },
      ],
    });

    const usage = completion.usage!;
    span.recordCost({
      provider: 'openai',
      model,
      inputTokens: usage.prompt_tokens,
      outputTokens: usage.completion_tokens,
      costUsd: getCost(model, usage.prompt_tokens, usage.completion_tokens),
    });

    return completion.choices[0].message.content || '';
  }, { parent: parentSpan });
}

function getSystemPrompt(intent: Intent): string {
  const prompts: Record<Intent, string> = {
    question: 'You are a knowledgeable assistant. Answer questions clearly and accurately.',
    request: 'You are a helpful assistant. Help the user accomplish their request.',
    greeting: 'You are a friendly assistant. Respond warmly to greetings.',
    complaint: 'You are an empathetic support agent. Address concerns professionally.',
    unknown: 'You are a helpful assistant. Respond appropriately to the user.',
  };
  return prompts[intent];
}

/**
 * Main agent workflow.
 */
export async function handleMessage(message: string): Promise<{
  response: string;
  metadata: {
    intent: Intent;
    complexity: Complexity;
    model: string;
  };
}> {
  // New session for each conversation turn
  tracer.newSession();

  return tracer.trace('agent-workflow', async (rootSpan) => {
    rootSpan.setAttributes({
      'input.length': message.length,
    });

    // Step 1: Classify
    const { intent, complexity } = await classifyIntent(message, rootSpan);

    // Step 2: Route
    const model = selectModel(complexity);
    rootSpan.setAttributes({
      'routing.intent': intent,
      'routing.complexity': complexity,
      'routing.model': model,
    });

    // Step 3: Generate
    const response = await generateResponse(message, intent, model, rootSpan);

    return {
      response,
      metadata: { intent, complexity, model },
    };
  });
}

// CLI interface
async function main() {
  const testMessages = [
    'Hi there!', // Simple greeting
    'What is 2 + 2?', // Simple question
    'How does photosynthesis work?', // Moderate question
    'Explain quantum entanglement and its implications for computing', // Complex
  ];

  console.log('Testing Multi-Agent Workflow\n');
  console.log('='.repeat(60));

  for (const message of testMessages) {
    console.log(`\n📝 Input: "${message}"`);

    const result = await handleMessage(message);

    console.log(`🏷️ Intent: ${result.metadata.intent}`);
    console.log(`📊 Complexity: ${result.metadata.complexity}`);
    console.log(`🤖 Model: ${result.metadata.model}`);
    console.log(`💬 Response: ${result.response.slice(0, 100)}...`);
    console.log('-'.repeat(60));
  }

  await tracer.shutdown();
}

main().catch(console.error);
```

## Express API Version
```typescript
import express from 'express';
import { handleMessage } from './agent';

const app = express();
app.use(express.json());

app.post('/agent', async (req, res) => {
  const { message } = req.body;

  if (!message) {
    return res.status(400).json({ error: 'Message is required' });
  }

  try {
    const result = await handleMessage(message);
    res.json(result);
  } catch (error) {
    console.error('Agent error:', error);
    res.status(500).json({ error: 'Failed to process message' });
  }
});

app.listen(3000, () => {
  console.log('Agent API running on http://localhost:3000');
});
```
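To exercise the endpoint, here is a minimal client sketch (assuming the server above is running locally and Node 18+ for the built-in `fetch`):

```typescript
// Minimal client for the /agent endpoint above (assumes Node 18+ fetch).
const res = await fetch('http://localhost:3000/agent', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ message: 'How does photosynthesis work?' }),
});

const { response, metadata } = await res.json();
console.log(metadata.model); // e.g. 'gpt-4o-mini' for a moderate question
console.log(response);
```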
## Cost Comparison

Without intelligent routing (always using GPT-4o):

| Query Type | Count | Cost/Query | Total |
|---|---|---|---|
| Simple | 5,000 | $0.025 | $125 |
| Moderate | 3,000 | $0.025 | $75 |
| Complex | 2,000 | $0.025 | $50 |
| Total | 10,000 | | $250 |
With intelligent routing:
| Query Type | Model | Count | Cost/Query | Total |
|---|---|---|---|---|
| Simple | GPT-3.5 | 5,000 | $0.002 | $10 |
| Moderate | GPT-4o-mini | 3,000 | $0.003 | $9 |
| Complex | GPT-4o | 2,000 | $0.025 | $50 |
| Total | | 10,000 | | $69 |
Savings: $181/month (72% reduction)
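The routed total can be recomputed directly from the table (the per-query costs themselves are illustrative averages, not guarantees):

```typescript
// Recompute the routed monthly spend from the table's per-query figures.
// The per-query costs are illustrative averages taken from the table above.
const queryMix = [
  { tier: 'simple', count: 5_000, costPerQuery: 0.002 },
  { tier: 'moderate', count: 3_000, costPerQuery: 0.003 },
  { tier: 'complex', count: 2_000, costPerQuery: 0.025 },
];

const total = queryMix.reduce((sum, q) => sum + q.count * q.costPerQuery, 0);
console.log(`Routed total: $${total.toFixed(2)}`); // $69.00 vs. $250.00 baseline
```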
## Dashboard View
After running the workflow, the Ladger dashboard shows:
```
FLOW: multi-agent-workflow

├── agent-workflow (root)
│   ├── classify-intent
│   │   └── Model: gpt-4o-mini
│   │       Cost: $0.0002
│   │
│   └── generate-response
│       └── Model: [varies by complexity]
│           Cost: $0.002 - $0.025

COST BREAKDOWN:
• classify-intent: 8% of total
• generate-response: 92% of total

MODEL DISTRIBUTION:
• gpt-3.5-turbo: 50% of calls
• gpt-4o-mini: 30% of calls
• gpt-4o: 20% of calls
```

## Advanced: Adding Memory
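Single-turn routing discards context between calls. The sketch below keeps per-conversation state in memory, reuses the first message's classification for subsequent turns, and traces each turn as its own span: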
```typescript
interface ConversationState {
  messages: { role: 'user' | 'assistant'; content: string }[];
  intent: Intent;
  complexity: Complexity;
}

const conversations = new Map<string, ConversationState>();

async function handleConversation(
  conversationId: string,
  message: string
): Promise<string> {
  // Get or create state
  let state = conversations.get(conversationId);
  if (!state) {
    // New conversation - classify the first message only
    const { intent, complexity } = await classifyIntent(message, null);
    state = { messages: [], intent, complexity };
    conversations.set(conversationId, state);
  }

  // Add the user message to the history
  state.messages.push({ role: 'user', content: message });

  // Generate a response with the full conversation context
  const model = selectModel(state.complexity);
  const response = await tracer.trace('conversation-turn', async (span) => {
    const completion = await openai.chat.completions.create({
      model,
      messages: [
        { role: 'system', content: getSystemPrompt(state!.intent) },
        ...state!.messages,
      ],
    });

    span.setAttributes({
      'conversation.id': conversationId,
      'conversation.length': state!.messages.length,
    });

    const usage = completion.usage!;
    span.recordCost({
      provider: 'openai',
      model,
      inputTokens: usage.prompt_tokens,
      outputTokens: usage.completion_tokens,
      costUsd: getCost(model, usage.prompt_tokens, usage.completion_tokens),
    });

    return completion.choices[0].message.content || '';
  });

  // Add the assistant response to the history
  state.messages.push({ role: 'assistant', content: response });

  return response;
}
```
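Note that `state.messages` grows without bound, so input-token cost rises on every turn. A minimal mitigation is to cap the retained history; the sketch below assumes an arbitrary window of 20 messages (a production system might summarize older turns instead):

```typescript
const MAX_HISTORY = 20; // arbitrary cap on retained messages

function trimHistory(state: ConversationState): void {
  if (state.messages.length > MAX_HISTORY) {
    // Keep only the most recent messages to bound input tokens.
    state.messages = state.messages.slice(-MAX_HISTORY);
  }
}

// Call trimHistory(state) after each assistant response is appended.
```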
## Next Steps

- Learn about Task Classification in detail
- Explore Optimization Recommendations
- Test changes with Simulations