# Output Parsers - Structuring LLM Responses
Transform unstructured LLM outputs into reliable, typed data structures for robust application development
## Understanding Output Parsers
Output parsers are crucial components that convert raw LLM text responses into structured, usable data formats. They bridge the gap between natural language output and programmatic data handling.
### The Output Processing Flow
text
OUTPUT PARSING PIPELINE
(From text to structured data)
┌─────────────────────────────────────────────────────────────────┐
│                         RAW LLM OUTPUT                          │
│                       (Unstructured text)                       │
│                                                                 │
│ "The weather is sunny with temperature 75°F. Rain chance is     │
│ 20%. Wind speed is 5 mph from the northwest. UV index is 7."    │
└─────────────────────┬───────────────────────────────────────────┘
                      │
                      ▼ OUTPUT PARSER
┌─────────────────────────────────────────────────────────────────┐
│                        STRUCTURED OUTPUT                        │
│                      (Typed data objects)                       │
│                                                                 │
│ {                                                               │
│   "condition": "sunny",                                         │
│   "temperature": 75,                                            │
│   "rain_chance": 20,                                            │
│   "wind_speed": 5,                                              │
│   "wind_direction": "northwest",                                │
│   "uv_index": 7                                                 │
│ }                                                               │
└─────────────────────────────────────────────────────────────────┘

## Core Output Parser Types
### String Output Parser
python
import re

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
# Basic string output parser - simplest form
str_parser = StrOutputParser()

# Simple chain with string parsing: prompt -> chat model -> parser,
# composed with the pipe operator.
haiku_prompt = ChatPromptTemplate.from_template("Write a haiku about {topic}")
basic_chain = haiku_prompt | ChatOpenAI() | StrOutputParser()

result = basic_chain.invoke({"topic": "programming"})
print(f"Haiku: {result}")
print(f"Type: {type(result)}")  # <class 'str'>
# Manual equivalent of what StrOutputParser does
def manual_string_parser(ai_message):
    """Return ``ai_message.content`` when present, else ``str(ai_message)``.

    Hand-written illustration of extracting plain text from a model reply.
    """
    try:
        return ai_message.content
    except AttributeError:
        # Not a message-like object: fall back to its string form.
        return str(ai_message)
# Using manual parser: take a raw chat-model reply and extract its text by hand.
hello_reply = ChatOpenAI().invoke("Hello")
manual_result = manual_string_parser(hello_reply)
print(f"Manual result: {manual_result}")

### Pydantic Output Parser
python
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
# Define data models
class WeatherReport(BaseModel):
    # Weather facts extracted from a free-text description.
    condition: str = Field(
        description="Weather condition (sunny, cloudy, rainy, etc.)",
    )
    temperature: int = Field(
        description="Temperature in Fahrenheit",
    )
    humidity: int = Field(
        description="Humidity percentage",
    )
    wind_speed: int = Field(
        description="Wind speed in mph",
    )

    class Config:
        # Example payload attached to the model's schema.
        schema_extra = {
            "example": dict(
                condition="sunny",
                temperature=75,
                humidity=45,
                wind_speed=10,
            ),
        }
class PersonProfile(BaseModel):
    # Profile of a single person; every field is required.
    name: str = Field(
        description="Person's full name",
    )
    age: int = Field(
        description="Person's age",
    )
    occupation: str = Field(
        description="Person's job or profession",
    )
    skills: List[str] = Field(
        description="List of skills or expertise areas",
    )
    bio: str = Field(
        description="Brief biography or description",
    )
# Create parsers (one per target model)
weather_parser = PydanticOutputParser(pydantic_object=WeatherReport)
person_parser = PydanticOutputParser(pydantic_object=PersonProfile)

# Weather analysis chain: the parser's format instructions are pre-filled
# into the prompt, then prompt -> chat model -> parser.
weather_prompt = ChatPromptTemplate.from_template(
    "Analyze this weather description and extract structured data: {weather_text}\n\n"
    "{format_instructions}"
).partial(format_instructions=weather_parser.get_format_instructions())
weather_chain = weather_prompt | ChatOpenAI() | weather_parser

# Person profile chain, assembled the same way.
profile_prompt = ChatPromptTemplate.from_template(
    "Create a person profile based on this description: {description}\n\n"
    "{format_instructions}"
).partial(format_instructions=person_parser.get_format_instructions())
profile_chain = profile_prompt | ChatOpenAI() | person_parser

# Test the chains
weather_data = weather_chain.invoke(
    {"weather_text": "It's a beautiful sunny day with 78 degrees, light breeze at 8 mph, and humidity around 40%"}
)
person_data = profile_chain.invoke(
    {"description": "John is a 32-year-old software engineer who specializes in Python and machine learning. He has 8 years of experience building AI applications."}
)

print(f"Weather: {weather_data}")
print(f"Type: {type(weather_data)}")  # <class 'WeatherReport'>
print(f"Temperature: {weather_data.temperature}")
print(f"Person: {person_data}")
print(f"Skills: {person_data.skills}")

### JSON Output Parser
python
from langchain_core.output_parsers import JsonOutputParser
import json
# Simple JSON parser without schema
json_parser = JsonOutputParser()

# Prompt text kept in a named constant so the chain definition stays short.
SENTIMENT_JSON_TEMPLATE = """Analyze the following text and return a JSON object with sentiment analysis:
Text: {text}
Return JSON with these fields:
- sentiment: positive/negative/neutral
- confidence: 0.0 to 1.0
- key_phrases: array of important phrases
- emotions: array of detected emotions
Ensure your response is valid JSON only."""

# Create a chain that outputs JSON
json_chain = (
    ChatPromptTemplate.from_template(SENTIMENT_JSON_TEMPLATE)
    | ChatOpenAI()
    | json_parser
)

# Test JSON parsing
sentiment_input = {
    "text": "I absolutely love this new feature! It makes my work so much easier and faster."
}
sentiment_result = json_chain.invoke(sentiment_input)
print(f"Sentiment analysis: {sentiment_result}")
print(f"Type: {type(sentiment_result)}")  # <class 'dict'>
print(f"Confidence: {sentiment_result.get('confidence')}")
# JSON with schema validation
class AnalysisResult(BaseModel):
    # Typed counterpart of the free-form sentiment JSON requested above.
    sentiment: str = Field(
        description="Sentiment: positive, negative, or neutral",
    )
    confidence: float = Field(
        description="Confidence score between 0 and 1",
    )
    key_phrases: List[str] = Field(
        description="Important phrases from the text",
    )
    emotions: List[str] = Field(
        description="Detected emotions",
    )
# Pydantic parser with JSON output (more robust)
structured_json_parser = PydanticOutputParser(pydantic_object=AnalysisResult)

# The parser's own format instructions are pre-filled into the prompt.
structured_prompt = ChatPromptTemplate.from_template(
    "Analyze this text for sentiment: {text}\n\n{format_instructions}"
).partial(format_instructions=structured_json_parser.get_format_instructions())
structured_chain = structured_prompt | ChatOpenAI() | structured_json_parser

structured_result = structured_chain.invoke(
    {"text": "The movie was disappointing. Poor acting and confusing plot."}
)
print(f"Structured result: {structured_result}")

### List Output Parser
python
from langchain_core.output_parsers import CommaSeparatedListOutputParser
# Comma-separated list parser
list_parser = CommaSeparatedListOutputParser()

# Chain for generating lists: prompt (with format instructions) -> model -> parser
list_prompt = ChatPromptTemplate.from_template(
    "List {count} {category} separated by commas. Only return the comma-separated list.\n\n"
    "{format_instructions}"
).partial(format_instructions=list_parser.get_format_instructions())
list_chain = list_prompt | ChatOpenAI() | list_parser

# Generate different types of lists
programming_languages = list_chain.invoke(
    {"count": "5", "category": "popular programming languages"}
)
fruits = list_chain.invoke(
    {"count": "7", "category": "tropical fruits"}
)

print(f"Programming languages: {programming_languages}")
print(f"Type: {type(programming_languages)}")  # <class 'list'>
print(f"First language: {programming_languages[0]}")
print(f"Fruits: {fruits}")
# Custom list parser for specific formats
class NumberedListOutputParser:
    """Custom parser for numbered or bulleted lists.

    Every non-empty line of the input becomes one item, with a single
    leading marker removed: ``<digits>.``, ``<digits>)``, ``-``, ``*`` or
    the bullet character U+2022.
    """

    # A numeric marker must not be followed by another digit, so a line that
    # starts with a decimal such as "1.5 liters" is left intact. The bullet is
    # written as an escape so it survives re-encoding of this file.
    _MARKER = re.compile(r'^(?:\d+[.)](?!\d)|[-*\u2022])\s*')

    def parse(self, text: str) -> List[str]:
        """Split ``text`` into list items.

        Args:
            text: Raw list text, one item per line.

        Returns:
            The items in input order, without markers or surrounding
            whitespace. Blank lines and lines that consist only of a marker
            are skipped.
        """
        items = []
        for line in text.strip().split('\n'):
            item = self._MARKER.sub('', line.strip(), count=1).strip()
            if item:
                items.append(item)
        return items
numbered_parser = NumberedListOutputParser()
numbered_chain = (
    ChatPromptTemplate.from_template(
        "Create a numbered list of {count} {category}. Use format: 1. item, 2. item, etc."
    )
    | ChatOpenAI()
    # The chat model emits a message object, while numbered_parser.parse()
    # calls str methods on its argument, so convert to plain text first.
    | StrOutputParser()
    | numbered_parser.parse
)
# In a real LCEL chain the custom parse method must be wrapped so that it receives plain text; this only shows the concept.

## Advanced Output Parsing Patterns
### Multi-Format Output Parser
python
from enum import Enum
from typing import Union
class OutputFormat(Enum):
    """Output formats a caller can request; values are lowercase names."""

    JSON = "json"          # JSON object
    PYDANTIC = "pydantic"  # typed model
    LIST = "list"          # comma-separated list
    STRING = "string"      # plain text
class MultiFormatParser:
    """Parser that can handle multiple output formats dynamically.

    Formats without a registered parser (``OutputFormat.PYDANTIC`` here)
    are handled as plain strings by both ``parse`` and ``create_chain``.
    """

    def __init__(self):
        self.parsers = {
            OutputFormat.JSON: JsonOutputParser(),
            OutputFormat.LIST: CommaSeparatedListOutputParser(),
            OutputFormat.STRING: StrOutputParser(),
        }

    def _parser_for(self, format_type):
        """Return the parser for ``format_type``, defaulting to the string parser."""
        return self.parsers.get(format_type, self.parsers[OutputFormat.STRING])

    def parse(self, text: str, format_type: OutputFormat):
        """Parse ``text`` with the parser registered for ``format_type``."""
        return self._parser_for(format_type).parse(text)

    def create_chain(self, format_type: OutputFormat):
        """Create a chain with the specified output format.

        The prompt ends with a one-line instruction for the requested
        format. An unregistered format gets the plain-text instruction,
        matching the parser fallback, instead of raising ``KeyError``.
        """
        format_instructions = {
            OutputFormat.JSON: "Return your response as a valid JSON object.",
            OutputFormat.LIST: "Return your response as a comma-separated list.",
            OutputFormat.STRING: "Return your response as plain text.",
        }
        instruction = format_instructions.get(
            format_type, format_instructions[OutputFormat.STRING]
        )
        return (
            ChatPromptTemplate.from_template(
                "Answer the following question: {question}\n\n"
                f"{instruction}"
            )
            | ChatOpenAI()
            | self._parser_for(format_type)
        )
# Usage example
multi_parser = MultiFormatParser()

# Create chains for different formats (built in JSON, LIST, STRING order)
json_chain, list_chain, string_chain = (
    multi_parser.create_chain(fmt)
    for fmt in (OutputFormat.JSON, OutputFormat.LIST, OutputFormat.STRING)
)

# Test different formats: the same question goes through every chain
question = "What are the main benefits of using Python for data science?"
json_result, list_result, string_result = (
    chain.invoke({"question": question})
    for chain in (json_chain, list_chain, string_chain)
)

print(f"JSON format: {json_result}")
print(f"List format: {list_result}")
print(f"String format: {string_result}")

### Intelligent Format Detection
python
import re
from typing import Any
class SmartOutputParser:
    """Automatically detect and parse output format.

    ``detect_format`` classifies text as ``"json"``, ``"numbered_list"``,
    ``"list"`` or ``"string"``; ``parse`` dispatches on that label and
    falls back to the string parser if the chosen parser raises.
    """

    # "<digits>." at line start, not followed by a digit (so "3.14" is not a marker).
    _NUMBERED_ITEM = re.compile(r'^\d+\.(?!\d)')

    def __init__(self):
        self.json_parser = JsonOutputParser()
        self.list_parser = CommaSeparatedListOutputParser()
        self.str_parser = StrOutputParser()

    def detect_format(self, text: str) -> str:
        """Detect the format of the output text.

        Checks run in this order:
          1. JSON - text is wrapped in ``{}`` or ``[]`` and ``json.loads`` accepts it.
          2. Numbered list - at least two lines start with ``<digits>.``. This
             runs before the comma check so multi-line numbered items that
             contain commas are not split on those commas.
          3. Comma list - contains a comma and none of ``{}[]()``.
          4. Numbered list - the first line starts with ``<digits>.``.
          5. Otherwise ``"string"``.
        """
        text = text.strip()

        looks_wrapped = (text.startswith('{') and text.endswith('}')) or \
            (text.startswith('[') and text.endswith(']'))
        if looks_wrapped:
            try:
                json.loads(text)
                return "json"
            except json.JSONDecodeError:
                pass  # wrapped like JSON but invalid; keep checking

        lines = text.split('\n')
        numbered_lines = sum(
            1 for line in lines if self._NUMBERED_ITEM.match(line.strip())
        )
        if numbered_lines >= 2:
            return "numbered_list"

        # Simple heuristic: commas but no complex structures
        if ',' in text and not any(char in text for char in '{}[]()'):
            return "list"

        if re.match(r'^\d+\.', lines[0]):
            return "numbered_list"

        return "string"

    def parse(self, text: str) -> Any:
        """Parse text based on detected format.

        Any exception from the selected parser is reported with ``print``
        and the text is re-parsed with the string parser (best effort).
        """
        format_type = self.detect_format(text)
        try:
            if format_type == "json":
                return self.json_parser.parse(text)
            elif format_type == "list":
                return self.list_parser.parse(text)
            elif format_type == "numbered_list":
                return self._parse_numbered_list(text)
            else:
                return self.str_parser.parse(text)
        except Exception as e:
            print(f"Parsing failed for format {format_type}: {e}")
            return self.str_parser.parse(text)

    def _parse_numbered_list(self, text: str) -> List[str]:
        """Return the content of every ``<digits>. content`` line; other lines are ignored."""
        items = []
        for line in text.split('\n'):
            match = re.match(r'^\d+\.\s*(.+)', line.strip())
            if match:
                items.append(match.group(1))
        return items
# Smart parser in a chain
smart_parser = SmartOutputParser()
smart_chain = (
    ChatPromptTemplate.from_template(
        "Answer this question in the most appropriate format: {question}"
    )
    | ChatOpenAI()
    # smart_parser.parse() works on plain text, so unwrap the chat message first.
    | StrOutputParser()
    | smart_parser.parse
)

# Test with questions that naturally lead to different formats
questions = [
    "List 5 programming languages",  # Should return list
    "What is Python?",  # Should return string
    "Create a JSON object describing a person with name, age, and skills",  # Should return JSON
]
for question in questions:
    result = smart_chain.invoke({"question": question})
    print(f"Question: {question}")
    print(f"Result: {result}")
    print(f"Type: {type(result)}")
    print("---")

## Custom Output Parsers
python
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, Any
import re
# Base class for custom parsers
class CustomOutputParser(ABC):
    """Abstract interface shared by the hand-written parsers in this file."""

    @abstractmethod
    def parse(self, text: str) -> Any:
        """Turn raw text into a parser-specific result."""

    @abstractmethod
    def get_format_instructions(self) -> str:
        """Return the prompt text describing the layout ``parse`` expects."""
# Custom parser for code extraction
class CodeExtractionParser(CustomOutputParser):
    """Extract fenced code blocks from LLM output.

    ``parse`` returns a dict with three lists: ``code_blocks`` (stripped
    block bodies), ``languages`` (the fence's language tag, or
    ``"unknown"``) and ``explanations`` (non-empty lines outside fences).
    """

    # Opening fence, optional language tag (any non-space, non-backtick run,
    # so "c++" or "objective-c" work), rest of the fence line, then a lazily
    # matched body - which may be empty - up to a closing fence that starts
    # a line. The previous pattern could not match such tags or empty
    # bodies and then paired a closing fence with later text.
    _FENCE = re.compile(r'```[ \t]*([^\s`]*)[^\n`]*\n(.*?)(?<=\n)```', re.DOTALL)

    def parse(self, text: str) -> Dict[str, List[str]]:
        """Split ``text`` into code blocks and surrounding explanation lines."""
        result = {"code_blocks": [], "languages": [], "explanations": []}
        for language, code in self._FENCE.findall(text):
            result["code_blocks"].append(code.strip())
            result["languages"].append(language or "unknown")

        # Extract text that's not in code blocks
        remaining_text = self._FENCE.sub("", text)
        result["explanations"] = [
            line.strip() for line in remaining_text.split('\n')
            if line.strip()
        ]
        return result

    def get_format_instructions(self) -> str:
        """Return prompt text asking for fenced code plus outside explanations."""
        return (
            "\n"
            "Format your response with code blocks using triple backticks:\n"
            "```python\n"
            "your code here\n"
            "```\n"
            "Include explanations outside the code blocks.\n"
        )
# Custom parser for mathematical expressions
@dataclass
class MathStep:
    """One step of a worked solution."""

    step_number: int  # 1-based position of the step
    description: str  # the line that introduced the step
    formula: str      # expression text collected for the step
    result: str       # text found after '=' on a formula line
class MathProblemParser(CustomOutputParser):
    """Parse step-by-step mathematical solutions into ``MathStep`` records."""

    def parse(self, text: str) -> List[MathStep]:
        """Group the lines of ``text`` into steps.

        A line starting with ``step <n>`` (case-insensitive) or ``<n>.``
        opens a new step and becomes its description. Until the next step
        starts, a line containing ``=`` sets the step's formula (text before
        the first ``=``) and result (text between the first and second
        ``=``); a line with another operator is appended to the formula.
        Lines before the first step are ignored.
        """
        collected = []
        active = None
        next_number = 1

        for raw_line in text.split('\n'):
            entry = raw_line.strip()
            if not entry:
                continue

            # Look for step indicators
            if re.match(r'^(step\s*\d+|^\d+\.)', entry.lower()):
                if active:
                    collected.append(active)
                active = MathStep(
                    step_number=next_number,
                    description=entry,
                    formula="",
                    result="",
                )
                next_number += 1
                continue

            if not active:
                continue

            # Look for mathematical expressions
            if '=' in entry:
                pieces = entry.split('=')
                active.formula = pieces[0].strip()
                active.result = pieces[1].strip()
            elif any(symbol in entry for symbol in '+-*/^'):
                active.formula += f" {entry}"

        if active:
            collected.append(active)
        return collected

    def get_format_instructions(self) -> str:
        """Return prompt text describing the step layout ``parse`` looks for."""
        return (
            "\n"
            "Solve the problem step by step:\n"
            "Step 1: [description]\n"
            "formula = result\n"
            "Step 2: [description]\n"
            "formula = result\n"
            "Continue for each step...\n"
        )
# Email extraction parser
class EmailDataParser(CustomOutputParser):
    """Extract structured data from email-like text."""

    def parse(self, text: str) -> Dict[str, Any]:
        """Collect headers, body, action items and dates from ``text``.

        Header lines (``Subject:``, ``From:``, ``To:``, ``Priority:``,
        case-insensitive) are recognised on any line. The first blank line
        switches to body mode; afterwards every non-header line is added to
        the body. Every line, header or not, is also scanned for action
        keywords and dates.
        """
        result = {
            "subject": "",
            "sender": "",
            "recipient": "",
            "body": "",
            "action_items": [],
            "dates": [],
            "priority": "normal"
        }
        # (lower-case prefix, result key); no prefix is a prefix of another.
        header_fields = (
            ('subject:', "subject"),
            ('from:', "sender"),
            ('to:', "recipient"),
            ('priority:', "priority"),
        )
        action_keywords = ('todo', 'action', 'task', 'please')
        # Dates (simple pattern): 1/2/2024 or "March 3, 2024"
        date_pattern = r'\b\d{1,2}/\d{1,2}/\d{4}\b|\b\w+ \d{1,2}, \d{4}\b'

        body_lines = []
        in_body = False
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            lowered = line.lower()

            # Extract email headers
            for prefix, key in header_fields:
                if lowered.startswith(prefix):
                    value = line[len(prefix):].strip()
                    result[key] = value.lower() if key == "priority" else value
                    break
            else:
                if in_body:
                    body_lines.append(line)
                elif line == '':
                    in_body = True

            # Extract action items
            if any(keyword in lowered for keyword in action_keywords):
                result["action_items"].append(line)

            # Extract dates
            result["dates"].extend(re.findall(date_pattern, line))

        result["body"] = '\n'.join(body_lines)
        return result

    def get_format_instructions(self) -> str:
        """Return prompt text describing the email layout ``parse`` looks for."""
        return (
            "\n"
            "Format as an email:\n"
            "Subject: [subject line]\n"
            "From: [sender]\n"
            "To: [recipient]\n"
            "Priority: [high/normal/low]\n"
            "[email body content]\n"
        )
# Usage examples
code_parser = CodeExtractionParser()
math_parser = MathProblemParser()
email_parser = EmailDataParser()

# Code extraction chain
code_chain = (
    ChatPromptTemplate.from_template(
        "Write a Python function to {task}. Include explanation.\n\n"
        "{format_instructions}"
    ).partial(format_instructions=code_parser.get_format_instructions())
    | ChatOpenAI()
    # The custom parsers work on plain text, so unwrap the chat message first.
    | StrOutputParser()
    | code_parser.parse
)

# Math problem chain
math_chain = (
    ChatPromptTemplate.from_template(
        "Solve this math problem step by step: {problem}\n\n"
        "{format_instructions}"
    ).partial(format_instructions=math_parser.get_format_instructions())
    | ChatOpenAI()
    | StrOutputParser()
    | math_parser.parse
)

# Test the custom parsers
code_result = code_chain.invoke({"task": "calculate fibonacci numbers"})
print(f"Code extraction: {code_result}")
math_result = math_chain.invoke({"problem": "Find the area of a circle with radius 5"})
print(f"Math steps: {math_result}")

## Parser Error Handling and Validation
### Robust Parser Implementation
python
from typing import Optional, Callable, Any
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class RobustParser:
    """Parser wrapper with comprehensive error handling.

    ``parse`` tries the primary parser up to ``max_retries`` times, cleaning
    the text between attempts, then the fallback parser. It never raises:
    if the fallback also fails, a ``"Parsing failed: ..."`` string is returned.
    """

    def __init__(self,
                 primary_parser: Any,
                 fallback_parser: Optional[Any] = None,
                 validator: Optional[Callable] = None,
                 max_retries: int = 3):
        """
        Args:
            primary_parser: Object with a ``parse(text)`` method, tried first.
            fallback_parser: Used after the primary attempts are exhausted;
                defaults to ``StrOutputParser()``.
            validator: Optional predicate on the parsed result; a falsy
                return counts as a failed primary attempt.
            max_retries: Number of primary-parser attempts.
        """
        self.primary_parser = primary_parser
        self.fallback_parser = fallback_parser or StrOutputParser()
        self.validator = validator
        self.max_retries = max_retries

    @staticmethod
    def _coerce_to_text(value: Any) -> str:
        """Return ``value`` as a string, unwrapping a ``.content`` attribute if present."""
        if isinstance(value, str):
            return value
        content = getattr(value, "content", value)
        return content if isinstance(content, str) else str(content)

    def parse(self, text: str) -> Any:
        """Parse with error handling and validation."""
        # A non-str input (e.g. a chat message piped in directly) used to make
        # the cleanup step raise inside the except block, escaping this
        # method. Normalise to text once, up front.
        text = self._coerce_to_text(text)

        # Try primary parser
        for attempt in range(self.max_retries):
            try:
                result = self.primary_parser.parse(text)
                # Validate result if validator provided
                if self.validator and not self.validator(result):
                    raise ValueError("Validation failed")
                logger.info("Successfully parsed with primary parser")
                return result
            except Exception as e:
                logger.warning("Primary parser attempt %d failed: %s", attempt + 1, e)
                if attempt < self.max_retries - 1:
                    # Clean text for the next attempt
                    text = self._clean_text_for_retry(text)

        # Try fallback parser
        try:
            logger.info("Trying fallback parser")
            result = self.fallback_parser.parse(text)
            if self.validator and not self.validator(result):
                # Best effort: the unvalidated fallback result is still returned.
                logger.warning("Fallback result failed validation")
            return result
        except Exception as e:
            logger.error("Fallback parser also failed: %s", e)
            return f"Parsing failed: {text}"

    def _clean_text_for_retry(self, text: str) -> str:
        """Strip whitespace, drop code-fence markers and collapse blank lines."""
        text = text.strip()
        text = re.sub(r'```\w*\n?', '', text)  # Remove code block markers
        text = re.sub(r'\n+', '\n', text)  # Remove extra newlines
        return text
# Validation functions
def validate_json_structure(data: dict) -> bool:
    """Return True only for a dict that contains both 'status' and 'data' keys."""
    if not isinstance(data, dict):
        return False
    return 'status' in data and 'data' in data
def validate_list_length(data: list) -> bool:
    """Return True only for a list holding between 1 and 20 items inclusive."""
    if not isinstance(data, list):
        return False
    return 0 < len(data) < 21
def validate_person_data(data: PersonProfile) -> bool:
    """Return True when the profile has a positive age, a non-blank name and at least one skill."""
    if not data.age > 0:
        return False
    if not len(data.name.strip()) > 0:
        return False
    return len(data.skills) > 0
# Create robust parsers
robust_json_parser = RobustParser(
    primary_parser=JsonOutputParser(),
    validator=validate_json_structure
)
robust_list_parser = RobustParser(
    primary_parser=CommaSeparatedListOutputParser(),
    validator=validate_list_length
)
robust_person_parser = RobustParser(
    primary_parser=PydanticOutputParser(pydantic_object=PersonProfile),
    validator=validate_person_data
)

# Test robust parsing.
# Each chain converts the chat message to plain text (StrOutputParser) before
# the robust parser sees it, because RobustParser.parse works on strings.
test_chains = {
    "json": (
        ChatPromptTemplate.from_template(
            "Create a JSON response about {topic}. Include 'status' and 'data' fields."
        )
        | ChatOpenAI()
        | StrOutputParser()
        | robust_json_parser.parse
    ),
    "list": (
        ChatPromptTemplate.from_template(
            "List key concepts about {topic} (3-8 items)."
        )
        | ChatOpenAI()
        | StrOutputParser()
        | robust_list_parser.parse
    ),
    "person": (
        # The primary parser is a PydanticOutputParser, so the prompt must ask
        # for that parser's own format. The earlier "Name: ... / Age: ..."
        # layout could never be parsed and always ended in the failure path.
        ChatPromptTemplate.from_template(
            "Create a person profile: {description}\n\n"
            "{format_instructions}"
        ).partial(
            format_instructions=robust_person_parser.primary_parser.get_format_instructions()
        )
        | ChatOpenAI()
        | StrOutputParser()
        | robust_person_parser.parse
    )
}

# Test the robust parsers
for parser_type, chain in test_chains.items():
    try:
        if parser_type == "json":
            result = chain.invoke({"topic": "artificial intelligence"})
        elif parser_type == "list":
            result = chain.invoke({"topic": "machine learning"})
        else:  # person
            result = chain.invoke({"description": "Sarah, 28, data scientist with Python and R skills"})
        print(f"{parser_type} result: {result}")
        print(f"Type: {type(result)}")
    except Exception as e:
        print(f"{parser_type} failed: {e}")

## Parser Composition and Chaining
### Multi-Stage Parsing
python
class PipelineParser:
    """Chain multiple parsers together for complex transformations."""

    def __init__(self, parsers: List[Any]):
        # Stages run in list order; each is an object with ``parse`` or a plain callable.
        self.parsers = parsers

    def parse(self, text: str) -> Any:
        """Feed ``text`` through every stage and return the final result.

        The output of one stage is the input of the next. A failing stage
        is logged with its 1-based position and the exception is re-raised.
        """
        current = text
        for stage_no, stage in enumerate(self.parsers, start=1):
            try:
                # Use the stage's parse method when it has one, else call it directly.
                run_stage = getattr(stage, 'parse', stage)
                current = run_stage(current)
                logger.info(f"Stage {stage_no} completed: {type(current)}")
            except Exception as exc:
                logger.error(f"Stage {stage_no} failed: {exc}")
                raise
        return current
# Example: Extract code, then analyze it
def code_analyzer(code_data: dict) -> dict:
    """Summarise extracted code blocks.

    Args:
        code_data: Dict with ``"code_blocks"`` (list of source strings) and
            ``"languages"`` keys.

    Returns:
        Dict with ``total_blocks``, ``languages`` (passed through),
        ``complexity_score`` (1, 2 or 3 points per block for up to 10,
        11-20, or more than 20 lines) and ``suggestions`` (one note per
        construct spotted in each block).
    """
    blocks = code_data["code_blocks"]
    languages = code_data["languages"]

    # (substring to look for, note to record), checked in this order.
    markers = (
        ('def ', "Contains function definitions"),
        ('class ', "Contains class definitions"),
        ('import ', "Uses external libraries"),
    )

    score = 0
    notes = []
    for snippet in blocks:
        # Simple complexity analysis based on line count
        line_count = snippet.count('\n') + 1
        if line_count > 20:
            score += 3
        elif line_count > 10:
            score += 2
        else:
            score += 1

        # Simple suggestions
        notes.extend(note for needle, note in markers if needle in snippet)

    return {
        "total_blocks": len(blocks),
        "languages": languages,
        "complexity_score": score,
        "suggestions": notes,
    }
# Create pipeline
code_analysis_pipeline = PipelineParser([
    CodeExtractionParser(),  # Extract code blocks
    code_analyzer  # Analyze the code
])

# Use in a chain
code_analysis_chain = (
    ChatPromptTemplate.from_template(
        "Write a Python program to {task}. Include multiple functions and explain your approach."
    )
    | ChatOpenAI()
    # The first pipeline stage runs a regex over plain text, so unwrap the
    # chat message before handing it to the pipeline.
    | StrOutputParser()
    | code_analysis_pipeline.parse
)
analysis_result = code_analysis_chain.invoke({
    "task": "implement a binary search tree with insert, search, and delete operations"
})
print(f"Code analysis: {analysis_result}")

### Conditional Parsing
python
class ConditionalParser:
    """Choose parser based on content analysis."""

    def __init__(self, parser_map: Dict[str, Any], default_parser: Any):
        # parser_map is keyed by the labels _detect_content_type returns.
        self.parser_map = parser_map
        self.default_parser = default_parser

    def _detect_content_type(self, text: str) -> str:
        """Classify ``text`` as "math", "code", "json", "list" or "text".

        Rules are tried in that order; keyword rules are case-insensitive
        substring checks, so the first matching rule wins.
        """
        lowered = text.lower()
        keyword_rules = (
            ("math", ('step', 'solve', 'calculate', 'equation')),
            ("code", ('def ', 'class ', 'import ', 'function')),
        )
        for label, needles in keyword_rules:
            if any(needle in lowered for needle in needles):
                return label

        if '{' in text and '}' in text:
            return "json"
        # Two or more commas, i.e. at least three comma-separated parts.
        if text.count(',') >= 2:
            return "list"
        return "text"

    def parse(self, text: str) -> Any:
        """Parse ``text`` with the parser mapped to its detected type (or the default)."""
        content_type = self._detect_content_type(text)
        chosen = self.parser_map.get(content_type, self.default_parser)
        logger.info(f"Detected content type: {content_type}")
        return chosen.parse(text)
# Create conditional parser
conditional_parser = ConditionalParser(
    parser_map={
        "math": math_parser,
        "code": code_parser,
        "json": JsonOutputParser(),
        "list": CommaSeparatedListOutputParser()
    },
    default_parser=StrOutputParser()
)

# Smart response chain
smart_response_chain = (
    ChatPromptTemplate.from_template(
        "Respond to this request in the most appropriate format: {request}"
    )
    | ChatOpenAI()
    # Content detection calls str methods on its input, so unwrap the chat
    # message into plain text first.
    | StrOutputParser()
    | conditional_parser.parse
)

# Test different request types
requests = [
    "Solve this equation: 2x + 5 = 15",
    "Write a function to reverse a string",
    "List the planets in our solar system",
    "Create a JSON object for a book with title, author, and year",
    "What is machine learning?"
]
for request in requests:
    result = smart_response_chain.invoke({"request": request})
    print(f"Request: {request}")
    print(f"Result: {result}")
    print(f"Type: {type(result)}")
    print("---")

## Performance and Optimization
### Parser Caching
python
from functools import lru_cache
import hashlib
class CachedParser:
    """Parser with caching for repeated inputs.

    Results are memoised per instance with an LRU cache, so parsing the
    same text again returns the previously stored result object.
    """

    def __init__(self, base_parser: Any, cache_size: int = 128):
        self.base_parser = base_parser
        self.cache_size = cache_size
        # Wrap the bound method so each instance owns a separate cache.
        memoize = lru_cache(maxsize=cache_size)
        self._parse_cached = memoize(self._parse_internal)

    def _generate_cache_key(self, text: str) -> str:
        """Return the hex MD5 digest of ``text``."""
        digest = hashlib.md5(text.encode())
        return digest.hexdigest()

    def _parse_internal(self, cache_key: str, text: str) -> Any:
        """Run the wrapped parser; ``cache_key`` only takes part in the cache lookup."""
        return self.base_parser.parse(text)

    def parse(self, text: str) -> Any:
        """Parse ``text``, reusing the cached result when available."""
        return self._parse_cached(self._generate_cache_key(text), text)

    def get_cache_info(self):
        """Return the cache statistics (hits, misses, maxsize, currsize)."""
        return self._parse_cached.cache_info()
# Create cached parsers
cached_json_parser = CachedParser(JsonOutputParser())
cached_person_parser = CachedParser(PydanticOutputParser(pydantic_object=PersonProfile))

# Test caching: parse the same text twice. The first call is a cache miss,
# the second a cache hit.
test_text = '{"name": "John", "age": 30, "city": "New York"}'
parse_results = []
for ordinal in ("first", "second"):
    parse_results.append(cached_json_parser.parse(test_text))
    print(f"Cache info after {ordinal} parse: {cached_json_parser.get_cache_info()}")
result1, result2 = parse_results
print(f"Results equal: {result1 == result2}")

## Integration with LangChain Chains
### Parser Selection in Chains
python
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
def create_adaptive_chain():
    """Create a chain that adapts its parser based on the task.

    The returned chain expects a dict with a ``"task"`` key and an optional
    ``"task_type"`` key. It picks a parser from ``task_type``, runs the
    task through the chat model, and returns the parsed reply.
    """

    def select_parser(inputs: dict) -> dict:
        """Return a copy of ``inputs`` with the chosen parser under ``"parser"``."""
        task_type = inputs.get("task_type", "general")
        parser_map = {
            "json_analysis": JsonOutputParser(),
            "list_generation": CommaSeparatedListOutputParser(),
            "code_review": code_parser,
            "math_problem": math_parser,
            "person_profile": PydanticOutputParser(pydantic_object=PersonProfile)
        }
        selected_parser = parser_map.get(task_type, StrOutputParser())
        # Build a new dict so the caller's input is not mutated.
        return {**inputs, "parser": selected_parser}

    def apply_parser(inputs: dict) -> Any:
        """Apply the selected parser to the LLM output text."""
        return inputs["parser"].parse(inputs["llm_output"])

    # The selected parsers work on plain text, so the model's message is
    # converted to a string before it is stored as ``llm_output``.
    llm_text = (
        ChatPromptTemplate.from_template("Complete this task: {task}")
        | ChatOpenAI()
        | StrOutputParser()
    )

    # Create the adaptive chain
    chain = (
        RunnableLambda(select_parser)
        | RunnablePassthrough.assign(llm_output=llm_text)
        | RunnableLambda(apply_parser)
    )
    return chain
# Test adaptive chain
adaptive_chain = create_adaptive_chain()

# (task, task_type) pairs expanded into the input dicts the chain expects.
test_cases = [
    {"task": task, "task_type": task_type}
    for task, task_type in (
        ("Create a JSON object describing a computer", "json_analysis"),
        ("List 5 benefits of exercise", "list_generation"),
        ("Solve: x^2 + 5x + 6 = 0", "math_problem"),
    )
]

for test_case in test_cases:
    result = adaptive_chain.invoke(test_case)
    print(
        f"Task: {test_case['task']}",
        f"Type: {test_case['task_type']}",
        f"Result: {result}",
        f"Result type: {type(result)}",
        "---",
        sep="\n",
    )

## Next Steps
Ready to enhance your understanding further? Continue with:
- Retrieval Basics - Add knowledge retrieval to your applications
- Agents Basics - Build autonomous AI agents that use tools
- Memory Basics - Add conversation memory and context
Key Output Parser Takeaways:
- Structure over chaos - Parsers transform unpredictable text into reliable data
- Type safety - Pydantic parsers provide runtime validation and type checking
- Error resilience - Robust parsers with fallbacks prevent application crashes
- Performance optimization - Caching and smart selection improve efficiency
- Composition patterns - Chain parsers for complex data transformations
- Validation is crucial - Always validate parsed output for production applications