Output Parsers - Structuring LLM Responses

Transform unstructured LLM outputs into reliable, typed data structures for robust application development

🎯 Understanding Output Parsers ​

Output parsers are crucial components that convert raw LLM text responses into structured, usable data formats. They bridge the gap between natural language output and programmatic data handling.

🔄 The Output Processing Flow

text
                    🔄 OUTPUT PARSING PIPELINE 🔄
                     (From text to structured data)

    ┌────────────────────────────────────────────────────────────────┐
    │                         RAW LLM OUTPUT                         │
    │                      (Unstructured text)                       │
    │                                                                │
    │  "The weather is sunny with temperature 75°F. Rain chance is   │
    │   20%. Wind speed is 5 mph from the northwest. UV index is 7." │
    └─────────────────────┬──────────────────────────────────────────┘
                          │
                          ▼ OUTPUT PARSER
    ┌────────────────────────────────────────────────────────────────┐
    │                       STRUCTURED OUTPUT                        │
    │                      (Typed data objects)                      │
    │                                                                │
    │  {                                                             │
    │    "condition": "sunny",                                       │
    │    "temperature": 75,                                          │
    │    "rain_chance": 20,                                          │
    │    "wind_speed": 5,                                            │
    │    "wind_direction": "northwest",                              │
    │    "uv_index": 7                                               │
    │  }                                                             │
    └────────────────────────────────────────────────────────────────┘
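
Before reaching for a library, it helps to see what a parser actually has to do. The sketch below is illustrative only: a hypothetical parse_weather_report helper that pulls the fields shown above out of the raw sentence with regular expressions. It works for this one sentence but is brittle against rephrasing, which is exactly why dedicated output parsers and schema-aware prompting exist.

python
import re

def parse_weather_report(raw_text: str) -> dict:
    """Hand-rolled sketch: extract weather fields from one sentence with regexes."""
    def find(pattern, cast=str):
        match = re.search(pattern, raw_text)
        return cast(match.group(1)) if match else None

    return {
        "condition": find(r"\b(sunny|cloudy|rainy)\b"),
        "temperature": find(r"(\d+)\s*°?F", int),
        "rain_chance": find(r"[Rr]ain chance is (\d+)%", int),
        "wind_speed": find(r"(\d+)\s*mph", int),
    }

raw = "The weather is sunny with temperature 75°F. Rain chance is 20%. Wind speed is 5 mph."
print(parse_weather_report(raw))
# {'condition': 'sunny', 'temperature': 75, 'rain_chance': 20, 'wind_speed': 5}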

🔧 Core Output Parser Types

📝 String Output Parser

python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Basic string output parser - simplest form
str_parser = StrOutputParser()

# Simple chain with string parsing
basic_chain = (
    ChatPromptTemplate.from_template("Write a haiku about {topic}")
    | ChatOpenAI()
    | StrOutputParser()
)

result = basic_chain.invoke({"topic": "programming"})
print(f"Haiku: {result}")
print(f"Type: {type(result)}")  # <class 'str'>

# Manual equivalent of what StrOutputParser does
def manual_string_parser(ai_message):
    """What StrOutputParser does behind the scenes"""
    if hasattr(ai_message, 'content'):
        return ai_message.content
    return str(ai_message)

# Using manual parser
manual_result = manual_string_parser(ChatOpenAI().invoke("Hello"))
print(f"Manual result: {manual_result}")

🏗️ Pydantic Output Parser

python
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

# Define data models
class WeatherReport(BaseModel):
    condition: str = Field(description="Weather condition (sunny, cloudy, rainy, etc.)")
    temperature: int = Field(description="Temperature in Fahrenheit")
    humidity: int = Field(description="Humidity percentage")
    wind_speed: int = Field(description="Wind speed in mph")
    
    model_config = {
        "json_schema_extra": {
            "example": {
                "condition": "sunny",
                "temperature": 75,
                "humidity": 45,
                "wind_speed": 10
            }
        }
    }

class PersonProfile(BaseModel):
    name: str = Field(description="Person's full name")
    age: int = Field(description="Person's age")
    occupation: str = Field(description="Person's job or profession")
    skills: List[str] = Field(description="List of skills or expertise areas")
    bio: str = Field(description="Brief biography or description")

# Create parsers
weather_parser = PydanticOutputParser(pydantic_object=WeatherReport)
person_parser = PydanticOutputParser(pydantic_object=PersonProfile)

# Weather analysis chain
weather_chain = (
    ChatPromptTemplate.from_template(
        "Analyze this weather description and extract structured data: {weather_text}\n\n"
        "{format_instructions}"
    ).partial(format_instructions=weather_parser.get_format_instructions())
    | ChatOpenAI()
    | weather_parser
)

# Person profile chain
profile_chain = (
    ChatPromptTemplate.from_template(
        "Create a person profile based on this description: {description}\n\n"
        "{format_instructions}"
    ).partial(format_instructions=person_parser.get_format_instructions())
    | ChatOpenAI()
    | person_parser
)

# Test the chains
weather_data = weather_chain.invoke({
    "weather_text": "It's a beautiful sunny day with 78 degrees, light breeze at 8 mph, and humidity around 40%"
})

person_data = profile_chain.invoke({
    "description": "John is a 32-year-old software engineer who specializes in Python and machine learning. He has 8 years of experience building AI applications."
})

print(f"Weather: {weather_data}")
print(f"Type: {type(weather_data)}")  # <class 'WeatherReport'>
print(f"Temperature: {weather_data.temperature}")

print(f"Person: {person_data}")
print(f"Skills: {person_data.skills}")

📊 JSON Output Parser

python
from langchain_core.output_parsers import JsonOutputParser
import json

# Simple JSON parser without schema
json_parser = JsonOutputParser()

# Create a chain that outputs JSON
json_chain = (
    ChatPromptTemplate.from_template(
        """Analyze the following text and return a JSON object with sentiment analysis:
        
        Text: {text}
        
        Return JSON with these fields:
        - sentiment: positive/negative/neutral
        - confidence: 0.0 to 1.0
        - key_phrases: array of important phrases
        - emotions: array of detected emotions
        
        Ensure your response is valid JSON only."""
    )
    | ChatOpenAI()
    | json_parser
)

# Test JSON parsing
sentiment_result = json_chain.invoke({
    "text": "I absolutely love this new feature! It makes my work so much easier and faster."
})

print(f"Sentiment analysis: {sentiment_result}")
print(f"Type: {type(sentiment_result)}")  # <class 'dict'>
print(f"Confidence: {sentiment_result.get('confidence')}")

# JSON with schema validation
class AnalysisResult(BaseModel):
    sentiment: str = Field(description="Sentiment: positive, negative, or neutral")
    confidence: float = Field(description="Confidence score between 0 and 1")
    key_phrases: List[str] = Field(description="Important phrases from the text")
    emotions: List[str] = Field(description="Detected emotions")

# Pydantic parser with JSON output (more robust)
structured_json_parser = PydanticOutputParser(pydantic_object=AnalysisResult)

structured_chain = (
    ChatPromptTemplate.from_template(
        "Analyze this text for sentiment: {text}\n\n{format_instructions}"
    ).partial(format_instructions=structured_json_parser.get_format_instructions())
    | ChatOpenAI()
    | structured_json_parser
)

structured_result = structured_chain.invoke({
    "text": "The movie was disappointing. Poor acting and confusing plot."
})

print(f"Structured result: {structured_result}")

📋 List Output Parser

python
from langchain_core.output_parsers import CommaSeparatedListOutputParser
import re

# Comma-separated list parser
list_parser = CommaSeparatedListOutputParser()

# Chain for generating lists
list_chain = (
    ChatPromptTemplate.from_template(
        "List {count} {category} separated by commas. Only return the comma-separated list.\n\n"
        "{format_instructions}"
    ).partial(format_instructions=list_parser.get_format_instructions())
    | ChatOpenAI()
    | list_parser
)

# Generate different types of lists
programming_languages = list_chain.invoke({
    "count": "5",
    "category": "popular programming languages"
})

fruits = list_chain.invoke({
    "count": "7",
    "category": "tropical fruits"
})

print(f"Programming languages: {programming_languages}")
print(f"Type: {type(programming_languages)}")  # <class 'list'>
print(f"First language: {programming_languages[0]}")

print(f"Fruits: {fruits}")

# Custom list parser for specific formats
class NumberedListOutputParser:
    """Custom parser for numbered lists"""
    
    def parse(self, text: str) -> List[str]:
        items = []

        for line in text.strip().split('\n'):
            # Strip leading numbering ("1.", "2)") or bullet markers ("-", "*", "•")
            clean_line = re.sub(r'^\s*(\d+[.)]|[-*•])\s*', '', line.strip())
            if clean_line:
                items.append(clean_line)

        return items

numbered_parser = NumberedListOutputParser()

numbered_chain = (
    ChatPromptTemplate.from_template(
        "Create a numbered list of {count} {category}, one item per line, formatted as: 1. item"
    )
    | ChatOpenAI()
    | StrOutputParser()      # convert the AIMessage to plain text before custom parsing
    | numbered_parser.parse  # plain callables are coerced to RunnableLambda by LCEL
)
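
If you would rather skip the explicit StrOutputParser step, subclassing BaseOutputParser from langchain_core turns the custom parser into a full Runnable that accepts messages directly; a minimal sketch (the class name is illustrative):

python
from langchain_core.output_parsers import BaseOutputParser

class NumberedListParser(BaseOutputParser[List[str]]):
    """Custom parser as a proper Runnable: message-to-text conversion is handled for us."""

    def parse(self, text: str) -> List[str]:
        items = []
        for line in text.strip().split('\n'):
            clean_line = re.sub(r'^\s*(\d+[.)]|[-*•])\s*', '', line.strip())
            if clean_line:
                items.append(clean_line)
        return items

    def get_format_instructions(self) -> str:
        return "Return a numbered list, one item per line, e.g. '1. item'."

runnable_numbered_chain = (
    ChatPromptTemplate.from_template(
        "Create a numbered list of {count} {category}, one item per line."
    )
    | ChatOpenAI()
    | NumberedListParser()  # no StrOutputParser needed
)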

🎨 Advanced Output Parsing Patterns ​

🔄 Multi-Format Output Parser

python
from enum import Enum
from typing import Union

class OutputFormat(Enum):
    JSON = "json"
    PYDANTIC = "pydantic"
    LIST = "list"
    STRING = "string"

class MultiFormatParser:
    """Parser that can handle multiple output formats dynamically"""
    
    def __init__(self):
        self.parsers = {
            OutputFormat.JSON: JsonOutputParser(),
            OutputFormat.LIST: CommaSeparatedListOutputParser(),
            OutputFormat.STRING: StrOutputParser()
        }
    
    def parse(self, text: str, format_type: OutputFormat):
        parser = self.parsers.get(format_type, self.parsers[OutputFormat.STRING])
        return parser.parse(text)
    
    def create_chain(self, format_type: OutputFormat):
        """Create a chain with the specified output format"""
        parser = self.parsers.get(format_type, self.parsers[OutputFormat.STRING])
        
        format_instructions = {
            OutputFormat.JSON: "Return your response as a valid JSON object.",
            OutputFormat.LIST: "Return your response as a comma-separated list.",
            OutputFormat.STRING: "Return your response as plain text."
        }
        
        return (
            ChatPromptTemplate.from_template(
                "Answer the following question: {question}\n\n"
                f"{format_instructions[format_type]}"
            )
            | ChatOpenAI()
            | parser
        )

# Usage example
multi_parser = MultiFormatParser()

# Create chains for different formats
json_chain = multi_parser.create_chain(OutputFormat.JSON)
list_chain = multi_parser.create_chain(OutputFormat.LIST)
string_chain = multi_parser.create_chain(OutputFormat.STRING)

# Test different formats
question = "What are the main benefits of using Python for data science?"

json_result = json_chain.invoke({"question": question})
list_result = list_chain.invoke({"question": question})
string_result = string_chain.invoke({"question": question})

print(f"JSON format: {json_result}")
print(f"List format: {list_result}")
print(f"String format: {string_result}")

🧠 Intelligent Format Detection ​

python
import re
from typing import Any

class SmartOutputParser:
    """Automatically detect and parse output format"""
    
    def __init__(self):
        self.json_parser = JsonOutputParser()
        self.list_parser = CommaSeparatedListOutputParser()
        self.str_parser = StrOutputParser()
    
    def detect_format(self, text: str) -> str:
        """Detect the format of the output text"""
        text = text.strip()
        
        # Check for JSON
        if (text.startswith('{') and text.endswith('}')) or \
           (text.startswith('[') and text.endswith(']')):
            try:
                json.loads(text)
                return "json"
            except json.JSONDecodeError:
                pass
        
        # Check for comma-separated list
        if ',' in text and not any(char in text for char in '{}[]()'):
            # Simple heuristic: if it has commas but no complex structures
            return "list"
        
        # Check for numbered list
        if re.match(r'^\d+\.', text.split('\n')[0]):
            return "numbered_list"
        
        # Default to string
        return "string"
    
    def parse(self, text: str) -> Any:
        """Parse text based on detected format"""
        format_type = self.detect_format(text)
        
        try:
            if format_type == "json":
                return self.json_parser.parse(text)
            elif format_type == "list":
                return self.list_parser.parse(text)
            elif format_type == "numbered_list":
                return self._parse_numbered_list(text)
            else:
                return self.str_parser.parse(text)
        except Exception as e:
            print(f"Parsing failed for format {format_type}: {e}")
            return self.str_parser.parse(text)
    
    def _parse_numbered_list(self, text: str) -> List[str]:
        """Parse numbered list format"""
        lines = text.split('\n')
        items = []
        
        for line in lines:
            # Remove numbering and extract content
            match = re.match(r'^\d+\.\s*(.+)', line.strip())
            if match:
                items.append(match.group(1))
        
        return items

# Smart parser in a chain
smart_parser = SmartOutputParser()

smart_chain = (
    ChatPromptTemplate.from_template(
        "Answer this question in the most appropriate format: {question}"
    )
    | ChatOpenAI()
    | StrOutputParser()   # smart_parser.parse expects a plain string, not an AIMessage
    | smart_parser.parse
)

# Test with questions that naturally lead to different formats
questions = [
    "List 5 programming languages",  # Should return list
    "What is Python?",  # Should return string
    "Create a JSON object describing a person with name, age, and skills",  # Should return JSON
]

for question in questions:
    result = smart_chain.invoke({"question": question})
    print(f"Question: {question}")
    print(f"Result: {result}")
    print(f"Type: {type(result)}")
    print("---")

🔧 Custom Output Parsers

python
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, Any
import re

# Base class for custom parsers
class CustomOutputParser(ABC):
    @abstractmethod
    def parse(self, text: str) -> Any:
        pass
    
    @abstractmethod
    def get_format_instructions(self) -> str:
        pass

# Custom parser for code extraction
class CodeExtractionParser(CustomOutputParser):
    """Extract code blocks from LLM output"""
    
    def parse(self, text: str) -> Dict[str, List[str]]:
        code_pattern = r'```(\w+)?\n(.*?)\n```'
        matches = re.findall(code_pattern, text, re.DOTALL)
        
        result = {"code_blocks": [], "languages": [], "explanations": []}
        
        for language, code in matches:
            result["code_blocks"].append(code.strip())
            result["languages"].append(language.strip() if language else "unknown")
        
        # Extract text that's not in code blocks
        remaining_text = re.sub(code_pattern, "", text, flags=re.DOTALL)
        result["explanations"] = [
            line.strip() for line in remaining_text.split('\n') 
            if line.strip()
        ]
        
        return result
    
    def get_format_instructions(self) -> str:
        return """
        Format your response with code blocks using triple backticks:
        ```python
        your code here
        ```
        Include explanations outside the code blocks.
        """

# Custom parser for mathematical expressions
@dataclass
class MathStep:
    step_number: int
    description: str
    formula: str
    result: str

class MathProblemParser(CustomOutputParser):
    """Parse step-by-step mathematical solutions"""
    
    def parse(self, text: str) -> List[MathStep]:
        steps = []
        lines = text.split('\n')
        
        current_step = None
        step_counter = 1
        
        for line in lines:
            line = line.strip()
            if not line:
                continue
            
            # Look for step indicators
            if re.match(r'^(step\s*\d+|^\d+\.)', line.lower()):
                if current_step:
                    steps.append(current_step)
                
                current_step = MathStep(
                    step_number=step_counter,
                    description=line,
                    formula="",
                    result=""
                )
                step_counter += 1
            
            elif current_step:
                # Look for mathematical expressions
                if any(op in line for op in ['=', '+', '-', '*', '/', '^']):
                    if '=' in line:
                        parts = line.split('=')
                        current_step.formula = parts[0].strip()
                        current_step.result = parts[1].strip() if len(parts) > 1 else ""
                    else:
                        current_step.formula += f" {line}"
        
        if current_step:
            steps.append(current_step)
        
        return steps
    
    def get_format_instructions(self) -> str:
        return """
        Solve the problem step by step:
        Step 1: [description]
        formula = result
        
        Step 2: [description] 
        formula = result
        
        Continue for each step...
        """

# Email extraction parser
class EmailDataParser(CustomOutputParser):
    """Extract structured data from email-like text"""
    
    def parse(self, text: str) -> Dict[str, Any]:
        result = {
            "subject": "",
            "sender": "",
            "recipient": "",
            "body": "",
            "action_items": [],
            "dates": [],
            "priority": "normal"
        }
        
        lines = text.split('\n')
        body_lines = []
        in_body = False
        
        for line in lines:
            line = line.strip()
            
            # Extract email headers
            if line.lower().startswith('subject:'):
                result["subject"] = line[8:].strip()
            elif line.lower().startswith('from:'):
                result["sender"] = line[5:].strip()
            elif line.lower().startswith('to:'):
                result["recipient"] = line[3:].strip()
            elif line.lower().startswith('priority:'):
                result["priority"] = line[9:].strip().lower()
            elif line == '' and not in_body:
                in_body = True
            elif in_body:
                body_lines.append(line)
                
                # Extract action items
                if any(keyword in line.lower() for keyword in ['todo', 'action', 'task', 'please']):
                    result["action_items"].append(line)
                
                # Extract dates (simple pattern)
                date_pattern = r'\b\d{1,2}/\d{1,2}/\d{4}\b|\b\w+ \d{1,2}, \d{4}\b'
                dates = re.findall(date_pattern, line)
                result["dates"].extend(dates)
        
        result["body"] = '\n'.join(body_lines)
        return result
    
    def get_format_instructions(self) -> str:
        return """
        Format as an email:
        Subject: [subject line]
        From: [sender]
        To: [recipient]
        Priority: [high/normal/low]
        
        [email body content]
        """

# Usage examples
code_parser = CodeExtractionParser()
math_parser = MathProblemParser()
email_parser = EmailDataParser()

# Code extraction chain
code_chain = (
    ChatPromptTemplate.from_template(
        "Write a Python function to {task}. Include explanation.\n\n"
        "{format_instructions}"
    ).partial(format_instructions=code_parser.get_format_instructions())
    | ChatOpenAI()
    | StrOutputParser()   # custom parsers expect plain text, not AIMessage objects
    | code_parser.parse
)

# Math problem chain
math_chain = (
    ChatPromptTemplate.from_template(
        "Solve this math problem step by step: {problem}\n\n"
        "{format_instructions}"
    ).partial(format_instructions=math_parser.get_format_instructions())
    | ChatOpenAI()
    | StrOutputParser()
    | math_parser.parse
)

# Test the custom parsers
code_result = code_chain.invoke({"task": "calculate fibonacci numbers"})
print(f"Code extraction: {code_result}")

math_result = math_chain.invoke({"problem": "Find the area of a circle with radius 5"})
print(f"Math steps: {math_result}")

⚑ Parser Error Handling and Validation ​

🛡️ Robust Parser Implementation

python
from typing import Optional, Callable, Any
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class RobustParser:
    """Parser wrapper with comprehensive error handling"""
    
    def __init__(self, 
                 primary_parser: Any,
                 fallback_parser: Optional[Any] = None,
                 validator: Optional[Callable] = None,
                 max_retries: int = 3):
        self.primary_parser = primary_parser
        self.fallback_parser = fallback_parser or StrOutputParser()
        self.validator = validator
        self.max_retries = max_retries
    
    def parse(self, text: str) -> Any:
        """Parse with error handling and validation"""
        
        # Try primary parser
        for attempt in range(self.max_retries):
            try:
                result = self.primary_parser.parse(text)
                
                # Validate result if validator provided
                if self.validator and not self.validator(result):
                    raise ValueError("Validation failed")
                
                logger.info(f"Successfully parsed with primary parser")
                return result
                
            except Exception as e:
                logger.warning(f"Primary parser attempt {attempt + 1} failed: {e}")
                if attempt < self.max_retries - 1:
                    # Optionally clean text for retry
                    text = self._clean_text_for_retry(text)
        
        # Try fallback parser
        try:
            logger.info("Trying fallback parser")
            result = self.fallback_parser.parse(text)
            
            if self.validator and not self.validator(result):
                logger.warning("Fallback result failed validation")
            
            return result
            
        except Exception as e:
            logger.error(f"Fallback parser also failed: {e}")
            return f"Parsing failed: {text}"
    
    def _clean_text_for_retry(self, text: str) -> str:
        """Clean text for retry attempts"""
        # Remove common formatting issues
        text = text.strip()
        text = re.sub(r'```\w*\n?', '', text)  # Remove code block markers
        text = re.sub(r'\n+', '\n', text)      # Remove extra newlines
        return text

# Validation functions
def validate_json_structure(data: dict) -> bool:
    """Validate JSON has required structure"""
    required_keys = ['status', 'data']
    return isinstance(data, dict) and all(key in data for key in required_keys)

def validate_list_length(data: list) -> bool:
    """Validate list has reasonable length"""
    return isinstance(data, list) and 1 <= len(data) <= 20

def validate_person_data(data) -> bool:
    """Validate person profile data (guard against fallback returning a plain string)"""
    return (
        isinstance(data, PersonProfile) and
        data.age > 0 and
        len(data.name.strip()) > 0 and
        len(data.skills) > 0
    )

# Create robust parsers
robust_json_parser = RobustParser(
    primary_parser=JsonOutputParser(),
    validator=validate_json_structure
)

robust_list_parser = RobustParser(
    primary_parser=CommaSeparatedListOutputParser(),
    validator=validate_list_length
)

robust_person_parser = RobustParser(
    primary_parser=PydanticOutputParser(pydantic_object=PersonProfile),
    validator=validate_person_data
)

# Test robust parsing
test_chains = {
    "json": (
        ChatPromptTemplate.from_template(
            "Create a JSON response about {topic}. Include 'status' and 'data' fields."
        )
        | ChatOpenAI()
        | StrOutputParser()        # convert the AIMessage to text before custom parsing
        | robust_json_parser.parse
    ),
    
    "list": (
        ChatPromptTemplate.from_template(
            "List key concepts about {topic} (3-8 items)."
        )
        | ChatOpenAI()
        | StrOutputParser()
        | robust_list_parser.parse
    ),
    
    "person": (
        ChatPromptTemplate.from_template(
            "Create a person profile: {description}\n\n"
            "Return in this format:\n"
            "Name: [name]\n"
            "Age: [age]\n"
            "Occupation: [job]\n"
            "Skills: [skill1, skill2, skill3]\n"
            "Bio: [short bio]"
        )
        | ChatOpenAI()
        | StrOutputParser()
        | robust_person_parser.parse
    )
}

# Test the robust parsers
for parser_type, chain in test_chains.items():
    try:
        if parser_type == "json":
            result = chain.invoke({"topic": "artificial intelligence"})
        elif parser_type == "list":
            result = chain.invoke({"topic": "machine learning"})
        else:  # person
            result = chain.invoke({"description": "Sarah, 28, data scientist with Python and R skills"})
        
        print(f"{parser_type} result: {result}")
        print(f"Type: {type(result)}")
        
    except Exception as e:
        print(f"{parser_type} failed: {e}")

🔄 Parser Composition and Chaining

🔗 Multi-Stage Parsing

python
class PipelineParser:
    """Chain multiple parsers together for complex transformations"""
    
    def __init__(self, parsers: List[Any]):
        self.parsers = parsers
    
    def parse(self, text: str) -> Any:
        """Apply parsers in sequence"""
        result = text
        
        for i, parser in enumerate(self.parsers):
            try:
                if hasattr(parser, 'parse'):
                    result = parser.parse(result)
                else:
                    result = parser(result)
                
                logger.info(f"Stage {i+1} completed: {type(result)}")
                
            except Exception as e:
                logger.error(f"Stage {i+1} failed: {e}")
                raise
        
        return result

# Example: Extract code, then analyze it
def code_analyzer(code_data: dict) -> dict:
    """Analyze extracted code blocks"""
    analysis = {
        "total_blocks": len(code_data["code_blocks"]),
        "languages": code_data["languages"],
        "complexity_score": 0,
        "suggestions": []
    }
    
    for code in code_data["code_blocks"]:
        # Simple complexity analysis
        lines = len(code.split('\n'))
        if lines > 20:
            analysis["complexity_score"] += 3
        elif lines > 10:
            analysis["complexity_score"] += 2
        else:
            analysis["complexity_score"] += 1
        
        # Simple suggestions
        if 'def ' in code:
            analysis["suggestions"].append("Contains function definitions")
        if 'class ' in code:
            analysis["suggestions"].append("Contains class definitions")
        if 'import ' in code:
            analysis["suggestions"].append("Uses external libraries")
    
    return analysis

# Create pipeline
code_analysis_pipeline = PipelineParser([
    CodeExtractionParser(),  # Extract code blocks
    code_analyzer           # Analyze the code
])

# Use in a chain
code_analysis_chain = (
    ChatPromptTemplate.from_template(
        "Write a Python program to {task}. Include multiple functions and explain your approach."
    )
    | ChatOpenAI()
    | StrOutputParser()          # the custom pipeline expects plain text
    | code_analysis_pipeline.parse
)

analysis_result = code_analysis_chain.invoke({
    "task": "implement a binary search tree with insert, search, and delete operations"
})

print(f"Code analysis: {analysis_result}")

🔄 Conditional Parsing

python
class ConditionalParser:
    """Choose parser based on content analysis"""
    
    def __init__(self, parser_map: Dict[str, Any], default_parser: Any):
        self.parser_map = parser_map
        self.default_parser = default_parser
    
    def _detect_content_type(self, text: str) -> str:
        """Detect what type of content we're dealing with"""
        text_lower = text.lower()
        
        if any(word in text_lower for word in ['step', 'solve', 'calculate', 'equation']):
            return "math"
        elif any(word in text_lower for word in ['def ', 'class ', 'import ', 'function']):
            return "code"
        elif '{' in text and '}' in text:
            return "json"
        elif ',' in text and len(text.split(',')) > 2:
            return "list"
        else:
            return "text"
    
    def parse(self, text: str) -> Any:
        """Parse based on detected content type"""
        content_type = self._detect_content_type(text)
        parser = self.parser_map.get(content_type, self.default_parser)
        
        logger.info(f"Detected content type: {content_type}")
        return parser.parse(text)

# Create conditional parser
conditional_parser = ConditionalParser(
    parser_map={
        "math": math_parser,
        "code": code_parser,
        "json": JsonOutputParser(),
        "list": CommaSeparatedListOutputParser()
    },
    default_parser=StrOutputParser()
)

# Smart response chain
smart_response_chain = (
    ChatPromptTemplate.from_template(
        "Respond to this request in the most appropriate format: {request}"
    )
    | ChatOpenAI()
    | StrOutputParser()          # normalize to text before content-type detection
    | conditional_parser.parse
)

# Test different request types
requests = [
    "Solve this equation: 2x + 5 = 15",
    "Write a function to reverse a string",
    "List the planets in our solar system",
    "Create a JSON object for a book with title, author, and year",
    "What is machine learning?"
]

for request in requests:
    result = smart_response_chain.invoke({"request": request})
    print(f"Request: {request}")
    print(f"Result: {result}")
    print(f"Type: {type(result)}")
    print("---")

🎯 Performance and Optimization ​

⚑ Parser Caching ​

python
from functools import lru_cache
import hashlib

class CachedParser:
    """Parser with caching for repeated inputs"""
    
    def __init__(self, base_parser: Any, cache_size: int = 128):
        self.base_parser = base_parser
        self.cache_size = cache_size
        self._parse_cached = lru_cache(maxsize=cache_size)(self._parse_internal)
    
    def _generate_cache_key(self, text: str) -> str:
        """Generate cache key from input text"""
        return hashlib.md5(text.encode()).hexdigest()
    
    def _parse_internal(self, cache_key: str, text: str) -> Any:
        """Internal parsing method for caching"""
        return self.base_parser.parse(text)
    
    def parse(self, text: str) -> Any:
        """Parse with caching"""
        cache_key = self._generate_cache_key(text)
        return self._parse_cached(cache_key, text)
    
    def get_cache_info(self):
        """Get cache statistics"""
        return self._parse_cached.cache_info()

# Create cached parsers
cached_json_parser = CachedParser(JsonOutputParser())
cached_person_parser = CachedParser(PydanticOutputParser(pydantic_object=PersonProfile))

# Test caching
test_text = '{"name": "John", "age": 30, "city": "New York"}'

# First parse - cache miss
result1 = cached_json_parser.parse(test_text)
print(f"Cache info after first parse: {cached_json_parser.get_cache_info()}")

# Second parse - cache hit
result2 = cached_json_parser.parse(test_text)
print(f"Cache info after second parse: {cached_json_parser.get_cache_info()}")

print(f"Results equal: {result1 == result2}")

🔗 Integration with LangChain Chains

🔄 Parser Selection in Chains

python
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

def create_adaptive_chain():
    """Create a chain that adapts its parser based on the task"""
    
    def select_parser(inputs: dict) -> dict:
        """Select appropriate parser based on task type"""
        task_type = inputs.get("task_type", "general")
        
        parser_map = {
            "json_analysis": JsonOutputParser(),
            "list_generation": CommaSeparatedListOutputParser(),
            "code_review": code_parser,
            "math_problem": math_parser,
            "person_profile": PydanticOutputParser(pydantic_object=PersonProfile)
        }
        
        selected_parser = parser_map.get(task_type, StrOutputParser())
        inputs["parser"] = selected_parser
        
        return inputs
    
    def apply_parser(inputs: dict) -> Any:
        """Apply the selected parser to the LLM output"""
        llm_output = inputs["llm_output"]
        parser = inputs["parser"]
        
        return parser.parse(llm_output)
    
    # Create the adaptive chain
    chain = (
        RunnableLambda(select_parser)
        | RunnablePassthrough.assign(
            llm_output=ChatPromptTemplate.from_template(
                "Complete this task: {task}"
            ) | ChatOpenAI() | StrOutputParser()  # keep llm_output as plain text for the parsers
        )
        | RunnableLambda(apply_parser)
    )
    
    return chain

# Test adaptive chain
adaptive_chain = create_adaptive_chain()

test_cases = [
    {
        "task": "Create a JSON object describing a computer",
        "task_type": "json_analysis"
    },
    {
        "task": "List 5 benefits of exercise",
        "task_type": "list_generation"
    },
    {
        "task": "Solve: x^2 + 5x + 6 = 0",
        "task_type": "math_problem"
    }
]

for test_case in test_cases:
    result = adaptive_chain.invoke(test_case)
    print(f"Task: {test_case['task']}")
    print(f"Type: {test_case['task_type']}")
    print(f"Result: {result}")
    print(f"Result type: {type(result)}")
    print("---")

🔗 Next Steps

Ready to enhance your understanding further? Review the key takeaways below before moving on.


Key Output Parser Takeaways:

  • Structure over chaos - Parsers transform unpredictable text into reliable data
  • Type safety - Pydantic parsers provide runtime validation and type checking
  • Error resilience - Robust parsers with fallbacks prevent application crashes
  • Performance optimization - Caching and smart selection improve efficiency
  • Composition patterns - Chain parsers for complex data transformations
  • Validation is crucial - Always validate parsed output for production applications
