Parsing and Validation Guide
View SourceChainex provides sophisticated parsing capabilities to transform LLM text responses into structured, validated data using JSON schemas and Elixir structs.
JSON Parsing with Schema Validation
Extract structured data from LLM responses with automatic validation:
Basic JSON Parsing
# Simple schema
user_schema = %{
name: :string,
age: :integer,
email: :string
}
"Extract user information from: {{text}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(:json, schema: user_schema)
|> Chainex.Chain.run(%{text: "John Smith, 30 years old, john@example.com"})
# => {:ok, %{"name" => "John Smith", "age" => 30, "email" => "john@example.com"}}Nested JSON Schemas
Handle complex, nested data structures:
company_schema = %{
company: %{
name: :string,
industry: :string,
founded: :integer,
headquarters: %{
city: :string,
country: :string,
coordinates: %{
lat: :float,
lng: :float
}
}
},
financials: %{
revenue: %{
amount: :float,
currency: :string,
period: :string
},
employees: :integer,
funding_rounds: [%{
round: :string,
amount: :float,
date: :string,
investors: [:string]
}]
}
}
"Analyze this company information: {{company_info}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(:json, schema: company_schema)
|> Chainex.Chain.run(%{company_info: "Tesla Inc. is an electric vehicle manufacturer..."})Struct Parsing
Parse directly into Elixir structs for type safety:
Defining Structs
defmodule ProductAnalysis do
defstruct [:product, :analysis, :recommendations, :timestamp]
defmodule Product do
defstruct [:name, :category, :price, :specifications]
defmodule Price do
defstruct [:amount, :currency, :discount_percent]
end
defmodule Specifications do
defstruct [:dimensions, :weight, :features, :warranty_years]
defmodule Dimensions do
defstruct [:length, :width, :height, :unit]
end
end
end
defmodule Analysis do
defstruct [:market_position, :strengths, :weaknesses, :competitive_score]
end
endUsing Struct Parsing
"Analyze this product: {{product_description}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(:struct, ProductAnalysis)
|> Chainex.Chain.transform(fn analysis ->
# Now working with strongly typed structs
%{analysis | timestamp: DateTime.utc_now()}
end)
|> Chainex.Chain.run(%{product_description: "iPhone 15 Pro - latest smartphone..."})
# Returns: {:ok, %ProductAnalysis{product: %ProductAnalysis.Product{...}}}Advanced Parsing Features
Automatic Format Injection
Chainex automatically adds format instructions to improve parsing success:
# The parser automatically adds instructions like:
# "Format your response as valid JSON matching this schema: {...}"
# "Ensure all required fields are present and types are correct"
chain = "Extract data from: {{input}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(:json, schema: schema)
# Format instructions are automatically injected before the LLM callCustom Parsers
Create domain-specific parsers for specialized formats:
# Email address parser
email_parser = fn text ->
emails = Regex.scan(~r/[\w._%+-]+@[\w.-]+\.[A-Za-z]{2,}/, text)
{:ok, List.flatten(emails)}
end
"Find all email addresses in this text: {{text}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(email_parser)
|> Chainex.Chain.run(%{text: "Contact john@example.com or mary@company.org"})
# => {:ok, ["john@example.com", "mary@company.org"]}CSV Parser
csv_parser = fn text ->
text
|> String.split("\n")
|> Enum.map(&String.split(&1, ","))
|> Enum.map(&Enum.map(&1, fn cell -> String.trim(cell, "\"") end))
|> then(fn [headers | rows] ->
{:ok, %{headers: headers, rows: rows}}
end)
rescue
_ -> {:error, "Invalid CSV format"}
end
"Convert this data to CSV format: {{data}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(csv_parser)Error Handling in Parsing
Graceful Degradation
Handle parsing failures gracefully:
"Extract user data: {{input}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(:json, schema: user_schema, on_error: :return_raw)
|> Chainex.Chain.transform(fn
%{} = parsed_data ->
# Successfully parsed
{:parsed, parsed_data}
raw_text when is_binary(raw_text) ->
# Parsing failed, got raw text back
{:raw, raw_text}
end)Retry with Different Models
parsing_chain = fn text ->
# Try with GPT-4 first (better at structured output)
case attempt_parse_with_model(text, :gpt4, user_schema) do
{:ok, result} -> {:ok, result}
{:error, _} ->
# Fallback to Claude (different parsing approach)
case attempt_parse_with_model(text, :claude, user_schema) do
{:ok, result} -> {:ok, result}
{:error, _} ->
# Final fallback - return raw text
{:ok, text}
end
end
end
defp attempt_parse_with_model(text, model, schema) do
"Extract user data: #{text}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(model)
|> Chainex.Chain.parse(:json, schema: schema)
|> Chainex.Chain.run()
endValidation and Schema Types
Supported Types
comprehensive_schema = %{
# Basic types
name: :string,
age: :integer,
score: :float,
active: :boolean,
# Arrays
tags: [:string],
numbers: [:integer],
# Nested objects
address: %{
street: :string,
city: :string,
postal_code: :string
},
# Array of objects
contacts: [%{
type: :string,
value: :string,
preferred: :boolean
}],
# Optional fields (use default values or make nullable)
bio: {:optional, :string},
last_login: {:nullable, :string}
}Custom Validation
Add custom validation logic:
validated_parser = fn json_result ->
case json_result do
{:ok, %{"email" => email} = data} ->
if String.contains?(email, "@") do
{:ok, data}
else
{:error, "Invalid email format"}
end
error -> error
end
end
chain = "Extract user data: {{input}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai)
|> Chainex.Chain.parse(:json, schema: user_schema)
|> Chainex.Chain.transform(validated_parser)Real-World Parsing Examples
Resume Parser
defmodule ResumeParser do
defstruct [:personal, :experience, :education, :skills]
defmodule Personal do
defstruct [:name, :email, :phone, :location, :summary]
end
defmodule Experience do
defstruct [:title, :company, :duration, :responsibilities, :achievements]
end
defmodule Education do
defstruct [:degree, :institution, :graduation_year, :gpa]
end
defmodule Skills do
defstruct [:technical, :languages, :certifications]
end
end
resume_chain = "Parse this resume: {{resume_text}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai, model: "gpt-4")
|> Chainex.Chain.parse(:struct, ResumeParser)
|> Chainex.Chain.transform(fn resume ->
# Post-processing
%{resume |
personal: normalize_contact_info(resume.personal),
experience: rank_by_relevance(resume.experience),
skills: categorize_skills(resume.skills)
}
end)Invoice Parser
invoice_schema = %{
invoice: %{
number: :string,
date: :string,
due_date: :string,
status: :string
},
vendor: %{
name: :string,
address: %{
street: :string,
city: :string,
state: :string,
zip: :string
},
tax_id: :string
},
line_items: [%{
description: :string,
quantity: :integer,
unit_price: :float,
total: :float,
tax_rate: :float
}],
totals: %{
subtotal: :float,
tax_amount: :float,
total: :float,
currency: :string
}
}
"Extract all information from this invoice: {{invoice_image_description}}"
|> Chainex.Chain.new()
|> Chainex.Chain.llm(:openai, model: "gpt-4-vision")
|> Chainex.Chain.parse(:json, schema: invoice_schema)
|> Chainex.Chain.transform(fn invoice_data ->
# Validate totals match line items
calculated_total = calculate_invoice_total(invoice_data["line_items"])
actual_total = invoice_data["totals"]["total"]
if abs(calculated_total - actual_total) < 0.01 do
{:ok, invoice_data}
else
{:error, "Invoice totals don't match line items"}
end
end)Testing Parsing
Unit Tests
defmodule MyApp.ParsingTest do
use ExUnit.Case
test "user schema parsing" do
user_schema = %{name: :string, age: :integer}
chain = Chainex.Chain.new("{{input}}")
|> Chainex.Chain.llm(:mock, response: ~s({"name": "Alice", "age": 25}))
|> Chainex.Chain.parse(:json, schema: user_schema)
{:ok, result} = Chainex.Chain.run(chain, %{input: "test"})
assert result["name"] == "Alice"
assert result["age"] == 25
end
test "handles parsing errors gracefully" do
chain = Chainex.Chain.new("{{input}}")
|> Chainex.Chain.llm(:mock, response: "Invalid JSON{")
|> Chainex.Chain.parse(:json, on_error: :return_raw)
{:ok, result} = Chainex.Chain.run(chain, %{input: "test"})
assert result == "Invalid JSON{"
end
endBest Practices
1. Schema Design
- Keep schemas as simple as possible while capturing necessary data
- Use descriptive field names that help the LLM understand what to extract
- Provide default values for optional fields
- Use arrays for collections of similar items
2. Error Handling
- Always handle parsing failures gracefully
- Provide fallback options (return raw text, retry with different model)
- Log parsing failures for debugging and schema improvement
3. Validation
- Validate critical fields with custom logic after parsing
- Use Elixir's pattern matching for robust data handling
- Consider using Ecto changesets for complex validation
4. Performance
- Cache parsing results for repeated operations
- Use simpler models for basic parsing tasks
- Consider streaming for large documents