Testing Architecture & Development Guide
Overview
This document outlines the comprehensive testing strategy for the pipeline orchestration system, providing elegant mock/live mode management and robust test coverage for all components.
Core Testing Principles
1. Clear Separation of Concerns
- Business Logic: Tested with mocks for external dependencies
- Integration Points: Tested with both mocks and live services
- End-to-End Flows: Tested in controlled environments
2. Environment-Based Testing
- Unit Tests: Always use mocks (fast, reliable, no external deps)
- Integration Tests: Can use mocks or live services based on configuration
- E2E Tests: Use live services in staging environments
3. Dependency Injection Pattern
- All external service interactions go through provider interfaces
- Providers can be swapped between mock and live implementations
- Configuration determines which provider to use
Testing Architecture
Provider Interface Pattern
# All external services implement a provider behavior
defmodule Pipeline.Providers.AIProvider do
  @moduledoc """
  Behaviour that every AI provider implementation (live or mock) adopts.
  """

  @doc """
  Sends `prompt` to the provider together with an `options` map.

  Returns `{:ok, response}` with a response map on success, or
  `{:error, reason}` with a string reason on failure.
  """
  @callback query(prompt :: String.t(), options :: map()) ::
              {:ok, response :: map()} | {:error, reason :: String.t()}
end
# Live implementation
defmodule Pipeline.Providers.ClaudeProvider do
  @moduledoc """
  Live implementation of the `Pipeline.Providers.AIProvider` behaviour.
  """

  @behaviour Pipeline.Providers.AIProvider

  # `@impl true` lets the compiler verify that query/2 really is a callback
  # of the declared behaviour (catches typos and arity drift).
  @impl true
  def query(prompt, options) do
    # Real Claude SDK calls
    # NOTE(review): the body is elided in this guide; a real implementation
    # must return {:ok, map} | {:error, String.t()} per the behaviour.
  end
end
# Mock implementation
defmodule Pipeline.Test.Mocks.ClaudeProvider do
  @moduledoc """
  Mock implementation of the `Pipeline.Providers.AIProvider` behaviour.
  """

  @behaviour Pipeline.Providers.AIProvider

  # `@impl true` makes the compiler check this function against the
  # behaviour's callback list, so the mock cannot silently drift from it.
  @impl true
  def query(prompt, options) do
    # Deterministic mock responses
    # NOTE(review): the body is elided in this guide; it must return
    # {:ok, map} | {:error, String.t()} per the behaviour.
  end
end
Configuration-Driven Provider Selection
# config/test.exs
# Test environment: both provider slots point at the mock modules.
config :pipeline,
  providers: %{
    ai_provider: Pipeline.Test.Mocks.ClaudeProvider,
    gemini_provider: Pipeline.Test.Mocks.GeminiProvider
  }
# config/dev.exs
# Development environment: both provider slots point at the live modules.
config :pipeline,
  providers: %{
    ai_provider: Pipeline.Providers.ClaudeProvider,
    gemini_provider: Pipeline.Providers.GeminiProvider
  }
Test Mode Management
# Environment variable controls test mode
# TEST_MODE=mock mix test # Uses mocks
# TEST_MODE=live mix test # Uses live services
# TEST_MODE=mixed mix test # Uses mocks for unit, live for integration
defmodule Pipeline.TestMode do
  @moduledoc """
  Chooses a provider implementation according to the `TEST_MODE`
  environment variable (`"mock"`, `"live"` or `"mixed"`; defaults to `"mock"`).
  """

  @doc """
  Returns the provider for `service` under the current test mode.

  Dispatches to `mock_provider/1`, `live_provider/1` or `mixed_provider/1`.
  NOTE(review): those helpers are not shown in this snippet and must be
  defined in this module for it to compile.

  Raises `ArgumentError` when `TEST_MODE` holds an unsupported value.
  """
  def provider_for(service) do
    case get_test_mode() do
      :mock -> mock_provider(service)
      :live -> live_provider(service)
      :mixed -> mixed_provider(service)
    end
  end

  # Maps the raw environment string onto a closed set of atoms instead of
  # calling String.to_atom/1 on external input: atoms are never garbage
  # collected, and a typo would otherwise surface as an opaque CaseClauseError.
  defp get_test_mode do
    case System.get_env("TEST_MODE", "mock") do
      "mock" ->
        :mock

      "live" ->
        :live

      "mixed" ->
        :mixed

      other ->
        raise ArgumentError,
              "invalid TEST_MODE #{inspect(other)}; expected \"mock\", \"live\" or \"mixed\""
    end
  end
end
Mock Implementation Strategy
1. Deterministic Responses
defmodule Pipeline.Test.Mocks.ClaudeProvider do
  @moduledoc """
  Deterministic mock of the `Pipeline.Providers.AIProvider` behaviour.

  The response depends only on the prompt; the options argument is ignored.
  """

  @behaviour Pipeline.Providers.AIProvider

  # Predictable responses based on input patterns.
  # `@impl true` (set once, on the first clause) has the compiler verify
  # query/2 against the behaviour's callbacks.
  @impl true
  def query("simple test", _opts) do
    {:ok,
     %{
       text: "Mock response for simple test",
       success: true,
       cost: 0.001
     }}
  end

  def query("error test", _opts) do
    {:error, "Mock error for testing"}
  end

  # Catch-all for any other binary prompt: echoes at most the first 50
  # characters of the prompt. Non-binary prompts match no clause and raise
  # FunctionClauseError.
  def query(prompt, _opts) when is_binary(prompt) do
    {:ok,
     %{
       text: "Mock response for: #{String.slice(prompt, 0, 50)}...",
       success: true,
       cost: 0.001
     }}
  end
end
2. Stateful Mocks for Complex Scenarios
defmodule Pipeline.Test.Mocks.StatefulClaudeProvider do
  @moduledoc """
  Stateful mock of the `Pipeline.Providers.AIProvider` behaviour, backed by a
  GenServer registered under this module's name.

  Start it with `start_link/1` before calling `query/2`; otherwise the
  `GenServer.call/2` inside `query/2` exits because no process is registered.
  """

  @behaviour Pipeline.Providers.AIProvider

  use GenServer

  # Client API

  @doc """
  Starts the mock and registers it under `#{inspect(__MODULE__)}`.

  `initial_state` is handed unchanged to `generate_response/3` on the first
  query. NOTE(review): the default of `%{}` is an assumption; adjust it to
  whatever shape `generate_response/3` expects.
  """
  def start_link(initial_state \\ %{}) do
    GenServer.start_link(__MODULE__, initial_state, name: __MODULE__)
  end

  @doc """
  Forwards the query to the registered server process and returns its reply.
  """
  @impl Pipeline.Providers.AIProvider
  def query(prompt, options) do
    GenServer.call(__MODULE__, {:query, prompt, options})
  end

  # Server callbacks

  @impl GenServer
  def init(initial_state), do: {:ok, initial_state}

  # Track conversation state, turn counts, etc.
  @impl GenServer
  def handle_call({:query, prompt, options}, _from, state) do
    # Simulate turn limits, state transitions, etc.
    # NOTE(review): generate_response/3 is not shown in this snippet; it must
    # return a {response, new_state} tuple.
    {response, new_state} = generate_response(prompt, options, state)
    {:reply, response, new_state}
  end
end
3. Scenario-Based Testing
defmodule Pipeline.Test.Scenarios do
  @moduledoc """
  Canned mock configurations for whole-workflow test scenarios.
  """

  alias Pipeline.Test.Mocks.ClaudeProvider, as: MockClaude

  @doc """
  Configures the Claude mock for a complete, successful workflow by
  registering three entries through `set_responses/1`.
  """
  def setup_successful_workflow do
    [
      "Create a Python hello world program",
      "Add error handling to the program",
      "Write unit tests for the program"
    ]
    |> MockClaude.set_responses()
  end

  @doc """
  Configures the Claude mock to simulate a failure: an
  "API rate limit exceeded" error is registered for turn 2.
  """
  def setup_failure_scenario do
    MockClaude.set_error_on_turn(2, "API rate limit exceeded")
  end
end
Test Organization
Directory Structure
test/
├── unit/                      # Fast, isolated tests with mocks
│   ├── pipeline/
│   │   ├── executor_test.exs
│   │   ├── step/
│   │   │   ├── claude_test.exs
│   │   │   └── gemini_test.exs
│   │   └── workflow_loader_test.exs
│   └── support/
├── integration/               # Cross-component tests
│   ├── end_to_end_test.exs
│   ├── workflow_execution_test.exs
│   └── provider_integration_test.exs
├── fixtures/                  # Test data and configurations
│   ├── workflows/
│   │   ├── simple_workflow.yaml
│   │   └── complex_workflow.yaml
│   └── responses/
│       ├── claude_responses.json
│       └── gemini_responses.json
└── support/                   # Test helpers and utilities
    ├── test_case.exs
    ├── factory.ex
    └── mocks/
        ├── claude_provider.ex
        └── gemini_provider.ex
Test Categories
Unit Tests (Always Mocked)
defmodule Pipeline.ExecutorTest do
  # Unit test: runs in mock mode.
  use Pipeline.Test.Case, mode: :mock

  alias Pipeline.Executor

  test "executes workflow steps in sequence" do
    outcome =
      :simple_workflow
      |> build()
      |> Executor.execute()

    # Execution must succeed and yield exactly three step results.
    assert {:ok, step_results} = outcome
    assert length(step_results) == 3
  end
end
Integration Tests (Configurable)
defmodule Pipeline.WorkflowExecutionTest do
  # Integration test: declared with the :configurable mode.
  use Pipeline.Test.Case, mode: :configurable

  alias Pipeline.Executor

  @tag :integration
  test "complete workflow execution" do
    outcome =
      "workflows/simple_workflow.yaml"
      |> load_fixture()
      |> Executor.execute()

    assert {:ok, step_results} = outcome

    # The final step must report success.
    assert get_in(step_results, ["final_step", "success"]) == true
  end
end
Live Tests (Live Services Only)
defmodule Pipeline.LiveIntegrationTest do
  # Live test: calls the real Claude provider rather than a mock.
  use Pipeline.Test.Case, mode: :live

  alias Pipeline.Providers.ClaudeProvider

  @tag :live
  @tag timeout: 30_000
  test "actual Claude API integration" do
    # Only runs with TEST_MODE=live or TEST_MODE=mixed
    outcome = ClaudeProvider.query("Write a simple hello world in Python", %{})

    assert {:ok, reply} = outcome
    assert reply.success == true
    assert is_binary(reply.text)
  end
end
Mock Data Management
Response Fixtures
# test/fixtures/responses/claude_responses.json
{
"simple_python_program": {
"text": "print('Hello, World!')\n\n# This is a simple Python program...",
"success": true,
"cost": 0.0023
},
"code_review": {
"text": "Code review feedback:\n1. Consider adding type hints...",
"success": true,
"cost": 0.0156
}
}
Factory Pattern
defmodule Pipeline.Test.Factory do
  @moduledoc """
  Builds plain-map test data (workflows and steps) keyed with strings.
  """

  @doc """
  Builds the test data identified by `name`.

  Supported names:

    * `:workflow` - a workflow named `"test_workflow"` with one Claude step
      followed by one Gemini step
    * `:claude_step` - a single step of type `"claude"`
    * `:gemini_step` - a single step of type `"gemini"`

  Any other name raises `FunctionClauseError`.
  """
  def build(:workflow) do
    %{
      "workflow" => %{
        "name" => "test_workflow",
        "steps" => [
          build(:claude_step),
          build(:gemini_step)
        ]
      }
    }
  end

  def build(:claude_step) do
    %{
      "name" => "claude_task",
      "type" => "claude",
      "prompt" => [%{"type" => "static", "content" => "Test prompt"}]
    }
  end

  # build(:workflow) depends on this clause; without it the call above
  # raises FunctionClauseError.
  # NOTE(review): the shape mirrors :claude_step; confirm against the fields
  # the Gemini step actually expects.
  def build(:gemini_step) do
    %{
      "name" => "gemini_task",
      "type" => "gemini",
      "prompt" => [%{"type" => "static", "content" => "Test prompt"}]
    }
  end
end
Running Tests
Command Examples
# Run only the unit suite under test/unit/ (fast, always mocked)
mix test test/unit/
# Run the integration suite against mocks (TEST_MODE=mock)
TEST_MODE=mock mix test test/integration/
# Run the integration suite against live services; --include live also runs
# tests tagged :live (presumably excluded by default - confirm in test_helper.exs)
TEST_MODE=live mix test test/integration/ --include live
# Run the whole suite in mixed mode (mocks for unit, live for integration)
TEST_MODE=mixed mix test
# Select tests by tag:
#   --only TAG     runs just the tests carrying that tag
#   --exclude TAG  runs everything except tests carrying that tag
mix test --only integration
mix test --only live
mix test --exclude live # Skip tests tagged :live
Continuous Integration
# CI pipeline uses mocks for speed and reliability.
# The coverage switch for `mix test` is --cover (there is no --coverage flag).
TEST_MODE=mock mix test --cover
# Nightly build runs live tests; --timeout is in milliseconds (300000 ms = 5 minutes)
TEST_MODE=live mix test --include live --timeout 300000
Development Workflow
1. Writing New Features
- Start with unit tests using mocks
- Implement the feature with provider interfaces
- Add integration tests that work with mocks
- Test with live services locally
- Update mock responses based on live behavior
2. Debugging Issues
- Reproduce with mocks first (faster iteration)
- Compare mock vs live behavior
- Update mocks to match live service behavior
- Fix implementation based on findings
3. Adding New External Services
- Define provider behavior/interface
- Create mock implementation
- Implement live provider
- Add configuration switching
- Write comprehensive tests for both
Quality Assurance
Test Coverage Requirements
- Unit Tests: 95% line coverage minimum
- Integration Tests: All critical paths covered
- Mock Accuracy: Regular validation against live services
Performance Benchmarks
defmodule Pipeline.Test.Performance do
  @moduledoc """
  Benchmark-style test that bounds workflow execution time under mocks.
  """

  # Uses the project test case in mock mode, like the other test modules in
  # this guide, instead of a bare ExUnit.Case: the timing budget below is
  # stated for mocks, and build/1 is called unqualified.
  # NOTE(review): assumes Pipeline.Test.Case brings build/1 into scope, as the
  # unit-test example implies - confirm.
  use Pipeline.Test.Case, mode: :mock

  # Upper bound for one workflow run, in microseconds (5 seconds).
  @max_duration_us 5_000_000

  @tag :benchmark
  test "workflow execution performance" do
    # Built outside the timed section so only execution is measured.
    workflow = build(:complex_workflow)

    {elapsed_us, result} =
      :timer.tc(fn ->
        Pipeline.Executor.execute(workflow)
      end)

    # A run that fails immediately would otherwise look "fast" and pass.
    assert {:ok, _results} = result

    # Workflow should complete within 5 seconds with mocks
    assert elapsed_us < @max_duration_us
  end
end
Mock Validation
# Periodically validate mocks against live services
defmodule Pipeline.Test.MockValidation do
  @moduledoc """
  Compares mock provider responses with live provider responses for a set of
  validation cases.
  """

  # Without this, `test` and `assert`-style macros are undefined here.
  use ExUnit.Case

  # Bind the short names used below to the mock and live Claude providers.
  alias Pipeline.Providers.ClaudeProvider, as: LiveProvider
  alias Pipeline.Test.Mocks.ClaudeProvider, as: MockProvider

  @tag :validation
  test "mock responses match live service behavior" do
    # NOTE(review): load_validation_cases/0 and assert_responses_equivalent/2
    # are not shown in this snippet; they must be defined or imported here.
    test_cases = load_validation_cases()

    for test_case <- test_cases do
      mock_response = MockProvider.query(test_case.prompt, test_case.options)
      live_response = LiveProvider.query(test_case.prompt, test_case.options)
      assert_responses_equivalent(mock_response, live_response)
    end
  end
end
Best Practices
1. Mock Design
- Deterministic: Same input always produces same output
- Realistic: Mirror real service behavior patterns
- Fast: No network calls, minimal computation
- Comprehensive: Cover error cases and edge conditions
2. Test Data
- Version Controlled: All fixtures in git
- Realistic: Based on actual service responses
- Minimal: Only include necessary data
- Documented: Clear comments explaining test scenarios
3. Configuration
- Environment Driven: Use env vars for mode selection
- Default Safe: Default to mocks for safety
- Override Capable: Easy to switch modes for debugging
- CI Friendly: Reliable in automated environments
This testing architecture provides a robust foundation for developing and maintaining the pipeline orchestration system with confidence in both mocked and live environments.