A collection of resilience and concurrency primitives for Elixir.
| Module | Description |
|---|---|
| Resiliency.CircuitBreaker | Circuit breaker — stop calling a failing service, auto-recover with half-open probing |
| Resiliency.BackoffRetry | Retry with configurable backoff strategies (constant, exponential, linear, jitter) |
| Resiliency.Hedged | Hedged requests — send a backup after a percentile-based delay to cut tail latency |
| Resiliency.SingleFlight | Deduplicate concurrent calls to the same key so the function executes only once |
| Resiliency.Race | Race concurrent functions — first success wins, losers are cancelled |
| Resiliency.AllSettled | Run all concurrently, collect every result regardless of failures |
| Resiliency.Map | Bounded-concurrency parallel map with fail-fast cancellation |
| Resiliency.FirstOk | Sequential fallback chain — try each function until one succeeds |
| Resiliency.WeightedSemaphore | Weighted semaphore with FIFO fairness and timeout support |
| Resiliency.Bulkhead | Bulkhead — isolate workloads with per-partition concurrency limits and rejection semantics |
| Resiliency.RateLimiter | Token-bucket rate limiter — control request frequency with burst support and retry-after hints |
| Resiliency.Telemetry | Built-in :telemetry events — spans and point events for every module |
Installation
Add resiliency to your list of dependencies in mix.exs:
def deps do
[
{:resiliency, "~> 0.6.0"}
]
end
Quick Start
BackoffRetry
Resiliency.BackoffRetry.retry(fn ->
case HttpClient.get(url) do
{:ok, %{status: 200}} = success -> success
{:ok, %{status: 503}} -> raise "service unavailable"
{:error, reason} -> raise "request failed: #{inspect(reason)}"
end
end, max_attempts: 3, backoff: :exponential)
Hedged
# Start a tracker (typically in your supervision tree)
{:ok, tracker} = Resiliency.Hedged.start_link(name: :my_hedged, percentile: 95)
# Hedged call — sends a backup request if the first is slower than p95
{:ok, result} = Resiliency.Hedged.run(:my_hedged, fn -> expensive_call() end, [])
SingleFlight
# Start under a supervisor
{:ok, sf} = Resiliency.SingleFlight.start_link(name: :my_sf)
# 100 concurrent callers with the same key — function runs only once
{:ok, data} = Resiliency.SingleFlight.flight(:my_sf, "user:123", fn ->
Database.get_user(123)
end)
Task Combinators
# Race — first success wins, losers are killed
{:ok, fastest} = Resiliency.Race.run([
fn -> fetch_from_cache() end,
fn -> fetch_from_db() end
])
# Parallel map with bounded concurrency
{:ok, results} = Resiliency.Map.run(urls, &fetch/1, max_concurrency: 10)
# all_settled — never short-circuits, collects all results
results = Resiliency.AllSettled.run([fn -> risky_op() end, ...])
# first_ok — sequential fallback chain
{:ok, value} = Resiliency.FirstOk.run([
fn -> try_cache() end,
fn -> try_db() end,
fn -> try_api() end
])
CircuitBreaker
# Start under a supervisor
children = [{Resiliency.CircuitBreaker, name: :my_breaker, failure_rate_threshold: 0.5}]
Supervisor.start_link(children, strategy: :one_for_one)
# Wrapped execution
case Resiliency.CircuitBreaker.call(:my_breaker, fn -> HttpClient.get(url) end) do
{:ok, response} -> handle_response(response)
{:error, :circuit_open} -> {:error, :service_degraded}
{:error, reason} -> {:error, reason}
end
WeightedSemaphore
# Start under a supervisor
children = [{Resiliency.WeightedSemaphore, name: :my_sem, max: 10}]
# Acquire with weight
{:ok, result} = Resiliency.WeightedSemaphore.acquire(:my_sem, 3, fn ->
heavy_operation()
end)
# Non-blocking try
:rejected = Resiliency.WeightedSemaphore.try_acquire(:my_sem, 5, fn -> :work end)
Bulkhead
# Start under a supervisor
children = [{Resiliency.Bulkhead, name: :my_bulkhead, max_concurrent: 10}]
Supervisor.start_link(children, strategy: :one_for_one)
# Basic call — rejects immediately when full (default max_wait: 0)
case Resiliency.Bulkhead.call(:my_bulkhead, fn -> HttpClient.get(url) end) do
{:ok, response} -> handle_response(response)
{:error, :bulkhead_full} -> {:error, :overloaded}
{:error, reason} -> {:error, reason}
end
# With waiting — waits up to 1s for a permit
Resiliency.Bulkhead.call(:my_bulkhead, fn -> :work end, max_wait: 1_000)
RateLimiter
# Start under a supervisor — 100 req/s with burst of 10
children = [{Resiliency.RateLimiter, name: :my_rate_limiter, rate: 100.0, burst_size: 10}]
Supervisor.start_link(children, strategy: :one_for_one)
# Basic call — rejects with retry hint when bucket is empty
case Resiliency.RateLimiter.call(:my_rate_limiter, fn -> HttpClient.get(url) end) do
{:ok, response} -> handle_response(response)
{:error, {:rate_limited, retry_after_ms}} -> {:error, {:overloaded, retry_after_ms}}
{:error, reason} -> {:error, reason}
end
# Weighted call — expensive operations consume more tokens
Resiliency.RateLimiter.call(:my_rate_limiter, fn -> bulk_operation() end, weight: 5)
Telemetry
All modules emit :telemetry events out of the box — no configuration required. Attach handlers using the standard :telemetry API:
# Log every retry
:telemetry.attach(
"my-app-retry-logger",
[:resiliency, :retry, :retry],
fn _event, %{delay: delay}, %{attempt: attempt, error: error}, _config ->
Logger.warning("Retry attempt=#{attempt} delay=#{delay}ms error=#{inspect(error)}")
end,
nil
)
# Track circuit breaker state changes
:telemetry.attach(
"my-app-cb-state",
[:resiliency, :circuit_breaker, :state_change],
fn _event, _measurements, %{name: name, from: from, to: to}, _config ->
MyApp.Metrics.increment("circuit_breaker.state_change",
tags: [name: name, from: from, to: to])
end,
nil
)
See Resiliency.Telemetry for the full event catalogue with all measurements and metadata keys.
Migration from Individual Packages
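If you were using any of the standalone packages (backoff_retry, hedged, single_flight, task_extension, weighted_semaphore), update your dependencies and add a Resiliency. prefix to your module references. The one exception shown in the table below is TaskExtension.race(...), which becomes Resiliency.Race.run(...) rather than simply gaining the prefix: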
| Before | After |
|---|---|
| BackoffRetry.retry(...) | Resiliency.BackoffRetry.retry(...) |
| Hedged.run(...) | Resiliency.Hedged.run(...) |
| SingleFlight.flight(...) | Resiliency.SingleFlight.flight(...) |
| TaskExtension.race(...) | Resiliency.Race.run(...) |
| WeightedSemaphore.acquire(...) | Resiliency.WeightedSemaphore.acquire(...) |
License
MIT — see LICENSE.