CI Hex.pm Docs

A collection of resilience and concurrency primitives for Elixir.

| Module | Description |
| --- | --- |
| `Resiliency.CircuitBreaker` | Circuit breaker — stop calling a failing service, auto-recover with half-open probing |
| `Resiliency.BackoffRetry` | Retry with configurable backoff strategies (constant, exponential, linear, jitter) |
| `Resiliency.Hedged` | Hedged requests — send a backup after a percentile-based delay to cut tail latency |
| `Resiliency.SingleFlight` | Deduplicate concurrent calls to the same key so the function executes only once |
| `Resiliency.Race` | Race concurrent functions — first success wins, losers are cancelled |
| `Resiliency.AllSettled` | Run all concurrently, collect every result regardless of failures |
| `Resiliency.Map` | Bounded-concurrency parallel map with fail-fast cancellation |
| `Resiliency.FirstOk` | Sequential fallback chain — try each function until one succeeds |
| `Resiliency.WeightedSemaphore` | Weighted semaphore with FIFO fairness and timeout support |
| `Resiliency.Bulkhead` | Bulkhead — isolate workloads with per-partition concurrency limits and rejection semantics |
| `Resiliency.RateLimiter` | Token-bucket rate limiter — control request frequency with burst support and retry-after hints |
| `Resiliency.Telemetry` | Built-in `:telemetry` events — spans and point events for every module |

Installation

Add resiliency to your list of dependencies in mix.exs:

def deps do
  [
    {:resiliency, "~> 0.6.0"}
  ]
end

Quick Start

BackoffRetry

Resiliency.BackoffRetry.retry(fn ->
  case HttpClient.get(url) do
    {:ok, %{status: 200}} = success -> success
    {:ok, %{status: 503}} -> raise "service unavailable"
    {:error, reason} -> raise "request failed: #{inspect(reason)}"
  end
end, max_attempts: 3, backoff: :exponential)

Hedged

# Start a tracker (typically in your supervision tree)
{:ok, tracker} = Resiliency.Hedged.start_link(name: :my_hedged, percentile: 95)

# Hedged call — sends a backup request if the first is slower than p95
{:ok, result} = Resiliency.Hedged.run(:my_hedged, fn -> expensive_call() end, [])

SingleFlight

# Start under a supervisor
{:ok, sf} = Resiliency.SingleFlight.start_link(name: :my_sf)

# 100 concurrent callers with the same key — function runs only once
{:ok, data} = Resiliency.SingleFlight.flight(:my_sf, "user:123", fn ->
  Database.get_user(123)
end)

Task Combinators

# Race — first success wins, losers are killed
{:ok, fastest} = Resiliency.Race.run([
  fn -> fetch_from_cache() end,
  fn -> fetch_from_db() end
])

# Parallel map with bounded concurrency
{:ok, results} = Resiliency.Map.run(urls, &fetch/1, max_concurrency: 10)

# all_settled — never short-circuits, collects all results
results = Resiliency.AllSettled.run([fn -> risky_op() end, ...])

# first_ok — sequential fallback chain
{:ok, value} = Resiliency.FirstOk.run([
  fn -> try_cache() end,
  fn -> try_db() end,
  fn -> try_api() end
])

CircuitBreaker

# Start under a supervisor
children = [{Resiliency.CircuitBreaker, name: :my_breaker, failure_rate_threshold: 0.5}]
Supervisor.start_link(children, strategy: :one_for_one)

# Wrapped execution
case Resiliency.CircuitBreaker.call(:my_breaker, fn -> HttpClient.get(url) end) do
  {:ok, response} -> handle_response(response)
  {:error, :circuit_open} -> {:error, :service_degraded}
  {:error, reason} -> {:error, reason}
end

WeightedSemaphore

# Start under a supervisor
children = [{Resiliency.WeightedSemaphore, name: :my_sem, max: 10}]

# Acquire with weight
{:ok, result} = Resiliency.WeightedSemaphore.acquire(:my_sem, 3, fn ->
  heavy_operation()
end)

# Non-blocking try
:rejected = Resiliency.WeightedSemaphore.try_acquire(:my_sem, 5, fn -> :work end)

Bulkhead

# Start under a supervisor
children = [{Resiliency.Bulkhead, name: :my_bulkhead, max_concurrent: 10}]
Supervisor.start_link(children, strategy: :one_for_one)

# Basic call — rejects immediately when full (default max_wait: 0)
case Resiliency.Bulkhead.call(:my_bulkhead, fn -> HttpClient.get(url) end) do
  {:ok, response} -> handle_response(response)
  {:error, :bulkhead_full} -> {:error, :overloaded}
  {:error, reason} -> {:error, reason}
end

# With waiting — waits up to 1s for a permit
Resiliency.Bulkhead.call(:my_bulkhead, fn -> :work end, max_wait: 1_000)

RateLimiter

# Start under a supervisor — 100 req/s with burst of 10
children = [{Resiliency.RateLimiter, name: :my_rate_limiter, rate: 100.0, burst_size: 10}]
Supervisor.start_link(children, strategy: :one_for_one)

# Basic call — rejects with retry hint when bucket is empty
case Resiliency.RateLimiter.call(:my_rate_limiter, fn -> HttpClient.get(url) end) do
  {:ok, response} -> handle_response(response)
  {:error, {:rate_limited, retry_after_ms}} -> {:error, {:overloaded, retry_after_ms}}
  {:error, reason} -> {:error, reason}
end

# Weighted call — expensive operations consume more tokens
Resiliency.RateLimiter.call(:my_rate_limiter, fn -> bulk_operation() end, weight: 5)

Telemetry

All modules emit :telemetry events out of the box — no configuration required. Attach handlers using the standard :telemetry API:

# Log every retry
:telemetry.attach(
  "my-app-retry-logger",
  [:resiliency, :retry, :retry],
  fn _event, %{delay: delay}, %{attempt: attempt, error: error}, _config ->
    Logger.warning("Retry attempt=#{attempt} delay=#{delay}ms error=#{inspect(error)}")
  end,
  nil
)

# Track circuit breaker state changes
:telemetry.attach(
  "my-app-cb-state",
  [:resiliency, :circuit_breaker, :state_change],
  fn _event, _measurements, %{name: name, from: from, to: to}, _config ->
    MyApp.Metrics.increment("circuit_breaker.state_change",
      tags: [name: name, from: from, to: to])
  end,
  nil
)

See Resiliency.Telemetry for the full event catalogue with all measurements and metadata keys.

Migration from Individual Packages

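If you were using any of the standalone packages (`backoff_retry`, `hedged`, `single_flight`, `task_extension`, `weighted_semaphore`), update your dependencies and add a `Resiliency.` prefix to all module references. Note that `TaskExtension` calls change shape rather than just gaining a prefix (for example, `TaskExtension.race(...)` becomes `Resiliency.Race.run(...)`):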

| Before | After |
| --- | --- |
| `BackoffRetry.retry(...)` | `Resiliency.BackoffRetry.retry(...)` |
| `Hedged.run(...)` | `Resiliency.Hedged.run(...)` |
| `SingleFlight.flight(...)` | `Resiliency.SingleFlight.flight(...)` |
| `TaskExtension.race(...)` | `Resiliency.Race.run(...)` |
| `WeightedSemaphore.acquire(...)` | `Resiliency.WeightedSemaphore.acquire(...)` |

License

MIT — see LICENSE.