Snakepit.HealthMonitor (Snakepit v0.8.7)

View Source

Monitors worker health and crash patterns.

Tracks crashes within a rolling window and determines overall pool health. The result can be used to trigger circuit-breaker actions or alerting.

Usage

alias Snakepit.HealthMonitor

{:ok, hm} = HealthMonitor.start_link(
  name: :my_pool_health,
  pool: :default,
  max_crashes: 10,
  crash_window_ms: 60_000
)

HealthMonitor.record_crash(hm, "worker_1", %{reason: :segfault})

if HealthMonitor.healthy?(hm) do
  # Pool is healthy
else
  # Too many crashes, consider action
end

Summary

Functions

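child_spec(init_arg) - Returns a specification to start this module under a supervisor.

healthy?(server) - Returns whether the pool is considered healthy.

record_crash(server, worker_id, info \\ %{}) - Records a worker crash.

start_link(opts) - Starts a health monitor.

stats(server) - Returns comprehensive health statistics.

worker_health(server, worker_id) - Returns health status for a specific worker.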

Types

t()

@type t() :: %{
  pool: atom(),
  workers: %{required(String.t()) => worker_stats()},
  crash_window_ms: pos_integer(),
  max_crashes: pos_integer(),
  total_crashes: non_neg_integer(),
  check_interval_ms: pos_integer(),
  check_timer: reference() | nil
}

worker_stats()

@type worker_stats() :: %{
  crash_count: non_neg_integer(),
  last_crash_time: integer() | nil,
  crash_reasons: [term()]
}

Functions

child_spec(init_arg)

Returns a specification to start this module under a supervisor.

See the Supervisor module documentation for details.

healthy?(server)

@spec healthy?(GenServer.server()) :: boolean()

Returns whether the pool is considered healthy.

record_crash(server, worker_id, info \\ %{})

@spec record_crash(GenServer.server(), String.t(), map()) :: :ok

Records a worker crash.

start_link(opts)

@spec start_link(keyword()) :: GenServer.on_start()

Starts a health monitor.

Options

  • :name - GenServer name (required)
  • :pool - Pool name to monitor (required)
  • :check_interval_ms - Health check interval, in milliseconds (default: 30_000)
  • :crash_window_ms - Rolling window for crash counting, in milliseconds (default: 60_000)
  • :max_crashes - Max crashes in window before unhealthy (default: 10)

stats(server)

@spec stats(GenServer.server()) :: map()

Returns comprehensive health statistics.

worker_health(server, worker_id)

@spec worker_health(GenServer.server(), String.t()) :: map()

Returns health status for a specific worker.