Snakepit Telemetry Events
View Source
Comprehensive reference for all telemetry events emitted by Snakepit v0.6.0+.
Overview
Snakepit uses :telemetry for observability and monitoring. Events are emitted at key lifecycle points to enable:
- Performance monitoring
- Resource tracking
- Worker health monitoring
- Automatic recycling visibility
- Custom metrics and alerts
Event List
Worker Lifecycle Events
[:snakepit, :worker, :recycled]
Emitted when a worker is recycled (TTL expired, max requests reached, memory threshold exceeded, manual recycle, or worker death).
Measurements:
count: 1 - Always 1 per event
Metadata:
%{
worker_id: "pool_worker_123",
pool: :hpc_pool,
reason: :ttl_expired | :max_requests | :memory_threshold | :manual | :worker_died,
uptime_seconds: 3600,
request_count: 1234
}
Example Handler:
# Logger.info/1 is a macro, so Logger must be required before it is used.
require Logger

# Logs every worker recycle together with its reason, uptime and request count.
# The measurements map is ignored rather than pattern-matched: binding `count`
# without using it triggers an unused-variable warning.
:telemetry.attach(
  "worker-recycle-handler",
  [:snakepit, :worker, :recycled],
  fn _event, _measurements, metadata, _config ->
    Logger.info("Worker #{metadata.worker_id} recycled: #{metadata.reason}")
    Logger.info(" Uptime: #{metadata.uptime_seconds}s, Requests: #{metadata.request_count}")
  end,
  nil
)
[:snakepit, :worker, :health_check_failed]
Emitted when a worker fails a health check.
Measurements:
count: 1
Metadata:
%{
worker_id: "pool_worker_123",
pool: :hpc_pool,
reason: :worker_dead | :health_check_failed | term()
}
[:snakepit, :worker, :started]
Emitted when a worker successfully starts (future enhancement).
Measurements:
count: 1
startup_time_ms: integer()
Metadata:
%{
worker_id: "pool_worker_123",
pool: :hpc_pool,
profile: :process | :thread,
capacity: 1 | 16 # Depends on profile
}
Pool Events (Future)
[:snakepit, :pool, :saturated]
Emitted when pool reaches capacity and queues requests.
Measurements:
count: 1
queue_size: integer()
Metadata:
%{
pool: :hpc_pool,
available_workers: 0,
busy_workers: 16,
queue_size: 42
}
[:snakepit, :request, :executed]
Emitted after each request completes; the success field in the metadata indicates whether it succeeded.
Measurements:
count: 1
duration_ms: integer()
Metadata:
%{
pool: :hpc_pool,
worker_id: "pool_worker_123",
command: "compute_intensive",
success: true | false
}
Usage Examples
Basic Monitoring
# Route the worker recycle and health-check events to one shared handler
monitored_events = [
  [:snakepit, :worker, :recycled],
  [:snakepit, :worker, :health_check_failed]
]

:telemetry.attach_many(
  "snakepit-monitor",
  monitored_events,
  &MyApp.TelemetryHandler.handle_event/4,
  nil
)
defmodule MyApp.TelemetryHandler do
  @moduledoc """
  Example handler module that prints every telemetry event it receives.
  """

  @doc """
  Prints the event name, measurements and metadata, labelled "Snakepit Event".

  Returns the inspected `{event, measurements, metadata}` tuple.
  """
  def handle_event(event, measurements, metadata, _config) do
    payload = {event, measurements, metadata}
    IO.inspect(payload, label: "Snakepit Event")
  end
end
Prometheus Metrics
# Count worker recycling events by reason
# NOTE(review): `:telemetry_metrics_prometheus_core.execute/4` could not be confirmed as a
# public API. TelemetryMetricsPrometheus.Core is normally given a list of
# Telemetry.Metrics definitions (e.g. a counter tagged with :reason and :pool)
# instead of being called from inside a handler — verify before copying this example.
:telemetry.attach(
"worker-recycle-counter",
[:snakepit, :worker, :recycled],
fn _event, _measurements, metadata, _config ->
:telemetry_metrics_prometheus_core.execute(
:counter,
[:snakepit, :worker_recycled_total],
1,
%{reason: metadata.reason, pool: metadata.pool}
)
end,
nil
)
# Track worker uptime histogram
# Records the `uptime_seconds` metadata value of each recycled worker, labelled by pool.
# NOTE(review): `:telemetry_metrics_prometheus_core.execute/4` could not be confirmed as a
# public API; histograms are normally declared as Telemetry.Metrics distribution
# definitions handed to TelemetryMetricsPrometheus.Core — verify before copying.
:telemetry.attach(
"worker-uptime-histogram",
[:snakepit, :worker, :recycled],
fn _event, _measurements, metadata, _config ->
:telemetry_metrics_prometheus_core.execute(
:histogram,
[:snakepit, :worker_uptime_seconds],
metadata.uptime_seconds,
%{pool: metadata.pool}
)
end,
nil
)
LiveDashboard Integration
# In router.ex
# NOTE(review): `telemetry_poller_metrics` does not look like a documented
# `live_dashboard` option; the dashboard normally reads its metric definitions from
# the module given as `metrics:` — confirm. Also verify that the metric names below
# map onto the event names and measurements documented for Snakepit.
live_dashboard "/dashboard",
metrics: MyApp.Telemetry,
telemetry_poller_metrics: [
# Snakepit metrics
last_value("snakepit.worker.count"),
counter("snakepit.worker.recycled_total"),
summary("snakepit.worker.uptime_seconds"),
last_value("snakepit.pool.queue_size")
]
Custom Alerts
# Alert if too many workers are recycled in a short time.
#
# Telemetry handlers are stateless: the config given to :telemetry.attach/4 is passed
# unchanged to every invocation and the handler's return value is discarded, so a
# running count cannot be threaded through the config map. The count lives in an
# :atomics array instead, which the handler reaches through its config.
require Logger

window_seconds = 60
max_recycles = 10

# Slot 1: recycles seen in the current window.
# Slot 2: start of the current window, in monotonic seconds (may be negative, hence signed).
churn = :atomics.new(2, signed: true)
:atomics.put(churn, 2, System.monotonic_time(:second))

:telemetry.attach(
  "worker-recycle-alert",
  [:snakepit, :worker, :recycled],
  fn _event, _measurements, metadata, %{churn: churn, window: window, limit: limit} ->
    now = System.monotonic_time(:second)

    # Start a fresh window once the current one has expired. The reset is not atomic
    # with the increment below, which is acceptable for an alerting heuristic.
    if now - :atomics.get(churn, 2) >= window do
      :atomics.put(churn, 2, now)
      :atomics.put(churn, 1, 0)
    end

    # Increment counter
    count = :atomics.add_get(churn, 1, 1)

    # Alert if > 10 recycling events in 60 seconds
    if count > limit do
      Logger.warning("High worker churn detected: #{count} workers recycled")
      # Send alert to monitoring system
      MyApp.Monitoring.send_alert(:high_worker_churn, metadata)
    end
  end,
  %{churn: churn, window: window_seconds, limit: max_recycles}
)
Telemetry Best Practices
1. Attach Handlers Early
# In application.ex, before starting Snakepit
defmodule MyApp.Application do
  @moduledoc false

  # `use Application` marks this module as the application callback module and
  # supplies the default stop/1 callback.
  use Application

  @impl true
  def start(_type, _args) do
    # Attach telemetry handlers FIRST
    MyApp.Telemetry.attach_handlers()

    children = [
      # ... other children
      # NOTE(review): assumes Snakepit.Application can be started as a supervised child — confirm
      {Snakepit.Application, []}
    ]

    Supervisor.start_link(children, strategy: :one_for_one)
  end
end
2. Use Structured Metadata
# Good: each value is carried under its own metadata key
structured_metadata = %{pool: pool_name, worker: worker_id}
:telemetry.execute([:myapp, :custom, :event], %{count: 1}, structured_metadata)

# Bad: the values are baked into a single free-form string
unstructured_metadata = %{message: "Pool #{pool_name} worker #{worker_id}"}
:telemetry.execute([:myapp, :custom, :event], %{count: 1}, unstructured_metadata)
3. Don't Block in Handlers
# Bad: the HTTP request runs inside the handler itself
blocking_handler = fn _event, _measurements, metadata, _config ->
  HTTPoison.post("http://metrics.example.com", Jason.encode!(metadata)) # SLOW!
end

:telemetry.attach("handler", event, blocking_handler, nil)

# Good: the HTTP request is handed off to a separate task
async_handler = fn _event, _measurements, metadata, _config ->
  Task.start(fn ->
    HTTPoison.post("http://metrics.example.com", Jason.encode!(metadata))
  end)
end

:telemetry.attach("handler", event, async_handler, nil)
Debugging Telemetry
See All Events
# Print every Snakepit event listed in this reference as it is emitted
all_snakepit_events = [
  [:snakepit, :worker, :recycled],
  [:snakepit, :worker, :health_check_failed],
  [:snakepit, :worker, :started],
  [:snakepit, :pool, :saturated],
  [:snakepit, :request, :executed]
]

print_event = fn event, measurements, metadata, _config ->
  IO.inspect({event, measurements, metadata}, label: "Telemetry")
end

:telemetry.attach_many("debug-all", all_snakepit_events, print_event, nil)
Test Event Emission
# Emit a synthetic recycle event by hand to exercise attached handlers
test_metadata = %{
  worker_id: "test_worker",
  pool: :test_pool,
  reason: :manual,
  uptime_seconds: 100,
  request_count: 50
}

:telemetry.execute([:snakepit, :worker, :recycled], %{count: 1}, test_metadata)