ExBurn.CubeclBridge (ex_burn v0.1.0)

Bridge to ExCubecl for GPU execution via Burn's CubeCL backend.

CubeCL (Compute Unified Backend for Compute Language) is Burn's GPU compute abstraction layer that supports:

CUDA (NVIDIA GPUs)
Metal (Apple GPUs — iOS, macOS)
Vulkan (Android, Linux, Windows)
WebGPU (Browser-based GPU)
ROCm (AMD GPUs)

This module delegates to the ExCubecl library (v0.4.0) for all GPU operations. ExCubecl buffers (opaque references) are used for GPU memory throughout.

Usage

# Check if a GPU is available
if ExBurn.CubeclBridge.available?() do
  # Initialize the GPU context
  {:ok, ctx} = ExBurn.CubeclBridge.init(:metal)

  # Check device capabilities
  caps = ExBurn.CubeclBridge.device_capabilities(ctx)

  # Allocate GPU buffer and run a kernel
  {:ok, buf} = ExBurn.CubeclBridge.allocate_gpu(ctx, [4, 4], :f32)
  {:ok, result} = ExBurn.CubeclBridge.execute(ctx, :add, [buf, buf])

  # Pipeline for multi-kernel execution
  {:ok, pid} = ExBurn.CubeclBridge.pipeline()
  :ok = ExBurn.CubeclBridge.pipeline_add(pid, :add, [buf, buf], buf)
  :ok = ExBurn.CubeclBridge.pipeline_add(pid, :relu, [buf], buf)
  {:ok, commands} = ExBurn.CubeclBridge.pipeline_run(pid)
  :ok = ExBurn.CubeclBridge.pipeline_free(pid)
end

Summary

Types

backend()

buffer()

command_id()

context()

kernel()

pipeline_id()

Functions

allocate_gpu(ctx, shape, type)

Allocates a GPU buffer with the given shape and type.

async_poll(command_id)

Polls the status of an asynchronous command.

async_submit(command)

Submits a command for asynchronous execution.

async_wait(command_id)

Blocks until the given command completes.

available?()

Checks whether a GPU device is available via ExCubecl.

available_backends()

Returns a list of available GPU backends on this system.

buffer_dtype(buf)

Returns the data type of a GPU buffer.

buffer_read(buf)

Reads the raw binary data from a GPU buffer.

buffer_read!(buf)

Reads the raw binary data from a GPU buffer, raising on error.

buffer_shape(buf)

Returns the shape of a GPU buffer.

buffer_size(buf)

Returns the size of a GPU buffer in bytes.

compile_kernel(ctx, kernel_type, opts \\ [])

Compiles a compute kernel for the given backend.

destroy(ctx)

Destroys the GPU context and frees all associated resources.

device_capabilities(ctx)

Returns the capabilities of the GPU device.

device_count()

Returns the number of GPU devices available.

device_to_host(ctx, buf)

Copies data from a GPU buffer to host (CPU) as an Nx tensor.

execute(ctx, kernel_type, inputs, opts \\ [])

Executes a compute kernel on the GPU.

free(ctx, buf)

Frees a GPU buffer.

host_to_device(ctx, tensor)

Copies data from host (CPU) to a new GPU buffer.

init(backend, opts \\ [])

Initializes a GPU compute context for the given backend.

kernels()

Returns the list of kernel names supported by ExCubecl.

memory_total(ctx)

Returns the total available GPU memory (in bytes).

memory_used(ctx)

Returns the amount of GPU memory currently in use (in bytes).

pipeline()

Creates a new pipeline for multi-kernel execution.

pipeline_add(pipeline_id, kernel, inputs, output, params \\ %{})

Adds a kernel command to a pipeline.

pipeline_add_struct(pipeline_id, command)

Adds a pre-built %ExCubecl.Command{} struct to a pipeline.

pipeline_free(pipeline_id)

Frees a pipeline and its associated resources.

pipeline_run(pipeline_id)

Executes all commands in the pipeline and returns their command IDs.

supported_dtypes()

Returns the list of supported data types.

synchronize(ctx)

Synchronizes the GPU context, blocking until all queued operations complete.

version()

Returns the ExCubecl library version string.

Types

backend()

@type backend() :: :cuda | :metal | :vulkan | :wgpu | :rocm

buffer()

@type buffer() :: reference()

command_id()

@type command_id() :: non_neg_integer()

context()

@type context() :: reference()

kernel()

@type kernel() :: atom()

pipeline_id()

@type pipeline_id() :: non_neg_integer()

Functions

allocate_gpu(ctx, shape, type)

@spec allocate_gpu(context(), [non_neg_integer()], atom()) ::
  {:ok, buffer()} | {:error, String.t()}

Allocates a GPU buffer with the given shape and type.

Returns an ExCubecl buffer reference.

async_poll(command_id)

@spec async_poll(command_id()) ::
  {:ok, :pending | :running | :completed | :failed} | {:error, term()}

Polls the status of an asynchronous command.

Returns :pending, :running, :completed, or :failed.

async_submit(command)

@spec async_submit(ExCubecl.Command.t()) :: {:ok, command_id()} | {:error, term()}

Submits a command for asynchronous execution.

Returns a command ID that can be polled or waited on.

async_wait(command_id)

@spec async_wait(command_id()) :: :ok | {:error, term()}

Blocks until the given command completes.

available?()

@spec available?() :: boolean()

Checks whether a GPU device is available via ExCubecl.

available_backends()

@spec available_backends() :: [backend()]

Returns a list of available GPU backends on this system.

Delegates to ExCubecl availability checks and platform detection.

buffer_dtype(buf)

@spec buffer_dtype(buffer()) :: {:ok, String.t()} | {:error, term()}

Returns the data type of a GPU buffer.

buffer_read(buf)

@spec buffer_read(buffer()) :: {:ok, binary()} | {:error, term()}

Reads the raw binary data from a GPU buffer.

buffer_read!(buf)

@spec buffer_read!(buffer()) :: binary()

Reads the raw binary data from a GPU buffer, raising on error.

buffer_shape(buf)

@spec buffer_shape(buffer()) :: {:ok, [non_neg_integer()]} | {:error, term()}

Returns the shape of a GPU buffer.

buffer_size(buf)

@spec buffer_size(buffer()) :: {:ok, non_neg_integer()} | {:error, term()}

Returns the size of a GPU buffer in bytes.

compile_kernel(ctx, kernel_type, opts \\ [])

@spec compile_kernel(context(), kernel(), keyword()) ::
  {:ok, reference()} | {:error, String.t()}

Compiles a compute kernel for the given backend.

Kernels are managed by ExCubecl; this function verifies the kernel is available and returns a reference.

destroy(ctx)

@spec destroy(context()) :: :ok

Destroys the GPU context and frees all associated resources.

device_capabilities(ctx)

@spec device_capabilities(context()) :: map()

Returns the capabilities of the GPU device.

device_count()

@spec device_count() :: {:ok, non_neg_integer()} | {:error, term()}

Returns the number of GPU devices available.

device_to_host(ctx, buf)

@spec device_to_host(context(), buffer()) ::
  {:ok, Nx.Tensor.t()} | {:error, String.t()}

Copies data from a GPU buffer to host (CPU) as an Nx tensor.

Parameters

ctx — The GPU context
buf — An ExCubecl buffer reference

Returns

{:ok, Nx.Tensor.t()} on success, {:error, reason} on failure.

execute(ctx, kernel_type, inputs, opts \\ [])

@spec execute(context(), kernel(), [buffer()], keyword()) ::
  {:ok, buffer()} | {:error, String.t()}

Executes a compute kernel on the GPU.

Parameters

ctx — The GPU context
kernel_type — The kernel to execute (atom)
inputs — List of ExCubecl buffer references
opts — Options (currently unused, reserved for future use)

Returns

{:ok, result_buffer} on success, {:error, reason} on failure.

free(ctx, buf)

@spec free(context(), buffer()) :: :ok

Frees a GPU buffer.

Note: ExCubecl buffers are garbage-collected when their reference goes out of scope. This function is a no-op for API compatibility.

host_to_device(ctx, tensor)

@spec host_to_device(context(), Nx.Tensor.t()) ::
  {:ok, buffer()} | {:error, String.t()}

Copies data from host (CPU) to a new GPU buffer.

Parameters

ctx — The GPU context
tensor — An Nx tensor to copy to the GPU

Returns

{:ok, buffer} on success, {:error, reason} on failure.

init(backend, opts \\ [])

@spec init(
  backend(),
  keyword()
) :: {:ok, context()} | {:error, String.t()}

Initializes a GPU compute context for the given backend.

Parameters

backend — The GPU backend to use (:cuda, :metal, :vulkan, :wgpu, :rocm)
opts — Options (currently unused, reserved for future use)

Returns

{:ok, context} on success, {:error, reason} on failure.

kernels()

@spec kernels() :: {:ok, [String.t()]} | {:error, term()}

Returns the list of kernel names supported by ExCubecl.

memory_total(ctx)

@spec memory_total(context()) :: non_neg_integer()

Returns the total available GPU memory (in bytes).

Note: ExCubecl does not currently expose memory statistics, so this function always returns 0.

memory_used(ctx)

@spec memory_used(context()) :: non_neg_integer()

Returns the amount of GPU memory currently in use (in bytes).

Note: ExCubecl does not currently expose memory usage statistics. This always returns 0.

pipeline()

@spec pipeline() :: {:ok, pipeline_id()} | {:error, term()}

Creates a new pipeline for multi-kernel execution.

pipeline_add(pipeline_id, kernel, inputs, output, params \\ %{})

@spec pipeline_add(pipeline_id(), atom(), [buffer()], buffer(), map()) ::
  :ok | {:error, term()}

Adds a kernel command to a pipeline.

Parameters

pipeline_id — The pipeline to add to
kernel — Kernel name (atom)
inputs — List of input buffer references
output — Output buffer reference
params — Additional parameters (optional, default: %{})

pipeline_add_struct(pipeline_id, command)

@spec pipeline_add_struct(pipeline_id(), ExCubecl.Command.t()) ::
  :ok | {:error, term()}

Adds a pre-built %ExCubecl.Command{} struct to a pipeline.

pipeline_free(pipeline_id)

@spec pipeline_free(pipeline_id()) :: :ok | {:error, term()}

Frees a pipeline and its associated resources.

pipeline_run(pipeline_id)

@spec pipeline_run(pipeline_id()) :: {:ok, [command_id()]} | {:error, term()}

Executes all commands in the pipeline and returns their command IDs.

supported_dtypes()

@spec supported_dtypes() :: [atom()]

Returns the list of supported data types.

synchronize(ctx)

@spec synchronize(context()) :: :ok

Synchronizes the GPU context, blocking until all queued operations complete.

Note: ExCubecl does not expose a global synchronization primitive. Use async_wait/1 on specific command IDs for fine-grained control.

version()

@spec version() :: String.t()

Returns the ExCubecl library version string.