Vllm.Config.AttentionConfig (VLLM v0.3.0)

Copy Markdown View Source

Configuration for attention mechanisms in vLLM.

Summary

Types

t()

@opaque t()

Functions

_set_from_env_if_set(ref, field_name, env_var_name, opts \\ [])

@spec _set_from_env_if_set(SnakeBridge.Ref.t(), String.t(), String.t(), keyword()) ::
  {:ok, nil} | {:error, Snakepit.Error.t()}

Set the field from the environment variable if it is set, with a deprecation warning.

Parameters

  • field_name (String.t())
  • env_var_name (String.t())

Returns

  • nil

backend(ref)

@spec backend(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

compute_hash(ref, opts \\ [])

@spec compute_hash(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}

Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.

Returns

  • String.t()

disable_flashinfer_prefill(ref)

@spec disable_flashinfer_prefill(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

disable_flashinfer_q_quantization(ref)

@spec disable_flashinfer_q_quantization(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

flash_attn_max_num_splits_for_cuda_graph(ref)

@spec flash_attn_max_num_splits_for_cuda_graph(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

flash_attn_version(ref)

@spec flash_attn_version(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

new(dataclass_self__, args, kwargs, opts \\ [])

@spec new(term(), term(), term(), keyword()) ::
  {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}

Constructs AttentionConfig.

Parameters

  • dataclass_self__ (term())
  • args (term())
  • kwargs (term())

use_cudnn_prefill(ref)

@spec use_cudnn_prefill(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

use_prefill_decode_attention(ref)

@spec use_prefill_decode_attention(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

use_trtllm_attention(ref)

@spec use_trtllm_attention(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

use_trtllm_ragged_deepseek_prefill(ref)

@spec use_trtllm_ragged_deepseek_prefill(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

validate_backend_before(ref, value, opts \\ [])

@spec validate_backend_before(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

Enable parsing of the backend enum type from a string.

Parameters

  • value (term())

Returns

  • term()