Vllm.Config.CacheConfig (VLLM v0.3.0)

Configuration for the KV cache.

Summary

Functions

_validate_cache_dtype(ref, cache_dtype, opts \\ [])

Python method CacheConfig._validate_cache_dtype.

block_size(ref)

cache_dtype(ref)

calculate_kv_scales(ref)

compute_hash(ref, opts \\ [])

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

cpu_kvcache_space_bytes(ref)

cpu_offload_gb(ref)

enable_prefix_caching(ref)

gpu_memory_utilization(ref)

is_attention_free(ref)

kv_cache_memory_bytes(ref)

kv_offloading_backend(ref)

kv_offloading_size(ref)

kv_sharing_fast_prefill(ref)

mamba_block_size(ref)

mamba_cache_dtype(ref)

mamba_page_size_padded(ref)

mamba_ssm_cache_dtype(ref)

metrics_info(ref, opts \\ [])

Python method CacheConfig.metrics_info.

new(dataclass_self__, args, kwargs, opts \\ [])

Constructs CacheConfig.

num_cpu_blocks(ref)

num_gpu_blocks(ref)

num_gpu_blocks_override(ref)

prefix_caching_hash_algo(ref)

sliding_window(ref)

swap_space(ref)

verify_with_parallel_config(ref, parallel_config, opts \\ [])

Python method CacheConfig.verify_with_parallel_config.

Types

t()

@opaque t()

Functions

_validate_cache_dtype(ref, cache_dtype, opts \\ [])

@spec _validate_cache_dtype(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

Python method CacheConfig._validate_cache_dtype.

Parameters

cache_dtype (term())

Returns

term()

block_size(ref)

@spec block_size(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

cache_dtype(ref)

@spec cache_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

calculate_kv_scales(ref)

@spec calculate_kv_scales(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

compute_hash(ref, opts \\ [])

@spec compute_hash(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.

Returns

String.t()

cpu_kvcache_space_bytes(ref)

@spec cpu_kvcache_space_bytes(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

cpu_offload_gb(ref)

@spec cpu_offload_gb(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

enable_prefix_caching(ref)

@spec enable_prefix_caching(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

gpu_memory_utilization(ref)

@spec gpu_memory_utilization(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

is_attention_free(ref)

@spec is_attention_free(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_cache_memory_bytes(ref)

@spec kv_cache_memory_bytes(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_offloading_backend(ref)

@spec kv_offloading_backend(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_offloading_size(ref)

@spec kv_offloading_size(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_sharing_fast_prefill(ref)

@spec kv_sharing_fast_prefill(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_block_size(ref)

@spec mamba_block_size(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_cache_dtype(ref)

@spec mamba_cache_dtype(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_page_size_padded(ref)

@spec mamba_page_size_padded(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_ssm_cache_dtype(ref)

@spec mamba_ssm_cache_dtype(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

metrics_info(ref, opts \\ [])

@spec metrics_info(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, term()} | {:error, Snakepit.Error.t()}

Python method CacheConfig.metrics_info.

Returns

term()

new(dataclass_self__, args, kwargs, opts \\ [])

@spec new(term(), term(), term(), keyword()) ::
  {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}

Constructs CacheConfig.

Parameters

dataclass_self__ (term())
args (term())
kwargs (term())

num_cpu_blocks(ref)

@spec num_cpu_blocks(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

num_gpu_blocks(ref)

@spec num_gpu_blocks(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

num_gpu_blocks_override(ref)

@spec num_gpu_blocks_override(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

prefix_caching_hash_algo(ref)

@spec prefix_caching_hash_algo(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

sliding_window(ref)

@spec sliding_window(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

swap_space(ref)

@spec swap_space(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

verify_with_parallel_config(ref, parallel_config, opts \\ [])

@spec verify_with_parallel_config(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, nil} | {:error, Snakepit.Error.t()}

Python method CacheConfig.verify_with_parallel_config.

Parameters

parallel_config (term())

Returns

nil