Vllm.Config.CacheConfig (VLLM v0.3.0)

Copy Markdown View Source

Configuration for the KV cache.

Summary

Types

t()

@opaque t()

Functions

_validate_cache_dtype(ref, cache_dtype, opts \\ [])

@spec _validate_cache_dtype(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

Python method CacheConfig._validate_cache_dtype.

Parameters

  • cache_dtype (term())

Returns

  • term()

block_size(ref)

@spec block_size(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

cache_dtype(ref)

@spec cache_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

calculate_kv_scales(ref)

@spec calculate_kv_scales(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

compute_hash(ref, opts \\ [])

@spec compute_hash(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.

Returns

  • String.t()

cpu_kvcache_space_bytes(ref)

@spec cpu_kvcache_space_bytes(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

cpu_offload_gb(ref)

@spec cpu_offload_gb(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

enable_prefix_caching(ref)

@spec enable_prefix_caching(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

gpu_memory_utilization(ref)

@spec gpu_memory_utilization(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

is_attention_free(ref)

@spec is_attention_free(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_cache_memory_bytes(ref)

@spec kv_cache_memory_bytes(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_offloading_backend(ref)

@spec kv_offloading_backend(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_offloading_size(ref)

@spec kv_offloading_size(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

kv_sharing_fast_prefill(ref)

@spec kv_sharing_fast_prefill(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_block_size(ref)

@spec mamba_block_size(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_cache_dtype(ref)

@spec mamba_cache_dtype(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_page_size_padded(ref)

@spec mamba_page_size_padded(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

mamba_ssm_cache_dtype(ref)

@spec mamba_ssm_cache_dtype(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

metrics_info(ref, opts \\ [])

@spec metrics_info(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, term()} | {:error, Snakepit.Error.t()}

Python method CacheConfig.metrics_info.

Returns

  • term()

new(dataclass_self__, args, kwargs, opts \\ [])

@spec new(term(), term(), term(), keyword()) ::
  {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}

Constructs CacheConfig.

Parameters

  • dataclass_self__ (term())
  • args (term())
  • kwargs (term())

num_cpu_blocks(ref)

@spec num_cpu_blocks(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

num_gpu_blocks(ref)

@spec num_gpu_blocks(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

num_gpu_blocks_override(ref)

@spec num_gpu_blocks_override(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

prefix_caching_hash_algo(ref)

@spec prefix_caching_hash_algo(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

sliding_window(ref)

@spec sliding_window(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

swap_space(ref)

@spec swap_space(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

verify_with_parallel_config(ref, parallel_config, opts \\ [])

@spec verify_with_parallel_config(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, nil} | {:error, Snakepit.Error.t()}

Python method CacheConfig.verify_with_parallel_config.

Parameters

  • parallel_config (term())

Returns

  • nil