Configuration for the KV cache.
Summary
Functions
Python method CacheConfig._validate_cache_dtype.
WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.
Python method CacheConfig.metrics_info.
Constructs CacheConfig.
Python method CacheConfig.verify_with_parallel_config.
Types
Functions
@spec _validate_cache_dtype(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method CacheConfig._validate_cache_dtype.
Parameters
cache_dtype (term())
Returns
term()
@spec block_size(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec cache_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec calculate_kv_scales(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec compute_hash( SnakeBridge.Ref.t(), keyword() ) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}
WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.
Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.
Returns
String.t()
@spec cpu_kvcache_space_bytes(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec cpu_offload_gb(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec enable_prefix_caching(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec gpu_memory_utilization(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_attention_free(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec kv_cache_memory_bytes(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec kv_offloading_backend(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec kv_offloading_size(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec kv_sharing_fast_prefill(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mamba_block_size(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mamba_cache_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mamba_page_size_padded(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mamba_ssm_cache_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec metrics_info( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method CacheConfig.metrics_info.
Returns
term()
@spec new(term(), term(), term(), keyword()) :: {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}
Constructs CacheConfig.
Parameters
dataclass_self__ (term())
args (term())
kwargs (term())
@spec num_cpu_blocks(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec num_gpu_blocks(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec num_gpu_blocks_override(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec prefix_caching_hash_algo(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec sliding_window(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec swap_space(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec verify_with_parallel_config(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Python method CacheConfig.verify_with_parallel_config.
Parameters
parallel_config (term())
Returns
nil