Configuration for the model.
Summary
Functions
Apply dict overrides, handling both nested configs and dict values.
Python method ModelConfig._get_convert_type.
Python method ModelConfig._get_default_convert_type.
Python method ModelConfig._get_default_runner_type.
Python method ModelConfig._get_encoder_config.
Python method ModelConfig._get_runner_type.
Determine which Transformers modeling backend class will be used if
Python method ModelConfig._lowercase_tokenizer_mode.
Skip validation if the value is None when initialisation is delayed.
Python method ModelConfig._try_verify_and_update_model_config.
Recursively updates a config or dict with nested updates.
The current version of bitsandbytes (0.46.1) with 8-bit models does not
Python method ModelConfig._verify_cuda_graph.
Python method ModelConfig._verify_quantization.
Python method ModelConfig._verify_with_expert_parallelism.
WARNING: Whenever a new field is added to this config,
Python method ModelConfig.get_and_verify_max_len.
This method returns a dictionary containing the non-default sampling
Python method ModelConfig.get_head_size.
Python method ModelConfig.get_hidden_size.
Python method ModelConfig.get_inputs_embeds_size.
Python method ModelConfig.get_layers_start_end_indices.
Returns the mamba chunk size if it exists
Python method ModelConfig.get_model_arch_config.
Get the multimodal configuration of the model.
Python method ModelConfig.get_num_attention_heads.
Python method ModelConfig.get_num_experts.
Returns the number of KV heads per GPU.
Python method ModelConfig.get_num_layers.
Python method ModelConfig.get_num_layers_by_block_type.
Get the sliding window size from the HF text config if present.
Python method ModelConfig.get_total_num_hidden_layers.
Returns the total number of KV heads.
Python method ModelConfig.get_vocab_size.
Pull model/tokenizer from Object Storage to temporary
Constructs ModelConfig.
This method attempts to retrieve the non-default values of the
Check if the model is using the Transformers modeling backend class.
Called after post_init
Python method ModelConfig.validate_quantization_before.
Python method ModelConfig.verify_dual_chunk_attention_config.
Python method ModelConfig.verify_with_parallel_config.
Types
Functions
@spec _apply_dict_overrides( SnakeBridge.Ref.t(), term(), %{optional(String.t()) => term()}, keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Apply dict overrides, handling both nested configs and dict values.
Parameters
config(term()), overrides(%{optional(String.t()) => term()})
Returns
nil
@spec _get_convert_type(SnakeBridge.Ref.t(), [String.t()], term(), term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._get_convert_type.
Parameters
architectures(list(String.t())), runner_type(term()), convert(term())
Returns
term()
@spec _get_default_convert_type(SnakeBridge.Ref.t(), [String.t()], term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._get_default_convert_type.
Parameters
architectures(list(String.t())), runner_type(term())
Returns
term()
@spec _get_default_runner_type(SnakeBridge.Ref.t(), [String.t()], keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._get_default_runner_type.
Parameters
architectures(list(String.t()))
Returns
term()
@spec _get_encoder_config( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._get_encoder_config.
Returns
term()
@spec _get_runner_type(SnakeBridge.Ref.t(), [String.t()], term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._get_runner_type.
Parameters
architectures(list(String.t())), runner(term())
Returns
term()
@spec _get_transformers_backend_cls( SnakeBridge.Ref.t(), keyword() ) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}
Determine which Transformers modeling backend class will be used if
model_impl is set to transformers or auto.
Returns
String.t()
@spec _lowercase_tokenizer_mode(SnakeBridge.Ref.t(), String.t(), keyword()) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._lowercase_tokenizer_mode.
Parameters
tokenizer_mode(String.t())
Returns
String.t()
@spec _skip_none_validation(SnakeBridge.Ref.t(), term(), term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Skip validation if the value is None when initialisation is delayed.
Parameters
value(term()), handler(term())
Returns
term()
@spec _try_verify_and_update_model_config( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig._try_verify_and_update_model_config.
Returns
term()
@spec _update_nested( SnakeBridge.Ref.t(), term(), %{optional(String.t()) => term()}, keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Recursively updates a config or dict with nested updates.
Parameters
target(term()), updates(%{optional(String.t()) => term()})
Returns
nil
@spec _verify_bnb_config( SnakeBridge.Ref.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
The current version of bitsandbytes (0.46.1) with 8-bit models does not
yet support CUDA graph.
TODO: Remove this when bitsandbytes supports it.
Returns
nil
@spec _verify_cuda_graph( SnakeBridge.Ref.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Python method ModelConfig._verify_cuda_graph.
Returns
nil
@spec _verify_quantization( SnakeBridge.Ref.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Python method ModelConfig._verify_quantization.
Returns
nil
@spec _verify_with_expert_parallelism( SnakeBridge.Ref.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Python method ModelConfig._verify_with_expert_parallelism.
Returns
nil
@spec allow_deprecated_quantization(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec allowed_local_media_path(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec allowed_media_domains(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec architecture(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec architectures(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec attn_type(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec code_revision(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec compute_hash( SnakeBridge.Ref.t(), keyword() ) :: {:ok, String.t()} | {:error, Snakepit.Error.t()}
WARNING: Whenever a new field is added to this config,
ensure that it is included in the factors list if it affects the computation graph.
Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.
Returns
String.t()
@spec config_format(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec convert(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec disable_cascade_attn(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec disable_sliding_window(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec embedding_size(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec enable_mm_embeds(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec enable_prompt_embeds(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec enable_return_routed_experts(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec enable_sleep_mode(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec enforce_eager(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec generation_config(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec get_and_verify_max_len(SnakeBridge.Ref.t(), integer(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_and_verify_max_len.
Parameters
max_model_len(integer())
Returns
term()
@spec get_diff_sampling_param( SnakeBridge.Ref.t(), keyword() ) :: {:ok, %{optional(String.t()) => term()}} | {:error, Snakepit.Error.t()}
This method returns a dictionary containing the non-default sampling
parameters with override_generation_config applied.
The default sampling parameters are:
- vLLM's neutral defaults, if self.generation_config = "vllm"
- the model's defaults, if self.generation_config = "auto"
- as defined in generation_config.json, if self.generation_config = "path/to/generation_config/dir"
Returns
%{optional(String.t()) => term()}
@spec get_head_size( SnakeBridge.Ref.t(), keyword() ) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_head_size.
Returns
integer()
@spec get_inputs_embeds_size( SnakeBridge.Ref.t(), keyword() ) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_inputs_embeds_size.
Returns
integer()
@spec get_layers_start_end_indices(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, {integer(), integer()}} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_layers_start_end_indices.
Parameters
parallel_config(term())
Returns
{integer(), integer()}
@spec get_mamba_chunk_size( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Returns the mamba chunk size if it exists
Returns
term()
@spec get_model_arch_config( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_model_arch_config.
Returns
term()
@spec get_multimodal_config( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Get the multimodal configuration of the model.
Raises
ArgumentError - If the model is not multimodal.
Returns
term()
@spec get_num_attention_heads(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_num_attention_heads.
Parameters
parallel_config(term())
Returns
integer()
@spec get_num_experts( SnakeBridge.Ref.t(), keyword() ) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_num_experts.
Returns
integer()
@spec get_num_kv_heads(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Returns the number of KV heads per GPU.
Parameters
parallel_config(term())
Returns
integer()
@spec get_num_layers(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_num_layers.
Parameters
parallel_config(term())
Returns
integer()
@spec get_num_layers_by_block_type(SnakeBridge.Ref.t(), term(), [term()], keyword()) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_num_layers_by_block_type.
Parameters
parallel_config(term()), block_type(term(), default: 'attention')
Returns
integer()
@spec get_sliding_window( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Get the sliding window size from the HF text config if present.
Returns
term()
@spec get_total_num_kv_heads( SnakeBridge.Ref.t(), keyword() ) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Returns the total number of KV heads.
Returns
integer()
@spec get_vocab_size( SnakeBridge.Ref.t(), keyword() ) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.get_vocab_size.
Returns
integer()
@spec has_inner_state(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec has_noops(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec head_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec hf_config_path(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec hf_token(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec interleave_mm_strings(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec io_processor_plugin(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_attention_free(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_chunked_prefill_supported(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_cross_encoder(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_deepseek_mla(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_encoder_decoder(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_hybrid(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_matryoshka(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_mm_prefix_lm(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_moe(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_multimodal_model(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_multimodal_raw_input_only_model(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_pp_supported(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_prefix_caching_supported(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec is_quantized(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec limit_mm_per_prompt(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec logits_processor_pattern(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec logits_processors(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec logprobs_mode(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec matryoshka_dimensions(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec max_logprobs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec max_model_len(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec maybe_pull_model_tokenizer_for_runai( SnakeBridge.Ref.t(), String.t(), String.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Pull model/tokenizer from Object Storage to temporary
directory when needed.
Parameters
model - Model name or path; tokenizer - Tokenizer name or path
Returns
nil
@spec media_io_kwargs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mm_encoder_attn_backend(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mm_encoder_tp_mode(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mm_processor_cache_gb(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mm_processor_cache_type(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mm_processor_kwargs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec mm_shm_cache_max_object_size_mb(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec model(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec model_impl(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec model_weights(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multimodal_config(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec new(term(), term(), term(), keyword()) :: {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}
Constructs ModelConfig.
Parameters
dataclass_self__(term()), args(term()), kwargs(term())
@spec override_attention_dtype(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec pooler_config(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec quantization(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec registry(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec requires_raw_input_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec revision(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec runner(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec seed(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec served_model_name(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec skip_mm_profiling(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec skip_tokenizer_init(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec spec_target_max_model_len(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec supports_mamba_prefix_caching(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec tokenizer(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec tokenizer_mode(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec tokenizer_revision(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec trust_remote_code(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec try_get_generation_config( SnakeBridge.Ref.t(), keyword() ) :: {:ok, %{optional(String.t()) => term()}} | {:error, Snakepit.Error.t()}
This method attempts to retrieve the non-default values of the
generation config for this model.
The generation config can contain information about special tokens, as
well as sampling parameters. Which is why this method exists separately
to get_diff_sampling_param.
Returns
%{optional(String.t()) => term()}
@spec use_mla(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec use_sep_token(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec uses_alibi(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec uses_mrope(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec uses_xdrope_dim(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec using_transformers_backend( SnakeBridge.Ref.t(), keyword() ) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
Check if the model is using the Transformers modeling backend class.
Returns
boolean()
@spec validate_model_config_after( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Called after post_init
Returns
term()
@spec validate_quantization_before(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Python method ModelConfig.validate_quantization_before.
Parameters
value(term())
Returns
term()
@spec verify_dual_chunk_attention_config(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Python method ModelConfig.verify_dual_chunk_attention_config.
Parameters
load_config(term())
Returns
nil
@spec verify_with_parallel_config(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Python method ModelConfig.verify_with_parallel_config.
Parameters
parallel_config(term())
Returns
nil
@spec video_pruning_rate(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}