Sampling parameters for text generation.
Overall, we follow the sampling parameters from the OpenAI text completion API (https://platform.openai.com/docs/api-reference/completions/create). In addition, we support beam search, which is not supported by OpenAI.
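For orientation, a minimal Elixir sketch of driving this class through the bridge. It assumes the generated new/2 wrapper forwards its keyword list as Python keyword arguments; the {:ok, _}/{:error, _} shapes follow the specs below, but the exact forwarding rules depend on the SnakeBridge generator.

# Hypothetical sketch: construct a SamplingParams object over the bridge.
{:ok, params} =
  Vllm.SamplingParamsClass.new([], temperature: 0.8, top_p: 0.95, max_tokens: 128)

# Field accessors take the ref and return {:ok, value}.
{:ok, temp} = Vllm.SamplingParamsClass.temperature(params)
# temp == 0.8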
Summary
Functions
Validate and convert input to a msgspec.Struct instance.
Validate that the sampling parameter values are within their allowed ranges and mutually consistent.
Validate constraints that apply when greedy sampling (temperature of 0) is used.
Deep copy, but maybe not the LogitsProcessor objects.
Build a SamplingParams instance from optional values, substituting the documented default for any argument given as None.
Initialize self. See help(type(self)) for accurate signature.
Update the parameters with any non-default values from generation_config.
Resolve tokenizer-dependent fields (notably bad_words) into token IDs using the given tokenizer.
Types
Functions
@spec _validate_msgspec(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Validate and convert input to a msgspec.Struct instance.
Parameters
value(term())
Returns
term()
@spec _verify_args(SnakeBridge.Ref.t(), keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Validate that the sampling parameter values are within their allowed ranges and mutually consistent.
Returns
nil
@spec _verify_greedy_sampling(SnakeBridge.Ref.t(), keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Validate constraints that apply when greedy sampling (temperature of 0) is used.
Returns
nil
@spec all_stop_token_ids(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec allowed_token_ids(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec bad_words(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec bad_words_token_ids(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec clone(SnakeBridge.Ref.t(), keyword()) :: {:ok, Vllm.SamplingParamsClass.t()} | {:error, Snakepit.Error.t()}
Deep copy, but maybe not the LogitsProcessor objects.
LogitsProcessor objects may contain an arbitrary, nontrivial amount of data that is expensive to copy. However, if not copied, the processor needs to support parallel decoding for multiple sequences. See https://github.com/vllm-project/vllm/issues/3087.
If skip_clone is True, uses shallow copy instead of deep copy.
Returns
Vllm.SamplingParamsClass.t()
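A hedged usage sketch, assuming params is a ref obtained from new/2 and that the trailing keyword list carries bridge options:

# clone/2 returns the copy as a new struct; the copy is deep except,
# possibly, the LogitsProcessor objects noted above.
{:ok, copy} = Vllm.SamplingParamsClass.clone(params, [])

Note that skip_clone is a field of the params object itself (see from_optional below), not an option to clone/2: a params object built with skip_clone set copies shallowly, sharing mutable fields with the original.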
@spec detokenize(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec extra_args(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec flat_logprobs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec frequency_penalty(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec from_optional(SnakeBridge.Ref.t(), [term()], keyword()) :: {:ok, Vllm.SamplingParamsClass.t()} | {:error, Snakepit.Error.t()}
Build a SamplingParams instance from optional values, substituting the documented default for any argument given as None.
Parameters
n(term() default: 1)
presence_penalty(term() default: 0.0)
frequency_penalty(term() default: 0.0)
repetition_penalty(term() default: 1.0)
temperature(term() default: 1.0)
top_p(term() default: 1.0)
top_k(integer() default: 0)
min_p(float() default: 0.0)
seed(term() default: None)
stop(term() default: None)
stop_token_ids(term() default: None)
bad_words(term() default: None)
include_stop_str_in_output(boolean() default: False)
ignore_eos(boolean() default: False)
max_tokens(term() default: 16)
min_tokens(integer() default: 0)
logprobs(term() default: None)
prompt_logprobs(term() default: None)
detokenize(boolean() default: True)
skip_special_tokens(boolean() default: True)
spaces_between_special_tokens(boolean() default: True)
logits_processors(term() default: None)
truncate_prompt_tokens(integer() | nil default: None)
output_kind(Vllm.SamplingParams.RequestOutputKind.t() default: RequestOutputKind.CUMULATIVE)
structured_outputs(term() default: None)
logit_bias(term() default: None)
allowed_token_ids(term() default: None)
extra_args(term() default: None)
skip_clone(boolean() default: False)
Returns
Vllm.SamplingParamsClass.t()
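A sketch under the same assumptions, additionally assuming the bridge maps Elixir nil to Python None and that the leading ref argument in the spec can be any existing params reference:

# from_optional treats None-valued arguments as "use the default".
{:ok, relaxed} =
  Vllm.SamplingParamsClass.from_optional(params, [],
    temperature: nil,  # nil (None) falls back to the default of 1.0
    max_tokens: 256,
    stop: nil          # likewise: no stop strings
  )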
@spec ignore_eos(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec include_stop_str_in_output(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec logit_bias(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec logits_processors(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec logprobs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec max_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec min_p(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec min_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec n(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec new([term()], keyword()) :: {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}
Initialize self. See help(type(self)) for accurate signature.
Parameters
args(term())
kwargs(term())
@spec output_kind(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec output_text_buffer_length(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec presence_penalty(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec prompt_logprobs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec repetition_penalty(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec sampling_type(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec seed(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec skip_clone(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec skip_reading_prefix_cache(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec skip_special_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec spaces_between_special_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec stop(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec stop_token_ids(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec structured_outputs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec temperature(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec top_k(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec top_p(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec truncate_prompt_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec update_from_generation_config(SnakeBridge.Ref.t(), %{optional(String.t()) => term()}, [term()], keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Update the parameters with any non-default values from generation_config.
Parameters
generation_config(%{optional(String.t()) => term()})
model_eos_token_id(term() default: None)
Returns
nil
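A hedged sketch; generation_config is a plain string-keyed map per the spec, e.g. one exported from a Hugging Face GenerationConfig. Whether model_eos_token_id travels in the positional list or the keyword list depends on the generator, so it is omitted here:

# Fold any non-default generation_config values into the params.
config = %{"temperature" => 0.6, "repetition_penalty" => 1.1}
{:ok, nil} =
  Vllm.SamplingParamsClass.update_from_generation_config(params, config, [], [])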
@spec update_from_tokenizer(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Resolve tokenizer-dependent fields (notably bad_words) into token IDs using the given tokenizer.
Parameters
tokenizer(term())
Returns
nil
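Finally, a sketch assuming tokenizer is itself a bridge ref to a Python tokenizer object:

# Resolve tokenizer-dependent fields (e.g. bad_words into bad_words_token_ids).
{:ok, nil} = Vllm.SamplingParamsClass.update_from_tokenizer(params, tokenizer, [])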