Vllm.SamplingParams (vLLM v0.3.0)

Copy Markdown View Source

Sampling parameters for text generation.

Overall, we follow the sampling parameters from the OpenAI text completion API (https://platform.openai.com/docs/api-reference/completions/create). In addition, we support beam search, which is not supported by OpenAI.

Summary

Functions

Validate and convert input to msgspec.Struct instance.

vLLM: a high-throughput and memory-efficient inference engine for LLMs

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Deep copy, but maybe not the LogitsProcessor objects.

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Initialize self. See help(type(self)) for accurate signature.

Update if there are non-default values from generation_config

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Types

t()

@opaque t()

Functions

_validate_msgspec(ref, value, opts \\ [])

@spec _validate_msgspec(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

Validate and convert input to msgspec.Struct instance.

Parameters

  • value (term())

Returns

  • term()

_verify_args(ref, opts \\ [])

@spec _verify_args(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, nil} | {:error, Snakepit.Error.t()}

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Returns

  • nil

_verify_greedy_sampling(ref, opts \\ [])

@spec _verify_greedy_sampling(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, nil} | {:error, Snakepit.Error.t()}

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Returns

  • nil

all_stop_token_ids(ref)

@spec all_stop_token_ids(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

allowed_token_ids(ref)

@spec allowed_token_ids(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

bad_words(ref)

@spec bad_words(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

bad_words_token_ids(ref)

@spec bad_words_token_ids(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

clone(ref, opts \\ [])

@spec clone(
  SnakeBridge.Ref.t(),
  keyword()
) :: {:ok, Vllm.SamplingParamsClass.t()} | {:error, Snakepit.Error.t()}

Deep copy, but maybe not the LogitsProcessor objects.

LogitsProcessor objects may contain an arbitrary, nontrivial amount of data that is expensive to copy. However, if not copied, the processor needs to support parallel decoding for multiple sequences. See https://github.com/vllm-project/vllm/issues/3087

If skip_clone is True, uses shallow copy instead of deep copy.

Returns

  • Vllm.SamplingParamsClass.t()

detokenize(ref)

@spec detokenize(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

extra_args(ref)

@spec extra_args(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

flat_logprobs(ref)

@spec flat_logprobs(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

frequency_penalty(ref)

@spec frequency_penalty(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

from_optional(ref, args, opts \\ [])

@spec from_optional(SnakeBridge.Ref.t(), [term()], keyword()) ::
  {:ok, Vllm.SamplingParamsClass.t()} | {:error, Snakepit.Error.t()}

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Parameters

  • n (term() default: 1)
  • presence_penalty (term() default: 0.0)
  • frequency_penalty (term() default: 0.0)
  • repetition_penalty (term() default: 1.0)
  • temperature (term() default: 1.0)
  • top_p (term() default: 1.0)
  • top_k (integer() default: 0)
  • min_p (float() default: 0.0)
  • seed (term() default: None)
  • stop (term() default: None)
  • stop_token_ids (term() default: None)
  • bad_words (term() default: None)
  • include_stop_str_in_output (boolean() default: False)
  • ignore_eos (boolean() default: False)
  • max_tokens (term() default: 16)
  • min_tokens (integer() default: 0)
  • logprobs (term() default: None)
  • prompt_logprobs (term() default: None)
  • detokenize (boolean() default: True)
  • skip_special_tokens (boolean() default: True)
  • spaces_between_special_tokens (boolean() default: True)
  • logits_processors (term() default: None)
  • truncate_prompt_tokens (integer() | nil default: None)

  • output_kind (Vllm.SamplingParams.RequestOutputKind.t() default: <RequestOutputKind.CUMULATIVE: 0>)
  • structured_outputs (term() default: None)
  • logit_bias (term() default: None)
  • allowed_token_ids (term() default: None)
  • extra_args (term() default: None)
  • skip_clone (boolean() default: False)

Returns

  • Vllm.SamplingParamsClass.t()

ignore_eos(ref)

@spec ignore_eos(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

include_stop_str_in_output(ref)

@spec include_stop_str_in_output(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

logit_bias(ref)

@spec logit_bias(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

logits_processors(ref)

@spec logits_processors(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

logprobs(ref)

@spec logprobs(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

max_tokens(ref)

@spec max_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

min_p(ref)

@spec min_p(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

min_tokens(ref)

@spec min_tokens(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

n(ref)

@spec n(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

new(args, opts \\ [])

@spec new(
  [term()],
  keyword()
) :: {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}

Initialize self. See help(type(self)) for accurate signature.

Parameters

  • args (term())
  • kwargs (term())

output_kind(ref)

@spec output_kind(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

output_text_buffer_length(ref)

@spec output_text_buffer_length(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

presence_penalty(ref)

@spec presence_penalty(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

prompt_logprobs(ref)

@spec prompt_logprobs(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

repetition_penalty(ref)

@spec repetition_penalty(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

sampling_type(ref)

@spec sampling_type(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

seed(ref)

@spec seed(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

skip_clone(ref)

@spec skip_clone(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

skip_reading_prefix_cache(ref)

@spec skip_reading_prefix_cache(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

skip_special_tokens(ref)

@spec skip_special_tokens(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

spaces_between_special_tokens(ref)

@spec spaces_between_special_tokens(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

stop(ref)

@spec stop(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

stop_token_ids(ref)

@spec stop_token_ids(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

structured_outputs(ref)

@spec structured_outputs(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

temperature(ref)

@spec temperature(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

top_k(ref)

@spec top_k(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

top_p(ref)

@spec top_p(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

truncate_prompt_tokens(ref)

@spec truncate_prompt_tokens(SnakeBridge.Ref.t()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

update_from_generation_config(ref, generation_config, args, opts \\ [])

@spec update_from_generation_config(
  SnakeBridge.Ref.t(),
  %{optional(String.t()) => term()},
  [term()],
  keyword()
) :: {:ok, nil} | {:error, Snakepit.Error.t()}

Update if there are non-default values from generation_config

Parameters

  • generation_config (%{optional(String.t()) => term()})
  • model_eos_token_id (term() default: None)

Returns

  • nil

update_from_tokenizer(ref, tokenizer, opts \\ [])

@spec update_from_tokenizer(SnakeBridge.Ref.t(), term(), keyword()) ::
  {:ok, nil} | {:error, Snakepit.Error.t()}

vLLM: a high-throughput and memory-efficient inference engine for LLMs

Parameters

  • tokenizer (term())

Returns

  • nil