Vllm.BeamSearch (VLLM v0.3.0)

Copy Markdown View Source

Submodule bindings for vllm.beam_search.

Version

  • Requested: 0.14.0
  • Observed at generation: 0.14.0

Runtime Options

All functions accept a __runtime__ option for controlling execution behavior:

Vllm.BeamSearch.some_function(args, __runtime__: [timeout: 120_000])

Supported runtime options

  • :timeout - Call timeout in milliseconds (default: 120,000ms / 2 minutes)
  • :timeout_profile - Use a named profile (:default, :ml_inference, :batch_job, :streaming)
  • :stream_timeout - Timeout for streaming operations (default: 1,800,000ms / 30 minutes)
  • :session_id - Override the session ID for this call
  • :pool_name - Target a specific Snakepit pool (multi-pool setups)
  • :affinity - Override session affinity (:hint, :strict_queue, :strict_fail_fast)

Timeout Profiles

  • :default - 2 minute timeout for regular calls
  • :ml_inference - 10 minute timeout for ML/LLM workloads
  • :batch_job - Unlimited timeout for long-running jobs
  • :streaming - 2 minute timeout, 30 minute stream_timeout

Examples with runtime option overrides

# For a long-running ML inference call (`predict` is a placeholder; substitute any function from this module)
Vllm.BeamSearch.predict(data, __runtime__: [timeout_profile: :ml_inference])

# Or explicit timeout
Vllm.BeamSearch.predict(data, __runtime__: [timeout: 600_000])

# Route to a pool and enforce strict affinity
Vllm.BeamSearch.predict(data, __runtime__: [pool_name: :strict_pool, affinity: :strict_queue])

See SnakeBridge.Defaults for global timeout configuration.

Summary

Functions

create_sort_beams_key_function(eos_token_id, length_penalty, opts \\ [])

@spec create_sort_beams_key_function(integer(), float(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

Python binding for vllm.beam_search.create_sort_beams_key_function.

Parameters

  • eos_token_id (integer())
  • length_penalty (float())

Returns

  • term()

get_beam_search_score(tokens, cumulative_logprob, eos_token_id)

@spec get_beam_search_score([integer()], float(), integer()) ::
  {:ok, float()} | {:error, Snakepit.Error.t()}

Calculate the beam search score with length penalty.

Adapted from the Hugging Face Transformers beam search implementation:

https://github.com/huggingface/transformers/blob/ccb92be23def445f2afdea94c31286f84b89eb5b/src/transformers/generation/beam_search.py#L938

Parameters

  • tokens (list(integer()))
  • cumulative_logprob (float())
  • eos_token_id (integer())
  • length_penalty (float() default: 1.0) - optional; supplied as the fourth positional argument in the higher-arity variants

Returns

  • float()

get_beam_search_score(tokens, cumulative_logprob, eos_token_id, opts)

@spec get_beam_search_score([integer()], float(), integer(), keyword()) ::
  {:ok, float()} | {:error, Snakepit.Error.t()}
@spec get_beam_search_score([integer()], float(), integer(), float()) ::
  {:ok, float()} | {:error, Snakepit.Error.t()}

get_beam_search_score(tokens, cumulative_logprob, eos_token_id, length_penalty, opts)

@spec get_beam_search_score([integer()], float(), integer(), float(), keyword()) ::
  {:ok, float()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict()

@spec multi_modal_data_dict() :: {:ok, term()} | {:error, Snakepit.Error.t()}

Python binding for vllm.beam_search.MultiModalDataDict.

Returns

  • term()

multi_modal_data_dict(opts)

@spec multi_modal_data_dict(keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term()) :: {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, opts)

@spec multi_modal_data_dict(
  term(),
  keyword()
) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term(), term()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, opts)

@spec multi_modal_data_dict(term(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term(), term(), term()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, arg3, opts)

@spec multi_modal_data_dict(term(), term(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term(), term(), term(), term()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, arg3, arg4, opts)

@spec multi_modal_data_dict(term(), term(), term(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term(), term(), term(), term(), term()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, arg3, arg4, arg5, opts)

@spec multi_modal_data_dict(term(), term(), term(), term(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term(), term(), term(), term(), term(), term()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, arg3, arg4, arg5, arg6, opts)

@spec multi_modal_data_dict(term(), term(), term(), term(), term(), term(), keyword()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(term(), term(), term(), term(), term(), term(), term()) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, arg3, arg4, arg5, arg6, arg7, opts)

@spec multi_modal_data_dict(
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  keyword()
) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}
@spec multi_modal_data_dict(
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  term()
) ::
  {:ok, term()} | {:error, Snakepit.Error.t()}

multi_modal_data_dict(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, opts)

@spec multi_modal_data_dict(
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  term(),
  keyword()
) :: {:ok, term()} | {:error, Snakepit.Error.t()}