Legacy LLMEngine for backwards compatibility.
Summary
Functions
Remove request_ids from EngineCore and Detokenizer.
Load a new LoRA adapter into the engine for future requests.
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Log stats if logging is enabled.
Log stats when the time interval has passed.
Creates an LLM engine from the engine arguments.
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
List all registered adapters.
Initialize self. See help(type(self)) for accurate signature.
Prevent an adapter from being evicted.
Remove an already loaded LoRA adapter.
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Types
Functions
@spec abort_request(SnakeBridge.Ref.t(), [String.t()], [term()], keyword()) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Remove request_ids from EngineCore and Detokenizer.
Parameters
request_ids (list(String.t())), internal (boolean(), default: false)
Returns
nil
@spec add_lora(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
Load a new LoRA adapter into the engine for future requests.
Parameters
lora_request(term())
Returns
boolean()
@spec add_request( SnakeBridge.Ref.t(), String.t(), term(), term(), [term()], keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
request_id (String.t()), prompt (term()), params (term()), arrival_time (term(), default: nil), lora_request (term(), default: nil), tokenization_kwargs (term(), default: nil), trace_headers (term(), default: nil), priority (integer(), default: 0), prompt_text (term(), default: nil)
Returns
nil
@spec apply_model(SnakeBridge.Ref.t(), term(), keyword()) :: {:ok, [term()]} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
func(term())
Returns
list(term())
@spec collective_rpc(SnakeBridge.Ref.t(), term(), [term()], keyword()) :: {:ok, [term()]} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
method (term()), timeout (term(), default: nil), args (tuple(), default: {}), kwargs (term(), default: nil)
Returns
list(term())
@spec do_log_stats( SnakeBridge.Ref.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Log stats if logging is enabled.
Returns
nil
@spec do_log_stats_with_interval( SnakeBridge.Ref.t(), keyword() ) :: {:ok, nil} | {:error, Snakepit.Error.t()}
Log stats when the time interval has passed.
Returns
nil
@spec from_engine_args(SnakeBridge.Ref.t(), term(), [term()], keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
Creates an LLM engine from the engine arguments.
Parameters
engine_args (term()), usage_context (term(), default: UsageContext.ENGINE_CONTEXT), stat_loggers (term(), default: nil), enable_multiprocessing (boolean(), default: false)
Returns
term()
@spec from_vllm_config(SnakeBridge.Ref.t(), term(), [term()], keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
vllm_config (term()), usage_context (term(), default: UsageContext.ENGINE_CONTEXT), stat_loggers (term(), default: nil), disable_log_stats (boolean(), default: false)
Returns
term()
@spec get_metrics( SnakeBridge.Ref.t(), keyword() ) :: {:ok, [term()]} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
list(term())
@spec get_num_unfinished_requests( SnakeBridge.Ref.t(), keyword() ) :: {:ok, integer()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
integer()
@spec get_supported_tasks( SnakeBridge.Ref.t(), keyword() ) :: {:ok, {term(), term()}} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
{term(), term()}
@spec get_tokenizer( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
term()
@spec has_unfinished_requests( SnakeBridge.Ref.t(), keyword() ) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
boolean()
@spec has_unfinished_requests_dp(SnakeBridge.Ref.t(), boolean(), keyword()) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
has_unfinished(boolean())
Returns
boolean()
@spec is_sleeping( SnakeBridge.Ref.t(), keyword() ) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
boolean()
@spec list_loras( SnakeBridge.Ref.t(), keyword() ) :: {:ok, MapSet.t(integer())} | {:error, Snakepit.Error.t()}
List all registered adapters.
Returns
MapSet.t(integer())
@spec new(term(), term(), boolean(), [term()], keyword()) :: {:ok, SnakeBridge.Ref.t()} | {:error, Snakepit.Error.t()}
Initialize self. See help(type(self)) for accurate signature.
Parameters
vllm_config (term()), executor_class (term()), log_stats (boolean()), aggregate_engine_logging (boolean(), default: false), usage_context (term(), default: UsageContext.ENGINE_CONTEXT), stat_loggers (term(), default: nil), mm_registry (term(), default: the shared global MultiModalRegistry instance), use_cached_outputs (boolean(), default: false), multiprocess_mode (boolean(), default: false)
@spec pin_lora(SnakeBridge.Ref.t(), integer(), keyword()) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
Prevent an adapter from being evicted.
Parameters
lora_id(integer())
Returns
boolean()
@spec remove_lora(SnakeBridge.Ref.t(), integer(), keyword()) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
Remove an already loaded LoRA adapter.
Parameters
lora_id(integer())
Returns
boolean()
@spec reset_mm_cache( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
term()
@spec reset_prefix_cache(SnakeBridge.Ref.t(), [term()], keyword()) :: {:ok, boolean()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
reset_running_requests (boolean(), default: false), reset_connector (boolean(), default: false)
Returns
boolean()
@spec sleep(SnakeBridge.Ref.t(), [term()], keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
level (integer(), default: 1)
Returns
term()
@spec start_profile( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
term()
@spec step( SnakeBridge.Ref.t(), keyword() ) :: {:ok, [term()]} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
list(term())
@spec stop_profile( SnakeBridge.Ref.t(), keyword() ) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Returns
term()
@spec tokenizer(SnakeBridge.Ref.t()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
@spec validate_outputs(SnakeBridge.Ref.t(), term(), term(), keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
outputs (term()), output_type (term())
Returns
term()
@spec wake_up(SnakeBridge.Ref.t(), [term()], keyword()) :: {:ok, term()} | {:error, Snakepit.Error.t()}
vLLM: a high-throughput and memory-efficient inference engine for LLMs
Parameters
tags (term(), default: nil)
Returns
term()