erllama_model_backend behaviour (erllama v0.1.0)
Behaviour describing the operations the erllama_model gen_statem needs from a backing inference engine.
Two backends ship with this release:

erllama_model_stub: deterministic phash2-based stubs, used by tests that don't have a GGUF file on disk.
erllama_model_llama: real llama.cpp inference via the NIF.

Future backends (a mock for fault injection, a remote backend for distributed inference, etc.) can plug in through this same surface.
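As a sketch of what a custom backend can look like, here is a skeletal module in the spirit of erllama_model_stub: every callback is implemented, with erlang:phash2/2 standing in for real inference. The module name, the 32000-entry fake vocabulary, and all internal details are illustrative assumptions, not part of the library; only the callback names and signatures come from the behaviour below.

    -module(my_stub_backend).
    -behaviour(erllama_model_backend).
    -export([apply_chat_template/2, configure_sampler/2, decode_one/2,
             detokenize/2, embed/2, kv_pack/2, prefill/2, seq_rm_last/2,
             terminate/1, tokenize/2]).

    %% State is opaque to the caller; a stub needs nothing in it.

    apply_chat_template(_State, #{messages := Messages}) ->
        %% Hash each message term into a fake token id.
        {ok, [erlang:phash2(M, 32000) || M <- Messages]}.

    configure_sampler(State, _SamplerOpts) ->
        {ok, State}.                        %% Stub: sampling options are ignored.

    decode_one(_State, ContextTokens) ->
        %% Derive the "next token" deterministically from the whole context.
        {ok, erlang:phash2(ContextTokens, 32000)}.

    detokenize(_State, TokenIds) ->
        iolist_to_binary([integer_to_binary(T) || T <- TokenIds]).

    embed(_State, TokenIds) ->
        {ok, [erlang:phash2(T, 1000) / 1000 || T <- TokenIds]}.

    kv_pack(_State, Tokens) ->
        term_to_binary(Tokens).             %% Stand-in for a packed KV cache.

    prefill(_State, _TokenIds) ->
        ok.

    seq_rm_last(_State, _NTokens) ->
        ok.

    terminate(_State) ->
        ok.

    tokenize(_State, Text) ->
        [erlang:phash2(C, 32000) || C <- binary_to_list(Text)].

Note that this stub never returns {eog, Token} from decode_one/2, so a caller must bound generation itself; a real backend signals end-of-generation through that variant.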
Types
-type chat_request() ::
          #{messages := [chat_message()],
            system => binary() | undefined,
            tools => [chat_tool()] | undefined}.
-type sampler_opts() ::
          #{grammar => binary(),
            repetition_penalty => float(),
            top_k => non_neg_integer(),
            top_p => float(),
            min_p => float(),
            temperature => float(),
            seed => non_neg_integer()}.
-type state() :: term().
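A minimal sketch of how a caller might construct these maps. The shape of each message is not specified here (chat_message() is defined elsewhere), and every value below is an illustrative assumption; only the keys come from the type specs above.

    %% Messages :: [chat_message()], built elsewhere.
    Request = #{messages => Messages,
                system => <<"You are a helpful assistant.">>,
                tools => undefined},
    Sampler = #{temperature => 0.8,        %% all values here are made up
                top_k => 40,
                top_p => 0.95,
                min_p => 0.05,
                repetition_penalty => 1.1,
                seed => 42}.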
Callbacks
-callback apply_chat_template(state(), Request :: chat_request()) -> {ok, [erllama_nif:token_id()]} | {error, term()}.
-callback configure_sampler(state(), sampler_opts()) -> {ok, state()} | {error, term()}.
-callback decode_one(state(), ContextTokens :: [erllama_nif:token_id()]) -> {ok, erllama_nif:token_id()} | {eog, erllama_nif:token_id()} | {error, term()}.
-callback detokenize(state(), [erllama_nif:token_id()]) -> binary() | {error, term()}.
-callback embed(state(), [erllama_nif:token_id()]) -> {ok, [float()]} | {error, term()}.
-callback kv_pack(state(), Tokens :: [erllama_nif:token_id()]) -> binary() | {error, term()}.
-callback prefill(state(), [erllama_nif:token_id()]) -> ok | {error, term()}.
-callback seq_rm_last(state(), NTokens :: pos_integer()) -> ok | {error, term()}.
-callback terminate(state()) -> ok.
-callback tokenize(state(), Text :: binary()) -> [erllama_nif:token_id()] | {error, term()}.
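Taken together, the callbacks support a straightforward prefill-then-decode loop. The sketch below shows one plausible way a caller such as erllama_model could drive a backend; it is an assumption about usage, not a copy of the gen_statem's implementation. Backend is a module implementing this behaviour and State its opaque state.

    generate(Backend, State0, Request, SamplerOpts, MaxNew) ->
        {ok, Prompt} = Backend:apply_chat_template(State0, Request),
        {ok, State} = Backend:configure_sampler(State0, SamplerOpts),
        ok = Backend:prefill(State, Prompt),
        case decode_loop(Backend, State, Prompt, MaxNew, []) of
            {ok, NewTokens}  -> {ok, Backend:detokenize(State, NewTokens)};
            {error, _} = Err -> Err
        end.

    decode_loop(_Backend, _State, _Ctx, 0, Acc) ->
        {ok, lists:reverse(Acc)};
    decode_loop(Backend, State, Ctx, N, Acc) ->
        case Backend:decode_one(State, Ctx) of
            %% Appending with ++ is O(length(Ctx)); fine for a sketch.
            {ok, Tok}        -> decode_loop(Backend, State, Ctx ++ [Tok],
                                            N - 1, [Tok | Acc]);
            {eog, _Tok}      -> {ok, lists:reverse(Acc)};   %% end of generation
            {error, _} = Err -> Err
        end.

For brevity the sketch ignores that detokenize/2 can itself return {error, term()}, and it does not exercise embed/2, kv_pack/2, or seq_rm_last/2, which serve embedding, cache-snapshot, and context-rollback paths respectively.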