HuggingfaceClient.Inference.Task.ChatCompletion (huggingface_client v0.1.0)

Copy Markdown View Source

Chat completion task — an OpenAI-compatible endpoint at /v1/chat/completions.

Non-streaming

{:ok, response} = HuggingfaceClient.chat_completion(client, %{
  model: "meta-llama/Llama-3.1-8B-Instruct",
  messages: [%{role: "user", content: "Hello!"}],
  max_tokens: 512
})

Streaming

{:ok, stream} = HuggingfaceClient.chat_completion_stream(client, %{
  model: "meta-llama/Llama-3.1-8B-Instruct",
  messages: [%{role: "user", content: "Hello!"}]
})

for chunk <- stream do
  IO.write(get_in(chunk, ["choices", Access.at(0), "delta", "content"]) || "")
end

Multi-provider

{:ok, response} = HuggingfaceClient.chat_completion(client, %{
  model: "meta-llama/Llama-3.1-8B-Instruct",
  provider: "groq",
  messages: [%{role: "user", content: "Hi!"}]
})

Summary

Functions

run(client, args) — Runs a non-streaming chat completion.

stream(client, args) — Runs a streaming chat completion.

Functions

run(client, args)

@spec run(HuggingfaceClient.Client.t(), map()) ::
  {:ok, map()} | {:error, Exception.t()}

Runs a non-streaming chat completion.

Returns {:ok, response_map} or {:error, exception}.

stream(client, args)

@spec stream(HuggingfaceClient.Client.t(), map()) ::
  {:ok, Enumerable.t()} | {:error, Exception.t()}

Runs a streaming chat completion.

Returns {:ok, stream} where each element is a decoded chunk map, or {:error, exception}.