HfHub.Api (HfHub v0.2.0)

Copy Markdown View Source

HuggingFace Hub API client.

Provides functions to interact with the HuggingFace Hub API for fetching metadata about models, datasets, and spaces.

Examples

# Get model information
{:ok, model_info} = HfHub.Api.model_info("bert-base-uncased")

# Get dataset information
{:ok, dataset_info} = HfHub.Api.dataset_info("squad")

# List models with filters
{:ok, models} = HfHub.Api.list_models(filter: "text-classification", sort: "downloads")

# List files in a repository
{:ok, files} = HfHub.Api.list_files("bert-base-uncased", repo_type: :model)

Summary

Functions

Gets the available configuration names for a dataset.

Fetches information about a dataset from the HuggingFace Hub.

Lists available splits for a dataset config.

Extracts configuration names from dataset card_data.

Lists datasets from the HuggingFace Hub with optional filters.

Lists files in a repository.

Lists models from the HuggingFace Hub with optional filters.

Lists repository tree entries (files and folders).

Fetches information about a model from the HuggingFace Hub.

Fetches information about a space from the HuggingFace Hub.

Types

dataset_info()

@type dataset_info() :: %{
  id: String.t(),
  author: String.t() | nil,
  sha: String.t(),
  downloads: non_neg_integer(),
  likes: non_neg_integer(),
  tags: [String.t()],
  siblings: [file_info()],
  created_at: DateTime.t(),
  updated_at: DateTime.t()
}

file_info()

@type file_info() :: %{
  rfilename: String.t(),
  size: non_neg_integer(),
  lfs: map() | nil
}

model_info()

@type model_info() :: %{
  id: String.t(),
  author: String.t() | nil,
  sha: String.t(),
  downloads: non_neg_integer(),
  likes: non_neg_integer(),
  tags: [String.t()],
  pipeline_tag: String.t() | nil,
  siblings: [file_info()],
  created_at: DateTime.t(),
  updated_at: DateTime.t()
}

space_info()

@type space_info() :: %{
  id: String.t(),
  author: String.t() | nil,
  sha: String.t(),
  likes: non_neg_integer(),
  tags: [String.t()],
  sdk: String.t(),
  created_at: DateTime.t(),
  updated_at: DateTime.t()
}

tree_entry()

@type tree_entry() :: %{
  type: :file | :folder,
  path: String.t(),
  size: non_neg_integer() | nil,
  lfs: map() | nil,
  oid: String.t() | nil
}

Functions

dataset_configs(repo_id, opts \\ [])

@spec dataset_configs(
  HfHub.repo_id(),
  keyword()
) :: {:ok, [String.t()]} | {:error, term()}

Gets the available configuration names for a dataset.

Configurations (also called subsets) are named variants of a dataset; each configuration has its own set of splits. For example, openai/gsm8k has "main" and "socratic" configs.

Options

  • :token - Authentication token. If not provided, uses configured token.

Examples

{:ok, configs} = HfHub.Api.dataset_configs("openai/gsm8k")
# => {:ok, ["main", "socratic"]}

{:ok, configs} = HfHub.Api.dataset_configs("imdb")
# => {:ok, ["plain_text"]}

dataset_info(repo_id, opts \\ [])

@spec dataset_info(
  HfHub.repo_id(),
  keyword()
) :: {:ok, dataset_info()} | {:error, term()}

Fetches information about a dataset from the HuggingFace Hub.

Options

  • :revision - Git revision (branch, tag, or commit hash). Defaults to "main".
  • :token - Authentication token. If not provided, uses configured token.

Examples

{:ok, info} = HfHub.Api.dataset_info("squad")
{:ok, info} = HfHub.Api.dataset_info("squad", revision: "main")

dataset_splits(repo_id, opts \\ [])

@spec dataset_splits(
  HfHub.repo_id(),
  keyword()
) :: {:ok, [String.t()]} | {:error, term()}

Lists available splits for a dataset config.

Options

  • :config - Dataset config name. If not provided, the config is inferred.
  • :revision - Git revision. Defaults to "main".
  • :token - Authentication token.

extract_config_names(card_data)

@spec extract_config_names(map() | nil) :: [String.t()]

Extracts configuration names from dataset card_data.

Handles both the modern "configs" format and legacy "dataset_config_names" format.

Examples

iex> HfHub.Api.extract_config_names(%{"configs" => [%{"config_name" => "main"}]})
["main"]

iex> HfHub.Api.extract_config_names(%{"dataset_config_names" => ["train", "test"]})
["train", "test"]

iex> HfHub.Api.extract_config_names(nil)
[]

list_datasets(opts \\ [])

@spec list_datasets(keyword()) :: {:ok, [dataset_info()]} | {:error, term()}

Lists datasets from the HuggingFace Hub with optional filters.

Options

  • :filter - Filter by task or other criteria
  • :sort - Sort by field (e.g., "downloads", "likes", "updated")
  • :direction - Sort direction (:asc or :desc)
  • :limit - Maximum number of results
  • :author - Filter by author

Examples

{:ok, datasets} = HfHub.Api.list_datasets(sort: "downloads", limit: 10)

list_files(repo_id, opts \\ [])

@spec list_files(
  HfHub.repo_id(),
  keyword()
) :: {:ok, [file_info()]} | {:error, term()}

Lists files in a repository.

Options

  • :repo_type - Type of repository (:model, :dataset, or :space). Defaults to :model.
  • :revision - Git revision. Defaults to "main".
  • :recursive - List files recursively. Defaults to true for datasets.
  • :path_in_repo - Subdirectory path to list.
  • :token - Authentication token.

Examples

{:ok, files} = HfHub.Api.list_files("bert-base-uncased", repo_type: :model)

list_models(opts \\ [])

@spec list_models(keyword()) :: {:ok, [model_info()]} | {:error, term()}

Lists models from the HuggingFace Hub with optional filters.

Options

  • :filter - Filter by task, library, or other criteria
  • :sort - Sort by field (e.g., "downloads", "likes", "updated")
  • :direction - Sort direction (:asc or :desc)
  • :limit - Maximum number of results
  • :author - Filter by author

Examples

{:ok, models} = HfHub.Api.list_models(filter: "text-classification", limit: 10)

list_repo_tree(repo_id, opts \\ [])

@spec list_repo_tree(
  HfHub.repo_id(),
  keyword()
) :: {:ok, [tree_entry()]} | {:error, term()}

Lists repository tree entries (files and folders).

Options

  • :repo_type - Type of repository (:model, :dataset, or :space). Defaults to :model.
  • :revision - Git revision. Defaults to "main".
  • :path_in_repo - Subdirectory path to list.
  • :recursive - List recursively. Defaults to false.
  • :expand - Request expanded metadata. Defaults to false.
  • :token - Authentication token.

model_info(repo_id, opts \\ [])

@spec model_info(
  HfHub.repo_id(),
  keyword()
) :: {:ok, model_info()} | {:error, term()}

Fetches information about a model from the HuggingFace Hub.

Options

  • :revision - Git revision (branch, tag, or commit hash). Defaults to "main".
  • :token - Authentication token. If not provided, uses configured token.

Examples

{:ok, info} = HfHub.Api.model_info("bert-base-uncased")
{:ok, info} = HfHub.Api.model_info("bert-base-uncased", revision: "main")

space_info(repo_id, opts \\ [])

@spec space_info(
  HfHub.repo_id(),
  keyword()
) :: {:ok, space_info()} | {:error, term()}

Fetches information about a space from the HuggingFace Hub.

Options

  • :revision - Git revision (branch, tag, or commit hash). Defaults to "main".
  • :token - Authentication token. If not provided, uses configured token.

Examples

{:ok, info} = HfHub.Api.space_info("user/space-name")