HfHub.Download (HfHub v0.2.0)

Copy Markdown View Source

File download functionality for HuggingFace Hub.

Provides functions to download files from HuggingFace repositories with caching, resume support, and progress tracking.

Examples

# Download a single file
{:ok, path} = HfHub.Download.hf_hub_download(
  repo_id: "bert-base-uncased",
  filename: "config.json",
  repo_type: :model
)

# Download entire repository snapshot
{:ok, snapshot_path} = HfHub.Download.snapshot_download(
  repo_id: "bert-base-uncased",
  repo_type: :model
)

# Stream download for large files
{:ok, stream} = HfHub.Download.download_stream(
  repo_id: "bert-base-uncased",
  filename: "pytorch_model.bin"
)

Summary

Functions

Computes the SHA256 hash of a file.

Creates a stream for downloading a file.

Downloads a file from a HuggingFace repository.

Resumes an interrupted download.

Downloads an entire repository snapshot.

Types

download_opts()

@type download_opts() :: [
  repo_id: HfHub.repo_id(),
  filename: HfHub.filename(),
  repo_type: HfHub.repo_type(),
  revision: HfHub.revision(),
  cache_dir: Path.t(),
  force_download: boolean(),
  extract: boolean(),
  extract_dir: Path.t(),
  token: String.t() | nil,
  progress_callback:
    (non_neg_integer(), non_neg_integer() | nil -> any()) | nil,
  verify_checksum: boolean(),
  expected_sha256: String.t() | nil
]

Functions

compute_sha256(path)

@spec compute_sha256(Path.t()) :: {:ok, String.t()} | {:error, term()}

Computes the SHA256 hash of a file.

Returns {:ok, hash} where hash is a lowercase hex-encoded string.

Examples

{:ok, hash} = HfHub.Download.compute_sha256("/path/to/file")
# => {:ok, "abc123..."}

download_stream(opts)

@spec download_stream(keyword()) :: {:ok, Enumerable.t()} | {:error, term()}

Creates a stream for downloading a file.

Useful for large files where you want to process the data as it downloads.

Options

  • :repo_id - Repository ID
  • :filename - Name of the file to download
  • :repo_type - Type of repository. Defaults to :model.
  • :revision - Git revision. Defaults to "main".
  • :token - Authentication token.

Examples

{:ok, stream} = HfHub.Download.download_stream(
  repo_id: "bert-base-uncased",
  filename: "pytorch_model.bin"
)

stream
|> Stream.each(fn chunk -> IO.write(chunk) end)
|> Stream.run()

hf_hub_download(opts)

@spec hf_hub_download(download_opts()) :: {:ok, Path.t()} | {:error, term()}

Downloads a file from a HuggingFace repository.

Returns the local path to the cached file.

Options

  • :repo_id - Repository ID (e.g., "bert-base-uncased")
  • :filename - Name of the file to download
  • :repo_type - Type of repository (:model, :dataset, or :space). Defaults to :model.
  • :revision - Git revision. Defaults to "main".
  • :cache_dir - Local cache directory. Defaults to configured cache directory.
  • :force_download - Force re-download even if cached. Defaults to false.
  • :extract - Extract archives after download. Defaults to false.
  • :extract_dir - Destination for extracted files: a directory for archives, or a file path for .gz files.
  • :token - Authentication token.
  • :progress_callback - Function called with (bytes_downloaded, total_bytes) during download. total_bytes may be nil if the server doesn't provide Content-Length.
  • :verify_checksum - Verify SHA256 checksum after download. Defaults to false.
  • :expected_sha256 - Expected SHA256 hash. If provided and the computed hash doesn't match, an error is returned.

Examples

{:ok, path} = HfHub.Download.hf_hub_download(
  repo_id: "bert-base-uncased",
  filename: "config.json"
)

{:ok, path} = HfHub.Download.hf_hub_download(
  repo_id: "squad",
  filename: "train.json",
  repo_type: :dataset,
  revision: "main"
)

# With progress tracking
{:ok, path} = HfHub.Download.hf_hub_download(
  repo_id: "some/model",
  filename: "model.bin",
  progress_callback: fn downloaded, total ->
    if total, do: IO.puts("#{round(downloaded / total * 100)}%")
  end
)

# With checksum verification
{:ok, path} = HfHub.Download.hf_hub_download(
  repo_id: "some/model",
  filename: "model.bin",
  verify_checksum: true,
  expected_sha256: "abc123..."
)

resume_download(opts)

@spec resume_download(keyword()) :: {:ok, Path.t()} | {:error, term()}

Resumes an interrupted download.

Options

  • :repo_id - Repository ID
  • :filename - Name of the file to download
  • :repo_type - Type of repository. Defaults to :model.
  • :revision - Git revision. Defaults to "main".
  • :token - Authentication token.

Examples

{:ok, path} = HfHub.Download.resume_download(
  repo_id: "bert-base-uncased",
  filename: "pytorch_model.bin"
)

snapshot_download(opts)

@spec snapshot_download(keyword()) :: {:ok, Path.t()} | {:error, term()}

Downloads an entire repository snapshot.

Returns the local path to the snapshot directory.

Options

  • :repo_id - Repository ID
  • :repo_type - Type of repository. Defaults to :model.
  • :revision - Git revision. Defaults to "main".
  • :cache_dir - Local cache directory.
  • :ignore_patterns - List of glob patterns to ignore.
  • :allow_patterns - List of glob patterns to allow.
  • :token - Authentication token.

Examples

{:ok, snapshot_path} = HfHub.Download.snapshot_download(
  repo_id: "bert-base-uncased"
)

{:ok, snapshot_path} = HfHub.Download.snapshot_download(
  repo_id: "bert-base-uncased",
  ignore_patterns: ["*.msgpack", "*.h5"]
)