SparkEx.WriterV2 (SparkEx v0.1.0)


Builder API for writing DataFrames using the V2 DataSource API.

Mirrors PySpark's DataFrameWriterV2 with a builder pattern.

Examples

import SparkEx.WriterV2

# Create a new table
df
|> SparkEx.DataFrame.write_v2("catalog.db.my_table")
|> using("parquet")
|> table_property("description", "My table")
|> create()

# Append to existing table
df
|> SparkEx.DataFrame.write_v2("my_table")
|> append()

# Overwrite with condition
import SparkEx.Functions, only: [col: 1, lit: 1]

df
|> SparkEx.DataFrame.write_v2("my_table")
|> overwrite(col("date") |> SparkEx.Column.eq(lit("2024-01-01")))

Summary

Functions

append(writer, opts \\ [])
Appends the DataFrame's data to the table.

cluster_by(writer, columns)
Sets the clustering columns.

create(writer, opts \\ [])
Creates a new table with the DataFrame's data.

create_or_replace(writer, opts \\ [])
Creates a table or replaces it if it already exists.

createOrReplace(writer, opts \\ [])
Alias for create_or_replace/2 (PySpark createOrReplace).

option(writer, key, value)
Sets a single writer option.

options(writer, opts)
Merges a map of options into the writer.

overwrite(writer)
Overwrites rows in the table.

overwrite(writer, opts)
Overwrites rows in the table matching the given condition.

overwrite_partitions(writer, opts \\ [])
Overwrites all partitions that the DataFrame touches.

partitioned_by(writer, exprs)
Sets the partitioning expressions.

replace(writer, opts \\ [])
Replaces an existing table with the DataFrame's data.

table_properties(writer, props)
Merges a map of table properties.

table_property(writer, key, value)
Sets a single table property.

using(writer, provider)
Sets the provider (data source) for the table.

Types

t()

@type t() :: %SparkEx.WriterV2{
  cluster_by: [String.t()],
  df: SparkEx.DataFrame.t(),
  options: %{required(String.t()) => String.t()},
  overwrite_condition: term() | nil,
  partitioned_by: [term()],
  provider: String.t() | nil,
  table_name: String.t(),
  table_properties: %{required(String.t()) => String.t()}
}

Functions

append(writer, opts \\ [])

@spec append(
  t(),
  keyword()
) :: :ok | {:error, term()}

Appends the DataFrame's data to the table.
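
For example (a sketch, assuming a connected SparkEx session and an existing target table):

df
|> SparkEx.DataFrame.write_v2("catalog.db.my_table")
|> append()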

cluster_by(writer, columns)

@spec cluster_by(t(), [String.t()]) :: t()

Sets the clustering columns.
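
For example, clustering a new table by a column (a sketch; whether clustering is honored depends on the table provider, e.g. Delta or Iceberg):

df
|> SparkEx.DataFrame.write_v2("catalog.db.events")
|> cluster_by(["user_id"])
|> create()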

create(writer, opts \\ [])

@spec create(
  t(),
  keyword()
) :: :ok | {:error, term()}

Creates a new table with the DataFrame's data.

create_or_replace(writer, opts \\ [])

@spec create_or_replace(
  t(),
  keyword()
) :: :ok | {:error, term()}

Creates a table or replaces it if it already exists.

createOrReplace(writer, opts \\ [])

@spec createOrReplace(
  t(),
  keyword()
) :: :ok | {:error, term()}

Alias for create_or_replace/2 (PySpark createOrReplace).

option(writer, key, value)

@spec option(t(), String.t(), term()) :: t()

Sets a single writer option.

options(writer, opts)

@spec options(t(), map() | keyword()) :: t()

Merges a map of options into the writer.
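
Both map and keyword forms are accepted and merged into the writer's string-keyed option map. A sketch (the option keys shown are illustrative, not a fixed SparkEx list):

writer =
  df
  |> SparkEx.DataFrame.write_v2("my_table")
  |> option("mergeSchema", "true")
  |> options(%{"compression" => "zstd"})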

overwrite(writer)

@spec overwrite(t()) :: :ok | {:error, term()}

Overwrites all rows in the table.

Examples

# Overwrite all data
overwrite(writer)

# Overwrite with condition
overwrite(writer, col("date") |> SparkEx.Column.eq(lit("2024-01-01")))

overwrite(writer, opts)

@spec overwrite(
  t(),
  keyword()
) :: :ok | {:error, term()}
@spec overwrite(t(), SparkEx.Column.t()) :: :ok | {:error, term()}

Overwrites rows in the table matching the given condition.

overwrite(writer, condition, opts)

@spec overwrite(t(), SparkEx.Column.t(), keyword()) :: :ok | {:error, term()}

Overwrites rows in the table matching the given condition, with additional options.

overwrite_partitions(writer, opts \\ [])

@spec overwrite_partitions(
  t(),
  keyword()
) :: :ok | {:error, term()}

Overwrites all partitions that the DataFrame touches.
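
This mirrors PySpark's overwritePartitions: only partitions present in the DataFrame are replaced, and all other partitions are left intact. For example (a sketch, assuming a partitioned target table):

df
|> SparkEx.DataFrame.write_v2("catalog.db.daily_metrics")
|> overwrite_partitions()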

partitioned_by(writer, exprs)

@spec partitioned_by(t(), [term()]) :: t()

Sets the partitioning expressions.

Accepts column expressions (e.g. from SparkEx.Functions.col/1).
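
For example (a sketch; col/1 is imported from SparkEx.Functions, as in the module examples above):

import SparkEx.Functions, only: [col: 1]

df
|> SparkEx.DataFrame.write_v2("catalog.db.events")
|> partitioned_by([col("event_date")])
|> create()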

replace(writer, opts \\ [])

@spec replace(
  t(),
  keyword()
) :: :ok | {:error, term()}

Replaces an existing table with the DataFrame's data.

table_properties(writer, props)

@spec table_properties(t(), map() | keyword()) :: t()

Merges a map of table properties.
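
For example (a sketch; the property keys are illustrative):

df
|> SparkEx.DataFrame.write_v2("my_table")
|> table_properties(%{"description" => "Events table", "owner" => "data-eng"})
|> create()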

table_property(writer, key, value)

@spec table_property(t(), String.t(), term()) :: t()

Sets a single table property.

using(writer, provider)

@spec using(t(), String.t()) :: t()

Sets the provider (data source) for the table.