Data source reader APIs for creating streaming DataFrames.
Mirrors PySpark's DataStreamReader with a builder pattern.
Examples
reader = SparkEx.StreamReader.new(session)
df = reader |> SparkEx.StreamReader.format("rate") |> SparkEx.StreamReader.load()
# Convenience: rate source for testing
df = SparkEx.StreamReader.rate(session, rows_per_second: 10)
Summary
Functions
Sets the schema for the streaming reader.
Types
@type t() :: %SparkEx.StreamReader{ format: String.t() | nil, options: %{required(String.t()) => String.t()}, schema: String.t() | nil, session: GenServer.server() }
Functions
@spec csv(GenServer.server(), String.t(), keyword()) :: SparkEx.DataFrame.t()
@spec json(GenServer.server(), String.t(), keyword()) :: SparkEx.DataFrame.t()
@spec load(t()) :: SparkEx.DataFrame.t()
@spec load(t(), String.t() | [String.t()]) :: SparkEx.DataFrame.t()
@spec new(GenServer.server()) :: t()
@spec orc(GenServer.server(), String.t(), keyword()) :: SparkEx.DataFrame.t()
@spec parquet(GenServer.server(), String.t(), keyword()) :: SparkEx.DataFrame.t()
@spec rate(GenServer.server(), keyword()) :: SparkEx.DataFrame.t()
@spec schema(t(), String.t() | SparkEx.Types.struct_type()) :: t()
Sets the schema for the streaming reader.
Accepts either a DDL string or a struct type from SparkEx.Types.
Examples
reader |> SparkEx.StreamReader.schema("id LONG, name STRING")
reader |> SparkEx.StreamReader.schema(SparkEx.Types.struct_type([
  SparkEx.Types.struct_field("id", :long),
  SparkEx.Types.struct_field("name", :string)
]))
@spec table(t(), String.t()) :: SparkEx.DataFrame.t()
@spec text(GenServer.server(), String.t(), keyword()) :: SparkEx.DataFrame.t()
@spec xml(GenServer.server(), String.t(), keyword()) :: SparkEx.DataFrame.t()