SparkEx.StreamReader (SparkEx v0.1.0)

Copy Markdown View Source

Data source reader APIs for creating streaming DataFrames.

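Mirrors PySpark's DataStreamReader using a builder pattern: create a reader with new/1, configure it with format/2, schema/2, option/3, and options/2 (each returns the updated reader), then call load/1, load/2, or table/2 to obtain a SparkEx.DataFrame.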

Examples

reader = SparkEx.StreamReader.new(session)
df = reader |> SparkEx.StreamReader.format("rate") |> SparkEx.StreamReader.load()

# Convenience: rate source for testing
df = SparkEx.StreamReader.rate(session, rows_per_second: 10)

Summary

Types

t()

@type t() :: %SparkEx.StreamReader{
  format: String.t() | nil,
  options: %{required(String.t()) => String.t()},
  schema: String.t() | nil,
  session: GenServer.server()
}

Functions

csv(session, path, opts \\ [])

format(reader, source)

@spec format(t(), String.t()) :: t()

json(session, path, opts \\ [])

load(reader)

@spec load(t()) :: SparkEx.DataFrame.t()

load(reader, path)

@spec load(t(), String.t() | [String.t()]) :: SparkEx.DataFrame.t()

new(session)

@spec new(GenServer.server()) :: t()

option(reader, key, value)

@spec option(t(), String.t(), term()) :: t()

options(reader, opts)

@spec options(t(), map() | keyword()) :: t()

orc(session, path, opts \\ [])

parquet(session, path, opts \\ [])

rate(session, opts \\ [])

@spec rate(
  GenServer.server(),
  keyword()
) :: SparkEx.DataFrame.t()

schema(reader, schema_ddl)

@spec schema(t(), String.t() | SparkEx.Types.struct_type()) :: t()

Sets the schema for the streaming reader.

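Accepts either a DDL string (such as "id LONG, name STRING") or a struct type from SparkEx.Types. Despite its name, the schema_ddl argument is not limited to DDL strings.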

Examples

reader |> StreamReader.schema("id LONG, name STRING")
reader |> StreamReader.schema(SparkEx.Types.struct_type([
  SparkEx.Types.struct_field("id", :long),
  SparkEx.Types.struct_field("name", :string)
]))

table(reader, table_name)

@spec table(t(), String.t()) :: SparkEx.DataFrame.t()

text(session, path, opts \\ [])

xml(session, path, opts \\ [])