Explorer.Backend.DataFrame behaviour (Explorer v0.8.2)

The behaviour for DataFrame backends.
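
For orientation, here is a minimal sketch of a module adopting this behaviour. The module name and its column-map representation are assumptions for illustration only; a real backend, such as the Polars backend that ships with Explorer, implements every callback listed on this page.

defmodule MyBackend.DataFrame do
  # Hypothetical backend skeleton. Only two simple callbacks are shown,
  # assuming the backend stores data as a map of column name => list of values.
  @behaviour Explorer.Backend.DataFrame

  defstruct [:columns]

  @impl true
  def n_rows(%Explorer.DataFrame{data: %__MODULE__{columns: columns}}) do
    case Map.values(columns) do
      [] -> 0
      [values | _] -> length(values)
    end
  end

  @impl true
  def head(%Explorer.DataFrame{data: %__MODULE__{columns: columns}} = df, rows) do
    taken = Map.new(columns, fn {name, values} -> {name, Enum.take(values, rows)} end)
    %{df | data: %__MODULE__{columns: taken}}
  end

  # ...the remaining callbacks are omitted from this sketch...
end
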

Summary

Functions

inspect(df, backend, n_rows, inspect_opts, opts \\ [])
Default inspect implementation for backends.

new(data, names, dtypes)
Creates a new DataFrame for a given backend.

Types

@type basic_types() :: float() | integer() | String.t() | Date.t() | DateTime.t()
@type column_name() :: String.t()
@type columns_for_io() :: [column_name()] | [pos_integer()] | nil
@type compression() :: {algorithm :: option(atom()), level :: option(integer())}
@type df() :: Explorer.DataFrame.t()
@type dtype() :: Explorer.Series.dtype()
@type dtypes() :: %{required(column_name()) => dtype()}
@type fs_entry() :: Explorer.DataFrame.fs_entry()
@type io_result(t) :: {:ok, t} | {:error, Exception.t()}
@type lazy_frame() :: Explorer.Backend.LazyFrame.t()
@type lazy_series() :: Explorer.Backend.LazySeries.t()
@type mutate_value() ::
  series()
  | basic_types()
  | [basic_types()]
  | (df() -> series() | basic_types() | [basic_types()])
@type ok_result() :: :ok | {:error, Exception.t()}
@type option(type) :: type | nil
@type result(t) :: {:ok, t} | {:error, term()}
@type series() :: Explorer.Series.t()
@type t() :: struct()
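
For orientation, a few of these types written out as literal values. The concrete dtypes shown are examples, not an exhaustive list:

# dtypes(): column names mapped to Explorer.Series dtypes
dtypes = %{"name" => :string, "age" => {:s, 64}}

# compression(): algorithm and level, either of which may be nil
compression = {:zstd, 3}

# columns_for_io(): a list of column names, a list of positions, or nil for all columns
columns = ["name", "age"]
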

Callbacks

@callback collect(df()) :: df()

concat_columns(list, out_df)

@callback concat_columns([df()], out_df :: df()) :: df()

concat_rows(list, out_df)

@callback concat_rows([df()], out_df :: df()) :: df()
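
Both concat callbacks receive an out_df argument describing the expected result alongside the input frames. From the user's side they back Explorer.DataFrame.concat_rows/1 and concat_columns/1; a small sketch, assuming the default in-memory backend:

require Explorer.DataFrame, as: DF

a = DF.new(id: [1, 2], x: ["a", "b"])
b = DF.new(id: [3, 4], x: ["c", "d"])

# Resolves to the backend's concat_rows/2 callback
DF.concat_rows([a, b])
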

correlation(df, out_df, ddof, method)

@callback correlation(df(), out_df :: df(), ddof :: integer(), method :: atom()) :: df()

covariance(df, out_df, ddof)

@callback covariance(df(), out_df :: df(), ddof :: integer()) :: df()

distinct(df, out_df, columns)

@callback distinct(df(), out_df :: df(), columns :: [column_name()]) :: df()
@callback drop_nil(df(), columns :: [column_name()]) :: df()

dummies(df, out_df, columns)

@callback dummies(df(), out_df :: df(), columns :: [column_name()]) :: df()

dump_csv(df, header?, delimiter)

@callback dump_csv(df(), header? :: boolean(), delimiter :: String.t()) ::
  io_result(binary())
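
dump_csv/3 backs Explorer.DataFrame.dump_csv/2, and its io_result(binary()) surfaces as an ok/error tuple. A small sketch, assuming the default backend and its :header and :delimiter options:

require Explorer.DataFrame, as: DF

df = DF.new(a: [1, 2, 3], b: ["x", "y", "z"])

{:ok, csv} = DF.dump_csv(df, header: true, delimiter: ",")
IO.puts(csv)
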

dump_ipc(df, compression)

@callback dump_ipc(df(), compression()) :: io_result(binary())

dump_ipc_stream(df, compression)

@callback dump_ipc_stream(df(), compression()) :: io_result(binary())
@callback dump_ndjson(df()) :: io_result(binary())

dump_parquet(df, compression)

@callback dump_parquet(df(), compression()) :: io_result(binary())

explode(df, out_df, columns)

@callback explode(df(), out_df :: df(), columns :: [column_name()]) :: df()

filter_with(df, out_df, lazy_series)

@callback filter_with(df(), out_df :: df(), lazy_series()) :: df()

from_csv(entry, dtypes, delimiter, nil_values, skip_rows, skip_rows_after_header, header?, encoding, max_rows, columns, infer_schema_length, parse_dates, eol_delimiter)

@callback from_csv(
  entry :: fs_entry(),
  dtypes(),
  delimiter :: String.t(),
  nil_values :: [String.t()],
  skip_rows :: integer(),
  skip_rows_after_header :: integer(),
  header? :: boolean(),
  encoding :: String.t(),
  max_rows :: option(integer()),
  columns :: columns_for_io(),
  infer_schema_length :: option(integer()),
  parse_dates :: boolean(),
  eol_delimiter :: option(String.t())
) :: io_result(df())
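
The public Explorer.DataFrame.from_csv/2 collects these arguments from a keyword list and hands them to this callback. A sketch using the options that map onto the arguments above; the file path is a placeholder:

require Explorer.DataFrame, as: DF

{:ok, df} =
  DF.from_csv("data.csv",
    delimiter: ",",
    nil_values: ["", "NA"],
    header: true,
    max_rows: 1_000,
    columns: ["name", "age"],
    infer_schema_length: 1_000,
    parse_dates: true
  )
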

from_ipc(entry, columns)

@callback from_ipc(
  entry :: fs_entry(),
  columns :: columns_for_io()
) :: io_result(df())

from_ipc_stream(filename, columns)

@callback from_ipc_stream(
  filename :: fs_entry(),
  columns :: columns_for_io()
) :: io_result(df())

from_ndjson(filename, infer_schema_length, batch_size)

@callback from_ndjson(
  filename :: fs_entry(),
  infer_schema_length :: integer(),
  batch_size :: integer()
) :: io_result(df())

from_parquet(entry, max_rows, columns)

@callback from_parquet(
  entry :: fs_entry(),
  max_rows :: option(integer()),
  columns :: columns_for_io()
) :: io_result(df())

from_query(t, query, params)

@callback from_query(
  Adbc.Connection.t(),
  query :: String.t(),
  params :: [term()]
) :: result(df())
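
from_query is reached through Explorer.DataFrame.from_query with an Adbc.Connection. A hedged sketch; the driver setup and SQL below are illustrative and assume the :adbc dependency with a SQLite driver already installed:

# Hypothetical ADBC setup; driver, query and params are placeholders.
{:ok, db} = Adbc.Database.start_link(driver: :sqlite)
{:ok, conn} = Adbc.Connection.start_link(database: db)

{:ok, df} = Explorer.DataFrame.from_query(conn, "SELECT 1 AS id, 'a' AS name", [])
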
@callback from_series([{binary(), Series.t()}]) :: df()
@callback from_tabular(Table.Reader.t(), dtypes()) :: df()
@callback head(df(), rows :: integer()) :: df()
@callback inspect(df(), opts :: Inspect.Opts.t()) :: Inspect.Algebra.t()

join(left, right, out_df, on, how)

@callback join(
  left :: df(),
  right :: df(),
  out_df :: df(),
  on :: [{column_name(), column_name()}],
  how :: :left | :inner | :outer | :right | :cross
) :: df()
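
The on pairs and the how atom correspond to the :on and :how options of Explorer.DataFrame.join/3. A small sketch:

require Explorer.DataFrame, as: DF

left = DF.new(id: [1, 2, 3], name: ["a", "b", "c"])
right = DF.new(id: [2, 3, 4], score: [10, 20, 30])

DF.join(left, right, on: [{"id", "id"}], how: :left)
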
@callback lazy() :: module()
@callback lazy(df()) :: df()

load_csv(contents, dtypes, delimiter, nil_values, skip_rows, skip_rows_after_header, header?, encoding, max_rows, columns, infer_schema_length, parse_dates, eol_delimiter)

@callback load_csv(
  contents :: String.t(),
  dtypes(),
  delimiter :: String.t(),
  nil_values :: [String.t()],
  skip_rows :: integer(),
  skip_rows_after_header :: integer(),
  header? :: boolean(),
  encoding :: String.t(),
  max_rows :: option(integer()),
  columns :: columns_for_io(),
  infer_schema_length :: option(integer()),
  parse_dates :: boolean(),
  eol_delimiter :: option(String.t())
) :: io_result(df())

load_ipc(contents, columns)

@callback load_ipc(
  contents :: binary(),
  columns :: columns_for_io()
) :: io_result(df())

load_ipc_stream(contents, columns)

@callback load_ipc_stream(
  contents :: binary(),
  columns :: columns_for_io()
) :: io_result(df())

load_ndjson(contents, infer_schema_length, batch_size)

@callback load_ndjson(
  contents :: String.t(),
  infer_schema_length :: integer(),
  batch_size :: integer()
) :: io_result(df())
@callback load_parquet(contents :: binary()) :: io_result(df())
@callback mask(df(), mask :: series()) :: df()

mutate_with(df, out_df, mutations)

@callback mutate_with(df(), out_df :: df(), mutations :: [{column_name(), lazy_series()}]) ::
  df()
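
Mutations arrive as {column_name, lazy_series} pairs. On the user side they are usually produced with Explorer.DataFrame.mutate_with/2 or the mutate/2 macro; a small sketch:

require Explorer.DataFrame, as: DF

df = DF.new(a: [1, 2, 3])

# mutate_with/2 builds the lazy series this callback receives
DF.mutate_with(df, fn ldf -> [b: Explorer.Series.add(ldf["a"], 1)] end)

# the mutate/2 macro is a shorthand for the same operation
DF.mutate(df, b: a + 1)
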
@callback n_rows(df()) :: integer()
@callback nil_count(df()) :: df()

pivot_longer(df, out_df, columns_to_pivot, columns_to_keep, names_to, values_to)

@callback pivot_longer(
  df(),
  out_df :: df(),
  columns_to_pivot :: [column_name()],
  columns_to_keep :: [column_name()],
  names_to :: column_name(),
  values_to :: column_name()
) :: df()

pivot_wider(df, id_columns, names_from, values_from, names_prefix)

@callback pivot_wider(
  df(),
  id_columns :: [column_name()],
  names_from :: column_name(),
  values_from :: [column_name()],
  names_prefix :: String.t()
) :: df()
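
pivot_longer and pivot_wider back the Explorer.DataFrame functions of the same names, whose arguments and options map onto the callbacks above. A small sketch:

require Explorer.DataFrame, as: DF

df = DF.new(id: [1, 1, 2, 2], key: ["x", "y", "x", "y"], value: [10, 20, 30, 40])

# names_from and values_from, as in the pivot_wider callback
wide = DF.pivot_wider(df, "key", "value")

# columns_to_pivot, names_to and values_to, as in the pivot_longer callback
DF.pivot_longer(wide, ["x", "y"], names_to: "key", values_to: "value")
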
@callback pull(df(), column :: column_name()) :: series()

put(df, out_df, column_name, series)

@callback put(df(), out_df :: df(), column_name(), series()) :: df()
@callback re_dtype(String.t()) :: dtype()

rename(df, out_df, list)

@callback rename(df(), out_df :: df(), [{old :: column_name(), new :: column_name()}]) ::
  df()

sample(df, n_or_frac, replace, shuffle, seed)

@callback sample(
  df(),
  n_or_frac :: number(),
  replace :: boolean(),
  shuffle :: boolean(),
  seed :: option(integer())
) :: df()
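
This callback backs Explorer.DataFrame.sample: n_or_frac is an integer row count or a float fraction, and the remaining arguments arrive as options. A small sketch:

require Explorer.DataFrame, as: DF

df = DF.new(x: Enum.to_list(1..100))

DF.sample(df, 10, replace: false, shuffle: true, seed: 100)
DF.sample(df, 0.2, seed: 100)
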
@callback select(df(), out_df :: df()) :: df()
@callback slice(
  df(),
  indices ::
    [integer()] | series() | %Range{first: term(), last: term(), step: term()}
) ::
  df()

slice(df, offset, length)

@callback slice(df(), offset :: integer(), length :: integer()) :: df()

sort_with(df, out_df, directions, maintain_order?, multithreaded?, nulls_last?)

@callback sort_with(
  df(),
  out_df :: df(),
  directions :: [{:asc | :desc, lazy_series()}],
  maintain_order? :: boolean(),
  multithreaded? :: boolean(),
  nulls_last? :: boolean()
) :: df()
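
The directions list pairs :asc or :desc with a lazy series for each sort key. Users typically produce it via Explorer.DataFrame.sort_with/2 or the sort_by/2 macro; a small sketch:

require Explorer.DataFrame, as: DF

df = DF.new(a: [3, 1, 2], b: ["x", "y", "z"])

# sort_with/2 builds the {:desc, lazy_series} pairs this callback receives
DF.sort_with(df, fn ldf -> [desc: ldf["a"]] end)

# the sort_by/2 macro expresses the same thing
DF.sort_by(df, asc: b)
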

summarise_with(df, out_df, aggregations)

@callback summarise_with(
  df(),
  out_df :: df(),
  aggregations :: [{column_name(), lazy_series()}]
) :: df()
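
Aggregations also arrive as {column_name, lazy_series} pairs, normally built with Explorer.DataFrame.summarise_with/2 or the summarise/2 macro on a grouped frame; a small sketch:

require Explorer.DataFrame, as: DF

df = DF.new(group: ["a", "a", "b"], value: [1, 2, 3])

df
|> DF.group_by("group")
|> DF.summarise_with(fn ldf -> [total: Explorer.Series.sum(ldf["value"])] end)
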
@callback tail(df(), rows :: integer()) :: df()

to_csv(df, entry, header?, delimiter, streaming)

@callback to_csv(
  df(),
  entry :: fs_entry(),
  header? :: boolean(),
  delimiter :: String.t(),
  streaming :: boolean()
) :: ok_result()

to_ipc(df, entry, compression, streaming)

@callback to_ipc(df(), entry :: fs_entry(), compression(), streaming :: boolean()) ::
  ok_result()

to_ipc_stream(df, entry, compression)

@callback to_ipc_stream(
  df(),
  entry :: fs_entry(),
  compression()
) :: ok_result()
@callback to_ndjson(df(), entry :: fs_entry()) :: ok_result()

to_parquet(df, entry, compression, streaming)

@callback to_parquet(
  df(),
  entry :: fs_entry(),
  compression(),
  streaming :: boolean()
) :: ok_result()
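
to_parquet/4 backs Explorer.DataFrame.to_parquet/3, where the compression() tuple is given as an option. A hedged sketch; the path and the {:zstd, 3} value are illustrative:

require Explorer.DataFrame, as: DF

df = DF.new(a: [1, 2, 3])

:ok = DF.to_parquet(df, "/tmp/example.parquet", compression: {:zstd, 3})
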
@callback to_rows(df(), atom_keys? :: boolean()) :: [map()]

to_rows_stream(df, atom_keys?, chunk_size)

@callback to_rows_stream(df(), atom_keys? :: boolean(), chunk_size :: integer()) ::
  Enumerable.t()
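
to_rows_stream/3 backs Explorer.DataFrame.to_rows_stream/2, whose :atom_keys and :chunk_size options map onto the arguments above. A small sketch:

require Explorer.DataFrame, as: DF

df = DF.new(a: [1, 2, 3], b: ["x", "y", "z"])

df
|> DF.to_rows_stream(atom_keys: true, chunk_size: 2)
|> Enum.take(2)
#=> [%{a: 1, b: "x"}, %{a: 2, b: "y"}]
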

transpose(df, out_df, keep_names_as, new_column_names)

@callback transpose(
  df(),
  out_df :: df(),
  keep_names_as :: column_name(),
  new_column_names :: [column_name()]
) :: df()

unnest(df, out_df, columns)

@callback unnest(df(), out_df :: df(), columns :: [column_name()]) :: df()

Functions


inspect(df, backend, n_rows, inspect_opts, opts \\ [])

Default inspect implementation for backends.


new(data, names, dtypes)

Creates a new DataFrame for a given backend.
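
Continuing the hypothetical backend sketch from the top of this page, a backend builds its dataframes with new/3 and can delegate its inspect/2 callback to the default implementation above. The module below is an assumption for illustration, including the "MyBackend" label passed to inspect/5, which is assumed to be the backend name shown in the inspected output:

defmodule MyBackend.DataFrame do
  @behaviour Explorer.Backend.DataFrame

  defstruct [:columns]

  # Wrap the backend's native data in an Explorer.DataFrame with the given names and dtypes
  def from_columns(columns, names, dtypes) do
    Explorer.Backend.DataFrame.new(%__MODULE__{columns: columns}, names, dtypes)
  end

  @impl true
  def n_rows(%Explorer.DataFrame{data: %__MODULE__{columns: columns}}) do
    columns |> Map.values() |> List.first([]) |> length()
  end

  # Delegate to Explorer.Backend.DataFrame's default inspect implementation
  @impl true
  def inspect(df, opts) do
    Explorer.Backend.DataFrame.inspect(df, "MyBackend", n_rows(df), opts)
  end
end
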