Explorer.Backend.DataFrame behaviour (Explorer v0.10.1)

The behaviour for DataFrame backends.
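
A backend is a module that implements these callbacks and keeps its data in a backend-specific struct (the t() type below); the callbacks themselves receive and return the public Explorer.DataFrame.t(). A minimal sketch of adopting the behaviour, using hypothetical module and struct names:

defmodule MyBackend.DataFrame do
  @behaviour Explorer.Backend.DataFrame

  # Hypothetical native representation of the data held by this backend.
  defstruct [:resource]

  @impl true
  def n_rows(df), do: length(to_rows(df, false))

  @impl true
  def to_rows(_df, _atom_keys?), do: []

  # ... every remaining callback must be implemented as well ...
end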

Summary

Functions

inspect(df, backend, n_rows, inspect_opts, opts \\ [])

Default inspect implementation for backends.

new(data, names, dtypes)

Creates a new DataFrame for a given backend.

Types

basic_types()

@type basic_types() :: float() | integer() | String.t() | Date.t() | DateTime.t()

column_name()

@type column_name() :: String.t()

columns_for_io()

@type columns_for_io() :: [column_name()] | [pos_integer()] | nil

compression()

@type compression() :: {algorithm :: option(atom()), level :: option(integer())}
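
For example, {:zstd, 3} requests the zstd algorithm at compression level 3, while {nil, nil} leaves both the algorithm and the level unspecified.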

df()

@type df() :: Explorer.DataFrame.t()

dtype()

@type dtype() :: Explorer.Series.dtype()

dtypes()

@type dtypes() :: %{required(column_name()) => dtype()}

fs_entry()

@type fs_entry() :: Explorer.DataFrame.fs_entry()

io_dtypes()

@type io_dtypes() :: [{column_name(), dtype()}]

io_result(t)

@type io_result(t) :: {:ok, t} | {:error, Exception.t()}

lazy_series()

@type lazy_series() :: Explorer.Backend.LazySeries.t()

mutate_value()

@type mutate_value() ::
  series()
  | basic_types()
  | [basic_types()]
  | (df() -> series() | basic_types() | [basic_types()])
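
A few values satisfying this type, assuming a df that has a column "a":

1.5                                             # basic_types()
["x", "y", "z"]                                 # [basic_types()]
fn df -> Explorer.DataFrame.pull(df, "a") end   # (df() -> series())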

ok_result()

@type ok_result() :: :ok | {:error, Exception.t()}

option(type)

@type option(type) :: type | nil

query_frame()

@type query_frame() :: Explorer.Backend.QueryFrame.t()

result(t)

@type result(t) :: {:ok, t} | {:error, term()}

series()

@type series() :: Explorer.Series.t()

t()

@type t() :: struct()

Callbacks

compute(df)

@callback compute(df()) :: df()

concat_columns(list, out_df)

@callback concat_columns([df()], out_df :: df()) :: df()

concat_rows(list, out_df)

@callback concat_rows([df()], out_df :: df()) :: df()

correlation(df, out_df, ddof, method)

@callback correlation(df(), out_df :: df(), ddof :: integer(), method :: atom()) :: df()

covariance(df, out_df, ddof)

@callback covariance(df(), out_df :: df(), ddof :: integer()) :: df()

distinct(df, out_df, columns)

@callback distinct(df(), out_df :: df(), columns :: [column_name()]) :: df()

drop_nil(df, columns)

@callback drop_nil(df(), columns :: [column_name()]) :: df()

dummies(df, out_df, columns)

@callback dummies(df(), out_df :: df(), columns :: [column_name()]) :: df()

dump_csv(df, header?, delimiter)

@callback dump_csv(df(), header? :: boolean(), delimiter :: String.t()) ::
  io_result(binary())

dump_ipc(df, compression)

@callback dump_ipc(df(), compression()) :: io_result(binary())

dump_ipc_stream(df, compression)

@callback dump_ipc_stream(df(), compression()) :: io_result(binary())

dump_ndjson(df)

@callback dump_ndjson(df()) :: io_result(binary())
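
Like the other dump_* callbacks, this follows the io_result/1 convention: the serialized binary on success, an Exception.t() on failure. A hedged sketch of an implementation, using a hypothetical native_dump_ndjson/1 helper and assuming the backend struct sits under the df's :data field:

@impl true
def dump_ndjson(%Explorer.DataFrame{data: native}) do
  case native_dump_ndjson(native) do
    {:ok, binary} -> {:ok, binary}
    {:error, reason} -> {:error, RuntimeError.exception("dump failed: #{inspect(reason)}")}
  end
end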

dump_parquet(df, compression)

@callback dump_parquet(df(), compression()) :: io_result(binary())

explode(df, out_df, columns)

@callback explode(df(), out_df :: df(), columns :: [column_name()]) :: df()

filter_with(df, out_df, lazy_series)

@callback filter_with(df(), out_df :: df(), lazy_series()) :: df()

from_csv(entry, io_dtypes, delimiter, nil_values, skip_rows, skip_rows_after_header, header?, encoding, max_rows, columns, infer_schema_length, parse_dates, eol_delimiter)

@callback from_csv(
  entry :: fs_entry(),
  io_dtypes(),
  delimiter :: String.t(),
  nil_values :: [String.t()],
  skip_rows :: integer(),
  skip_rows_after_header :: integer(),
  header? :: boolean(),
  encoding :: String.t(),
  max_rows :: option(integer()),
  columns :: columns_for_io(),
  infer_schema_length :: option(integer()),
  parse_dates :: boolean(),
  eol_delimiter :: option(String.t())
) :: io_result(df())

from_ipc(entry, columns)

@callback from_ipc(
  entry :: fs_entry(),
  columns :: columns_for_io()
) :: io_result(df())

from_ipc_stream(filename, columns)

@callback from_ipc_stream(
  filename :: fs_entry(),
  columns :: columns_for_io()
) :: io_result(df())

from_ndjson(filename, infer_schema_length, batch_size)

@callback from_ndjson(
  filename :: fs_entry(),
  infer_schema_length :: integer(),
  batch_size :: integer()
) :: io_result(df())

from_parquet(entry, max_rows, columns, rechunk)

@callback from_parquet(
  entry :: fs_entry(),
  max_rows :: option(integer()),
  columns :: columns_for_io(),
  rechunk :: boolean()
) :: io_result(df())

from_query(t, query, params)

@callback from_query(
  Adbc.Connection.t(),
  query :: String.t(),
  params :: [term()]
) :: result(df())

from_series(list)

@callback from_series([{binary(), Series.t()}]) :: df()

from_tabular(t, io_dtypes)

@callback from_tabular(Table.Reader.t(), io_dtypes()) :: df()

head(df, rows)

@callback head(df(), rows :: integer()) :: df()

inspect(df, opts)

@callback inspect(df(), opts :: Inspect.Opts.t()) :: Inspect.Algebra.t()
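
A backend can satisfy this callback by delegating to the default implementation provided by this module (see inspect/5 under Functions), passing a label for itself. A sketch, assuming a hypothetical "MyBackend" label:

@impl true
def inspect(df, opts) do
  Explorer.Backend.DataFrame.inspect(df, "MyBackend", n_rows(df), opts)
end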

join(list, out_df, on, how)

@callback join(
  [df()],
  out_df :: df(),
  on :: [{column_name(), column_name()}],
  how :: :left | :inner | :outer | :right | :cross
) :: df()

lazy()

@callback lazy() :: module()

lazy(df)

@callback lazy(df()) :: df()

load_csv(contents, io_dtypes, delimiter, nil_values, skip_rows, skip_rows_after_header, header?, encoding, max_rows, columns, infer_schema_length, parse_dates, eol_delimiter)

@callback load_csv(
  contents :: String.t(),
  io_dtypes(),
  delimiter :: String.t(),
  nil_values :: [String.t()],
  skip_rows :: integer(),
  skip_rows_after_header :: integer(),
  header? :: boolean(),
  encoding :: String.t(),
  max_rows :: option(integer()),
  columns :: columns_for_io(),
  infer_schema_length :: option(integer()),
  parse_dates :: boolean(),
  eol_delimiter :: option(String.t())
) :: io_result(df())

load_ipc(contents, columns)

@callback load_ipc(
  contents :: binary(),
  columns :: columns_for_io()
) :: io_result(df())

load_ipc_stream(contents, columns)

@callback load_ipc_stream(
  contents :: binary(),
  columns :: columns_for_io()
) :: io_result(df())

load_ndjson(contents, infer_schema_length, batch_size)

@callback load_ndjson(
  contents :: String.t(),
  infer_schema_length :: integer(),
  batch_size :: integer()
) :: io_result(df())

load_parquet(contents)

@callback load_parquet(contents :: binary()) :: io_result(df())

mask(df, mask)

@callback mask(df(), mask :: series()) :: df()

mutate_with(df, out_df, mutations)

@callback mutate_with(df(), out_df :: df(), mutations :: [{column_name(), lazy_series()}]) ::
  df()

n_rows(df)

@callback n_rows(df()) :: integer()

nil_count(df)

@callback nil_count(df()) :: df()

owner_export(df)

@callback owner_export(df()) :: io_result(term())

owner_import(term)

@callback owner_import(term()) :: io_result(df())

owner_reference(df)

@callback owner_reference(df()) :: reference() | nil

pivot_longer(df, out_df, columns_to_pivot, columns_to_keep, names_to, values_to)

@callback pivot_longer(
  df(),
  out_df :: df(),
  columns_to_pivot :: [column_name()],
  columns_to_keep :: [column_name()],
  names_to :: column_name(),
  values_to :: column_name()
) :: df()

pivot_wider(df, id_columns, names_from, values_from, names_prefix)

@callback pivot_wider(
  df(),
  id_columns :: [column_name()],
  names_from :: column_name(),
  values_from :: [column_name()],
  names_prefix :: String.t()
) :: df()

pull(df, column)

@callback pull(df(), column :: column_name()) :: series()

put(df, out_df, column_name, series)

@callback put(df(), out_df :: df(), column_name(), series()) :: df()

re_dtype(t)

@callback re_dtype(String.t()) :: dtype()

rename(df, out_df, list)

@callback rename(df(), out_df :: df(), [{old :: column_name(), new :: column_name()}]) ::
  df()

sample(df, n_or_frac, replace, shuffle, seed)

@callback sample(
  df(),
  n_or_frac :: number(),
  replace :: boolean(),
  shuffle :: boolean(),
  seed :: option(integer())
) :: df()

select(df, out_df)

@callback select(df(), out_df :: df()) :: df()

slice(df, indices)

@callback slice(
  df(),
  indices ::
    [integer()] | series() | %Range{first: term(), last: term(), step: term()}
) ::
  df()

slice(df, offset, length)

@callback slice(df(), offset :: integer(), length :: integer()) :: df()

sort_with(df, out_df, directions, maintain_order?, multithreaded?, nulls_last?)

@callback sort_with(
  df(),
  out_df :: df(),
  directions :: [{:asc | :desc, lazy_series()}],
  maintain_order? :: boolean(),
  multithreaded? :: boolean(),
  nulls_last? :: boolean()
) :: df()

sql(df, sql_string, table_name)

@callback sql(df(), sql_string :: binary(), table_name :: binary()) :: df()

summarise_with(df, out_df, aggregations)

@callback summarise_with(
  df(),
  out_df :: df(),
  aggregations :: [{column_name(), lazy_series()}]
) :: df()

tail(df, rows)

@callback tail(df(), rows :: integer()) :: df()

to_csv(df, entry, header?, delimiter, streaming)

@callback to_csv(
  df(),
  entry :: fs_entry(),
  header? :: boolean(),
  delimiter :: String.t(),
  streaming :: boolean()
) :: ok_result()

to_ipc(df, entry, compression, streaming)

@callback to_ipc(df(), entry :: fs_entry(), compression(), streaming :: boolean()) ::
  ok_result()

to_ipc_stream(df, entry, compression)

@callback to_ipc_stream(
  df(),
  entry :: fs_entry(),
  compression()
) :: ok_result()

to_ndjson(df, entry)

@callback to_ndjson(df(), entry :: fs_entry()) :: ok_result()

to_parquet(df, entry, compression, streaming)

@callback to_parquet(
  df(),
  entry :: fs_entry(),
  compression(),
  streaming :: boolean()
) :: ok_result()

to_rows(df, atom_keys?)

@callback to_rows(df(), atom_keys? :: boolean()) :: [map()]

to_rows_stream(df, atom_keys?, chunk_size)

@callback to_rows_stream(df(), atom_keys? :: boolean(), chunk_size :: integer()) ::
  Enumerable.t()

transpose(df, out_df, keep_names_as, new_column_names)

@callback transpose(
  df(),
  out_df :: df(),
  keep_names_as :: column_name(),
  new_column_names :: [column_name()]
) :: df()

unnest(df, out_df, columns)

@callback unnest(df(), out_df :: df(), columns :: [column_name()]) :: df()

Functions

inspect(df, backend, n_rows, inspect_opts, opts \\ [])

Default inspect implementation for backends.

new(data, names, dtypes)

Creates a new DataFrame for a given backend.
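
Backends typically call this from their callbacks to wrap the native data, the column names, and the dtypes into the Explorer.DataFrame.t() that the callbacks must return. A sketch, using a hypothetical MyBackend.DataFrame struct and assuming dtypes is given in the dtypes() map form defined above:

native = %MyBackend.DataFrame{resource: make_ref()}
names = ["a", "b"]
dtypes = %{"a" => {:s, 64}, "b" => :string}

df = Explorer.Backend.DataFrame.new(native, names, dtypes)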