Explorer.Backend.DataFrame behaviour (Explorer v0.10.0)
The behaviour for DataFrame backends.
Summary
Types
@type basic_types() :: float() | integer() | String.t() | Date.t() | DateTime.t()
@type column_name() :: String.t()
@type columns_for_io() :: [column_name()] | [pos_integer()] | nil
@type df() :: Explorer.DataFrame.t()
@type dtype() :: Explorer.Series.dtype()
@type dtypes() :: %{required(column_name()) => dtype()}
@type fs_entry() :: Explorer.DataFrame.fs_entry()
@type io_dtypes() :: [{column_name(), dtype()}]
@type io_result(t) :: {:ok, t} | {:error, Exception.t()}
@type lazy_series() :: Explorer.Backend.LazySeries.t()
@type mutate_value() :: series() | basic_types() | [basic_types()] | (df() -> series() | basic_types() | [basic_types()])
@type ok_result() :: :ok | {:error, Exception.t()}
@type option(type) :: type | nil
@type query_frame() :: Explorer.Backend.QueryFrame.t()
@type result(t) :: {:ok, t} | {:error, term()}
@type series() :: Explorer.Series.t()
@type t() :: struct()
Callbacks
@callback distinct(df(), out_df :: df(), columns :: [column_name()]) :: df()
@callback drop_nil(df(), columns :: [column_name()]) :: df()
@callback dummies(df(), out_df :: df(), columns :: [column_name()]) :: df()
@callback dump_ipc(df(), compression()) :: io_result(binary())
@callback dump_ipc_stream(df(), compression()) :: io_result(binary())
@callback dump_parquet(df(), compression()) :: io_result(binary())
@callback explode(df(), out_df :: df(), columns :: [column_name()]) :: df()
@callback filter_with(df(), out_df :: df(), lazy_series()) :: df()
Link to this callback
from_csv( entry, io_dtypes, delimiter, nil_values, skip_rows, skip_rows_after_header, header?, encoding, max_rows, columns, infer_schema_length, parse_dates, eol_delimiter )
@callback from_csv( entry :: fs_entry(), io_dtypes(), delimiter :: String.t(), nil_values :: [String.t()], skip_rows :: integer(), skip_rows_after_header :: integer(), header? :: boolean(), encoding :: String.t(), max_rows :: option(integer()), columns :: columns_for_io(), infer_schema_length :: option(integer()), parse_dates :: boolean(), eol_delimiter :: option(String.t()) ) :: io_result(df())
@callback from_ipc( entry :: fs_entry(), columns :: columns_for_io() ) :: io_result(df())
@callback from_ipc_stream( filename :: fs_entry(), columns :: columns_for_io() ) :: io_result(df())
@callback from_query( Adbc.Connection.t(), query :: String.t(), params :: [term()] ) :: result(df())
@callback from_tabular(Table.Reader.t(), io_dtypes()) :: df()
@callback inspect(df(), opts :: Inspect.Opts.t()) :: Inspect.Algebra.t()
@callback join( [df()], out_df :: df(), on :: [{column_name(), column_name()}], how :: :left | :inner | :outer | :right | :cross ) :: df()
@callback lazy() :: module()
Link to this callback
load_csv( contents, io_dtypes, delimiter, nil_values, skip_rows, skip_rows_after_header, header?, encoding, max_rows, columns, infer_schema_length, parse_dates, eol_delimiter )
@callback load_csv( contents :: String.t(), io_dtypes(), delimiter :: String.t(), nil_values :: [String.t()], skip_rows :: integer(), skip_rows_after_header :: integer(), header? :: boolean(), encoding :: String.t(), max_rows :: option(integer()), columns :: columns_for_io(), infer_schema_length :: option(integer()), parse_dates :: boolean(), eol_delimiter :: option(String.t()) ) :: io_result(df())
@callback load_ipc( contents :: binary(), columns :: columns_for_io() ) :: io_result(df())
@callback load_ipc_stream( contents :: binary(), columns :: columns_for_io() ) :: io_result(df())
@callback mutate_with(df(), out_df :: df(), mutations :: [{column_name(), lazy_series()}]) :: df()
Link to this callback
pivot_longer(df, out_df, columns_to_pivot, columns_to_keep, names_to, values_to)
@callback pivot_longer( df(), out_df :: df(), columns_to_pivot :: [column_name()], columns_to_keep :: [column_name()], names_to :: column_name(), values_to :: column_name() ) :: df()
Link to this callback
pivot_wider(df, id_columns, names_from, values_from, names_prefix)
@callback pivot_wider( df(), id_columns :: [column_name()], names_from :: column_name(), values_from :: [column_name()], names_prefix :: String.t() ) :: df()
@callback pull(df(), column :: column_name()) :: series()
@callback put(df(), out_df :: df(), column_name(), series()) :: df()
@callback rename(df(), out_df :: df(), [{old :: column_name(), new :: column_name()}]) :: df()
Link to this callback
sort_with(df, out_df, directions, maintain_order?, multithreaded?, nulls_last?)
@callback summarise_with( df(), out_df :: df(), aggregations :: [{column_name(), lazy_series()}] ) :: df()
@callback to_ipc(df(), entry :: fs_entry(), compression(), streaming :: boolean()) :: ok_result()
@callback to_ipc_stream( df(), entry :: fs_entry(), compression() ) :: ok_result()
@callback to_parquet( df(), entry :: fs_entry(), compression(), streaming :: boolean() ) :: ok_result()
@callback to_rows_stream(df(), atom_keys? :: boolean(), chunk_size :: integer()) :: Enumerable.t()
@callback transpose( df(), out_df :: df(), keep_names_as :: column_name(), new_column_names :: [column_name()] ) :: df()
@callback unnest(df(), out_df :: df(), columns :: [column_name()]) :: df()
Functions
Default inspect implementation for backends.
Creates a new DataFrame for a given backend.