gleam_stats/stats

A module containing several helpful functions for computing and working with statistics.


Types

A type used to represent the bins in a histogram. The type is an alias of a tuple containing a min/max range and a count of the values in that range.

Example:
 import gleeunit/should
 import gleam/pair
 import gleam_stats/stats

 pub fn example () {
   // Create a bin
   let bin: stats.Bin = #(stats.Range(0., 1.), 999)
   // Retrieve min and max values
   let stats.Range(min, max) = pair.first(bin)
   min
   |> should.equal(0.)
   max
   |> should.equal(1.)
   // Retrieve count
   let count = pair.second(bin)
   count
   |> should.equal(999)
 }
pub type Bin =
  #(Range, Int)

A type used to represent a min/max interval. Among other things, the Range type is used to represent the bin boundaries in a histogram.

Example:
 import gleam_stats/stats
 import gleeunit/should

 pub fn example () {
   // Create a range
   let range = stats.Range(0., 1.)
   // Retrieve min and max values
   let stats.Range(min, max) = range
   min
   |> should.equal(0.)
   max
   |> should.equal(1.)
 }
pub type Range {
  Range(min: Float, max: Float)
}

Constructors

  • Range(min: Float, max: Float)

Functions

pub fn allclose(xarr: List(Float), yarr: List(Float), rtol: Float, atol: Float) -> Result(
  List(Bool),
  String,
)

Determine if the values in a list are close to or equivalent to the corresponding values in another list of reference values.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   let val: Float = 99.
   let ref_val: Float = 100.
   let xarr: List(Float) = list.repeat(val, 42)
   let yarr: List(Float) = list.repeat(ref_val, 42)
   // We set 'atol' and 'rtol' such that the values are equivalent
   // if 'val' is within 1 percent of 'ref_val' +/- 0.1
   let rtol: Float = 0.01
   let atol: Float = 0.10
   stats.allclose(xarr, yarr, rtol, atol)
   |> fn(zarr: Result(List(Bool), String)) -> Result(Bool, Nil) {
     case zarr {
       Ok(arr) ->
         arr
         |> list.all(fn(a: Bool) -> Bool { a })
         |> Ok
       _ -> Nil |> Error
     }
   }
   |> should.equal(Ok(True))
 }
pub fn amax(arr: List(Float)) -> Result(Float, String)

Returns the maximum value of a list.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.amax()
   |> should.be_error()

   // Valid input returns a result
   [4., 4., 3., 2., 1.]
   |> stats.amax()
   |> should.equal(Ok(4.))
 }
pub fn amin(arr: List(Float)) -> Result(Float, String)

Returns the minimum value of a list.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.amin()
   |> should.be_error()
 
   // Valid input returns a result
   [4., 4., 3., 2., 1.]
   |> stats.amin()
   |> should.equal(Ok(1.))
 }
pub fn argmax(arr: List(Float)) -> Result(List(Int), String)

Returns the indices of the maximum values in a list.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.argmax()
   |> should.be_error()
 
   // Valid input returns a result
   [4., 4., 3., 2., 1.]
   |> stats.argmax()
   |> should.equal(Ok([0, 1]))
 }
pub fn argmin(arr: List(Float)) -> Result(List(Int), String)

Returns the indices of the minimum values in a list.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.argmin()
   |> should.be_error()
 
   // Valid input returns a result
   [4., 4., 3., 2., 1.]
   |> stats.argmin()
   |> should.equal(Ok([4]))
 }
pub fn correlation(xarr: List(Float), yarr: List(Float)) -> Result(
  Float,
  String,
)

Calculate Pearson’s sample correlation coefficient to determine the linear relationship between the elements in two lists of equal length. The correlation coefficient $$r_{xy} \in [-1, 1]$$ is calculated as:

\[ r_{xy} =\frac{\sum ^n _{i=1}(x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum^n _{i=1}(x_i - \bar{x})^2} \sqrt{\sum^n _{i=1}(y_i - \bar{y})^2}} \]

In the formula, $$n$$ is the sample size (the length of the input lists), $$x_i$$, $$y_i$$ are the corresponding sample points indexed by $$i$$ and $$\bar{x}$$, $$\bar{y}$$ are the sample means.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // Empty lists return an error
   stats.correlation([], [])
   |> should.be_error()
 
   // Lists with fewer than 2 elements return an error
   stats.correlation([1.0], [1.0])
   |> should.be_error()

   // Lists of unequal length return an error
   stats.correlation([1.0, 2.0, 3.0], [1.0, 2.0])
   |> should.be_error()

   // Perfect positive correlation
   let xarr0: List(Float) =
     list.range(0, 100)
     |> list.map(fn(x: Int) -> Float { int.to_float(x) })
   let yarr0: List(Float) =
     list.range(0, 100)
     |> list.map(fn(x: Int) -> Float { int.to_float(x) })
   stats.correlation(xarr0, yarr0)
   |> should.equal(Ok(1.))
 
   // Perfect negative correlation
   let xarr0: List(Float) =
     list.range(0, 100)
     |> list.map(fn(x: Int) -> Float { -1. *. int.to_float(x) })
   let yarr0: List(Float) =
     list.range(0, 100)
     |> list.map(fn(x: Int) -> Float { int.to_float(x) })
   stats.correlation(xarr0, yarr0)
   |> should.equal(Ok(-1.))
 }
pub fn freedman_diaconis_rule(arr: List(Float)) -> Result(
  Float,
  String,
)

Use Freedman-Diaconis’s Rule to determine the bin widths of a histogram.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.freedman_diaconis_rule()
   |> should.be_error()
 
   // Calculate histogram bin widths
   list.range(0, 1000)
   |> list.map(fn(x: Int) -> Float { int.to_float(x) })
   |> stats.freedman_diaconis_rule()
   |> should.equal(Ok(10.))
 }
pub fn gmean(arr: List(Float)) -> Result(Float, String)

Calculate the geometric mean $$\bar{x}$$ of the elements in a list:

\[ \bar{x} = \left(\prod^{n}_{i=1} x_i\right)^{\frac{1}{n}} \]

In the formula, $$n$$ is the sample size (the length of the list) and $$x_i$$ is the sample point in the input list indexed by $$i$$. Note: The geometric mean is only defined for positive numbers.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.gmean()
   |> should.be_error()

   // List with negative numbers returns an error
   [-1., -3., -6.]
   |> stats.gmean()
   |> should.be_error()

   // Valid input returns a result
   [1., 3., 9.]
   |> stats.gmean()
   |> should.equal(Ok(3.))
 }
pub fn histogram(arr: List(Float), width: Float) -> Result(
  List(#(Range, Int)),
  String,
)

Create a histogram of the elements in a list.

Example:
 import gleam_stats/stats
 import gleeunit/should

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.histogram(1.)
   |> should.be_error()

   // Create the bins of a histogram given a list of values
   list.range(0, 100)
   |> list.map(fn(x: Int) -> Float { int.to_float(x) })
   // Below 25. is the bin width
   // The Freedman-Diaconis rule can be used to determine a decent value
   |> stats.histogram(25.)
   |> should.equal(Ok([
     #(stats.Range(0., 25.), 25),
     #(stats.Range(25., 50.), 25),
     #(stats.Range(50., 75.), 25),
     #(stats.Range(75., 100.), 25),
   ]))
 }
pub fn hmean(arr: List(Float)) -> Result(Float, String)

Calculate the harmonic mean $$\bar{x}$$ of the elements in a list:

\[ \bar{x} = \frac{n}{\sum_{i=1}^{n}\frac{1}{x_i}} \]

In the formula, $$n$$ is the sample size (the length of the list) and $$x_i$$ is the sample point in the input list indexed by $$i$$. Note: The harmonic mean is only defined for positive numbers.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.hmean()
   |> should.be_error()

   // List with negative numbers returns an error
   [-1., -3., -6.]
   |> stats.hmean()
   |> should.be_error()
 
   // Valid input returns a result
   [1., 3., 6.]
   |> stats.hmean()
   |> should.equal(Ok(2.))
 }
pub fn iqr(arr: List(Float)) -> Result(Float, String)

Calculate the interquartile range (IQR) of the elements in a list.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.iqr()
   |> should.be_error()
 
   // Valid input returns a result
   [1., 2., 3., 4., 5.]
   |> stats.iqr()
   |> should.equal(Ok(3.))
 }
pub fn isclose(a: Float, b: Float, rtol: Float, atol: Float) -> Bool

Determine if a given value $$a$$ is close to or equivalent to a reference value $$b$$ based on supplied relative $$r_{tol}$$ and absolute $$a_{tol}$$ tolerance values. The equivalence of the two given values is then determined based on the equation:

\[ |a - b| \leq (a_{tol} + r_{tol} \cdot |b|) \]

True is returned if the statement holds, otherwise False is returned.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   let val: Float = 99.
   let ref_val: Float = 100.
   // We set 'atol' and 'rtol' such that the values are equivalent
   // if 'val' is within 1 percent of 'ref_val' +/- 0.1
   let rtol: Float = 0.01
   let atol: Float = 0.10
   stats.isclose(val, ref_val, rtol, atol)
   |> should.be_true()
 }
pub fn kurtosis(arr: List(Float)) -> Result(Float, String)

Calculate the sample kurtosis of a list of elements using the definition of Fisher.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.kurtosis()
   |> should.be_error()
 
   // No tail 
   // -> Fisher's definition gives kurtosis -3 
   [1., 1., 1., 1.]
   |> stats.kurtosis()
   |> should.equal(Ok(-3.))
 
   // Distribution with a tail 
   // -> Higher kurtosis 
   [1., 1., 1., 2.]
   |> stats.kurtosis()
   |> fn(x: Result(Float, String)) -> Bool {
     case x {
       Ok(x) -> x >. -3.
       _ -> False
     }
   }
   |> should.be_true()
 }
pub fn mean(arr: List(Float)) -> Result(Float, String)

Calculate the arithmetic mean of the elements in a list:

\[ \bar{x} = \frac{1}{n}\sum_{i=1}^n x_i \]

In the formula, $$n$$ is the sample size (the length of the list) and $$x_i$$ is the sample point in the input list indexed by $$i$$.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.mean()
   |> should.be_error()

   // Valid input returns a result
   [1., 2., 3.]
   |> stats.mean()
   |> should.equal(Ok(2.))
 }
pub fn median(arr: List(Float)) -> Result(Float, String)

Calculate the median of the elements in a list.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.median()
   |> should.be_error()

   // Valid input returns a result
   [1., 2., 3.]
   |> stats.median()
   |> should.equal(Ok(2.))
 
   [1., 2., 3., 4.]
   |> stats.median()
   |> should.equal(Ok(2.5))
 }
pub fn moment(arr: List(Float), n: Int) -> Result(Float, String)

Calculate the n’th moment about the mean of a list of elements.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.moment(0)
   |> should.be_error()
 
   // 0th moment about the mean is 1. per definition
   [0., 1., 2., 3., 4.]
   |> stats.moment(0)
   |> should.equal(Ok(1.))
 
   // 1st moment about the mean is 0. per definition
   [0., 1., 2., 3., 4.]
   |> stats.moment(1)
   |> should.equal(Ok(0.))
 
   // 2nd moment about the mean
   [0., 1., 2., 3., 4.]
   |> stats.moment(2)
   |> should.equal(Ok(2.))
 }
pub fn percentile(arr: List(Float), n: Int) -> Result(
  Float,
  String,
)

Calculate the n’th percentile of the elements in a list using linear interpolation between closest ranks.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.percentile(40)
   |> should.be_error()
 
   // Calculate 40th percentile 
   [15., 20., 35., 40., 50.]
   |> stats.percentile(40)
   |> should.equal(Ok(29.))
 }
pub fn skewness(arr: List(Float)) -> Result(Float, String)

Calculate the sample skewness of a list of elements using the Fisher-Pearson coefficient of skewness.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.skewness()
   |> should.be_error()
 
   // No skewness 
   // -> Zero skewness
   [1., 2., 3., 4.]
   |> stats.skewness()
   |> should.equal(Ok(0.))
 
   // Right-skewed distribution 
   // -> Positive skewness
   [1., 1., 1., 2.]
   |> stats.skewness()
   |> fn(x: Result(Float, String)) -> Bool {
     case x {
       Ok(x) -> x >. 0.
       _ -> False
     }
   }
   |> should.be_true()
 }
pub fn std(arr: List(Float), ddof: Int) -> Result(Float, String)

Calculate the sample standard deviation of the elements in a list: \[ s = \left(\frac{1}{n - d} \sum_{i=1}^{n}(x_i - \bar{x})^2\right)^{\frac{1}{2}} \]

In the formula, $$n$$ is the sample size (the length of the list) and $$x_i$$ is the sample point in the input list indexed by $$i$$. Furthermore, $$\bar{x}$$ is the sample mean and $$d$$ is the “Delta Degrees of Freedom”, given by the `ddof` argument. Setting $$d = 0$$ gives a biased estimate of the sample standard deviation. Setting $$d = 1$$ gives an unbiased estimate.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // Degrees of freedom
   let ddof: Int = 1
 
   // An empty list returns an error
   []
   |> stats.std(ddof)
   |> should.be_error()
 
   // Valid input returns a result
   [1., 2., 3.]
   |> stats.std(ddof)
   |> should.equal(Ok(1.))
 }
pub fn sum(arr: List(Float)) -> Float

Calculate the sum of the elements in a list:

\[ \sum_{i=1}^n x_i \]

In the formula, $$n$$ is the length of the list and $$x_i$$ is the value in the input list indexed by $$i$$.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns 0.
   []
   |> stats.sum()
   |> should.equal(0.)

   // Valid input returns a result
   [1., 2., 3.]
   |> stats.sum()
   |> should.equal(6.)
 }
pub fn trim(arr: List(Float), min: Int, max: Int) -> Result(
  List(Float),
  String,
)

Trim a list to a certain size given min/max indices. The min/max indices are inclusive.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   |> stats.trim(0, 0)
   |> should.be_error()
 
   // Trim the list to only the middle part of list
   [1., 2., 3., 4., 5., 6.]
   |> stats.trim(1, 4)
   |> should.equal(Ok([2., 3., 4., 5.]))
 }
pub fn var(arr: List(Float), ddof: Int) -> Result(Float, String)

Calculate the sample variance of the elements in a list: \[ s^{2} = \frac{1}{n - d} \sum_{i=1}^{n}(x_i - \bar{x})^2 \]

In the formula, $$n$$ is the sample size (the length of the list) and $$x_i$$ is the sample point in the input list indexed by $$i$$. Furthermore, $$\bar{x}$$ is the sample mean and $$d$$ is the “Delta Degrees of Freedom”, given by the `ddof` argument. Setting $$d = 0$$ gives a biased estimate of the sample variance. Setting $$d = 1$$ gives an unbiased estimate.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // Degrees of freedom
   let ddof: Int = 1
 
   // An empty list returns an error
   []
   |> stats.var(ddof)
   |> should.be_error()
 
   // Valid input returns a result
   [1., 2., 3.]
   |> stats.var(ddof)
   |> should.equal(Ok(1.))
 }
pub fn zscore(arr: List(Float), ddof: Int) -> Result(
  List(Float),
  String,
)

Calculate the z-score of each value in the list relative to the sample mean and standard deviation.

Example:
 import gleeunit/should
 import gleam_stats/stats

 pub fn example () {
   // An empty list returns an error
   []
   // Use degrees of freedom = 1
   |> stats.zscore(1)
   |> should.be_error()
 
   [1., 2., 3.]
   // Use degrees of freedom = 1
   |> stats.zscore(1)
   |> should.equal(Ok([-1., 0., 1.]))
 }