Evision.ML Example - Decision Tree and Random Forest

# set `EVISION_PREFER_PRECOMPILED` to `false` 
# if you prefer `:evision` to be compiled from source
# note that to compile from source, you may need at least 1GB RAM
# System.put_env("EVISION_PREFER_PRECOMPILED", "false")

Mix.install([
  {:evision, "~> 0.1.9"},
  {:kino, "~> 0.7"},
  {:scidata, "~> 0.1"},
  {:nx, "~> 0.3", override: true},
  {:scholar, "~> 0.1", github: "elixir-nx/scholar"}
])

Register SmartCells

:ok = Evision.SmartCell.register_smartcells()

Download the Dataset

Get the Wine dataset with Scidata

{features, labels} = Scidata.Wine.download()
:ok
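Before wrapping the data for OpenCV, it is worth a quick sanity check. The snippet below assumes `features` comes back as a plain list of numeric rows and `labels` as a flat list of class ids, which is also what the `Nx.tensor/2` calls in the next step expect.

# optional sanity check on the downloaded data (assumes plain Elixir lists)
IO.puts("#Rows: #{length(features)}")
IO.puts("#Columns per row: #{length(hd(features))}")
IO.inspect(labels |> Enum.uniq() |> Enum.sort(), label: "Distinct classes")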

Make a dataset with Evision.ML.TrainData

dataset =
  Evision.ML.TrainData.create(
    Evision.Mat.from_nx(Nx.tensor(features, type: :f32, backend: Evision.Backend)),
    Evision.cv_ROW_SAMPLE(),
    Evision.Mat.from_nx(Nx.tensor(labels, type: :s32, backend: Evision.Backend))
  )
  |> Evision.ML.TrainData.setTrainTestSplitRatio(0.8, shuffle: true)

IO.puts("#Samples: #{Evision.ML.TrainData.getNSamples(dataset)}")
IO.puts("#Training samples: #{Evision.ML.TrainData.getNTrainSamples(dataset)}")
IO.puts("#Test samples: #{Evision.ML.TrainData.getNTestSamples(dataset)}")

Train a Decision Tree with Evision.ML.DTrees

dtree =
  Evision.ML.DTrees.create()
  |> Evision.ML.DTrees.setMaxDepth(8)
  |> Evision.ML.DTrees.setMaxCategories(3)
  |> Evision.ML.DTrees.setCVFolds(0)
  |> Evision.ML.DTrees.setMinSampleCount(10)

(
  Evision.ML.DTrees.train(dtree, dataset)

  dtree
  |> Evision.ML.DTrees.calcError(dataset, false)
  |> then(&IO.puts("Training Error: #{elem(&1, 0)}"))

  dtree
  |> Evision.ML.DTrees.calcError(dataset, true)
  |> then(&IO.puts("Test Error: #{elem(&1, 0)}"))
)

Calculate Confusion Matrix

{_test_error, results} = Evision.ML.DTrees.calcError(dtree, dataset, true)

# ground truth: the test subset's responses held by the TrainData split
y_true =
  Evision.Mat.to_nx(Evision.ML.TrainData.getTestResponses(dataset), Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# predictions: the per-sample responses returned by calcError on the test subset
y_pred =
  Evision.Mat.to_nx(results, Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

Scholar.Metrics.confusion_matrix(y_true, y_pred, num_classes: 3)
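The confusion matrix shows where each class ends up; a single accuracy figure over the same test subset can be computed directly from the two tensors with plain Nx, no extra API needed.

# fraction of test samples where the predicted class equals the ground truth
y_true
|> Nx.equal(y_pred)
|> Nx.mean()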

Save the Trained Model and Load It Back

It's also possible to save the trained model to a file and load it back!

# save to file
filename = Path.join(__DIR__, "dtree.bin")
Evision.ML.DTrees.save(dtree, filename)

# load from file
dtree_from_file = Evision.ML.DTrees.load(filename)

# they should give the same results!
{test_error, _results} = Evision.ML.DTrees.calcError(dtree, dataset, true)
{test_error_2, _results} = Evision.ML.DTrees.calcError(dtree_from_file, dataset, true)
test_error == test_error_2
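Beyond comparing error scores, the reloaded model can be used for prediction directly. This is a hedged sketch: it assumes the evision binding mirrors OpenCV's Python API, where `predict` returns `{retval, results}` with the predicted class in `results`.

# assumption: predict/2 returns {retval, results} like OpenCV's Python binding
sample =
  Nx.tensor([hd(features)], type: :f32, backend: Evision.Backend)
  |> Evision.Mat.from_nx()

{_retval, prediction} = Evision.ML.DTrees.predict(dtree_from_file, sample)
Evision.Mat.to_nx(prediction, Nx.BinaryBackend)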

Train a Random Forest with Evision.ML.RTrees

The forest gets its own freshly shuffled 80/20 split of the same Wine data below, so the dataset creation is repeated before training.

rtree =
  Evision.ML.RTrees.create()
  |> Evision.ML.RTrees.setMaxDepth(10)
  |> Evision.ML.RTrees.setMaxCategories(3)
  |> Evision.ML.RTrees.setCVFolds(0)
  |> Evision.ML.RTrees.setMinSampleCount(10)
  |> Evision.ML.RTrees.setActiveVarCount(0)
  |> Evision.ML.RTrees.setCalculateVarImportance(false)

rtree =
  Evision.ML.RTrees.setTermCriteria(
    rtree,
    {Evision.cv_MAX_ITER() + Evision.cv_EPS(), 30, 5.0e-5}
  )
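`setCalculateVarImportance` is left at `false` above; turning it on lets the forest report how much each feature column contributed. A hedged aside, assuming evision also wraps OpenCV's `RTrees::getVarImportance()`:

# assumption: Evision.ML.RTrees.getVarImportance/1 is available;
# train a second forest with variable importance enabled on the current split
rtree_vi =
  Evision.ML.RTrees.create()
  |> Evision.ML.RTrees.setMaxDepth(10)
  |> Evision.ML.RTrees.setCalculateVarImportance(true)

Evision.ML.RTrees.train(rtree_vi, dataset)

Evision.ML.RTrees.getVarImportance(rtree_vi)
|> Evision.Mat.to_nx(Nx.BinaryBackend)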

(
  dataset =
    Evision.ML.TrainData.create(
      Evision.Mat.from_nx(Nx.tensor(features, type: :f32, backend: Evision.Backend)),
      Evision.cv_ROW_SAMPLE(),
      Evision.Mat.from_nx(Nx.tensor(labels, type: :s32, backend: Evision.Backend))
    )
    |> Evision.ML.TrainData.setTrainTestSplitRatio(0.8, shuffle: true)

  IO.puts("#Samples: #{Evision.ML.TrainData.getNSamples(dataset)}")
  IO.puts("#Training samples: #{Evision.ML.TrainData.getNTrainSamples(dataset)}")
  IO.puts("#Test samples: #{Evision.ML.TrainData.getNTestSamples(dataset)}")

  Evision.ML.RTrees.train(rtree, dataset)

  rtree
  |> Evision.ML.RTrees.calcError(dataset, false)
  |> then(&IO.puts("Training Error: #{elem(&1, 0)}"))

  rtree
  |> Evision.ML.RTrees.calcError(dataset, true)
  |> then(&IO.puts("Test Error: #{elem(&1, 0)}"))
)

Calculate Confusion Matrix

{_test_error, results} = Evision.ML.RTrees.calcError(rtree, dataset, true)

# ground truth: the test subset's responses held by the TrainData split
y_true =
  Evision.Mat.to_nx(Evision.ML.TrainData.getTestResponses(dataset), Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# predictions: the per-sample responses returned by calcError on the test subset
y_pred =
  Evision.Mat.to_nx(results, Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

Scholar.Metrics.confusion_matrix(y_true, y_pred, num_classes: 3)
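For a rough side-by-side, both models can be scored on the current test subset using only calls already shown above. Note that the decision tree was trained on an earlier, independently shuffled split, so its samples may overlap this test subset and its number will look optimistic.

# rough comparison on the current split's test subset (see caveat above)
{dtree_test_error, _} = Evision.ML.DTrees.calcError(dtree, dataset, true)
{rtree_test_error, _} = Evision.ML.RTrees.calcError(rtree, dataset, true)
IO.puts("DTrees test error: #{dtree_test_error}")
IO.puts("RTrees test error: #{rtree_test_error}")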

Save the Trained Model and Load It Back

# save to file
filename = Path.join(__DIR__, "rtree.bin")
Evision.ML.RTrees.save(rtree, filename)

# load from file
rtree_from_file = Evision.ML.RTrees.load(filename)

# they should give the same results!
{test_error, _results} = Evision.ML.RTrees.calcError(rtree, dataset, true)
{test_error_2, _results} = Evision.ML.RTrees.calcError(rtree_from_file, dataset, true)
test_error == test_error_2
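Both model files were written next to the notebook; once the round-trip check is done they can be removed with plain `File` calls.

# clean up the files written by the save/load round-trips
File.rm(Path.join(__DIR__, "dtree.bin"))
File.rm(Path.join(__DIR__, "rtree.bin"))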