cifar10

Evision Example - Simple use of Evision in a Machine Learning Pipeline with Nx and Torchx

Mix.install([
  {:evision, "~> 0.1"},
  {:req, "~> 0.3"},
  {:torchx, "~> 0.3"},
  {:nx, "~> 0.3", override: true},
  {:kino, "~> 0.7"},
  {:scidata, "~> 0.1"}
], system_env: [
  # optional, defaults to `true`
  # set `EVISION_PREFER_PRECOMPILED` to `false`
  # if you prefer `:evision` to be compiled from source
  # note that to compile from source, you may need at least 1GB RAM
  {"EVISION_PREFER_PRECOMPILED", true},

  # optional, defaults to `true`
  # set `EVISION_ENABLE_CONTRIB` to `false`
  # if you don't need modules from `opencv_contrib`
  {"EVISION_ENABLE_CONTRIB", true},

  # optional, defaults to `false`
  # set `EVISION_ENABLE_CUDA` to `true`
  # if you wish to use CUDA related functions
  # note that `EVISION_ENABLE_CONTRIB` also has to be `true`
  # because cuda related modules come from the `opencv_contrib` repo
  {"EVISION_ENABLE_CUDA", false},

  # required when
  # - `EVISION_ENABLE_CUDA` is `true`
  # - and `EVISION_PREFER_PRECOMPILED` is `true`
  #
  # set `EVISION_CUDA_VERSION` to the version that matches
  # your local CUDA runtime version
  #
  # currently available versions are
  # - 118
  # - 121
  {"EVISION_CUDA_VERSION", "118"},

  # required for Windows users when
  # - `EVISION_ENABLE_CUDA` is `true`
  # set `EVISION_CUDA_RUNTIME_DIR` to the directory that contains
  # CUDA runtime libraries
  {"EVISION_CUDA_RUNTIME_DIR", "C:/PATH/TO/CUDA/RUNTIME"}
])
:ok
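
Once the install finishes, you can check how `:evision` was built. A minimal sketch, assuming evision exposes a binding for OpenCV's `getBuildInformation` (the binding name is an assumption based on evision's auto-generated OpenCV API):

# print OpenCV's build summary (CUDA, contrib modules, etc.); assumes
# the auto-generated Evision.getBuildInformation/0 binding is available
Evision.getBuildInformation() |> IO.puts()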

Define Some Helper Functions and Download the Test Image

# change to the file's directory
# or somewhere you have write permission
File.cd!(__DIR__)

defmodule Helper do
  def download!(url, save_as, overwrite? \\ false) do
    # skip the download when the file already exists, unless overwrite? is set
    if overwrite? or not File.exists?(save_as) do
      Req.get!(url, http_errors: :raise, output: save_as, cache: not overwrite?)
    end

    :ok
  end
end

Helper.download!(
  "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg",
  "cat.jpg"
)
:ok

Read the Test Image

# in real-life use cases, the input source might be a camera
# instead of downloading a file and reading it

alias Evision, as: Cv

img = Cv.imread("cat.jpg", flags: Cv.Constant.cv_IMREAD_ANYCOLOR())
resized_img = Cv.resize(img, {128, 128})

Cv.imencode(".png", resized_img)
|> Kino.Image.new(:png)
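
For a quick sanity check on what imread returned, the Mat's shape and element type can be inspected directly. A minimal sketch (the exact height and width depend on the downloaded file):

# OpenCV decodes color images as 8-bit BGR, height x width x channels
Cv.Mat.shape(img) # e.g. {900, 1200, 3}
Cv.Mat.type(img)  # {:u, 8}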

Select a Default Nx Backend

# Nx defaults to the pure-Elixir BinaryBackend. Since :torchx is listed
# as a dependency above, uncomment the following line to make LibTorch
# the default backend (the EXLA backend can be selected the same way)
# Nx.default_backend(Torchx.Backend)
nil
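
To confirm which backend is active at any point, Nx.default_backend/0 returns the current default as a {backend, options} tuple:

# {Nx.BinaryBackend, []} unless Torchx.Backend was set above
Nx.default_backend()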

Write a Module for the CIFAR10 Dataset

defmodule CIFAR10Dataset do
  defp transform_images({bin, type, shape}, backend) do
    bin
    |> Nx.from_binary(type, backend: backend)
    # flatten each 3x32x32 image into a single 3072-element row
    |> Nx.reshape({elem(shape, 0), 3 * 32 * 32}, names: [:batch, :input])
    # scale pixel values from [0, 255] to [0.0, 1.0]
    |> Nx.divide(255.0)
  end

  defp transform_labels({bin, type, _}, backend) do
    bin
    |> Nx.from_binary(type, backend: backend)
  end

  def fetch(backend \\ Torchx.Backend) do
    {images, labels} = Scidata.CIFAR10.download()
    {transform_images(images, backend), transform_labels(labels, backend)}
  end
end
{:module, CIFAR10Dataset, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:fetch, 1}}
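
As a usage reference, fetch/1 downloads the CIFAR10 training split and returns it as two tensors. A sketch of the expected shapes (CIFAR10 ships 50,000 training images; not run here to avoid downloading the dataset twice):

{images, labels} = CIFAR10Dataset.fetch(Torchx.Backend)
Nx.shape(images) # {50000, 3072}
Nx.shape(labels) # {50000}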

Write a Tiny Dense Neural Network

# training code
# based on https://github.com/elixir-nx/nx/blob/e4454423f7be39d3adc9dea76526185fbfaf7a58/exla/examples/mnist.exs

defmodule DenseNN do
  import Nx.Defn

  defn init_random_params do
    # 3 layers
    #  1. Dense(32) with sigmoid
    #  2. Dense(24) with sigmoid
    #  3. Dense(10) with softmax
    w1 = Nx.random_normal({3072, 32}, 0.0, 0.1, names: [:input, :layer1])
    b1 = Nx.random_normal({32}, 0.0, 0.1, names: [:layer1])
    w2 = Nx.random_normal({32, 24}, 0.0, 0.1, names: [:layer1, :layer2])
    b2 = Nx.random_normal({24}, 0.0, 0.1, names: [:layer2])
    w3 = Nx.random_normal({24, 10}, 0.0, 0.1, names: [:layer2, :output])
    b3 = Nx.random_normal({10}, 0.0, 0.1, names: [:output])
    {w1, b1, w2, b2, w3, b3}
  end

  defn softmax(logits) do
    # subtract the per-row max before exponentiating for numerical stability
    row_max = Nx.reduce_max(logits, axes: [:output], keep_axes: true)
    exp = Nx.exp(logits - row_max)
    exp / Nx.sum(exp, axes: [:output], keep_axes: true)
  end

  defn predict({w1, b1, w2, b2, w3, b3}, batch) do
    batch
    |> Nx.dot(w1)
    |> Nx.add(b1)
    |> Nx.sigmoid()
    |> Nx.dot(w2)
    |> Nx.add(b2)
    |> Nx.sigmoid()
    |> Nx.dot(w3)
    |> Nx.add(b3)
    |> softmax()
  end

  defn accuracy({w1, b1, w2, b2, w3, b3}, batch_images, batch_labels) do
    Nx.mean(
      Nx.equal(
        Nx.argmax(batch_labels, axis: :output),
        Nx.argmax(predict({w1, b1, w2, b2, w3, b3}, batch_images), axis: :output)
      )
      |> Nx.as_type({:s, 8})
    )
  end

  # categorical cross-entropy: mean over the output axis, summed over the batch
  defn loss({w1, b1, w2, b2, w3, b3}, batch_images, batch_labels) do
    preds = predict({w1, b1, w2, b2, w3, b3}, batch_images)
    -Nx.sum(Nx.mean(Nx.log(preds) * batch_labels, axes: [:output]))
  end

  # one step of gradient descent: move each parameter against its gradient,
  # scaled by the learning rate `step`
  defn update({w1, b1, w2, b2, w3, b3} = params, batch_images, batch_labels, step) do
    {grad_w1, grad_b1, grad_w2, grad_b2, grad_w3, grad_b3} =
      grad(params, &loss(&1, batch_images, batch_labels))

    {
      w1 - grad_w1 * step,
      b1 - grad_b1 * step,
      w2 - grad_w2 * step,
      b2 - grad_b2 * step,
      w3 - grad_w3 * step,
      b3 - grad_b3 * step
    }
  end

  defn update_with_averages(
         {_, _, _, _, _, _} = cur_params,
         imgs,
         tar,
         avg_loss,
         avg_accuracy,
         total
       ) do
    batch_loss = loss(cur_params, imgs, tar)
    batch_accuracy = accuracy(cur_params, imgs, tar)
    avg_loss = avg_loss + batch_loss / total
    avg_accuracy = avg_accuracy + batch_accuracy / total
    {update(cur_params, imgs, tar, 0.01), avg_loss, avg_accuracy}
  end

  def train_epoch(cur_params, x, labels) do
    total_batches = Enum.count(x)

    x
    |> Enum.zip(labels)
    |> Enum.reduce({cur_params, Nx.tensor(0.0), Nx.tensor(0.0)}, fn
      {x, tar}, {cur_params, avg_loss, avg_accuracy} ->
        update_with_averages(cur_params, x, tar, avg_loss, avg_accuracy, total_batches)
    end)
  end

  def train(x, labels, params, opts \\ []) do
    epochs = opts[:epochs] || 5

    for epoch <- 1..epochs, reduce: params do
      cur_params ->
        {time, {new_params, epoch_avg_loss, epoch_avg_acc}} =
          :timer.tc(__MODULE__, :train_epoch, [cur_params, x, labels])

        epoch_avg_loss =
          epoch_avg_loss
          |> Nx.backend_transfer()
          |> Nx.to_number()

        epoch_avg_acc =
          epoch_avg_acc
          |> Nx.backend_transfer()
          |> Nx.to_number()

        IO.puts(
          "Epoch #{epoch} Time: #{time / 1_000_000}s, loss: #{Float.round(epoch_avg_loss, 3)}, acc: #{Float.round(epoch_avg_acc, 3)}"
        )

        new_params
    end
  end
end
{:module, DenseNN, <<70, 79, 82, 49, 0, 0, 45, ...>>, {:train, 4}}
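
Before training, a quick shape-level smoke test of the network; this sketch pushes a random batch through predict (the batch size of 8 and the test_params binding are illustrative and not used below):

test_params = DenseNN.init_random_params()
batch = Nx.random_normal({8, 3072}, 0.0, 1.0, names: [:batch, :input])
# ten class probabilities per row, each row summing to 1
DenseNN.predict(test_params, batch) |> Nx.shape() # {8, 10}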

Convert Labels to One-hot Encoding

defmodule Helper do
  # note: this redefines the Helper module from the download step;
  # download!/3 is no longer needed at this point

  # build a one-hot list of length `count` with a 1 at index `pos`,
  # filling from the highest index down
  def to_onehot_single(0, oh, _pos) do
    oh
  end

  def to_onehot_single(count, oh, pos) do
    cur = count - 1

    if cur == pos do
      to_onehot_single(cur, [1 | oh], pos)
    else
      to_onehot_single(cur, [0 | oh], pos)
    end
  end

  def to_onehot_single(0, _pos) do
    []
  end

  def to_onehot_single(count, pos) do
    to_onehot_single(count, [], pos)
  end

  def to_onehot(labels, unique_classes) do
    for(
      l <- Nx.to_flat_list(labels),
      do: Nx.tensor([to_onehot_single(unique_classes, l)])
    )
    |> Nx.concatenate()
    |> Nx.reshape({:auto, unique_classes}, names: [:batch, :output])
  end
end
{:module, Helper, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:to_onehot, 2}}
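
The recursive helper above is easy to follow; the same encoding can also be written as a single broadcasted comparison, which avoids building one small tensor per label. An equivalent sketch (the one_hot binding is illustrative and not used below):

# assuming `labels` is a rank-1 tensor of class indices, as returned by fetch/1
one_hot =
  labels
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.iota({10}))
  |> Nx.reshape({:auto, 10}, names: [:batch, :output])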

Train the Neural Network

defmodule Demo do
  require CIFAR10Dataset
  require DenseNN
  require Helper

  def load_dataset(backend) do
    {usec, result} = :timer.tc(fn -> CIFAR10Dataset.fetch(backend) end)
    IO.puts("[Time] load dataset: #{usec / 1000.0} ms")
    result
  end

  def to_batched_input(x_training, y_training, batch_size) do
    unique_classes = 10

    x_training_batched =
      x_training
      # already f32 in [0, 1] after CIFAR10Dataset.fetch/1;
      # the cast and reshape below are no-ops kept for safety
      |> Nx.as_type({:f, 32})
      |> Nx.reshape({:auto, 3072})
      |> Nx.to_batched(batch_size)

    y_training_batched =
      y_training
      |> Helper.to_onehot(unique_classes)
      |> Nx.as_type({:f, 32})
      |> Nx.to_batched(batch_size)

    {x_training_batched, y_training_batched}
  end

  def init_random_params do
    {usec, result} = :timer.tc(fn -> DenseNN.init_random_params() end)
    IO.puts("[Time] init random params: #{usec / 1000.0} ms")
    result
  end

  def run(opts \\ []) do
    epochs = opts[:epochs] || 5
    backend = opts[:backend] || Nx.BinaryBackend
    batch_size = opts[:batch_size] || 300
    Nx.default_backend(backend)

    params = init_random_params()
    {x_training, y_training} = load_dataset(backend)

    {x_training_batched, y_training_batched} =
      to_batched_input(x_training, y_training, batch_size)

    DenseNN.train(
      x_training_batched,
      y_training_batched,
      params,
      epochs: epochs
    )
  end
end
{:module, Demo, <<70, 79, 82, 49, 0, 0, 18, ...>>, {:run, 1}}
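
Demo.run/1 accepts :backend, :epochs, and :batch_size, falling back to Nx.BinaryBackend, 5 epochs, and batches of 300. For illustration, a short CPU-only run without Torchx would look like this (not executed here; the much faster LibTorch-backed run follows instead):

Demo.run(backend: Nx.BinaryBackend, epochs: 1)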
params = Demo.run(backend: Torchx.Backend, epochs: 50)
[Time] init random params: 19.537 ms
[Time] load dataset: 8473.881 ms
Epoch 1 Time: 1.801972s, loss: 68.817, acc: 0.122
Epoch 2 Time: 1.526419s, loss: 64.754, acc: 0.183
Epoch 3 Time: 1.474312s, loss: 62.076, acc: 0.211
Epoch 4 Time: 1.463488s, loss: 60.709, acc: 0.247
Epoch 5 Time: 1.495513s, loss: 58.924, acc: 0.276
Epoch 6 Time: 1.50393s, loss: 57.685, acc: 0.296
Epoch 7 Time: 1.401566s, loss: 56.825, acc: 0.311
Epoch 8 Time: 1.552453s, loss: 56.041, acc: 0.325
Epoch 9 Time: 2.047746s, loss: 55.225, acc: 0.336
Epoch 10 Time: 1.500972s, loss: 54.438, acc: 0.346
Epoch 11 Time: 1.449009s, loss: 53.76, acc: 0.355
Epoch 12 Time: 1.456149s, loss: 53.171, acc: 0.361
Epoch 13 Time: 1.636981s, loss: 52.639, acc: 0.366
Epoch 14 Time: 1.519201s, loss: 52.153, acc: 0.372
Epoch 15 Time: 1.582178s, loss: 51.709, acc: 0.377
Epoch 16 Time: 1.524292s, loss: 51.303, acc: 0.381
Epoch 17 Time: 1.472452s, loss: 50.93, acc: 0.386
Epoch 18 Time: 1.500321s, loss: 50.584, acc: 0.39
Epoch 19 Time: 1.503371s, loss: 50.258, acc: 0.395
Epoch 20 Time: 1.504718s, loss: 49.949, acc: 0.399
Epoch 21 Time: 1.579238s, loss: 49.654, acc: 0.403
Epoch 22 Time: 1.571431s, loss: 49.373, acc: 0.407
Epoch 23 Time: 1.683039s, loss: 49.105, acc: 0.411
Epoch 24 Time: 1.526153s, loss: 48.852, acc: 0.414
Epoch 25 Time: 1.519478s, loss: 48.609, acc: 0.418
Epoch 26 Time: 1.417229s, loss: 48.374, acc: 0.42
Epoch 27 Time: 1.711816s, loss: 48.146, acc: 0.423
Epoch 28 Time: 1.628203s, loss: 47.925, acc: 0.427
Epoch 29 Time: 1.601898s, loss: 47.71, acc: 0.431
Epoch 30 Time: 1.552816s, loss: 47.503, acc: 0.434
Epoch 31 Time: 1.483635s, loss: 47.302, acc: 0.437
Epoch 32 Time: 1.500484s, loss: 47.104, acc: 0.44
Epoch 33 Time: 1.514314s, loss: 46.912, acc: 0.442
Epoch 34 Time: 1.560259s, loss: 46.731, acc: 0.445
Epoch 35 Time: 1.505326s, loss: 46.556, acc: 0.447
Epoch 36 Time: 1.492348s, loss: 46.388, acc: 0.45
Epoch 37 Time: 1.479754s, loss: 46.226, acc: 0.452
Epoch 38 Time: 1.556336s, loss: 46.069, acc: 0.454
Epoch 39 Time: 1.512166s, loss: 45.916, acc: 0.455
Epoch 40 Time: 1.525349s, loss: 45.765, acc: 0.458
Epoch 41 Time: 1.536424s, loss: 45.617, acc: 0.46
Epoch 42 Time: 1.496241s, loss: 45.656, acc: 0.46
Epoch 43 Time: 1.473638s, loss: 45.358, acc: 0.462
Epoch 44 Time: 1.504573s, loss: 45.18, acc: 0.465
Epoch 45 Time: 1.502319s, loss: 45.129, acc: 0.465
Epoch 46 Time: 1.515625s, loss: 45.035, acc: 0.466
Epoch 47 Time: 1.496521s, loss: 44.82, acc: 0.468
Epoch 48 Time: 1.501526s, loss: 44.838, acc: 0.468
Epoch 49 Time: 1.500548s, loss: 44.607, acc: 0.472
Epoch 50 Time: 1.522726s, loss: 44.57, acc: 0.471
{#Nx.Tensor<
   f32[input: 3072][layer1: 32]
   Torchx.Backend(cpu)
   [
     [0.06159350275993347, 0.12734365463256836, -0.04637990519404411, -0.14622245728969574, -0.026950793340802193, 0.18754519522190094, -0.11162003129720688, 0.03631016984581947, 0.06155526638031006, 0.05119727551937103, 0.12082687020301819, 0.0010204321006312966, -0.13074278831481934, -0.2162177860736847, -0.0529991090297699, -0.11709204316139221, 0.03308134153485298, 0.10344900190830231, -0.007962973788380623, 0.005867910571396351, 0.025203991681337357, -0.16794253885746002, -0.06448774039745331, 0.13841457664966583, -0.11046885699033737, 0.1314300000667572, 0.11232485622167587, 0.05331533029675484, -0.056031279265880585, -0.15944091975688934, 0.08177391439676285, -0.3181536793708801],
     [0.07542850077152252, 0.10426164418458939, -0.09150480479001999, 0.08607892692089081, -0.0802445337176323, 0.3077136278152466, 0.022843508049845695, -0.10194684565067291, 0.01955121010541916, 0.05440697446465492, 0.19110870361328125, -0.06551551818847656, 0.0012398258550092578, -0.07089567184448242, 0.005781807005405426, 0.032927487045526505, -0.15386459231376648, ...],
     ...
   ]
 >,
 #Nx.Tensor<
   f32[layer1: 32]
   Torchx.Backend(cpu)
   [-0.4782559275627136, -0.0953182652592659, 0.9538414478302002, -0.9563804864883423, 0.19684253633022308, 0.10464754700660706, 0.2017214596271515, 0.4398568272590637, 0.23314496874809265, 1.1341161727905273, -0.35619667172431946, 0.3896051347255707, -0.03676304966211319, -0.11970412731170654, 0.6443958282470703, 1.0974687337875366, -0.9757993817329407, -0.237301766872406, 0.6900271773338318, -1.0126398801803589, 0.8445910215377808, -0.06984522938728333, 0.6991291642189026, 0.4147650897502899, 0.5383307337760925, -1.6058013439178467, -0.6333990693092346, 0.9713459610939026, -1.1919199228286743, -0.6940388083457947, 0.43150636553764343, -0.07613875716924667]
 >,
 #Nx.Tensor<
   f32[layer1: 32][layer2: 24]
   Torchx.Backend(cpu)
   [
     [-0.33987560868263245, -0.2776806652545929, -0.3417806923389435, 0.2850123345851898, -0.08022978156805038, -0.5811548233032227, -0.17571184039115906, 0.018330495804548264, -0.13240738213062286, 0.46940329670906067, -0.38814595341682434, 0.344807505607605, -0.13484203815460205, 0.34233394265174866, 0.003809022717177868, -0.09918670356273651, -0.04311465099453926, 0.8633210062980652, 0.07081698626279831, -0.42742085456848145, 0.5656407475471497, -0.5184997320175171, -0.1400681883096695, -0.37492144107818604],
     [-0.18105199933052063, -0.26929351687431335, 0.455635666847229, -0.958427369594574, 0.6590504050254822, -0.9575876593589783, 0.03432007133960724, -0.3971480429172516, 0.2179064154624939, -0.31215599179267883, -0.15511885285377502, -0.7740356922149658, 0.7641487121582031, 0.0803070217370987, 0.2655712068080902, -0.23865076899528503, 0.5451679825782776, 0.16663742065429688, -0.08000250160694122, -0.42117956280708313, 0.026075761765241623, -0.07219810038805008, 0.5508838891983032, ...],
     ...
   ]
 >,
 #Nx.Tensor<
   f32[layer2: 24]
   Torchx.Backend(cpu)
   [0.12863606214523315, -0.39577043056488037, 0.2603394389152527, -0.4970460832118988, -0.12190719693899155, 0.11095257848501205, -0.11531135439872742, -0.055682189762592316, -0.013144372962415218, 0.13842496275901794, 0.05578012019395828, -0.47933924198150635, -0.05614984408020973, 0.03527414798736572, -0.3992805778980255, -0.11208709329366684, 0.13771165907382965, 0.0196288600564003, 0.008769847452640533, 0.34402191638946533, -0.20614822208881378, 0.12027487903833389, -0.06340263783931732, 0.12220388650894165]
 >,
 #Nx.Tensor<
   f32[layer2: 24][output: 10]
   Torchx.Backend(cpu)
   [
     [-0.7585468888282776, -0.21927006542682648, -0.4808247983455658, 0.5093653798103333, 0.19336795806884766, 0.8497358560562134, 0.7614853382110596, 0.9867469668388367, -0.18471986055374146, -1.8935502767562866],
     [-0.21348366141319275, -0.4031388461589813, 0.154790997505188, -1.302069902420044, 1.0026453733444214, -0.42505010962486267, 0.7698855400085449, 1.26364004611969, -0.5512898564338684, -0.5894452929496765],
     [-0.7507593631744385, 0.8991221189498901, -0.7759523391723633, -0.15009775757789612, -1.1441510915756226, 0.8113402128219604, -1.033116340637207, 0.48261716961860657, 0.3629790246486664, 1.512804388999939],
     [0.6820945143699646, -0.19330617785453796, 1.6458057165145874, -0.5821719765663147, 1.9896080493927002, -0.4230886399745941, -0.39437347650527954, -1.1041091680526733, -0.4087747037410736, -1.095003604888916],
     [0.32531166076660156, 0.8763105869293213, -0.4181594252586365, -0.3064834475517273, -0.9987258911132812, ...],
     ...
   ]
 >,
 #Nx.Tensor<
   f32[output: 10]
   Torchx.Backend(cpu)
   [-0.08992704749107361, 0.04459410160779953, -0.21699029207229614, -0.1284622699022293, -0.07085893303155899, 0.2854973077774048, -0.38408756256103516, 0.19632413983345032, 0.11082038283348083, -0.2601413130760193]
 >}

Classify the Test Image with the Neural Network

# show the resized test image once more before classifying it
resized_img = Cv.resize(img, {128, 128})

Cv.imencode(".png", resized_img)
|> Kino.Image.new(:png)
classes = [:airplane, :automobile, :bird, :cat, :deer, :dog, :frog, :horse, :ship, :truck]

input_tensor =
  img
  # resize to the 32x32 input size the network was trained on
  |> Cv.resize({32, 32})
  # convert the Evision.Mat to an Nx tensor ({32, 32, 3}, u8)
  |> Cv.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.backend_transfer(Torchx.Backend)
  # flatten to a 3072-element vector
  |> Nx.flatten()

pred =
  params
  |> DenseNN.predict(input_tensor)
  |> Nx.argmax()
  |> Nx.to_number()

Enum.at(classes, pred)
:cat
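
One caveat: the training tensors are planar RGB scaled to [0, 1], while OpenCV decodes to interleaved BGR bytes, so the flattened test vector above does not line up feature-for-feature with the training layout (the network still settles on :cat here). A sketch of a layout-consistent preprocessing pipeline, assuming evision's cvtColor binding and the cv_COLOR_BGR2RGB constant:

input_tensor =
  img
  |> Cv.resize({32, 32})
  # match CIFAR10's RGB channel order
  |> Cv.cvtColor(Cv.Constant.cv_COLOR_BGR2RGB())
  |> Cv.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.backend_transfer(Torchx.Backend)
  # HWC -> CHW, matching the planar layout of the training data
  |> Nx.transpose(axes: [2, 0, 1])
  |> Nx.flatten()
  # scale to [0, 1] like the training images
  |> Nx.divide(255.0)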