Evision.DNN Example - Image Classification Task with GoogLeNet

# set `EVISION_PREFER_PRECOMPILED` to `false` 
# if you prefer `:evision` to be compiled from source
# note that to compile from source, you may need at least 1GB RAM
# System.put_env("EVISION_PREFER_PRECOMPILED", "false")

# Hex packages this notebook depends on.
deps = [
  {:evision, "~> 0.2"},
  {:kino, "~> 0.7"},
  {:req, "~> 0.3"}
]

# Environment variables that control how `:evision` is obtained and built.
system_env = [
  # Optional, defaults to `true`.
  # Set to `false` to compile `:evision` from source instead of using the
  # precompiled binaries; compiling from source may need at least 1GB RAM.
  {"EVISION_PREFER_PRECOMPILED", true},

  # Optional, defaults to `true`.
  # Set to `false` if you don't need the modules from `opencv_contrib`.
  {"EVISION_ENABLE_CONTRIB", true},

  # Optional, defaults to `false`.
  # Set to `true` to use the CUDA related functions. `EVISION_ENABLE_CONTRIB`
  # must then also be `true`, because the CUDA related modules come from the
  # `opencv_contrib` repo.
  {"EVISION_ENABLE_CUDA", false},

  # Required when both
  # - `EVISION_ENABLE_CUDA` is `true`
  # - and `EVISION_PREFER_PRECOMPILED` is `true`
  #
  # Must match your local CUDA runtime version.
  # Currently available versions are
  # - 118
  # - 121
  {"EVISION_CUDA_VERSION", "118"},

  # Required for Windows users when `EVISION_ENABLE_CUDA` is `true`.
  # Directory that contains the CUDA runtime libraries.
  {"EVISION_CUDA_RUNTIME_DIR", "C:/PATH/TO/CUDA/RUNTIME"}
]

Mix.install(deps, system_env: system_env)
:ok

Define Some Helper Functions

defmodule Helper do
  @moduledoc """
  Small helpers used by this notebook.
  """

  @doc """
  Downloads `url` and saves it to the local path `save_as`.

  The download is skipped when `save_as` already exists, unless `overwrite?`
  is truthy, in which case the file is fetched again. When a download happens,
  `Req.get!/2` is called with `http_errors: :raise`, and Req's cache is only
  enabled when `overwrite?` is `false`.

  Returns `:ok` whether or not a download happened.
  """
  @spec download!(String.t(), Path.t(), boolean()) :: :ok
  def download!(url, save_as, overwrite? \\ false) do
    # `overwrite?` has to take part in this check; otherwise an existing file
    # could never be refreshed and the flag would have no effect.
    if overwrite? || not File.exists?(save_as) do
      Req.get!(url, http_errors: :raise, output: save_as, cache: not overwrite?)
    end

    :ok
  end
end
{:module, Helper, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:download!, 3}}

Download the GoogLeNet Model and a Test Image

  • Model parameters. bvlc_googlenet.caffemodel
  • Model config. bvlc_googlenet.prototxt
  • List of class names. classification_classes_ILSVRC2012.txt
  • Test image. space_shuttle.jpg
# Work from this file's directory; any directory you have write permission
# for works as well.
File.cd!(__DIR__)

# {source URL, local file name} pairs: model parameters, model config,
# list of class names and the test image.
downloads = [
  {"http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel",
   "bvlc_googlenet.caffemodel"},
  {"https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/bvlc_googlenet.prototxt",
   "bvlc_googlenet.prototxt"},
  {"https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt",
   "classification_classes_ILSVRC2012.txt"},
  {"https://docs.opencv.org/4.5.4/space_shuttle.jpg", "space_shuttle.jpg"}
]

# Fetch them in order; files that are already present are skipped by
# `Helper.download!/2`.
Enum.each(downloads, fn {url, save_as} -> Helper.download!(url, save_as) end)
:ok

Read Class Names

# One class name per line of the downloaded ILSVRC2012 list. The position of
# a name in `classes` is used later as its class id.
# `trim: true` drops empty strings from the split result, so a trailing
# newline at the end of the file does not add a bogus empty class name.
classes =
  "classification_classes_ILSVRC2012.txt"
  |> File.read!()
  |> String.split("\n", trim: true)
["tench, Tinca tinca", "goldfish, Carassius auratus",
 "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
 "tiger shark, Galeocerdo cuvieri", "hammerhead, hammerhead shark",
 "electric ray, crampfish, numbfish, torpedo", "stingray", "cock", "hen",
 "ostrich, Struthio camelus", "brambling, Fringilla montifringilla",
 "goldfinch, Carduelis carduelis", "house finch, linnet, Carpodacus mexicanus", "junco, snowbird",
 "indigo bunting, indigo finch, indigo bird, Passerina cyanea",
 "robin, American robin, Turdus migratorius", "bulbul", "jay", "magpie", "chickadee",
 "water ouzel, dipper", "kite", "bald eagle, American eagle, Haliaeetus leucocephalus", "vulture",
 "great grey owl, great gray owl, Strix nebulosa",
 "European fire salamander, Salamandra salamandra", "common newt, Triturus vulgaris", "eft",
 "spotted salamander, Ambystoma maculatum", "axolotl, mud puppy, Ambystoma mexicanum",
 "bullfrog, Rana catesbeiana", "tree frog, tree-frog",
 "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
 "loggerhead, loggerhead turtle, Caretta caretta",
 "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", "mud turtle", "terrapin",
 "box turtle, box tortoise", "banded gecko", "common iguana, iguana, Iguana iguana",
 "American chameleon, anole, Anolis carolinensis", "whiptail, whiptail lizard", "agama",
 "frilled lizard, Chlamydosaurus kingi", "alligator lizard", "Gila monster, Heloderma suspectum",
 "green lizard, Lacerta viridis", "African chameleon, Chamaeleo chamaeleon",
 "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
 "African crocodile, Nile crocodile, Crocodylus niloticus", ...]

Load the GoogLeNet Model

alias Evision, as: Cv

# The model parameters are passed as the first argument and the network
# description via `:config`; `:framework` is left as an empty string.
read_net_opts = [
  config: "bvlc_googlenet.prototxt",
  framework: ""
]

model = Cv.DNN.readNet("bvlc_googlenet.caffemodel", read_net_opts)
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}

Set Backend and Target

# "0: automatically (by default), "
# "1: Halide language (http://halide-lang.org/), "
# "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
# "3: OpenCV implementation, "
# "4: VKCOM, "
# "5: CUDA
# Backend id 0: chosen automatically (the default).
backend_id = 0

# Target ids:
#   0: CPU target (by default)
#   1: OpenCL
#   2: OpenCL fp16 (half-float precision)
#   3: VPU
#   4: Vulkan
#   6: CUDA
#   7: CUDA fp16 (half-float preprocess)
target_id = 0

model =
  model
  |> Cv.DNN.Net.setPreferableBackend(backend_id)
  |> Cv.DNN.Net.setPreferableTarget(target_id)
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}

Read the Test Image and Set It as the Input

# Load the test image from the current working directory.
mat = Cv.imread("space_shuttle.jpg")

# Convert the image into the network's input blob.
#
# - `mean` is SUBTRACTED from each channel by `blobFromImage`, so the channel
#   means are given as positive numbers. Negative values would add the means
#   to the image instead of removing them.
# - `swapRB: false` keeps the BGR channel order produced by `imread`; the
#   mean values (104, 117, 123) are in that same B, G, R order, which is what
#   the BVLC GoogLeNet Caffe model expects.
# - `size` resizes the image to the 224x224 input of the network.
blob =
  Cv.DNN.blobFromImage(mat,
    scalefactor: 1,
    swapRB: false,
    mean: [104, 117, 123],
    size: [224, 224]
  )

# All preprocessing already happened in `blobFromImage`, so `setInput` gets a
# neutral scale factor and a zero mean.
model = Cv.DNN.Net.setInput(model, blob, name: "", scalefactor: 1.0, mean: [0, 0, 0])
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}

Run the Forward Function

# Measure elapsed time with the monotonic clock: unlike the OS wall clock it
# never jumps backwards or forwards when the system time is adjusted.
start_time = System.monotonic_time(:millisecond)
pred = Cv.DNN.Net.forward(model, outputName: "")
end_time = System.monotonic_time(:millisecond)
"Inference time=>#{end_time - start_time} ms"
"Inference time=>49 ms"

Get the Classification Result

# Move the network output into an Nx tensor on the binary backend.
pred = Cv.Mat.to_nx(pred, Nx.BinaryBackend)

# Index of the highest score over the whole tensor, as a plain integer.
pred_class_id = pred |> Nx.argmax() |> Nx.to_number()

# Score stored at that index along axis 1.
confidence =
  pred
  |> Nx.take(Nx.tensor(pred_class_id), axis: 1)
  |> Nx.to_flat_list()
  |> List.first()

# The class id doubles as the position in the list of class names.
class_label = Enum.at(classes, pred_class_id)

confidence_percent = Float.round(confidence * 100, 2)
"Predict result: #{class_label}=>#{confidence_percent}"
"Predict result: space shuttle=>99.13"