Evision.DNN Example - Image Classification Task with GoogLeNet

# set `EVISION_PREFER_PRECOMPILED` to `false` 
# if you prefer `:evision` to be compiled from source
# note that to compile from source, you may need at least 1GB RAM
# System.put_env("EVISION_PREFER_PRECOMPILED", "false")

Mix.install([
  {:evision, "~> 0.1.21"},
  {:kino, "~> 0.7"},
  {:req, "~> 0.3"}
])
:ok

define-some-helper-functions

Define Some Helper Functions

defmodule Helper do
  @moduledoc """
  Small helpers shared by the cells of this notebook.
  """

  @doc """
  Downloads `url` to the local path `save_as` and returns `:ok`.

  The request is skipped when `save_as` already exists, unless `overwrite?` is
  `true`, in which case the file is fetched again and Req's `cache` option is
  turned off for that request. The request is made with `http_errors: :raise`,
  so an HTTP error response raises instead of being written to disk.
  """
  @spec download!(String.t(), Path.t(), boolean()) :: :ok
  def download!(url, save_as, overwrite? \\ false) do
    # Honour `overwrite?`: an existing file must not block a forced re-download.
    if overwrite? or not File.exists?(save_as) do
      Req.get!(url, http_errors: :raise, output: save_as, cache: not overwrite?)
    end

    :ok
  end
end
{:module, Helper, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:download!, 3}}

download-googlenet-model-and-a-test-image

Download GoogLeNet Model and a Test Image

  • Model parameters. bvlc_googlenet.caffemodel
  • Model config. bvlc_googlenet.prototxt
  • List of class names. classification_classes_ILSVRC2012.txt
  • Test image. space_shuttle.jpg
# Work from the notebook's own directory (or any directory you can write to)
# so that every relative file name below resolves to the same place.
File.cd!(__DIR__)

# {source URL, local file name} pairs, fetched in the order listed.
[
  {"http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel", "bvlc_googlenet.caffemodel"},
  {"https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/bvlc_googlenet.prototxt",
   "bvlc_googlenet.prototxt"},
  {"https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt",
   "classification_classes_ILSVRC2012.txt"},
  {"https://docs.opencv.org/4.5.4/space_shuttle.jpg", "space_shuttle.jpg"}
]
|> Enum.each(fn {url, file_name} -> Helper.download!(url, file_name) end)
:ok

read-class-names

Read Class Names

# One label per line of the class-names file; the label's position in the list
# is what the predicted class id is later looked up with, so line order is kept.
classes =
  "classification_classes_ILSVRC2012.txt"
  |> File.read!()
  # Drop the final newline first; otherwise the split yields a bogus "" label at the end.
  |> String.trim_trailing()
  # Accept both Unix and Windows line endings so labels never carry a stray "\r".
  |> String.split(["\r\n", "\n"])
["tench, Tinca tinca", "goldfish, Carassius auratus",
 "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
 "tiger shark, Galeocerdo cuvieri", "hammerhead, hammerhead shark",
 "electric ray, crampfish, numbfish, torpedo", "stingray", "cock", "hen",
 "ostrich, Struthio camelus", "brambling, Fringilla montifringilla",
 "goldfinch, Carduelis carduelis", "house finch, linnet, Carpodacus mexicanus", "junco, snowbird",
 "indigo bunting, indigo finch, indigo bird, Passerina cyanea",
 "robin, American robin, Turdus migratorius", "bulbul", "jay", "magpie", "chickadee",
 "water ouzel, dipper", "kite", "bald eagle, American eagle, Haliaeetus leucocephalus", "vulture",
 "great grey owl, great gray owl, Strix nebulosa",
 "European fire salamander, Salamandra salamandra", "common newt, Triturus vulgaris", "eft",
 "spotted salamander, Ambystoma maculatum", "axolotl, mud puppy, Ambystoma mexicanum",
 "bullfrog, Rana catesbeiana", "tree frog, tree-frog",
 "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
 "loggerhead, loggerhead turtle, Caretta caretta",
 "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", "mud turtle", "terrapin",
 "box turtle, box tortoise", "banded gecko", "common iguana, iguana, Iguana iguana",
 "American chameleon, anole, Anolis carolinensis", "whiptail, whiptail lizard", "agama",
 "frilled lizard, Chlamydosaurus kingi", "alligator lizard", "Gila monster, Heloderma suspectum",
 "green lizard, Lacerta viridis", "African chameleon, Chamaeleo chamaeleon",
 "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
 "African crocodile, Nile crocodile, Crocodylus niloticus", ...]

load-the-googlenet-model

Load the GoogLeNet Model

alias Evision, as: Cv

# Files fetched by the download step: trained weights and the network definition.
weights_path = "bvlc_googlenet.caffemodel"
config_path = "bvlc_googlenet.prototxt"

# An empty `framework` string is passed, exactly as before.
model = Cv.DNN.readNet(weights_path, config: config_path, framework: "")
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}

set-backend-and-target

Set Backend and Target

# Backend ids:
#   0 - automatic (default)
#   1 - Halide language (http://halide-lang.org/)
#   2 - Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)
#   3 - OpenCV implementation
#   4 - VKCOM
#   5 - CUDA
#
# Target ids:
#   0 - CPU (default)
#   1 - OpenCL
#   2 - OpenCL fp16 (half-float precision)
#   3 - VPU
#   4 - Vulkan
#   6 - CUDA
#   7 - CUDA fp16 (half-float preprocess)
#
# Both setters are called with 0, i.e. the defaults listed above.
model =
  model
  |> Cv.DNN.Net.setPreferableBackend(0)
  |> Cv.DNN.Net.setPreferableTarget(0)
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}

read-the-test-image-and-set-it-as-the-input

Read the Test Image and Set It as the Input

mat = Cv.imread("space_shuttle.jpg")

# `blobFromImage` SUBTRACTS `mean` from each channel (see the OpenCV docs), and the
# BVLC GoogLeNet Caffe model is documented as taking BGR input with the per-channel
# means 104/117/123 subtracted. `imread` already yields BGR, so the channels are
# left in place and the means are passed as positive values.
blob =
  Cv.DNN.blobFromImage(mat,
    scalefactor: 1,
    swapRB: false,
    mean: [104, 117, 123],
    size: [224, 224]
  )

# The blob is already preprocessed, so `setInput` gets an identity scale and a zero mean.
model = Cv.DNN.Net.setInput(model, blob, name: "", scalefactor: 1.0, mean: [0, 0, 0])
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}

run-the-forward-function

Run the Forward Function

# Time the forward pass with the monotonic clock: unlike the OS wall clock it never
# jumps when the system time is adjusted, so the difference is a valid elapsed duration.
start_time = System.monotonic_time(:millisecond)
pred = Cv.DNN.Net.forward(model, outputName: "")
end_time = System.monotonic_time(:millisecond)
"Inference time=>#{end_time - start_time} ms"
"Inference time=>49 ms"

get-the-classification-result

Get the Classification Result

# Move the network output into an Nx tensor on the binary backend.
pred = Cv.Mat.to_nx(pred, Nx.BinaryBackend)

# `Nx.argmax/1` is called without an axis, so the flat list holds a single index.
[pred_class_id] = pred |> Nx.argmax() |> Nx.to_flat_list()

# Score stored at that index along axis 1; the first value is the one reported.
[confidence | _] =
  pred
  |> Nx.take(Nx.tensor(pred_class_id), axis: 1)
  |> Nx.to_flat_list()

# Class names are looked up by position in the list read from the labels file.
class_label = Enum.at(classes, pred_class_id)

"Predict result: #{class_label}=>#{Float.round(confidence * 100, 2)}"
"Predict result: space shuttle=>99.13"