Custom Evaluators


The evaluator is where your domain-specific logic lives. This guide explains how to implement the neuroevolution_evaluator behaviour for different use cases. An evaluator implements two callbacks: evaluate/2 runs the domain-specific evaluation and attaches a metrics map to the individual, and calculate_fitness/1 reduces that metrics map to a single score, where higher is better.

The Behaviour

-callback evaluate(Individual, Options) -> {ok, UpdatedIndividual} | {error, Reason} when
    Individual :: individual(),
    Options :: map(),
    UpdatedIndividual :: individual(),
    Reason :: term().

-callback calculate_fitness(Metrics) -> Fitness when
    Metrics :: map(),
    Fitness :: float().

Basic Structure

-module(my_evaluator).
-behaviour(neuroevolution_evaluator).

-include_lib("macula_neuroevolution/include/neuroevolution.hrl").

-export([evaluate/2, calculate_fitness/1]).

evaluate(Individual, Options) ->
    %% 1. Extract the network
    Network = Individual#individual.network,

    %% 2. Run your evaluation logic
    Results = run_evaluation(Network, Options),

    %% 3. Update individual with metrics
    UpdatedIndividual = Individual#individual{
        metrics = results_to_metrics(Results)
    },
    {ok, UpdatedIndividual}.

calculate_fitness(Metrics) ->
    %% Convert metrics to a single fitness score
    %% Higher is better
    compute_fitness(Metrics).

Example: Game AI Evaluator

Here's a complete example for a game-playing AI:

-module(game_evaluator).
-behaviour(neuroevolution_evaluator).

-include_lib("macula_neuroevolution/include/neuroevolution.hrl").

-export([evaluate/2, calculate_fitness/1]).

%% Result record for a single game
-record(game_result, {score = 0, moves = 0, won = false}).

%% Evaluate by playing multiple games
evaluate(Individual, Options) ->
    Network = Individual#individual.network,
    NumGames = maps:get(games_per_eval, Options, 10),

    %% Play multiple games to reduce variance
    GameResults = [play_game(Network) || _ <- lists:seq(1, NumGames)],

    %% Aggregate results
    TotalScore = lists:sum([R#game_result.score || R <- GameResults]),
    TotalMoves = lists:sum([R#game_result.moves || R <- GameResults]),
    Wins = length([R || R <- GameResults, R#game_result.won]),

    UpdatedIndividual = Individual#individual{
        metrics = #{
            total_score => TotalScore,
            total_moves => TotalMoves,
            wins => Wins,
            games_played => NumGames
        }
    },
    {ok, UpdatedIndividual}.

%% Multi-objective fitness
calculate_fitness(Metrics) ->
    Score = maps:get(total_score, Metrics, 0),
    Moves = maps:get(total_moves, Metrics, 0),
    Wins = maps:get(wins, Metrics, 0),

    %% Weighted combination
    Score * 10.0 + Moves * 0.1 + Wins * 100.0.

%% Internal: Play one game
play_game(Network) ->
    InitialState = game:new(),
    play_loop(Network, InitialState, 0, 0).

play_loop(Network, State, Score, Moves) ->
    case game:is_over(State) of
        true ->
            #game_result{
                score = Score,
                moves = Moves,
                won = game:is_won(State)
            };
        false ->
            %% Get game state as network input
            Input = game:to_input_vector(State),

            %% Get network decision
            Output = network_evaluator:propagate(Network, Input),
            Action = output_to_action(Output),

            %% Apply action
            {NewState, Reward} = game:step(State, Action),
            play_loop(Network, NewState, Score + Reward, Moves + 1)
    end.

output_to_action(Output) ->
    %% Convert network output to discrete action
    %% e.g., argmax for classification
    {_MaxVal, MaxIdx} = lists:foldl(
        fun({Val, Idx}, {BestVal, BestIdx}) ->
            case Val > BestVal of
                true -> {Val, Idx};
                false -> {BestVal, BestIdx}
            end
        end,
        {hd(Output), 0},
        lists:zip(tl(Output), lists:seq(1, length(Output) - 1))
    ),
    MaxIdx.
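
For example, output_to_action([0.1, 0.7, 0.2]) returns 1, the zero-based index of the strongest output, which the game module can then map to a concrete move.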

Example: Function Approximation

For supervised learning / function approximation:

-module(regression_evaluator).
-behaviour(neuroevolution_evaluator).

-include_lib("macula_neuroevolution/include/neuroevolution.hrl").

-export([evaluate/2, calculate_fitness/1]).

evaluate(Individual, Options) ->
    Network = Individual#individual.network,
    Dataset = maps:get(dataset, Options),

    %% Evaluate on all samples
    Errors = lists:map(
        fun({Input, Expected}) ->
            Output = network_evaluator:propagate(Network, Input),
            mean_squared_error(Output, Expected)
        end,
        Dataset
    ),

    MeanError = lists:sum(Errors) / length(Errors),

    UpdatedIndividual = Individual#individual{
        metrics = #{
            mean_error => MeanError,
            samples => length(Dataset)
        }
    },
    {ok, UpdatedIndividual}.

%% Lower error = higher fitness
calculate_fitness(Metrics) ->
    Error = maps:get(mean_error, Metrics, 1.0),
    %% Convert to fitness (higher is better)
    1.0 / (1.0 + Error).

mean_squared_error(Output, Expected) ->
    Diffs = lists:zipwith(fun(O, E) -> (O - E) * (O - E) end, Output, Expected),
    lists:sum(Diffs) / length(Diffs).
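
The dataset option is a list of {Input, Expected} pairs, where both elements are lists of floats sized to the network's inputs and outputs. A hypothetical XOR dataset might look like this:

%% Hypothetical dataset for learning XOR (2 inputs, 1 output)
Options = #{dataset => [
    {[0.0, 0.0], [0.0]},
    {[0.0, 1.0], [1.0]},
    {[1.0, 0.0], [1.0]},
    {[1.0, 1.0], [0.0]}
]}.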

Example: Parallel Evaluation with Timeout

For expensive evaluations:

-module(parallel_evaluator).
-behaviour(neuroevolution_evaluator).

-include_lib("macula_neuroevolution/include/neuroevolution.hrl").

-export([evaluate/2, calculate_fitness/1]).

evaluate(Individual, Options) ->
    Network = Individual#individual.network,
    Timeout = maps:get(timeout_ms, Options, 5000),

    %% Spawn the evaluation in a separate, unlinked process so a crash
    %% or runaway computation cannot take down the caller
    Self = self(),
    Ref = make_ref(),

    Pid = spawn(fun() ->
        Result = run_expensive_evaluation(Network),
        Self ! {eval_result, Ref, Result}
    end),

    %% Wait with timeout
    receive
        {eval_result, Ref, Result} ->
            UpdatedIndividual = Individual#individual{
                metrics = Result
            },
            {ok, UpdatedIndividual}
    after Timeout ->
        %% Timeout - kill the worker, drop any late result, return poor fitness
        exit(Pid, kill),
        receive {eval_result, Ref, _Late} -> ok after 0 -> ok end,
        UpdatedIndividual = Individual#individual{
            metrics = #{timeout => true, score => 0}
        },
        {ok, UpdatedIndividual}
    end.

calculate_fitness(Metrics) ->
    case maps:get(timeout, Metrics, false) of
        true -> 0.0;  % Penalize timeouts
        false -> maps:get(score, Metrics, 0.0)
    end.

run_expensive_evaluation(Network) ->
    %% Your expensive computation here
    #{score => compute_score(Network)}.

Best Practices

1. Multiple Evaluations

Run each network multiple times to reduce variance:

NumTrials = maps:get(trials, Options, 10),
Results = [evaluate_once(Network) || _ <- lists:seq(1, NumTrials)],
AggregatedMetrics = aggregate(Results).
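
Here evaluate_once/1 and aggregate/1 are hypothetical helpers. A minimal sketch of aggregate/1, assuming each trial returns a map of numeric metrics, is to sum values per key and divide by the number of trials:

%% Hypothetical helper: average numeric metrics across trials
aggregate(Results) ->
    N = length(Results),
    Summed = lists:foldl(
        fun(Metrics, Acc) ->
            maps:fold(
                fun(Key, Value, Inner) ->
                    maps:update_with(Key, fun(V) -> V + Value end, Value, Inner)
                end,
                Acc, Metrics)
        end,
        #{}, Results),
    maps:map(fun(_Key, Sum) -> Sum / N end, Summed).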

2. Normalize Fitness

Keep fitness values in a reasonable range:

calculate_fitness(Metrics) ->
    RawScore = maps:get(score, Metrics, 0),
    %% Normalize to [0, 1] or similar range
    math:tanh(RawScore / 1000.0).

3. Multi-Objective Fitness

Combine multiple objectives carefully:

calculate_fitness(Metrics) ->
    Score = maps:get(score, Metrics, 0),
    Efficiency = maps:get(efficiency, Metrics, 0),
    Safety = maps:get(safety, Metrics, 1),

    %% Weighted sum with constraints
    case Safety < 0.5 of
        true -> 0.0;  % Safety constraint
        false -> Score * 0.7 + Efficiency * 0.3
    end.

4. Handle Errors Gracefully

evaluate(Individual, Options) ->
    try
        do_evaluation(Individual, Options)
    catch
        _:Reason ->
            error_logger:warning_msg("Evaluation failed: ~p~n", [Reason]),
            %% Tag the failure so calculate_fitness/1 can assign zero fitness
            {ok, Individual#individual{metrics = #{error => true}}}
    end.
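
If failed evaluations are tagged this way, the matching calculate_fitness/1 should map the error flag to the worst score, mirroring the timeout handling above (this sketch assumes your normal metrics carry a score key):

calculate_fitness(Metrics) ->
    case maps:get(error, Metrics, false) of
        true -> 0.0;  % failed evaluations get zero fitness
        false -> maps:get(score, Metrics, 0.0)
    end.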

Fitness Function Design

The fitness function is critical to evolutionary success:

  • Higher is better - The selection algorithms expect higher fitness = better performance
  • Smooth gradients - Avoid cliff functions where small changes cause big fitness jumps
  • Discriminating - Even poor solutions should receive distinguishable fitness values, so selection can still rank them
  • Reward partial success - Don't only reward complete solutions

Example of incremental fitness:

calculate_fitness(Metrics) ->
    %% Reward any progress, not just winning
    DistanceTraveled = maps:get(distance, Metrics, 0),
    CollectedItems = maps:get(items, Metrics, 0),
    ReachedGoal = maps:get(goal_reached, Metrics, false),

    BaseFitness = DistanceTraveled * 0.1 + CollectedItems * 10.0,
    case ReachedGoal of
        true -> BaseFitness + 1000.0;  % Big bonus for goal
        false -> BaseFitness
    end.