v0.6.1 - Modular Evolution Components

View Source

Overview

This release refactors population evolution into modular, DXNN2-aligned components. It extracts the inline selection and fitness logic into separate modules and adds proper species management and crossover operations.

Phase: Structural (completion) | Duration: 1-2 weeks | Prerequisites: v0.6.0 (population_monitor baseline)

Objectives

  1. Extract selection algorithms to dedicated module
  2. Extract fitness postprocessing to dedicated module
  3. Implement species identification and management
  4. Add crossover/recombination operators
  5. Improve modularity and DXNN2 alignment
  6. Maintain comprehensive test coverage

Modules to Create

1. selection_algorithm.erl

Purpose: Pluggable selection strategies for survivor selection.

DXNN2 Alignment: Matches DXNN2's selection_algorithm.erl module structure.

Exports:

-module(selection_algorithm).

-export([
    competition/2,
    top_x/2,
    steady_state/2,
    tournament/3
]).

%% @doc Rate-based truncation selection (the default strategy).
%%
%% Ranks the agents by the sum of their fitness vector, best first, and
%% keeps the leading `SurvivalRate' share of the population. At least one
%% slot is always granted; an empty population yields an empty list.
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @param SurvivalRate fraction of the population to keep (0.0-1.0)
%% @returns ids of the surviving agents, best first
-spec competition([{term(), [float()]}], float()) -> [term()].
competition(AgentFitnesses, SurvivalRate) ->
    ByTotalDesc = fun({_, FitnessA}, {_, FitnessB}) ->
        sum_fitness(FitnessA) >= sum_fitness(FitnessB)
    end,
    Ranked = lists:sort(ByTotalDesc, AgentFitnesses),

    %% Never drop below one survivor, however small the rate is.
    Quota = max(1, round(length(AgentFitnesses) * SurvivalRate)),
    [AgentId || {AgentId, _} <- lists:sublist(Ranked, Quota)].

%% @doc Count-based truncation selection.
%%
%% Ranks the agents by summed fitness, best first, and returns the ids of
%% the first `Count' of them (fewer when the population is smaller).
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @param Count absolute number of agents to keep
%% @returns ids of the surviving agents, best first
-spec top_x([{term(), [float()]}], pos_integer()) -> [term()].
top_x(AgentFitnesses, Count) ->
    BestFirst = lists:sort(
        fun({_, Left}, {_, Right}) -> sum_fitness(Left) >= sum_fitness(Right) end,
        AgentFitnesses
    ),
    [Id || {Id, _} <- lists:sublist(BestFirst, Count)].

%% @doc Steady-state selection (replace worst).
%%
%% Expressed through competition/2: the share of agents that survives is
%% whatever is left after `ReplacementRate' of the population has been
%% marked for replacement by new offspring.
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @param ReplacementRate fraction of the population to replace (0.0-1.0)
%% @returns ids of the agents that are kept
-spec steady_state([{term(), [float()]}], float()) -> [term()].
steady_state(AgentFitnesses, ReplacementRate) ->
    competition(AgentFitnesses, 1.0 - ReplacementRate).

%% @doc Tournament selection.
%%
%% Runs `NumSurvivors' independent tournaments. Each tournament draws up to
%% `TournamentSize' random agents from the whole population and keeps the
%% one with the highest summed fitness. Every tournament draws from the
%% full population, so the same agent can win more than once and appear
%% repeatedly in the result.
%%
%% An empty population yields an empty list instead of crashing.
%%
%% @param AgentFitnesses list of {AgentId, Fitness}
%% @param TournamentSize number of agents per tournament
%% @param NumSurvivors desired number of survivors
%% @returns list of surviving AgentIds (one entry per tournament)
-spec tournament([{term(), [float()]}], pos_integer(), pos_integer()) -> [term()].
tournament([], _TournamentSize, _NumSurvivors) ->
    %% No candidates to draw: picking a tournament head would fail.
    [];
tournament(AgentFitnesses, TournamentSize, NumSurvivors) ->
    run_tournaments(AgentFitnesses, TournamentSize, NumSurvivors, []).

%% @private Run `Remaining' tournaments, collecting one winner id per round.
%% The guard on the recursive clause rejects a negative counter, which
%% would otherwise never reach the terminating clause.
run_tournaments(_AgentFitnesses, _TournamentSize, 0, Acc) ->
    Acc;
run_tournaments(AgentFitnesses, TournamentSize, Remaining, Acc) when Remaining > 0 ->
    %% Select random K agents
    [First | Rest] = select_random_n(AgentFitnesses, TournamentSize),

    %% Pick best from tournament; on a tie the earlier candidate is kept
    {WinnerId, _} = lists:foldl(
        fun({_, F} = Challenger, {_, BestF} = Best) ->
            case sum_fitness(F) > sum_fitness(BestF) of
                true -> Challenger;
                false -> Best
            end
        end,
        First,
        Rest
    ),

    run_tournaments(AgentFitnesses, TournamentSize, Remaining - 1, [WinnerId | Acc]).

%% @private Draw N elements of List at random.
%% When N is at least the list length the list is handed back as-is
%% (not shuffled). Otherwise every element is paired with a random key,
%% the pairs are sorted, and the first N elements are taken.
select_random_n(List, N) when N >= length(List) ->
    List;
select_random_n(List, N) ->
    Keyed = lists:sort([{rand:uniform(), Elem} || Elem <- List]),
    lists:sublist([Elem || {_Key, Elem} <- Keyed], N).

%% @private Collapse a fitness vector into one scalar by adding up its
%% components, left to right, starting from 0.
sum_fitness(Fitness) ->
    lists:foldl(fun(Component, Total) -> Total + Component end, 0, Fitness).

Tests: selection_algorithm_tests.erl

  • Test each selection strategy
  • Verify fitness ordering
  • Test edge cases (empty, single agent)

2. fitness_postprocessor.erl

Purpose: Transform and normalize fitness values before selection.

DXNN2 Alignment: Matches DXNN2's fitness_postprocessor.erl module.

Exports:

-module(fitness_postprocessor).

%% NOTE(review): size_proportional/1, age_proportional/1 and
%% calculate_network_size/1 use the #agent{} and #cortex{} records, but no
%% header defining them is included in this listing. The module needs an
%% -include for that header to compile. TODO confirm the header name.

%% Every exported postprocessor takes and returns a list of
%% {AgentId, Fitness} tuples.
-export([
    none/1,
    size_proportional/1,
    age_proportional/1,
    normalize/1,
    pareto_dominance/1
]).

%% @doc Identity postprocessor: hands the fitness list back untouched.
%%
%% @param Unchanged list of {AgentId, Fitness} tuples
%% @returns the very same list
-spec none([{term(), [float()]}]) -> [{term(), [float()]}].
none(Unchanged) ->
    Unchanged.

%% @doc Size-proportional fitness adjustment.
%%
%% Favours parsimony: every fitness component of an agent is divided by
%% (1 + 0.01 * network_size), where network_size comes from
%% calculate_network_size/1 for the agent read from the genotype store.
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @returns the same agents with size-penalized fitness vectors
-spec size_proportional([{term(), [float()]}]) -> [{term(), [float()]}].
size_proportional(AgentFitnesses) ->
    Penalize = fun({AgentId, Fitness}) ->
        Agent = genotype:dirty_read({agent, AgentId}),
        Size = calculate_network_size(Agent),
        Penalty = 0.01,  % Configurable
        {AgentId, lists:map(fun(Score) -> Score / (1.0 + Penalty * Size) end, Fitness)}
    end,
    lists:map(Penalize, AgentFitnesses).

%% @doc Age-proportional fitness adjustment.
%%
%% Every fitness component of an agent is divided by
%% (1 + 0.005 * generation), where generation is the `generation' field of
%% the agent record read from the genotype store. A larger generation
%% value therefore means a larger penalty.
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @returns the same agents with generation-penalized fitness vectors
-spec age_proportional([{term(), [float()]}]) -> [{term(), [float()]}].
age_proportional(AgentFitnesses) ->
    Penalize = fun({AgentId, Fitness}) ->
        Agent = genotype:dirty_read({agent, AgentId}),
        Gen = Agent#agent.generation,
        Penalty = 0.005,
        {AgentId, lists:map(fun(Score) -> Score / (1.0 + Penalty * Gen) end, Fitness)}
    end,
    lists:map(Penalize, AgentFitnesses).

%% @doc Min-max normalize fitness to the [0, 1] range, per objective.
%%
%% For each objective the minimum and maximum over all agents are found,
%% and every value is mapped to (Value - Min) / (Max - Min). When all
%% agents share the same value for an objective the range is zero and the
%% objective is reported as 0.5 for everyone.
%%
%% The zero-range test uses `==' so that a float range of 0.0 is caught as
%% well as an integer 0; a literal `0' pattern does not match 0.0 and the
%% division would then fail with badarith.
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples; the number of
%%        objectives is taken from the first entry
%% @returns the same agents with normalized fitness vectors
-spec normalize([{term(), [float()]}]) -> [{term(), [float()]}].
normalize([]) ->
    [];
normalize(AgentFitnesses) ->
    NumObjectives = length(element(2, hd(AgentFitnesses))),
    Vectors = [F || {_, F} <- AgentFitnesses],

    %% One {Min, Max} pair per objective, in objective order
    Bounds = [objective_bounds(Idx, Vectors) || Idx <- lists:seq(1, NumObjectives)],

    lists:map(
        fun({AgentId, Fitness}) ->
            {AgentId, lists:zipwith(fun scale_to_unit/2, Fitness, Bounds)}
        end,
        AgentFitnesses
    ).

%% @private Smallest and largest value of objective number Idx.
objective_bounds(Idx, Vectors) ->
    Values = [lists:nth(Idx, V) || V <- Vectors],
    {lists:min(Values), lists:max(Values)}.

%% @private Map Value into [0, 1] given the objective's {Min, Max}.
scale_to_unit(_Value, {Min, Max}) when Max == Min ->
    0.5;  % All same
scale_to_unit(Value, {Min, Max}) ->
    (Value - Min) / (Max - Min).

%% @doc Pareto dominance ranking for multi-objective optimization.
%%
%% Each agent receives a dominance rank from assign_dominance_ranks/1
%% (rank 1 = non-dominated, lower is better). The original fitness vector
%% is then replaced by the single-element vector [1.0 / Rank], so a better
%% rank becomes a larger fitness.
%%
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @returns list of {AgentId, [InverseRank]} tuples
-spec pareto_dominance([{term(), [float()]}]) -> [{term(), [float()]}].
pareto_dominance(AgentFitnesses) ->
    RankToFitness = fun({AgentId, _RawFitness, Rank}) ->
        {AgentId, [1.0 / float(Rank)]}
    end,
    lists:map(RankToFitness, assign_dominance_ranks(AgentFitnesses)).

%% @private Tag every agent with its Pareto front number.
%% Returns {AgentId, Fitness, Rank} triples ordered by rank, starting at 1.
assign_dominance_ranks(AgentFitnesses) ->
    assign_ranks(AgentFitnesses, [], 1).

%% @private Peel one front per iteration off the pool.
%% FrontsRev holds the already tagged fronts, most recent first; they are
%% put back in rank order and flattened once the pool is empty.
assign_ranks([], FrontsRev, _Rank) ->
    lists:append(lists:reverse(FrontsRev));
assign_ranks(Pool, FrontsRev, Rank) ->
    %% The current front: everything nobody in the pool dominates
    Front = find_non_dominated(Pool),
    FrontIds = [Id || {Id, _} <- Front],

    %% Entries whose id is in the front leave the pool
    Leftover = [{Id, F} || {Id, F} <- Pool, not lists:member(Id, FrontIds)],

    Tagged = [{Id, F, Rank} || {Id, F} <- Front],
    assign_ranks(Leftover, [Tagged | FrontsRev], Rank + 1).

%% @private Keep the entries that no entry of the same list dominates.
%% Every candidate is compared against the full list (itself included).
find_non_dominated(AgentFitnesses) ->
    IsUndominated = fun({_Id, Fitness}) ->
        lists:all(
            fun({_OtherId, OtherFitness}) ->
                not dominates(OtherFitness, Fitness)
            end,
            AgentFitnesses
        )
    end,
    lists:filter(IsUndominated, AgentFitnesses).

%% @private Pareto dominance test on two fitness vectors of equal length.
%% True when F1 is at least as good as F2 on every objective and strictly
%% better on at least one; equal vectors do not dominate each other.
dominates(F1, F2) ->
    Pairs = lists:zip(F1, F2),
    NeverWorse = lists:all(fun({A, B}) -> A >= B end, Pairs),
    NeverWorse andalso lists:any(fun({A, B}) -> A > B end, Pairs).

%% @private Network size of an agent, measured as the number of neuron ids
%% listed in the agent's cortex record.
calculate_network_size(Agent) ->
    CortexId = Agent#agent.cx_id,
    Cortex = genotype:dirty_read({cortex, CortexId}),
    NeuronIds = Cortex#cortex.neuron_ids,
    length(NeuronIds).

Tests: fitness_postprocessor_tests.erl

  • Test each postprocessing strategy
  • Verify size/age penalties
  • Test normalization edge cases
  • Test Pareto dominance ranking

3. species_identifier.erl

Purpose: Identify and manage species based on behavioral/structural similarity.

DXNN2 Alignment: Matches DXNN2's specie_identifier.erl module.

Exports:

-module(species_identifier).

%% NOTE(review): identify_species/2, calculate_distance/2 and
%% find_closest_species/2 access fields of the #agent{} record, but no
%% header defining it is included in this listing. The module needs an
%% -include for that header to compile. TODO confirm the header name.

%% Species maps are maps from a species id to the list of member agent ids.
-export([
    identify_species/2,
    calculate_distance/2,
    update_species_map/2,
    calculate_species_fitness/2
]).

%% @doc Place an agent into a species.
%%
%% The agent's fingerprint is compared with one representative of every
%% known species. If the closest species is nearer than 3.0 the agent is
%% prepended to its member list; otherwise a new species with a freshly
%% generated id is opened with the agent as its only member.
%%
%% @param AgentId agent to assign
%% @param SpeciesMap current species assignments
%% @returns {SpecieId, UpdatedSpeciesMap}
-spec identify_species(term(), #{term() => [term()]}) ->
    {term(), #{term() => [term()]}}.
identify_species(AgentId, SpeciesMap) ->
    Agent = genotype:dirty_read({agent, AgentId}),
    Closest = find_closest_species(Agent#agent.fingerprint, SpeciesMap),
    assign_to_species(Closest, AgentId, SpeciesMap).

%% @private Join the closest species when it is near enough, else found a
%% new one. The atom `infinity' sorts after every number in Erlang's term
%% order, so an `infinity' distance never passes the `< 3.0' guard.
assign_to_species({SpecieId, Distance}, AgentId, SpeciesMap) when Distance < 3.0 ->
    Members = maps:get(SpecieId, SpeciesMap, []),
    {SpecieId, maps:put(SpecieId, [AgentId | Members], SpeciesMap)};
assign_to_species(_TooFarOrNone, AgentId, SpeciesMap) ->
    NewSpecieId = genotype:generate_UniqueId(),
    {NewSpecieId, maps:put(NewSpecieId, [AgentId], SpeciesMap)}.

%% @doc Calculate distance between two agents.
%%
%% Reads both agents from the genotype store and compares their
%% fingerprints with fingerprint_distance/2.
%%
%% @param AgentId1 first agent
%% @param AgentId2 second agent
%% @returns the fingerprint distance as a float, or the atom `infinity'
%%          when either agent has no fingerprint (`undefined')
-spec calculate_distance(term(), term()) -> float() | infinity.
calculate_distance(AgentId1, AgentId2) ->
    Agent1 = genotype:dirty_read({agent, AgentId1}),
    Agent2 = genotype:dirty_read({agent, AgentId2}),

    fingerprint_distance(Agent1#agent.fingerprint, Agent2#agent.fingerprint).

%% @doc Update species map after evolution.
%%
%% Drops every member that is not among the living agents and then removes
%% the species that are left without members. Pruning the member lists
%% matters because the head of a member list is used as the species
%% representative, and it must still refer to an existing agent.
%%
%% @param SpeciesMap map from species id to member agent ids
%% @param LivingAgents ids of the agents that survived
%% @returns the species map restricted to living members; member order
%%          within a species is preserved
-spec update_species_map(#{term() => [term()]}, [term()]) ->
    #{term() => [term()]}.
update_species_map(SpeciesMap, LivingAgents) ->
    %% Set lookup keeps the membership test cheap for large populations
    Living = sets:from_list(LivingAgents),

    Pruned = maps:map(
        fun(_SpecieId, Members) ->
            [M || M <- Members, sets:is_element(M, Living)]
        end,
        SpeciesMap
    ),

    %% Remove extinct species
    maps:filter(fun(_SpecieId, Members) -> Members =/= [] end, Pruned).

%% @doc Mean fitness vector of every species.
%%
%% Members without an entry in `AgentFitnesses' are ignored. A species
%% with no scored member at all is reported as [0.0].
%%
%% @param SpeciesMap map from species id to member agent ids
%% @param AgentFitnesses list of {AgentId, Fitness} tuples
%% @returns map from species id to the averaged fitness vector
-spec calculate_species_fitness(#{term() => [term()]}, [{term(), [float()]}]) ->
    #{term() => [float()]}.
calculate_species_fitness(SpeciesMap, AgentFitnesses) ->
    FitnessById = maps:from_list(AgentFitnesses),

    SpeciesMean = fun(_SpecieId, Members) ->
        %% Fitness vectors of the members that were actually scored
        Scored = lists:filtermap(
            fun(Member) ->
                case maps:find(Member, FitnessById) of
                    {ok, Fitness} -> {true, Fitness};
                    error -> false
                end
            end,
            Members
        ),
        case Scored of
            [] -> [0.0];
            [_ | _] -> average_fitness(Scored)
        end
    end,

    maps:map(SpeciesMean, SpeciesMap).

%% @private Locate the species whose representative is nearest to the
%% given fingerprint. Returns {SpecieId, Distance}; for an empty species
%% map the result is {undefined, infinity}. The representative of a
%% species is the head of its member list. On equal distances the species
%% met first keeps the lead.
find_closest_species(_Fingerprint, SpeciesMap) when map_size(SpeciesMap) == 0 ->
    {undefined, infinity};
find_closest_species(Fingerprint, SpeciesMap) ->
    Representatives =
        [{SpecieId, hd(Members)} || {SpecieId, Members} <- maps:to_list(SpeciesMap)],

    %% Distance from the fingerprint to each representative
    Measure = fun({SpecieId, RepId}) ->
        Rep = genotype:dirty_read({agent, RepId}),
        {SpecieId, fingerprint_distance(Fingerprint, Rep#agent.fingerprint)}
    end,
    [Seed | Others] = lists:map(Measure, Representatives),

    %% Keep whichever pair carries the strictly smaller distance
    Nearer = fun
        ({_, Dist} = Candidate, {_, BestDist}) when Dist < BestDist -> Candidate;
        ({_, _}, {_, _} = Best) -> Best
    end,
    lists:foldl(Nearer, Seed, Others).

%% @private Euclidean distance between two fingerprints given as lists of
%% numbers of equal length. A missing fingerprint (`undefined') on either
%% side makes the distance `infinity'.
fingerprint_distance(undefined, _FP2) ->
    infinity;
fingerprint_distance(_FP1, undefined) ->
    infinity;
fingerprint_distance(FP1, FP2) ->
    SquaredGaps = lists:zipwith(
        fun(A, B) ->
            Gap = A - B,
            Gap * Gap
        end,
        FP1,
        FP2
    ),
    math:sqrt(lists:sum(SquaredGaps)).

%% @private Component-wise mean of a non-empty list of fitness vectors.
%% The number of objectives is taken from the first vector.
average_fitness(Fitnesses) ->
    [First | _] = Fitnesses,
    MeanOf = fun(ObjIdx) ->
        Column = [lists:nth(ObjIdx, Vector) || Vector <- Fitnesses],
        lists:sum(Column) / length(Column)
    end,
    [MeanOf(ObjIdx) || ObjIdx <- lists:seq(1, length(First))].

Tests: species_identifier_tests.erl

  • Test species assignment
  • Test distance calculation
  • Test species map updates
  • Test species fitness calculation

4. crossover.erl

Purpose: Sexual reproduction via genome recombination.

DXNN2 Note: DXNN2 primarily uses mutation, but crossover is mentioned in advanced chapters.

Exports:

-module(crossover).

%% NOTE(review): the functions below use the #agent{}, #constraint{},
%% #cortex{} and #neuron{} records, but no header defining them is
%% included in this listing. The module needs an -include for that header
%% to compile. TODO confirm the header name.

%% crossover/2 works on agent ids, neuron_crossover/2 on an offspring and
%% a donor agent id, weight_crossover/2 on two neuron ids.
-export([
    crossover/2,
    neuron_crossover/2,
    weight_crossover/2
]).

%% @doc Perform crossover between two parent agents.
%%
%% Creates offspring by combining genetic material from both parents:
%% parent 1 is cloned and the clone then receives neuron weights from
%% parent 2 via neuron_crossover/2. Parents whose constraints name
%% different morphologies are rejected.
%%
%% @param ParentId1 first parent (cloned as the base of the offspring)
%% @param ParentId2 second parent (weight donor)
%% @returns {ok, OffspringId} | {error, incompatible_morphology}
-spec crossover(term(), term()) -> {ok, term()} | {error, term()}.
crossover(ParentId1, ParentId2) ->
    Parent1 = genotype:dirty_read({agent, ParentId1}),
    Parent2 = genotype:dirty_read({agent, ParentId2}),

    %% Must have compatible morphologies
    case Parent1#agent.constraint#constraint.morphology ==
         Parent2#agent.constraint#constraint.morphology of
        false ->
            {error, incompatible_morphology};
        true ->
            %% Clone parent1 as base
            OffspringId = genotype:clone_Agent(ParentId1),

            %% Crossover neurons from parent2; neuron_crossover/2 is the
            %% function this module actually defines and exports.
            ok = neuron_crossover(OffspringId, ParentId2),

            {ok, OffspringId}
    end.

%% @doc Crossover at neuron level.
%%
%% Walks over the offspring's neurons and flips a fair coin for each one.
%% On a win the first donor neuron of the same layer is looked up and, if
%% there is one, its weights are copied into the offspring neuron.
%%
%% @param OffspringId agent whose neurons may be overwritten
%% @param DonorParentId agent that donates weights
%% @returns ok
-spec neuron_crossover(term(), term()) -> ok.
neuron_crossover(OffspringId, DonorParentId) ->
    Offspring = genotype:dirty_read({agent, OffspringId}),
    OffspringCortex = genotype:dirty_read({cortex, Offspring#agent.cx_id}),

    Donor = genotype:dirty_read({agent, DonorParentId}),
    DonorCortex = genotype:dirty_read({cortex, Donor#agent.cx_id}),

    lists:foreach(
        fun(NeuronId) ->
            %% 50% chance to take the donor's weights for this neuron
            maybe_take_donor_weights(rand:uniform() < 0.5, NeuronId, DonorCortex)
        end,
        OffspringCortex#cortex.neuron_ids
    ),
    ok.

%% @private Copy weights from the matching donor neuron when the coin flip
%% succeeded; do nothing on a lost flip or when no donor neuron matches.
maybe_take_donor_weights(false, _NeuronId, _DonorCortex) ->
    ok;
maybe_take_donor_weights(true, NeuronId, DonorCortex) ->
    case find_matching_neuron(NeuronId, DonorCortex#cortex.neuron_ids) of
        {ok, DonorNeuronId} -> copy_neuron_weights(DonorNeuronId, NeuronId);
        error -> ok
    end.

%% @doc Crossover at weight level.
%%
%% Performs uniform crossover on the synaptic weights of neuron 1, using
%% neuron 2 as the second parent, and writes the updated neuron 1 back.
%%
%% Inputs are paired by their source id rather than by position, so the
%% two neurons may have different numbers of inputs or list them in a
%% different order. An input of neuron 1 keeps its own weights when
%% neuron 2 has no input from the same source, or when the two weight
%% lists differ in length.
%%
%% @param NeuronId1 neuron that is updated
%% @param NeuronId2 neuron that donates weights
%% @returns ok
-spec weight_crossover(term(), term()) -> ok.
weight_crossover(NeuronId1, NeuronId2) ->
    Neuron1 = genotype:dirty_read({neuron, NeuronId1}),
    Neuron2 = genotype:dirty_read({neuron, NeuronId2}),
    DonorInputs = Neuron2#neuron.input_idps,

    NewInputIdps = lists:map(
        fun({FromId, Weights1}) ->
            %% Only crossover if neuron 2 has an input from the same source
            case lists:keyfind(FromId, 1, DonorInputs) of
                {_, Weights2} when length(Weights1) =:= length(Weights2) ->
                    {FromId, uniform_weight_mix(Weights1, Weights2)};
                _ ->
                    {FromId, Weights1}
            end
        end,
        Neuron1#neuron.input_idps
    ),

    UpdatedNeuron = Neuron1#neuron{input_idps = NewInputIdps},
    genotype:write(UpdatedNeuron),
    ok.

%% @private Uniform crossover of two equally long weight lists: each
%% position takes the weight of either parent with equal probability.
uniform_weight_mix(Weights1, Weights2) ->
    lists:zipwith(
        fun(W1, W2) ->
            case rand:uniform() < 0.5 of
                true -> W1;
                false -> W2
            end
        end,
        Weights1,
        Weights2
    ).

%% @private Look up the first donor neuron that sits in the same layer as
%% the given neuron. Neuron ids have the shape {{Layer, _}, neuron}; donor
%% entries of any other shape are skipped.
%% Returns {ok, DonorNeuronId} or error.
find_matching_neuron({{Layer, _}, neuron}, DonorNeurons) ->
    first_in_layer(Layer, DonorNeurons).

%% @private Linear scan for the first neuron id whose layer equals Layer.
first_in_layer(_Layer, []) ->
    error;
first_in_layer(Layer, [{{L, _}, neuron} = Found | _]) when L == Layer ->
    {ok, Found};
first_in_layer(Layer, [_ | Rest]) ->
    first_in_layer(Layer, Rest).

%% @private Overwrite the target neuron's weights with the donor's.
%% Only inputs whose source id also appears among the donor's inputs are
%% replaced; every other input keeps its own weights. The updated target
%% neuron is written back to the genotype store.
copy_neuron_weights(DonorId, TargetId) ->
    Donor = genotype:dirty_read({neuron, DonorId}),
    Target = genotype:dirty_read({neuron, TargetId}),

    Inherit = fun({FromId, OwnWeights}) ->
        case lists:keyfind(FromId, 1, Donor#neuron.input_idps) of
            false ->
                %% Donor has no connection from this source
                {FromId, OwnWeights};
            {FromId, DonorWeights} ->
                {FromId, DonorWeights}
        end
    end,
    Merged = lists:map(Inherit, Target#neuron.input_idps),

    genotype:write(Target#neuron{input_idps = Merged}),
    ok.

Tests: crossover_tests.erl

  • Test basic crossover
  • Test morphology compatibility check
  • Test neuron-level crossover
  • Test weight-level crossover

Integration with population_monitor

Update population_monitor.erl to use the new modules:

%% In population_monitor.erl init/1:
%% Both strategies are stored as function-name atoms taken from Config;
%% the defaults are competition and none.
State = #population_state{
    ...
    selection_algorithm = maps:get(selection_algorithm, Config, competition),
    fitness_postprocessor = maps:get(fitness_postprocessor, Config, none),
    ...
}.

%% In handle_generation_complete/1:
%% NOTE(review): FitnessPostprocessor and SelectionAlgorithm are not bound
%% in this snippet; presumably they are read from the fitness_postprocessor
%% and selection_algorithm fields of State. TODO confirm.
%% Apply fitness postprocessing
PostprocessedFitnesses = fitness_postprocessor:FitnessPostprocessor(
    State#population_state.fitness_acc
),

%% Select survivors using configured algorithm
%% NOTE(review): this two-argument call with a survival rate fits
%% competition/2 and steady_state/2 only. top_x/2 expects an absolute count
%% as its second argument and tournament/3 takes three arguments, so both
%% need their own call shape.
Survivors = selection_algorithm:SelectionAlgorithm(
    PostprocessedFitnesses,
    State#population_state.survival_rate
),

%% Update species map
UpdatedSpeciesMap = species_identifier:update_species_map(
    State#population_state.species_map,
    Survivors
),

%% Reproduce with optional crossover
%% NOTE(review): reproduce_with_crossover/3 is not defined anywhere in this
%% document; it has to be added to population_monitor.erl.
NewAgentIds = reproduce_with_crossover(
    Survivors,
    State#population_state.total_agents,
    State#population_state.crossover_rate  % Add to state
),

Tests to Write

Module-specific tests (as noted above):

  • selection_algorithm_tests.erl
  • fitness_postprocessor_tests.erl
  • species_identifier_tests.erl
  • crossover_tests.erl

Integration tests:

  • Test population_monitor with different selection algorithms
  • Test fitness postprocessing effects
  • Test species formation over generations
  • Test crossover integration

Quality Gates

v0.6.1 Acceptance Criteria

  1. Modularity

    • [ ] Selection logic extracted to selection_algorithm.erl
    • [ ] Fitness postprocessing in fitness_postprocessor.erl
    • [ ] Species management in species_identifier.erl
    • [ ] Crossover in crossover.erl
  2. DXNN2 Alignment

    • [ ] Module names match DXNN2 architecture
    • [ ] Selection strategies follow DXNN2 patterns
    • [ ] Species identification uses fingerprints
  3. Test Coverage

    • [ ] All new modules have 80%+ coverage
    • [ ] Integration tests pass
    • [ ] Total test count > 300
  4. Static Analysis

    • [ ] Zero dialyzer warnings

Implementation Order

  1. Week 1:

    • Day 1-2: selection_algorithm.erl + tests
    • Day 3-4: fitness_postprocessor.erl + tests
    • Day 5: Integration with population_monitor
  2. Week 2:

    • Day 1-2: species_identifier.erl + tests
    • Day 3-4: crossover.erl + tests
    • Day 5: Integration tests, documentation

Effort Estimate

| Task | Estimate |
|------|----------|
| selection_algorithm.erl | 1.5 days |
| fitness_postprocessor.erl | 2 days |
| species_identifier.erl | 2 days |
| crossover.erl | 2 days |
| Integration + tests | 2 days |
| Documentation | 0.5 days |
| Total | 10 days |

Version: 0.6.1 | Phase: Structural (completion) | Status: Planned