Nous.Eval.Suite (nous v0.13.3)

A collection of test cases with shared configuration.

Suites group related test cases and provide shared defaults for model, timeout, and other settings.

Example

suite = Suite.new(
  name: "weather_agent_tests",
  description: "Tests for the weather agent",
  default_model: "lmstudio:ministral-3-14b-reasoning",
  test_cases: [
    TestCase.new(id: "basic", input: "What's the weather?", ...),
    TestCase.new(id: "city", input: "Weather in Tokyo?", ...)
  ]
)

Loading from YAML

{:ok, suite} = Suite.from_yaml("test/eval/suites/weather.yaml")

Filtering

# Filter by tags
filtered = Suite.filter_by_tags(suite, [:basic, :tool])

# Exclude tags
filtered = Suite.exclude_tags(suite, [:slow])

Summary

Types

t()

Functions

add_case(suite, test_case)

Add a test case to the suite.

add_cases(suite, test_cases)

Add multiple test cases to the suite.

all_tags(suite)

Get all unique tags used in the suite.

count(suite)

Get the number of test cases in the suite.

exclude_tags(suite, tags)

Exclude test cases with any of the specified tags.

filter_by_tags(suite, tags)

Filter test cases by tags (include only cases with ANY of the specified tags).

from_directory(dir)

Load all suites from a directory.

from_yaml(path)

Load a suite from a YAML file.

from_yaml!(path)

Load a suite from a YAML file, raising on error.

get_case(suite, id)

Get a test case by its ID.

new(opts)

Create a new test suite.

validate(suite)

Validate a suite and all its test cases.

Types

t()

@type t() :: %Nous.Eval.Suite{
  default_instructions: String.t() | nil,
  default_model: String.t() | nil,
  default_timeout: non_neg_integer(),
  description: String.t() | nil,
  metadata: map(),
  name: String.t(),
  parallelism: non_neg_integer(),
  retry_failed: non_neg_integer(),
  setup: (-> map()) | nil,
  teardown: (map() -> :ok) | nil,
  test_cases: [Nous.Eval.TestCase.t()]
}

Functions

add_case(suite, test_case)

@spec add_case(t(), Nous.Eval.TestCase.t()) :: t()

Add a test case to the suite.

add_cases(suite, test_cases)

@spec add_cases(t(), [Nous.Eval.TestCase.t()]) :: t()

Add multiple test cases to the suite.

all_tags(suite)

@spec all_tags(t()) :: [atom()]

Get all unique tags used in the suite.

count(suite)

@spec count(t()) :: non_neg_integer()

Get the number of test cases in the suite.

exclude_tags(suite, tags)

@spec exclude_tags(t(), [atom()]) :: t()

Exclude test cases with any of the specified tags.

filter_by_tags(suite, tags)

@spec filter_by_tags(t(), [atom()]) :: t()

Filter test cases by tags (include only cases with ANY of the specified tags).

from_directory(dir)

@spec from_directory(String.t()) :: {:ok, [t()]} | {:error, term()}

Load all suites from a directory.

Loads all .yaml and .yml files from the directory.

from_yaml(path)

@spec from_yaml(String.t()) :: {:ok, t()} | {:error, term()}

Load a suite from a YAML file.

Example YAML

name: my_suite
default_model: lmstudio:ministral-3-14b-reasoning
default_timeout: 30000

test_cases:
  - id: greeting
    input: "Say hello"
    expected:
      contains: ["hello"]
    eval_type: contains
    tags: [basic]

  - id: math
    input: "What is 2+2?"
    expected: "4"
    eval_type: exact_match

from_yaml!(path)

@spec from_yaml!(String.t()) :: t()

Load a suite from a YAML file, raising on error.

get_case(suite, id)

@spec get_case(t(), String.t()) :: Nous.Eval.TestCase.t() | nil

Get a test case by its ID. Returns `nil` if no test case in the suite has the given ID.

new(opts)

@spec new(keyword()) :: t()

Create a new test suite.

Options

:name - Suite name (required)
:description - Human-readable description
:test_cases - List of TestCase structs
:default_model - Default model for all test cases
:default_instructions - Default system instructions
:default_timeout - Default timeout in ms (default: 30_000)
:parallelism - Number of concurrent tests (default: 1)
:retry_failed - Retry count for failed tests (default: 0)
:setup - Zero-arity function called before the suite runs; returns a map
:teardown - Function called after the suite runs; receives a map and returns :ok
:metadata - Additional metadata

Example

suite = Suite.new(
  name: "my_tests",
  default_model: "lmstudio:ministral-3-14b-reasoning",
  test_cases: [
    TestCase.new(id: "test1", input: "Hello"),
    TestCase.new(id: "test2", input: "World")
  ]
)

validate(suite)

@spec validate(t()) :: :ok | {:error, term()}

Validate a suite and all its test cases.