Aludel.Evals (aludel v0.2.0)

Context for managing evaluation suites, test cases, and runs.

Summary

Functions

change_suite(suite, attrs \\ %{})

Returns a changeset for tracking suite changes.

change_test_case(test_case, attrs \\ %{})

Returns a changeset for tracking test case changes.

create_suite(attrs \\ %{})

Creates a new suite.

create_suite_run(attrs \\ %{})

Creates a new suite run.

create_test_case(attrs \\ %{})

Creates a new test case.

create_test_case_document(attrs \\ %{})

Creates a test case document.

delete_suite(suite)

Deletes a suite.

delete_suite_run(suite_run)

Deletes a suite run.

delete_test_case(test_case)

Deletes a test case.

delete_test_case_document(document)

Deletes a test case document.

execute_suite(suite, version, provider)

Executes a test suite against a prompt version and provider.

get_suite!(id)

Gets a suite by ID, raising if not found.

get_suite_run!(id)

Gets a suite run by ID, raising if not found.

get_suite_with_prompt!(id)

Gets a suite by ID with prompt preloaded, raising if not found.

get_suite_with_test_cases!(id)

Gets a suite with all test cases preloaded.

get_suite_with_test_cases_and_prompt!(id)

Gets a suite with test cases and prompt preloaded.

get_test_case!(id)

Gets a test case by ID, raising if not found.

get_test_case_document!(id)

Gets a single test case document.

get_test_case_with_documents!(id)

Gets a test case with documents preloaded.

launch_suite_execution(recipient, suite_id, version_id, provider_id)

Launches suite execution in a supervised task and reports completion back to the given recipient process.

list_suite_runs()

Lists all suite runs in the system.

list_suite_runs_for_suite(suite_id)

Gets suite runs for a specific suite.

list_suite_runs_for_suite_with_associations(suite_id)

Gets suite runs for a specific suite with prompt_version and provider preloaded.

list_suites()

Lists all suites in the system.

list_suites_with_prompt()

Lists all suites with their associated prompt preloaded.

list_test_cases()

Lists all test cases in the system.

pass_rates_by_prompt()

Calculates pass rates grouped by prompt.

reload_suite_run_with_associations(suite_run)

Reloads a suite run with associations preloaded.

retry_suite_run_test_case(suite_run, test_case_id)

Retries a single test case result within an existing suite run.

update_suite(suite, attrs)

Updates an existing suite.

update_test_case(test_case, attrs)

Updates an existing test case.

Functions

change_suite(suite, attrs \\ %{})

@spec change_suite(Aludel.Evals.Suite.t(), map()) :: Ecto.Changeset.t()

Returns a changeset for tracking suite changes.

change_test_case(test_case, attrs \\ %{})

@spec change_test_case(Aludel.Evals.TestCase.t(), map()) :: Ecto.Changeset.t()

Returns a changeset for tracking test case changes.

create_suite(attrs \\ %{})

@spec create_suite(map()) ::
  {:ok, Aludel.Evals.Suite.t()} | {:error, Ecto.Changeset.t()}

Creates a new suite.

create_suite_run(attrs \\ %{})

@spec create_suite_run(map()) ::
  {:ok, Aludel.Evals.SuiteRun.t()} | {:error, Ecto.Changeset.t()}

Creates a new suite run.

create_test_case(attrs \\ %{})

@spec create_test_case(map()) ::
  {:ok, Aludel.Evals.TestCase.t()} | {:error, Ecto.Changeset.t()}

Creates a new test case.

create_test_case_document(attrs \\ %{})

@spec create_test_case_document(map()) ::
  {:ok, Aludel.Evals.TestCaseDocument.t()} | {:error, Ecto.Changeset.t()}

Creates a test case document.

delete_suite(suite)

@spec delete_suite(Aludel.Evals.Suite.t()) ::
  {:ok, Aludel.Evals.Suite.t()} | {:error, Ecto.Changeset.t()}

Deletes a suite.

delete_suite_run(suite_run)

@spec delete_suite_run(Aludel.Evals.SuiteRun.t()) ::
  {:ok, Aludel.Evals.SuiteRun.t()} | {:error, Ecto.Changeset.t()}

Deletes a suite run.

delete_test_case(test_case)

@spec delete_test_case(Aludel.Evals.TestCase.t()) ::
  {:ok, Aludel.Evals.TestCase.t()} | {:error, Ecto.Changeset.t()}

Deletes a test case.

delete_test_case_document(document)

@spec delete_test_case_document(Aludel.Evals.TestCaseDocument.t()) ::
  {:ok, Aludel.Evals.TestCaseDocument.t()} | {:error, Ecto.Changeset.t()}

Deletes a test case document.

execute_suite(suite, version, provider)

@spec execute_suite(
  Aludel.Evals.Suite.t(),
  Aludel.Prompts.PromptVersion.t(),
  Aludel.Providers.Provider.t()
) :: {:ok, Aludel.Evals.SuiteRun.t()} | {:error, term()}

Executes a test suite against a prompt version and provider.

Runs all test cases for the suite, evaluating their assertions against the LLM output and creating a suite_run with results.

Parameters

suite: The test suite to execute
version: The prompt version to use
provider: The LLM provider to call

Returns

{:ok, suite_run} with execution results
{:error, reason} if execution fails

get_suite!(id)

@spec get_suite!(binary()) :: Aludel.Evals.Suite.t()

Gets a suite by ID, raising if not found.

get_suite_run!(id)

@spec get_suite_run!(binary()) :: Aludel.Evals.SuiteRun.t()

Gets a suite run by ID, raising if not found.

get_suite_with_prompt!(id)

@spec get_suite_with_prompt!(binary()) :: Aludel.Evals.Suite.t()

Gets a suite by ID with prompt preloaded, raising if not found.

get_suite_with_test_cases!(id)

@spec get_suite_with_test_cases!(binary()) :: Aludel.Evals.Suite.t()

Gets a suite with all test cases preloaded.

get_suite_with_test_cases_and_prompt!(id)

@spec get_suite_with_test_cases_and_prompt!(binary()) :: Aludel.Evals.Suite.t()

Gets a suite with test cases and prompt preloaded.

get_test_case!(id)

@spec get_test_case!(binary()) :: Aludel.Evals.TestCase.t()

Gets a test case by ID, raising if not found.

get_test_case_document!(id)

@spec get_test_case_document!(binary()) :: Aludel.Evals.TestCaseDocument.t()

Gets a single test case document.

Raises Ecto.NoResultsError if the document does not exist.

get_test_case_with_documents!(id)

@spec get_test_case_with_documents!(binary()) :: Aludel.Evals.TestCase.t()

Gets a test case with documents preloaded.

launch_suite_execution(recipient, suite_id, version_id, provider_id)

@spec launch_suite_execution(pid(), binary(), binary(), binary()) ::
  {:ok, reference()} | {:error, term()}

Launches suite execution in a supervised task and reports completion back to the given recipient process.

list_suite_runs()

@spec list_suite_runs() :: [Aludel.Evals.SuiteRun.t()]

Lists all suite runs in the system.

list_suite_runs_for_suite(suite_id)

Gets suite runs for a specific suite.

list_suite_runs_for_suite_with_associations(suite_id)

Gets suite runs for a specific suite with prompt_version and provider preloaded.

list_suites()

@spec list_suites() :: [Aludel.Evals.Suite.t()]

Lists all suites in the system.

list_suites_with_prompt()

@spec list_suites_with_prompt() :: [Aludel.Evals.Suite.t()]

Lists all suites with their associated prompt preloaded.

list_test_cases()

@spec list_test_cases() :: [Aludel.Evals.TestCase.t()]

Lists all test cases in the system.

pass_rates_by_prompt()

@spec pass_rates_by_prompt() :: [map()]

Calculates pass rates grouped by prompt.

Returns a list of maps with prompt info and pass rate statistics.

reload_suite_run_with_associations(suite_run)

@spec reload_suite_run_with_associations(Aludel.Evals.SuiteRun.t()) ::
  Aludel.Evals.SuiteRun.t()

Reloads a suite run with associations preloaded.

retry_suite_run_test_case(suite_run, test_case_id)

@spec retry_suite_run_test_case(Aludel.Evals.SuiteRun.t(), binary()) ::
  {:ok, Aludel.Evals.SuiteRun.t()} | {:error, term()}

Retries a single test case result within an existing suite run.

The existing embedded result is replaced in place, and the suite run aggregates are recalculated from the updated result set.

update_suite(suite, attrs)

@spec update_suite(Aludel.Evals.Suite.t(), map()) ::
  {:ok, Aludel.Evals.Suite.t()} | {:error, Ecto.Changeset.t()}

Updates an existing suite.

update_test_case(test_case, attrs)

@spec update_test_case(Aludel.Evals.TestCase.t(), map()) ::
  {:ok, Aludel.Evals.TestCase.t()} | {:error, Ecto.Changeset.t()}

Updates an existing test case.