ExUnit integration for LLM evaluations.
Usage
defmodule MyApp.RAGEvalTest do
use ExUnit.Case
use Tribunal.EvalCase
@moduletag :eval
test "response is faithful" do
response = MyApp.RAG.query("What's the return policy?")
assert_contains response, "30 days"
assert_faithful response, context: @docs, threshold: 0.8
refute_hallucination response, context: @docs
end
end

Dataset-Driven Tests
defmodule MyApp.RAGEvalTest do
use ExUnit.Case
use Tribunal.EvalCase
@moduletag :eval
tribunal_eval "test/evals/datasets/questions.json",
provider: {MyApp.RAG, :query}
end

Run with: mix test --only eval
Summary
Functions
Generates tests from a dataset file.