OpenAi.Evals (OpenAI REST API Client v1.0.0)

Provides API endpoints related to evals.

Summary

Functions

Cancel an ongoing evaluation run.

Create the structure of an evaluation that can be used to test a model's performance. An evaluation is a set of testing criteria and a datasource. After creating an evaluation, you can run it on different models and model parameters. We support several types of graders and datasources. For more information, see the Evals guide.

Create a new evaluation run. This is the endpoint that will kick off grading.

Delete an evaluation.

Delete an evaluation run.

Get an evaluation by ID.

Get an evaluation run by ID.

Get an evaluation run output item by ID.

Get a list of output items for an evaluation run.

Get a list of runs for an evaluation.

List evaluations for a project.

Update certain properties of an evaluation.

Types

delete_eval_200_json_resp()
@type delete_eval_200_json_resp() :: %{
  deleted: boolean(),
  eval_id: String.t(),
  object: String.t()
}

delete_eval_run_200_json_resp()
@type delete_eval_run_200_json_resp() :: %{
  deleted: boolean() | nil,
  object: String.t() | nil,
  run_id: String.t() | nil
}

Functions

cancel_eval_run(eval_id, run_id, opts \\ [])
@spec cancel_eval_run(eval_id :: String.t(), run_id :: String.t(), opts :: keyword()) ::
  {:ok, OpenAi.Eval.Run.t()} | {:error, OpenAi.Error.error()}

Cancel an ongoing evaluation run.
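
A minimal call sketch (the IDs are placeholders):

    # Cancel a run that is still grading; returns the updated run on success.
    case OpenAi.Evals.cancel_eval_run("eval_abc123", "evalrun_abc123") do
      {:ok, %OpenAi.Eval.Run{} = run} -> run
      {:error, error} -> error
    end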

create_eval(body, opts \\ [])
@spec create_eval(body :: OpenAi.Eval.RequestCreate.t(), opts :: keyword()) ::
  {:ok, OpenAi.Eval.t()} | {:error, OpenAi.Error.error()}

Create the structure of an evaluation that can be used to test a model's performance. An evaluation is a set of testing criteria and a datasource. After creating an evaluation, you can run it on different models and model parameters. We support several types of graders and datasources. For more information, see the Evals guide.
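
A hedged sketch of a create call follows. The field names mirror the public REST request schema, and passing a plain map where the spec names OpenAi.Eval.RequestCreate.t() is an assumption about this client:

    # Illustrative only: a string_check grader over a custom data source.
    body = %{
      name: "sentiment-check",
      data_source_config: %{type: "custom", item_schema: %{type: "object"}},
      testing_criteria: [
        %{
          type: "string_check",
          name: "exact match",
          input: "{{item.answer}}",
          operation: "eq",
          reference: "{{item.expected}}"
        }
      ]
    }

    {:ok, %OpenAi.Eval{} = eval} = OpenAi.Evals.create_eval(body)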

create_eval_run(eval_id, body, opts \\ [])
@spec create_eval_run(
  eval_id :: String.t(),
  body :: OpenAi.Eval.Run.CreateRequest.t(),
  opts :: keyword()
) :: {:ok, OpenAi.Eval.Run.t()} | {:error, OpenAi.Error.error()}

Create a new evaluation run. This is the endpoint that will kick off grading.
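
A hedged sketch; the data_source map mirrors the REST schema, and substituting a bare map for OpenAi.Eval.Run.CreateRequest.t() is an assumption:

    # Grade a stored JSONL file against the eval's criteria; IDs are placeholders.
    {:ok, %OpenAi.Eval.Run{} = run} =
      OpenAi.Evals.create_eval_run("eval_abc123", %{
        name: "baseline-run",
        data_source: %{type: "jsonl", source: %{type: "file_id", id: "file-abc123"}}
      })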

delete_eval(eval_id, opts \\ [])
@spec delete_eval(eval_id :: String.t(), opts :: keyword()) ::
  {:ok, delete_eval_200_json_resp()} | {:error, OpenAi.Error.error()}

Delete an evaluation.
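
On success the result matches delete_eval_200_json_resp/0 above, so it can be pattern matched directly (placeholder ID):

    {:ok, %{deleted: true}} = OpenAi.Evals.delete_eval("eval_abc123")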

delete_eval_run(eval_id, run_id, opts \\ [])
@spec delete_eval_run(eval_id :: String.t(), run_id :: String.t(), opts :: keyword()) ::
  {:ok, delete_eval_run_200_json_resp()} | {:error, OpenAi.Error.error()}

Delete an evaluation run.

get_eval(eval_id, opts \\ [])
@spec get_eval(eval_id :: String.t(), opts :: keyword()) ::
  {:ok, OpenAi.Eval.t()} | {:error, OpenAi.Error.error()}

Get an evaluation by ID.

get_eval_run(eval_id, run_id, opts \\ [])
@spec get_eval_run(eval_id :: String.t(), run_id :: String.t(), opts :: keyword()) ::
  {:ok, OpenAi.Eval.Run.t()} | {:error, OpenAi.Error.error()}

Get an evaluation run by ID.

get_eval_run_output_item(eval_id, run_id, output_item_id, opts \\ [])
@spec get_eval_run_output_item(
  eval_id :: String.t(),
  run_id :: String.t(),
  output_item_id :: String.t(),
  opts :: keyword()
) :: {:ok, OpenAi.Eval.Run.OutputItem.t()} | {:error, OpenAi.Error.error()}

Get an evaluation run output item by ID.
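
A minimal call sketch with placeholder IDs:

    {:ok, %OpenAi.Eval.Run.OutputItem{} = item} =
      OpenAi.Evals.get_eval_run_output_item(
        "eval_abc123",
        "evalrun_abc123",
        "outputitem_abc123"
      )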

get_eval_run_output_items(eval_id, run_id, opts \\ [])
@spec get_eval_run_output_items(
  eval_id :: String.t(),
  run_id :: String.t(),
  opts :: keyword()
) ::
  {:ok, OpenAi.Eval.Run.OutputItem.List.t()} | {:error, OpenAi.Error.error()}

Get a list of output items for an evaluation run.

Options

  • after: Identifier for the last output item from the previous pagination request.

  • limit: Number of output items to retrieve.

  • status: Filter output items by status. Use failed to return only failed output items, or pass to return only passed output items.

  • order: Sort order for output items by timestamp. Use asc for ascending order or desc for descending order. Defaults to asc.
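
For example, the options combine to page through failures, newest first (passing option values as strings is an assumption; the client may expect atoms):

    {:ok, %OpenAi.Eval.Run.OutputItem.List{} = page} =
      OpenAi.Evals.get_eval_run_output_items("eval_abc123", "evalrun_abc123",
        status: "failed",
        order: "desc",
        limit: 20
      )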

get_eval_runs(eval_id, opts \\ [])
@spec get_eval_runs(eval_id :: String.t(), opts :: keyword()) ::
  {:ok, OpenAi.Eval.Run.List.t()} | {:error, OpenAi.Error.error()}

Get a list of runs for an evaluation.

Options

  • after: Identifier for the last run from the previous pagination request.
  • limit: Number of runs to retrieve.
  • order: Sort order for runs by timestamp. Use asc for ascending order or desc for descending order. Defaults to asc.
  • status: Filter runs by status. One of queued | in_progress | failed | completed | canceled.
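
For example (string option values are an assumption):

    # List completed runs, oldest first, ten at a time.
    {:ok, %OpenAi.Eval.Run.List{} = runs} =
      OpenAi.Evals.get_eval_runs("eval_abc123",
        status: "completed",
        order: "asc",
        limit: 10
      )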

list_evals(opts \\ [])
@spec list_evals(opts :: keyword()) ::
  {:ok, OpenAi.Eval.List.t()} | {:error, OpenAi.Error.error()}

List evaluations for a project.

Options

  • after: Identifier for the last eval from the previous pagination request.
  • limit: Number of evals to retrieve.
  • order: Sort order for evals by timestamp. Use asc for ascending order or desc for descending order.
  • order_by: Evals can be ordered by creation time or last updated time. Use created_at for creation time or updated_at for last updated time.
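
For example (string option values are an assumption):

    # Most recently updated evals first; use :after with the last ID to paginate.
    {:ok, %OpenAi.Eval.List{} = evals} =
      OpenAi.Evals.list_evals(order: "desc", order_by: "updated_at", limit: 20)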

update_eval(eval_id, body, opts \\ [])
@spec update_eval(eval_id :: String.t(), body :: map(), opts :: keyword()) ::
  {:ok, OpenAi.Eval.t()} | {:error, OpenAi.Error.error()}

Update certain properties of an evaluation.
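
Since the body is a plain map, a rename is a one-line sketch (treating name as an accepted key is an assumption drawn from the REST schema):

    {:ok, %OpenAi.Eval{} = eval} =
      OpenAi.Evals.update_eval("eval_abc123", %{name: "renamed-eval"})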