Video Generation Guide

View Source

Generate high-quality videos from text descriptions using Google's Veo models through the Vertex AI API.

Overview

The Video Generation API (Veo) allows you to:

  • Generate high-quality videos from text prompts
  • Create videos with customizable duration, aspect ratio, and frame rate
  • Monitor generation progress with long-running operations

Important Notes:

  • Video generation requires Vertex AI authentication (not available on Gemini API)
  • Generation is asynchronous and can take 2-5 minutes per video
  • Videos are typically 4-8 seconds in duration
  • Generated videos are stored in Google Cloud Storage (GCS)
  • Subject to Google's safety filters and Responsible AI policies

Quick Start

alias Gemini.APIs.Videos
alias Gemini.Types.Generation.Video.VideoGenerationConfig

# Start video generation
{:ok, operation} = Videos.generate(
  "A cat playing piano in a cozy living room"
)

# Wait for completion (automatic polling)
{:ok, completed_op} = Videos.wait_for_completion(operation.name)

# Extract video URIs
{:ok, videos} = Gemini.Types.Generation.Video.extract_videos(completed_op)

# Get the GCS URI
video_uri = hd(videos).video_uri
IO.puts("Video ready: #{video_uri}")

Generating Videos

Basic Generation

# Default configuration (8 seconds, 16:9, 24fps)
{:ok, operation} = Videos.generate(
  "A serene mountain landscape with flowing river"
)

Custom Configuration

config = %VideoGenerationConfig{
  number_of_videos: 2,
  duration_seconds: 4,
  aspect_ratio: "9:16",  # Vertical for mobile
  fps: 30,
  compression_format: :h265
}

{:ok, operation} = Videos.generate(
  "Cinematic drone shot of a futuristic city at night",
  config
)

Video Durations

Supported durations:

  • 4 seconds - Shorter, faster generation
  • 8 seconds - Default, more content

config = %VideoGenerationConfig{
  duration_seconds: 4
}

{:ok, operation} = Videos.generate("Quick action sequence", config)

Aspect Ratios

Supported aspect ratios:

  • "16:9" - Horizontal/desktop (1280x720) - default
  • "9:16" - Vertical/mobile (720x1280)
  • "1:1" - Square (1024x1024)

# Vertical video for social media
config = %VideoGenerationConfig{
  aspect_ratio: "9:16",
  duration_seconds: 4
}

{:ok, operation} = Videos.generate(
  "A person dancing in a vibrant street",
  config
)

Frame Rates

Supported frame rates:

  • 24 fps - Cinematic (default)
  • 25 fps - PAL standard
  • 30 fps - Smoother motion

config = %VideoGenerationConfig{
  fps: 30,
  duration_seconds: 8
}

{:ok, operation} = Videos.generate("Fast-paced sports action", config)

Waiting for Completion

Automatic Polling

The recommended way to wait for video generation:

{:ok, operation} = Videos.generate("A beautiful sunset over ocean")

# Wait with automatic polling
{:ok, completed} = Videos.wait_for_completion(
  operation.name,
  poll_interval: 10_000,  # Check every 10 seconds
  timeout: 300_000,       # Wait up to 5 minutes
  on_progress: fn op ->
    if progress = Gemini.Types.Operation.get_progress(op) do
      IO.puts("Progress: #{progress}%")
    end
  end
)

# Check if successful
if Gemini.Types.Operation.succeeded?(completed) do
  {:ok, videos} = Gemini.Types.Generation.Video.extract_videos(completed)
  IO.puts("Success! Video: #{hd(videos).video_uri}")
else
  IO.puts("Failed: #{completed.error.message}")
end

Manual Polling

For more control over the polling process:

{:ok, operation} = Videos.generate("A cat playing with toys")

# Poll manually in a loop
defmodule VideoPoller do
  @moduledoc """
  Polls a video generation operation until it finishes, fails, or the
  attempt budget is exhausted.
  """

  # Pause between two consecutive status checks, in milliseconds.
  @poll_interval_ms 10_000

  @doc """
  Checks the status of `operation_name` up to `max_attempts` times, sleeping
  between checks.

  Returns:

    * `{:ok, operation}` once the operation reports success
    * `{:error, error}` with the operation's `error` field once it reports failure
    * `{:error, reason}` if the status request itself returns an error
    * `{:error, "Timeout: Video generation took too long"}` when all attempts
      are used up while the operation is still running
  """
  def poll_until_complete(operation_name, max_attempts \\ 30) do
    poll_loop(operation_name, 0, max_attempts)
  end

  defp poll_loop(operation_name, attempt, max_attempts) when attempt < max_attempts do
    # Match on the result instead of asserting `{:ok, op}` so that a failed
    # status request reaches the caller as `{:error, reason}` rather than
    # crashing the poller with a MatchError.
    case Videos.get_operation(operation_name) do
      {:ok, op} ->
        cond do
          Gemini.Types.Operation.succeeded?(op) ->
            {:ok, op}

          Gemini.Types.Operation.failed?(op) ->
            {:error, op.error}

          true ->
            # Still running, wait and try again
            Process.sleep(@poll_interval_ms)
            poll_loop(operation_name, attempt + 1, max_attempts)
        end

      {:error, reason} ->
        {:error, reason}
    end
  end

  # Attempt budget exhausted while the operation was still running.
  defp poll_loop(_operation_name, _attempt, _max_attempts) do
    {:error, "Timeout: Video generation took too long"}
  end
end

# Handle both outcomes of the manual poller: a completed operation or an error.
case VideoPoller.poll_until_complete(operation.name) do
  {:ok, completed} ->
    {:ok, videos} = Gemini.Types.Generation.Video.extract_videos(completed)
    # Use the extracted videos so the binding is not dead code.
    IO.puts("Video ready! #{hd(videos).video_uri}")

  {:error, reason} ->
    IO.puts("Error: #{inspect(reason)}")
end

Progress Tracking

Monitor generation progress:

{:ok, operation} = Videos.generate("An animated forest scene")

# Wrap operation for video-specific helpers
video_op = Videos.wrap_operation(operation)

IO.puts("Progress: #{video_op.progress_percent}%")
IO.puts("ETA: #{video_op.estimated_completion_time}")

Working with Generated Videos

Downloading Videos

Videos are stored in GCS and can be downloaded:

{:ok, completed} = Videos.wait_for_completion(operation.name)
{:ok, videos} = Gemini.Types.Generation.Video.extract_videos(completed)

video = hd(videos)

# GCS URI format: gs://bucket-name/path/to/video.mp4
gcs_uri = video.video_uri

# Download using Google Cloud Storage client or gsutil
# gsutil cp #{gcs_uri} ./my_video.mp4

Video Metadata

{:ok, videos} = Gemini.Types.Generation.Video.extract_videos(completed_op)
video = hd(videos)

IO.inspect(video.mime_type)          # "video/mp4"
IO.inspect(video.duration_seconds)   # 8.0
IO.inspect(video.resolution)         # %{"width" => 1280, "height" => 720}
IO.inspect(video.safety_attributes)  # Safety classification
IO.inspect(video.rai_info)           # Responsible AI info

Advanced Configuration

Compression Formats

# H.264 (widely compatible, default)
config = %VideoGenerationConfig{
  compression_format: :h264
}

# H.265 (better quality, smaller file size)
config = %VideoGenerationConfig{
  compression_format: :h265
}

Negative Prompts

Specify what to avoid in the video:

config = %VideoGenerationConfig{
  negative_prompt: "blurry, low quality, distorted, shaky camera",
  guidance_scale: 10.0
}

{:ok, operation} = Videos.generate("High quality cinematic shot", config)

Guidance Scale

Control how closely the model follows your prompt:

# Lower values = more creative/varied
config = %VideoGenerationConfig{
  guidance_scale: 5.0
}

# Higher values = stricter adherence to prompt
config = %VideoGenerationConfig{
  guidance_scale: 15.0
}

Reproducible Generation

Use seeds for consistent results:

config = %VideoGenerationConfig{
  seed: 12345,
  number_of_videos: 1
}

# Generate the same video multiple times
{:ok, op1} = Videos.generate("A red balloon floating", config)
{:ok, op2} = Videos.generate("A red balloon floating", config)
# Videos will be identical

Safety and Content Filtering

Safety Filter Levels

# Strict filtering (recommended for public applications)
config = %VideoGenerationConfig{
  safety_filter_level: :block_most
}

# Moderate filtering (default)
config = %VideoGenerationConfig{
  safety_filter_level: :block_some
}

# Permissive filtering
config = %VideoGenerationConfig{
  safety_filter_level: :block_few
}

Person Generation Policy

# Allow adult humans (18+)
config = %VideoGenerationConfig{
  person_generation: :allow_adult
}

# Allow people of all ages
config = %VideoGenerationConfig{
  person_generation: :allow_all
}

# Don't generate recognizable people (default)
config = %VideoGenerationConfig{
  person_generation: :dont_allow
}

Operation Management

Listing Operations

# List all video generation operations
{:ok, response} = Videos.list_operations()

Enum.each(response.operations, fn op ->
  IO.puts("#{op.name}: #{if op.done, do: "complete", else: "running"}")
end)

# List only completed operations
{:ok, response} = Videos.list_operations(filter: "done=true")

# Pagination
{:ok, response} = Videos.list_operations(page_size: 10)

if Gemini.Types.ListOperationsResponse.has_more_pages?(response) do
  {:ok, next_page} = Videos.list_operations(
    page_token: response.next_page_token
  )
end

Canceling Operations

{:ok, operation} = Videos.generate("A long video")

# Cancel if taking too long
:ok = Videos.cancel(operation.name)

Error Handling

case Videos.generate("A realistic video") do
  {:ok, operation} ->
    case Videos.wait_for_completion(operation.name) do
      {:ok, completed} ->
        if Gemini.Types.Operation.succeeded?(completed) do
          {:ok, videos} = Gemini.Types.Generation.Video.extract_videos(completed)
          IO.puts("Success! Generated #{length(videos)} videos")
        else
          IO.puts("Generation failed: #{completed.error.message}")
        end

      {:error, :timeout} ->
        IO.puts("Timeout: Video generation took too long")
        # Operation may still complete later
        Videos.cancel(operation.name)

      {:error, reason} ->
        IO.puts("Error: #{inspect(reason)}")
    end

  {:error, %{type: :auth_error}} ->
    IO.puts("Authentication failed. Check Vertex AI credentials.")

  {:error, %{type: :api_error, message: msg}} ->
    IO.puts("API error: #{msg}")

  {:error, reason} ->
    IO.puts("Error: #{inspect(reason)}")
end

Best Practices

1. Be Specific and Descriptive

# Vague
"A landscape"

# Specific
"Cinematic aerial drone shot slowly panning over a serene mountain lake at sunrise, with mist rising from the water and golden light illuminating snow-capped peaks in the background"

2. Specify Camera Movement

prompts = [
  "Static shot of a bustling city street",
  "Slow zoom in on a blooming flower",
  "Pan left across a vast desert landscape",
  "Dolly forward through a dark forest",
  "Orbital shot circling around a modern building"
]

3. Use Temporal Descriptions

"Time-lapse of clouds moving across the sky at sunset"
"Slow motion shot of water droplets splashing"
"Quick cut montage of city life"

4. Batch Processing

prompts = [
  "A red car driving down a highway",
  "A blue ocean with waves crashing",
  "A green forest with sunlight filtering through trees"
]

config = %VideoGenerationConfig{
  duration_seconds: 4,
  aspect_ratio: "16:9"
}

# Start one generation per prompt and keep the returned operations
operations =
  Enum.map(prompts, fn prompt ->
    {:ok, started} = Videos.generate(prompt, config)
    started
  end)

# Wait for every operation concurrently; the stream timeout is slightly
# larger than the per-operation wait timeout
results =
  operations
  |> Task.async_stream(
    fn started -> Videos.wait_for_completion(started.name, timeout: 300_000) end,
    timeout: 310_000
  )
  |> Enum.to_list()

5. Handle Long-Running Operations

# Start generation
{:ok, operation} = Videos.generate("Epic cinematic scene")

# Store operation name for later
operation_id = operation.name

# Later, in another process/request
{:ok, current_status} = Videos.get_operation(operation_id)

if current_status.done do
  {:ok, videos} = Gemini.Types.Generation.Video.extract_videos(current_status)
  # Process videos
else
  # Render metadata with inspect/1: interpolating a map directly raises
  # Protocol.UndefinedError because String.Chars is not implemented for maps.
  # NOTE(review): metadata is presumably a map or nil; confirm against Gemini.Types.Operation.
  IO.puts("Still generating... #{inspect(current_status.metadata)}")
end

Performance Considerations

Generation Time

Typical generation times:

  • 4 seconds video: ~2-3 minutes
  • 8 seconds video: ~3-5 minutes

Factors affecting speed:

  • Video duration (longer = slower)
  • Complexity of the prompt
  • Resolution and frame rate
  • System load

Resource Management

# Limit concurrent video generations
max_concurrent = 3

prompts
|> Task.async_stream(
  fn prompt ->
    {:ok, op} = Videos.generate(prompt)
    Videos.wait_for_completion(op.name)
  end,
  max_concurrency: max_concurrent,
  timeout: 600_000  # 10 minutes per video
)
|> Enum.to_list()

Configuration Options

VideoGenerationConfig

| Field | Type | Default | Description |
|-------|------|---------|-------------|
| number_of_videos | 1..4 | 1 | Number of videos to generate |
| duration_seconds | 4 or 8 | 8 | Video duration |
| aspect_ratio | String.t() | "16:9" | Video aspect ratio |
| fps | 24, 25, or 30 | 24 | Frames per second |
| compression_format | :h264 or :h265 | :h264 | Video compression |
| safety_filter_level | atom() | :block_some | Content filtering level |
| negative_prompt | String.t() | nil | What to avoid |
| seed | integer() | nil | Random seed for reproducibility |
| guidance_scale | float() | nil | Prompt adherence (1.0-20.0) |
| person_generation | atom() | :dont_allow | Person generation policy |

Troubleshooting

Operation Times Out

# Increase timeout
{:ok, completed} = Videos.wait_for_completion(
  operation.name,
  timeout: 600_000  # 10 minutes
)

# Or poll manually with longer intervals
{:ok, op} = Videos.get_operation(operation.name)

Content Blocked by Safety Filters

{:ok, completed} = Videos.wait_for_completion(operation.name)

if completed.error do
  IO.puts("Error: #{completed.error.message}")
  # Try with different prompt or safety settings
end

Downloading from GCS

# Use Google Cloud Storage client
# Or gsutil command line tool:
# gsutil cp gs://bucket/path/video.mp4 ./local_video.mp4

# With authentication
# gcloud auth application-default login
# gsutil cp #{video.video_uri} ./output.mp4

See Also