TantivyEx.SpaceAnalysis (TantivyEx v0.4.1)

View Source

Space usage analysis and optimization recommendations for TantivyEx indexes.

This module provides comprehensive analysis of index space usage, including segment breakdown, field-level analysis, and optimization recommendations.

Features

  • Detailed space usage analysis by segments and fields
  • Storage breakdown by component type (postings, terms, fast fields, etc.)
  • Comparison between different analysis snapshots
  • Optimization recommendations based on usage patterns
  • Caching for expensive analysis operations

Usage

# Create a space analysis resource
{:ok, analyzer} = TantivyEx.SpaceAnalysis.new()

# Configure analysis settings
:ok = TantivyEx.SpaceAnalysis.configure(analyzer, %{
  include_file_details: true,
  include_field_breakdown: true,
  cache_results: true,
  cache_ttl_seconds: 300
})

# Analyze an index
{:ok, analysis} = TantivyEx.SpaceAnalysis.analyze_index(analyzer, index, "snapshot_1")

# Get optimization recommendations
{:ok, recommendations} = TantivyEx.SpaceAnalysis.get_recommendations(analyzer, "snapshot_1")

# Compare two analyses by ID ("snapshot_2" is assumed to come from a later analyze_index call)
{:ok, comparison} = TantivyEx.SpaceAnalysis.compare(analyzer, "snapshot_1", "snapshot_2")

Summary

Functions

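analyze_index(analysis_resource, index_resource, analysis_id) - Analyze space usage for an index.

clear_cache(analysis_resource) - Clear the analysis cache.

compare(analysis_resource, analysis_id_1, analysis_id_2) - Compare space usage between two analyses.

configure(analysis_resource, config) - Configure space analysis settings.

format_summary(analysis) - Format analysis results for human-readable output.

get_cached(analysis_resource, analysis_id) - Get cached analysis results.

get_efficiency_metrics(analysis) - Get space efficiency metrics.

get_recommendations(analysis_resource, analysis_id) - Get optimization recommendations based on analysis.

new() - Create a new space analysis resource.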

Types

analysis_config()

@type analysis_config() :: %{
  include_file_details: boolean(),
  include_field_breakdown: boolean(),
  cache_results: boolean(),
  cache_ttl_seconds: pos_integer()
}

analysis_resource()

@type analysis_resource() :: reference()

field_space_usage()

@type field_space_usage() :: %{
  field_name: String.t(),
  total_size_bytes: non_neg_integer(),
  indexed_size_bytes: non_neg_integer(),
  stored_size_bytes: non_neg_integer(),
  fast_fields_size_bytes: non_neg_integer(),
  percentage_of_index: float()
}

index_metadata()

@type index_metadata() :: %{
  total_docs: non_neg_integer(),
  deleted_docs: non_neg_integer(),
  schema_size_bytes: non_neg_integer(),
  num_fields: non_neg_integer(),
  index_settings: %{required(String.t()) => String.t()}
}

index_resource()

@type index_resource() :: reference()

recommendation()

@type recommendation() :: %{
  type: String.t(),
  priority: String.t(),
  description: String.t(),
  potential_savings_bytes: non_neg_integer()
}

segment_analysis()

@type segment_analysis() :: %{
  segment_id: String.t(),
  size_bytes: non_neg_integer(),
  doc_count: non_neg_integer(),
  deleted_docs: non_neg_integer(),
  compression_ratio: float(),
  files: [segment_file()]
}

segment_file()

@type segment_file() :: %{
  file_type: String.t(),
  file_name: String.t(),
  size_bytes: non_neg_integer(),
  percentage_of_segment: float()
}

space_analysis()

@type space_analysis() :: %{
  total_size_bytes: non_neg_integer(),
  segment_count: non_neg_integer(),
  segments: [segment_analysis()],
  field_analysis: %{required(String.t()) => field_space_usage()},
  index_metadata: index_metadata(),
  storage_breakdown: storage_breakdown()
}

storage_breakdown()

@type storage_breakdown() :: %{
  postings: non_neg_integer(),
  term_dictionary: non_neg_integer(),
  fast_fields: non_neg_integer(),
  field_norms: non_neg_integer(),
  stored_fields: non_neg_integer(),
  positions: non_neg_integer(),
  delete_bitset: non_neg_integer(),
  other: non_neg_integer()
}

Functions

analyze_index(analysis_resource, index_resource, analysis_id)

@spec analyze_index(analysis_resource(), index_resource(), String.t()) ::
  {:ok, space_analysis()} | {:error, term()}

Analyze space usage for an index.

Performs comprehensive analysis of index space usage including segments, fields, and storage breakdown.

Parameters

  • analysis_resource - The analysis resource
  • index_resource - The index to analyze
  • analysis_id - Unique identifier for this analysis

Returns

  • {:ok, analysis} - Space analysis results
  • {:error, reason} - If analysis fails

Examples

{:ok, analysis} = TantivyEx.SpaceAnalysis.analyze_index(analyzer, index, "daily_snapshot")
# Returns detailed space analysis with size and field information

clear_cache(analysis_resource)

@spec clear_cache(analysis_resource()) :: :ok | {:error, term()}

Clear the analysis cache.

Removes all cached analysis results to free memory.

Parameters

  • analysis_resource - The analysis resource

Returns

  • :ok - If cache is cleared successfully
  • {:error, reason} - If clearing fails

Examples

:ok = TantivyEx.SpaceAnalysis.clear_cache(analyzer)

compare(analysis_resource, analysis_id_1, analysis_id_2)

@spec compare(analysis_resource(), String.t(), String.t()) ::
  {:ok, map()} | {:error, term()}

Compare space usage between two analyses.

Provides detailed comparison showing changes in size, segments, and documents.

Parameters

  • analysis_resource - The analysis resource
  • analysis_id_1 - First analysis identifier
  • analysis_id_2 - Second analysis identifier

Returns

  • {:ok, comparison} - Comparison results
  • {:error, reason} - If comparison fails

Examples

{:ok, comparison} = TantivyEx.SpaceAnalysis.compare(analyzer, "before", "after")

IO.puts("Size change: #{comparison.comparison.size_difference_bytes} bytes")
IO.puts("Change %: #{comparison.comparison.size_change_percentage}%")

configure(analysis_resource, config)

@spec configure(analysis_resource(), analysis_config()) :: :ok | {:error, term()}

Configure space analysis settings.

Parameters

  • analysis_resource - The analysis resource
  • config - Configuration map with analysis settings

Configuration Options

  • :include_file_details - Include detailed file breakdown (default: true)
  • :include_field_breakdown - Include per-field analysis (default: true)
  • :cache_results - Cache analysis results (default: true)
  • :cache_ttl_seconds - Cache TTL in seconds (default: 300)

Returns

  • :ok - If configuration succeeds
  • {:error, reason} - If configuration fails

Examples

:ok = TantivyEx.SpaceAnalysis.configure(analyzer, %{
  include_file_details: true,
  include_field_breakdown: true,
  cache_results: true,
  cache_ttl_seconds: 600
})

format_summary(analysis)

@spec format_summary(space_analysis()) :: {:ok, String.t()}

Format analysis results for human-readable output.

Parameters

  • analysis - Space analysis results

Returns

  • {:ok, formatted_string} - Human-readable analysis summary

Examples

{:ok, analysis} = TantivyEx.SpaceAnalysis.analyze_index(analyzer, index, "test")
{:ok, summary} = TantivyEx.SpaceAnalysis.format_summary(analysis)
IO.puts(summary)

get_cached(analysis_resource, analysis_id)

@spec get_cached(analysis_resource(), String.t()) :: {:ok, map()} | {:error, term()}

Get cached analysis results.

Retrieves previously cached analysis results if available.

Parameters

  • analysis_resource - The analysis resource
  • analysis_id - The analysis identifier

Returns

  • {:ok, analysis_summary} - If cached results are found
  • {:error, :not_found} - If no cached results exist
  • {:error, reason} - If retrieval fails

Examples

case TantivyEx.SpaceAnalysis.get_cached(analyzer, "daily_snapshot") do
  {:ok, summary} ->
    IO.puts("Found cached analysis: #{summary.total_size_bytes} bytes")

  {:error, :not_found} ->
    # Run new analysis
    TantivyEx.SpaceAnalysis.analyze_index(analyzer, index, "daily_snapshot")

  {:error, reason} ->
    # Retrieval failed for another reason; pass the error through
    {:error, reason}
end

get_efficiency_metrics(analysis)

@spec get_efficiency_metrics(space_analysis()) :: {:ok, map()}

Get space efficiency metrics.

Convenience function to extract key efficiency indicators from analysis.

Parameters

  • analysis - Space analysis results

Returns

  • {:ok, metrics} - Efficiency metrics

Examples

{:ok, analysis} = TantivyEx.SpaceAnalysis.analyze_index(analyzer, index, "test")
{:ok, metrics} = TantivyEx.SpaceAnalysis.get_efficiency_metrics(analysis)
# metrics is a map, for example:
# %{space_per_doc: 1024.5, compression_ratio: 0.75, deletion_ratio: 0.05}

get_recommendations(analysis_resource, analysis_id)

@spec get_recommendations(analysis_resource(), String.t()) ::
  {:ok, map()} | {:error, term()}

Get optimization recommendations based on analysis.

Analyzes space usage patterns and provides actionable recommendations for optimizing index storage.

Parameters

  • analysis_resource - The analysis resource
  • analysis_id - The analysis identifier

Returns

  • {:ok, result} - A map whose :recommendations entry holds the list of optimization recommendations (see recommendation())
  • {:error, reason} - If analysis fails

Recommendation Types

  • merge_segments - Reduce segment count through merging
  • optimize_deletes - Clean up deleted documents
  • field_optimization - Optimize field storage settings

Examples

{:ok, %{recommendations: recs}} = TantivyEx.SpaceAnalysis.get_recommendations(analyzer, "snapshot")

for rec <- recs do
  IO.puts("#{rec.priority}: #{rec.description}")
  IO.puts("Potential savings: #{rec.potential_savings_bytes} bytes")
end

new()

@spec new() :: {:ok, analysis_resource()} | {:error, term()}

Create a new space analysis resource.

Returns

  • {:ok, analysis_resource} - A new analysis resource
  • {:error, reason} - If creation fails

Examples

{:ok, analyzer} = TantivyEx.SpaceAnalysis.new()