Working with Indexes
View SourceCDX (Compound Index) files provide fast B-tree based lookups for DBF files. Indexes dramatically improve search performance on large datasets by maintaining sorted key structures.
Understanding CDX Indexes
What are CDX Indexes?
CDX files are compound index files that store:
- B-tree structures for fast key lookups
- Multiple indexes in a single file
- Key expressions for complex indexing
- Sort order information for proper collation
Benefits of Using Indexes
- Fast Searches: O(log n) lookup time vs O(n) linear search
- Range Queries: Efficient range-based searches
- Sorted Access: Iterate through records in key order
- Multiple Keys: Support for multiple indexes per file
Basic Index Operations
Opening Index Files
# Open DBF and its associated CDX file
{:ok, dbf} = Xbase.Parser.open_dbf("customers.dbf")
{:ok, cdx} = Xbase.CdxParser.open_cdx("customers.cdx")
# Index files typically have the same name as DBF with .cdx extension
{:ok, cdx} = Xbase.CdxParser.open_cdx("customers.cdx")
Creating Index Files
# Create index on a field
{:ok, cdx} = Xbase.CdxParser.create_index("customers.cdx", %{
key_expression: "LAST_NAME",
key_length: 30,
index_name: "LASTNAME_IDX"
})
# Create compound index (multiple fields)
{:ok, cdx} = Xbase.CdxParser.create_index("customers.cdx", %{
key_expression: "LAST_NAME + FIRST_NAME",
key_length: 60,
index_name: "FULLNAME_IDX"
})
# Create expression-based index
{:ok, cdx} = Xbase.CdxParser.create_index("orders.cdx", %{
key_expression: "DTOS(ORDER_DATE) + STR(AMOUNT, 10, 2)",
key_length: 18,
index_name: "DATE_AMOUNT_IDX"
})
Search Operations
Exact Key Searches
# Search for exact key match
case Xbase.CdxParser.search_key(cdx, "SMITH") do
{:ok, %{record_number: record_num, found: true}} ->
# Key found, get the record
{:ok, record} = Xbase.Parser.read_record(dbf, record_num)
IO.inspect(record.data)
{:ok, %{found: false}} ->
IO.puts("Key not found")
{:error, reason} ->
IO.puts("Search error: #{inspect(reason)}")
end
Range Searches
# Find all records with keys between "A" and "M"
{:ok, results} = Xbase.CdxParser.search_range(cdx, "A", "M")
# Process matching records
Enum.each(results, fn %{record_number: record_num, key_value: key} ->
{:ok, record} = Xbase.Parser.read_record(dbf, record_num)
IO.puts("#{key}: #{record.data["FIRST_NAME"]} #{record.data["LAST_NAME"]}")
end)
Partial Key Searches
# Find all keys starting with "SMI"
{:ok, matches} = Xbase.CdxParser.search_partial(cdx, "SMI")
# Get first 10 matching records
matches
|> Enum.take(10)
|> Enum.each(fn %{record_number: record_num} ->
{:ok, record} = Xbase.Parser.read_record(dbf, record_num)
IO.inspect(record.data)
end)
Advanced Index Usage
Multiple Indexes
# Work with multiple indexes on the same file
{:ok, name_idx} = Xbase.CdxParser.open_index(cdx, "LASTNAME_IDX")
{:ok, date_idx} = Xbase.CdxParser.open_index(cdx, "BIRTHDATE_IDX")
{:ok, amount_idx} = Xbase.CdxParser.open_index(cdx, "AMOUNT_IDX")
# Search using different indexes
{:ok, by_name} = Xbase.CdxParser.search_key(name_idx, "JOHNSON")
{:ok, by_date} = Xbase.CdxParser.search_range(date_idx, "19900101", "19991231")
{:ok, by_amount} = Xbase.CdxParser.search_range(amount_idx, "1000.00", "9999.99")
Ordered Iteration
# Iterate through records in index order
{:ok, iterator} = Xbase.CdxParser.create_iterator(cdx)
# Process records in sorted order
Xbase.CdxParser.iterate_keys(iterator, fn key_info ->
{:ok, record} = Xbase.Parser.read_record(dbf, key_info.record_number)
process_record_in_order(record)
:continue # or :halt to stop
end)
Index Maintenance
# Rebuild index for consistency
{:ok, cdx} = Xbase.CdxParser.rebuild_index(cdx, dbf)
# Compact index to remove fragmentation
{:ok, compacted_cdx} = Xbase.CdxParser.compact_index(cdx, "customers_new.cdx")
# Verify index integrity
case Xbase.CdxParser.verify_index(cdx, dbf) do
{:ok, :valid} ->
IO.puts("Index is consistent with DBF file")
{:error, inconsistencies} ->
IO.puts("Index inconsistencies found: #{inspect(inconsistencies)}")
end
Performance Optimization
Index Selection Strategy
# Choose indexes based on query patterns
defmodule IndexStrategy do
def choose_index(query_type, available_indexes) do
case query_type do
{:exact_match, field} ->
find_best_index_for_field(available_indexes, field)
{:range_query, field} ->
find_sorted_index_for_field(available_indexes, field)
{:compound_search, fields} ->
find_compound_index(available_indexes, fields)
end
end
defp find_best_index_for_field(indexes, field) do
# Find index with field as primary key
Enum.find(indexes, fn idx ->
idx.key_expression == field or
String.starts_with?(idx.key_expression, field <> " + ")
end)
end
end
Caching Strategies
# Use ETS for index result caching
defmodule IndexCache do
def setup_cache do
:ets.new(:index_cache, [:set, :public, :named_table])
end
def cached_search(cdx, key) do
case :ets.lookup(:index_cache, key) do
[{^key, result}] ->
result
[] ->
result = Xbase.CdxParser.search_key(cdx, key)
:ets.insert(:index_cache, {key, result})
result
end
end
end
Index Types and Expressions
Simple Field Indexes
# Single field indexes
indexes = [
%{key_expression: "CUSTOMER_ID", key_length: 10}, # Numeric field
%{key_expression: "LAST_NAME", key_length: 30}, # Character field
%{key_expression: "ORDER_DATE", key_length: 8}, # Date field
%{key_expression: "IS_ACTIVE", key_length: 1} # Logical field
]
Compound Field Indexes
# Multiple field indexes for complex queries
compound_indexes = [
%{
key_expression: "STATE + CITY + LAST_NAME",
key_length: 65,
name: "GEOGRAPHIC_NAME"
},
%{
key_expression: "DTOS(ORDER_DATE) + CUSTOMER_ID",
key_length: 18,
name: "DATE_CUSTOMER"
}
]
Expression-Based Indexes
# Complex expression indexes
expression_indexes = [
%{
# Uppercase index for case-insensitive searches
key_expression: "UPPER(LAST_NAME)",
key_length: 30,
name: "UPPER_LASTNAME"
},
%{
# Combined date and amount for financial queries
key_expression: "DTOS(ORDER_DATE) + STR(AMOUNT, 12, 2)",
key_length: 20,
name: "DATE_AMOUNT"
},
%{
# Full name concatenation
key_expression: "TRIM(FIRST_NAME) + ' ' + TRIM(LAST_NAME)",
key_length: 61,
name: "FULL_NAME"
}
]
Common Index Patterns
Customer Database
# Typical indexes for a customer database
defmodule CustomerIndexes do
def create_standard_indexes(dbf_path) do
cdx_path = String.replace(dbf_path, ".dbf", ".cdx")
indexes = [
# Primary key
%{key_expression: "CUSTOMER_ID", key_length: 10, name: "PK_CUSTOMER"},
# Name searches
%{key_expression: "LAST_NAME", key_length: 30, name: "IDX_LASTNAME"},
%{key_expression: "LAST_NAME + FIRST_NAME", key_length: 60, name: "IDX_FULLNAME"},
# Geographic searches
%{key_expression: "STATE + CITY", key_length: 35, name: "IDX_LOCATION"},
# Contact searches
%{key_expression: "EMAIL", key_length: 50, name: "IDX_EMAIL"},
%{key_expression: "PHONE", key_length: 15, name: "IDX_PHONE"},
# Business logic
%{key_expression: "UPPER(COMPANY_NAME)", key_length: 50, name: "IDX_COMPANY"}
]
{:ok, cdx} = Xbase.CdxParser.create_multiple_indexes(cdx_path, indexes)
cdx
end
end
Sales Database
# Indexes optimized for sales queries
defmodule SalesIndexes do
def create_sales_indexes(dbf_path) do
cdx_path = String.replace(dbf_path, ".dbf", ".cdx")
indexes = [
# Time-based queries
%{key_expression: "DTOS(SALE_DATE)", key_length: 8, name: "IDX_DATE"},
%{key_expression: "DTOS(SALE_DATE) + CUSTOMER_ID", key_length: 18, name: "IDX_DATE_CUSTOMER"},
# Amount-based queries
%{key_expression: "STR(AMOUNT, 12, 2)", key_length: 12, name: "IDX_AMOUNT"},
%{key_expression: "PRODUCT_CODE + DTOS(SALE_DATE)", key_length: 18, name: "IDX_PRODUCT_DATE"},
# Status tracking
%{key_expression: "STATUS + DTOS(SALE_DATE)", key_length: 11, name: "IDX_STATUS_DATE"},
# Salesperson performance
%{key_expression: "SALESPERSON_ID + DTOS(SALE_DATE)", key_length: 18, name: "IDX_SALES_PERSON"}
]
{:ok, cdx} = Xbase.CdxParser.create_multiple_indexes(cdx_path, indexes)
cdx
end
end
Error Handling
Index-Specific Errors
defmodule IndexErrorHandler do
def safe_index_operation(operation_fn) do
try do
operation_fn.()
rescue
error ->
handle_index_error(error)
end
end
defp handle_index_error(error) do
case error do
%{reason: :index_corrupted} ->
{:error, "Index file is corrupted and needs rebuilding"}
%{reason: :key_too_long} ->
{:error, "Search key exceeds maximum index key length"}
%{reason: :index_not_found} ->
{:error, "Specified index does not exist in CDX file"}
%{reason: :unsupported_expression} ->
{:error, "Index expression contains unsupported functions"}
_ ->
{:error, "Unexpected index error: #{inspect(error)}"}
end
end
end
Recovery Procedures
defmodule IndexRecovery do
def recover_corrupted_index(cdx_path, dbf_path) do
with {:ok, dbf} <- Xbase.Parser.open_dbf(dbf_path),
{:ok, backup_path} <- create_backup(cdx_path),
{:ok, cdx} <- rebuild_from_dbf(cdx_path, dbf) do
case Xbase.CdxParser.verify_index(cdx, dbf) do
{:ok, :valid} ->
File.rm(backup_path)
{:ok, "Index successfully recovered"}
{:error, _} ->
File.rename(backup_path, cdx_path)
{:error, "Recovery failed, original index restored"}
end
end
end
defp create_backup(cdx_path) do
backup_path = cdx_path <> ".backup"
case File.copy(cdx_path, backup_path) do
{:ok, _} -> {:ok, backup_path}
error -> error
end
end
defp rebuild_from_dbf(cdx_path, dbf) do
# Extract index definitions from existing CDX or use defaults
index_definitions = extract_index_definitions(cdx_path)
Xbase.CdxParser.create_multiple_indexes(cdx_path, index_definitions)
end
end
Best Practices
1. Index Design Guidelines
# Good index design principles
defmodule IndexDesignGuidelines do
def design_indexes(table_analysis) do
[
# Index frequently searched fields
create_search_indexes(table_analysis.search_patterns),
# Index foreign key relationships
create_relationship_indexes(table_analysis.relationships),
# Index sort operations
create_sort_indexes(table_analysis.sort_patterns),
# Compound indexes for multi-field queries
create_compound_indexes(table_analysis.compound_queries)
]
|> List.flatten()
end
defp create_search_indexes(search_patterns) do
search_patterns
|> Enum.filter(fn pattern -> pattern.frequency > 0.1 end) # 10% threshold
|> Enum.map(fn pattern ->
%{
key_expression: pattern.field,
key_length: calculate_key_length(pattern.field),
name: "IDX_#{String.upcase(pattern.field)}"
}
end)
end
end
2. Maintenance Schedule
defmodule IndexMaintenance do
def schedule_maintenance(cdx_files) do
Enum.each(cdx_files, fn cdx_path ->
case analyze_index_health(cdx_path) do
{:needs_rebuild, reason} ->
schedule_rebuild(cdx_path, reason)
{:needs_compact, fragmentation} ->
schedule_compaction(cdx_path, fragmentation)
{:healthy, stats} ->
log_health_status(cdx_path, stats)
end
end)
end
defp analyze_index_health(cdx_path) do
with {:ok, cdx} <- Xbase.CdxParser.open_cdx(cdx_path),
{:ok, stats} <- Xbase.CdxParser.get_index_statistics(cdx) do
cond do
stats.corruption_detected -> {:needs_rebuild, :corruption}
stats.fragmentation_ratio > 0.4 -> {:needs_compact, stats.fragmentation_ratio}
true -> {:healthy, stats}
end
end
end
end
3. Performance Monitoring
defmodule IndexPerformanceMonitor do
def monitor_index_performance(cdx, operations) do
Enum.map(operations, fn operation ->
{time, result} = :timer.tc(fn -> execute_operation(cdx, operation) end)
%{
operation: operation,
execution_time_ms: time / 1000,
result: result,
performance_rating: rate_performance(time, operation)
}
end)
end
defp rate_performance(time_microseconds, operation) do
time_ms = time_microseconds / 1000
case operation.type do
:exact_search when time_ms < 1 -> :excellent
:exact_search when time_ms < 10 -> :good
:range_query when time_ms < 50 -> :excellent
:range_query when time_ms < 200 -> :good
_ -> :needs_optimization
end
end
end
Integration with DBF Operations
Coordinated File Operations
defmodule CoordinatedOperations do
def insert_with_index_update(dbf, cdx, record_data) do
Xbase.Parser.with_transaction(dbf, fn dbf ->
# Add record to DBF
{:ok, updated_dbf} = Xbase.Parser.append_record(dbf, record_data)
# Update all indexes
record_index = updated_dbf.header.record_count - 1
{:ok, updated_cdx} = update_all_indexes(cdx, record_data, record_index)
{:ok, {updated_dbf, updated_cdx}}
end)
end
def update_with_index_maintenance(dbf, cdx, record_index, new_data) do
# Get old record for index removal
{:ok, old_record} = Xbase.Parser.read_record(dbf, record_index)
Xbase.Parser.with_transaction(dbf, fn dbf ->
# Update DBF record
{:ok, updated_dbf} = Xbase.Parser.update_record(dbf, record_index, new_data)
# Remove old index entries
{:ok, cdx_after_removal} = remove_from_indexes(cdx, old_record.data, record_index)
# Add new index entries
{:ok, updated_cdx} = add_to_indexes(cdx_after_removal, new_data, record_index)
{:ok, {updated_dbf, updated_cdx}}
end)
end
defp update_all_indexes(cdx, record_data, record_index) do
# Implementation would iterate through all indexes in CDX
# and add entries for the new record
Xbase.CdxParser.add_record_to_all_indexes(cdx, record_data, record_index)
end
end
This comprehensive guide covers all aspects of working with CDX indexes in Xbase, from basic operations to advanced optimization strategies.