Complex Join Patterns Guide
This guide provides comprehensive examples and patterns for implementing complex joins using Selecto's advanced join infrastructure.
table-of-contents
Table of Contents
- Star Schema Dimensions
- Hierarchical Joins
- Many-to-Many Tagging
- Self-Referencing Hierarchies
- Mixed Join Patterns
- Performance Considerations
- Advanced CTE Integration
star-schema-dimensions
Star Schema Dimensions
Star schema dimensions are optimized for analytical queries, providing denormalized access to dimension data with automatic display field handling.
basic-star-dimension-setup
Basic Star Dimension Setup
# Domain configuration for star schema
star_domain = %{
name: "Analytics Star Schema",
source: %{
source_table: "sales_facts",
primary_key: :id,
fields: [:id, :sale_amount, :customer_id, :product_id, :date_id],
columns: %{
id: %{type: :integer},
sale_amount: %{type: :decimal},
customer_id: %{type: :integer},
product_id: %{type: :integer},
date_id: %{type: :integer}
},
associations: %{
customer: %{queryable: :customers, field: :customer, owner_key: :customer_id, related_key: :id},
product: %{queryable: :products, field: :product, owner_key: :product_id, related_key: :id},
date: %{queryable: :dates, field: :date, owner_key: :date_id, related_key: :id}
}
},
schemas: %{
customers: %{
name: "Customer Dimension",
source_table: "customer_dim",
fields: [:id, :name, :segment, :region, :country],
columns: %{
id: %{type: :integer},
name: %{type: :string},
segment: %{type: :string},
region: %{type: :string},
country: %{type: :string}
}
},
products: %{
name: "Product Dimension",
source_table: "product_dim",
fields: [:id, :name, :category, :subcategory, :brand],
columns: %{
id: %{type: :integer},
name: %{type: :string},
category: %{type: :string},
subcategory: %{type: :string},
brand: %{type: :string}
}
},
dates: %{
name: "Date Dimension",
source_table: "date_dim",
fields: [:id, :date_value, :year, :quarter, :month, :day_name],
columns: %{
id: %{type: :integer},
date_value: %{type: :date},
year: %{type: :integer},
quarter: %{type: :integer},
month: %{type: :integer},
day_name: %{type: :string}
}
}
},
joins: %{
# Star dimensions with automatic display field resolution
customer: %{type: :star_dimension, display_field: :name},
product: %{type: :star_dimension, display_field: :name},
date: %{type: :star_dimension, display_field: :date_value}
}
}
# Configure and query
selecto = Selecto.configure(star_domain, conn)
# Query with star dimension fields
sales_by_region = selecto
|> Selecto.select([
"customer_display", # Automatic display field (customer.name)
"customer.segment", # Dimension attribute
"customer.region", # Dimension attribute
"product_display", # Automatic display field (product.name)
"product.category", # Dimension attribute
"date.year", # Time dimension
{:func, "sum", ["sale_amount"]},
{:func, "count", ["*"]}
])
|> Selecto.filter([
{"date.year", 2024},
{"customer.segment", {:in, ["Premium", "Enterprise"]}},
{"product.category", {:not_eq, "Discontinued"}}
])
|> Selecto.group_by([
"customer.region",
"product.category",
"date.year"
])
|> Selecto.execute()
nested-star-dimensions
Nested Star Dimensions
# Extended star schema with snowflake dimension normalization
nested_star_domain = %{
# ... base configuration ...
schemas: %{
# ... existing schemas ...
regions: %{
name: "Region Dimension",
source_table: "region_dim",
fields: [:id, :name, :country_id],
columns: %{
id: %{type: :integer},
name: %{type: :string},
country_id: %{type: :integer}
},
associations: %{
country: %{queryable: :countries, field: :country, owner_key: :country_id, related_key: :id}
}
},
countries: %{
name: "Country Dimension",
source_table: "country_dim",
fields: [:id, :name, :continent],
columns: %{
id: %{type: :integer},
name: %{type: :string},
continent: %{type: :string}
}
}
},
joins: %{
customer: %{
type: :star_dimension,
display_field: :name,
joins: %{
# Snowflake pattern: dimension joins to other dimensions
region: %{
type: :star_dimension,
display_field: :name,
joins: %{
country: %{type: :star_dimension, display_field: :name}
}
}
}
}
}
}
# Query with nested star dimensions
geographic_analysis = selecto
|> Selecto.select([
"customer.region.country_display", # Nested dimension display
"customer.region.country.continent", # Deep dimension attribute
{:func, "sum", ["sale_amount"]},
{:func, "count", ["DISTINCT", "customer_id"]}
])
|> Selecto.filter([
{"customer.region.country.continent", {:in, ["North America", "Europe"]}},
{"sale_amount", {:gt, 1000}}
])
|> Selecto.group_by([
"customer.region.country_display",
"customer.region.country.continent"
])
|> Selecto.execute()
hierarchical-joins
Hierarchical Joins
Hierarchical joins handle tree-like data structures using either adjacency lists or materialized paths, with automatic CTE generation for traversal.
adjacency-list-hierarchies
Adjacency List Hierarchies
# Domain with adjacency list hierarchy
hierarchy_domain = %{
name: "Organizational Hierarchy",
source: %{
source_table: "employees",
primary_key: :id,
fields: [:id, :name, :position, :department_id, :manager_id],
columns: %{
id: %{type: :integer},
name: %{type: :string},
position: %{type: :string},
department_id: %{type: :integer},
manager_id: %{type: :integer}
},
associations: %{
manager: %{queryable: :employees, field: :manager, owner_key: :manager_id, related_key: :id},
department: %{queryable: :departments, field: :department, owner_key: :department_id, related_key: :id}
}
},
schemas: %{
departments: %{
name: "Department",
source_table: "departments",
fields: [:id, :name, :parent_id],
columns: %{
id: %{type: :integer},
name: %{type: :string},
parent_id: %{type: :integer}
}
}
},
joins: %{
# Self-referencing hierarchy (management chain)
manager: %{
type: :hierarchical,
hierarchy_type: :adjacency_list,
depth_limit: 6,
path_separator: " > "
},
# Department hierarchy
department: %{
type: :hierarchical,
hierarchy_type: :adjacency_list,
depth_limit: 4
}
}
}
selecto = Selecto.configure(hierarchy_domain, conn)
# Query management hierarchy
management_analysis = selecto
|> Selecto.select([
"name",
"position",
"manager_path", # Full path to CEO: "CEO > VP > Director > Manager"
"manager_level", # Depth in hierarchy (0 = CEO, 1 = VP, etc.)
"manager_path_array", # Array of manager IDs in path
"department.name",
"department_path", # Department hierarchy path
"department_level" # Department depth
])
|> Selecto.filter([
{"manager_level", {:between, 2, 4}}, # Middle management layers
{"department_level", {:lte, 2}}, # Top 2 department levels
{"position", {:like, "%Manager%"}}
])
|> Selecto.order_by([
"department_level",
"manager_level",
"name"
])
|> Selecto.execute()
materialized-path-hierarchies
Materialized Path Hierarchies
# Domain with materialized path hierarchy (more efficient for deep trees)
path_hierarchy_domain = %{
name: "Content Categories",
source: %{
source_table: "articles",
primary_key: :id,
fields: [:id, :title, :content, :category_id],
columns: %{
id: %{type: :integer},
title: %{type: :string},
content: %{type: :text},
category_id: %{type: :integer}
},
associations: %{
category: %{queryable: :categories, field: :category, owner_key: :category_id, related_key: :id}
}
},
schemas: %{
categories: %{
name: "Category",
source_table: "categories",
fields: [:id, :name, :path, :parent_id],
columns: %{
id: %{type: :integer},
name: %{type: :string},
path: %{type: :string}, # e.g., "/technology/web-dev/frontend"
parent_id: %{type: :integer}
}
}
},
joins: %{
category: %{
type: :hierarchical,
hierarchy_type: :materialized_path,
path_field: :path,
path_separator: "/",
depth_limit: 8
}
}
}
selecto = Selecto.configure(path_hierarchy_domain, conn)
# Query with path-based hierarchy
category_analysis = selecto
|> Selecto.select([
"title",
"category.name",
"category_path", # Full category path
"category_level", # Depth (calculated from path)
"category_ancestors", # Array of ancestor category names
{:func, "count", ["*"]}
])
|> Selecto.filter([
# Find all articles in "Technology" branch (any level)
{"category_path", {:like, "%/technology/%"}},
# Or specific depth
{"category_level", {:eq, 3}}
])
|> Selecto.group_by([
"category.name",
"category_path",
"category_level"
])
|> Selecto.execute()
many-to-many-tagging
Many-to-Many Tagging
Many-to-many tagging joins provide faceted filtering and tag aggregation for flexible categorization systems.
basic-tagging-setup
Basic Tagging Setup
# Domain with many-to-many tagging
tagging_domain = %{
name: "Content Tagging System",
source: %{
source_table: "articles",
primary_key: :id,
fields: [:id, :title, :content, :author_id, :published_at],
columns: %{
id: %{type: :integer},
title: %{type: :string},
content: %{type: :text},
author_id: %{type: :integer},
published_at: %{type: :utc_datetime}
},
associations: %{
author: %{queryable: :users, field: :author, owner_key: :author_id, related_key: :id},
tags: %{queryable: :article_tags, field: :tags, owner_key: :id, related_key: :article_id}
}
},
schemas: %{
users: %{
name: "User",
source_table: "users",
fields: [:id, :name, :email],
columns: %{
id: %{type: :integer},
name: %{type: :string},
email: %{type: :string}
}
},
article_tags: %{
name: "Article Tag",
source_table: "article_tags",
fields: [:id, :name, :article_id, :weight],
columns: %{
id: %{type: :integer},
name: %{type: :string},
article_id: %{type: :integer},
weight: %{type: :integer} # Tag relevance weight
}
}
},
joins: %{
author: %{type: :left, display_field: :name},
tags: %{
type: :tagging,
tag_field: :name,
weight_field: :weight, # Optional: for weighted tagging
aggregation: :string_agg # How to combine multiple tags
}
}
}
selecto = Selecto.configure(tagging_domain, conn)
# Query with tag filtering and aggregation
tagged_content = selecto
|> Selecto.select([
"title",
"author_display",
"tags_list", # Aggregated tag string: "tech,web,frontend"
"tags_count", # Number of tags
"tags_weight_sum", # Sum of tag weights
"published_at"
])
|> Selecto.filter([
# Faceted tag filtering
{"tags_filter", "technology"}, # Has "technology" tag
{"tags_any", ["web", "mobile"]}, # Has any of these tags
{"tags_all", ["frontend", "react"]}, # Has all of these tags
{"tags_weight_min", 5}, # Minimum tag weight
{"published_at", {:gte, ~D[2024-01-01]}}
])
|> Selecto.order_by([
{:desc, "tags_weight_sum"}, # Order by tag relevance
{:desc, "published_at"}
])
|> Selecto.execute()
advanced-tagging-patterns
Advanced Tagging Patterns
# Multi-dimensional tagging (categories + user tags)
multi_tag_domain = %{
# ... base configuration ...
associations: %{
# ... existing associations ...
user_tags: %{queryable: :user_article_tags, field: :user_tags, owner_key: :id, related_key: :article_id},
system_tags: %{queryable: :system_tags, field: :system_tags, owner_key: :id, related_key: :article_id}
},
schemas: %{
# ... existing schemas ...
user_article_tags: %{
name: "User Tag",
source_table: "user_article_tags",
fields: [:id, :name, :article_id, :user_id, :created_at],
columns: %{
id: %{type: :integer},
name: %{type: :string},
article_id: %{type: :integer},
user_id: %{type: :integer},
created_at: %{type: :utc_datetime}
}
},
system_tags: %{
name: "System Tag",
source_table: "system_tags",
fields: [:id, :name, :article_id, :confidence],
columns: %{
id: %{type: :integer},
name: %{type: :string},
article_id: %{type: :integer},
confidence: %{type: :float} # AI-generated tag confidence
}
}
},
joins: %{
# ... existing joins ...
user_tags: %{
type: :tagging,
tag_field: :name,
aggregation: :array_agg # Keep as array for complex filtering
},
system_tags: %{
type: :tagging,
tag_field: :name,
weight_field: :confidence,
min_weight: 0.7 # Only high-confidence system tags
}
}
}
# Query with multiple tag dimensions
multi_dimensional_tags = selecto
|> Selecto.select([
"title",
"user_tags_array", # User-generated tags as array
"system_tags_list", # AI tags as string (high confidence only)
"system_tags_avg_confidence", # Average AI confidence
{:func, "count", ["DISTINCT", "user_tags.user_id"]} # Unique taggers
])
|> Selecto.filter([
{"user_tags_count", {:gte, 3}}, # At least 3 user tags
{"system_tags_filter", "programming"}, # AI detected "programming"
{"system_tags_avg_confidence", {:gte, 0.8}} # High AI confidence
])
|> Selecto.group_by(["id", "title"])
|> Selecto.execute()
self-referencing-hierarchies
Self-Referencing Hierarchies
Self-referencing hierarchies handle cases where entities reference other entities of the same type, such as management structures or threaded comments.
comment-threading
Comment Threading
# Domain for threaded comments
comment_domain = %{
name: "Threaded Comments",
source: %{
source_table: "comments",
primary_key: :id,
fields: [:id, :content, :author_id, :parent_id, :article_id, :created_at],
columns: %{
id: %{type: :integer},
content: %{type: :text},
author_id: %{type: :integer},
parent_id: %{type: :integer}, # Self-reference
article_id: %{type: :integer},
created_at: %{type: :utc_datetime}
},
associations: %{
parent: %{queryable: :comments, field: :parent, owner_key: :parent_id, related_key: :id},
author: %{queryable: :users, field: :author, owner_key: :author_id, related_key: :id},
article: %{queryable: :articles, field: :article, owner_key: :article_id, related_key: :id}
}
},
# ... schemas for users, articles ...
joins: %{
parent: %{
type: :hierarchical,
hierarchy_type: :adjacency_list,
depth_limit: 10, # Limit thread depth
self_reference: true
},
author: %{type: :left, display_field: :name},
article: %{type: :left, display_field: :title}
}
}
selecto = Selecto.configure(comment_domain, conn)
# Query comment threads with hierarchy
comment_threads = selecto
|> Selecto.select([
"content",
"author_display",
"parent_path", # Path to root comment
"parent_level", # Thread depth (0 = root, 1 = reply, etc.)
"thread_root_id", # ID of root comment in thread
"created_at"
])
|> Selecto.filter([
{"article_id", {:eq, 123}},
{"parent_level", {:lte, 5}} # Limit thread depth for display
])
|> Selecto.order_by([
"thread_root_id", # Group by thread
"parent_level", # Order by depth
"created_at" # Then by time
])
|> Selecto.execute()
organizational-reporting
Organizational Reporting
# Query direct reports and team sizes
org_structure = selecto
|> Selecto.select([
"name",
"position",
"manager_display", # Direct manager name
"manager_level", # Levels from CEO
{:func, "count", ["subordinates.id"]}, # Direct reports count
"team_size_total", # Total team size (all levels down)
"manager_path" # Full reporting chain to CEO
])
|> Selecto.filter([
{"manager_level", {:between, 1, 4}}, # Skip CEO, limit depth
{"active", true}
])
|> Selecto.group_by([
"id", "name", "position",
"manager_display", "manager_level"
])
|> Selecto.order_by([
"manager_level",
{:desc, {:func, "count", ["subordinates.id"]}}
])
|> Selecto.execute()
mixed-join-patterns
Mixed Join Patterns
Complex domains often combine multiple join patterns for comprehensive data relationships.
e-commerce-with-mixed-patterns
E-commerce with Mixed Patterns
# Domain combining star dimensions, hierarchies, and tagging
ecommerce_mixed = %{
name: "E-commerce Analytics",
source: %{
source_table: "orders",
primary_key: :id,
fields: [:id, :total, :customer_id, :created_at, :status],
# ... columns ...
associations: %{
customer: %{queryable: :customers, field: :customer, owner_key: :customer_id, related_key: :id},
items: %{queryable: :order_items, field: :items, owner_key: :id, related_key: :order_id}
}
},
schemas: %{
customers: %{
# ... customer schema with region association ...
},
order_items: %{
# ... order item schema with product association ...
},
products: %{
# ... product schema with category and tags associations ...
},
categories: %{
# ... hierarchical categories with path field ...
}
},
joins: %{
# Star dimension for customer analytics
customer: %{
type: :star_dimension,
display_field: :name,
joins: %{
region: %{type: :star_dimension, display_field: :name}
}
},
# Left join to order items with nested patterns
items: %{
type: :left,
joins: %{
# Star dimension for product analytics
product: %{
type: :star_dimension,
display_field: :name,
joins: %{
# Hierarchical category structure
category: %{
type: :hierarchical,
hierarchy_type: :materialized_path,
path_field: :path,
path_separator: "/"
},
# Many-to-many product tags
tags: %{type: :tagging, tag_field: :name}
}
}
}
}
}
}
selecto = Selecto.configure(ecommerce_mixed, conn)
# Complex query using all join patterns
comprehensive_analysis = selecto
|> Selecto.select([
# Star dimension fields
"customer_display",
"customer.region_display",
# Hierarchical category analysis
"items.product.category_path",
"items.product.category_level",
# Tagging analysis
"items.product.tags_list",
"items.product.tags_count",
# Aggregated measures
{:func, "sum", ["total"]},
{:func, "avg", ["items.quantity"]},
{:func, "count", ["DISTINCT", "items.product_id"]}
])
|> Selecto.filter([
# Star dimension filters
{"customer.region.name", {:in, ["North America", "Europe"]}},
# Hierarchical filters
{"items.product.category_level", {:lte, 3}},
{"items.product.category_path", {:like, "%electronics%"}},
# Tagging filters
{"items.product.tags_filter", "premium"},
{"items.product.tags_count", {:gte, 2}},
# Standard filters
{"status", "completed"},
{"created_at", {:gte, ~D[2024-01-01]}}
])
|> Selecto.group_by([
"customer.region_display",
"items.product.category_path",
"items.product.category_level"
])
|> Selecto.order_by([
{:desc, {:func, "sum", ["total"]}},
"items.product.category_level"
])
|> Selecto.execute()
performance-considerations
Performance Considerations
join-ordering-optimization
Join Ordering Optimization
# Optimal join order for performance
def optimize_join_order(selecto) do
selecto
# 1. Apply most selective filters first (reduces dataset early)
|> Selecto.filter([
{"created_at", {:between, ~D[2024-01-01], ~D[2024-03-31]}}, # Date range
{"status", "active"}, # Selective filter
{"customer.segment", "enterprise"} # Dimension filter
])
# 2. Select specific fields (avoid SELECT *)
|> Selecto.select([
"customer_display",
"items.product.category_path",
{:func, "sum", ["total"]}
])
# 3. Group by dimension fields only
|> Selecto.group_by(["customer_display", "items.product.category_path"])
# 4. Order by aggregated values for efficient top-N
|> Selecto.order_by([{:desc, {:func, "sum", ["total"]}}])
|> Selecto.limit(100)
end
index-recommendations
Index Recommendations
-- Recommended indexes for join patterns
-- Star dimension joins
CREATE INDEX idx_orders_customer_date ON orders(customer_id, created_at);
CREATE INDEX idx_customers_segment_region ON customers(segment, region_id);
-- Hierarchical joins
CREATE INDEX idx_categories_parent_path ON categories(parent_id, path);
CREATE INDEX idx_employees_manager_level ON employees(manager_id) WHERE active = true;
-- Tagging joins
CREATE INDEX idx_product_tags_product_name ON product_tags(product_id, name);
CREATE INDEX idx_article_tags_name_weight ON article_tags(name, weight) WHERE weight >= 0.5;
-- Composite indexes for common filter combinations
CREATE INDEX idx_orders_status_date_customer ON orders(status, created_at, customer_id);
advanced-cte-integration
Advanced CTE Integration
custom-cte-with-join-patterns
Custom CTE with Join Patterns
alias Selecto.Advanced.CTE
alias Selecto.Builder.CteSql
# Build custom CTE that leverages join patterns
def build_customer_segment_analysis(conn) do
# Base selecto with join patterns
base_selecto = Selecto.configure(ecommerce_mixed, conn)
|> Selecto.select([
"customer_id",
"customer.segment",
"customer.region_display",
"items.product.category_level",
{:func, "sum", ["total"]},
{:func, "count", ["*"]}
])
|> Selecto.filter([
{"created_at", {:gte, ~D[2024-01-01]}},
{"status", "completed"}
])
|> Selecto.group_by([
"customer_id",
"customer.segment",
"customer.region_display",
"items.product.category_level"
])
# Convert to CTE spec
customer_cte = CTE.create_cte("customer_analysis", fn -> base_selecto end)
# Main query using the CTE
main_query = [
"SELECT ",
"customer_segment, region_display, category_level, ",
"SUM(total_sales) as segment_sales, ",
"AVG(order_count) as avg_orders_per_customer, ",
"COUNT(DISTINCT customer_id) as customer_count ",
"FROM customer_analysis ",
"GROUP BY customer_segment, region_display, category_level ",
"ORDER BY segment_sales DESC"
]
{complete_query, _combined_params} = CteSql.integrate_ctes_with_query(
[customer_cte],
main_query,
[]
)
{sql, final_params} = Selecto.SQL.Params.finalize(complete_query)
Postgrex.query(conn, sql, final_params)
endThis comprehensive guide demonstrates how to effectively use Selecto's complex join patterns for real-world data analysis scenarios. Each pattern is optimized for specific use cases and can be combined to handle sophisticated data relationships.