
data-warehouse
Data warehouse design mastery with star schema, dimensional modeling, fact/dimension tables, slowly changing dimensions, and enterprise best practices. Complete schema examples included.
Data warehouse design mastery with star schema, dimensional modeling, fact/dimension tables, slowly changing dimensions, and enterprise best practices. Complete schema examples included.
Data Warehouse Design
Star Schema Basics
Fact Table Design
-- Star schema with sales fact table
CREATE TABLE fact_sales (
sales_id BIGINT PRIMARY KEY,
date_id INT NOT NULL,
customer_id INT NOT NULL,
product_id INT NOT NULL,
store_id INT NOT NULL,
quantity INT NOT NULL,
unit_price DECIMAL(10, 2),
sale_amount DECIMAL(12, 2),
discount_amount DECIMAL(12, 2),
net_sales DECIMAL(12, 2),
tax_amount DECIMAL(12, 2),
total_sale DECIMAL(12, 2),
-- Foreign keys
FOREIGN KEY (date_id) REFERENCES dim_date(date_id),
FOREIGN KEY (customer_id) REFERENCES dim_customer(customer_id),
FOREIGN KEY (product_id) REFERENCES dim_product(product_id),
FOREIGN KEY (store_id) REFERENCES dim_store(store_id)
);
-- Create indexes on foreign keys for query performance
CREATE INDEX idx_fact_sales_date ON fact_sales(date_id);
CREATE INDEX idx_fact_sales_customer ON fact_sales(customer_id);
CREATE INDEX idx_fact_sales_product ON fact_sales(product_id);
CREATE INDEX idx_fact_sales_store ON fact_sales(store_id);
Dimension Table Design
-- Date dimension (conformed dimension - used across multiple facts)
CREATE TABLE dim_date (
date_id INT PRIMARY KEY,
full_date DATE UNIQUE,
day_of_week INT,
day_of_week_name VARCHAR(10),
day_of_month INT,
week_of_year INT,
month_number INT,
month_name VARCHAR(12),
quarter INT,
year INT,
fiscal_quarter INT,
fiscal_year INT,
is_holiday BOOLEAN,
is_weekend BOOLEAN,
is_weekday BOOLEAN
);
-- Customer dimension
CREATE TABLE dim_customer (
customer_id INT PRIMARY KEY,
customer_code VARCHAR(20),
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(100),
phone VARCHAR(20),
gender VARCHAR(10),
birth_date DATE,
-- Address hierarchy
street_address VARCHAR(100),
city VARCHAR(50),
state_province VARCHAR(50),
postal_code VARCHAR(10),
country VARCHAR(50),
region VARCHAR(50),
-- Customer segment
customer_segment VARCHAR(50),
customer_lifetime_value DECIMAL(12, 2),
-- Slowly changing dimension columns
effective_date DATE,
end_date DATE,
is_current BOOLEAN,
-- Audit columns
created_date DATE,
updated_date DATE
);
-- Product dimension
CREATE TABLE dim_product (
product_id INT PRIMARY KEY,
product_code VARCHAR(50),
product_name VARCHAR(200),
product_category VARCHAR(50),
product_subcategory VARCHAR(50),
product_line VARCHAR(50),
supplier_id INT,
brand VARCHAR(50),
model VARCHAR(100),
color VARCHAR(30),
size VARCHAR(10),
unit_cost DECIMAL(10, 2),
list_price DECIMAL(10, 2),
cost_to_list_ratio DECIMAL(5, 4),
product_status VARCHAR(20),
effective_date DATE,
end_date DATE,
is_current BOOLEAN
);
Slowly Changing Dimensions (SCD)
Type 1: Overwrite
-- Simply update the existing record
UPDATE dim_customer
SET
email = 'new_email@example.com',
phone = '555-9999',
updated_date = CURRENT_DATE
WHERE customer_id = 1;
Type 2: Add New Row
-- Close old row
UPDATE dim_customer
SET
is_current = FALSE,
end_date = CURRENT_DATE - INTERVAL '1 day'
WHERE customer_id = 1 AND is_current = TRUE;
-- Insert new row
INSERT INTO dim_customer
VALUES (
customer_id,
customer_code,
new_values...,
CURRENT_DATE, -- effective_date
NULL, -- end_date
TRUE, -- is_current
CURRENT_DATE -- created_date
);
Type 3: Add New Column
-- Add previous value columns
ALTER TABLE dim_customer ADD COLUMN previous_city VARCHAR(50);
ALTER TABLE dim_customer ADD COLUMN previous_city_start_date DATE;
-- Update previous columns when changing current
UPDATE dim_customer
SET
previous_city = city,
previous_city_start_date = CURRENT_DATE,
city = 'New York'
WHERE customer_id = 1;
Conformed Dimensions
-- Single dim_date used across all fact tables
SELECT
f.sales_id,
f.quantity * f.unit_price as revenue,
d.month_name,
d.year
FROM fact_sales f
JOIN dim_date d ON f.date_id = d.date_id;
-- Reuse dim_customer in multiple facts
SELECT
fs.sales_id,
fc.call_id,
c.customer_segment
FROM fact_sales fs
JOIN dim_customer c ON fs.customer_id = c.customer_id
LEFT JOIN fact_customer_calls fc ON c.customer_id = fc.customer_id;
Aggregate Tables (Materialized Views)
-- Pre-calculate common aggregations for performance
CREATE MATERIALIZED VIEW sales_summary_daily AS
SELECT
d.full_date,
d.month_name,
d.year,
p.product_category,
c.customer_segment,
COUNT(DISTINCT fs.sales_id) as transaction_count,
SUM(fs.quantity) as total_quantity,
ROUND(SUM(fs.net_sales), 2) as total_sales,
ROUND(AVG(fs.net_sales), 2) as avg_sale,
COUNT(DISTINCT fs.customer_id) as unique_customers
FROM fact_sales fs
JOIN dim_date d ON fs.date_id = d.date_id
JOIN dim_product p ON fs.product_id = p.product_id
JOIN dim_customer c ON fs.customer_id = c.customer_id
GROUP BY d.full_date, d.month_name, d.year, p.product_category, c.customer_segment;
-- Refresh materialized view
REFRESH MATERIALIZED VIEW sales_summary_daily;
-- Query aggregate table instead of fact table
SELECT
month_name,
product_category,
SUM(total_sales) as monthly_sales
FROM sales_summary_daily
WHERE year = 2024
GROUP BY month_name, product_category;
Bridge Tables (Many-to-Many)
-- For many-to-many relationships (e.g., product to categories)
CREATE TABLE bridge_product_category (
product_id INT,
category_id INT,
PRIMARY KEY (product_id, category_id),
FOREIGN KEY (product_id) REFERENCES dim_product(product_id),
FOREIGN KEY (category_id) REFERENCES dim_category(category_id)
);
-- Query with bridge table
SELECT
p.product_name,
STRING_AGG(DISTINCT c.category_name, ', ') as categories,
COUNT(DISTINCT bc.category_id) as category_count
FROM dim_product p
LEFT JOIN bridge_product_category bc ON p.product_id = bc.product_id
LEFT JOIN dim_category c ON bc.category_id = c.category_id
GROUP BY p.product_id, p.product_name;
Data Quality Metrics
-- Monitor fact table metrics
SELECT
COUNT(*) as total_records,
COUNT(DISTINCT customer_id) as unique_customers,
COUNT(DISTINCT product_id) as unique_products,
MIN(sale_amount) as min_sale,
MAX(sale_amount) as max_sale,
ROUND(AVG(sale_amount), 2) as avg_sale,
COUNT(CASE WHEN sale_amount < 0 THEN 1 END) as negative_sales,
COUNT(CASE WHEN sale_amount IS NULL THEN 1 END) as null_sales,
MAX(load_timestamp) as last_load_time
FROM fact_sales;
-- Dimension quality checks
SELECT
'dim_customer' as dimension,
COUNT(*) as total_records,
COUNT(DISTINCT customer_id) as distinct_ids,
COUNT(CASE WHEN first_name IS NULL THEN 1 END) as null_first_names,
COUNT(CASE WHEN is_current = TRUE THEN 1 END) as current_records
FROM dim_customer;
Next Steps
Learn ETL/ELT pipeline design and data transformation patterns in the etl-pipelines skill.
You Might Also Like
Related Skills

zig-system-calls
Guides using bun.sys for system calls and file I/O in Zig. Use when implementing file operations instead of std.fs or std.posix.
oven-sh
bun-file-io
Use this when you are working on file operations like reading, writing, scanning, or deleting files. It summarizes the preferred file APIs and patterns used in this repo. It also notes when to use filesystem helpers for directories.
anomalyco
vector-index-tuning
Optimize vector index performance for latency, recall, and memory. Use when tuning HNSW parameters, selecting quantization strategies, or scaling vector search infrastructure.
wshobson
similarity-search-patterns
Implement efficient similarity search with vector databases. Use when building semantic search, implementing nearest neighbor queries, or optimizing retrieval performance.
wshobson
dbt-transformation-patterns
Master dbt (data build tool) for analytics engineering with model organization, testing, documentation, and incremental strategies. Use when building data transformations, creating data models, or implementing analytics engineering best practices.
wshobson
event-store-design
Design and implement event stores for event-sourced systems. Use when building event sourcing infrastructure, choosing event store technologies, or implementing event persistence patterns.
wshobson