# -*- coding: utf-8 -*-
"""Check the SCD2 configuration of the DWD dimension tables.

Lists every ``dim_*`` table in the ``billiards_dwd`` schema and reports
whether it carries any of the SCD2 bookkeeping columns (reported as an SCD2
table) or none of them (reported as a Type1 table). A static summary of the
loader's merge code path is printed at the end.

The connection string is read from the ``PG_DSN`` environment variable, after
loading ``etl_billiards/.env``.
"""
import os
import sys
from pathlib import Path

# The project package lives next to this script's parent directory; it has to
# be on sys.path before ``database.connection`` can be imported in main().
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

# A table is reported as SCD2 as soon as it has at least one of these columns.
SCD2_COLUMNS = frozenset(
    {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
)

DIM_PREFIX = "dim_"

# NOTE(review): the line breaks and indentation of this diagram were
# reconstructed from a whitespace-collapsed copy of the script -- confirm the
# layout against the intended output.
PROCESSING_LOGIC = """
Code path in dwd_load_task.py:

  if table.startswith('dim_'):
      _merge_dim()
          |
          +-- if has SCD2 columns:
          |       _merge_dim_scd2()
          |       -> Compare data, close old version, insert new version
          |       -> Uses INSERT (no ON CONFLICT)
          |       -> SCD2 NOT affected by fact_upsert config
          |
          +-- else:
                  _merge_dim_type1_upsert()
                  -> Uses ON CONFLICT DO UPDATE
                  -> Direct overwrite (Type1)
  else:
      _load_fact_generic()
      -> Uses ON CONFLICT DO UPDATE (if fact_upsert=true)

CONCLUSION: SCD2 logic is INDEPENDENT, NOT affected by conflict mode settings.
"""


def is_dim_table(table_name):
    """Return True when *table_name* literally starts with ``dim_``.

    The SQL filter ``LIKE 'dim_%'`` treats ``_`` as a single-character
    wildcard, so it also matches names such as ``dimension_x``. This exact
    prefix check matches the ``table.startswith('dim_')`` test quoted in the
    processing-logic summary.
    """
    return table_name.startswith(DIM_PREFIX)


def classify_dim_tables(columns_by_table):
    """Split tables into SCD2 and Type1 groups based on their column names.

    Args:
        columns_by_table: mapping of table name to an iterable of its column
            names. Iteration order of the mapping is preserved in the result.

    Returns:
        A ``(scd2_tables, type1_tables)`` tuple. ``scd2_tables`` is a list of
        ``(table_name, matched_columns)`` pairs, where ``matched_columns`` is
        the set of SCD2 columns the table has (compared case-insensitively).
        ``type1_tables`` is a list of the names of tables with none of them.
    """
    scd2_tables = []
    type1_tables = []
    for table_name, columns in columns_by_table.items():
        matched = {column.lower() for column in columns} & SCD2_COLUMNS
        if matched:
            scd2_tables.append((table_name, matched))
        else:
            type1_tables.append(table_name)
    return scd2_tables, type1_tables


def fetch_dim_table_columns(db):
    """Return ``{table_name: [column_name, ...]}`` for the DWD dimension tables.

    Args:
        db: object exposing ``query(sql, params=None)`` that returns rows
            addressable by column name (as the original script relied on).
    """
    # Fetch all dimension tables.
    table_rows = db.query("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'billiards_dwd'
          AND table_name LIKE 'dim_%'
        ORDER BY table_name
    """)

    columns_by_table = {}
    for row in table_rows:
        table_name = row["table_name"]
        if not is_dim_table(table_name):
            # Matched only because '_' is a LIKE wildcard; not a real dim_ table.
            continue
        column_rows = db.query("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd'
              AND table_name = %s
        """, (table_name,))
        columns_by_table[table_name] = [c["column_name"] for c in column_rows]
    return columns_by_table


def print_report(scd2_tables, type1_tables):
    """Print the SCD2 / Type1 table lists followed by the processing summary."""
    print("\n[SCD2 Tables - History Tracking]")
    print("-" * 50)
    if scd2_tables:
        for tbl, cols in scd2_tables:
            print(f" {tbl}")
            print(f" SCD2 cols: {', '.join(sorted(cols))}")
    else:
        print(" (none)")

    print(f"\n[Type1 Tables - Direct Overwrite] ({len(type1_tables)} tables)")
    print("-" * 50)
    for tbl in type1_tables:
        print(f" {tbl}")

    print("\n" + "=" * 70)
    print("Processing Logic")
    print("=" * 70)
    print(PROCESSING_LOGIC)


def main():
    """Connect to the database, analyse the dimension tables, print the report."""
    # Imported here because they depend on the sys.path tweak above and on the
    # project environment being present.
    from dotenv import load_dotenv

    load_dotenv(project_root / ".env")

    from database.connection import DatabaseConnection

    db = DatabaseConnection(os.getenv("PG_DSN"))
    try:
        print("=" * 70)
        print("DWD Dimension Tables - SCD2 Analysis")
        print("=" * 70)

        columns_by_table = fetch_dim_table_columns(db)
        scd2_tables, type1_tables = classify_dim_tables(columns_by_table)
        print_report(scd2_tables, type1_tables)
    finally:
        # Always release the connection, even when a query fails.
        db.close()


if __name__ == "__main__":
    main()