This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

90
tmp/check_scd2_tables.py Normal file
View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
"""Check the SCD2 configuration of the DWD dimension tables.

Lists every ``dim_`` table in the ``billiards_dwd`` schema, splits them into
tables that carry at least one SCD2 bookkeeping column and tables that carry
none, prints both groups, and finally prints a static summary of how the DWD
load task is expected to treat each group.

The connection string is read from the ``PG_DSN`` environment variable after
loading ``.env`` from the ``etl_billiards`` directory that sits next to this
script's own directory.
"""
import os
import sys
from pathlib import Path

# ``database.connection`` lives inside etl_billiards, which is not an
# installed package; it has to be on sys.path before the import below.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv  # noqa: E402

load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection  # noqa: E402

# SQL and report text are kept as column-0 constants so their content is
# independent of the indentation of the code that uses them.
DIM_TABLES_SQL = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_dwd'
AND table_name LIKE 'dim_%'
ORDER BY table_name
"""

TABLE_COLUMNS_SQL = """
SELECT column_name FROM information_schema.columns
WHERE table_schema = 'billiards_dwd' AND table_name = %s
"""

PROCESSING_LOGIC_TEXT = """
Code path in dwd_load_task.py:
if table.startswith('dim_'):
_merge_dim()
|
+-- if has SCD2 columns:
| _merge_dim_scd2()
| -> Compare data, close old version, insert new version
| -> Uses INSERT (no ON CONFLICT)
| -> SCD2 NOT affected by fact_upsert config
|
+-- else:
_merge_dim_type1_upsert()
-> Uses ON CONFLICT DO UPDATE
-> Direct overwrite (Type1)
else:
_load_fact_generic()
-> Uses ON CONFLICT DO UPDATE (if fact_upsert=true)
CONCLUSION: SCD2 logic is INDEPENDENT, NOT affected by conflict mode settings.
"""

dsn = os.getenv("PG_DSN")
if not dsn:
    # Fail with an actionable message instead of handing None to the driver.
    sys.exit(
        f"PG_DSN is not set; define it in {project_root / '.env'} "
        "or in the environment"
    )

db = DatabaseConnection(dsn)
try:
    print("=" * 70)
    print("DWD Dimension Tables - SCD2 Analysis")
    print("=" * 70)

    # Fetch all dimension tables.
    tables = db.query(DIM_TABLES_SQL)

    # A table counts as SCD2 as soon as it has ANY of these columns.
    scd_cols = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
    scd2_tables = []   # (table_name, set of SCD2 columns found)
    type1_tables = []  # table names without any SCD2 column

    for t in tables:
        tbl = t["table_name"]
        # In SQL LIKE, "_" matches any single character, so 'dim_%' also
        # matches names such as "dimension_x". Enforce the literal prefix.
        if not tbl.startswith("dim_"):
            continue

        column_rows = db.query(TABLE_COLUMNS_SQL, (tbl,))
        col_names = {c["column_name"].lower() for c in column_rows}
        has_scd = col_names & scd_cols
        if has_scd:
            scd2_tables.append((tbl, has_scd))
        else:
            type1_tables.append(tbl)

    print("\n[SCD2 Tables - History Tracking]")
    print("-" * 50)
    if scd2_tables:
        for tbl, found_cols in scd2_tables:
            print(f" {tbl}")
            print(f" SCD2 cols: {', '.join(sorted(found_cols))}")
    else:
        print(" (none)")

    print(f"\n[Type1 Tables - Direct Overwrite] ({len(type1_tables)} tables)")
    print("-" * 50)
    for tbl in type1_tables:
        print(f" {tbl}")

    print("\n" + "=" * 70)
    print("Processing Logic")
    print("=" * 70)
    print(PROCESSING_LOGIC_TEXT)
finally:
    # Release the connection even when one of the queries above raises.
    db.close()