Updata2
This commit is contained in:
90
tmp/check_scd2_tables.py
Normal file
90
tmp/check_scd2_tables.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# -*- coding: utf-8 -*-
"""Check the SCD2 configuration of the DWD dimension tables.

Lists every ``dim_`` table in the ``billiards_dwd`` schema, reports which of
them carry at least one SCD2 bookkeeping column (history tracking) and which
carry none (reported as Type1 / direct overwrite), and finally prints a
summary of the load path described for ``dwd_load_task.py``.

Run as a script; requires ``PG_DSN`` in the environment or in the project's
``.env`` file.
"""
import os
import sys
from pathlib import Path

PROJECT_ROOT = Path(__file__).parent.parent / "etl_billiards"

# Columns whose presence marks a dimension table as SCD2-managed.
SCD2_COLUMNS = frozenset(
    {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
)

# "!" is declared as the LIKE escape character so that "_" matches a literal
# underscore; unescaped, "_" is a single-character wildcard and the filter
# would also accept names such as "dimX...".
DIM_TABLES_SQL = """
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'billiards_dwd'
      AND table_name LIKE 'dim!_%' ESCAPE '!'
    ORDER BY table_name
"""

TABLE_COLUMNS_SQL = """
    SELECT column_name FROM information_schema.columns
    WHERE table_schema = 'billiards_dwd' AND table_name = %s
"""

PROCESSING_LOGIC = """
Code path in dwd_load_task.py:

  if table.startswith('dim_'):
      _merge_dim()
          |
          +-- if has SCD2 columns:
          |       _merge_dim_scd2()
          |           -> Compare data, close old version, insert new version
          |           -> Uses INSERT (no ON CONFLICT)
          |           -> SCD2 NOT affected by fact_upsert config
          |
          +-- else:
                  _merge_dim_type1_upsert()
                      -> Uses ON CONFLICT DO UPDATE
                      -> Direct overwrite (Type1)
  else:
      _load_fact_generic()
          -> Uses ON CONFLICT DO UPDATE (if fact_upsert=true)

CONCLUSION: SCD2 logic is INDEPENDENT, NOT affected by conflict mode settings.
"""


def classify_dim_tables(columns_by_table):
    """Split tables into SCD2 and Type1 groups based on their column names.

    Args:
        columns_by_table: Mapping of table name to an iterable of that
            table's column names. Names are compared case-insensitively.

    Returns:
        A ``(scd2_tables, type1_tables)`` tuple. ``scd2_tables`` is a list of
        ``(table_name, matched_columns)`` pairs for tables that have at least
        one SCD2 column (``matched_columns`` is the lower-cased set that was
        found); ``type1_tables`` is a list of the remaining table names.
        Both lists follow the iteration order of ``columns_by_table``.
    """
    scd2_tables = []
    type1_tables = []
    for table, columns in columns_by_table.items():
        matched = {name.lower() for name in columns} & SCD2_COLUMNS
        if matched:
            scd2_tables.append((table, matched))
        else:
            type1_tables.append(table)
    return scd2_tables, type1_tables


def fetch_dim_table_columns(db):
    """Return ``{table_name: [column_name, ...]}`` for every dimension table.

    Args:
        db: Connection object exposing ``query(sql, params=None)`` that
            returns rows addressable by column name.

    Returns:
        A dict keyed by table name, in the order the tables were returned
        by the database (sorted by name).
    """
    columns_by_table = {}
    for row in db.query(DIM_TABLES_SQL):
        table = row["table_name"]
        column_rows = db.query(TABLE_COLUMNS_SQL, (table,))
        columns_by_table[table] = [col["column_name"] for col in column_rows]
    return columns_by_table


def print_report(scd2_tables, type1_tables):
    """Print the SCD2 / Type1 table listing and the processing-logic summary."""
    print("\n[SCD2 Tables - History Tracking]")
    print("-" * 50)
    if scd2_tables:
        for table, matched in scd2_tables:
            print(f" {table}")
            print(f" SCD2 cols: {', '.join(sorted(matched))}")
    else:
        print(" (none)")

    print(f"\n[Type1 Tables - Direct Overwrite] ({len(type1_tables)} tables)")
    print("-" * 50)
    for table in type1_tables:
        print(f" {table}")

    print("\n" + "=" * 70)
    print("Processing Logic")
    print("=" * 70)
    print(PROCESSING_LOGIC)


def main():
    """Connect to the database, inspect the dimension tables, print the report.

    Exits with an error message if ``PG_DSN`` is not configured.
    """
    # The project root must be on sys.path before the project-local import,
    # and .env must be loaded before PG_DSN is read, so both imports are
    # resolved here rather than at module level.
    sys.path.insert(0, str(PROJECT_ROOT))

    from dotenv import load_dotenv

    load_dotenv(PROJECT_ROOT / ".env")

    from database.connection import DatabaseConnection

    dsn = os.getenv("PG_DSN")
    if not dsn:
        sys.exit(
            f"PG_DSN is not set (checked the environment and {PROJECT_ROOT / '.env'})"
        )

    db = DatabaseConnection(dsn)
    try:
        print("=" * 70)
        print("DWD Dimension Tables - SCD2 Analysis")
        print("=" * 70)

        # Fetch all dimension tables together with their columns.
        columns_by_table = fetch_dim_table_columns(db)
    finally:
        # Release the connection even if one of the queries fails.
        db.close()

    scd2_tables, type1_tables = classify_dim_tables(columns_by_table)
    print_report(scd2_tables, type1_tables)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user