Files
feiqiu-ETL/etl_billiards/scd/scd2_handler.py
2025-11-18 02:32:00 +08:00

93 lines
2.9 KiB
Python

# -*- coding: utf-8 -*-
"""SCD2 (Slowly Changing Dimension Type 2) 处理器"""
from datetime import datetime
class SCD2Handler:
    """Maintain SCD2 (Slowly Changing Dimension Type 2) history rows.

    The target table is expected to carry the bookkeeping columns
    ``valid_from``, ``valid_to`` and ``is_current``; the open (current)
    version of a natural key is the row whose ``valid_to`` is NULL.
    """

    def __init__(self, db_ops):
        """Keep a reference to the database helper.

        Args:
            db_ops: Object whose ``conn`` attribute provides ``cursor()``.
        """
        self.db = db_ops

    def upsert(self, table_name: str, natural_key: list, tracked_fields: list,
               record: dict, effective_date: datetime = None) -> str:
        """Apply one incoming record to an SCD2 table.

        Looks up the open version for the record's natural key and then:

        * no open version -> inserts the record as the current version;
        * open version with identical tracked fields -> does nothing;
        * open version with differing tracked fields -> closes it
          (``valid_to = effective_date``, ``is_current = FALSE``) and inserts
          the record as the new current version.

        The caller's ``record`` dict is never modified. No commit/rollback is
        issued here; transaction control is left to the caller.

        NOTE: ``table_name`` and the column names (keys of ``record``,
        entries of ``natural_key``) are interpolated into the SQL text
        unescaped, so they must come from trusted code/config. Values are
        always passed as bound parameters.

        Args:
            table_name: Target table name.
            natural_key: Column names forming the natural (business) key;
                each must be a key of ``record``.
            tracked_fields: Column names whose change triggers a new version.
            record: Column name -> value mapping for the incoming row.
            effective_date: Timestamp used for ``valid_from`` of the new
                version and ``valid_to`` of the closed one. Defaults to
                ``datetime.now()``.

        Returns:
            The operation performed: 'INSERT', 'UPDATE' or 'UNCHANGED'.

        Raises:
            ValueError: If ``natural_key`` is empty (the lookup would
                otherwise be malformed SQL).
        """
        if not natural_key:
            raise ValueError("natural_key must contain at least one column")
        if effective_date is None:
            effective_date = datetime.now()

        key_filter = " AND ".join([f"{k} = %({k})s" for k in natural_key])
        sql_select = f"""
            SELECT * FROM {table_name}
            WHERE {key_filter}
            AND valid_to IS NULL
        """

        cursor = self.db.conn.cursor()
        try:
            # Find the currently open version for this natural key.
            cursor.execute(sql_select, record)
            existing = self._row_as_mapping(cursor, cursor.fetchone())

            if not existing:
                # Brand-new key: insert it as the current version.
                self._insert_version(cursor, table_name, record, effective_date)
                return 'INSERT'

            has_changes = any(
                existing.get(field) != record.get(field)
                for field in tracked_fields
            )
            if not has_changes:
                return 'UNCHANGED'

            # Changed: close the open version, then insert the new one.
            sql_close = f"""
                UPDATE {table_name}
                SET valid_to = %(scd2_close_ts)s,
                    is_current = FALSE
                WHERE {key_filter}
                AND valid_to IS NULL
            """
            # Dedicated parameter dict: nothing leaks into the caller's
            # record and no helper key ends up in the INSERT column list.
            close_params = {k: record[k] for k in natural_key}
            close_params["scd2_close_ts"] = effective_date
            cursor.execute(sql_close, close_params)

            self._insert_version(cursor, table_name, record, effective_date)
            return 'UPDATE'
        finally:
            # Release the cursor on every path, including errors.
            cursor.close()

    @staticmethod
    def _row_as_mapping(cursor, row):
        """Return ``row`` as a mapping, using ``cursor.description`` for tuple rows."""
        if row is None or hasattr(row, "get"):
            return row
        # DB-API: description is a sequence of column entries whose first
        # item is the column name.
        return {col[0]: value for col, value in zip(cursor.description, row)}

    @staticmethod
    def _insert_version(cursor, table_name, record, effective_date):
        """Insert ``record`` as the open current version, working on a copy."""
        row = {
            **record,
            "valid_from": effective_date,
            "valid_to": None,
            "is_current": True,
        }
        fields = list(row)
        placeholders = ", ".join([f"%({f})s" for f in fields])
        sql_insert = f"""
            INSERT INTO {table_name} ({', '.join(fields)})
            VALUES ({placeholders})
        """
        cursor.execute(sql_insert, row)