# -*- coding: utf-8 -*-
"""SCD2 (Slowly Changing Dimension Type 2) handler."""

from contextlib import closing
from datetime import datetime
from typing import Optional


class SCD2Handler:
    """Maintain SCD2 history rows in a table.

    The target table is expected to have ``valid_from``, ``valid_to`` and
    ``is_current`` columns; the row with ``valid_to IS NULL`` is treated as
    the current version for a given natural key.

    ``db_ops`` must expose a DB-API connection as ``db_ops.conn``. SQL is
    issued with ``%(name)s`` (pyformat) placeholders. This class never
    commits; transaction handling is left to the caller.
    """

    # Bind-parameter name for the closing timestamp. It is deliberately
    # unlikely to clash with a real column, so a business column called
    # "effective_date" in ``record`` is neither overwritten nor dropped.
    _CLOSE_PARAM = "_scd2_closed_at"

    def __init__(self, db_ops):
        self.db = db_ops

    def upsert(self, table_name: str, natural_key: list, tracked_fields: list,
               record: dict, effective_date: Optional[datetime] = None) -> str:
        """Apply an SCD2 insert/update for a single record.

        Args:
            table_name: Target table name.
            natural_key: Column names that identify the business entity.
            tracked_fields: Column names whose change creates a new version.
            record: Column name -> value for the incoming row. It is not
                modified by this method.
            effective_date: Timestamp at which the new version becomes valid;
                defaults to ``datetime.now()``.

        Returns:
            ``'INSERT'`` when no current row exists, ``'UPDATE'`` when a
            tracked field changed (old row closed, new row inserted), or
            ``'UNCHANGED'`` when nothing tracked differs.

        Raises:
            ValueError: If ``natural_key`` is empty.
        """
        if not natural_key:
            # An empty key would yield "WHERE  AND valid_to IS NULL".
            raise ValueError("natural_key must contain at least one column")
        if effective_date is None:
            effective_date = datetime.now()

        # NOTE(review): table and column names are interpolated into the SQL
        # text (identifiers cannot be bound as parameters). They must come
        # from trusted configuration, never from untrusted input.
        key_predicate = " AND ".join(f"{k} = %({k})s" for k in natural_key)
        sql_select = f"""
            SELECT * FROM {table_name}
            WHERE {key_predicate} AND valid_to IS NULL
        """

        # closing() guarantees the cursor is released on every exit path.
        with closing(self.db.conn.cursor()) as cursor:
            cursor.execute(sql_select, record)
            existing = cursor.fetchone()

            if not existing:
                # No current version: insert the first one.
                self._insert_current(cursor, table_name, record, effective_date)
                return 'INSERT'

            current_row = self._row_as_mapping(cursor, existing)
            has_changes = any(
                current_row.get(field) != record.get(field)
                for field in tracked_fields
            )
            if not has_changes:
                return 'UNCHANGED'

            # Changed: close the current version, then insert the new one.
            sql_close = f"""
                UPDATE {table_name}
                SET valid_to = %({self._CLOSE_PARAM})s, is_current = FALSE
                WHERE {key_predicate} AND valid_to IS NULL
            """
            close_params = {k: record[k] for k in natural_key}
            close_params[self._CLOSE_PARAM] = effective_date
            cursor.execute(sql_close, close_params)

            self._insert_current(cursor, table_name, record, effective_date)
            return 'UPDATE'

    @staticmethod
    def _insert_current(cursor, table_name, record, effective_date):
        """Insert ``record`` as the current version, working on a copy."""
        row = dict(record)
        row.update(valid_from=effective_date, valid_to=None, is_current=True)
        columns = list(row)
        placeholders = ", ".join(f"%({c})s" for c in columns)
        sql_insert = f"""
            INSERT INTO {table_name} ({', '.join(columns)})
            VALUES ({placeholders})
        """
        cursor.execute(sql_insert, row)

    @staticmethod
    def _row_as_mapping(cursor, row):
        """Return ``row`` as an object supporting ``.get(column)``.

        Dict-style rows are returned untouched; sequence rows are paired
        with the column names from ``cursor.description``.
        """
        if hasattr(row, "get"):
            return row
        columns = [col[0] for col in cursor.description]
        return dict(zip(columns, row))