93 lines
2.9 KiB
Python
93 lines
2.9 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""SCD2 (Slowly Changing Dimension Type 2) 处理器"""
|
|
from datetime import datetime
|
|
|
|
class SCD2Handler:
    """Maintain SCD2 (Slowly Changing Dimension Type 2) history rows.

    Each natural key has at most one "current" row, identified by
    ``valid_to IS NULL``. When a tracked field changes, the current row is
    closed (``valid_to`` set, ``is_current`` cleared) and a new current row
    is inserted.

    The handler only executes statements on ``db_ops.conn``; it never
    commits or rolls back, so transaction control stays with the caller.
    """

    def __init__(self, db_ops):
        """Store the database wrapper.

        Args:
            db_ops: Object exposing an open connection as ``db_ops.conn``;
                only ``conn.cursor()`` is used by this class.
        """
        self.db = db_ops

    def upsert(self, table_name: str, natural_key: list, tracked_fields: list,
               record: dict, effective_date: datetime = None) -> str:
        """Apply one record to an SCD2 table.

        NOTE: ``table_name`` and the keys of ``record`` / ``natural_key`` are
        interpolated into the SQL text as identifiers (identifiers cannot be
        bound as parameters). They must come from trusted configuration,
        never from untrusted input. All values are passed as bound parameters.

        Args:
            table_name: Target table name.
            natural_key: Column names that identify the business entity.
                Must not be empty.
            tracked_fields: Column names whose change triggers a new version.
            record: Column name -> value mapping for the incoming row. It is
                not modified by this method.
            effective_date: Timestamp used as ``valid_from`` of the new row
                and ``valid_to`` of the closed row. Defaults to
                ``datetime.now()``.

        Returns:
            ``'INSERT'`` when no current row existed, ``'UPDATE'`` when a
            tracked field changed and a new version was written, or
            ``'UNCHANGED'`` when nothing was written.

        Raises:
            ValueError: If ``natural_key`` is empty.
        """
        if not natural_key:
            # An empty key would render "WHERE  AND valid_to IS NULL".
            raise ValueError("natural_key must contain at least one field")

        if effective_date is None:
            effective_date = datetime.now()

        key_predicate = " AND ".join(f"{k} = %({k})s" for k in natural_key)

        # Build the new version on a copy so the caller's dict is untouched.
        new_version = {
            **record,
            "valid_from": effective_date,
            "valid_to": None,
            "is_current": True,
        }

        cursor = self.db.conn.cursor()
        try:
            # Look up the currently valid row for this natural key.
            sql_select = f"""
                SELECT * FROM {table_name}
                WHERE {key_predicate}
                AND valid_to IS NULL
            """
            cursor.execute(sql_select, record)
            row = cursor.fetchone()

            if not row:
                # No current row: this is the first version.
                self._insert_version(cursor, table_name, new_version)
                return 'INSERT'

            existing = self._row_as_mapping(cursor, row)
            has_changes = any(
                existing.get(field) != record.get(field)
                for field in tracked_fields
            )
            if not has_changes:
                return 'UNCHANGED'

            # A tracked field changed: close the old version first, then
            # insert the new one. The close timestamp uses its own bind name
            # so it cannot clobber a column value carried in ``record``.
            sql_close = f"""
                UPDATE {table_name}
                SET valid_to = %(scd2_valid_to)s,
                    is_current = FALSE
                WHERE {key_predicate}
                AND valid_to IS NULL
            """
            cursor.execute(
                sql_close, {**record, "scd2_valid_to": effective_date}
            )
            self._insert_version(cursor, table_name, new_version)
            return 'UPDATE'
        finally:
            # Release the cursor on every exit path, including errors.
            cursor.close()

    @staticmethod
    def _insert_version(cursor, table_name: str, row: dict) -> None:
        """Insert ``row`` into ``table_name`` using named bind parameters."""
        columns = list(row)
        placeholders = ", ".join(f"%({c})s" for c in columns)
        sql_insert = f"""
            INSERT INTO {table_name} ({', '.join(columns)})
            VALUES ({placeholders})
        """
        cursor.execute(sql_insert, row)

    @staticmethod
    def _row_as_mapping(cursor, row):
        """Return ``row`` as an object supporting ``.get(column)``.

        Rows that already provide ``get`` are returned unchanged; plain
        sequence rows are zipped with the column names from
        ``cursor.description``.
        """
        if hasattr(row, "get"):
            return row
        column_names = [col[0] for col in cursor.description]
        return dict(zip(column_names, row))
|