# -*- coding: utf-8 -*-
"""Backfill missing DWD column values from the matching ODS tables.

For every configured (DWD column, ODS column) pair the script copies the ODS
value into DWD rows where the DWD column is still NULL. The database DSN is
read from the ``PG_DSN`` environment variable (optionally supplied through the
``.env`` file located in the ``etl_billiards`` project directory).
"""
import os
import sys
from pathlib import Path

# The project package lives next to this script's parent directory; it must be
# on sys.path before the project-local imports below can be resolved.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv

load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection

# DWD backfill configuration:
# (dwd_table, ods_table, join_condition, [(dwd_col, ods_col), ...])
# In join_condition the alias "d" is the DWD table and "o" is the ODS table.
BACKFILL_CONFIGS = [
    # dwd_settlement_head
    (
        "billiards_dwd.dwd_settlement_head",
        "billiards_ods.settlement_records",
        "d.order_settle_id = o.id",
        [
            ("pl_coupon_sale_amount", "plcouponsaleamount"),
            ("mervou_sales_amount", "mervousalesamount"),
            ("electricity_money", "electricitymoney"),
            ("real_electricity_money", "realelectricitymoney"),
            ("electricity_adjust_money", "electricityadjustmoney"),
        ]
    ),
    # dwd_recharge_order
    (
        "billiards_dwd.dwd_recharge_order",
        "billiards_ods.recharge_settlements",
        "d.recharge_order_id = o.id",
        [
            ("pl_coupon_sale_amount", "plcouponsaleamount"),
            ("mervou_sales_amount", "mervousalesamount"),
            ("electricity_money", "electricitymoney"),
            ("real_electricity_money", "realelectricitymoney"),
            ("electricity_adjust_money", "electricityadjustmoney"),
        ]
    ),
    # dwd_member_balance_change
    (
        "billiards_dwd.dwd_member_balance_change",
        "billiards_ods.member_balance_changes",
        "d.balance_change_id = o.id",
        [
            ("principal_before", "principal_before"),
            ("principal_after", "principal_after"),
            ("principal_change_amount", "principal_data"),
        ]
    ),
    # dim_member
    (
        "billiards_dwd.dim_member",
        "billiards_ods.member_profiles",
        "d.member_id = o.id",
        [
            ("pay_money_sum", "pay_money_sum"),
            ("recharge_money_sum", "recharge_money_sum"),
        ]
    ),
    # dim_member_ex
    (
        "billiards_dwd.dim_member_ex",
        "billiards_ods.member_profiles",
        "d.member_id = o.id",
        [
            ("person_tenant_org_id", "person_tenant_org_id"),
            ("person_tenant_org_name", "person_tenant_org_name"),
            ("register_source", "register_source"),
        ]
    ),
    # dim_member_card_account
    (
        "billiards_dwd.dim_member_card_account",
        "billiards_ods.member_stored_value_cards",
        "d.member_card_id = o.id",
        [
            ("principal_balance", "principal_balance"),
            ("member_grade", "member_grade"),
        ]
    ),
    # dim_member_card_account_ex
    (
        "billiards_dwd.dim_member_card_account_ex",
        "billiards_ods.member_stored_value_cards",
        "d.member_card_id = o.id",
        [
            ("able_share_member_discount", "able_share_member_discount"),
            ("electricity_deduct_radio", "electricity_deduct_radio"),
            ("electricity_discount", "electricity_discount"),
            ("electricity_card_deduct", "electricitycarddeduct"),
            ("recharge_freeze_balance", "rechargefreezebalance"),
        ]
    ),
    # dwd_table_fee_log
    (
        "billiards_dwd.dwd_table_fee_log",
        "billiards_ods.table_fee_transactions",
        "d.table_fee_log_id = o.id",
        [
            ("activity_discount_amount", "activity_discount_amount"),
            ("real_service_money", "real_service_money"),
        ]
    ),
    # dwd_table_fee_log_ex
    (
        "billiards_dwd.dwd_table_fee_log_ex",
        "billiards_ods.table_fee_transactions",
        "d.table_fee_log_id = o.id",
        [
            ("order_consumption_type", "order_consumption_type"),
        ]
    ),
    # dwd_assistant_service_log
    (
        "billiards_dwd.dwd_assistant_service_log",
        "billiards_ods.assistant_service_records",
        "d.assistant_service_id = o.id",
        [
            ("real_service_money", "real_service_money"),
        ]
    ),
    # dwd_assistant_service_log_ex
    (
        "billiards_dwd.dwd_assistant_service_log_ex",
        "billiards_ods.assistant_service_records",
        "d.assistant_service_id = o.id",
        [
            ("assistant_team_name", "assistantteamname"),
        ]
    ),
    # dwd_store_goods_sale
    (
        "billiards_dwd.dwd_store_goods_sale",
        "billiards_ods.store_goods_sales_records",
        "d.store_goods_sale_id = o.id",
        [
            ("coupon_share_money", "coupon_share_money"),
        ]
    ),
    # dwd_groupbuy_redemption
    (
        "billiards_dwd.dwd_groupbuy_redemption",
        "billiards_ods.group_buy_redemption_records",
        "d.redemption_id = o.id",
        [
            ("coupon_sale_id", "coupon_sale_id"),
            ("member_discount_money", "member_discount_money"),
        ]
    ),
    # dwd_groupbuy_redemption_ex
    (
        "billiards_dwd.dwd_groupbuy_redemption_ex",
        "billiards_ods.group_buy_redemption_records",
        "d.redemption_id = o.id",
        [
            ("assistant_share_money", "assistant_share_money"),
            ("table_share_money", "table_share_money"),
            ("goods_share_money", "goods_share_money"),
            ("recharge_share_money", "recharge_share_money"),
        ]
    ),
    # dim_table
    (
        "billiards_dwd.dim_table",
        "billiards_ods.site_tables_master",
        "d.table_id = o.id",
        [
            ("order_id", "order_id"),
        ]
    ),
    # dim_store_goods
    (
        "billiards_dwd.dim_store_goods",
        "billiards_ods.store_goods_master",
        "d.site_goods_id = o.id",
        [
            ("commodity_code", "commodity_code"),
            ("not_sale", "not_sale"),
        ]
    ),
    # dim_tenant_goods
    (
        "billiards_dwd.dim_tenant_goods",
        "billiards_ods.tenant_goods_master",
        "d.tenant_goods_id = o.id",
        [
            ("not_sale", "not_sale"),
        ]
    ),
    # dim_groupbuy_package
    (
        "billiards_dwd.dim_groupbuy_package",
        "billiards_ods.group_buy_packages",
        "d.groupbuy_package_id = o.id",
        [
            ("sort", "sort"),
            ("is_first_limit", "is_first_limit"),
        ]
    ),
]


def column_exists(db, table: str, column: str) -> bool:
    """Return True when ``column`` is listed for ``table`` in information_schema.

    Args:
        db: Connection object exposing ``query(sql, params)``.
        table: Schema-qualified table name of the form ``"schema.table"``.
        column: Column name; it is lower-cased before the lookup.

    Raises:
        ValueError: If ``table`` does not contain exactly one ``"."``.
    """
    schema, tbl = table.split(".")
    result = db.query(
        """
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s AND column_name = %s
        """,
        (schema, tbl, column.lower()),
    )
    return bool(result)


def _build_backfill_sql(
    dwd_table: str, ods_table: str, join_cond: str, dwd_col: str, ods_col: str
) -> str:
    """Build the UPDATE that copies an ODS column into NULL DWD cells."""
    # Identifiers come from the static BACKFILL_CONFIGS above, not from user
    # input, so they are interpolated directly (identifiers cannot be bound
    # as query parameters).
    return f"""
        UPDATE {dwd_table} d
        SET "{dwd_col}" = o."{ods_col}"
        FROM {ods_table} o
        WHERE {join_cond}
        AND d."{dwd_col}" IS NULL
        AND o."{ods_col}" IS NOT NULL
    """


def main():
    """Run every configured backfill and print a per-column report.

    Each column is updated and committed on its own; a failure is rolled
    back, recorded, and does not stop the remaining columns. Returns early
    (after printing an error) when ``PG_DSN`` is not set.
    """
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("Error: PG_DSN not set")
        return

    db = DatabaseConnection(dsn)
    # try/finally guarantees the connection is released even when something
    # outside the per-column handler raises (e.g. a column_exists lookup).
    try:
        print("=" * 70)
        print("DWD Backfill from ODS Script")
        print("=" * 70)

        total_updates = 0
        errors = []

        for dwd_table, ods_table, join_cond, columns in BACKFILL_CONFIGS:
            print(f"\n[{dwd_table}]")

            for dwd_col, ods_col in columns:
                # Both sides must have the column, otherwise skip the pair.
                if not column_exists(db, dwd_table, dwd_col):
                    print(f" {dwd_col}: SKIP (DWD column not found)")
                    continue
                if not column_exists(db, ods_table, ods_col):
                    print(f" {dwd_col}: SKIP (ODS column {ods_col} not found)")
                    continue

                sql = _build_backfill_sql(
                    dwd_table, ods_table, join_cond, dwd_col, ods_col
                )

                try:
                    db.execute(sql)
                    db.commit()

                    # Report how many rows now carry a value in this column.
                    count_sql = (
                        f'SELECT COUNT(*) as cnt FROM {dwd_table} '
                        f'WHERE "{dwd_col}" IS NOT NULL'
                    )
                    cnt = db.query(count_sql)[0]["cnt"]
                    print(f" {dwd_col}: OK (now {cnt} non-null)")
                    total_updates += 1
                except Exception as e:
                    # Best-effort per column: undo this statement, keep the
                    # first line of the error (truncated) and move on.
                    db.rollback()
                    err_msg = str(e).split("\n")[0][:80]
                    print(f" {dwd_col}: ERROR - {err_msg}")
                    errors.append((dwd_table, dwd_col, err_msg))

        print("\n" + "=" * 70)
        print(f"Completed: {total_updates} columns processed")
        if errors:
            print(f"Errors: {len(errors)}")
            for t, c, e in errors:
                print(f" - {t}.{c}: {e}")
    finally:
        db.close()


if __name__ == "__main__":
    main()