908 lines
42 KiB
Python
908 lines
42 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""DWD 装载任务:从 ODS 增量写入 DWD(维度 SCD2,事实按时间增量)。"""
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime
|
||
from typing import Any, Dict, Iterable, List, Sequence
|
||
|
||
from psycopg2.extras import RealDictCursor
|
||
|
||
from .base_task import BaseTask, TaskContext
|
||
|
||
|
||
class DwdLoadTask(BaseTask):
|
||
"""负责 DWD 装载:维度表做 SCD2 合并,事实表按时间增量写入。"""
|
||
|
||
# DWD -> ODS 表映射(ODS 表名已与示例 JSON 前缀统一)
|
||
TABLE_MAP: dict[str, str] = {
|
||
# 维度
|
||
# 门店:改用台费流水中的 siteprofile 快照,补齐 org/地址等字段
|
||
"billiards_dwd.dim_site": "billiards_ods.table_fee_transactions",
|
||
"billiards_dwd.dim_site_ex": "billiards_ods.table_fee_transactions",
|
||
"billiards_dwd.dim_table": "billiards_ods.site_tables_master",
|
||
"billiards_dwd.dim_table_ex": "billiards_ods.site_tables_master",
|
||
"billiards_dwd.dim_assistant": "billiards_ods.assistant_accounts_master",
|
||
"billiards_dwd.dim_assistant_ex": "billiards_ods.assistant_accounts_master",
|
||
"billiards_dwd.dim_member": "billiards_ods.member_profiles",
|
||
"billiards_dwd.dim_member_ex": "billiards_ods.member_profiles",
|
||
"billiards_dwd.dim_member_card_account": "billiards_ods.member_stored_value_cards",
|
||
"billiards_dwd.dim_member_card_account_ex": "billiards_ods.member_stored_value_cards",
|
||
"billiards_dwd.dim_tenant_goods": "billiards_ods.tenant_goods_master",
|
||
"billiards_dwd.dim_tenant_goods_ex": "billiards_ods.tenant_goods_master",
|
||
"billiards_dwd.dim_store_goods": "billiards_ods.store_goods_master",
|
||
"billiards_dwd.dim_store_goods_ex": "billiards_ods.store_goods_master",
|
||
"billiards_dwd.dim_goods_category": "billiards_ods.stock_goods_category_tree",
|
||
"billiards_dwd.dim_groupbuy_package": "billiards_ods.group_buy_packages",
|
||
"billiards_dwd.dim_groupbuy_package_ex": "billiards_ods.group_buy_packages",
|
||
# 事实
|
||
"billiards_dwd.dwd_settlement_head": "billiards_ods.settlement_records",
|
||
"billiards_dwd.dwd_settlement_head_ex": "billiards_ods.settlement_records",
|
||
"billiards_dwd.dwd_table_fee_log": "billiards_ods.table_fee_transactions",
|
||
"billiards_dwd.dwd_table_fee_log_ex": "billiards_ods.table_fee_transactions",
|
||
"billiards_dwd.dwd_table_fee_adjust": "billiards_ods.table_fee_discount_records",
|
||
"billiards_dwd.dwd_table_fee_adjust_ex": "billiards_ods.table_fee_discount_records",
|
||
"billiards_dwd.dwd_store_goods_sale": "billiards_ods.store_goods_sales_records",
|
||
"billiards_dwd.dwd_store_goods_sale_ex": "billiards_ods.store_goods_sales_records",
|
||
"billiards_dwd.dwd_assistant_service_log": "billiards_ods.assistant_service_records",
|
||
"billiards_dwd.dwd_assistant_service_log_ex": "billiards_ods.assistant_service_records",
|
||
"billiards_dwd.dwd_assistant_trash_event": "billiards_ods.assistant_cancellation_records",
|
||
"billiards_dwd.dwd_assistant_trash_event_ex": "billiards_ods.assistant_cancellation_records",
|
||
"billiards_dwd.dwd_member_balance_change": "billiards_ods.member_balance_changes",
|
||
"billiards_dwd.dwd_member_balance_change_ex": "billiards_ods.member_balance_changes",
|
||
"billiards_dwd.dwd_groupbuy_redemption": "billiards_ods.group_buy_redemption_records",
|
||
"billiards_dwd.dwd_groupbuy_redemption_ex": "billiards_ods.group_buy_redemption_records",
|
||
"billiards_dwd.dwd_platform_coupon_redemption": "billiards_ods.platform_coupon_redemption_records",
|
||
"billiards_dwd.dwd_platform_coupon_redemption_ex": "billiards_ods.platform_coupon_redemption_records",
|
||
"billiards_dwd.dwd_recharge_order": "billiards_ods.recharge_settlements",
|
||
"billiards_dwd.dwd_recharge_order_ex": "billiards_ods.recharge_settlements",
|
||
"billiards_dwd.dwd_payment": "billiards_ods.payment_transactions",
|
||
"billiards_dwd.dwd_refund": "billiards_ods.refund_transactions",
|
||
"billiards_dwd.dwd_refund_ex": "billiards_ods.refund_transactions",
|
||
}
|
||
|
||
SCD_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
|
||
FACT_ORDER_CANDIDATES = [
|
||
"fetched_at",
|
||
"pay_time",
|
||
"create_time",
|
||
"update_time",
|
||
"occur_time",
|
||
"settle_time",
|
||
"start_use_time",
|
||
]
|
||
|
||
# 特殊列映射:dwd 列名 -> 源列表达式(可选 CAST)
|
||
FACT_MAPPINGS: dict[str, list[tuple[str, str, str | None]]] = {
|
||
# 维度表(补齐主键/字段差异)
|
||
"billiards_dwd.dim_site": [
|
||
("org_id", "siteprofile->>'org_id'", None),
|
||
("shop_name", "siteprofile->>'shop_name'", None),
|
||
("site_label", "siteprofile->>'site_label'", None),
|
||
("full_address", "siteprofile->>'full_address'", None),
|
||
("address", "siteprofile->>'address'", None),
|
||
("longitude", "siteprofile->>'longitude'", "numeric"),
|
||
("latitude", "siteprofile->>'latitude'", "numeric"),
|
||
("tenant_site_region_id", "siteprofile->>'tenant_site_region_id'", None),
|
||
("business_tel", "siteprofile->>'business_tel'", None),
|
||
("site_type", "siteprofile->>'site_type'", None),
|
||
("shop_status", "siteprofile->>'shop_status'", None),
|
||
("tenant_id", "siteprofile->>'tenant_id'", None),
|
||
],
|
||
"billiards_dwd.dim_site_ex": [
|
||
("auto_light", "siteprofile->>'auto_light'", None),
|
||
("attendance_enabled", "siteprofile->>'attendance_enabled'", None),
|
||
("attendance_distance", "siteprofile->>'attendance_distance'", None),
|
||
("prod_env", "siteprofile->>'prod_env'", None),
|
||
("light_status", "siteprofile->>'light_status'", None),
|
||
("light_type", "siteprofile->>'light_type'", None),
|
||
("light_token", "siteprofile->>'light_token'", None),
|
||
("address", "siteprofile->>'address'", None),
|
||
("avatar", "siteprofile->>'avatar'", None),
|
||
("wifi_name", "siteprofile->>'wifi_name'", None),
|
||
("wifi_password", "siteprofile->>'wifi_password'", None),
|
||
("customer_service_qrcode", "siteprofile->>'customer_service_qrcode'", None),
|
||
("customer_service_wechat", "siteprofile->>'customer_service_wechat'", None),
|
||
("fixed_pay_qrcode", "siteprofile->>'fixed_pay_qrCode'", None),
|
||
("longitude", "siteprofile->>'longitude'", "numeric"),
|
||
("latitude", "siteprofile->>'latitude'", "numeric"),
|
||
("tenant_site_region_id", "siteprofile->>'tenant_site_region_id'", None),
|
||
("site_type", "siteprofile->>'site_type'", None),
|
||
("site_label", "siteprofile->>'site_label'", None),
|
||
("shop_status", "siteprofile->>'shop_status'", None),
|
||
("create_time", "siteprofile->>'create_time'", "timestamptz"),
|
||
("update_time", "siteprofile->>'update_time'", "timestamptz"),
|
||
],
|
||
"billiards_dwd.dim_table": [
|
||
("table_id", "id", None),
|
||
("site_table_area_name", "areaname", None),
|
||
("tenant_table_area_id", "site_table_area_id", None),
|
||
],
|
||
"billiards_dwd.dim_table_ex": [
|
||
("table_id", "id", None),
|
||
("table_cloth_use_time", "table_cloth_use_time", None),
|
||
],
|
||
"billiards_dwd.dim_assistant": [("assistant_id", "id", None), ("user_id", "staff_id", None)],
|
||
"billiards_dwd.dim_assistant_ex": [
|
||
("assistant_id", "id", None),
|
||
("introduce", "introduce", None),
|
||
("group_name", "group_name", None),
|
||
("light_equipment_id", "light_equipment_id", None),
|
||
],
|
||
"billiards_dwd.dim_member": [("member_id", "id", None)],
|
||
"billiards_dwd.dim_member_ex": [
|
||
("member_id", "id", None),
|
||
("register_site_name", "site_name", None),
|
||
],
|
||
"billiards_dwd.dim_member_card_account": [("member_card_id", "id", None)],
|
||
"billiards_dwd.dim_member_card_account_ex": [
|
||
("member_card_id", "id", None),
|
||
("tenant_name", "tenantname", None),
|
||
("tenantavatar", "tenantavatar", None),
|
||
("card_no", "card_no", None),
|
||
("bind_password", "bind_password", None),
|
||
("use_scene", "use_scene", None),
|
||
("tableareaid", "tableareaid", None),
|
||
("goodscategoryid", "goodscategoryid", None),
|
||
],
|
||
"billiards_dwd.dim_tenant_goods": [
|
||
("tenant_goods_id", "id", None),
|
||
("category_name", "categoryname", None),
|
||
],
|
||
"billiards_dwd.dim_tenant_goods_ex": [
|
||
("tenant_goods_id", "id", None),
|
||
("remark_name", "remark_name", None),
|
||
("goods_bar_code", "goods_bar_code", None),
|
||
("commodity_code_list", "commodity_code", None),
|
||
("is_in_site", "isinsite", "boolean"),
|
||
],
|
||
"billiards_dwd.dim_store_goods": [
|
||
("site_goods_id", "id", None),
|
||
("category_level1_name", "onecategoryname", None),
|
||
("category_level2_name", "twocategoryname", None),
|
||
("created_at", "create_time", None),
|
||
("updated_at", "update_time", None),
|
||
("avg_monthly_sales", "average_monthly_sales", None),
|
||
("batch_stock_qty", "stock", None),
|
||
("sale_qty", "sale_num", None),
|
||
("total_sales_qty", "total_sales", None),
|
||
],
|
||
"billiards_dwd.dim_store_goods_ex": [
|
||
("site_goods_id", "id", None),
|
||
("goods_barcode", "goods_bar_code", None),
|
||
("stock_qty", "stock", None),
|
||
("stock_secondary_qty", "stock_a", None),
|
||
("safety_stock_qty", "safe_stock", None),
|
||
("site_name", "sitename", None),
|
||
("goods_cover_url", "goods_cover", None),
|
||
("provisional_total_cost", "total_purchase_cost", None),
|
||
("is_discountable", "able_discount", None),
|
||
("freeze_status", "freeze", None),
|
||
("remark", "remark", None),
|
||
("days_on_shelf", "days_available", None),
|
||
("sort_order", "sort", None),
|
||
],
|
||
"billiards_dwd.dim_goods_category": [
|
||
("category_id", "id", None),
|
||
("tenant_id", "tenant_id", None),
|
||
("category_name", "category_name", None),
|
||
("alias_name", "alias_name", None),
|
||
("parent_category_id", "pid", None),
|
||
("business_name", "business_name", None),
|
||
("tenant_goods_business_id", "tenant_goods_business_id", None),
|
||
("sort_order", "sort", None),
|
||
("open_salesman", "open_salesman", None),
|
||
("is_warehousing", "is_warehousing", None),
|
||
("category_level", "CASE WHEN pid = 0 THEN 1 ELSE 2 END", None),
|
||
("is_leaf", "CASE WHEN categoryboxes IS NULL OR jsonb_array_length(categoryboxes)=0 THEN 1 ELSE 0 END", None),
|
||
],
|
||
"billiards_dwd.dim_groupbuy_package": [
|
||
("groupbuy_package_id", "id", None),
|
||
("package_template_id", "package_id", None),
|
||
("coupon_face_value", "coupon_money", None),
|
||
("duration_seconds", "duration", None),
|
||
],
|
||
"billiards_dwd.dim_groupbuy_package_ex": [
|
||
("groupbuy_package_id", "id", None),
|
||
("table_area_id", "table_area_id", None),
|
||
("tenant_table_area_id", "tenant_table_area_id", None),
|
||
("usable_range", "usable_range", None),
|
||
("table_area_id_list", "table_area_id_list", None),
|
||
("package_type", "type", None),
|
||
],
|
||
# 事实表主键及关键差异列
|
||
"billiards_dwd.dwd_table_fee_log": [("table_fee_log_id", "id", None)],
|
||
"billiards_dwd.dwd_table_fee_log_ex": [
|
||
("table_fee_log_id", "id", None),
|
||
("salesman_name", "salesman_name", None),
|
||
],
|
||
"billiards_dwd.dwd_table_fee_adjust": [
|
||
("table_fee_adjust_id", "id", None),
|
||
("table_id", "site_table_id", None),
|
||
("table_area_id", "tenant_table_area_id", None),
|
||
("table_area_name", "tableprofile->>'table_area_name'", None),
|
||
("adjust_time", "create_time", None),
|
||
],
|
||
"billiards_dwd.dwd_table_fee_adjust_ex": [
|
||
("table_fee_adjust_id", "id", None),
|
||
("ledger_name", "ledger_name", None),
|
||
],
|
||
"billiards_dwd.dwd_store_goods_sale": [("store_goods_sale_id", "id", None), ("discount_price", "discount_money", None)],
|
||
"billiards_dwd.dwd_store_goods_sale_ex": [
|
||
("store_goods_sale_id", "id", None),
|
||
("option_value_name", "option_value_name", None),
|
||
("open_salesman_flag", "opensalesman", "integer"),
|
||
("salesman_name", "salesman_name", None),
|
||
("salesman_org_id", "sales_man_org_id", None),
|
||
("legacy_order_goods_id", "ordergoodsid", None),
|
||
("site_name", "sitename", None),
|
||
("legacy_site_id", "siteid", None),
|
||
],
|
||
"billiards_dwd.dwd_assistant_service_log": [
|
||
("assistant_service_id", "id", None),
|
||
("assistant_no", "assistantno", None),
|
||
("site_assistant_id", "order_assistant_id", None),
|
||
("level_name", "levelname", None),
|
||
("skill_name", "skillname", None),
|
||
],
|
||
"billiards_dwd.dwd_assistant_service_log_ex": [
|
||
("assistant_service_id", "id", None),
|
||
("assistant_name", "assistantname", None),
|
||
("ledger_group_name", "ledger_group_name", None),
|
||
("trash_applicant_name", "trash_applicant_name", None),
|
||
("trash_reason", "trash_reason", None),
|
||
("salesman_name", "salesman_name", None),
|
||
("table_name", "tablename", None),
|
||
],
|
||
"billiards_dwd.dwd_assistant_trash_event": [
|
||
("assistant_trash_event_id", "id", None),
|
||
("assistant_no", "assistantname", None),
|
||
("abolish_amount", "assistantabolishamount", None),
|
||
("charge_minutes_raw", "pdchargeminutes", None),
|
||
("site_id", "siteid", None),
|
||
("table_id", "tableid", None),
|
||
("table_area_id", "tableareaid", None),
|
||
("assistant_name", "assistantname", None),
|
||
("trash_reason", "trashreason", None),
|
||
("create_time", "createtime", None),
|
||
],
|
||
"billiards_dwd.dwd_assistant_trash_event_ex": [
|
||
("assistant_trash_event_id", "id", None),
|
||
("table_area_name", "tablearea", None),
|
||
("table_name", "tablename", None),
|
||
],
|
||
"billiards_dwd.dwd_member_balance_change": [
|
||
("balance_change_id", "id", None),
|
||
("balance_before", "before", None),
|
||
("change_amount", "account_data", None),
|
||
("balance_after", "after", None),
|
||
("card_type_name", "membercardtypename", None),
|
||
("change_time", "create_time", None),
|
||
("member_name", "membername", None),
|
||
("member_mobile", "membermobile", None),
|
||
],
|
||
"billiards_dwd.dwd_member_balance_change_ex": [
|
||
("balance_change_id", "id", None),
|
||
("pay_site_name", "paysitename", None),
|
||
("register_site_name", "registersitename", None),
|
||
],
|
||
"billiards_dwd.dwd_groupbuy_redemption": [("redemption_id", "id", None)],
|
||
"billiards_dwd.dwd_groupbuy_redemption_ex": [
|
||
("redemption_id", "id", None),
|
||
("table_area_name", "tableareaname", None),
|
||
("site_name", "sitename", None),
|
||
("table_name", "tablename", None),
|
||
("goods_option_price", "goodsoptionprice", None),
|
||
("salesman_name", "salesman_name", None),
|
||
("salesman_org_id", "sales_man_org_id", None),
|
||
("ledger_group_name", "ledger_group_name", None),
|
||
],
|
||
"billiards_dwd.dwd_platform_coupon_redemption": [("platform_coupon_redemption_id", "id", None)],
|
||
"billiards_dwd.dwd_platform_coupon_redemption_ex": [
|
||
("platform_coupon_redemption_id", "id", None),
|
||
("coupon_cover", "coupon_cover", None),
|
||
],
|
||
"billiards_dwd.dwd_payment": [("payment_id", "id", None), ("pay_date", "pay_time", "date")],
|
||
"billiards_dwd.dwd_refund": [("refund_id", "id", None)],
|
||
"billiards_dwd.dwd_refund_ex": [
|
||
("refund_id", "id", None),
|
||
("tenant_name", "tenantname", None),
|
||
("channel_payer_id", "channel_payer_id", None),
|
||
("channel_pay_no", "channel_pay_no", None),
|
||
],
|
||
# 结算头:settlement_records(源列为小写驼峰/无下划线,需要显式映射)
|
||
"billiards_dwd.dwd_settlement_head": [
|
||
("order_settle_id", "id", None),
|
||
("tenant_id", "tenantid", None),
|
||
("site_id", "siteid", None),
|
||
("site_name", "sitename", None),
|
||
("table_id", "tableid", None),
|
||
("settle_name", "settlename", None),
|
||
("order_trade_no", "settlerelateid", None),
|
||
("create_time", "createtime", None),
|
||
("pay_time", "paytime", None),
|
||
("settle_type", "settletype", None),
|
||
("revoke_order_id", "revokeorderid", None),
|
||
("member_id", "memberid", None),
|
||
("member_name", "membername", None),
|
||
("member_phone", "memberphone", None),
|
||
("member_card_account_id", "tenantmembercardid", None),
|
||
("member_card_type_name", "membercardtypename", None),
|
||
("is_bind_member", "isbindmember", None),
|
||
("member_discount_amount", "memberdiscountamount", None),
|
||
("consume_money", "consumemoney", None),
|
||
("table_charge_money", "tablechargemoney", None),
|
||
("goods_money", "goodsmoney", None),
|
||
("real_goods_money", "realgoodsmoney", None),
|
||
("assistant_pd_money", "assistantpdmoney", None),
|
||
("assistant_cx_money", "assistantcxmoney", None),
|
||
("adjust_amount", "adjustamount", None),
|
||
("pay_amount", "payamount", None),
|
||
("balance_amount", "balanceamount", None),
|
||
("recharge_card_amount", "rechargecardamount", None),
|
||
("gift_card_amount", "giftcardamount", None),
|
||
("coupon_amount", "couponamount", None),
|
||
("rounding_amount", "roundingamount", None),
|
||
("point_amount", "pointamount", None),
|
||
],
|
||
"billiards_dwd.dwd_settlement_head_ex": [
|
||
("order_settle_id", "id", None),
|
||
("serial_number", "serialnumber", None),
|
||
("settle_status", "settlestatus", None),
|
||
("can_be_revoked", "canberevoked", "boolean"),
|
||
("revoke_order_name", "revokeordername", None),
|
||
("revoke_time", "revoketime", None),
|
||
("is_first_order", "isfirst", "boolean"),
|
||
("service_money", "servicemoney", None),
|
||
("cash_amount", "cashamount", None),
|
||
("card_amount", "cardamount", None),
|
||
("online_amount", "onlineamount", None),
|
||
("refund_amount", "refundamount", None),
|
||
("prepay_money", "prepaymoney", None),
|
||
("payment_method", "paymentmethod", None),
|
||
("coupon_sale_amount", "couponsaleamount", None),
|
||
("all_coupon_discount", "allcoupondiscount", None),
|
||
("goods_promotion_money", "goodspromotionmoney", None),
|
||
("assistant_promotion_money", "assistantpromotionmoney", None),
|
||
("activity_discount", "activitydiscount", None),
|
||
("assistant_manual_discount", "assistantmanualdiscount", None),
|
||
("point_discount_price", "pointdiscountprice", None),
|
||
("point_discount_cost", "pointdiscountcost", None),
|
||
("is_use_coupon", "isusecoupon", "boolean"),
|
||
("is_use_discount", "isusediscount", "boolean"),
|
||
("is_activity", "isactivity", "boolean"),
|
||
("operator_name", "operatorname", None),
|
||
("salesman_name", "salesmanname", None),
|
||
("order_remark", "orderremark", None),
|
||
("operator_id", "operatorid", None),
|
||
("salesman_user_id", "salesmanuserid", None),
|
||
],
|
||
# 充值结算:recharge_settlements(字段风格同 settlement_records)
|
||
"billiards_dwd.dwd_recharge_order": [
|
||
("recharge_order_id", "id", None),
|
||
("tenant_id", "tenantid", None),
|
||
("site_id", "siteid", None),
|
||
("member_id", "memberid", None),
|
||
("member_name_snapshot", "membername", None),
|
||
("member_phone_snapshot", "memberphone", None),
|
||
("tenant_member_card_id", "tenantmembercardid", None),
|
||
("member_card_type_name", "membercardtypename", None),
|
||
("settle_relate_id", "settlerelateid", None),
|
||
("settle_type", "settletype", None),
|
||
("settle_name", "settlename", None),
|
||
("is_first", "isfirst", None),
|
||
("pay_amount", "payamount", None),
|
||
("refund_amount", "refundamount", None),
|
||
("point_amount", "pointamount", None),
|
||
("cash_amount", "cashamount", None),
|
||
("payment_method", "paymentmethod", None),
|
||
("create_time", "createtime", None),
|
||
("pay_time", "paytime", None),
|
||
],
|
||
"billiards_dwd.dwd_recharge_order_ex": [
|
||
("recharge_order_id", "id", None),
|
||
("site_name_snapshot", "sitename", None),
|
||
("salesman_name", "salesmanname", None),
|
||
("order_remark", "orderremark", None),
|
||
("revoke_order_name", "revokeordername", None),
|
||
("settle_status", "settlestatus", None),
|
||
("is_bind_member", "isbindmember", "boolean"),
|
||
("is_activity", "isactivity", "boolean"),
|
||
("is_use_coupon", "isusecoupon", "boolean"),
|
||
("is_use_discount", "isusediscount", "boolean"),
|
||
("can_be_revoked", "canberevoked", "boolean"),
|
||
("online_amount", "onlineamount", None),
|
||
("balance_amount", "balanceamount", None),
|
||
("card_amount", "cardamount", None),
|
||
("coupon_amount", "couponamount", None),
|
||
("recharge_card_amount", "rechargecardamount", None),
|
||
("gift_card_amount", "giftcardamount", None),
|
||
("prepay_money", "prepaymoney", None),
|
||
("consume_money", "consumemoney", None),
|
||
("goods_money", "goodsmoney", None),
|
||
("real_goods_money", "realgoodsmoney", None),
|
||
("table_charge_money", "tablechargemoney", None),
|
||
("service_money", "servicemoney", None),
|
||
("activity_discount", "activitydiscount", None),
|
||
("all_coupon_discount", "allcoupondiscount", None),
|
||
("goods_promotion_money", "goodspromotionmoney", None),
|
||
("assistant_promotion_money", "assistantpromotionmoney", None),
|
||
("assistant_pd_money", "assistantpdmoney", None),
|
||
("assistant_cx_money", "assistantcxmoney", None),
|
||
("assistant_manual_discount", "assistantmanualdiscount", None),
|
||
("coupon_sale_amount", "couponsaleamount", None),
|
||
("member_discount_amount", "memberdiscountamount", None),
|
||
("point_discount_price", "pointdiscountprice", None),
|
||
("point_discount_cost", "pointdiscountcost", None),
|
||
("adjust_amount", "adjustamount", None),
|
||
("rounding_amount", "roundingamount", None),
|
||
("operator_id", "operatorid", None),
|
||
("operator_name_snapshot", "operatorname", None),
|
||
("salesman_user_id", "salesmanuserid", None),
|
||
("salesman_name", "salesmanname", None),
|
||
("order_remark", "orderremark", None),
|
||
("table_id", "tableid", None),
|
||
("serial_number", "serialnumber", None),
|
||
("revoke_order_id", "revokeorderid", None),
|
||
("revoke_order_name", "revokeordername", None),
|
||
("revoke_time", "revoketime", None),
|
||
],
|
||
}
|
||
|
||
def get_task_code(self) -> str:
|
||
"""返回任务编码。"""
|
||
return "DWD_LOAD_FROM_ODS"
|
||
|
||
def extract(self, context: TaskContext) -> dict[str, Any]:
|
||
"""准备运行所需的上下文信息。"""
|
||
return {"now": datetime.now()}
|
||
|
||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
|
||
"""遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。"""
|
||
now = extracted["now"]
|
||
summary: List[Dict[str, Any]] = []
|
||
with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||
for dwd_table, ods_table in self.TABLE_MAP.items():
|
||
dwd_cols = self._get_columns(cur, dwd_table)
|
||
ods_cols = self._get_columns(cur, ods_table)
|
||
if not dwd_cols:
|
||
self.logger.warning("跳过 %s,未能获取 DWD 列信息", dwd_table)
|
||
continue
|
||
|
||
if self._table_base(dwd_table).startswith("dim_"):
|
||
processed = self._merge_dim_scd2(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
|
||
summary.append({"table": dwd_table, "mode": "SCD2", "processed": processed})
|
||
else:
|
||
dwd_types = self._get_column_types(cur, dwd_table, "billiards_dwd")
|
||
ods_types = self._get_column_types(cur, ods_table, "billiards_ods")
|
||
inserted = self._merge_fact_increment(
|
||
cur, dwd_table, ods_table, dwd_cols, ods_cols, dwd_types, ods_types
|
||
)
|
||
summary.append({"table": dwd_table, "mode": "INCREMENT", "inserted": inserted})
|
||
|
||
self.db.conn.commit()
|
||
return {"tables": summary}
|
||
|
||
# ---------------------- helpers ----------------------
|
||
def _get_columns(self, cur, table: str) -> List[str]:
|
||
"""获取指定表的列名(小写)。"""
|
||
schema, name = self._split_table_name(table, default_schema="billiards_dwd")
|
||
cur.execute(
|
||
"""
|
||
SELECT column_name
|
||
FROM information_schema.columns
|
||
WHERE table_schema = %s AND table_name = %s
|
||
""",
|
||
(schema, name),
|
||
)
|
||
return [r["column_name"].lower() for r in cur.fetchall()]
|
||
|
||
def _get_primary_keys(self, cur, table: str) -> List[str]:
|
||
"""获取表的主键列名列表。"""
|
||
schema, name = self._split_table_name(table, default_schema="billiards_dwd")
|
||
cur.execute(
|
||
"""
|
||
SELECT kcu.column_name
|
||
FROM information_schema.table_constraints tc
|
||
JOIN information_schema.key_column_usage kcu
|
||
ON tc.constraint_name = kcu.constraint_name
|
||
AND tc.table_schema = kcu.table_schema
|
||
AND tc.table_name = kcu.table_name
|
||
WHERE tc.table_schema = %s
|
||
AND tc.table_name = %s
|
||
AND tc.constraint_type = 'PRIMARY KEY'
|
||
ORDER BY kcu.ordinal_position
|
||
""",
|
||
(schema, name),
|
||
)
|
||
return [r["column_name"].lower() for r in cur.fetchall()]
|
||
|
||
def _get_column_types(self, cur, table: str, default_schema: str) -> Dict[str, str]:
|
||
"""获取列的数据类型(information_schema.data_type)。"""
|
||
schema, name = self._split_table_name(table, default_schema=default_schema)
|
||
cur.execute(
|
||
"""
|
||
SELECT column_name, data_type
|
||
FROM information_schema.columns
|
||
WHERE table_schema = %s AND table_name = %s
|
||
""",
|
||
(schema, name),
|
||
)
|
||
return {r["column_name"].lower(): r["data_type"].lower() for r in cur.fetchall()}
|
||
|
||
def _build_column_mapping(
|
||
self, dwd_table: str, pk_cols: Sequence[str], ods_cols: Sequence[str]
|
||
) -> Dict[str, tuple[str, str | None]]:
|
||
"""合并显式 FACT_MAPPINGS 与主键兜底映射。"""
|
||
mapping_entries = self.FACT_MAPPINGS.get(dwd_table, [])
|
||
mapping: Dict[str, tuple[str, str | None]] = {
|
||
dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
|
||
}
|
||
ods_set = {c.lower() for c in ods_cols}
|
||
for pk in pk_cols:
|
||
pk_lower = pk.lower()
|
||
if pk_lower not in mapping and pk_lower not in ods_set and "id" in ods_set:
|
||
mapping[pk_lower] = ("id", None)
|
||
return mapping
|
||
|
||
def _fetch_source_rows(
|
||
self, cur, table: str, columns: Sequence[str], where_sql: str = "", params: Sequence[Any] = None
|
||
) -> List[Dict[str, Any]]:
|
||
"""从源表读取指定列,返回小写键的字典列表。"""
|
||
schema, name = self._split_table_name(table, default_schema="billiards_ods")
|
||
cols_sql = ", ".join(f'"{c}"' for c in columns)
|
||
sql = f'SELECT {cols_sql} FROM "{schema}"."{name}" {where_sql}'
|
||
cur.execute(sql, params or [])
|
||
rows = []
|
||
for r in cur.fetchall():
|
||
rows.append({k.lower(): v for k, v in r.items()})
|
||
return rows
|
||
|
||
def _expand_goods_category_rows(self, rows: list[Dict[str, Any]]) -> list[Dict[str, Any]]:
|
||
"""将分类表中的 categoryboxes 元素展开为子类记录。"""
|
||
expanded: list[Dict[str, Any]] = []
|
||
for r in rows:
|
||
expanded.append(r)
|
||
boxes = r.get("categoryboxes")
|
||
if isinstance(boxes, list):
|
||
for child in boxes:
|
||
if not isinstance(child, dict):
|
||
continue
|
||
child_row: Dict[str, Any] = {}
|
||
# 继承父级的租户与业务大类信息
|
||
child_row["tenant_id"] = r.get("tenant_id")
|
||
child_row["business_name"] = child.get("business_name", r.get("business_name"))
|
||
child_row["tenant_goods_business_id"] = child.get(
|
||
"tenant_goods_business_id", r.get("tenant_goods_business_id")
|
||
)
|
||
# 合并子类字段
|
||
child_row.update(child)
|
||
# 默认父子关系
|
||
child_row.setdefault("pid", r.get("id"))
|
||
# 衍生层级/叶子标记
|
||
child_boxes = child_row.get("categoryboxes")
|
||
if not isinstance(child_boxes, list):
|
||
is_leaf = 1
|
||
else:
|
||
is_leaf = 1 if len(child_boxes) == 0 else 0
|
||
child_row.setdefault("category_level", 2)
|
||
child_row.setdefault("is_leaf", is_leaf)
|
||
expanded.append(child_row)
|
||
return expanded
|
||
|
||
def _merge_dim_scd2(
|
||
self,
|
||
cur,
|
||
dwd_table: str,
|
||
ods_table: str,
|
||
dwd_cols: Sequence[str],
|
||
ods_cols: Sequence[str],
|
||
now: datetime,
|
||
) -> int:
|
||
"""对维表执行 SCD2 合并:对比变更关闭旧版并插入新版。"""
|
||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||
if not pk_cols:
|
||
raise ValueError(f"{dwd_table} 未配置主键,无法执行 SCD2 合并")
|
||
|
||
mapping = self._build_column_mapping(dwd_table, pk_cols, ods_cols)
|
||
ods_set = {c.lower() for c in ods_cols}
|
||
table_sql = self._format_table(ods_table, "billiards_ods")
|
||
# 构造 SELECT 表达式,支持 JSON/expression 映射
|
||
select_exprs: list[str] = []
|
||
added: set[str] = set()
|
||
for col in dwd_cols:
|
||
lc = col.lower()
|
||
if lc in self.SCD_COLS:
|
||
continue
|
||
if lc in mapping:
|
||
src, cast_type = mapping[lc]
|
||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||
added.add(lc)
|
||
elif lc in ods_set:
|
||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||
added.add(lc)
|
||
# 分类维度需要额外读取 categoryboxes 以展开子类
|
||
if dwd_table == "billiards_dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
|
||
select_exprs.append('"categoryboxes" AS "categoryboxes"')
|
||
added.add("categoryboxes")
|
||
# 主键兜底确保被选出
|
||
for pk in pk_cols:
|
||
lc = pk.lower()
|
||
if lc not in added:
|
||
if lc in mapping:
|
||
src, cast_type = mapping[lc]
|
||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||
elif lc in ods_set:
|
||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||
added.add(lc)
|
||
|
||
if not select_exprs:
|
||
return 0
|
||
|
||
sql = f"SELECT {', '.join(select_exprs)} FROM {table_sql}"
|
||
cur.execute(sql)
|
||
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
|
||
|
||
# 特殊:分类维度展开子类
|
||
if dwd_table == "billiards_dwd.dim_goods_category":
|
||
rows = self._expand_goods_category_rows(rows)
|
||
|
||
inserted_or_updated = 0
|
||
seen_pk = set()
|
||
for row in rows:
|
||
mapped_row: Dict[str, Any] = {}
|
||
for col in dwd_cols:
|
||
lc = col.lower()
|
||
if lc in self.SCD_COLS:
|
||
continue
|
||
value = row.get(lc)
|
||
if value is None and lc in mapping:
|
||
src, _ = mapping[lc]
|
||
value = row.get(src.lower())
|
||
mapped_row[lc] = value
|
||
|
||
pk_key = tuple(mapped_row.get(pk) for pk in pk_cols)
|
||
if pk_key in seen_pk:
|
||
continue
|
||
seen_pk.add(pk_key)
|
||
if self._upsert_scd2_row(cur, dwd_table, dwd_cols, pk_cols, mapped_row, now):
|
||
inserted_or_updated += 1
|
||
return len(rows)
|
||
|
||
def _upsert_scd2_row(
|
||
self,
|
||
cur,
|
||
dwd_table: str,
|
||
dwd_cols: Sequence[str],
|
||
pk_cols: Sequence[str],
|
||
src_row: Dict[str, Any],
|
||
now: datetime,
|
||
) -> bool:
|
||
"""SCD2 合并:若有变更则关闭旧版并插入新版本。"""
|
||
pk_values = [src_row.get(pk) for pk in pk_cols]
|
||
if any(v is None for v in pk_values):
|
||
self.logger.warning("跳过 %s:主键缺失 %s", dwd_table, dict(zip(pk_cols, pk_values)))
|
||
return False
|
||
|
||
where_clause = " AND ".join(f'"{pk}" = %s' for pk in pk_cols)
|
||
table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||
cur.execute(
|
||
f"SELECT * FROM {table_sql} WHERE {where_clause} AND COALESCE(scd2_is_current,1)=1 LIMIT 1",
|
||
pk_values,
|
||
)
|
||
current = cur.fetchone()
|
||
if current:
|
||
current = {k.lower(): v for k, v in current.items()}
|
||
|
||
if current and not self._is_row_changed(current, src_row, dwd_cols):
|
||
return False
|
||
|
||
if current:
|
||
version = (current.get("scd2_version") or 1) + 1
|
||
self._close_current_dim(cur, dwd_table, pk_cols, pk_values, now)
|
||
else:
|
||
version = 1
|
||
|
||
self._insert_dim_row(cur, dwd_table, dwd_cols, src_row, now, version)
|
||
return True
|
||
|
||
def _close_current_dim(self, cur, table: str, pk_cols: Sequence[str], pk_values: Sequence[Any], now: datetime) -> None:
|
||
"""关闭当前版本,标记 scd2_is_current=0 并填充结束时间。"""
|
||
set_sql = "scd2_end_time = %s, scd2_is_current = 0"
|
||
where_clause = " AND ".join(f'"{pk}" = %s' for pk in pk_cols)
|
||
table_sql = self._format_table(table, "billiards_dwd")
|
||
cur.execute(f"UPDATE {table_sql} SET {set_sql} WHERE {where_clause} AND COALESCE(scd2_is_current,1)=1", [now, *pk_values])
|
||
|
||
def _insert_dim_row(
|
||
self,
|
||
cur,
|
||
table: str,
|
||
dwd_cols: Sequence[str],
|
||
src_row: Dict[str, Any],
|
||
now: datetime,
|
||
version: int,
|
||
) -> None:
|
||
"""插入新的 SCD2 版本行。"""
|
||
insert_cols: List[str] = []
|
||
placeholders: List[str] = []
|
||
values: List[Any] = []
|
||
for col in sorted(dwd_cols):
|
||
lc = col.lower()
|
||
insert_cols.append(f'"{lc}"')
|
||
placeholders.append("%s")
|
||
if lc == "scd2_start_time":
|
||
values.append(now)
|
||
elif lc == "scd2_end_time":
|
||
values.append(datetime(9999, 12, 31, 0, 0, 0))
|
||
elif lc == "scd2_is_current":
|
||
values.append(1)
|
||
elif lc == "scd2_version":
|
||
values.append(version)
|
||
else:
|
||
values.append(src_row.get(lc))
|
||
table_sql = self._format_table(table, "billiards_dwd")
|
||
sql = f'INSERT INTO {table_sql} ({", ".join(insert_cols)}) VALUES ({", ".join(placeholders)})'
|
||
cur.execute(sql, values)
|
||
|
||
def _is_row_changed(self, current: Dict[str, Any], incoming: Dict[str, Any], dwd_cols: Sequence[str]) -> bool:
|
||
"""比较非 SCD2 列,判断是否存在变更。"""
|
||
for col in dwd_cols:
|
||
lc = col.lower()
|
||
if lc in self.SCD_COLS:
|
||
continue
|
||
if current.get(lc) != incoming.get(lc):
|
||
return True
|
||
return False
|
||
|
||
def _merge_fact_increment(
|
||
self,
|
||
cur,
|
||
dwd_table: str,
|
||
ods_table: str,
|
||
dwd_cols: Sequence[str],
|
||
ods_cols: Sequence[str],
|
||
dwd_types: Dict[str, str],
|
||
ods_types: Dict[str, str],
|
||
) -> int:
|
||
"""事实表按时间增量插入,默认按列名交集写入。"""
|
||
mapping_entries = self.FACT_MAPPINGS.get(dwd_table) or []
|
||
mapping: Dict[str, tuple[str, str | None]] = {
|
||
dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
|
||
}
|
||
|
||
mapping_dest = [dst for dst, _, _ in mapping_entries]
|
||
insert_cols: List[str] = list(mapping_dest)
|
||
for col in dwd_cols:
|
||
if col in self.SCD_COLS:
|
||
continue
|
||
if col in insert_cols:
|
||
continue
|
||
if col in ods_cols:
|
||
insert_cols.append(col)
|
||
|
||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||
ods_set = {c.lower() for c in ods_cols}
|
||
existing_lower = [c.lower() for c in insert_cols]
|
||
for pk in pk_cols:
|
||
pk_lower = pk.lower()
|
||
if pk_lower in existing_lower:
|
||
continue
|
||
if pk_lower in ods_set:
|
||
insert_cols.append(pk)
|
||
existing_lower.append(pk_lower)
|
||
elif "id" in ods_set:
|
||
insert_cols.append(pk)
|
||
existing_lower.append(pk_lower)
|
||
mapping[pk_lower] = ("id", None)
|
||
|
||
# 保持列顺序同时去重
|
||
seen_cols: set[str] = set()
|
||
ordered_cols: list[str] = []
|
||
for col in insert_cols:
|
||
lc = col.lower()
|
||
if lc not in seen_cols:
|
||
seen_cols.add(lc)
|
||
ordered_cols.append(col)
|
||
insert_cols = ordered_cols
|
||
|
||
if not insert_cols:
|
||
self.logger.warning("跳过 %s:未找到可插入的列", dwd_table)
|
||
return 0
|
||
|
||
order_col = self._pick_order_column(dwd_cols, ods_cols)
|
||
where_sql = ""
|
||
params: List[Any] = []
|
||
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||
ods_table_sql = self._format_table(ods_table, "billiards_ods")
|
||
if order_col:
|
||
cur.execute(f'SELECT COALESCE(MAX("{order_col}"), %s) FROM {dwd_table_sql}', ("1970-01-01",))
|
||
row = cur.fetchone() or {}
|
||
watermark = list(row.values())[0] if row else "1970-01-01"
|
||
where_sql = f'WHERE "{order_col}" > %s'
|
||
params.append(watermark)
|
||
|
||
default_cols = [c for c in insert_cols if c.lower() not in mapping]
|
||
default_expr_map: Dict[str, str] = {}
|
||
if default_cols:
|
||
default_exprs = self._build_fact_select_exprs(default_cols, dwd_types, ods_types)
|
||
default_expr_map = dict(zip(default_cols, default_exprs))
|
||
|
||
select_exprs: List[str] = []
|
||
for col in insert_cols:
|
||
key = col.lower()
|
||
if key in mapping:
|
||
src, cast_type = mapping[key]
|
||
select_exprs.append(self._cast_expr(src, cast_type))
|
||
else:
|
||
select_exprs.append(default_expr_map[col])
|
||
|
||
select_cols_sql = ", ".join(select_exprs)
|
||
insert_cols_sql = ", ".join(f'"{c}"' for c in insert_cols)
|
||
sql = f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) SELECT {select_cols_sql} FROM {ods_table_sql} {where_sql}'
|
||
|
||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||
if pk_cols:
|
||
pk_sql = ", ".join(f'"{c}"' for c in pk_cols)
|
||
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
|
||
|
||
cur.execute(sql, params)
|
||
return cur.rowcount
|
||
|
||
def _pick_order_column(self, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
|
||
"""选择用于增量的时间列(需同时存在于 DWD 与 ODS)。"""
|
||
lower_cols = {c.lower() for c in dwd_cols} & {c.lower() for c in ods_cols}
|
||
for candidate in self.FACT_ORDER_CANDIDATES:
|
||
if candidate.lower() in lower_cols:
|
||
return candidate.lower()
|
||
return None
|
||
|
||
def _build_fact_select_exprs(
|
||
self,
|
||
insert_cols: Sequence[str],
|
||
dwd_types: Dict[str, str],
|
||
ods_types: Dict[str, str],
|
||
) -> List[str]:
|
||
"""构造事实表 SELECT 列表,需要时做类型转换。"""
|
||
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
|
||
text_types = {"text", "character varying", "varchar"}
|
||
exprs = []
|
||
for col in insert_cols:
|
||
d_type = dwd_types.get(col)
|
||
o_type = ods_types.get(col)
|
||
if d_type in numeric_types and o_type in text_types:
|
||
exprs.append(f"CAST(NULLIF(CAST(\"{col}\" AS text), '') AS numeric):: {d_type}")
|
||
else:
|
||
exprs.append(f'"{col}"')
|
||
return exprs
|
||
|
||
def _split_table_name(self, name: str, default_schema: str) -> tuple[str, str]:
|
||
"""拆分 schema.table,若无 schema 则补默认 schema。"""
|
||
parts = name.split(".")
|
||
if len(parts) == 2:
|
||
return parts[0], parts[1].lower()
|
||
return default_schema, name.lower()
|
||
|
||
def _table_base(self, name: str) -> str:
|
||
"""获取不含 schema 的表名。"""
|
||
return name.split(".")[-1]
|
||
|
||
def _format_table(self, name: str, default_schema: str) -> str:
|
||
"""返回带引号的 schema.table 名称。"""
|
||
schema, table = self._split_table_name(name, default_schema)
|
||
return f'"{schema}"."{table}"'
|
||
|
||
def _cast_expr(self, col: str, cast_type: str | None) -> str:
|
||
"""构造带可选 CAST 的列表达式。"""
|
||
if col.upper() == "NULL":
|
||
base = "NULL"
|
||
else:
|
||
is_expr = not col.isidentifier() or "->" in col or "#>>" in col or "::" in col or "'" in col
|
||
base = col if is_expr else f'"{col}"'
|
||
if cast_type:
|
||
cast_lower = cast_type.lower()
|
||
if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
|
||
return f"CAST(NULLIF(CAST({base} AS text), '') AS numeric):: {cast_type}"
|
||
if cast_lower == "timestamptz":
|
||
return f"({base})::timestamptz"
|
||
return f"{base}::{cast_type}"
|
||
return base
|