DWD完成
This commit is contained in:
907
etl_billiards/tasks/dwd_load_task.py
Normal file
907
etl_billiards/tasks/dwd_load_task.py
Normal file
@@ -0,0 +1,907 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""DWD 装载任务:从 ODS 增量写入 DWD(维度 SCD2,事实按时间增量)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterable, List, Sequence
|
||||
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from .base_task import BaseTask, TaskContext
|
||||
|
||||
|
||||
class DwdLoadTask(BaseTask):
|
||||
"""负责 DWD 装载:维度表做 SCD2 合并,事实表按时间增量写入。"""
|
||||
|
||||
# DWD -> ODS table mapping (ODS table names are aligned with the sample JSON prefixes).
# load() iterates this dict: targets whose _table_base() name starts with "dim_"
# go through the SCD2 merge, every other target through the incremental fact load.
TABLE_MAP: dict[str, str] = {
    # Dimensions
    # Site: sourced from the siteprofile snapshot carried in table-fee transactions, to fill in org/address fields
    "billiards_dwd.dim_site": "billiards_ods.table_fee_transactions",
    "billiards_dwd.dim_site_ex": "billiards_ods.table_fee_transactions",
    "billiards_dwd.dim_table": "billiards_ods.site_tables_master",
    "billiards_dwd.dim_table_ex": "billiards_ods.site_tables_master",
    "billiards_dwd.dim_assistant": "billiards_ods.assistant_accounts_master",
    "billiards_dwd.dim_assistant_ex": "billiards_ods.assistant_accounts_master",
    "billiards_dwd.dim_member": "billiards_ods.member_profiles",
    "billiards_dwd.dim_member_ex": "billiards_ods.member_profiles",
    "billiards_dwd.dim_member_card_account": "billiards_ods.member_stored_value_cards",
    "billiards_dwd.dim_member_card_account_ex": "billiards_ods.member_stored_value_cards",
    "billiards_dwd.dim_tenant_goods": "billiards_ods.tenant_goods_master",
    "billiards_dwd.dim_tenant_goods_ex": "billiards_ods.tenant_goods_master",
    "billiards_dwd.dim_store_goods": "billiards_ods.store_goods_master",
    "billiards_dwd.dim_store_goods_ex": "billiards_ods.store_goods_master",
    "billiards_dwd.dim_goods_category": "billiards_ods.stock_goods_category_tree",
    "billiards_dwd.dim_groupbuy_package": "billiards_ods.group_buy_packages",
    "billiards_dwd.dim_groupbuy_package_ex": "billiards_ods.group_buy_packages",
    # Facts
    "billiards_dwd.dwd_settlement_head": "billiards_ods.settlement_records",
    "billiards_dwd.dwd_settlement_head_ex": "billiards_ods.settlement_records",
    "billiards_dwd.dwd_table_fee_log": "billiards_ods.table_fee_transactions",
    "billiards_dwd.dwd_table_fee_log_ex": "billiards_ods.table_fee_transactions",
    "billiards_dwd.dwd_table_fee_adjust": "billiards_ods.table_fee_discount_records",
    "billiards_dwd.dwd_table_fee_adjust_ex": "billiards_ods.table_fee_discount_records",
    "billiards_dwd.dwd_store_goods_sale": "billiards_ods.store_goods_sales_records",
    "billiards_dwd.dwd_store_goods_sale_ex": "billiards_ods.store_goods_sales_records",
    "billiards_dwd.dwd_assistant_service_log": "billiards_ods.assistant_service_records",
    "billiards_dwd.dwd_assistant_service_log_ex": "billiards_ods.assistant_service_records",
    "billiards_dwd.dwd_assistant_trash_event": "billiards_ods.assistant_cancellation_records",
    "billiards_dwd.dwd_assistant_trash_event_ex": "billiards_ods.assistant_cancellation_records",
    "billiards_dwd.dwd_member_balance_change": "billiards_ods.member_balance_changes",
    "billiards_dwd.dwd_member_balance_change_ex": "billiards_ods.member_balance_changes",
    "billiards_dwd.dwd_groupbuy_redemption": "billiards_ods.group_buy_redemption_records",
    "billiards_dwd.dwd_groupbuy_redemption_ex": "billiards_ods.group_buy_redemption_records",
    "billiards_dwd.dwd_platform_coupon_redemption": "billiards_ods.platform_coupon_redemption_records",
    "billiards_dwd.dwd_platform_coupon_redemption_ex": "billiards_ods.platform_coupon_redemption_records",
    "billiards_dwd.dwd_recharge_order": "billiards_ods.recharge_settlements",
    "billiards_dwd.dwd_recharge_order_ex": "billiards_ods.recharge_settlements",
    "billiards_dwd.dwd_payment": "billiards_ods.payment_transactions",
    "billiards_dwd.dwd_refund": "billiards_ods.refund_transactions",
    "billiards_dwd.dwd_refund_ex": "billiards_ods.refund_transactions",
}
|
||||
|
||||
# SCD2 bookkeeping columns. They are never taken from the source row:
# _merge_dim_scd2 leaves them out of the SELECT, _is_row_changed ignores them
# when comparing versions, and _insert_dim_row fills them in itself.
SCD_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
# Candidate time columns for fact tables.
# NOTE(review): not referenced in the visible part of this file — presumably
# consumed by _merge_fact_increment to pick the incremental ordering column;
# confirm how (and in which order) a candidate is chosen.
FACT_ORDER_CANDIDATES = [
    "fetched_at",
    "pay_time",
    "create_time",
    "update_time",
    "occur_time",
    "settle_time",
    "start_use_time",
]
|
||||
|
||||
# Explicit column mappings: DWD table -> list of
# (DWD column name, source SQL expression, optional CAST type).
# Despite the name, this covers dimension tables as well as fact tables.
# DWD columns that are not listed here are copied from the ODS column of the
# same name when one exists (see _merge_dim_scd2).
# Each DWD column must appear at most once per table.
FACT_MAPPINGS: dict[str, list[tuple[str, str, str | None]]] = {
    # Dimension tables (fill in primary-key / column-name differences)
    "billiards_dwd.dim_site": [
        ("org_id", "siteprofile->>'org_id'", None),
        ("shop_name", "siteprofile->>'shop_name'", None),
        ("site_label", "siteprofile->>'site_label'", None),
        ("full_address", "siteprofile->>'full_address'", None),
        ("address", "siteprofile->>'address'", None),
        ("longitude", "siteprofile->>'longitude'", "numeric"),
        ("latitude", "siteprofile->>'latitude'", "numeric"),
        ("tenant_site_region_id", "siteprofile->>'tenant_site_region_id'", None),
        ("business_tel", "siteprofile->>'business_tel'", None),
        ("site_type", "siteprofile->>'site_type'", None),
        ("shop_status", "siteprofile->>'shop_status'", None),
        ("tenant_id", "siteprofile->>'tenant_id'", None),
    ],
    "billiards_dwd.dim_site_ex": [
        ("auto_light", "siteprofile->>'auto_light'", None),
        ("attendance_enabled", "siteprofile->>'attendance_enabled'", None),
        ("attendance_distance", "siteprofile->>'attendance_distance'", None),
        ("prod_env", "siteprofile->>'prod_env'", None),
        ("light_status", "siteprofile->>'light_status'", None),
        ("light_type", "siteprofile->>'light_type'", None),
        ("light_token", "siteprofile->>'light_token'", None),
        ("address", "siteprofile->>'address'", None),
        ("avatar", "siteprofile->>'avatar'", None),
        ("wifi_name", "siteprofile->>'wifi_name'", None),
        ("wifi_password", "siteprofile->>'wifi_password'", None),
        ("customer_service_qrcode", "siteprofile->>'customer_service_qrcode'", None),
        ("customer_service_wechat", "siteprofile->>'customer_service_wechat'", None),
        ("fixed_pay_qrcode", "siteprofile->>'fixed_pay_qrCode'", None),
        ("longitude", "siteprofile->>'longitude'", "numeric"),
        ("latitude", "siteprofile->>'latitude'", "numeric"),
        ("tenant_site_region_id", "siteprofile->>'tenant_site_region_id'", None),
        ("site_type", "siteprofile->>'site_type'", None),
        ("site_label", "siteprofile->>'site_label'", None),
        ("shop_status", "siteprofile->>'shop_status'", None),
        ("create_time", "siteprofile->>'create_time'", "timestamptz"),
        ("update_time", "siteprofile->>'update_time'", "timestamptz"),
    ],
    "billiards_dwd.dim_table": [
        ("table_id", "id", None),
        ("site_table_area_name", "areaname", None),
        ("tenant_table_area_id", "site_table_area_id", None),
    ],
    "billiards_dwd.dim_table_ex": [
        ("table_id", "id", None),
        ("table_cloth_use_time", "table_cloth_use_time", None),
    ],
    "billiards_dwd.dim_assistant": [("assistant_id", "id", None), ("user_id", "staff_id", None)],
    "billiards_dwd.dim_assistant_ex": [
        ("assistant_id", "id", None),
        ("introduce", "introduce", None),
        ("group_name", "group_name", None),
        ("light_equipment_id", "light_equipment_id", None),
    ],
    "billiards_dwd.dim_member": [("member_id", "id", None)],
    "billiards_dwd.dim_member_ex": [
        ("member_id", "id", None),
        ("register_site_name", "site_name", None),
    ],
    "billiards_dwd.dim_member_card_account": [("member_card_id", "id", None)],
    "billiards_dwd.dim_member_card_account_ex": [
        ("member_card_id", "id", None),
        ("tenant_name", "tenantname", None),
        ("tenantavatar", "tenantavatar", None),
        ("card_no", "card_no", None),
        ("bind_password", "bind_password", None),
        ("use_scene", "use_scene", None),
        ("tableareaid", "tableareaid", None),
        ("goodscategoryid", "goodscategoryid", None),
    ],
    "billiards_dwd.dim_tenant_goods": [
        ("tenant_goods_id", "id", None),
        ("category_name", "categoryname", None),
    ],
    "billiards_dwd.dim_tenant_goods_ex": [
        ("tenant_goods_id", "id", None),
        ("remark_name", "remark_name", None),
        ("goods_bar_code", "goods_bar_code", None),
        ("commodity_code_list", "commodity_code", None),
        ("is_in_site", "isinsite", "boolean"),
    ],
    "billiards_dwd.dim_store_goods": [
        ("site_goods_id", "id", None),
        ("category_level1_name", "onecategoryname", None),
        ("category_level2_name", "twocategoryname", None),
        ("created_at", "create_time", None),
        ("updated_at", "update_time", None),
        ("avg_monthly_sales", "average_monthly_sales", None),
        ("batch_stock_qty", "stock", None),
        ("sale_qty", "sale_num", None),
        ("total_sales_qty", "total_sales", None),
    ],
    "billiards_dwd.dim_store_goods_ex": [
        ("site_goods_id", "id", None),
        ("goods_barcode", "goods_bar_code", None),
        ("stock_qty", "stock", None),
        ("stock_secondary_qty", "stock_a", None),
        ("safety_stock_qty", "safe_stock", None),
        ("site_name", "sitename", None),
        ("goods_cover_url", "goods_cover", None),
        ("provisional_total_cost", "total_purchase_cost", None),
        ("is_discountable", "able_discount", None),
        ("freeze_status", "freeze", None),
        ("remark", "remark", None),
        ("days_on_shelf", "days_available", None),
        ("sort_order", "sort", None),
    ],
    "billiards_dwd.dim_goods_category": [
        ("category_id", "id", None),
        ("tenant_id", "tenant_id", None),
        ("category_name", "category_name", None),
        ("alias_name", "alias_name", None),
        ("parent_category_id", "pid", None),
        ("business_name", "business_name", None),
        ("tenant_goods_business_id", "tenant_goods_business_id", None),
        ("sort_order", "sort", None),
        ("open_salesman", "open_salesman", None),
        ("is_warehousing", "is_warehousing", None),
        ("category_level", "CASE WHEN pid = 0 THEN 1 ELSE 2 END", None),
        ("is_leaf", "CASE WHEN categoryboxes IS NULL OR jsonb_array_length(categoryboxes)=0 THEN 1 ELSE 0 END", None),
    ],
    "billiards_dwd.dim_groupbuy_package": [
        ("groupbuy_package_id", "id", None),
        ("package_template_id", "package_id", None),
        ("coupon_face_value", "coupon_money", None),
        ("duration_seconds", "duration", None),
    ],
    "billiards_dwd.dim_groupbuy_package_ex": [
        ("groupbuy_package_id", "id", None),
        ("table_area_id", "table_area_id", None),
        ("tenant_table_area_id", "tenant_table_area_id", None),
        ("usable_range", "usable_range", None),
        ("table_area_id_list", "table_area_id_list", None),
        ("package_type", "type", None),
    ],
    # Fact tables: primary keys and columns whose names differ from the source
    "billiards_dwd.dwd_table_fee_log": [("table_fee_log_id", "id", None)],
    "billiards_dwd.dwd_table_fee_log_ex": [
        ("table_fee_log_id", "id", None),
        ("salesman_name", "salesman_name", None),
    ],
    "billiards_dwd.dwd_table_fee_adjust": [
        ("table_fee_adjust_id", "id", None),
        ("table_id", "site_table_id", None),
        ("table_area_id", "tenant_table_area_id", None),
        ("table_area_name", "tableprofile->>'table_area_name'", None),
        ("adjust_time", "create_time", None),
    ],
    "billiards_dwd.dwd_table_fee_adjust_ex": [
        ("table_fee_adjust_id", "id", None),
        ("ledger_name", "ledger_name", None),
    ],
    "billiards_dwd.dwd_store_goods_sale": [("store_goods_sale_id", "id", None), ("discount_price", "discount_money", None)],
    "billiards_dwd.dwd_store_goods_sale_ex": [
        ("store_goods_sale_id", "id", None),
        ("option_value_name", "option_value_name", None),
        ("open_salesman_flag", "opensalesman", "integer"),
        ("salesman_name", "salesman_name", None),
        ("salesman_org_id", "sales_man_org_id", None),
        ("legacy_order_goods_id", "ordergoodsid", None),
        ("site_name", "sitename", None),
        ("legacy_site_id", "siteid", None),
    ],
    "billiards_dwd.dwd_assistant_service_log": [
        ("assistant_service_id", "id", None),
        ("assistant_no", "assistantno", None),
        ("site_assistant_id", "order_assistant_id", None),
        ("level_name", "levelname", None),
        ("skill_name", "skillname", None),
    ],
    "billiards_dwd.dwd_assistant_service_log_ex": [
        ("assistant_service_id", "id", None),
        ("assistant_name", "assistantname", None),
        ("ledger_group_name", "ledger_group_name", None),
        ("trash_applicant_name", "trash_applicant_name", None),
        ("trash_reason", "trash_reason", None),
        ("salesman_name", "salesman_name", None),
        ("table_name", "tablename", None),
    ],
    "billiards_dwd.dwd_assistant_trash_event": [
        ("assistant_trash_event_id", "id", None),
        # NOTE(review): assistant_no is fed from "assistantname", the same source
        # as assistant_name below — confirm this is intended and not a copy/paste slip.
        ("assistant_no", "assistantname", None),
        ("abolish_amount", "assistantabolishamount", None),
        ("charge_minutes_raw", "pdchargeminutes", None),
        ("site_id", "siteid", None),
        ("table_id", "tableid", None),
        ("table_area_id", "tableareaid", None),
        ("assistant_name", "assistantname", None),
        ("trash_reason", "trashreason", None),
        ("create_time", "createtime", None),
    ],
    "billiards_dwd.dwd_assistant_trash_event_ex": [
        ("assistant_trash_event_id", "id", None),
        ("table_area_name", "tablearea", None),
        ("table_name", "tablename", None),
    ],
    "billiards_dwd.dwd_member_balance_change": [
        ("balance_change_id", "id", None),
        ("balance_before", "before", None),
        ("change_amount", "account_data", None),
        ("balance_after", "after", None),
        ("card_type_name", "membercardtypename", None),
        ("change_time", "create_time", None),
        ("member_name", "membername", None),
        ("member_mobile", "membermobile", None),
    ],
    "billiards_dwd.dwd_member_balance_change_ex": [
        ("balance_change_id", "id", None),
        ("pay_site_name", "paysitename", None),
        ("register_site_name", "registersitename", None),
    ],
    "billiards_dwd.dwd_groupbuy_redemption": [("redemption_id", "id", None)],
    "billiards_dwd.dwd_groupbuy_redemption_ex": [
        ("redemption_id", "id", None),
        ("table_area_name", "tableareaname", None),
        ("site_name", "sitename", None),
        ("table_name", "tablename", None),
        ("goods_option_price", "goodsoptionprice", None),
        ("salesman_name", "salesman_name", None),
        ("salesman_org_id", "sales_man_org_id", None),
        ("ledger_group_name", "ledger_group_name", None),
    ],
    "billiards_dwd.dwd_platform_coupon_redemption": [("platform_coupon_redemption_id", "id", None)],
    "billiards_dwd.dwd_platform_coupon_redemption_ex": [
        ("platform_coupon_redemption_id", "id", None),
        ("coupon_cover", "coupon_cover", None),
    ],
    "billiards_dwd.dwd_payment": [("payment_id", "id", None), ("pay_date", "pay_time", "date")],
    "billiards_dwd.dwd_refund": [("refund_id", "id", None)],
    "billiards_dwd.dwd_refund_ex": [
        ("refund_id", "id", None),
        ("tenant_name", "tenantname", None),
        ("channel_payer_id", "channel_payer_id", None),
        ("channel_pay_no", "channel_pay_no", None),
    ],
    # Settlement header: settlement_records (source columns are lower-cased
    # camelCase without underscores, so they must be mapped explicitly)
    "billiards_dwd.dwd_settlement_head": [
        ("order_settle_id", "id", None),
        ("tenant_id", "tenantid", None),
        ("site_id", "siteid", None),
        ("site_name", "sitename", None),
        ("table_id", "tableid", None),
        ("settle_name", "settlename", None),
        ("order_trade_no", "settlerelateid", None),
        ("create_time", "createtime", None),
        ("pay_time", "paytime", None),
        ("settle_type", "settletype", None),
        ("revoke_order_id", "revokeorderid", None),
        ("member_id", "memberid", None),
        ("member_name", "membername", None),
        ("member_phone", "memberphone", None),
        ("member_card_account_id", "tenantmembercardid", None),
        ("member_card_type_name", "membercardtypename", None),
        ("is_bind_member", "isbindmember", None),
        ("member_discount_amount", "memberdiscountamount", None),
        ("consume_money", "consumemoney", None),
        ("table_charge_money", "tablechargemoney", None),
        ("goods_money", "goodsmoney", None),
        ("real_goods_money", "realgoodsmoney", None),
        ("assistant_pd_money", "assistantpdmoney", None),
        ("assistant_cx_money", "assistantcxmoney", None),
        ("adjust_amount", "adjustamount", None),
        ("pay_amount", "payamount", None),
        ("balance_amount", "balanceamount", None),
        ("recharge_card_amount", "rechargecardamount", None),
        ("gift_card_amount", "giftcardamount", None),
        ("coupon_amount", "couponamount", None),
        ("rounding_amount", "roundingamount", None),
        ("point_amount", "pointamount", None),
    ],
    "billiards_dwd.dwd_settlement_head_ex": [
        ("order_settle_id", "id", None),
        ("serial_number", "serialnumber", None),
        ("settle_status", "settlestatus", None),
        ("can_be_revoked", "canberevoked", "boolean"),
        ("revoke_order_name", "revokeordername", None),
        ("revoke_time", "revoketime", None),
        ("is_first_order", "isfirst", "boolean"),
        ("service_money", "servicemoney", None),
        ("cash_amount", "cashamount", None),
        ("card_amount", "cardamount", None),
        ("online_amount", "onlineamount", None),
        ("refund_amount", "refundamount", None),
        ("prepay_money", "prepaymoney", None),
        ("payment_method", "paymentmethod", None),
        ("coupon_sale_amount", "couponsaleamount", None),
        ("all_coupon_discount", "allcoupondiscount", None),
        ("goods_promotion_money", "goodspromotionmoney", None),
        ("assistant_promotion_money", "assistantpromotionmoney", None),
        ("activity_discount", "activitydiscount", None),
        ("assistant_manual_discount", "assistantmanualdiscount", None),
        ("point_discount_price", "pointdiscountprice", None),
        ("point_discount_cost", "pointdiscountcost", None),
        ("is_use_coupon", "isusecoupon", "boolean"),
        ("is_use_discount", "isusediscount", "boolean"),
        ("is_activity", "isactivity", "boolean"),
        ("operator_name", "operatorname", None),
        ("salesman_name", "salesmanname", None),
        ("order_remark", "orderremark", None),
        ("operator_id", "operatorid", None),
        ("salesman_user_id", "salesmanuserid", None),
    ],
    # Recharge settlements: recharge_settlements (same column style as settlement_records)
    "billiards_dwd.dwd_recharge_order": [
        ("recharge_order_id", "id", None),
        ("tenant_id", "tenantid", None),
        ("site_id", "siteid", None),
        ("member_id", "memberid", None),
        ("member_name_snapshot", "membername", None),
        ("member_phone_snapshot", "memberphone", None),
        ("tenant_member_card_id", "tenantmembercardid", None),
        ("member_card_type_name", "membercardtypename", None),
        ("settle_relate_id", "settlerelateid", None),
        ("settle_type", "settletype", None),
        ("settle_name", "settlename", None),
        ("is_first", "isfirst", None),
        ("pay_amount", "payamount", None),
        ("refund_amount", "refundamount", None),
        ("point_amount", "pointamount", None),
        ("cash_amount", "cashamount", None),
        ("payment_method", "paymentmethod", None),
        ("create_time", "createtime", None),
        ("pay_time", "paytime", None),
    ],
    "billiards_dwd.dwd_recharge_order_ex": [
        ("recharge_order_id", "id", None),
        ("site_name_snapshot", "sitename", None),
        ("salesman_name", "salesmanname", None),
        ("order_remark", "orderremark", None),
        ("revoke_order_name", "revokeordername", None),
        ("settle_status", "settlestatus", None),
        ("is_bind_member", "isbindmember", "boolean"),
        ("is_activity", "isactivity", "boolean"),
        ("is_use_coupon", "isusecoupon", "boolean"),
        ("is_use_discount", "isusediscount", "boolean"),
        ("can_be_revoked", "canberevoked", "boolean"),
        ("online_amount", "onlineamount", None),
        ("balance_amount", "balanceamount", None),
        ("card_amount", "cardamount", None),
        ("coupon_amount", "couponamount", None),
        ("recharge_card_amount", "rechargecardamount", None),
        ("gift_card_amount", "giftcardamount", None),
        ("prepay_money", "prepaymoney", None),
        ("consume_money", "consumemoney", None),
        ("goods_money", "goodsmoney", None),
        ("real_goods_money", "realgoodsmoney", None),
        ("table_charge_money", "tablechargemoney", None),
        ("service_money", "servicemoney", None),
        ("activity_discount", "activitydiscount", None),
        ("all_coupon_discount", "allcoupondiscount", None),
        ("goods_promotion_money", "goodspromotionmoney", None),
        ("assistant_promotion_money", "assistantpromotionmoney", None),
        ("assistant_pd_money", "assistantpdmoney", None),
        ("assistant_cx_money", "assistantcxmoney", None),
        ("assistant_manual_discount", "assistantmanualdiscount", None),
        ("coupon_sale_amount", "couponsaleamount", None),
        ("member_discount_amount", "memberdiscountamount", None),
        ("point_discount_price", "pointdiscountprice", None),
        ("point_discount_cost", "pointdiscountcost", None),
        ("adjust_amount", "adjustamount", None),
        ("rounding_amount", "roundingamount", None),
        ("operator_id", "operatorid", None),
        ("operator_name_snapshot", "operatorname", None),
        ("salesman_user_id", "salesmanuserid", None),
        ("table_id", "tableid", None),
        ("serial_number", "serialnumber", None),
        ("revoke_order_id", "revokeorderid", None),
        ("revoke_time", "revoketime", None),
    ],
}
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the fixed code string that identifies this task."""
    task_code = "DWD_LOAD_FROM_ODS"
    return task_code
|
||||
|
||||
def extract(self, context: TaskContext) -> dict[str, Any]:
    """Capture the reference timestamp for this run.

    Returns:
        A dict with the single key ``"now"`` holding ``datetime.now()``
        (naive local time). ``context`` is not consulted.
    """
    run_timestamp = datetime.now()
    return {"now": run_timestamp}
|
||||
|
||||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
    """Load every table listed in TABLE_MAP and commit once at the end.

    Targets whose base name starts with ``dim_`` are merged as SCD2
    dimensions; all other targets are loaded as incremental fact tables.
    Targets for which no DWD column metadata can be read are skipped with a
    warning.

    Args:
        extracted: Output of ``extract``; only ``extracted["now"]`` is read.
        context: Task context (not used here).

    Returns:
        ``{"tables": [...]}`` with one entry per loaded table, carrying the
        table name, the mode (``"SCD2"`` or ``"INCREMENT"``) and the count
        returned by the corresponding merge helper.
    """
    run_ts = extracted["now"]
    results: List[Dict[str, Any]] = []
    with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
        for target, source in self.TABLE_MAP.items():
            target_cols = self._get_columns(cur, target)
            source_cols = self._get_columns(cur, source)
            if not target_cols:
                self.logger.warning("跳过 %s,未能获取 DWD 列信息", target)
                continue

            is_dimension = self._table_base(target).startswith("dim_")
            if is_dimension:
                count = self._merge_dim_scd2(cur, target, source, target_cols, source_cols, run_ts)
                entry = {"table": target, "mode": "SCD2", "processed": count}
            else:
                target_types = self._get_column_types(cur, target, "billiards_dwd")
                source_types = self._get_column_types(cur, source, "billiards_ods")
                count = self._merge_fact_increment(
                    cur, target, source, target_cols, source_cols, target_types, source_types
                )
                entry = {"table": target, "mode": "INCREMENT", "inserted": count}
            results.append(entry)

        # One commit for the whole run, after every table has been processed.
        self.db.conn.commit()
    return {"tables": results}
|
||||
|
||||
# ---------------------- helpers ----------------------
|
||||
def _get_columns(self, cur, table: str) -> List[str]:
|
||||
"""获取指定表的列名(小写)。"""
|
||||
schema, name = self._split_table_name(table, default_schema="billiards_dwd")
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
""",
|
||||
(schema, name),
|
||||
)
|
||||
return [r["column_name"].lower() for r in cur.fetchall()]
|
||||
|
||||
def _get_primary_keys(self, cur, table: str) -> List[str]:
|
||||
"""获取表的主键列名列表。"""
|
||||
schema, name = self._split_table_name(table, default_schema="billiards_dwd")
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT kcu.column_name
|
||||
FROM information_schema.table_constraints tc
|
||||
JOIN information_schema.key_column_usage kcu
|
||||
ON tc.constraint_name = kcu.constraint_name
|
||||
AND tc.table_schema = kcu.table_schema
|
||||
AND tc.table_name = kcu.table_name
|
||||
WHERE tc.table_schema = %s
|
||||
AND tc.table_name = %s
|
||||
AND tc.constraint_type = 'PRIMARY KEY'
|
||||
ORDER BY kcu.ordinal_position
|
||||
""",
|
||||
(schema, name),
|
||||
)
|
||||
return [r["column_name"].lower() for r in cur.fetchall()]
|
||||
|
||||
def _get_column_types(self, cur, table: str, default_schema: str) -> Dict[str, str]:
|
||||
"""获取列的数据类型(information_schema.data_type)。"""
|
||||
schema, name = self._split_table_name(table, default_schema=default_schema)
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT column_name, data_type
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
""",
|
||||
(schema, name),
|
||||
)
|
||||
return {r["column_name"].lower(): r["data_type"].lower() for r in cur.fetchall()}
|
||||
|
||||
def _build_column_mapping(
|
||||
self, dwd_table: str, pk_cols: Sequence[str], ods_cols: Sequence[str]
|
||||
) -> Dict[str, tuple[str, str | None]]:
|
||||
"""合并显式 FACT_MAPPINGS 与主键兜底映射。"""
|
||||
mapping_entries = self.FACT_MAPPINGS.get(dwd_table, [])
|
||||
mapping: Dict[str, tuple[str, str | None]] = {
|
||||
dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
|
||||
}
|
||||
ods_set = {c.lower() for c in ods_cols}
|
||||
for pk in pk_cols:
|
||||
pk_lower = pk.lower()
|
||||
if pk_lower not in mapping and pk_lower not in ods_set and "id" in ods_set:
|
||||
mapping[pk_lower] = ("id", None)
|
||||
return mapping
|
||||
|
||||
def _fetch_source_rows(
|
||||
self, cur, table: str, columns: Sequence[str], where_sql: str = "", params: Sequence[Any] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""从源表读取指定列,返回小写键的字典列表。"""
|
||||
schema, name = self._split_table_name(table, default_schema="billiards_ods")
|
||||
cols_sql = ", ".join(f'"{c}"' for c in columns)
|
||||
sql = f'SELECT {cols_sql} FROM "{schema}"."{name}" {where_sql}'
|
||||
cur.execute(sql, params or [])
|
||||
rows = []
|
||||
for r in cur.fetchall():
|
||||
rows.append({k.lower(): v for k, v in r.items()})
|
||||
return rows
|
||||
|
||||
def _expand_goods_category_rows(self, rows: list[Dict[str, Any]]) -> list[Dict[str, Any]]:
|
||||
"""将分类表中的 categoryboxes 元素展开为子类记录。"""
|
||||
expanded: list[Dict[str, Any]] = []
|
||||
for r in rows:
|
||||
expanded.append(r)
|
||||
boxes = r.get("categoryboxes")
|
||||
if isinstance(boxes, list):
|
||||
for child in boxes:
|
||||
if not isinstance(child, dict):
|
||||
continue
|
||||
child_row: Dict[str, Any] = {}
|
||||
# 继承父级的租户与业务大类信息
|
||||
child_row["tenant_id"] = r.get("tenant_id")
|
||||
child_row["business_name"] = child.get("business_name", r.get("business_name"))
|
||||
child_row["tenant_goods_business_id"] = child.get(
|
||||
"tenant_goods_business_id", r.get("tenant_goods_business_id")
|
||||
)
|
||||
# 合并子类字段
|
||||
child_row.update(child)
|
||||
# 默认父子关系
|
||||
child_row.setdefault("pid", r.get("id"))
|
||||
# 衍生层级/叶子标记
|
||||
child_boxes = child_row.get("categoryboxes")
|
||||
if not isinstance(child_boxes, list):
|
||||
is_leaf = 1
|
||||
else:
|
||||
is_leaf = 1 if len(child_boxes) == 0 else 0
|
||||
child_row.setdefault("category_level", 2)
|
||||
child_row.setdefault("is_leaf", is_leaf)
|
||||
expanded.append(child_row)
|
||||
return expanded
|
||||
|
||||
def _merge_dim_scd2(
|
||||
self,
|
||||
cur,
|
||||
dwd_table: str,
|
||||
ods_table: str,
|
||||
dwd_cols: Sequence[str],
|
||||
ods_cols: Sequence[str],
|
||||
now: datetime,
|
||||
) -> int:
|
||||
"""对维表执行 SCD2 合并:对比变更关闭旧版并插入新版。"""
|
||||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||||
if not pk_cols:
|
||||
raise ValueError(f"{dwd_table} 未配置主键,无法执行 SCD2 合并")
|
||||
|
||||
mapping = self._build_column_mapping(dwd_table, pk_cols, ods_cols)
|
||||
ods_set = {c.lower() for c in ods_cols}
|
||||
table_sql = self._format_table(ods_table, "billiards_ods")
|
||||
# 构造 SELECT 表达式,支持 JSON/expression 映射
|
||||
select_exprs: list[str] = []
|
||||
added: set[str] = set()
|
||||
for col in dwd_cols:
|
||||
lc = col.lower()
|
||||
if lc in self.SCD_COLS:
|
||||
continue
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
added.add(lc)
|
||||
elif lc in ods_set:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
added.add(lc)
|
||||
# 分类维度需要额外读取 categoryboxes 以展开子类
|
||||
if dwd_table == "billiards_dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
|
||||
select_exprs.append('"categoryboxes" AS "categoryboxes"')
|
||||
added.add("categoryboxes")
|
||||
# 主键兜底确保被选出
|
||||
for pk in pk_cols:
|
||||
lc = pk.lower()
|
||||
if lc not in added:
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
elif lc in ods_set:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
added.add(lc)
|
||||
|
||||
if not select_exprs:
|
||||
return 0
|
||||
|
||||
sql = f"SELECT {', '.join(select_exprs)} FROM {table_sql}"
|
||||
cur.execute(sql)
|
||||
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
|
||||
|
||||
# 特殊:分类维度展开子类
|
||||
if dwd_table == "billiards_dwd.dim_goods_category":
|
||||
rows = self._expand_goods_category_rows(rows)
|
||||
|
||||
inserted_or_updated = 0
|
||||
seen_pk = set()
|
||||
for row in rows:
|
||||
mapped_row: Dict[str, Any] = {}
|
||||
for col in dwd_cols:
|
||||
lc = col.lower()
|
||||
if lc in self.SCD_COLS:
|
||||
continue
|
||||
value = row.get(lc)
|
||||
if value is None and lc in mapping:
|
||||
src, _ = mapping[lc]
|
||||
value = row.get(src.lower())
|
||||
mapped_row[lc] = value
|
||||
|
||||
pk_key = tuple(mapped_row.get(pk) for pk in pk_cols)
|
||||
if pk_key in seen_pk:
|
||||
continue
|
||||
seen_pk.add(pk_key)
|
||||
if self._upsert_scd2_row(cur, dwd_table, dwd_cols, pk_cols, mapped_row, now):
|
||||
inserted_or_updated += 1
|
||||
return len(rows)
|
||||
|
||||
def _upsert_scd2_row(
|
||||
self,
|
||||
cur,
|
||||
dwd_table: str,
|
||||
dwd_cols: Sequence[str],
|
||||
pk_cols: Sequence[str],
|
||||
src_row: Dict[str, Any],
|
||||
now: datetime,
|
||||
) -> bool:
|
||||
"""SCD2 合并:若有变更则关闭旧版并插入新版本。"""
|
||||
pk_values = [src_row.get(pk) for pk in pk_cols]
|
||||
if any(v is None for v in pk_values):
|
||||
self.logger.warning("跳过 %s:主键缺失 %s", dwd_table, dict(zip(pk_cols, pk_values)))
|
||||
return False
|
||||
|
||||
where_clause = " AND ".join(f'"{pk}" = %s' for pk in pk_cols)
|
||||
table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||||
cur.execute(
|
||||
f"SELECT * FROM {table_sql} WHERE {where_clause} AND COALESCE(scd2_is_current,1)=1 LIMIT 1",
|
||||
pk_values,
|
||||
)
|
||||
current = cur.fetchone()
|
||||
if current:
|
||||
current = {k.lower(): v for k, v in current.items()}
|
||||
|
||||
if current and not self._is_row_changed(current, src_row, dwd_cols):
|
||||
return False
|
||||
|
||||
if current:
|
||||
version = (current.get("scd2_version") or 1) + 1
|
||||
self._close_current_dim(cur, dwd_table, pk_cols, pk_values, now)
|
||||
else:
|
||||
version = 1
|
||||
|
||||
self._insert_dim_row(cur, dwd_table, dwd_cols, src_row, now, version)
|
||||
return True
|
||||
|
||||
def _close_current_dim(self, cur, table: str, pk_cols: Sequence[str], pk_values: Sequence[Any], now: datetime) -> None:
|
||||
"""关闭当前版本,标记 scd2_is_current=0 并填充结束时间。"""
|
||||
set_sql = "scd2_end_time = %s, scd2_is_current = 0"
|
||||
where_clause = " AND ".join(f'"{pk}" = %s' for pk in pk_cols)
|
||||
table_sql = self._format_table(table, "billiards_dwd")
|
||||
cur.execute(f"UPDATE {table_sql} SET {set_sql} WHERE {where_clause} AND COALESCE(scd2_is_current,1)=1", [now, *pk_values])
|
||||
|
||||
def _insert_dim_row(
|
||||
self,
|
||||
cur,
|
||||
table: str,
|
||||
dwd_cols: Sequence[str],
|
||||
src_row: Dict[str, Any],
|
||||
now: datetime,
|
||||
version: int,
|
||||
) -> None:
|
||||
"""插入新的 SCD2 版本行。"""
|
||||
insert_cols: List[str] = []
|
||||
placeholders: List[str] = []
|
||||
values: List[Any] = []
|
||||
for col in sorted(dwd_cols):
|
||||
lc = col.lower()
|
||||
insert_cols.append(f'"{lc}"')
|
||||
placeholders.append("%s")
|
||||
if lc == "scd2_start_time":
|
||||
values.append(now)
|
||||
elif lc == "scd2_end_time":
|
||||
values.append(datetime(9999, 12, 31, 0, 0, 0))
|
||||
elif lc == "scd2_is_current":
|
||||
values.append(1)
|
||||
elif lc == "scd2_version":
|
||||
values.append(version)
|
||||
else:
|
||||
values.append(src_row.get(lc))
|
||||
table_sql = self._format_table(table, "billiards_dwd")
|
||||
sql = f'INSERT INTO {table_sql} ({", ".join(insert_cols)}) VALUES ({", ".join(placeholders)})'
|
||||
cur.execute(sql, values)
|
||||
|
||||
def _is_row_changed(self, current: Dict[str, Any], incoming: Dict[str, Any], dwd_cols: Sequence[str]) -> bool:
|
||||
"""比较非 SCD2 列,判断是否存在变更。"""
|
||||
for col in dwd_cols:
|
||||
lc = col.lower()
|
||||
if lc in self.SCD_COLS:
|
||||
continue
|
||||
if current.get(lc) != incoming.get(lc):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _merge_fact_increment(
|
||||
self,
|
||||
cur,
|
||||
dwd_table: str,
|
||||
ods_table: str,
|
||||
dwd_cols: Sequence[str],
|
||||
ods_cols: Sequence[str],
|
||||
dwd_types: Dict[str, str],
|
||||
ods_types: Dict[str, str],
|
||||
) -> int:
|
||||
"""事实表按时间增量插入,默认按列名交集写入。"""
|
||||
mapping_entries = self.FACT_MAPPINGS.get(dwd_table) or []
|
||||
mapping: Dict[str, tuple[str, str | None]] = {
|
||||
dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
|
||||
}
|
||||
|
||||
mapping_dest = [dst for dst, _, _ in mapping_entries]
|
||||
insert_cols: List[str] = list(mapping_dest)
|
||||
for col in dwd_cols:
|
||||
if col in self.SCD_COLS:
|
||||
continue
|
||||
if col in insert_cols:
|
||||
continue
|
||||
if col in ods_cols:
|
||||
insert_cols.append(col)
|
||||
|
||||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||||
ods_set = {c.lower() for c in ods_cols}
|
||||
existing_lower = [c.lower() for c in insert_cols]
|
||||
for pk in pk_cols:
|
||||
pk_lower = pk.lower()
|
||||
if pk_lower in existing_lower:
|
||||
continue
|
||||
if pk_lower in ods_set:
|
||||
insert_cols.append(pk)
|
||||
existing_lower.append(pk_lower)
|
||||
elif "id" in ods_set:
|
||||
insert_cols.append(pk)
|
||||
existing_lower.append(pk_lower)
|
||||
mapping[pk_lower] = ("id", None)
|
||||
|
||||
# 保持列顺序同时去重
|
||||
seen_cols: set[str] = set()
|
||||
ordered_cols: list[str] = []
|
||||
for col in insert_cols:
|
||||
lc = col.lower()
|
||||
if lc not in seen_cols:
|
||||
seen_cols.add(lc)
|
||||
ordered_cols.append(col)
|
||||
insert_cols = ordered_cols
|
||||
|
||||
if not insert_cols:
|
||||
self.logger.warning("跳过 %s:未找到可插入的列", dwd_table)
|
||||
return 0
|
||||
|
||||
order_col = self._pick_order_column(dwd_cols, ods_cols)
|
||||
where_sql = ""
|
||||
params: List[Any] = []
|
||||
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||||
ods_table_sql = self._format_table(ods_table, "billiards_ods")
|
||||
if order_col:
|
||||
cur.execute(f'SELECT COALESCE(MAX("{order_col}"), %s) FROM {dwd_table_sql}', ("1970-01-01",))
|
||||
row = cur.fetchone() or {}
|
||||
watermark = list(row.values())[0] if row else "1970-01-01"
|
||||
where_sql = f'WHERE "{order_col}" > %s'
|
||||
params.append(watermark)
|
||||
|
||||
default_cols = [c for c in insert_cols if c.lower() not in mapping]
|
||||
default_expr_map: Dict[str, str] = {}
|
||||
if default_cols:
|
||||
default_exprs = self._build_fact_select_exprs(default_cols, dwd_types, ods_types)
|
||||
default_expr_map = dict(zip(default_cols, default_exprs))
|
||||
|
||||
select_exprs: List[str] = []
|
||||
for col in insert_cols:
|
||||
key = col.lower()
|
||||
if key in mapping:
|
||||
src, cast_type = mapping[key]
|
||||
select_exprs.append(self._cast_expr(src, cast_type))
|
||||
else:
|
||||
select_exprs.append(default_expr_map[col])
|
||||
|
||||
select_cols_sql = ", ".join(select_exprs)
|
||||
insert_cols_sql = ", ".join(f'"{c}"' for c in insert_cols)
|
||||
sql = f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) SELECT {select_cols_sql} FROM {ods_table_sql} {where_sql}'
|
||||
|
||||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||||
if pk_cols:
|
||||
pk_sql = ", ".join(f'"{c}"' for c in pk_cols)
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
|
||||
|
||||
cur.execute(sql, params)
|
||||
return cur.rowcount
|
||||
|
||||
def _pick_order_column(self, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
|
||||
"""选择用于增量的时间列(需同时存在于 DWD 与 ODS)。"""
|
||||
lower_cols = {c.lower() for c in dwd_cols} & {c.lower() for c in ods_cols}
|
||||
for candidate in self.FACT_ORDER_CANDIDATES:
|
||||
if candidate.lower() in lower_cols:
|
||||
return candidate.lower()
|
||||
return None
|
||||
|
||||
def _build_fact_select_exprs(
|
||||
self,
|
||||
insert_cols: Sequence[str],
|
||||
dwd_types: Dict[str, str],
|
||||
ods_types: Dict[str, str],
|
||||
) -> List[str]:
|
||||
"""构造事实表 SELECT 列表,需要时做类型转换。"""
|
||||
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
|
||||
text_types = {"text", "character varying", "varchar"}
|
||||
exprs = []
|
||||
for col in insert_cols:
|
||||
d_type = dwd_types.get(col)
|
||||
o_type = ods_types.get(col)
|
||||
if d_type in numeric_types and o_type in text_types:
|
||||
exprs.append(f"CAST(NULLIF(CAST(\"{col}\" AS text), '') AS numeric):: {d_type}")
|
||||
else:
|
||||
exprs.append(f'"{col}"')
|
||||
return exprs
|
||||
|
||||
def _split_table_name(self, name: str, default_schema: str) -> tuple[str, str]:
|
||||
"""拆分 schema.table,若无 schema 则补默认 schema。"""
|
||||
parts = name.split(".")
|
||||
if len(parts) == 2:
|
||||
return parts[0], parts[1].lower()
|
||||
return default_schema, name.lower()
|
||||
|
||||
def _table_base(self, name: str) -> str:
|
||||
"""获取不含 schema 的表名。"""
|
||||
return name.split(".")[-1]
|
||||
|
||||
def _format_table(self, name: str, default_schema: str) -> str:
|
||||
"""返回带引号的 schema.table 名称。"""
|
||||
schema, table = self._split_table_name(name, default_schema)
|
||||
return f'"{schema}"."{table}"'
|
||||
|
||||
def _cast_expr(self, col: str, cast_type: str | None) -> str:
|
||||
"""构造带可选 CAST 的列表达式。"""
|
||||
if col.upper() == "NULL":
|
||||
base = "NULL"
|
||||
else:
|
||||
is_expr = not col.isidentifier() or "->" in col or "#>>" in col or "::" in col or "'" in col
|
||||
base = col if is_expr else f'"{col}"'
|
||||
if cast_type:
|
||||
cast_lower = cast_type.lower()
|
||||
if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
|
||||
return f"CAST(NULLIF(CAST({base} AS text), '') AS numeric):: {cast_type}"
|
||||
if cast_lower == "timestamptz":
|
||||
return f"({base})::timestamptz"
|
||||
return f"{base}::{cast_type}"
|
||||
return base
|
||||
105
etl_billiards/tasks/dwd_quality_task.py
Normal file
105
etl_billiards/tasks/dwd_quality_task.py
Normal file
@@ -0,0 +1,105 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""DWD 质量核对任务:按 dwd_quality_check.md 输出行数/金额对照报表。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Sequence, Tuple
|
||||
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from .base_task import BaseTask, TaskContext
|
||||
from .dwd_load_task import DwdLoadTask
|
||||
|
||||
|
||||
class DwdQualityTask(BaseTask):
    """Compare ODS and DWD row counts and amount totals, and write a JSON report."""

    # Relative path: it resolves against the working directory of the process.
    REPORT_PATH = Path("etl_billiards/reports/dwd_quality_report.json")
    # A numeric column counts as an "amount" column when its name contains one of these.
    AMOUNT_KEYWORDS = ("amount", "money", "fee", "balance")

    def get_task_code(self) -> str:
        """Return the task code."""
        return "DWD_QUALITY_CHECK"

    def extract(self, context: TaskContext) -> dict[str, Any]:
        """Capture the local timestamp used to stamp the report."""
        return {"now": datetime.now()}

    def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
        """Run the checks for every DWD/ODS pair and write the report file.

        The pairs come from ``DwdLoadTask.TABLE_MAP``. Returns a dict whose
        ``report_path`` is the written file path as a string.
        """
        report: Dict[str, Any] = {
            "generated_at": extracted["now"].isoformat(),
            "tables": [],
            "note": "行数/金额核对,金额字段基于列名包含 amount/money/fee/balance 的数值列自动扫描。",
        }
        table_entries = report["tables"]

        with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
            for dwd_table, ods_table in DwdLoadTask.TABLE_MAP.items():
                entry: Dict[str, Any] = {"dwd_table": dwd_table, "ods_table": ods_table}
                entry["count"] = self._compare_counts(cur, dwd_table, ods_table)
                entry["amounts"] = self._compare_amounts(cur, dwd_table, ods_table)
                table_entries.append(entry)

        target = self.REPORT_PATH
        target.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(report, ensure_ascii=False, indent=2)
        target.write_text(payload, encoding="utf-8")
        self.logger.info("DWD 质检报表已生成:%s", target)
        return {"report_path": str(target)}

    # ---------------------- helpers ----------------------
    def _compare_counts(self, cur, dwd_table: str, ods_table: str) -> Dict[str, Any]:
        """Count rows on both sides; ``diff`` is the DWD count minus the ODS count."""
        dwd_schema, dwd_name = self._split_table_name(dwd_table, default_schema="billiards_dwd")
        ods_schema, ods_name = self._split_table_name(ods_table, default_schema="billiards_ods")
        count_sql = 'SELECT COUNT(1) AS cnt FROM "{}"."{}"'

        cur.execute(count_sql.format(dwd_schema, dwd_name))
        dwd_cnt = cur.fetchone()["cnt"]
        cur.execute(count_sql.format(ods_schema, ods_name))
        ods_cnt = cur.fetchone()["cnt"]
        return {"dwd": dwd_cnt, "ods": ods_cnt, "diff": dwd_cnt - ods_cnt}

    def _compare_amounts(self, cur, dwd_table: str, ods_table: str) -> List[Dict[str, Any]]:
        """Sum every amount column present on both sides and report the totals.

        Only columns returned by ``_get_numeric_amount_columns`` for both
        tables are compared, in sorted name order. Totals are converted to
        float; ``diff`` is the DWD total minus the ODS total.
        """
        dwd_schema, dwd_name = self._split_table_name(dwd_table, default_schema="billiards_dwd")
        ods_schema, ods_name = self._split_table_name(ods_table, default_schema="billiards_ods")

        dwd_amount_cols = self._get_numeric_amount_columns(cur, dwd_schema, dwd_name)
        ods_amount_cols = self._get_numeric_amount_columns(cur, ods_schema, ods_name)
        shared_cols = sorted(set(dwd_amount_cols) & set(ods_amount_cols))

        sum_sql = 'SELECT COALESCE(SUM("{}"),0) AS val FROM "{}"."{}"'
        results: List[Dict[str, Any]] = []
        for col in shared_cols:
            cur.execute(sum_sql.format(col, dwd_schema, dwd_name))
            dwd_total = float(cur.fetchone()["val"] or 0)
            cur.execute(sum_sql.format(col, ods_schema, ods_name))
            ods_total = float(cur.fetchone()["val"] or 0)
            results.append(
                {
                    "column": col,
                    "dwd_sum": dwd_total,
                    "ods_sum": ods_total,
                    "diff": dwd_total - ods_total,
                }
            )
        return results

    def _get_numeric_amount_columns(self, cur, schema: str, table: str) -> List[str]:
        """List the numeric columns of a table whose name contains an amount keyword.

        Column names are read from ``information_schema.columns`` and
        lowercased before the keyword match.
        """
        cur.execute(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = %s
              AND table_name = %s
              AND data_type IN ('numeric','double precision','integer','bigint','smallint','real','decimal')
            """,
            (schema, table),
        )
        names = (record["column_name"].lower() for record in cur.fetchall())
        return [name for name in names if any(key in name for key in self.AMOUNT_KEYWORDS)]

    def _split_table_name(self, name: str, default_schema: str) -> Tuple[str, str]:
        """Split ``schema.table``; any other shape falls back to ``default_schema`` with the whole name."""
        if name.count(".") == 1:
            schema, _, table = name.partition(".")
            return schema, table
        return default_schema, name
|
||||
36
etl_billiards/tasks/init_dwd_schema_task.py
Normal file
36
etl_billiards/tasks/init_dwd_schema_task.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""初始化 DWD Schema:执行 schema_dwd_doc.sql,可选先 DROP SCHEMA。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .base_task import BaseTask, TaskContext
|
||||
|
||||
|
||||
class InitDwdSchemaTask(BaseTask):
    """Scheduler task that initialises the DWD schema from a SQL file."""

    def get_task_code(self) -> str:
        """Return the task code."""
        return "INIT_DWD_SCHEMA"

    def extract(self, context: TaskContext) -> dict[str, Any]:
        """Read the DWD DDL file and the drop-first option.

        The file path comes from config key ``schema.dwd_file`` and defaults
        to ``database/schema_dwd_doc.sql`` one directory above this module's
        directory.

        Raises:
            FileNotFoundError: The DDL file does not exist.
        """
        default_file = Path(__file__).resolve().parents[1] / "database" / "schema_dwd_doc.sql"
        dwd_path = Path(self.config.get("schema.dwd_file", default_file))
        if not dwd_path.exists():
            raise FileNotFoundError(f"未找到 DWD schema 文件: {dwd_path}")

        result: dict[str, Any] = {}
        drop_first = self.config.get("dwd.drop_schema_first", False)
        result["dwd_sql"] = dwd_path.read_text(encoding="utf-8")
        result["dwd_file"] = str(dwd_path)
        result["drop_first"] = drop_first
        return result

    def load(self, extracted: dict[str, Any], context: TaskContext) -> dict:
        """Optionally drop schema ``billiards_dwd``, then execute the DWD DDL."""
        ddl_file = extracted["dwd_file"]
        with self.db.conn.cursor() as cur:
            if extracted["drop_first"]:
                # Destructive: removes the schema and everything that depends on it.
                cur.execute("DROP SCHEMA IF EXISTS billiards_dwd CASCADE;")
                self.logger.info("已执行 DROP SCHEMA billiards_dwd CASCADE")
            self.logger.info("执行 DWD schema 文件: %s", ddl_file)
            cur.execute(extracted["dwd_sql"])
        return {"executed": 1, "files": [ddl_file]}
|
||||
73
etl_billiards/tasks/init_schema_task.py
Normal file
73
etl_billiards/tasks/init_schema_task.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""任务:初始化运行环境,执行 ODS 与 etl_admin 的 DDL,并准备日志/导出目录。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .base_task import BaseTask, TaskContext
|
||||
|
||||
|
||||
class InitOdsSchemaTask(BaseTask):
    """Scheduler task that creates working directories and runs the ODS and etl_admin DDL."""

    def get_task_code(self) -> str:
        """Return the task code."""
        return "INIT_ODS_SCHEMA"

    def extract(self, context: TaskContext) -> dict[str, Any]:
        """Read both DDL files and collect the directories to create.

        File paths come from config keys ``schema.ods_file`` and
        ``schema.etl_admin_file``, defaulting to files in ``database/`` one
        directory above this module's directory.

        Raises:
            FileNotFoundError: Either DDL file does not exist.
        """
        base_dir = Path(__file__).resolve().parents[1] / "database"
        ods_path = Path(self.config.get("schema.ods_file", base_dir / "schema_ODS_doc.sql"))
        admin_path = Path(self.config.get("schema.etl_admin_file", base_dir / "schema_etl_admin.sql"))
        for label, path in (("ODS", ods_path), ("etl_admin", admin_path)):
            if not path.exists():
                raise FileNotFoundError(f"找不到 {label} schema 文件: {path}")

        config = self.config
        log_root = Path(config.get("io.log_root") or config["io"]["log_root"])
        export_root = Path(config.get("io.export_root") or config["io"]["export_root"])
        fetch_root = Path(config.get("pipeline.fetch_root") or config["pipeline"]["fetch_root"])
        # The ingest directory falls back to the fetch root when not configured.
        ingest_dir = Path(config.get("pipeline.ingest_source_dir") or fetch_root)

        return {
            "ods_sql": ods_path.read_text(encoding="utf-8"),
            "admin_sql": admin_path.read_text(encoding="utf-8"),
            "ods_file": str(ods_path),
            "admin_file": str(admin_path),
            "dirs": [log_root, export_root, fetch_root, ingest_dir],
        }

    def load(self, extracted: dict[str, Any], context: TaskContext) -> dict:
        """Create the directories, then execute the etl_admin and ODS DDL.

        The ODS DDL file may carry a leading description or malformed
        comments, so it gets a light clean-up before execution: text before
        the first ``DROP SCHEMA`` is discarded, and every line that starts
        with ``COMMENT ON `` is removed. The filter works line by line, so
        only the first line of a multi-line COMMENT statement is removed.
        """
        for directory in extracted["dirs"]:
            Path(directory).mkdir(parents=True, exist_ok=True)
            self.logger.info("已确保目录存在: %s", directory)

        # Clean the ODS SQL: drop the leading description and the error-prone
        # COMMENT ON lines (e.g. unquoted CamelCase identifiers).
        ods_sql_raw: str = extracted["ods_sql"]
        drop_idx = ods_sql_raw.find("DROP SCHEMA")
        if drop_idx > 0:
            ods_sql_raw = ods_sql_raw[drop_idx:]
        ods_sql = "\n".join(
            line
            for line in ods_sql_raw.splitlines()
            if not line.strip().upper().startswith("COMMENT ON ")
        )

        admin_file = extracted["admin_file"]
        ods_file = extracted["ods_file"]
        with self.db.conn.cursor() as cur:
            self.logger.info("执行 etl_admin schema 文件: %s", admin_file)
            cur.execute(extracted["admin_sql"])
            self.logger.info("执行 ODS schema 文件: %s", ods_file)
            cur.execute(ods_sql)

        return {
            "executed": 2,
            "files": [admin_file, ods_file],
            "dirs_prepared": [str(p) for p in extracted["dirs"]],
        }
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
from .base_dwd_task import BaseDwdTask
|
||||
from loaders.dimensions.member import MemberLoader
|
||||
from models.parsers import TypeParser
|
||||
@@ -7,7 +7,7 @@ import json
|
||||
class MembersDwdTask(BaseDwdTask):
|
||||
"""
|
||||
DWD Task: Process Member Records from ODS to Dimension Table
|
||||
Source: billiards_ods.ods_member_profile
|
||||
Source: billiards_ods.member_profiles
|
||||
Target: billiards.dim_member
|
||||
"""
|
||||
|
||||
@@ -29,7 +29,7 @@ class MembersDwdTask(BaseDwdTask):
|
||||
|
||||
# Iterate ODS Data
|
||||
batches = self.iter_ods_rows(
|
||||
table_name="billiards_ods.ods_member_profile",
|
||||
table_name="billiards_ods.member_profiles",
|
||||
columns=["site_id", "member_id", "payload", "fetched_at"],
|
||||
start_time=window_start,
|
||||
end_time=window_end
|
||||
@@ -87,3 +87,4 @@ class MembersDwdTask(BaseDwdTask):
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error parsing member: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
"""ODS ingestion tasks."""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -62,11 +62,11 @@ class BaseOdsTask(BaseTask):
|
||||
|
||||
def execute(self) -> dict:
|
||||
spec = self.SPEC
|
||||
self.logger.info("开始执行 %s (ODS)", spec.code)
|
||||
self.logger.info("寮€濮嬫墽琛?%s (ODS)", spec.code)
|
||||
|
||||
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
|
||||
if not store_id:
|
||||
raise ValueError("app.store_id 未配置,无法执行 ODS 任务")
|
||||
raise ValueError("app.store_id 鏈厤缃紝鏃犳硶鎵ц ODS 浠诲姟")
|
||||
|
||||
page_size = self.config.get("api.page_size", 200)
|
||||
params = self._build_params(spec, store_id)
|
||||
@@ -122,13 +122,13 @@ class BaseOdsTask(BaseTask):
|
||||
counts["fetched"] += len(page_records)
|
||||
|
||||
self.db.commit()
|
||||
self.logger.info("%s ODS 任务完成: %s", spec.code, counts)
|
||||
self.logger.info("%s ODS 浠诲姟瀹屾垚: %s", spec.code, counts)
|
||||
return self._build_result("SUCCESS", counts)
|
||||
|
||||
except Exception:
|
||||
self.db.rollback()
|
||||
counts["errors"] += 1
|
||||
self.logger.error("%s ODS 任务失败", spec.code, exc_info=True)
|
||||
self.logger.error("%s ODS 浠诲姟澶辫触", spec.code, exc_info=True)
|
||||
raise
|
||||
|
||||
def _build_params(self, spec: OdsTaskSpec, store_id: int) -> dict:
|
||||
@@ -201,7 +201,7 @@ class BaseOdsTask(BaseTask):
|
||||
value = self._extract_value(record, col_spec)
|
||||
if value is None and col_spec.required:
|
||||
self.logger.warning(
|
||||
"%s 缺少必填字段 %s,原始记录: %s",
|
||||
"%s 缂哄皯蹇呭~瀛楁 %s锛屽師濮嬭褰? %s",
|
||||
spec.code,
|
||||
col_spec.column,
|
||||
record,
|
||||
@@ -265,9 +265,38 @@ def _int_col(name: str, *sources: str, required: bool = False) -> ColumnSpec:
|
||||
)
|
||||
|
||||
|
||||
def _decimal_col(name: str, *sources: str) -> ColumnSpec:
    """Build a ColumnSpec whose value is converted by ``TypeParser.parse_decimal``.

    The transform passes 2 as the second argument of ``parse_decimal``
    (presumably the number of decimal places — confirm against TypeParser).
    """

    def _parse(value):
        return TypeParser.parse_decimal(value, 2)

    return ColumnSpec(column=name, sources=sources, transform=_parse)
|
||||
|
||||
|
||||
def _bool_col(name: str, *sources: str) -> ColumnSpec:
    """Build a ColumnSpec whose value is normalised to a boolean.

    None and real booleans pass through unchanged. Any other value is
    stringified, trimmed and lowercased: 1/true/t/yes/y map to True and
    0/false/f/no/n map to False. Anything else falls back to ``bool(value)``.
    """
    truthy = {"1", "true", "t", "yes", "y"}
    falsy = {"0", "false", "f", "no", "n"}

    def _to_bool(value):
        if value is None or isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in truthy:
            return True
        if text in falsy:
            return False
        # Unrecognised text or number: use Python truthiness of the raw value.
        return bool(value)

    return ColumnSpec(column=name, sources=sources, transform=_to_bool)
|
||||
|
||||
|
||||
|
||||
|
||||
ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
OdsTaskSpec(
|
||||
code="ODS_ASSISTANT_ACCOUNTS",
|
||||
code="ODS_ASSISTANT_ACCOUNT",
|
||||
class_name="OdsAssistantAccountsTask",
|
||||
table_name="billiards_ods.assistant_accounts_master",
|
||||
endpoint="/PersonnelManagement/SearchAssistantInfo",
|
||||
@@ -281,10 +310,10 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
description="助教账号档案 ODS:SearchAssistantInfo -> assistantInfos 原始 JSON",
|
||||
description="鍔╂暀璐﹀彿妗f ODS锛歋earchAssistantInfo -> assistantInfos 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_ORDER_SETTLE",
|
||||
code="ODS_SETTLEMENT_RECORDS",
|
||||
class_name="OdsOrderSettleTask",
|
||||
table_name="billiards_ods.settlement_records",
|
||||
endpoint="/Site/GetAllOrderSettleList",
|
||||
@@ -299,7 +328,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="结账记录 ODS:GetAllOrderSettleList -> settleList 原始 JSON",
|
||||
description="缁撹处璁板綍 ODS锛欸etAllOrderSettleList -> settleList 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_TABLE_USE",
|
||||
@@ -317,7 +346,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="台费计费流水 ODS:GetSiteTableOrderDetails -> siteTableUseDetailsList 原始 JSON",
|
||||
description="鍙拌垂璁¤垂娴佹按 ODS锛欸etSiteTableOrderDetails -> siteTableUseDetailsList 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_ASSISTANT_LEDGER",
|
||||
@@ -334,7 +363,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
description="助教服务流水 ODS:GetOrderAssistantDetails -> orderAssistantDetails 原始 JSON",
|
||||
description="鍔╂暀鏈嶅姟娴佹按 ODS锛欸etOrderAssistantDetails -> orderAssistantDetails 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_ASSISTANT_ABOLISH",
|
||||
@@ -351,10 +380,10 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
description="助教废除记录 ODS:GetAbolitionAssistant -> abolitionAssistants 原始 JSON",
|
||||
description="鍔╂暀搴熼櫎璁板綍 ODS锛欸etAbolitionAssistant -> abolitionAssistants 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_GOODS_LEDGER",
|
||||
code="ODS_STORE_GOODS_SALES",
|
||||
class_name="OdsGoodsLedgerTask",
|
||||
table_name="billiards_ods.store_goods_sales_records",
|
||||
endpoint="/TenantGoods/GetGoodsSalesList",
|
||||
@@ -369,7 +398,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="门店商品销售流水 ODS:GetGoodsSalesList -> orderGoodsLedgers 原始 JSON",
|
||||
description="闂ㄥ簵鍟嗗搧閿€鍞祦姘?ODS锛欸etGoodsSalesList -> orderGoodsLedgers 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_PAYMENT",
|
||||
@@ -386,7 +415,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="支付流水 ODS:GetPayLogListPage 原始 JSON",
|
||||
description="鏀粯娴佹按 ODS锛欸etPayLogListPage 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_REFUND",
|
||||
@@ -403,10 +432,10 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="退款流水 ODS:GetRefundPayLogList 原始 JSON",
|
||||
description="閫€娆炬祦姘?ODS锛欸etRefundPayLogList 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_COUPON_VERIFY",
|
||||
code="ODS_PLATFORM_COUPON",
|
||||
class_name="OdsCouponVerifyTask",
|
||||
table_name="billiards_ods.platform_coupon_redemption_records",
|
||||
endpoint="/Promotion/GetOfflineCouponConsumePageList",
|
||||
@@ -420,7 +449,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="平台/团购券核销 ODS:GetOfflineCouponConsumePageList 原始 JSON",
|
||||
description="骞冲彴/鍥㈣喘鍒告牳閿€ ODS锛欸etOfflineCouponConsumePageList 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_MEMBER",
|
||||
@@ -438,7 +467,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="会员档案 ODS:GetTenantMemberList -> tenantMemberInfos 原始 JSON",
|
||||
description="浼氬憳妗f ODS锛欸etTenantMemberList -> tenantMemberInfos 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_MEMBER_CARD",
|
||||
@@ -456,7 +485,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="会员储值卡 ODS:GetTenantMemberCardList -> tenantMemberCards 原始 JSON",
|
||||
description="浼氬憳鍌ㄥ€煎崱 ODS锛欸etTenantMemberCardList -> tenantMemberCards 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_MEMBER_BALANCE",
|
||||
@@ -474,7 +503,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="会员余额变动 ODS:GetMemberCardBalanceChange -> tenantMemberCardLogs 原始 JSON",
|
||||
description="浼氬憳浣欓鍙樺姩 ODS锛欸etMemberCardBalanceChange -> tenantMemberCardLogs 鍘熷 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_RECHARGE_SETTLE",
|
||||
@@ -483,19 +512,83 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
endpoint="/Site/GetRechargeSettleList",
|
||||
data_path=("data",),
|
||||
list_key="settleList",
|
||||
pk_columns=(),
|
||||
pk_columns=(_int_col("recharge_order_id", "settleList.id", "id", required=True),),
|
||||
extra_columns=(
|
||||
_int_col("tenant_id", "settleList.tenantId", "tenantId"),
|
||||
_int_col("site_id", "settleList.siteId", "siteId", "siteProfile.id"),
|
||||
ColumnSpec("site_name_snapshot", sources=("siteProfile.shop_name", "settleList.siteName")),
|
||||
_int_col("member_id", "settleList.memberId", "memberId"),
|
||||
ColumnSpec("member_name_snapshot", sources=("settleList.memberName", "memberName")),
|
||||
ColumnSpec("member_phone_snapshot", sources=("settleList.memberPhone", "memberPhone")),
|
||||
_int_col("tenant_member_card_id", "settleList.tenantMemberCardId", "tenantMemberCardId"),
|
||||
ColumnSpec("member_card_type_name", sources=("settleList.memberCardTypeName", "memberCardTypeName")),
|
||||
_int_col("settle_relate_id", "settleList.settleRelateId", "settleRelateId"),
|
||||
_int_col("settle_type", "settleList.settleType", "settleType"),
|
||||
ColumnSpec("settle_name", sources=("settleList.settleName", "settleName")),
|
||||
_int_col("is_first", "settleList.isFirst", "isFirst"),
|
||||
_int_col("settle_status", "settleList.settleStatus", "settleStatus"),
|
||||
_decimal_col("pay_amount", "settleList.payAmount", "payAmount"),
|
||||
_decimal_col("refund_amount", "settleList.refundAmount", "refundAmount"),
|
||||
_decimal_col("point_amount", "settleList.pointAmount", "pointAmount"),
|
||||
_decimal_col("cash_amount", "settleList.cashAmount", "cashAmount"),
|
||||
_decimal_col("online_amount", "settleList.onlineAmount", "onlineAmount"),
|
||||
_decimal_col("balance_amount", "settleList.balanceAmount", "balanceAmount"),
|
||||
_decimal_col("card_amount", "settleList.cardAmount", "cardAmount"),
|
||||
_decimal_col("coupon_amount", "settleList.couponAmount", "couponAmount"),
|
||||
_decimal_col("recharge_card_amount", "settleList.rechargeCardAmount", "rechargeCardAmount"),
|
||||
_decimal_col("gift_card_amount", "settleList.giftCardAmount", "giftCardAmount"),
|
||||
_decimal_col("prepay_money", "settleList.prepayMoney", "prepayMoney"),
|
||||
_decimal_col("consume_money", "settleList.consumeMoney", "consumeMoney"),
|
||||
_decimal_col("goods_money", "settleList.goodsMoney", "goodsMoney"),
|
||||
_decimal_col("real_goods_money", "settleList.realGoodsMoney", "realGoodsMoney"),
|
||||
_decimal_col("table_charge_money", "settleList.tableChargeMoney", "tableChargeMoney"),
|
||||
_decimal_col("service_money", "settleList.serviceMoney", "serviceMoney"),
|
||||
_decimal_col("activity_discount", "settleList.activityDiscount", "activityDiscount"),
|
||||
_decimal_col("all_coupon_discount", "settleList.allCouponDiscount", "allCouponDiscount"),
|
||||
_decimal_col("goods_promotion_money", "settleList.goodsPromotionMoney", "goodsPromotionMoney"),
|
||||
_decimal_col("assistant_promotion_money", "settleList.assistantPromotionMoney", "assistantPromotionMoney"),
|
||||
_decimal_col("assistant_pd_money", "settleList.assistantPdMoney", "assistantPdMoney"),
|
||||
_decimal_col("assistant_cx_money", "settleList.assistantCxMoney", "assistantCxMoney"),
|
||||
_decimal_col("assistant_manual_discount", "settleList.assistantManualDiscount", "assistantManualDiscount"),
|
||||
_decimal_col("coupon_sale_amount", "settleList.couponSaleAmount", "couponSaleAmount"),
|
||||
_decimal_col("member_discount_amount", "settleList.memberDiscountAmount", "memberDiscountAmount"),
|
||||
_decimal_col("point_discount_price", "settleList.pointDiscountPrice", "pointDiscountPrice"),
|
||||
_decimal_col("point_discount_cost", "settleList.pointDiscountCost", "pointDiscountCost"),
|
||||
_decimal_col("adjust_amount", "settleList.adjustAmount", "adjustAmount"),
|
||||
_decimal_col("rounding_amount", "settleList.roundingAmount", "roundingAmount"),
|
||||
_int_col("payment_method", "settleList.paymentMethod", "paymentMethod"),
|
||||
_bool_col("can_be_revoked", "settleList.canBeRevoked", "canBeRevoked"),
|
||||
_bool_col("is_bind_member", "settleList.isBindMember", "isBindMember"),
|
||||
_bool_col("is_activity", "settleList.isActivity", "isActivity"),
|
||||
_bool_col("is_use_coupon", "settleList.isUseCoupon", "isUseCoupon"),
|
||||
_bool_col("is_use_discount", "settleList.isUseDiscount", "isUseDiscount"),
|
||||
_int_col("operator_id", "settleList.operatorId", "operatorId"),
|
||||
ColumnSpec("operator_name_snapshot", sources=("settleList.operatorName", "operatorName")),
|
||||
_int_col("salesman_user_id", "settleList.salesManUserId", "salesmanUserId", "salesManUserId"),
|
||||
ColumnSpec("salesman_name", sources=("settleList.salesManName", "salesmanName", "settleList.salesmanName")),
|
||||
ColumnSpec("order_remark", sources=("settleList.orderRemark", "orderRemark")),
|
||||
_int_col("table_id", "settleList.tableId", "tableId"),
|
||||
_int_col("serial_number", "settleList.serialNumber", "serialNumber"),
|
||||
_int_col("revoke_order_id", "settleList.revokeOrderId", "revokeOrderId"),
|
||||
ColumnSpec("revoke_order_name", sources=("settleList.revokeOrderName", "revokeOrderName")),
|
||||
ColumnSpec("revoke_time", sources=("settleList.revokeTime", "revokeTime")),
|
||||
ColumnSpec("create_time", sources=("settleList.createTime", "createTime")),
|
||||
ColumnSpec("pay_time", sources=("settleList.payTime", "payTime")),
|
||||
ColumnSpec("site_profile", sources=("siteProfile",)),
|
||||
),
|
||||
include_site_column=False,
|
||||
include_source_endpoint=False,
|
||||
include_source_endpoint=True,
|
||||
include_page_no=False,
|
||||
include_page_size=False,
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
include_fetched_at=True,
|
||||
include_record_index=False,
|
||||
conflict_columns_override=None,
|
||||
requires_window=False,
|
||||
description="会员充值结算 ODS:GetRechargeSettleList -> settleList 原始 JSON",
|
||||
description="?????? ODS?GetRechargeSettleList -> data.settleList ????",
|
||||
),
|
||||
|
||||
OdsTaskSpec(
|
||||
code="ODS_PACKAGE",
|
||||
code="ODS_GROUP_PACKAGE",
|
||||
class_name="OdsPackageTask",
|
||||
table_name="billiards_ods.group_buy_packages",
|
||||
endpoint="/PackageCoupon/QueryPackageCouponList",
|
||||
@@ -510,7 +603,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="团购套餐定义 ODS:QueryPackageCouponList -> packageCouponList 原始 JSON",
|
||||
description="团购套餐定义 ODS:QueryPackageCouponList -> packageCouponList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_GROUP_BUY_REDEMPTION",
|
||||
@@ -528,7 +621,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="团购套餐核销 ODS:GetSiteTableUseDetails -> siteTableUseDetailsList 原始 JSON",
|
||||
description="团购套餐核销 ODS:GetSiteTableUseDetails -> siteTableUseDetailsList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_INVENTORY_STOCK",
|
||||
@@ -545,7 +638,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="库存汇总 ODS:GetGoodsStockReport 原始 JSON",
|
||||
description="库存汇总 ODS:GetGoodsStockReport 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_INVENTORY_CHANGE",
|
||||
@@ -562,7 +655,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
description="库存变化记录 ODS:QueryGoodsOutboundReceipt -> queryDeliveryRecordsList 原始 JSON",
|
||||
description="库存变化记录 ODS:QueryGoodsOutboundReceipt -> queryDeliveryRecordsList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_TABLES",
|
||||
@@ -580,7 +673,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="台桌维表 ODS:GetSiteTables -> siteTables 原始 JSON",
|
||||
description="台桌维表 ODS:GetSiteTables -> siteTables 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_GOODS_CATEGORY",
|
||||
@@ -598,7 +691,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="库存商品分类树 ODS:QueryPrimarySecondaryCategory -> goodsCategoryList 原始 JSON",
|
||||
description="库存商品分类树 ODS:QueryPrimarySecondaryCategory -> goodsCategoryList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_STORE_GOODS",
|
||||
@@ -616,10 +709,10 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="门店商品档案 ODS:GetGoodsInventoryList -> orderGoodsList 原始 JSON",
|
||||
description="门店商品档案 ODS:GetGoodsInventoryList -> orderGoodsList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_TABLE_DISCOUNT",
|
||||
code="ODS_TABLE_FEE_DISCOUNT",
|
||||
class_name="OdsTableDiscountTask",
|
||||
table_name="billiards_ods.table_fee_discount_records",
|
||||
endpoint="/Site/GetTaiFeeAdjustList",
|
||||
@@ -634,7 +727,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="台费折扣/调账 ODS:GetTaiFeeAdjustList -> taiFeeAdjustInfos 原始 JSON",
|
||||
description="台费折扣/调账 ODS:GetTaiFeeAdjustList -> taiFeeAdjustInfos 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_TENANT_GOODS",
|
||||
@@ -652,7 +745,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
description="租户商品档案 ODS:QueryTenantGoods -> tenantGoodsList 原始 JSON",
|
||||
description="租户商品档案 ODS:QueryTenantGoods -> tenantGoodsList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
code="ODS_SETTLEMENT_TICKET",
|
||||
@@ -671,7 +764,7 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
conflict_columns_override=("source_file", "record_index"),
|
||||
requires_window=False,
|
||||
include_site_id=False,
|
||||
description="结账小票详情 ODS:GetOrderSettleTicketNew 原始 JSON",
|
||||
description="结账小票详情 ODS:GetOrderSettleTicketNew 原始 JSON",
|
||||
),
|
||||
)
|
||||
|
||||
@@ -725,7 +818,7 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
|
||||
if not candidates:
|
||||
self.logger.info(
|
||||
"%s: 窗口[%s ~ %s] 未发现需要抓取的小票",
|
||||
"%s: 窗口[%s ~ %s] 未发现需要抓取的小票",
|
||||
spec.code,
|
||||
context.window_start,
|
||||
context.window_end,
|
||||
@@ -755,7 +848,7 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
counts["updated"] += updated
|
||||
self.db.commit()
|
||||
self.logger.info(
|
||||
"%s: 小票抓取完成,候选=%s 插入=%s 更新=%s 跳过=%s",
|
||||
"%s: 小票抓取完成,候选=%s 插入=%s 更新=%s 跳过=%s",
|
||||
spec.code,
|
||||
len(candidates),
|
||||
inserted,
|
||||
@@ -767,7 +860,7 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
except Exception:
|
||||
counts["errors"] += 1
|
||||
self.db.rollback()
|
||||
self.logger.error("%s: 小票抓取失败", spec.code, exc_info=True)
|
||||
self.logger.error("%s: 小票抓取失败", spec.code, exc_info=True)
|
||||
raise
|
||||
|
||||
# ------------------------------------------------------------------ helpers
|
||||
@@ -782,7 +875,7 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
try:
|
||||
rows = self.db.query(sql)
|
||||
except Exception:
|
||||
self.logger.warning("查询已有小票失败,按空集处理", exc_info=True)
|
||||
self.logger.warning("查询已有小票失败,按空集处理", exc_info=True)
|
||||
return set()
|
||||
|
||||
return {
|
||||
@@ -819,7 +912,7 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
try:
|
||||
rows = self.db.query(sql, params)
|
||||
except Exception:
|
||||
self.logger.warning("读取支付流水以获取结算单ID失败,将尝试调用支付接口回退", exc_info=True)
|
||||
self.logger.warning("读取支付流水以获取结算单ID失败,将尝试调用支付接口回退", exc_info=True)
|
||||
return set()
|
||||
|
||||
return {
|
||||
@@ -853,7 +946,7 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
if relate_id:
|
||||
candidate_ids.add(relate_id)
|
||||
except Exception:
|
||||
self.logger.warning("调用支付接口获取结算单ID失败,当前批次将跳过回退来源", exc_info=True)
|
||||
self.logger.warning("调用支付接口获取结算单ID失败,当前批次将跳过回退来源", exc_info=True)
|
||||
return candidate_ids
|
||||
|
||||
def _fetch_ticket_payload(self, order_settle_id: int):
|
||||
@@ -869,10 +962,10 @@ class OdsSettlementTicketTask(BaseOdsTask):
|
||||
payload = response
|
||||
except Exception:
|
||||
self.logger.warning(
|
||||
"调用小票接口失败 orderSettleId=%s", order_settle_id, exc_info=True
|
||||
"调用小票接口失败 orderSettleId=%s", order_settle_id, exc_info=True
|
||||
)
|
||||
if isinstance(payload, dict) and isinstance(payload.get("data"), list) and len(payload["data"]) == 1:
|
||||
# 本地桩/回放可能把响应包装成单元素 list,这里展开以贴近真实结构
|
||||
# 本地桩/回放可能把响应包装成单元素 list,这里展开以贴近真实结构
|
||||
payload = payload["data"][0]
|
||||
return payload
|
||||
|
||||
@@ -899,27 +992,29 @@ def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
|
||||
|
||||
|
||||
ENABLED_ODS_CODES = {
|
||||
"ODS_ASSISTANT_ACCOUNTS",
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_PACKAGE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_COUPON_VERIFY",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_DISCOUNT",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"ODS_ORDER_SETTLE",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
}
|
||||
|
||||
ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
|
||||
@@ -931,3 +1026,4 @@ ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
|
||||
ODS_TASK_CLASSES["ODS_SETTLEMENT_TICKET"] = OdsSettlementTicketTask
|
||||
|
||||
__all__ = ["ODS_TASK_CLASSES", "ODS_TASK_SPECS", "BaseOdsTask", "ENABLED_ODS_CODES"]
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
from .base_dwd_task import BaseDwdTask
|
||||
from loaders.facts.payment import PaymentLoader
|
||||
from models.parsers import TypeParser
|
||||
@@ -29,7 +29,7 @@ class PaymentsDwdTask(BaseDwdTask):
|
||||
|
||||
# Iterate ODS Data
|
||||
batches = self.iter_ods_rows(
|
||||
table_name="billiards_ods.ods_payment_record",
|
||||
table_name="billiards_ods.payment_transactions",
|
||||
columns=["site_id", "pay_id", "payload", "fetched_at"],
|
||||
start_time=window_start,
|
||||
end_time=window_end
|
||||
@@ -136,3 +136,4 @@ class PaymentsDwdTask(BaseDwdTask):
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error parsing payment: {e}")
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user