在前后端开发联调前的提交 20260223
This commit is contained in:
@@ -38,6 +38,8 @@ class DwdLoadTask(BaseTask):
|
||||
"dwd.dim_goods_category": "ods.stock_goods_category_tree",
|
||||
"dwd.dim_groupbuy_package": "ods.group_buy_packages",
|
||||
"dwd.dim_groupbuy_package_ex": "ods.group_buy_packages",
|
||||
"dwd.dim_staff": "ods.staff_info_master",
|
||||
"dwd.dim_staff_ex": "ods.staff_info_master",
|
||||
# 事实
|
||||
"dwd.dwd_settlement_head": "ods.settlement_records",
|
||||
"dwd.dwd_settlement_head_ex": "ods.settlement_records",
|
||||
@@ -49,8 +51,6 @@ class DwdLoadTask(BaseTask):
|
||||
"dwd.dwd_store_goods_sale_ex": "ods.store_goods_sales_records",
|
||||
"dwd.dwd_assistant_service_log": "ods.assistant_service_records",
|
||||
"dwd.dwd_assistant_service_log_ex": "ods.assistant_service_records",
|
||||
"dwd.dwd_assistant_trash_event": "ods.assistant_cancellation_records",
|
||||
"dwd.dwd_assistant_trash_event_ex": "ods.assistant_cancellation_records",
|
||||
"dwd.dwd_member_balance_change": "ods.member_balance_changes",
|
||||
"dwd.dwd_member_balance_change_ex": "ods.member_balance_changes",
|
||||
"dwd.dwd_groupbuy_redemption": "ods.group_buy_redemption_records",
|
||||
@@ -62,6 +62,8 @@ class DwdLoadTask(BaseTask):
|
||||
"dwd.dwd_payment": "ods.payment_transactions",
|
||||
"dwd.dwd_refund": "ods.refund_transactions",
|
||||
"dwd.dwd_refund_ex": "ods.refund_transactions",
|
||||
"dwd.dwd_goods_stock_summary": "ods.goods_stock_summary",
|
||||
"dwd.dwd_goods_stock_movement": "ods.goods_stock_movements",
|
||||
}
|
||||
|
||||
SCD_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
|
||||
@@ -90,6 +92,17 @@ class DwdLoadTask(BaseTask):
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.logger.warning("检查 fetched_at 为空记录失败:%s, err=%s", ods_table, exc)
|
||||
|
||||
@staticmethod
|
||||
def _pick_snapshot_order_column(cols: Sequence[str]) -> str | None:
|
||||
"""从 ODS 列中选取用于快照排序的列(fetched_at > update_time > create_time)。
|
||||
CHANGE 2026-02-21: BUG 9 fix — 从 integrity_checker 移入本类,解决 AttributeError。
|
||||
"""
|
||||
lower = {c.lower() for c in cols}
|
||||
for candidate in ("fetched_at", "update_time", "create_time"):
|
||||
if candidate in lower:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _latest_snapshot_select_sql(
|
||||
select_cols_sql: str,
|
||||
@@ -157,6 +170,20 @@ class DwdLoadTask(BaseTask):
|
||||
"dwd.dim_table_ex": [
|
||||
("table_id", "id", None),
|
||||
("table_cloth_use_time", "table_cloth_use_time", None),
|
||||
("create_time", "create_time", None), # 台桌创建时间
|
||||
("light_status", "light_status", None), # 台灯状态
|
||||
("tablestatusname", "tablestatusname", None), # 台桌状态中文名(ODS 驼峰 → PG 小写)
|
||||
("sitename", "sitename", None), # 门店名称快照(ODS 驼峰 → PG 小写)
|
||||
("applet_qr_code_url", '"appletQrCodeUrl"', None), # 小程序二维码 URL(ODS 带引号保留大小写)
|
||||
("audit_status", "audit_status", None), # 审核状态
|
||||
("charge_free", "charge_free", None), # 是否免费台
|
||||
("delay_lights_time", "delay_lights_time", None), # 台灯熄灭延迟时间
|
||||
("is_rest_area", "is_rest_area", None), # 是否休息区
|
||||
("only_allow_groupon", "only_allow_groupon", None), # 是否仅允许团购
|
||||
("order_delay_time", "order_delay_time", None), # 订单自动延时时长
|
||||
("self_table", "self_table", None), # 是否自有台桌
|
||||
("temporary_light_second", "temporary_light_second", None), # 临时开灯秒数
|
||||
("virtual_table", "virtual_table", None), # 是否虚拟台桌
|
||||
],
|
||||
"dwd.dim_assistant": [("assistant_id", "id", None), ("user_id", "user_id", None)],
|
||||
"dwd.dim_assistant_ex": [
|
||||
@@ -164,11 +191,16 @@ class DwdLoadTask(BaseTask):
|
||||
("introduce", "introduce", None),
|
||||
("group_name", "group_name", None),
|
||||
("light_equipment_id", "light_equipment_id", None),
|
||||
("system_role_id", "system_role_id", None), # 系统角色 ID
|
||||
("job_num", "job_num", None), # 工号
|
||||
("cx_unit_price", "cx_unit_price", None), # 促销单价
|
||||
("pd_unit_price", "pd_unit_price", None), # 陪打单价
|
||||
],
|
||||
"dwd.dim_member": [
|
||||
("member_id", "id", None),
|
||||
("pay_money_sum", "pay_money_sum", None),
|
||||
("recharge_money_sum", "recharge_money_sum", None),
|
||||
# update_time: 上游 API 不提供此字段,保持 NULL(已在一致性检查白名单中标记)
|
||||
],
|
||||
"dwd.dim_member_ex": [
|
||||
("member_id", "id", None),
|
||||
@@ -206,7 +238,7 @@ class DwdLoadTask(BaseTask):
|
||||
("tenant_goods_id", "id", None),
|
||||
("remark_name", "remark_name", None),
|
||||
("goods_bar_code", "goods_bar_code", None),
|
||||
("commodity_code_list", "commodity_code", None),
|
||||
("commodity_code_list", "commoditycode", "TEXT[]"), # CHANGE 2026-02-21: 从 commoditycode(PG 数组格式 {xxx})映射,CAST 为 TEXT[]
|
||||
("is_in_site", "isinsite", "boolean"),
|
||||
],
|
||||
"dwd.dim_store_goods": [
|
||||
@@ -216,7 +248,7 @@ class DwdLoadTask(BaseTask):
|
||||
("created_at", "create_time", None),
|
||||
("updated_at", "update_time", None),
|
||||
("avg_monthly_sales", "average_monthly_sales", None),
|
||||
("batch_stock_qty", "stock", None),
|
||||
("batch_stock_qty", "batch_stock_quantity", None), # CHANGE 2026-02-20: 修正映射源 stock→batch_stock_quantity(批次库存,非当前库存)
|
||||
("sale_qty", "sale_num", None),
|
||||
("total_sales_qty", "total_sales", None),
|
||||
("commodity_code", "commodity_code", None),
|
||||
@@ -230,12 +262,13 @@ class DwdLoadTask(BaseTask):
|
||||
("safety_stock_qty", "safe_stock", None),
|
||||
("site_name", "sitename", None),
|
||||
("goods_cover_url", "goods_cover", None),
|
||||
("provisional_total_cost", "total_purchase_cost", None),
|
||||
("provisional_total_cost", "provisional_total_cost", None), # CHANGE 2026-02-20: 修正映射源 total_purchase_cost→provisional_total_cost(暂估成本,非实际采购成本)
|
||||
("is_discountable", "able_discount", None),
|
||||
("freeze_status", "freeze", None),
|
||||
("remark", "remark", None),
|
||||
("days_on_shelf", "days_available", None),
|
||||
("sort_order", "sort", None),
|
||||
("time_slot_sale", "time_slot_sale", None), # CHANGE 2026-02-21: 新增分时段销售标记
|
||||
],
|
||||
"dwd.dim_goods_category": [
|
||||
("category_id", "id", None),
|
||||
@@ -268,6 +301,24 @@ class DwdLoadTask(BaseTask):
|
||||
("package_type", "type", None),
|
||||
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
|
||||
],
|
||||
"dwd.dim_staff": [
|
||||
("staff_id", "id", None),
|
||||
("entry_time", "entry_time", "timestamptz"),
|
||||
("resign_time", "resign_time", "timestamptz"),
|
||||
],
|
||||
"dwd.dim_staff_ex": [
|
||||
("staff_id", "id", None),
|
||||
("rank_name", "rankname", None),
|
||||
("cashier_point_id", "cashierpointid", "bigint"),
|
||||
("cashier_point_name", "cashierpointname", None),
|
||||
("group_id", "groupid", "bigint"),
|
||||
("group_name", "groupname", None),
|
||||
("system_user_id", "systemuserid", "bigint"),
|
||||
("tenant_org_id", "tenantorgid", "bigint"),
|
||||
("auth_code_create", "auth_code_create", "timestamptz"),
|
||||
("create_time", "create_time", "timestamptz"),
|
||||
("user_roles", "userroles", "jsonb"),
|
||||
],
|
||||
# 事实表主键及关键差异列
|
||||
"dwd.dwd_table_fee_log": [
|
||||
("table_fee_log_id", "id", None),
|
||||
@@ -300,7 +351,10 @@ class DwdLoadTask(BaseTask):
|
||||
],
|
||||
"dwd.dwd_store_goods_sale": [
|
||||
("store_goods_sale_id", "id", None),
|
||||
("discount_price", "discount_money", None),
|
||||
# CHANGE 2026-02-20: 修正列名误导——原 DWD discount_price 实际映射自 ODS discount_money(折扣金额),
|
||||
# 现重命名为 discount_money 以反映真实语义;新增 discount_price 映射 ODS 真正的折后单价。
|
||||
("discount_money", "discount_money", None),
|
||||
("discount_price", "discount_price", None),
|
||||
("coupon_share_money", "coupon_share_money", None),
|
||||
],
|
||||
"dwd.dwd_store_goods_sale_ex": [
|
||||
@@ -316,7 +370,9 @@ class DwdLoadTask(BaseTask):
|
||||
"dwd.dwd_assistant_service_log": [
|
||||
("assistant_service_id", "id", None),
|
||||
("assistant_no", "assistantno", None),
|
||||
("site_assistant_id", "order_assistant_id", None),
|
||||
# CHANGE 2026-02-20: 修正映射源——site_assistant_id 应来自 ODS site_assistant_id(助教档案 ID),
|
||||
# 而非 order_assistant_id(订单级助教明细 ID)。order_assistant_id 由同名列自动映射。
|
||||
("site_assistant_id", "site_assistant_id", None),
|
||||
("level_name", "levelname", None),
|
||||
("skill_name", "skillname", None),
|
||||
("real_service_money", "real_service_money", None),
|
||||
@@ -330,24 +386,8 @@ class DwdLoadTask(BaseTask):
|
||||
("salesman_name", "salesman_name", None),
|
||||
("table_name", "tablename", None),
|
||||
("assistant_team_name", "assistantteamname", None),
|
||||
],
|
||||
"dwd.dwd_assistant_trash_event": [
|
||||
("assistant_trash_event_id", "id", None),
|
||||
("assistant_no", "assistantname", None),
|
||||
("abolish_amount", "assistantabolishamount", None),
|
||||
("charge_minutes_raw", "pdchargeminutes", None),
|
||||
("site_id", "siteid", None),
|
||||
("table_id", "tableid", None),
|
||||
("table_area_id", "tableareaid", None),
|
||||
("assistant_name", "assistantname", None),
|
||||
("trash_reason", "trashreason", None),
|
||||
("create_time", "createtime", None),
|
||||
("tenant_id", "tenant_id", None),
|
||||
],
|
||||
"dwd.dwd_assistant_trash_event_ex": [
|
||||
("assistant_trash_event_id", "id", None),
|
||||
("table_area_name", "tablearea", None),
|
||||
("table_name", "tablename", None),
|
||||
("operator_id", "operator_id", None), # 操作员 ID
|
||||
("operator_name", "operator_name", None), # 操作员姓名
|
||||
],
|
||||
"dwd.dwd_member_balance_change": [
|
||||
("balance_change_id", "id", None),
|
||||
@@ -360,12 +400,15 @@ class DwdLoadTask(BaseTask):
|
||||
("member_mobile", "membermobile", None),
|
||||
("principal_before", "principal_before", None),
|
||||
("principal_after", "principal_after", None),
|
||||
# CHANGE [2026-02-20] intent: 补齐 principal_change_amount,由 principal_after - principal_before 计算
|
||||
("principal_change_amount", "COALESCE(CAST(principal_after AS numeric),0) - COALESCE(CAST(principal_before AS numeric),0)", "numeric"),
|
||||
],
|
||||
"dwd.dwd_member_balance_change_ex": [
|
||||
("balance_change_id", "id", None),
|
||||
("pay_site_name", "paysitename", None),
|
||||
("register_site_name", "registersitename", None),
|
||||
("principal_data", "principal_data", None),
|
||||
("relate_id", "relate_id", None), # 关联业务单据 ID
|
||||
],
|
||||
"dwd.dwd_groupbuy_redemption": [
|
||||
("redemption_id", "id", None),
|
||||
@@ -502,13 +545,16 @@ class DwdLoadTask(BaseTask):
|
||||
("payment_method", "paymentmethod", None),
|
||||
("create_time", "createtime", None),
|
||||
("pay_time", "paytime", None),
|
||||
("pl_coupon_sale_amount", "plcouponsaleamount", None), # 平台券销售金额
|
||||
("mervou_sales_amount", "mervousalesamount", None), # 储值券销售金额
|
||||
("electricity_money", "electricitymoney", None), # 电费金额
|
||||
("real_electricity_money", "realelectricitymoney", None), # 实际电费金额
|
||||
("electricity_adjust_money", "electricityadjustmoney", None), # 电费调整金额
|
||||
],
|
||||
# CHANGE 2026-02-20: 移除 salesman_name/order_remark/revoke_order_name 的重复条目(原列表首尾各出现一次)
|
||||
"dwd.dwd_recharge_order_ex": [
|
||||
("recharge_order_id", "id", None),
|
||||
("site_name_snapshot", "sitename", None),
|
||||
("salesman_name", "salesmanname", None),
|
||||
("order_remark", "orderremark", None),
|
||||
("revoke_order_name", "revokeordername", None),
|
||||
("settle_status", "settlestatus", None),
|
||||
("is_bind_member", "isbindmember", "boolean"),
|
||||
("is_activity", "isactivity", "boolean"),
|
||||
@@ -551,6 +597,47 @@ class DwdLoadTask(BaseTask):
|
||||
("revoke_order_name", "revokeordername", None),
|
||||
("revoke_time", "revoketime", None),
|
||||
],
|
||||
# 库存汇总:goods_stock_summary(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写(sitegoodsid),不是驼峰
|
||||
"dwd.dwd_goods_stock_summary": [
|
||||
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 ID(PK)
|
||||
("goods_name", '"goodsname"', None), # 商品名称
|
||||
("goods_unit", '"goodsunit"', None), # 计量单位
|
||||
("goods_category_id", '"goodscategoryid"', "bigint"), # 一级分类 ID
|
||||
("goods_category_second_id", '"goodscategorysecondid"', "bigint"), # 二级分类 ID
|
||||
("category_name", '"categoryname"', None), # 分类名称
|
||||
("range_start_stock", '"rangestartstock"', "numeric"), # 期初库存
|
||||
("range_end_stock", '"rangeendstock"', "numeric"), # 期末库存
|
||||
("range_in", '"rangein"', "numeric"), # 入库数量
|
||||
("range_out", '"rangeout"', "numeric"), # 出库数量
|
||||
("range_sale", '"rangesale"', "numeric"), # 销售数量
|
||||
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
|
||||
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
|
||||
("current_stock", '"currentstock"', "numeric"), # 当前库存
|
||||
],
|
||||
# 库存变动流水:goods_stock_movements(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
|
||||
"dwd.dwd_goods_stock_movement": [
|
||||
("site_goods_stock_id", '"sitegoodsstockid"', "bigint"), # 库存变动记录 ID(PK)
|
||||
("tenant_id", '"tenantid"', "bigint"), # 租户 ID
|
||||
("site_id", '"siteid"', "bigint"), # 门店 ID
|
||||
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 ID
|
||||
("goods_name", '"goodsname"', None), # 商品名称
|
||||
("goods_category_id", '"goodscategoryid"', "bigint"), # 一级分类 ID
|
||||
("goods_second_category_id", '"goodssecondcategoryid"', "bigint"), # 二级分类 ID
|
||||
("unit", "unit", None), # 计量单位(ODS 已是小写)
|
||||
("price", "price", "numeric"), # 商品单价(ODS 已是小写)
|
||||
("stock_type", '"stocktype"', "integer"), # 库存变动类型
|
||||
("change_num", '"changenum"', "numeric"), # 变动数量
|
||||
("start_num", '"startnum"', "numeric"), # 变动前库存
|
||||
("end_num", '"endnum"', "numeric"), # 变动后库存
|
||||
("change_num_a", '"changenuma"', "numeric"), # 辅助单位变动量
|
||||
("start_num_a", '"startnuma"', "numeric"), # 辅助单位变动前库存
|
||||
("end_num_a", '"endnuma"', "numeric"), # 辅助单位变动后库存
|
||||
("remark", "remark", None), # 备注(ODS 已是小写)
|
||||
("operator_name", '"operatorname"', None), # 操作人
|
||||
("create_time", '"createtime"', "timestamptz"), # 变动时间
|
||||
],
|
||||
}
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
@@ -624,7 +711,7 @@ class DwdLoadTask(BaseTask):
|
||||
errors.append({"table": dwd_table, "error": str(exc)})
|
||||
continue
|
||||
|
||||
return {"tables": summary, "errors": errors}
|
||||
return {"tables": summary, "errors": len(errors), "error_details": errors}
|
||||
|
||||
# ---------------------- 辅助方法 ----------------------
|
||||
def _get_columns(self, cur, table: str) -> List[str]:
|
||||
@@ -777,6 +864,10 @@ class DwdLoadTask(BaseTask):
|
||||
self.logger.error("跳过 %s:ODS 表 %s 缺少 fetched_at 列", dwd_table, ods_table)
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
self._log_missing_fetched_at(cur, ods_table)
|
||||
# CHANGE 2026-02-22: BUG 12 — 获取列类型,用于哨兵日期过滤
|
||||
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
|
||||
ods_types = self._get_column_types(cur, ods_table, "ods")
|
||||
ts_types = {"timestamp without time zone", "timestamp with time zone"}
|
||||
table_sql = self._format_table(ods_table, "ods")
|
||||
# 构造 SELECT 表达式,支持 JSON/expression 映射
|
||||
select_exprs: list[str] = []
|
||||
@@ -790,7 +881,14 @@ class DwdLoadTask(BaseTask):
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
added.add(lc)
|
||||
elif lc in ods_set:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
|
||||
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
|
||||
select_exprs.append(
|
||||
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
|
||||
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
|
||||
)
|
||||
else:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
added.add(lc)
|
||||
# 分类维度需要额外读取 categoryboxes 以展开子类
|
||||
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
|
||||
@@ -810,7 +908,7 @@ class DwdLoadTask(BaseTask):
|
||||
if not select_exprs:
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
|
||||
order_col = self._pick_snapshot_order_column(ods_cols)
|
||||
order_col = self._pick_snapshot_order_column(ods_cols) # CHANGE 2026-02-21: BUG 9 fix — 方法从 integrity_checker 移入本类
|
||||
key_exprs: list[str] = []
|
||||
for key in business_keys:
|
||||
lc = key.lower()
|
||||
@@ -860,9 +958,22 @@ class DwdLoadTask(BaseTask):
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
|
||||
# 预加载当前版本(scd2_is_current=1),避免逐行 SELECT 造成大量 round-trip
|
||||
# CHANGE 2026-02-22: BUG 12 — 用显式列列表替代 SELECT *,对 timestamptz 列做哨兵值过滤
|
||||
# 防止 BC 日期导致 psycopg2 fetchall() 抛出 ValueError
|
||||
table_sql_dwd = self._format_table(dwd_table, "dwd")
|
||||
where_current = " AND ".join([f"COALESCE(scd2_is_current,1)=1"])
|
||||
cur.execute(f"SELECT * FROM {table_sql_dwd} WHERE {where_current}")
|
||||
dwd_select_exprs: list[str] = []
|
||||
for col in dwd_cols:
|
||||
lc = col.lower()
|
||||
if dwd_types.get(lc) in ts_types:
|
||||
dwd_select_exprs.append(
|
||||
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamptz '
|
||||
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
|
||||
)
|
||||
else:
|
||||
dwd_select_exprs.append(f'"{lc}"')
|
||||
dwd_select_sql = ", ".join(dwd_select_exprs)
|
||||
where_current = "COALESCE(scd2_is_current,1)=1"
|
||||
cur.execute(f"SELECT {dwd_select_sql} FROM {table_sql_dwd} WHERE {where_current}")
|
||||
current_rows = cur.fetchall() or []
|
||||
current_by_pk: dict[tuple[Any, ...], Dict[str, Any]] = {}
|
||||
for r in current_rows:
|
||||
@@ -1245,12 +1356,19 @@ class DwdLoadTask(BaseTask):
|
||||
"""构造事实表 SELECT 列表,需要时做类型转换。"""
|
||||
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
|
||||
text_types = {"text", "character varying", "varchar"}
|
||||
ts_types = {"timestamp without time zone", "timestamp with time zone"}
|
||||
exprs = []
|
||||
for col in insert_cols:
|
||||
d_type = dwd_types.get(col)
|
||||
o_type = ods_types.get(col)
|
||||
if d_type in numeric_types and o_type in text_types:
|
||||
exprs.append(f"CAST(NULLIF(CAST(\"{col}\" AS text), '') AS numeric):: {d_type}")
|
||||
elif d_type in ts_types and o_type in ts_types:
|
||||
# CHANGE 2026-02-22: BUG 12 — 哨兵日期过滤,防止 0001-01-01 转 timestamptz 变 BC
|
||||
exprs.append(
|
||||
f'CASE WHEN "{col}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
|
||||
f'THEN "{col}" ELSE NULL END'
|
||||
)
|
||||
else:
|
||||
exprs.append(f'"{col}"')
|
||||
return exprs
|
||||
@@ -1271,8 +1389,15 @@ class DwdLoadTask(BaseTask):
|
||||
schema, table = self._split_table_name(name, default_schema)
|
||||
return f'"{schema}"."{table}"'
|
||||
|
||||
# CHANGE 2026-02-22: BUG 12 fix — 哨兵日期阈值,上游 API 用 0001-01-01 表示"未设置"
|
||||
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
|
||||
|
||||
def _cast_expr(self, col: str, cast_type: str | None) -> str:
|
||||
"""构造带可选 CAST 的列表达式。"""
|
||||
"""构造带可选 CAST 的列表达式。
|
||||
|
||||
对 timestamptz 转换额外包裹哨兵值过滤:ODS 中 0001-01-01 在
|
||||
Asia/Shanghai 时区下会变成 BC 日期,psycopg2 无法解析。
|
||||
"""
|
||||
if col.upper() == "NULL":
|
||||
base = "NULL"
|
||||
else:
|
||||
@@ -1282,8 +1407,11 @@ class DwdLoadTask(BaseTask):
|
||||
cast_lower = cast_type.lower()
|
||||
if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
|
||||
return f"CAST(NULLIF(CAST({base} AS text), '') AS numeric):: {cast_type}"
|
||||
if cast_lower == "timestamptz":
|
||||
return f"({base})::timestamptz"
|
||||
if cast_lower in {"timestamptz", "timestamp with time zone"}:
|
||||
# 哨兵值过滤:< 0002-01-01 的值置为 NULL
|
||||
# base 可能是 text(JSONB ->> 提取),需先 CAST 为 timestamp 再比较
|
||||
return (f"CASE WHEN ({base})::timestamp >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
|
||||
f"THEN ({base})::timestamptz ELSE NULL END")
|
||||
return f"{base}::{cast_type}"
|
||||
return base
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Sequence, Tuple
|
||||
@@ -16,7 +17,9 @@ from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
class DwdQualityTask(BaseTask):
|
||||
"""对 ODS 与 DWD 进行行数、金额对照核查,生成 JSON 报表。"""
|
||||
|
||||
REPORT_PATH = Path("reports/dwd_quality_report.json")
|
||||
# 从 .env 读取 ETL_REPORT_ROOT(必须配置)
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
REPORT_PATH = Path(_report_root) / "dwd_quality_report.json" if _report_root else None
|
||||
AMOUNT_KEYWORDS = ("amount", "money", "fee", "balance")
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
@@ -29,6 +32,11 @@ class DwdQualityTask(BaseTask):
|
||||
|
||||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
|
||||
"""输出行数/金额差异报表到本地文件。"""
|
||||
if self.REPORT_PATH is None:
|
||||
raise RuntimeError(
|
||||
"环境变量 ETL_REPORT_ROOT 未定义,无法生成质检报表。"
|
||||
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
|
||||
)
|
||||
report: Dict[str, Any] = {
|
||||
"generated_at": extracted["now"].isoformat(),
|
||||
"tables": [],
|
||||
|
||||
Reference in New Issue
Block a user