在前后端开发联调前的提交 20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -38,6 +38,8 @@ class DwdLoadTask(BaseTask):
"dwd.dim_goods_category": "ods.stock_goods_category_tree",
"dwd.dim_groupbuy_package": "ods.group_buy_packages",
"dwd.dim_groupbuy_package_ex": "ods.group_buy_packages",
"dwd.dim_staff": "ods.staff_info_master",
"dwd.dim_staff_ex": "ods.staff_info_master",
# 事实
"dwd.dwd_settlement_head": "ods.settlement_records",
"dwd.dwd_settlement_head_ex": "ods.settlement_records",
@@ -49,8 +51,6 @@ class DwdLoadTask(BaseTask):
"dwd.dwd_store_goods_sale_ex": "ods.store_goods_sales_records",
"dwd.dwd_assistant_service_log": "ods.assistant_service_records",
"dwd.dwd_assistant_service_log_ex": "ods.assistant_service_records",
"dwd.dwd_assistant_trash_event": "ods.assistant_cancellation_records",
"dwd.dwd_assistant_trash_event_ex": "ods.assistant_cancellation_records",
"dwd.dwd_member_balance_change": "ods.member_balance_changes",
"dwd.dwd_member_balance_change_ex": "ods.member_balance_changes",
"dwd.dwd_groupbuy_redemption": "ods.group_buy_redemption_records",
@@ -62,6 +62,8 @@ class DwdLoadTask(BaseTask):
"dwd.dwd_payment": "ods.payment_transactions",
"dwd.dwd_refund": "ods.refund_transactions",
"dwd.dwd_refund_ex": "ods.refund_transactions",
"dwd.dwd_goods_stock_summary": "ods.goods_stock_summary",
"dwd.dwd_goods_stock_movement": "ods.goods_stock_movements",
}
SCD_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
@@ -90,6 +92,17 @@ class DwdLoadTask(BaseTask):
except Exception as exc: # noqa: BLE001
self.logger.warning("检查 fetched_at 为空记录失败:%s, err=%s", ods_table, exc)
@staticmethod
def _pick_snapshot_order_column(cols: Sequence[str]) -> str | None:
    """Choose the ODS column used to order snapshots.

    Column names are matched case-insensitively against the priority list
    fetched_at > update_time > create_time. The lower-case name of the
    first candidate that is present is returned; None is returned when
    none of the candidates exists in ``cols``.

    CHANGE 2026-02-21: BUG 9 fix — moved into this class from
    integrity_checker to resolve an AttributeError.
    """
    available = {name.lower() for name in cols}
    preference = ("fetched_at", "update_time", "create_time")
    # First preferred column that is actually available, else None.
    return next((name for name in preference if name in available), None)
@staticmethod
def _latest_snapshot_select_sql(
select_cols_sql: str,
@@ -157,6 +170,20 @@ class DwdLoadTask(BaseTask):
"dwd.dim_table_ex": [
("table_id", "id", None),
("table_cloth_use_time", "table_cloth_use_time", None),
("create_time", "create_time", None), # 台桌创建时间
("light_status", "light_status", None), # 台灯状态
("tablestatusname", "tablestatusname", None), # 台桌状态中文名ODS 驼峰 → PG 小写)
("sitename", "sitename", None), # 门店名称快照ODS 驼峰 → PG 小写)
("applet_qr_code_url", '"appletQrCodeUrl"', None), # 小程序二维码 URLODS 带引号保留大小写)
("audit_status", "audit_status", None), # 审核状态
("charge_free", "charge_free", None), # 是否免费台
("delay_lights_time", "delay_lights_time", None), # 台灯熄灭延迟时间
("is_rest_area", "is_rest_area", None), # 是否休息区
("only_allow_groupon", "only_allow_groupon", None), # 是否仅允许团购
("order_delay_time", "order_delay_time", None), # 订单自动延时时长
("self_table", "self_table", None), # 是否自有台桌
("temporary_light_second", "temporary_light_second", None), # 临时开灯秒数
("virtual_table", "virtual_table", None), # 是否虚拟台桌
],
"dwd.dim_assistant": [("assistant_id", "id", None), ("user_id", "user_id", None)],
"dwd.dim_assistant_ex": [
@@ -164,11 +191,16 @@ class DwdLoadTask(BaseTask):
("introduce", "introduce", None),
("group_name", "group_name", None),
("light_equipment_id", "light_equipment_id", None),
("system_role_id", "system_role_id", None), # 系统角色 ID
("job_num", "job_num", None), # 工号
("cx_unit_price", "cx_unit_price", None), # 促销单价
("pd_unit_price", "pd_unit_price", None), # 陪打单价
],
"dwd.dim_member": [
("member_id", "id", None),
("pay_money_sum", "pay_money_sum", None),
("recharge_money_sum", "recharge_money_sum", None),
# update_time: 上游 API 不提供此字段,保持 NULL已在一致性检查白名单中标记
],
"dwd.dim_member_ex": [
("member_id", "id", None),
@@ -206,7 +238,7 @@ class DwdLoadTask(BaseTask):
("tenant_goods_id", "id", None),
("remark_name", "remark_name", None),
("goods_bar_code", "goods_bar_code", None),
("commodity_code_list", "commodity_code", None),
("commodity_code_list", "commoditycode", "TEXT[]"), # CHANGE 2026-02-21: 从 commoditycodePG 数组格式 {xxx}映射CAST 为 TEXT[]
("is_in_site", "isinsite", "boolean"),
],
"dwd.dim_store_goods": [
@@ -216,7 +248,7 @@ class DwdLoadTask(BaseTask):
("created_at", "create_time", None),
("updated_at", "update_time", None),
("avg_monthly_sales", "average_monthly_sales", None),
("batch_stock_qty", "stock", None),
("batch_stock_qty", "batch_stock_quantity", None), # CHANGE 2026-02-20: 修正映射源 stock→batch_stock_quantity批次库存非当前库存
("sale_qty", "sale_num", None),
("total_sales_qty", "total_sales", None),
("commodity_code", "commodity_code", None),
@@ -230,12 +262,13 @@ class DwdLoadTask(BaseTask):
("safety_stock_qty", "safe_stock", None),
("site_name", "sitename", None),
("goods_cover_url", "goods_cover", None),
("provisional_total_cost", "total_purchase_cost", None),
("provisional_total_cost", "provisional_total_cost", None), # CHANGE 2026-02-20: 修正映射源 total_purchase_cost→provisional_total_cost暂估成本非实际采购成本
("is_discountable", "able_discount", None),
("freeze_status", "freeze", None),
("remark", "remark", None),
("days_on_shelf", "days_available", None),
("sort_order", "sort", None),
("time_slot_sale", "time_slot_sale", None), # CHANGE 2026-02-21: 新增分时段销售标记
],
"dwd.dim_goods_category": [
("category_id", "id", None),
@@ -268,6 +301,24 @@ class DwdLoadTask(BaseTask):
("package_type", "type", None),
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
],
"dwd.dim_staff": [
("staff_id", "id", None),
("entry_time", "entry_time", "timestamptz"),
("resign_time", "resign_time", "timestamptz"),
],
"dwd.dim_staff_ex": [
("staff_id", "id", None),
("rank_name", "rankname", None),
("cashier_point_id", "cashierpointid", "bigint"),
("cashier_point_name", "cashierpointname", None),
("group_id", "groupid", "bigint"),
("group_name", "groupname", None),
("system_user_id", "systemuserid", "bigint"),
("tenant_org_id", "tenantorgid", "bigint"),
("auth_code_create", "auth_code_create", "timestamptz"),
("create_time", "create_time", "timestamptz"),
("user_roles", "userroles", "jsonb"),
],
# 事实表主键及关键差异列
"dwd.dwd_table_fee_log": [
("table_fee_log_id", "id", None),
@@ -300,7 +351,10 @@ class DwdLoadTask(BaseTask):
],
"dwd.dwd_store_goods_sale": [
("store_goods_sale_id", "id", None),
("discount_price", "discount_money", None),
# CHANGE 2026-02-20: 修正列名误导——原 DWD discount_price 实际映射自 ODS discount_money折扣金额
# 现重命名为 discount_money 以反映真实语义;新增 discount_price 映射 ODS 真正的折后单价。
("discount_money", "discount_money", None),
("discount_price", "discount_price", None),
("coupon_share_money", "coupon_share_money", None),
],
"dwd.dwd_store_goods_sale_ex": [
@@ -316,7 +370,9 @@ class DwdLoadTask(BaseTask):
"dwd.dwd_assistant_service_log": [
("assistant_service_id", "id", None),
("assistant_no", "assistantno", None),
("site_assistant_id", "order_assistant_id", None),
# CHANGE 2026-02-20: 修正映射源——site_assistant_id 应来自 ODS site_assistant_id助教档案 ID
# 而非 order_assistant_id订单级助教明细 ID。order_assistant_id 由同名列自动映射。
("site_assistant_id", "site_assistant_id", None),
("level_name", "levelname", None),
("skill_name", "skillname", None),
("real_service_money", "real_service_money", None),
@@ -330,24 +386,8 @@ class DwdLoadTask(BaseTask):
("salesman_name", "salesman_name", None),
("table_name", "tablename", None),
("assistant_team_name", "assistantteamname", None),
],
"dwd.dwd_assistant_trash_event": [
("assistant_trash_event_id", "id", None),
("assistant_no", "assistantname", None),
("abolish_amount", "assistantabolishamount", None),
("charge_minutes_raw", "pdchargeminutes", None),
("site_id", "siteid", None),
("table_id", "tableid", None),
("table_area_id", "tableareaid", None),
("assistant_name", "assistantname", None),
("trash_reason", "trashreason", None),
("create_time", "createtime", None),
("tenant_id", "tenant_id", None),
],
"dwd.dwd_assistant_trash_event_ex": [
("assistant_trash_event_id", "id", None),
("table_area_name", "tablearea", None),
("table_name", "tablename", None),
("operator_id", "operator_id", None), # 操作员 ID
("operator_name", "operator_name", None), # 操作员姓名
],
"dwd.dwd_member_balance_change": [
("balance_change_id", "id", None),
@@ -360,12 +400,15 @@ class DwdLoadTask(BaseTask):
("member_mobile", "membermobile", None),
("principal_before", "principal_before", None),
("principal_after", "principal_after", None),
# CHANGE [2026-02-20] intent: 补齐 principal_change_amount由 principal_after - principal_before 计算
("principal_change_amount", "COALESCE(CAST(principal_after AS numeric),0) - COALESCE(CAST(principal_before AS numeric),0)", "numeric"),
],
"dwd.dwd_member_balance_change_ex": [
("balance_change_id", "id", None),
("pay_site_name", "paysitename", None),
("register_site_name", "registersitename", None),
("principal_data", "principal_data", None),
("relate_id", "relate_id", None), # 关联业务单据 ID
],
"dwd.dwd_groupbuy_redemption": [
("redemption_id", "id", None),
@@ -502,13 +545,16 @@ class DwdLoadTask(BaseTask):
("payment_method", "paymentmethod", None),
("create_time", "createtime", None),
("pay_time", "paytime", None),
("pl_coupon_sale_amount", "plcouponsaleamount", None), # 平台券销售金额
("mervou_sales_amount", "mervousalesamount", None), # 储值券销售金额
("electricity_money", "electricitymoney", None), # 电费金额
("real_electricity_money", "realelectricitymoney", None), # 实际电费金额
("electricity_adjust_money", "electricityadjustmoney", None), # 电费调整金额
],
# CHANGE 2026-02-20: 移除 salesman_name/order_remark/revoke_order_name 的重复条目(原列表首尾各出现一次)
"dwd.dwd_recharge_order_ex": [
("recharge_order_id", "id", None),
("site_name_snapshot", "sitename", None),
("salesman_name", "salesmanname", None),
("order_remark", "orderremark", None),
("revoke_order_name", "revokeordername", None),
("settle_status", "settlestatus", None),
("is_bind_member", "isbindmember", "boolean"),
("is_activity", "isactivity", "boolean"),
@@ -551,6 +597,47 @@ class DwdLoadTask(BaseTask):
("revoke_order_name", "revokeordername", None),
("revoke_time", "revoketime", None),
],
# 库存汇总goods_stock_summaryODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写sitegoodsid不是驼峰
"dwd.dwd_goods_stock_summary": [
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 IDPK
("goods_name", '"goodsname"', None), # 商品名称
("goods_unit", '"goodsunit"', None), # 计量单位
("goods_category_id", '"goodscategoryid"', "bigint"), # 一级分类 ID
("goods_category_second_id", '"goodscategorysecondid"', "bigint"), # 二级分类 ID
("category_name", '"categoryname"', None), # 分类名称
("range_start_stock", '"rangestartstock"', "numeric"), # 期初库存
("range_end_stock", '"rangeendstock"', "numeric"), # 期末库存
("range_in", '"rangein"', "numeric"), # 入库数量
("range_out", '"rangeout"', "numeric"), # 出库数量
("range_sale", '"rangesale"', "numeric"), # 销售数量
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
("current_stock", '"currentstock"', "numeric"), # 当前库存
],
# 库存变动流水goods_stock_movementsODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
"dwd.dwd_goods_stock_movement": [
("site_goods_stock_id", '"sitegoodsstockid"', "bigint"), # 库存变动记录 IDPK
("tenant_id", '"tenantid"', "bigint"), # 租户 ID
("site_id", '"siteid"', "bigint"), # 门店 ID
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 ID
("goods_name", '"goodsname"', None), # 商品名称
("goods_category_id", '"goodscategoryid"', "bigint"), # 一级分类 ID
("goods_second_category_id", '"goodssecondcategoryid"', "bigint"), # 二级分类 ID
("unit", "unit", None), # 计量单位ODS 已是小写)
("price", "price", "numeric"), # 商品单价ODS 已是小写)
("stock_type", '"stocktype"', "integer"), # 库存变动类型
("change_num", '"changenum"', "numeric"), # 变动数量
("start_num", '"startnum"', "numeric"), # 变动前库存
("end_num", '"endnum"', "numeric"), # 变动后库存
("change_num_a", '"changenuma"', "numeric"), # 辅助单位变动量
("start_num_a", '"startnuma"', "numeric"), # 辅助单位变动前库存
("end_num_a", '"endnuma"', "numeric"), # 辅助单位变动后库存
("remark", "remark", None), # 备注ODS 已是小写)
("operator_name", '"operatorname"', None), # 操作人
("create_time", '"createtime"', "timestamptz"), # 变动时间
],
}
def get_task_code(self) -> str:
@@ -624,7 +711,7 @@ class DwdLoadTask(BaseTask):
errors.append({"table": dwd_table, "error": str(exc)})
continue
return {"tables": summary, "errors": errors}
return {"tables": summary, "errors": len(errors), "error_details": errors}
# ---------------------- 辅助方法 ----------------------
def _get_columns(self, cur, table: str) -> List[str]:
@@ -777,6 +864,10 @@ class DwdLoadTask(BaseTask):
self.logger.error("跳过 %sODS 表 %s 缺少 fetched_at 列", dwd_table, ods_table)
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
self._log_missing_fetched_at(cur, ods_table)
# CHANGE 2026-02-22: BUG 12 — 获取列类型,用于哨兵日期过滤
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
ods_types = self._get_column_types(cur, ods_table, "ods")
ts_types = {"timestamp without time zone", "timestamp with time zone"}
table_sql = self._format_table(ods_table, "ods")
# 构造 SELECT 表达式,支持 JSON/expression 映射
select_exprs: list[str] = []
@@ -790,7 +881,14 @@ class DwdLoadTask(BaseTask):
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
added.add(lc)
elif lc in ods_set:
select_exprs.append(f'"{lc}" AS "{lc}"')
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
select_exprs.append(
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
)
else:
select_exprs.append(f'"{lc}" AS "{lc}"')
added.add(lc)
# 分类维度需要额外读取 categoryboxes 以展开子类
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
@@ -810,7 +908,7 @@ class DwdLoadTask(BaseTask):
if not select_exprs:
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
order_col = self._pick_snapshot_order_column(ods_cols)
order_col = self._pick_snapshot_order_column(ods_cols) # CHANGE 2026-02-21: BUG 9 fix — 方法从 integrity_checker 移入本类
key_exprs: list[str] = []
for key in business_keys:
lc = key.lower()
@@ -860,9 +958,22 @@ class DwdLoadTask(BaseTask):
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
# 预加载当前版本scd2_is_current=1避免逐行 SELECT 造成大量 round-trip
# CHANGE 2026-02-22: BUG 12 — 用显式列列表替代 SELECT *,对 timestamptz 列做哨兵值过滤
# 防止 BC 日期导致 psycopg2 fetchall() 抛出 ValueError
table_sql_dwd = self._format_table(dwd_table, "dwd")
where_current = " AND ".join([f"COALESCE(scd2_is_current,1)=1"])
cur.execute(f"SELECT * FROM {table_sql_dwd} WHERE {where_current}")
dwd_select_exprs: list[str] = []
for col in dwd_cols:
lc = col.lower()
if dwd_types.get(lc) in ts_types:
dwd_select_exprs.append(
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamptz '
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
)
else:
dwd_select_exprs.append(f'"{lc}"')
dwd_select_sql = ", ".join(dwd_select_exprs)
where_current = "COALESCE(scd2_is_current,1)=1"
cur.execute(f"SELECT {dwd_select_sql} FROM {table_sql_dwd} WHERE {where_current}")
current_rows = cur.fetchall() or []
current_by_pk: dict[tuple[Any, ...], Dict[str, Any]] = {}
for r in current_rows:
@@ -1245,12 +1356,19 @@ class DwdLoadTask(BaseTask):
"""构造事实表 SELECT 列表,需要时做类型转换。"""
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
text_types = {"text", "character varying", "varchar"}
ts_types = {"timestamp without time zone", "timestamp with time zone"}
exprs = []
for col in insert_cols:
d_type = dwd_types.get(col)
o_type = ods_types.get(col)
if d_type in numeric_types and o_type in text_types:
exprs.append(f"CAST(NULLIF(CAST(\"{col}\" AS text), '') AS numeric):: {d_type}")
elif d_type in ts_types and o_type in ts_types:
# CHANGE 2026-02-22: BUG 12 — 哨兵日期过滤,防止 0001-01-01 转 timestamptz 变 BC
exprs.append(
f'CASE WHEN "{col}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
f'THEN "{col}" ELSE NULL END'
)
else:
exprs.append(f'"{col}"')
return exprs
@@ -1271,8 +1389,15 @@ class DwdLoadTask(BaseTask):
schema, table = self._split_table_name(name, default_schema)
return f'"{schema}"."{table}"'
# CHANGE 2026-02-22: BUG 12 fix — sentinel-date threshold.
# The upstream API uses 0001-01-01 to mean "not set". The SQL builders in this
# class compare timestamp values against this threshold and emit NULL for any
# value earlier than it.
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
def _cast_expr(self, col: str, cast_type: str | None) -> str:
"""构造带可选 CAST 的列表达式。"""
"""构造带可选 CAST 的列表达式。
对 timestamptz 转换额外包裹哨兵值过滤ODS 中 0001-01-01 在
Asia/Shanghai 时区下会变成 BC 日期psycopg2 无法解析。
"""
if col.upper() == "NULL":
base = "NULL"
else:
@@ -1282,8 +1407,11 @@ class DwdLoadTask(BaseTask):
cast_lower = cast_type.lower()
if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
return f"CAST(NULLIF(CAST({base} AS text), '') AS numeric):: {cast_type}"
if cast_lower == "timestamptz":
return f"({base})::timestamptz"
if cast_lower in {"timestamptz", "timestamp with time zone"}:
# 哨兵值过滤:< 0002-01-01 的值置为 NULL
# base 可能是 textJSONB ->> 提取),需先 CAST 为 timestamp 再比较
return (f"CASE WHEN ({base})::timestamp >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
f"THEN ({base})::timestamptz ELSE NULL END")
return f"{base}::{cast_type}"
return base

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List, Sequence, Tuple
@@ -16,7 +17,9 @@ from tasks.dwd.dwd_load_task import DwdLoadTask
class DwdQualityTask(BaseTask):
"""对 ODS 与 DWD 进行行数、金额对照核查,生成 JSON 报表。"""
REPORT_PATH = Path("reports/dwd_quality_report.json")
# 从 .env 读取 ETL_REPORT_ROOT必须配置
_report_root = os.environ.get("ETL_REPORT_ROOT")
REPORT_PATH = Path(_report_root) / "dwd_quality_report.json" if _report_root else None
AMOUNT_KEYWORDS = ("amount", "money", "fee", "balance")
def get_task_code(self) -> str:
@@ -29,6 +32,11 @@ class DwdQualityTask(BaseTask):
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
"""输出行数/金额差异报表到本地文件。"""
if self.REPORT_PATH is None:
raise RuntimeError(
"环境变量 ETL_REPORT_ROOT 未定义,无法生成质检报表。"
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
)
report: Dict[str, Any] = {
"generated_at": extracted["now"].isoformat(),
"tables": [],