feat(etl): app2a DWS 增加 member_order_count 聚合 + 修复 area 未匹配订单 all 兜底

1. finance_area_daily.py:
   - _AREA_AGG_FIELDS 增加 member_order_count · _COUNT_FIELDS 常量统一 int 转换
   - extract SQL 增加 sh.member_id 字段
   - transform 按 CLAUDE.md DWS 规范 member_id > 0 判定是否会员订单
   - _build_area_row / _build_sum_row 支持新计数字段

2. pre-existing bug 修复(顺手):
   area_code 为 None(table_id 未映射)的订单之前既不计入具体区域也不计入 all,
   导致全店 order_count/member_order_count > 各区域之和。
   修复:新增 _unknown 桶收纳未匹配订单 · 构建 all 行时追加合入 source_rows。

3. backfill_finance_area_daily.py extract SQL 加 sh.member_id
   支持回填历史 member_order_count 数据。

实测:纯函数单测 + 测试库 ETL 7 天回放 · 04-18/04-20 等日期全店 vs 区域和
从差 1 单修复为 0 差异 · 纯函数新增 2 条未匹配订单用例断言全通过。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-22 21:54:37 +08:00
parent b44096600d
commit cd511d0670
2 changed files with 47 additions and 14 deletions

View File

@@ -99,8 +99,11 @@ _RECHARGE_FIELDS = [
# 所有仅 all 行有值的字段
_ALL_ONLY_FIELDS = _CASHFLOW_FIELDS + _CARD_FIELDS + _RECHARGE_FIELDS
# 按区域聚合的字段(收入 + 优惠 + order_count
_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count"]
# 按区域聚合的字段(收入 + 优惠 + order_count + member_order_count
_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count", "member_order_count"]
# 计数字段int 类型,非 Decimal— 用于 _build_area_row / _build_sum_row 区分类型转换
_COUNT_FIELDS = {"order_count", "member_order_count"}
_ZERO = Decimal("0")
@@ -190,7 +193,9 @@ class FinanceAreaDailyTask(FinanceBaseTask):
sh.adjust_amount,
sh.member_discount_amount,
sh.rounding_amount,
sh.gift_card_amount
sh.gift_card_amount,
-- 会员标识(散客 member_id ≤ 0会员 member_id > 0详见 CLAUDE.md DWS 规范)
sh.member_id
FROM dwd.dwd_settlement_head sh
LEFT JOIN dwd.dim_table dt
ON dt.table_id = sh.table_id
@@ -439,6 +444,13 @@ def transform_area_daily(
confirmed_income = gross - discount_total
# 会员订单判定(散客 member_id ≤ 0 不计入,与全店 finance_base_task.py 逻辑一致)
member_id_raw = row.get("member_id")
try:
is_member_order = 1 if (member_id_raw is not None and int(member_id_raw) > 0) else 0
except (TypeError, ValueError):
is_member_order = 0
fields = {
"table_fee_amount": table_fee,
"goods_amount": goods,
@@ -454,19 +466,24 @@ def transform_area_daily(
"discount_total": discount_total,
"confirmed_income": confirmed_income,
"order_count": 1,
"member_order_count": is_member_order,
}
# 累加到具体区域
# 累加到具体区域(未匹配区域的订单累加到 _unknown 桶,供 all 行汇总使用)
# CHANGE 2026-04-23 | 修复 pre-existing bugarea_code 为 None 的订单之前既不计入具体区域也不计入 all
# 导致全店 order_count/member_order_count > 各区域之和。现在 _unknown 桶会合入 all 汇总。
if area_code is not None:
bucket = area_agg[sd][area_code]
for k, v in fields.items():
bucket[k] = bucket[k] + v
else:
bucket = area_agg[sd]["_unknown"]
for k, v in fields.items():
bucket[k] = bucket[k] + v
# 汇总输出未知区域名称(避免逐行 warning 刷屏)
if _unknown_area_counts:
summary = ", ".join(f"'{k}': {v}" for k, v in _unknown_area_counts.items())
logger.warning(
"DWS_FINANCE_AREA_DAILY: 共 %d 条结算单区域未匹配(不计入具体区域,仅计入 all: %s",
"DWS_FINANCE_AREA_DAILY: 共 %d 条结算单区域未匹配(已计入 all 但不计入任何具体区域): %s",
sum(_unknown_area_counts.values()),
summary,
)
@@ -505,13 +522,24 @@ def transform_area_daily(
source_rows=hall_source,
)
# all 行 = 各具体区域之和(收入/优惠/order_count+ 全局现金流/充值/卡消费
# all 行 = 各具体区域之和 + 未匹配区域订单_unknown 桶+ 全局现金流/充值/卡消费
# CHANGE 2026-04-23 | 修复全店 vs 区域和差异:未匹配区域订单通过 _unknown 桶合入 all
all_sources = list(specific_rows.values())
unknown_bucket = day_agg.get("_unknown", {})
if unknown_bucket:
all_sources.append(_build_area_row(
site_id=site_id,
tenant_id=tenant_id,
stat_date=sd,
area_code="_unknown",
agg=unknown_bucket,
))
all_row = _build_sum_row(
site_id=site_id,
tenant_id=tenant_id,
stat_date=sd,
area_code="all",
source_rows=list(specific_rows.values()),
source_rows=all_sources,
)
# 填充全局现金流/充值/卡消费
gs = global_index.get(sd, {})
@@ -541,9 +569,9 @@ def _build_area_row(
"stat_date": stat_date,
"area_code": area_code,
}
# 收入 + 优惠 + confirmed_income + order_count
# 收入 + 优惠 + confirmed_income + order_count + member_order_count
for field in _AREA_AGG_FIELDS:
if field == "order_count":
if field in _COUNT_FIELDS:
row[field] = int(agg.get(field, 0))
else:
row[field] = agg.get(field, _ZERO)
@@ -568,7 +596,7 @@ def _build_sum_row(
"area_code": area_code,
}
for field in _AREA_AGG_FIELDS:
if field == "order_count":
if field in _COUNT_FIELDS:
row[field] = sum(int(r.get(field, 0)) for r in source_rows)
else:
row[field] = sum(