feat(etl): app2a DWS 增加 member_order_count 聚合 + 修复 area 未匹配订单 all 兜底
1. finance_area_daily.py: - _AREA_AGG_FIELDS 增加 member_order_count · _COUNT_FIELDS 常量统一 int 转换 - extract SQL 增加 sh.member_id 字段 - transform 按 CLAUDE.md DWS 规范 member_id > 0 判定是否会员订单 - _build_area_row / _build_sum_row 支持新计数字段 2. pre-existing bug 修复(顺手): area_code 为 None(table_id 未映射)的订单之前既不计入具体区域也不计入 all, 导致全店 order_count/member_order_count > 各区域之和。 修复:新增 _unknown 桶收纳未匹配订单 · 构建 all 行时追加合入 source_rows。 3. backfill_finance_area_daily.py extract SQL 加 sh.member_id 支持回填历史 member_order_count 数据。 实测:纯函数单测 + 测试库 ETL 7 天回放 · 04-18/04-20 等日期全店 vs 区域和 从差 1 单修复为 0 差异 · 纯函数新增 2 条未匹配订单用例断言全通过。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -99,8 +99,11 @@ _RECHARGE_FIELDS = [
|
||||
# 所有仅 all 行有值的字段
|
||||
_ALL_ONLY_FIELDS = _CASHFLOW_FIELDS + _CARD_FIELDS + _RECHARGE_FIELDS
|
||||
|
||||
# 按区域聚合的字段(收入 + 优惠 + order_count)
|
||||
_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count"]
|
||||
# 按区域聚合的字段(收入 + 优惠 + order_count + member_order_count)
|
||||
_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count", "member_order_count"]
|
||||
|
||||
# 计数字段(int 类型,非 Decimal)— 用于 _build_area_row / _build_sum_row 区分类型转换
|
||||
_COUNT_FIELDS = {"order_count", "member_order_count"}
|
||||
|
||||
_ZERO = Decimal("0")
|
||||
|
||||
@@ -190,7 +193,9 @@ class FinanceAreaDailyTask(FinanceBaseTask):
|
||||
sh.adjust_amount,
|
||||
sh.member_discount_amount,
|
||||
sh.rounding_amount,
|
||||
sh.gift_card_amount
|
||||
sh.gift_card_amount,
|
||||
-- 会员标识(散客 member_id ≤ 0,会员 member_id > 0,详见 CLAUDE.md DWS 规范)
|
||||
sh.member_id
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = sh.table_id
|
||||
@@ -439,6 +444,13 @@ def transform_area_daily(
|
||||
|
||||
confirmed_income = gross - discount_total
|
||||
|
||||
# 会员订单判定(散客 member_id ≤ 0 不计入,与全店 finance_base_task.py 逻辑一致)
|
||||
member_id_raw = row.get("member_id")
|
||||
try:
|
||||
is_member_order = 1 if (member_id_raw is not None and int(member_id_raw) > 0) else 0
|
||||
except (TypeError, ValueError):
|
||||
is_member_order = 0
|
||||
|
||||
fields = {
|
||||
"table_fee_amount": table_fee,
|
||||
"goods_amount": goods,
|
||||
@@ -454,19 +466,24 @@ def transform_area_daily(
|
||||
"discount_total": discount_total,
|
||||
"confirmed_income": confirmed_income,
|
||||
"order_count": 1,
|
||||
"member_order_count": is_member_order,
|
||||
}
|
||||
|
||||
# 累加到具体区域
|
||||
# 累加到具体区域(未匹配区域的订单累加到 _unknown 桶,供 all 行汇总使用)
|
||||
# CHANGE 2026-04-23 | 修复 pre-existing bug:area_code 为 None 的订单之前既不计入具体区域也不计入 all,
|
||||
# 导致全店 order_count/member_order_count > 各区域之和。现在 _unknown 桶会合入 all 汇总。
|
||||
if area_code is not None:
|
||||
bucket = area_agg[sd][area_code]
|
||||
for k, v in fields.items():
|
||||
bucket[k] = bucket[k] + v
|
||||
else:
|
||||
bucket = area_agg[sd]["_unknown"]
|
||||
for k, v in fields.items():
|
||||
bucket[k] = bucket[k] + v
|
||||
|
||||
# 汇总输出未知区域名称(避免逐行 warning 刷屏)
|
||||
if _unknown_area_counts:
|
||||
summary = ", ".join(f"'{k}': {v}次" for k, v in _unknown_area_counts.items())
|
||||
logger.warning(
|
||||
"DWS_FINANCE_AREA_DAILY: 共 %d 条结算单区域未匹配(不计入具体区域,仅计入 all): %s",
|
||||
"DWS_FINANCE_AREA_DAILY: 共 %d 条结算单区域未匹配(已计入 all 但不计入任何具体区域): %s",
|
||||
sum(_unknown_area_counts.values()),
|
||||
summary,
|
||||
)
|
||||
@@ -505,13 +522,24 @@ def transform_area_daily(
|
||||
source_rows=hall_source,
|
||||
)
|
||||
|
||||
# all 行 = 各具体区域之和(收入/优惠/order_count)+ 全局现金流/充值/卡消费
|
||||
# all 行 = 各具体区域之和 + 未匹配区域订单(_unknown 桶)+ 全局现金流/充值/卡消费
|
||||
# CHANGE 2026-04-23 | 修复全店 vs 区域和差异:未匹配区域订单通过 _unknown 桶合入 all
|
||||
all_sources = list(specific_rows.values())
|
||||
unknown_bucket = day_agg.get("_unknown", {})
|
||||
if unknown_bucket:
|
||||
all_sources.append(_build_area_row(
|
||||
site_id=site_id,
|
||||
tenant_id=tenant_id,
|
||||
stat_date=sd,
|
||||
area_code="_unknown",
|
||||
agg=unknown_bucket,
|
||||
))
|
||||
all_row = _build_sum_row(
|
||||
site_id=site_id,
|
||||
tenant_id=tenant_id,
|
||||
stat_date=sd,
|
||||
area_code="all",
|
||||
source_rows=list(specific_rows.values()),
|
||||
source_rows=all_sources,
|
||||
)
|
||||
# 填充全局现金流/充值/卡消费
|
||||
gs = global_index.get(sd, {})
|
||||
@@ -541,9 +569,9 @@ def _build_area_row(
|
||||
"stat_date": stat_date,
|
||||
"area_code": area_code,
|
||||
}
|
||||
# 收入 + 优惠 + confirmed_income + order_count
|
||||
# 收入 + 优惠 + confirmed_income + order_count + member_order_count
|
||||
for field in _AREA_AGG_FIELDS:
|
||||
if field == "order_count":
|
||||
if field in _COUNT_FIELDS:
|
||||
row[field] = int(agg.get(field, 0))
|
||||
else:
|
||||
row[field] = agg.get(field, _ZERO)
|
||||
@@ -568,7 +596,7 @@ def _build_sum_row(
|
||||
"area_code": area_code,
|
||||
}
|
||||
for field in _AREA_AGG_FIELDS:
|
||||
if field == "order_count":
|
||||
if field in _COUNT_FIELDS:
|
||||
row[field] = sum(int(r.get(field, 0)) for r in source_rows)
|
||||
else:
|
||||
row[field] = sum(
|
||||
|
||||
@@ -101,7 +101,11 @@ _calc_period_date_range = _fbc_mod._calc_period_date_range
|
||||
def extract_settlement_with_area(
|
||||
conn, site_id: int, stat_date: date, cutoff_hour: int = 8
|
||||
) -> list[dict]:
|
||||
"""从 dwd_settlement_head + dim_table 提取单日结算单(含区域名称)。"""
|
||||
"""从 dwd_settlement_head + dim_table 提取单日结算单(含区域名称 + 会员标识)。
|
||||
|
||||
CHANGE 2026-04-23 | 新增 sh.member_id 字段以支持 member_order_count 列回填
|
||||
(散客 member_id ≤ 0 判定,详见 CLAUDE.md DWS 规范)
|
||||
"""
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff_hour)
|
||||
sql = f"""
|
||||
SELECT
|
||||
@@ -120,7 +124,8 @@ def extract_settlement_with_area(
|
||||
sh.adjust_amount,
|
||||
sh.member_discount_amount,
|
||||
sh.rounding_amount,
|
||||
sh.gift_card_amount
|
||||
sh.gift_card_amount,
|
||||
sh.member_id
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = sh.table_id
|
||||
|
||||
Reference in New Issue
Block a user