From cd511d0670b7a4bec223981175958fa688367545 Mon Sep 17 00:00:00 2001 From: Neo Date: Wed, 22 Apr 2026 21:54:37 +0800 Subject: [PATCH] =?UTF-8?q?feat(etl):=20app2a=20DWS=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=20member=5Forder=5Fcount=20=E8=81=9A=E5=90=88=20+=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=20area=20=E6=9C=AA=E5=8C=B9=E9=85=8D=E8=AE=A2?= =?UTF-8?q?=E5=8D=95=20all=20=E5=85=9C=E5=BA=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. finance_area_daily.py: - _AREA_AGG_FIELDS 增加 member_order_count · _COUNT_FIELDS 常量统一 int 转换 - extract SQL 增加 sh.member_id 字段 - transform 按 CLAUDE.md DWS 规范 member_id > 0 判定是否会员订单 - _build_area_row / _build_sum_row 支持新计数字段 2. pre-existing bug 修复(顺手): area_code 为 None(table_id 未映射)的订单之前既不计入具体区域也不计入 all, 导致全店 order_count/member_order_count > 各区域之和。 修复:新增 _unknown 桶收纳未匹配订单 · 构建 all 行时追加合入 source_rows。 3. backfill_finance_area_daily.py extract SQL 加 sh.member_id 支持回填历史 member_order_count 数据。 实测:纯函数单测 + 测试库 ETL 7 天回放 · 04-18/04-20 等日期全店 vs 区域和 从差 1 单修复为 0 差异 · 纯函数新增 2 条未匹配订单用例断言全通过。 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../feiqiu/tasks/dws/finance_area_daily.py | 52 ++++++++++++++----- scripts/ops/backfill_finance_area_daily.py | 9 +++- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/apps/etl/connectors/feiqiu/tasks/dws/finance_area_daily.py b/apps/etl/connectors/feiqiu/tasks/dws/finance_area_daily.py index 027a811..7dae151 100644 --- a/apps/etl/connectors/feiqiu/tasks/dws/finance_area_daily.py +++ b/apps/etl/connectors/feiqiu/tasks/dws/finance_area_daily.py @@ -99,8 +99,11 @@ _RECHARGE_FIELDS = [ # 所有仅 all 行有值的字段 _ALL_ONLY_FIELDS = _CASHFLOW_FIELDS + _CARD_FIELDS + _RECHARGE_FIELDS -# 按区域聚合的字段(收入 + 优惠 + order_count) -_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count"] +# 按区域聚合的字段(收入 + 优惠 + order_count + member_order_count) +_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count", "member_order_count"] + +# 计数字段(int 类型,非 Decimal)— 用于 _build_area_row / _build_sum_row 区分类型转换 +_COUNT_FIELDS = {"order_count", "member_order_count"} _ZERO = Decimal("0") @@ -190,7 +193,9 @@ class FinanceAreaDailyTask(FinanceBaseTask): sh.adjust_amount, sh.member_discount_amount, sh.rounding_amount, - sh.gift_card_amount + sh.gift_card_amount, + -- 会员标识(散客 member_id ≤ 0,会员 member_id > 0,详见 CLAUDE.md DWS 规范) + sh.member_id FROM dwd.dwd_settlement_head sh LEFT JOIN dwd.dim_table dt ON dt.table_id = sh.table_id @@ -439,6 +444,13 @@ def transform_area_daily( confirmed_income = gross - discount_total + # 会员订单判定(散客 member_id ≤ 0 不计入,与全店 finance_base_task.py 逻辑一致) + member_id_raw = row.get("member_id") + try: + is_member_order = 1 if (member_id_raw is not None and int(member_id_raw) > 0) else 0 + except (TypeError, ValueError): + is_member_order = 0 + fields = { "table_fee_amount": table_fee, "goods_amount": goods, @@ -454,19 +466,24 @@ def transform_area_daily( "discount_total": discount_total, "confirmed_income": confirmed_income, "order_count": 1, + "member_order_count": is_member_order, } - # 累加到具体区域 + # 累加到具体区域(未匹配区域的订单累加到 _unknown 桶,供 all 行汇总使用) + # CHANGE 2026-04-23 | 修复 pre-existing bug:area_code 为 None 的订单之前既不计入具体区域也不计入 all, + # 导致全店 order_count/member_order_count > 各区域之和。现在 _unknown 桶会合入 all 汇总。 if area_code is not None: bucket = area_agg[sd][area_code] - for k, v in fields.items(): - bucket[k] = bucket[k] + v + else: + bucket = area_agg[sd]["_unknown"] + for k, v in fields.items(): + bucket[k] = bucket[k] + v # 汇总输出未知区域名称(避免逐行 warning 刷屏) if _unknown_area_counts: summary = ", ".join(f"'{k}': {v}次" for k, v in _unknown_area_counts.items()) logger.warning( - "DWS_FINANCE_AREA_DAILY: 共 %d 条结算单区域未匹配(不计入具体区域,仅计入 all): %s", + "DWS_FINANCE_AREA_DAILY: 共 %d 条结算单区域未匹配(已计入 all 但不计入任何具体区域): %s", sum(_unknown_area_counts.values()), summary, ) @@ -505,13 +522,24 @@ def transform_area_daily( source_rows=hall_source, ) - # all 行 = 各具体区域之和(收入/优惠/order_count)+ 全局现金流/充值/卡消费 + # all 行 = 各具体区域之和 + 未匹配区域订单(_unknown 桶)+ 全局现金流/充值/卡消费 + # CHANGE 2026-04-23 | 修复全店 vs 区域和差异:未匹配区域订单通过 _unknown 桶合入 all + all_sources = list(specific_rows.values()) + unknown_bucket = day_agg.get("_unknown", {}) + if unknown_bucket: + all_sources.append(_build_area_row( + site_id=site_id, + tenant_id=tenant_id, + stat_date=sd, + area_code="_unknown", + agg=unknown_bucket, + )) all_row = _build_sum_row( site_id=site_id, tenant_id=tenant_id, stat_date=sd, area_code="all", - source_rows=list(specific_rows.values()), + source_rows=all_sources, ) # 填充全局现金流/充值/卡消费 gs = global_index.get(sd, {}) @@ -541,9 +569,9 @@ def _build_area_row( "stat_date": stat_date, "area_code": area_code, } - # 收入 + 优惠 + confirmed_income + order_count + # 收入 + 优惠 + confirmed_income + order_count + member_order_count for field in _AREA_AGG_FIELDS: - if field == "order_count": + if field in _COUNT_FIELDS: row[field] = int(agg.get(field, 0)) else: row[field] = agg.get(field, _ZERO) @@ -568,7 +596,7 @@ def _build_sum_row( "area_code": area_code, } for field in _AREA_AGG_FIELDS: - if field == "order_count": + if field in _COUNT_FIELDS: row[field] = sum(int(r.get(field, 0)) for r in source_rows) else: row[field] = sum( diff --git a/scripts/ops/backfill_finance_area_daily.py b/scripts/ops/backfill_finance_area_daily.py index b07ca0e..b6f1963 100644 --- a/scripts/ops/backfill_finance_area_daily.py +++ b/scripts/ops/backfill_finance_area_daily.py @@ -101,7 +101,11 @@ _calc_period_date_range = _fbc_mod._calc_period_date_range def extract_settlement_with_area( conn, site_id: int, stat_date: date, cutoff_hour: int = 8 ) -> list[dict]: - """从 dwd_settlement_head + dim_table 提取单日结算单(含区域名称)。""" + """从 dwd_settlement_head + dim_table 提取单日结算单(含区域名称 + 会员标识)。 + + CHANGE 2026-04-23 | 新增 sh.member_id 字段以支持 member_order_count 列回填 + (散客 member_id ≤ 0 判定,详见 CLAUDE.md DWS 规范) + """ biz_expr = biz_date_sql_expr("sh.pay_time", cutoff_hour) sql = f""" SELECT @@ -120,7 +124,8 @@ def extract_settlement_with_area( sh.adjust_amount, sh.member_discount_amount, sh.rounding_amount, - sh.gift_card_amount + sh.gift_card_amount, + sh.member_id FROM dwd.dwd_settlement_head sh LEFT JOIN dwd.dim_table dt ON dt.table_id = sh.table_id