# -*- coding: utf-8 -*- """ 会员消费汇总任务 功能说明: 以"会员"为粒度,统计消费行为和滚动窗口指标 数据来源: - dwd_settlement_head: 结账单头表 - dim_member: 会员维度 - dim_member_card_account: 会员卡账户 目标表: billiards_dws.dws_member_consumption_summary 更新策略: - 更新频率:每日更新 - 幂等方式:delete-before-insert(按统计日期) 业务规则: - 散客处理:member_id=0 不进入此表 - 滚动窗口:7/10/15/30/60/90天 - 卡余额:区分储值卡(现金卡)和赠送卡 作者:ETL团队 创建日期:2026-02-01 """ from __future__ import annotations from datetime import date, datetime, timedelta from decimal import Decimal from typing import Any, Dict, List, Optional, Set, Tuple from .base_dws_task import BaseDwsTask, TaskContext class MemberConsumptionTask(BaseDwsTask): """ 会员消费汇总任务 统计每个会员的: - 首次/最近消费日期 - 累计消费统计 - 滚动窗口统计(7/10/15/30/60/90天) - 卡余额快照 - 活跃度指标和客户分层 """ def get_task_code(self) -> str: return "DWS_MEMBER_CONSUMPTION" def get_target_table(self) -> str: return "dws_member_consumption_summary" def get_primary_keys(self) -> List[str]: return ["site_id", "member_id", "stat_date"] # ========================================================================== # ETL主流程 # ========================================================================== def extract(self, context: TaskContext) -> Dict[str, Any]: """ 提取数据 """ stat_date = context.window_end.date() if hasattr(context.window_end, 'date') else context.window_end site_id = context.store_id self.logger.info( "%s: 提取数据,统计日期 %s", self.get_task_code(), stat_date ) # 1. 获取会员消费统计(含滚动窗口) consumption_stats = self._extract_consumption_stats(site_id, stat_date) # 2. 获取会员信息 member_info = self._extract_member_info(site_id) # 3. 获取会员卡余额 card_balances = self._extract_card_balances(site_id) return { 'consumption_stats': consumption_stats, 'member_info': member_info, 'card_balances': card_balances, 'stat_date': stat_date, 'site_id': site_id } def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]: """ 转换数据 """ consumption_stats = extracted['consumption_stats'] member_info = extracted['member_info'] card_balances = extracted['card_balances'] stat_date = extracted['stat_date'] site_id = extracted['site_id'] self.logger.info( "%s: 转换数据,%d 条会员消费记录", self.get_task_code(), len(consumption_stats) ) results = [] for stats in consumption_stats: member_id = stats.get('member_id') # 跳过散客 if self.is_guest(member_id): continue memb_info = member_info.get(member_id, {}) balance = card_balances.get(member_id, {}) # 计算活跃度和客户分层 days_since_last = self._calc_days_since(stat_date, stats.get('last_consume_date')) customer_tier = self._calculate_customer_tier(stats, days_since_last) record = { 'site_id': site_id, 'tenant_id': self.config.get("app.tenant_id", site_id), 'member_id': member_id, 'stat_date': stat_date, # 会员基本信息 'member_nickname': memb_info.get('nickname'), 'member_mobile': self._mask_mobile(memb_info.get('mobile')), 'card_grade_name': memb_info.get('member_card_grade_name'), 'register_date': memb_info.get('register_date'), # 全量累计统计 'first_consume_date': stats.get('first_consume_date'), 'last_consume_date': stats.get('last_consume_date'), 'total_visit_count': self.safe_int(stats.get('total_visit_count', 0)), 'total_consume_amount': self.safe_decimal(stats.get('total_consume_amount', 0)), 'total_recharge_amount': self.safe_decimal(memb_info.get('recharge_money_sum', 0)), 'total_table_fee': self.safe_decimal(stats.get('total_table_fee', 0)), 'total_goods_amount': self.safe_decimal(stats.get('total_goods_amount', 0)), 'total_assistant_amount': self.safe_decimal(stats.get('total_assistant_amount', 0)), # 滚动窗口统计 'visit_count_7d': self.safe_int(stats.get('visit_count_7d', 0)), 'visit_count_10d': self.safe_int(stats.get('visit_count_10d', 0)), 'visit_count_15d': self.safe_int(stats.get('visit_count_15d', 0)), 'visit_count_30d': self.safe_int(stats.get('visit_count_30d', 0)), 'visit_count_60d': self.safe_int(stats.get('visit_count_60d', 0)), 'visit_count_90d': self.safe_int(stats.get('visit_count_90d', 0)), 'consume_amount_7d': self.safe_decimal(stats.get('consume_amount_7d', 0)), 'consume_amount_10d': self.safe_decimal(stats.get('consume_amount_10d', 0)), 'consume_amount_15d': self.safe_decimal(stats.get('consume_amount_15d', 0)), 'consume_amount_30d': self.safe_decimal(stats.get('consume_amount_30d', 0)), 'consume_amount_60d': self.safe_decimal(stats.get('consume_amount_60d', 0)), 'consume_amount_90d': self.safe_decimal(stats.get('consume_amount_90d', 0)), # 卡余额 'cash_card_balance': self.safe_decimal(balance.get('cash_balance', 0)), 'gift_card_balance': self.safe_decimal(balance.get('gift_balance', 0)), 'total_card_balance': self.safe_decimal(balance.get('total_balance', 0)), # 活跃度指标 'days_since_last': days_since_last, 'is_active_7d': self.safe_int(stats.get('visit_count_7d', 0)) > 0, 'is_active_30d': self.safe_int(stats.get('visit_count_30d', 0)) > 0, 'is_active_90d': self.safe_int(stats.get('visit_count_90d', 0)) > 0, # 客户分层 'customer_tier': customer_tier, } results.append(record) return results def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict: """ 加载数据 """ if not transformed: self.logger.info("%s: 无数据需要写入", self.get_task_code()) return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}} deleted = self.delete_existing_data(context, date_col="stat_date") inserted = self.bulk_insert(transformed) self.logger.info( "%s: 加载完成,删除 %d 行,插入 %d 行", self.get_task_code(), deleted, inserted ) return { "counts": { "fetched": len(transformed), "inserted": inserted, "updated": 0, "skipped": 0, "errors": 0 }, "extra": {"deleted": deleted} } # ========================================================================== # 数据提取方法 # ========================================================================== def _extract_consumption_stats( self, site_id: int, stat_date: date ) -> List[Dict[str, Any]]: """ 提取会员消费统计(含滚动窗口) """ sql = """ WITH consume_base AS ( SELECT member_id, DATE(create_time) AS consume_date, consume_money, table_charge_money, goods_money, assistant_pd_money + assistant_cx_money AS assistant_amount FROM billiards_dwd.dwd_settlement_head WHERE site_id = %s AND member_id IS NOT NULL AND member_id != 0 ) SELECT member_id, MIN(consume_date) AS first_consume_date, MAX(consume_date) AS last_consume_date, -- 全量累计 COUNT(*) AS total_visit_count, SUM(consume_money) AS total_consume_amount, SUM(table_charge_money) AS total_table_fee, SUM(goods_money) AS total_goods_amount, SUM(assistant_amount) AS total_assistant_amount, -- 滚动窗口 COUNT(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN 1 END) AS visit_count_7d, COUNT(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN 1 END) AS visit_count_10d, COUNT(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN 1 END) AS visit_count_15d, COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d, COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d, COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d, SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d, SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d, SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d, SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d, SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d, SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d FROM consume_base GROUP BY member_id """ params = [site_id] + [stat_date] * 12 rows = self.db.query(sql, tuple(params)) return [dict(row) for row in rows] if rows else [] def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]: """ 提取会员信息 """ sql = """ SELECT member_id, nickname, mobile, member_card_grade_name, DATE(create_time) AS register_date, recharge_money_sum FROM billiards_dwd.dim_member WHERE site_id = %s """ rows = self.db.query(sql, (site_id,)) result = {} for row in (rows or []): row_dict = dict(row) result[row_dict['member_id']] = row_dict return result def _extract_card_balances(self, site_id: int) -> Dict[int, Dict[str, Decimal]]: """ 提取会员卡余额 """ # 卡类型ID CASH_CARD_TYPE_ID = 2793249295533893 GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125] sql = """ SELECT tenant_member_id AS member_id, card_type_id, balance FROM billiards_dwd.dim_member_card_account WHERE site_id = %s AND valid_to IS NULL """ rows = self.db.query(sql, (site_id,)) result: Dict[int, Dict[str, Decimal]] = {} for row in (rows or []): row_dict = dict(row) member_id = row_dict.get('member_id') card_type_id = row_dict.get('card_type_id') balance = self.safe_decimal(row_dict.get('balance', 0)) if member_id not in result: result[member_id] = { 'cash_balance': Decimal('0'), 'gift_balance': Decimal('0'), 'total_balance': Decimal('0') } if card_type_id == CASH_CARD_TYPE_ID: result[member_id]['cash_balance'] += balance elif card_type_id in GIFT_CARD_TYPE_IDS: result[member_id]['gift_balance'] += balance result[member_id]['total_balance'] = ( result[member_id]['cash_balance'] + result[member_id]['gift_balance'] ) return result # ========================================================================== # 工具方法 # ========================================================================== def _mask_mobile(self, mobile: Optional[str]) -> Optional[str]: """手机号脱敏""" if not mobile or len(mobile) < 7: return mobile return mobile[:3] + "****" + mobile[-4:] def _calc_days_since(self, stat_date: date, last_date: Optional[date]) -> Optional[int]: """计算距离最近消费的天数""" if not last_date: return None if isinstance(last_date, datetime): last_date = last_date.date() return (stat_date - last_date).days def _calculate_customer_tier( self, stats: Dict[str, Any], days_since_last: Optional[int] ) -> str: """ 计算客户分层 分层规则: - 高价值:90天内消费>=3次 且 消费金额>=1000 - 中等:30天内有消费 - 低活跃:90天内有消费但30天内无消费 - 流失:90天内无消费 """ visit_90d = self.safe_int(stats.get('visit_count_90d', 0)) visit_30d = self.safe_int(stats.get('visit_count_30d', 0)) amount_90d = self.safe_decimal(stats.get('consume_amount_90d', 0)) if visit_90d >= 3 and amount_90d >= 1000: return "高价值" elif visit_30d > 0: return "中等" elif visit_90d > 0: return "低活跃" else: return "流失" # 便于外部导入 __all__ = ['MemberConsumptionTask']