Files
feiqiu-ETL/etl_billiards/tasks/dws/member_consumption_task.py
2026-02-04 21:39:01 +08:00

369 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
会员消费汇总任务
功能说明:
"会员"为粒度,统计消费行为和滚动窗口指标
数据来源:
- dwd_settlement_head: 结账单头表
- dim_member: 会员维度
- dim_member_card_account: 会员卡账户
目标表:
billiards_dws.dws_member_consumption_summary
更新策略:
- 更新频率:每日更新
- 幂等方式delete-before-insert按统计日期
业务规则:
- 散客处理member_id=0 不进入此表
- 滚动窗口7/10/15/30/60/90天
- 卡余额:区分储值卡(现金卡)和赠送卡
作者ETL团队
创建日期2026-02-01
"""
from __future__ import annotations
from datetime import date, datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Set, Tuple
from .base_dws_task import BaseDwsTask, TaskContext
class MemberConsumptionTask(BaseDwsTask):
"""
会员消费汇总任务
统计每个会员的:
- 首次/最近消费日期
- 累计消费统计
- 滚动窗口统计7/10/15/30/60/90天
- 卡余额快照
- 活跃度指标和客户分层
"""
def get_task_code(self) -> str:
return "DWS_MEMBER_CONSUMPTION"
def get_target_table(self) -> str:
return "dws_member_consumption_summary"
def get_primary_keys(self) -> List[str]:
return ["site_id", "member_id", "stat_date"]
# ==========================================================================
# ETL主流程
# ==========================================================================
def extract(self, context: TaskContext) -> Dict[str, Any]:
"""
提取数据
"""
stat_date = context.window_end.date() if hasattr(context.window_end, 'date') else context.window_end
site_id = context.store_id
self.logger.info(
"%s: 提取数据,统计日期 %s",
self.get_task_code(), stat_date
)
# 1. 获取会员消费统计(含滚动窗口)
consumption_stats = self._extract_consumption_stats(site_id, stat_date)
# 2. 获取会员信息
member_info = self._extract_member_info(site_id)
# 3. 获取会员卡余额
card_balances = self._extract_card_balances(site_id)
return {
'consumption_stats': consumption_stats,
'member_info': member_info,
'card_balances': card_balances,
'stat_date': stat_date,
'site_id': site_id
}
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
"""
转换数据
"""
consumption_stats = extracted['consumption_stats']
member_info = extracted['member_info']
card_balances = extracted['card_balances']
stat_date = extracted['stat_date']
site_id = extracted['site_id']
self.logger.info(
"%s: 转换数据,%d 条会员消费记录",
self.get_task_code(), len(consumption_stats)
)
results = []
for stats in consumption_stats:
member_id = stats.get('member_id')
# 跳过散客
if self.is_guest(member_id):
continue
memb_info = member_info.get(member_id, {})
balance = card_balances.get(member_id, {})
# 计算活跃度和客户分层
days_since_last = self._calc_days_since(stat_date, stats.get('last_consume_date'))
customer_tier = self._calculate_customer_tier(stats, days_since_last)
record = {
'site_id': site_id,
'tenant_id': self.config.get("app.tenant_id", site_id),
'member_id': member_id,
'stat_date': stat_date,
# 会员基本信息
'member_nickname': memb_info.get('nickname'),
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
'card_grade_name': memb_info.get('member_card_grade_name'),
'register_date': memb_info.get('register_date'),
# 全量累计统计
'first_consume_date': stats.get('first_consume_date'),
'last_consume_date': stats.get('last_consume_date'),
'total_visit_count': self.safe_int(stats.get('total_visit_count', 0)),
'total_consume_amount': self.safe_decimal(stats.get('total_consume_amount', 0)),
'total_recharge_amount': self.safe_decimal(memb_info.get('recharge_money_sum', 0)),
'total_table_fee': self.safe_decimal(stats.get('total_table_fee', 0)),
'total_goods_amount': self.safe_decimal(stats.get('total_goods_amount', 0)),
'total_assistant_amount': self.safe_decimal(stats.get('total_assistant_amount', 0)),
# 滚动窗口统计
'visit_count_7d': self.safe_int(stats.get('visit_count_7d', 0)),
'visit_count_10d': self.safe_int(stats.get('visit_count_10d', 0)),
'visit_count_15d': self.safe_int(stats.get('visit_count_15d', 0)),
'visit_count_30d': self.safe_int(stats.get('visit_count_30d', 0)),
'visit_count_60d': self.safe_int(stats.get('visit_count_60d', 0)),
'visit_count_90d': self.safe_int(stats.get('visit_count_90d', 0)),
'consume_amount_7d': self.safe_decimal(stats.get('consume_amount_7d', 0)),
'consume_amount_10d': self.safe_decimal(stats.get('consume_amount_10d', 0)),
'consume_amount_15d': self.safe_decimal(stats.get('consume_amount_15d', 0)),
'consume_amount_30d': self.safe_decimal(stats.get('consume_amount_30d', 0)),
'consume_amount_60d': self.safe_decimal(stats.get('consume_amount_60d', 0)),
'consume_amount_90d': self.safe_decimal(stats.get('consume_amount_90d', 0)),
# 卡余额
'cash_card_balance': self.safe_decimal(balance.get('cash_balance', 0)),
'gift_card_balance': self.safe_decimal(balance.get('gift_balance', 0)),
'total_card_balance': self.safe_decimal(balance.get('total_balance', 0)),
# 活跃度指标
'days_since_last': days_since_last,
'is_active_7d': self.safe_int(stats.get('visit_count_7d', 0)) > 0,
'is_active_30d': self.safe_int(stats.get('visit_count_30d', 0)) > 0,
'is_active_90d': self.safe_int(stats.get('visit_count_90d', 0)) > 0,
# 客户分层
'customer_tier': customer_tier,
}
results.append(record)
return results
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
"""
加载数据
"""
if not transformed:
self.logger.info("%s: 无数据需要写入", self.get_task_code())
return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}
deleted = self.delete_existing_data(context, date_col="stat_date")
inserted = self.bulk_insert(transformed)
self.logger.info(
"%s: 加载完成,删除 %d 行,插入 %d",
self.get_task_code(), deleted, inserted
)
return {
"counts": {
"fetched": len(transformed),
"inserted": inserted,
"updated": 0,
"skipped": 0,
"errors": 0
},
"extra": {"deleted": deleted}
}
# ==========================================================================
# 数据提取方法
# ==========================================================================
def _extract_consumption_stats(
self,
site_id: int,
stat_date: date
) -> List[Dict[str, Any]]:
"""
提取会员消费统计(含滚动窗口)
"""
sql = """
WITH consume_base AS (
SELECT
member_id,
DATE(create_time) AS consume_date,
consume_money,
table_charge_money,
goods_money,
assistant_pd_money + assistant_cx_money AS assistant_amount
FROM billiards_dwd.dwd_settlement_head
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
)
SELECT
member_id,
MIN(consume_date) AS first_consume_date,
MAX(consume_date) AS last_consume_date,
-- 全量累计
COUNT(*) AS total_visit_count,
SUM(consume_money) AS total_consume_amount,
SUM(table_charge_money) AS total_table_fee,
SUM(goods_money) AS total_goods_amount,
SUM(assistant_amount) AS total_assistant_amount,
-- 滚动窗口
COUNT(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN 1 END) AS visit_count_7d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN 1 END) AS visit_count_10d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN 1 END) AS visit_count_15d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
FROM consume_base
GROUP BY member_id
"""
params = [site_id] + [stat_date] * 12
rows = self.db.query(sql, tuple(params))
return [dict(row) for row in rows] if rows else []
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取会员信息
"""
sql = """
SELECT
member_id,
nickname,
mobile,
member_card_grade_name,
DATE(create_time) AS register_date,
recharge_money_sum
FROM billiards_dwd.dim_member
WHERE site_id = %s
"""
rows = self.db.query(sql, (site_id,))
result = {}
for row in (rows or []):
row_dict = dict(row)
result[row_dict['member_id']] = row_dict
return result
def _extract_card_balances(self, site_id: int) -> Dict[int, Dict[str, Decimal]]:
"""
提取会员卡余额
"""
# 卡类型ID
CASH_CARD_TYPE_ID = 2793249295533893
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
sql = """
SELECT
tenant_member_id AS member_id,
card_type_id,
balance
FROM billiards_dwd.dim_member_card_account
WHERE site_id = %s
AND valid_to IS NULL
"""
rows = self.db.query(sql, (site_id,))
result: Dict[int, Dict[str, Decimal]] = {}
for row in (rows or []):
row_dict = dict(row)
member_id = row_dict.get('member_id')
card_type_id = row_dict.get('card_type_id')
balance = self.safe_decimal(row_dict.get('balance', 0))
if member_id not in result:
result[member_id] = {
'cash_balance': Decimal('0'),
'gift_balance': Decimal('0'),
'total_balance': Decimal('0')
}
if card_type_id == CASH_CARD_TYPE_ID:
result[member_id]['cash_balance'] += balance
elif card_type_id in GIFT_CARD_TYPE_IDS:
result[member_id]['gift_balance'] += balance
result[member_id]['total_balance'] = (
result[member_id]['cash_balance'] + result[member_id]['gift_balance']
)
return result
# ==========================================================================
# 工具方法
# ==========================================================================
def _mask_mobile(self, mobile: Optional[str]) -> Optional[str]:
"""手机号脱敏"""
if not mobile or len(mobile) < 7:
return mobile
return mobile[:3] + "****" + mobile[-4:]
def _calc_days_since(self, stat_date: date, last_date: Optional[date]) -> Optional[int]:
"""计算距离最近消费的天数"""
if not last_date:
return None
if isinstance(last_date, datetime):
last_date = last_date.date()
return (stat_date - last_date).days
def _calculate_customer_tier(
self,
stats: Dict[str, Any],
days_since_last: Optional[int]
) -> str:
"""
计算客户分层
分层规则:
- 高价值90天内消费>=3次 且 消费金额>=1000
- 中等30天内有消费
- 低活跃90天内有消费但30天内无消费
- 流失90天内无消费
"""
visit_90d = self.safe_int(stats.get('visit_count_90d', 0))
visit_30d = self.safe_int(stats.get('visit_count_30d', 0))
amount_90d = self.safe_decimal(stats.get('consume_amount_90d', 0))
if visit_90d >= 3 and amount_90d >= 1000:
return "高价值"
elif visit_30d > 0:
return "中等"
elif visit_90d > 0:
return "低活跃"
else:
return "流失"
# 便于外部导入
__all__ = ['MemberConsumptionTask']