Update 2
This commit is contained in:
368
etl_billiards/tasks/dws/member_consumption_task.py
Normal file
368
etl_billiards/tasks/dws/member_consumption_task.py
Normal file
@@ -0,0 +1,368 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
会员消费汇总任务
|
||||
|
||||
功能说明:
|
||||
以"会员"为粒度,统计消费行为和滚动窗口指标
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表
|
||||
- dim_member: 会员维度
|
||||
- dim_member_card_account: 会员卡账户
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_member_consumption_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按统计日期)
|
||||
|
||||
业务规则:
|
||||
- 散客处理:member_id=0 不进入此表
|
||||
- 滚动窗口:7/10/15/30/60/90天
|
||||
- 卡余额:区分储值卡(现金卡)和赠送卡
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class MemberConsumptionTask(BaseDwsTask):
    """
    Member consumption summary task.

    Produces one row per (site, member, statistics date) containing:
    - first / most recent consumption date
    - lifetime consumption totals
    - rolling-window statistics (7/10/15/30/60/90 days)
    - a card-balance snapshot
    - activity flags and a customer tier

    Helpers such as ``self.db``, ``self.config``, ``self.logger``,
    ``self.is_guest``, ``self.safe_int``, ``self.safe_decimal``,
    ``self.delete_existing_data`` and ``self.bulk_insert`` are not defined in
    this class; presumably they are inherited from ``BaseDwsTask``.
    """

    # Card-type identifiers used to split balances into "cash" and "gift".
    # Values are taken verbatim from the original implementation; their
    # business meaning is inferred from the variable names only.
    # NOTE(review): confirm against dim_member_card_account.card_type_id.
    CASH_CARD_TYPE_ID = 2793249295533893
    GIFT_CARD_TYPE_IDS = (2791990152417157, 2793266846533445, 2794699703437125)

    def get_task_code(self) -> str:
        """Return the identifier of this task."""
        return "DWS_MEMBER_CONSUMPTION"

    def get_target_table(self) -> str:
        """Return the name of the table this task writes to."""
        return "dws_member_consumption_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one output row."""
        return ["site_id", "member_id", "stat_date"]

    # ==========================================================================
    # ETL main flow
    # ==========================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """
        Collect everything ``transform`` needs.

        Args:
            context: Task context; ``window_end`` supplies the statistics date
                and ``store_id`` the site.

        Returns:
            Dict with keys ``consumption_stats`` (list of per-member rows),
            ``member_info`` and ``card_balances`` (both keyed by member id),
            ``stat_date`` and ``site_id``.
        """
        window_end = context.window_end
        # A datetime-like value is reduced to its calendar date; a plain date
        # (which has no .date() method) is used as-is.
        stat_date = window_end.date() if hasattr(window_end, 'date') else window_end
        site_id = context.store_id

        self.logger.info(
            "%s: 提取数据,统计日期 %s",
            self.get_task_code(), stat_date
        )

        return {
            # 1. per-member consumption statistics (including rolling windows)
            'consumption_stats': self._extract_consumption_stats(site_id, stat_date),
            # 2. member attributes
            'member_info': self._extract_member_info(site_id),
            # 3. member card balances
            'card_balances': self._extract_card_balances(site_id),
            'stat_date': stat_date,
            'site_id': site_id
        }

    def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
        """
        Turn the extracted data into output rows.

        Args:
            extracted: The dict returned by ``extract``.
            context: Task context (not read here).

        Returns:
            One dict per non-guest member found in ``consumption_stats``.
        """
        consumption_stats = extracted['consumption_stats']
        member_info = extracted['member_info']
        card_balances = extracted['card_balances']
        stat_date = extracted['stat_date']
        site_id = extracted['site_id']

        self.logger.info(
            "%s: 转换数据,%d 条会员消费记录",
            self.get_task_code(), len(consumption_stats)
        )

        # Same for every record, so looked up once; falls back to the site id
        # when the config key is missing.
        tenant_id = self.config.get("app.tenant_id", site_id)

        to_int = self.safe_int
        to_dec = self.safe_decimal

        results = []
        for stats in consumption_stats:
            member_id = stats.get('member_id')

            # Guests are not written to this table.
            if self.is_guest(member_id):
                continue

            profile = member_info.get(member_id, {})
            balance = card_balances.get(member_id, {})

            # Activity and tiering inputs.
            days_since_last = self._calc_days_since(stat_date, stats.get('last_consume_date'))
            customer_tier = self._calculate_customer_tier(stats, days_since_last)

            results.append({
                'site_id': site_id,
                'tenant_id': tenant_id,
                'member_id': member_id,
                'stat_date': stat_date,
                # Member attributes
                'member_nickname': profile.get('nickname'),
                'member_mobile': self._mask_mobile(profile.get('mobile')),
                'card_grade_name': profile.get('member_card_grade_name'),
                'register_date': profile.get('register_date'),
                # Lifetime totals
                'first_consume_date': stats.get('first_consume_date'),
                'last_consume_date': stats.get('last_consume_date'),
                'total_visit_count': to_int(stats.get('total_visit_count', 0)),
                'total_consume_amount': to_dec(stats.get('total_consume_amount', 0)),
                'total_recharge_amount': to_dec(profile.get('recharge_money_sum', 0)),
                'total_table_fee': to_dec(stats.get('total_table_fee', 0)),
                'total_goods_amount': to_dec(stats.get('total_goods_amount', 0)),
                'total_assistant_amount': to_dec(stats.get('total_assistant_amount', 0)),
                # Rolling windows
                'visit_count_7d': to_int(stats.get('visit_count_7d', 0)),
                'visit_count_10d': to_int(stats.get('visit_count_10d', 0)),
                'visit_count_15d': to_int(stats.get('visit_count_15d', 0)),
                'visit_count_30d': to_int(stats.get('visit_count_30d', 0)),
                'visit_count_60d': to_int(stats.get('visit_count_60d', 0)),
                'visit_count_90d': to_int(stats.get('visit_count_90d', 0)),
                'consume_amount_7d': to_dec(stats.get('consume_amount_7d', 0)),
                'consume_amount_10d': to_dec(stats.get('consume_amount_10d', 0)),
                'consume_amount_15d': to_dec(stats.get('consume_amount_15d', 0)),
                'consume_amount_30d': to_dec(stats.get('consume_amount_30d', 0)),
                'consume_amount_60d': to_dec(stats.get('consume_amount_60d', 0)),
                'consume_amount_90d': to_dec(stats.get('consume_amount_90d', 0)),
                # Card balances
                'cash_card_balance': to_dec(balance.get('cash_balance', 0)),
                'gift_card_balance': to_dec(balance.get('gift_balance', 0)),
                'total_card_balance': to_dec(balance.get('total_balance', 0)),
                # Activity indicators
                'days_since_last': days_since_last,
                'is_active_7d': to_int(stats.get('visit_count_7d', 0)) > 0,
                'is_active_30d': to_int(stats.get('visit_count_30d', 0)) > 0,
                'is_active_90d': to_int(stats.get('visit_count_90d', 0)) > 0,
                # Customer tier
                'customer_tier': customer_tier,
            })

        return results

    def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
        """
        Write the transformed rows using delete-before-insert.

        Args:
            transformed: Rows produced by ``transform``.
            context: Task context, forwarded to ``delete_existing_data``.

        Returns:
            A dict with a ``counts`` section and, when rows were written, an
            ``extra`` section holding the number of deleted rows. When
            ``transformed`` is empty nothing is deleted or inserted.
        """
        if not transformed:
            self.logger.info("%s: 无数据需要写入", self.get_task_code())
            return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

        # Existing rows are removed first so that a re-run replaces them.
        deleted = self.delete_existing_data(context, date_col="stat_date")
        inserted = self.bulk_insert(transformed)

        self.logger.info(
            "%s: 加载完成,删除 %d 行,插入 %d 行",
            self.get_task_code(), deleted, inserted
        )

        return {
            "counts": {
                "fetched": len(transformed),
                "inserted": inserted,
                "updated": 0,
                "skipped": 0,
                "errors": 0
            },
            "extra": {"deleted": deleted}
        }

    # ==========================================================================
    # Extraction helpers
    # ==========================================================================

    def _extract_consumption_stats(
        self,
        site_id: int,
        stat_date: date
    ) -> List[Dict[str, Any]]:
        """
        Query per-member consumption statistics as of ``stat_date``.

        Only settlements dated on or before ``stat_date`` are considered, so a
        re-run or backfill for a historical date does not pick up later
        activity (which would inflate the totals and windows and make
        ``last_consume_date`` later than the statistics date).

        An N-day window covers ``stat_date`` and the N-1 days before it.

        Args:
            site_id: Site whose settlements are aggregated.
            stat_date: Statistics date; upper bound and window anchor.

        Returns:
            One dict per member, or an empty list when the query yields
            nothing.
        """
        sql = """
            WITH consume_base AS (
                SELECT
                    member_id,
                    DATE(create_time) AS consume_date,
                    consume_money,
                    table_charge_money,
                    goods_money,
                    COALESCE(assistant_pd_money, 0) + COALESCE(assistant_cx_money, 0) AS assistant_amount
                FROM billiards_dwd.dwd_settlement_head
                WHERE site_id = %s
                  AND member_id IS NOT NULL
                  AND member_id != 0
                  AND DATE(create_time) <= %s
            )
            SELECT
                member_id,
                MIN(consume_date) AS first_consume_date,
                MAX(consume_date) AS last_consume_date,
                -- 全量累计
                COUNT(*) AS total_visit_count,
                SUM(consume_money) AS total_consume_amount,
                SUM(table_charge_money) AS total_table_fee,
                SUM(goods_money) AS total_goods_amount,
                SUM(assistant_amount) AS total_assistant_amount,
                -- 滚动窗口
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN 1 END) AS visit_count_7d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN 1 END) AS visit_count_10d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN 1 END) AS visit_count_15d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
            FROM consume_base
            GROUP BY member_id
        """
        # Placeholder order: site filter, upper date bound, then one
        # stat_date per rolling-window expression (6 counts + 6 sums).
        params = (site_id, stat_date) + (stat_date,) * 12
        rows = self.db.query(sql, params)
        return [dict(row) for row in rows] if rows else []

    def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """
        Load member attributes for a site.

        Args:
            site_id: Site whose members are loaded.

        Returns:
            Mapping of member id to the row (as a dict). If a member id
            appears more than once, the last row returned wins.
        """
        sql = """
            SELECT
                member_id,
                nickname,
                mobile,
                member_card_grade_name,
                DATE(create_time) AS register_date,
                recharge_money_sum
            FROM billiards_dwd.dim_member
            WHERE site_id = %s
        """
        rows = self.db.query(sql, (site_id,))

        members: Dict[int, Dict[str, Any]] = {}
        for record in map(dict, rows or []):
            members[record['member_id']] = record
        return members

    def _extract_card_balances(self, site_id: int) -> Dict[int, Dict[str, Decimal]]:
        """
        Load current card balances per member, split into cash and gift.

        Only rows whose ``valid_to`` is NULL are read. Cards whose type is
        neither ``CASH_CARD_TYPE_ID`` nor one of ``GIFT_CARD_TYPE_IDS``
        contribute to no bucket, so ``total_balance`` is always
        cash + gift.

        Args:
            site_id: Site whose card accounts are read.

        Returns:
            Mapping of member id to ``cash_balance`` / ``gift_balance`` /
            ``total_balance``.
        """
        sql = """
            SELECT
                tenant_member_id AS member_id,
                card_type_id,
                balance
            FROM billiards_dwd.dim_member_card_account
            WHERE site_id = %s
              AND valid_to IS NULL
        """
        rows = self.db.query(sql, (site_id,))

        balances: Dict[int, Dict[str, Decimal]] = {}
        for record in map(dict, rows or []):
            member_id = record.get('member_id')
            card_type_id = record.get('card_type_id')
            amount = self.safe_decimal(record.get('balance', 0))

            # A member gets an entry even if none of their cards match a
            # known type; all three figures then stay at zero.
            entry = balances.setdefault(member_id, {
                'cash_balance': Decimal('0'),
                'gift_balance': Decimal('0'),
                'total_balance': Decimal('0')
            })

            if card_type_id == self.CASH_CARD_TYPE_ID:
                entry['cash_balance'] += amount
            elif card_type_id in self.GIFT_CARD_TYPE_IDS:
                entry['gift_balance'] += amount

            entry['total_balance'] = entry['cash_balance'] + entry['gift_balance']

        return balances

    # ==========================================================================
    # Utility methods
    # ==========================================================================

    def _mask_mobile(self, mobile: Optional[str]) -> Optional[str]:
        """
        Mask a mobile number, keeping the first 3 and last 4 characters.

        Empty values and values shorter than 7 characters are returned
        unchanged.

        NOTE(review): a value of exactly 7 characters comes back with every
        character still visible (3 + 4 covers the whole string) — confirm
        whether that is acceptable.
        """
        if not mobile or len(mobile) < 7:
            return mobile
        return mobile[:3] + "****" + mobile[-4:]

    def _calc_days_since(self, stat_date: date, last_date: Optional[date]) -> Optional[int]:
        """
        Return the number of days from ``last_date`` to ``stat_date``.

        Both arguments may be ``date`` or ``datetime``; datetimes are reduced
        to their calendar date first so the two can always be subtracted
        (``datetime - date`` would raise ``TypeError``).

        Returns:
            ``None`` when ``last_date`` is missing, otherwise the day
            difference.
        """
        if not last_date:
            return None
        if isinstance(stat_date, datetime):
            stat_date = stat_date.date()
        if isinstance(last_date, datetime):
            last_date = last_date.date()
        return (stat_date - last_date).days

    def _calculate_customer_tier(
        self,
        stats: Dict[str, Any],
        days_since_last: Optional[int]
    ) -> str:
        """
        Classify a member into a customer tier.

        Rules, evaluated in order:
        - "高价值" (high value): at least 3 visits and at least 1000 spent in
          the last 90 days
        - "中等" (medium): at least one visit in the last 30 days
        - "低活跃" (low activity): a visit in the last 90 days but none in
          the last 30
        - "流失" (churned): no visit in the last 90 days

        Args:
            stats: Per-member statistics row; reads ``visit_count_90d``,
                ``visit_count_30d`` and ``consume_amount_90d``.
            days_since_last: Currently unused; kept so the signature stays
                stable for callers.
        """
        visit_90d = self.safe_int(stats.get('visit_count_90d', 0))
        visit_30d = self.safe_int(stats.get('visit_count_30d', 0))
        amount_90d = self.safe_decimal(stats.get('consume_amount_90d', 0))

        if visit_90d >= 3 and amount_90d >= 1000:
            return "高价值"
        if visit_30d > 0:
            return "中等"
        if visit_90d > 0:
            return "低活跃"
        return "流失"
|
||||
|
||||
|
||||
# Public API of this module (what `from ... import *` exposes).
__all__ = ["MemberConsumptionTask"]
|
||||
Reference in New Issue
Block a user