Update 2
This commit is contained in:
55
etl_billiards/tasks/dws/__init__.py
Normal file
55
etl_billiards/tasks/dws/__init__.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS层ETL任务模块
|
||||
|
||||
包含:
|
||||
- BaseDwsTask: DWS任务基类
|
||||
- 助教维度任务
|
||||
- 客户维度任务
|
||||
- 财务维度任务
|
||||
- 指数算法任务
|
||||
"""
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TimeLayer, TimeWindow, CourseType, DiscountType
|
||||
from .assistant_daily_task import AssistantDailyTask
|
||||
from .assistant_monthly_task import AssistantMonthlyTask
|
||||
from .assistant_customer_task import AssistantCustomerTask
|
||||
from .assistant_salary_task import AssistantSalaryTask
|
||||
from .assistant_finance_task import AssistantFinanceTask
|
||||
from .member_consumption_task import MemberConsumptionTask
|
||||
from .member_visit_task import MemberVisitTask
|
||||
from .finance_daily_task import FinanceDailyTask
|
||||
from .finance_recharge_task import FinanceRechargeTask
|
||||
from .finance_income_task import FinanceIncomeStructureTask
|
||||
from .finance_discount_task import FinanceDiscountDetailTask
|
||||
from .retention_cleanup_task import DwsRetentionCleanupTask
|
||||
|
||||
# 指数算法任务
|
||||
from .index import RecallIndexTask, IntimacyIndexTask
|
||||
|
||||
# Public API of the DWS task package; every name listed here is imported above.
__all__ = [
    # Base class and shared types
    "BaseDwsTask",
    "TimeLayer",
    "TimeWindow",
    "CourseType",
    "DiscountType",
    # Assistant dimension
    "AssistantDailyTask",
    "AssistantMonthlyTask",
    "AssistantCustomerTask",
    "AssistantSalaryTask",
    "AssistantFinanceTask",
    # Customer dimension
    "MemberConsumptionTask",
    "MemberVisitTask",
    # Finance dimension
    "FinanceDailyTask",
    "FinanceRechargeTask",
    "FinanceIncomeStructureTask",
    "FinanceDiscountDetailTask",
    "DwsRetentionCleanupTask",
    # Index algorithms
    "RecallIndexTask",
    "IntimacyIndexTask",
]
|
||||
333
etl_billiards/tasks/dws/assistant_customer_task.py
Normal file
333
etl_billiards/tasks/dws/assistant_customer_task.py
Normal file
@@ -0,0 +1,333 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
助教服务客户统计任务
|
||||
|
||||
功能说明:
|
||||
以"助教+客户"为粒度,统计服务关系和滚动窗口指标
|
||||
|
||||
数据来源:
|
||||
- dwd_assistant_service_log: 助教服务流水
|
||||
- dim_member: 会员维度
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_assistant_customer_stats
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按统计日期)
|
||||
|
||||
业务规则:
|
||||
- 散客处理:member_id=0 不进入此表统计
|
||||
- 滚动窗口:7/10/15/30/60/90天
|
||||
- 活跃度:近7天/30天是否有服务
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class AssistantCustomerTask(BaseDwsTask):
|
||||
"""
|
||||
助教服务客户统计任务
|
||||
|
||||
统计每个助教与每个客户的服务关系:
|
||||
- 首次/最近服务日期
|
||||
- 累计服务统计
|
||||
- 滚动窗口统计(7/10/15/30/60/90天)
|
||||
- 活跃度指标
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code string of the assistant-customer statistics task."""
    task_code = "DWS_ASSISTANT_CUSTOMER"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the target table name (no schema prefix) this task writes to."""
    table_name = "dws_assistant_customer_stats"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names forming the primary key of the target table."""
    key_columns = ("site_id", "assistant_id", "member_id", "stat_date")
    return list(key_columns)
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
# ==========================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Extract the inputs needed to build assistant-customer statistics.

    The statistics date is ``context.window_end`` (reduced to a date when the
    value exposes a ``date`` attribute) and the site is ``context.store_id``.

    Args:
        context: Task context providing ``window_end`` and ``store_id``.

    Returns:
        A dict with the keys ``service_pairs``, ``member_info``,
        ``assistant_info``, ``stat_date`` and ``site_id``.
    """
    window_end = context.window_end
    stat_date = window_end.date() if hasattr(window_end, 'date') else window_end
    site_id = context.store_id

    self.logger.info(
        "%s: 提取数据,统计日期 %s",
        self.get_task_code(), stat_date
    )

    # The rolling-window boundaries are derived from stat_date inside the SQL
    # of _extract_service_pairs, so no separate lookback date is needed here
    # (the previously computed 90-day lookback value was never used).

    # 1. Assistant-customer service aggregates (full history + rolling windows)
    service_pairs = self._extract_service_pairs(site_id, stat_date)

    # 2. Member attributes
    member_info = self._extract_member_info(site_id)

    # 3. Assistant attributes
    assistant_info = self._extract_assistant_info(site_id)

    return {
        'service_pairs': service_pairs,
        'member_info': member_info,
        'assistant_info': assistant_info,
        'stat_date': stat_date,
        'site_id': site_id
    }
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Turn the extracted assistant-customer aggregates into target-table rows.

    Pairs whose member id is reported as a guest by ``self.is_guest`` are
    skipped. Each remaining pair yields one row carrying identity columns,
    cumulative totals, the rolling-window metrics and activity indicators.

    Args:
        extracted: The dict produced by ``extract``.
        context: Task context (not read by this method).

    Returns:
        One dict per non-guest assistant-customer pair.
    """
    pairs = extracted['service_pairs']
    members = extracted['member_info']
    assistants = extracted['assistant_info']
    stat_date = extracted['stat_date']
    site_id = extracted['site_id']

    self.logger.info(
        "%s: 转换数据,%d 条服务关系记录",
        self.get_task_code(), len(pairs)
    )

    # Suffixes of the rolling-window columns, in output order.
    window_suffixes = ('7d', '10d', '15d', '30d', '60d', '90d')

    rows = []
    for pair in pairs:
        assistant_id = pair.get('assistant_id')
        member_id = pair.get('member_id')

        # Guests are not part of this table.
        if self.is_guest(member_id):
            continue

        assistant = assistants.get(assistant_id, {})
        member = members.get(member_id, {})

        row = {
            'site_id': site_id,
            'tenant_id': self.config.get("app.tenant_id", site_id),
            'assistant_id': assistant_id,
            'assistant_nickname': assistant.get('nickname', pair.get('assistant_nickname')),
            'member_id': member_id,
            'member_nickname': member.get('nickname'),
            'member_mobile': self._mask_mobile(member.get('mobile')),
            'stat_date': stat_date,
            # Cumulative totals over the full history
            'first_service_date': pair.get('first_service_date'),
            'last_service_date': pair.get('last_service_date'),
            'total_service_count': self.safe_int(pair.get('total_service_count', 0)),
            'total_service_hours': self.safe_decimal(pair.get('total_service_hours', 0)),
            'total_service_amount': self.safe_decimal(pair.get('total_service_amount', 0)),
        }

        # Rolling-window metrics: all counts first, then hours, then amounts,
        # which keeps the key order of the produced rows unchanged.
        for suffix in window_suffixes:
            column = 'service_count_' + suffix
            row[column] = self.safe_int(pair.get(column, 0))
        for suffix in window_suffixes:
            column = 'service_hours_' + suffix
            row[column] = self.safe_decimal(pair.get(column, 0))
        for suffix in window_suffixes:
            column = 'service_amount_' + suffix
            row[column] = self.safe_decimal(pair.get(column, 0))

        # Activity indicators
        row['days_since_last'] = self._calc_days_since(stat_date, pair.get('last_service_date'))
        row['is_active_7d'] = self.safe_int(pair.get('service_count_7d', 0)) > 0
        row['is_active_30d'] = self.safe_int(pair.get('service_count_30d', 0)) > 0

        rows.append(row)

    return rows
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """
    Write the transformed rows to the target table (delete, then insert).

    Args:
        transformed: Rows produced by ``transform``.
        context: Task context forwarded to ``delete_existing_data``.

    Returns:
        A dict with a ``counts`` entry and, when rows were written, an
        ``extra`` entry holding the number of deleted rows.
    """
    if not transformed:
        # Nothing to write: neither the delete nor the insert is executed.
        self.logger.info("%s: 无数据需要写入", self.get_task_code())
        empty_counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
        return {"counts": empty_counts}

    # Remove existing rows first, then bulk insert the new ones.
    deleted = self.delete_existing_data(context, date_col="stat_date")
    inserted = self.bulk_insert(transformed)

    self.logger.info(
        "%s: 加载完成,删除 %d 行,插入 %d 行",
        self.get_task_code(), deleted, inserted
    )

    counts = {
        "fetched": len(transformed),
        "inserted": inserted,
        "updated": 0,
        "skipped": 0,
        "errors": 0
    }
    return {"counts": counts, "extra": {"deleted": deleted}}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _extract_service_pairs(
|
||||
self,
|
||||
site_id: int,
|
||||
stat_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取助教-客户服务统计(含滚动窗口)
|
||||
"""
|
||||
sql = """
|
||||
WITH service_base AS (
|
||||
SELECT
|
||||
site_assistant_id AS assistant_id,
|
||||
nickname AS assistant_nickname,
|
||||
tenant_member_id AS member_id,
|
||||
DATE(start_use_time) AS service_date,
|
||||
income_seconds,
|
||||
ledger_amount
|
||||
FROM billiards_dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
)
|
||||
SELECT
|
||||
assistant_id,
|
||||
MAX(assistant_nickname) AS assistant_nickname,
|
||||
member_id,
|
||||
MIN(service_date) AS first_service_date,
|
||||
MAX(service_date) AS last_service_date,
|
||||
-- 全量累计
|
||||
COUNT(*) AS total_service_count,
|
||||
SUM(income_seconds) / 3600.0 AS total_service_hours,
|
||||
SUM(ledger_amount) AS total_service_amount,
|
||||
-- 7天窗口
|
||||
COUNT(CASE WHEN service_date >= %s - INTERVAL '6 days' THEN 1 END) AS service_count_7d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '6 days' THEN income_seconds ELSE 0 END) / 3600.0 AS service_hours_7d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '6 days' THEN ledger_amount ELSE 0 END) AS service_amount_7d,
|
||||
-- 10天窗口
|
||||
COUNT(CASE WHEN service_date >= %s - INTERVAL '9 days' THEN 1 END) AS service_count_10d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '9 days' THEN income_seconds ELSE 0 END) / 3600.0 AS service_hours_10d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '9 days' THEN ledger_amount ELSE 0 END) AS service_amount_10d,
|
||||
-- 15天窗口
|
||||
COUNT(CASE WHEN service_date >= %s - INTERVAL '14 days' THEN 1 END) AS service_count_15d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '14 days' THEN income_seconds ELSE 0 END) / 3600.0 AS service_hours_15d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '14 days' THEN ledger_amount ELSE 0 END) AS service_amount_15d,
|
||||
-- 30天窗口
|
||||
COUNT(CASE WHEN service_date >= %s - INTERVAL '29 days' THEN 1 END) AS service_count_30d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '29 days' THEN income_seconds ELSE 0 END) / 3600.0 AS service_hours_30d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '29 days' THEN ledger_amount ELSE 0 END) AS service_amount_30d,
|
||||
-- 60天窗口
|
||||
COUNT(CASE WHEN service_date >= %s - INTERVAL '59 days' THEN 1 END) AS service_count_60d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '59 days' THEN income_seconds ELSE 0 END) / 3600.0 AS service_hours_60d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '59 days' THEN ledger_amount ELSE 0 END) AS service_amount_60d,
|
||||
-- 90天窗口
|
||||
COUNT(CASE WHEN service_date >= %s - INTERVAL '89 days' THEN 1 END) AS service_count_90d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '89 days' THEN income_seconds ELSE 0 END) / 3600.0 AS service_hours_90d,
|
||||
SUM(CASE WHEN service_date >= %s - INTERVAL '89 days' THEN ledger_amount ELSE 0 END) AS service_amount_90d
|
||||
FROM service_base
|
||||
GROUP BY assistant_id, member_id
|
||||
HAVING MAX(service_date) >= %s - INTERVAL '90 days'
|
||||
"""
|
||||
# 构建参数(每个窗口需要3个日期参数)
|
||||
params = [site_id]
|
||||
for _ in range(6): # 6个窗口,每个3个参数
|
||||
params.extend([stat_date, stat_date, stat_date])
|
||||
params.append(stat_date) # HAVING条件
|
||||
|
||||
rows = self.db.query(sql, tuple(params))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
提取会员信息
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile
|
||||
FROM billiards_dwd.dim_member
|
||||
WHERE site_id = %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
result = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
result[row_dict['member_id']] = row_dict
|
||||
return result
|
||||
|
||||
def _extract_assistant_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
提取助教信息
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
site_assistant_id AS assistant_id,
|
||||
nickname
|
||||
FROM billiards_dwd.dim_assistant
|
||||
WHERE site_id = %s
|
||||
AND valid_to IS NULL
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
result = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
result[row_dict['assistant_id']] = row_dict
|
||||
return result
|
||||
|
||||
# ==========================================================================
|
||||
# 工具方法
|
||||
# ==========================================================================
|
||||
|
||||
def _mask_mobile(self, mobile: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
手机号脱敏
|
||||
"""
|
||||
if not mobile or len(mobile) < 7:
|
||||
return mobile
|
||||
return mobile[:3] + "****" + mobile[-4:]
|
||||
|
||||
def _calc_days_since(self, stat_date: date, last_date: Optional[date]) -> Optional[int]:
|
||||
"""
|
||||
计算距离最近服务的天数
|
||||
"""
|
||||
if not last_date:
|
||||
return None
|
||||
if isinstance(last_date, datetime):
|
||||
last_date = last_date.date()
|
||||
return (stat_date - last_date).days
|
||||
|
||||
|
||||
# Public API of this module, for convenient external imports
__all__ = ['AssistantCustomerTask']
|
||||
344
etl_billiards/tasks/dws/assistant_daily_task.py
Normal file
344
etl_billiards/tasks/dws/assistant_daily_task.py
Normal file
@@ -0,0 +1,344 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
助教日度业绩明细任务
|
||||
|
||||
功能说明:
|
||||
以"助教+日期"为粒度,汇总每日业绩明细
|
||||
|
||||
数据来源:
|
||||
- dwd_assistant_service_log: 助教服务流水
|
||||
- dwd_assistant_trash_event: 废除记录(排除)
|
||||
- dim_assistant: 助教维度(SCD2,获取当日等级)
|
||||
- cfg_skill_type: 技能→课程类型映射
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_assistant_daily_detail
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每小时增量更新
|
||||
- 幂等方式:delete-before-insert(按日期窗口)
|
||||
|
||||
业务规则:
|
||||
- 有效业绩:需排除dwd_assistant_trash_event中的废除记录
|
||||
- 助教等级:使用SCD2 as-of取值,获取统计日当日生效的等级
|
||||
- 课程类型:通过skill_id映射,分为基础课和附加课
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
|
||||
|
||||
|
||||
class AssistantDailyTask(BaseDwsTask):
|
||||
"""
|
||||
助教日度业绩明细任务
|
||||
|
||||
汇总每个助教每天的:
|
||||
- 服务次数(总/基础课/附加课)
|
||||
- 计费时长(秒/小时)
|
||||
- 计费金额
|
||||
- 服务客户数(去重)
|
||||
- 服务台桌数(去重)
|
||||
- 被废除的记录统计
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code string of the assistant daily detail task."""
    task_code = "DWS_ASSISTANT_DAILY"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the target table name (no schema prefix) this task writes to."""
    table_name = "dws_assistant_daily_detail"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names forming the primary key of the target table."""
    key_columns = ("site_id", "assistant_id", "stat_date")
    return list(key_columns)
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
# ==========================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Read the assistant service records and trash events of the task window.

    The window bounds come from ``context.window_start`` /
    ``context.window_end`` (reduced to dates when the values expose a
    ``date`` attribute); the site is ``context.store_id``.

    Args:
        context: Task context providing the window and the store id.

    Returns:
        A dict with the keys ``service_records``, ``trash_records``,
        ``start_date``, ``end_date`` and ``site_id``.
    """
    window_start = context.window_start
    window_end = context.window_end
    start_date = window_start.date() if hasattr(window_start, 'date') else window_start
    end_date = window_end.date() if hasattr(window_end, 'date') else window_end
    site_id = context.store_id

    self.logger.info(
        "%s: 提取数据,日期范围 %s ~ %s",
        self.get_task_code(), start_date, end_date
    )

    extracted: Dict[str, Any] = {}
    # 1. Assistant service records
    extracted['service_records'] = self._extract_service_records(site_id, start_date, end_date)
    # 2. Trash (voided service) records
    extracted['trash_records'] = self._extract_trash_records(site_id, start_date, end_date)
    # 3. Load the configuration cache
    self.load_config_cache()

    extracted['start_date'] = start_date
    extracted['end_date'] = end_date
    extracted['site_id'] = site_id
    return extracted
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Aggregate the extracted service records per assistant and day.

    Args:
        extracted: The dict produced by ``extract``.
        context: Task context (not read by this method).

    Returns:
        The rows produced by ``_aggregate_by_assistant_date``.
    """
    services = extracted['service_records']
    trashed = extracted['trash_records']
    site_id = extracted['site_id']

    self.logger.info(
        "%s: 转换数据,服务记录 %d 条,废除记录 %d 条",
        self.get_task_code(), len(services), len(trashed)
    )

    # Index trash events by assistant_service_id, then aggregate.
    trash_by_service = self._build_trash_index(trashed)
    return self._aggregate_by_assistant_date(services, trash_by_service, site_id)
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """
    Write the aggregated rows to the DWS table (delete, then insert).

    Args:
        transformed: Rows produced by ``transform``.
        context: Task context forwarded to ``delete_existing_data``.

    Returns:
        A dict with a ``counts`` entry and, when rows were written, an
        ``extra`` entry holding the number of deleted rows.
    """
    if not transformed:
        # Nothing to write: neither the delete nor the insert is executed.
        self.logger.info("%s: 无数据需要写入", self.get_task_code())
        empty_counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
        return {"counts": empty_counts}

    # Delete the existing rows first, then bulk insert the new ones.
    deleted = self.delete_existing_data(context, date_col="stat_date")
    inserted = self.bulk_insert(transformed)

    self.logger.info(
        "%s: 加载完成,删除 %d 行,插入 %d 行",
        self.get_task_code(), deleted, inserted
    )

    counts = {
        "fetched": len(transformed),
        "inserted": inserted,
        "updated": 0,
        "skipped": 0,
        "errors": 0
    }
    return {"counts": counts, "extra": {"deleted": deleted}}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _extract_service_records(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取助教服务记录
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
asl.assistant_service_id,
|
||||
asl.order_settle_id,
|
||||
asl.site_assistant_id AS assistant_id,
|
||||
asl.nickname AS assistant_nickname,
|
||||
asl.assistant_level,
|
||||
asl.skill_id,
|
||||
asl.skill_name,
|
||||
asl.tenant_member_id AS member_id,
|
||||
asl.site_table_id AS table_id,
|
||||
asl.income_seconds,
|
||||
asl.real_use_seconds,
|
||||
asl.ledger_amount,
|
||||
asl.ledger_unit_price,
|
||||
DATE(asl.start_use_time) AS service_date
|
||||
FROM billiards_dwd.dwd_assistant_service_log asl
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_trash_records(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取废除记录
|
||||
|
||||
有效业绩的排除规则:仅对"助教废除表"的记录进行处理排除
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
assistant_service_id,
|
||||
trash_seconds,
|
||||
trash_reason,
|
||||
trash_time
|
||||
FROM billiards_dwd.dwd_assistant_trash_event
|
||||
WHERE site_id = %s
|
||||
AND DATE(trash_time) >= %s
|
||||
AND DATE(trash_time) <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
# ==========================================================================
|
||||
# 数据转换方法
|
||||
# ==========================================================================
|
||||
|
||||
def _build_trash_index(
|
||||
self,
|
||||
trash_records: List[Dict[str, Any]]
|
||||
) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
构建废除记录索引
|
||||
"""
|
||||
index = {}
|
||||
for record in trash_records:
|
||||
service_id = record.get('assistant_service_id')
|
||||
if service_id:
|
||||
index[service_id] = record
|
||||
return index
|
||||
|
||||
def _aggregate_by_assistant_date(
    self,
    service_records: List[Dict[str, Any]],
    trash_index: Dict[int, Dict[str, Any]],
    site_id: int
) -> List[Dict[str, Any]]:
    """
    Aggregate service records per (assistant, service date).

    Records missing an assistant id or a service date are skipped. A record
    whose ``assistant_service_id`` is in ``trash_index`` only contributes to
    the trashed counters; every other record contributes to the total and
    to either the base or the bonus counters, depending on its course type.
    Distinct non-guest members and distinct tables are counted for all
    records, trashed or not.

    Args:
        service_records: Service log rows from ``_extract_service_records``.
        trash_index: Trash events keyed by ``assistant_service_id``.
        site_id: Site id written to every output row.

    Returns:
        One dict per (assistant, service date) with counts, seconds, hours,
        ledger amounts, distinct customer/table counts and trash counters.
    """
    # (assistant_id, service_date) -> running aggregate
    agg_dict: Dict[Tuple[int, date], Dict[str, Any]] = {}

    for record in service_records:
        assistant_id = record.get('assistant_id')
        service_date = record.get('service_date')

        if not assistant_id or not service_date:
            continue

        key = (assistant_id, service_date)

        if key not in agg_dict:
            # Assistant level as of the service date (SCD2 as-of lookup);
            # falls back to the level stored on the service record.
            level_info = self.get_assistant_level_asof(assistant_id, service_date)

            agg_dict[key] = {
                'site_id': site_id,
                'tenant_id': self.config.get("app.tenant_id", site_id),
                'assistant_id': assistant_id,
                'assistant_nickname': record.get('assistant_nickname'),
                'stat_date': service_date,
                'assistant_level_code': level_info.get('level_code') if level_info else record.get('assistant_level'),
                'assistant_level_name': level_info.get('level_name') if level_info else None,
                'total_service_count': 0,
                'base_service_count': 0,
                'bonus_service_count': 0,
                'total_seconds': 0,
                'base_seconds': 0,
                'bonus_seconds': 0,
                'total_hours': Decimal('0'),
                'base_hours': Decimal('0'),
                'bonus_hours': Decimal('0'),
                'total_ledger_amount': Decimal('0'),
                'base_ledger_amount': Decimal('0'),
                'bonus_ledger_amount': Decimal('0'),
                'unique_customers': set(),
                'unique_tables': set(),
                'trashed_seconds': 0,
                'trashed_count': 0,
            }

        agg = agg_dict[key]

        service_id = record.get('assistant_service_id')
        income_seconds = self.safe_int(record.get('income_seconds', 0))
        ledger_amount = self.safe_decimal(record.get('ledger_amount', 0))
        skill_id = record.get('skill_id')
        member_id = record.get('member_id')
        table_id = record.get('table_id')

        # A record without a skill id is treated as a base course.
        course_type = self.get_course_type(skill_id) if skill_id else CourseType.BASE
        is_base = course_type == CourseType.BASE

        if service_id in trash_index:
            # Trashed records are counted separately. Rows read from SQL
            # always carry the 'trash_seconds' key (NULL arrives as None),
            # so dict.get's default never applied; fall back to the billed
            # seconds explicitly when the value is missing or None.
            raw_trash_seconds = trash_index[service_id].get('trash_seconds')
            if raw_trash_seconds is None:
                trash_seconds = income_seconds
            else:
                trash_seconds = self.safe_int(raw_trash_seconds)
            agg['trashed_seconds'] += trash_seconds
            agg['trashed_count'] += 1
        else:
            agg['total_service_count'] += 1
            agg['total_seconds'] += income_seconds
            agg['total_ledger_amount'] += ledger_amount

            if is_base:
                agg['base_service_count'] += 1
                agg['base_seconds'] += income_seconds
                agg['base_ledger_amount'] += ledger_amount
            else:
                agg['bonus_service_count'] += 1
                agg['bonus_seconds'] += income_seconds
                agg['bonus_ledger_amount'] += ledger_amount

        # Distinct customers and tables are tracked whether trashed or not.
        if member_id and not self.is_guest(member_id):
            agg['unique_customers'].add(member_id)
        if table_id:
            agg['unique_tables'].add(table_id)

    # Finalise: derive hours and replace the sets by their sizes.
    result = []
    for agg in agg_dict.values():
        agg['total_hours'] = self.seconds_to_hours(agg['total_seconds'])
        agg['base_hours'] = self.seconds_to_hours(agg['base_seconds'])
        agg['bonus_hours'] = self.seconds_to_hours(agg['bonus_seconds'])

        agg['unique_customers'] = len(agg['unique_customers'])
        agg['unique_tables'] = len(agg['unique_tables'])

        result.append(agg)

    return result
|
||||
|
||||
|
||||
# Public API of this module, for convenient external imports
__all__ = ['AssistantDailyTask']
|
||||
199
etl_billiards/tasks/dws/assistant_finance_task.py
Normal file
199
etl_billiards/tasks/dws/assistant_finance_task.py
Normal file
@@ -0,0 +1,199 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
助教收支分析任务
|
||||
|
||||
功能说明:
|
||||
以"日期+助教"为粒度,分析助教产出的收入和成本
|
||||
|
||||
数据来源:
|
||||
- dwd_assistant_service_log: 助教服务流水(收入)
|
||||
- dws_assistant_salary_calc: 工资计算(成本)
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_assistant_finance_analysis
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按日期)
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
|
||||
|
||||
|
||||
class AssistantFinanceTask(BaseDwsTask):
|
||||
"""
|
||||
助教收支分析任务
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code string of the assistant finance analysis task."""
    task_code = "DWS_ASSISTANT_FINANCE"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the target table name (no schema prefix) this task writes to."""
    table_name = "dws_assistant_finance_analysis"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names forming the primary key of the target table."""
    key_columns = ("site_id", "stat_date", "assistant_id")
    return list(key_columns)
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Read daily assistant revenue and monthly salary data for the window.

    The window bounds come from ``context.window_start`` /
    ``context.window_end`` (reduced to dates when the values expose a
    ``date`` attribute); the site is ``context.store_id``.

    Args:
        context: Task context providing the window and the store id.

    Returns:
        A dict with the keys ``daily_revenue``, ``monthly_salary``,
        ``start_date``, ``end_date`` and ``site_id``.
    """
    window_start = context.window_start
    window_end = context.window_end
    start_date = window_start.date() if hasattr(window_start, 'date') else window_start
    end_date = window_end.date() if hasattr(window_end, 'date') else window_end
    site_id = context.store_id

    extracted: Dict[str, Any] = {}
    # Daily revenue per assistant
    extracted['daily_revenue'] = self._extract_daily_revenue(site_id, start_date, end_date)
    # Monthly salary, used to derive the average daily cost
    extracted['monthly_salary'] = self._extract_monthly_salary(site_id, start_date, end_date)
    # Load the configuration cache
    self.load_config_cache()

    extracted['start_date'] = start_date
    extracted['end_date'] = end_date
    extracted['site_id'] = site_id
    return extracted
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Combine daily revenue with the monthly salary into one row per day.

    For each revenue row the salary of the same assistant and month is
    looked up; the daily cost is the gross salary divided by the work days
    (1 when the value is missing or zero). Gross profit is revenue minus
    daily cost, and gross margin is profit divided by revenue (0 when the
    revenue is not positive).

    Args:
        extracted: The dict produced by ``extract``.
        context: Task context (not read by this method).

    Returns:
        One dict per daily revenue row.
    """
    revenue_rows = extracted['daily_revenue']
    salary_rows = extracted['monthly_salary']
    site_id = extracted['site_id']

    # (assistant_id, salary_month) -> salary row; incomplete keys are skipped.
    salary_by_key = {}
    for salary_row in salary_rows:
        lookup_key = (salary_row.get('assistant_id'), salary_row.get('salary_month'))
        if all(lookup_key):
            salary_by_key[lookup_key] = salary_row

    records = []
    for rev in revenue_rows:
        assistant_id = rev.get('assistant_id')
        stat_date = rev.get('stat_date')

        # Salary of the month that contains stat_date
        month_key = None
        if isinstance(stat_date, date):
            month_key = stat_date.replace(day=1)
        matched_salary = salary_by_key.get((assistant_id, month_key), {})

        # Average daily cost
        gross_salary = self.safe_decimal(matched_salary.get('gross_salary', 0))
        work_days = self.safe_int(matched_salary.get('work_days', 1))
        if not work_days:
            work_days = 1
        cost_daily = gross_salary / Decimal(str(work_days))

        revenue_total = self.safe_decimal(rev.get('revenue_total', 0))
        gross_profit = revenue_total - cost_daily
        if revenue_total > 0:
            gross_margin = gross_profit / revenue_total
        else:
            gross_margin = Decimal('0')

        records.append({
            'site_id': site_id,
            'tenant_id': self.config.get("app.tenant_id", site_id),
            'stat_date': stat_date,
            'assistant_id': assistant_id,
            'assistant_nickname': rev.get('assistant_nickname'),
            'revenue_total': revenue_total,
            'revenue_base': self.safe_decimal(rev.get('revenue_base', 0)),
            'revenue_bonus': self.safe_decimal(rev.get('revenue_bonus', 0)),
            'cost_daily': cost_daily,
            'gross_profit': gross_profit,
            'gross_margin': gross_margin,
            'service_count': self.safe_int(rev.get('service_count', 0)),
            'service_hours': self.safe_decimal(rev.get('service_hours', 0)),
            'unique_customers': self.safe_int(rev.get('unique_customers', 0)),
        })

    return records
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """
    Write the analysis rows to the target table (delete, then insert).

    Args:
        transformed: Rows produced by ``transform``.
        context: Task context forwarded to ``delete_existing_data``.

    Returns:
        A dict with a ``counts`` entry and, when rows were written, an
        ``extra`` entry holding the number of deleted rows.
    """
    if not transformed:
        # Nothing to write: neither the delete nor the insert is executed.
        empty_counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
        return {"counts": empty_counts}

    deleted = self.delete_existing_data(context, date_col="stat_date")
    inserted = self.bulk_insert(transformed)

    counts = {
        "fetched": len(transformed),
        "inserted": inserted,
        "updated": 0,
        "skipped": 0,
        "errors": 0,
    }
    return {"counts": counts, "extra": {"deleted": deleted}}
|
||||
|
||||
def _extract_daily_revenue(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
# 基础课skill_id
|
||||
BASE_SKILL_ID = 2791903611396869
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(start_use_time) AS stat_date,
|
||||
site_assistant_id AS assistant_id,
|
||||
MAX(nickname) AS assistant_nickname,
|
||||
COUNT(*) AS service_count,
|
||||
SUM(income_seconds) / 3600.0 AS service_hours,
|
||||
SUM(ledger_amount) AS revenue_total,
|
||||
SUM(CASE WHEN skill_id = %s THEN ledger_amount ELSE 0 END) AS revenue_base,
|
||||
SUM(CASE WHEN skill_id != %s THEN ledger_amount ELSE 0 END) AS revenue_bonus,
|
||||
COUNT(DISTINCT tenant_member_id) AS unique_customers
|
||||
FROM billiards_dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND DATE(start_use_time) >= %s
|
||||
AND DATE(start_use_time) <= %s
|
||||
GROUP BY DATE(start_use_time), site_assistant_id
|
||||
"""
|
||||
rows = self.db.query(sql, (BASE_SKILL_ID, BASE_SKILL_ID, site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_monthly_salary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
# 获取涉及的月份
|
||||
month_start = start_date.replace(day=1)
|
||||
month_end = end_date.replace(day=1)
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
assistant_id,
|
||||
salary_month,
|
||||
gross_salary,
|
||||
effective_hours
|
||||
FROM billiards_dws.dws_assistant_salary_calc
|
||||
WHERE site_id = %s
|
||||
AND salary_month >= %s
|
||||
AND salary_month <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, month_start, month_end))
|
||||
|
||||
# 获取每月工作天数
|
||||
work_days_sql = """
|
||||
SELECT
|
||||
assistant_id,
|
||||
DATE_TRUNC('month', stat_date)::DATE AS month,
|
||||
COUNT(DISTINCT stat_date) AS work_days
|
||||
FROM billiards_dws.dws_assistant_daily_detail
|
||||
WHERE site_id = %s
|
||||
AND stat_date >= %s
|
||||
AND stat_date <= %s
|
||||
GROUP BY assistant_id, DATE_TRUNC('month', stat_date)
|
||||
"""
|
||||
work_days_rows = self.db.query(work_days_sql, (site_id, start_date, end_date))
|
||||
work_days_index = {(r['assistant_id'], r['month']): r['work_days'] for r in (work_days_rows or [])}
|
||||
|
||||
results = []
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
asst_id = row_dict.get('assistant_id')
|
||||
month = row_dict.get('salary_month')
|
||||
row_dict['work_days'] = work_days_index.get((asst_id, month), 20)
|
||||
results.append(row_dict)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# Public API of this module, for convenient external imports
__all__ = ['AssistantFinanceTask']
|
||||
444
etl_billiards/tasks/dws/assistant_monthly_task.py
Normal file
444
etl_billiards/tasks/dws/assistant_monthly_task.py
Normal file
@@ -0,0 +1,444 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
助教月度业绩汇总任务
|
||||
|
||||
功能说明:
|
||||
以"助教+月份"为粒度,汇总月度业绩及档位计算
|
||||
|
||||
数据来源:
|
||||
- dws_assistant_daily_detail: 日度明细(聚合)
|
||||
- dim_assistant: 助教维度(入职日期、等级)
|
||||
- cfg_performance_tier: 绩效档位配置
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_assistant_monthly_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新当月数据
|
||||
- 幂等方式:delete-before-insert(按月份)
|
||||
|
||||
业务规则:
|
||||
- 新入职判断:入职日期在月1日0点之后则为新入职
|
||||
- 有效业绩:total_hours - trashed_hours
|
||||
- 档位匹配:根据有效业绩小时数匹配cfg_performance_tier
|
||||
- 排名计算:按有效业绩小时数降序,考虑并列(如2个第一则都是1,下一个是3)
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class AssistantMonthlyTask(BaseDwsTask):
|
||||
"""
|
||||
助教月度业绩汇总任务
|
||||
|
||||
汇总每个助教每月的:
|
||||
- 工作天数、服务次数、时长
|
||||
- 有效业绩(扣除废除记录后)
|
||||
- 档位匹配
|
||||
- 月度排名(用于Top3奖金)
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the code that identifies the assistant monthly summary task."""
    task_code = "DWS_ASSISTANT_MONTHLY"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the unqualified name of the table this task writes to."""
    table_name = "dws_assistant_monthly_summary"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names that make up the target table's primary key."""
    key_columns = ("site_id", "assistant_id", "stat_month")
    return list(key_columns)
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
# ==========================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """Gather the inputs for the monthly summary.

    The window bounds on ``context`` are reduced to dates (when they expose a
    ``date()`` method), the months they span are listed, and the per-month
    aggregates and assistant attributes are queried.  The configuration
    cache is loaded last.

    Returns:
        Dict with keys ``daily_aggregates``, ``assistant_info``, ``months``
        and ``site_id``.
    """
    def _as_date(value):
        # datetime-like values are narrowed to their date part.
        return value.date() if hasattr(value, 'date') else value

    window_first = _as_date(context.window_start)
    window_last = _as_date(context.window_end)
    store = context.store_id

    # First day of every month touched by the window.
    month_list = self._get_months_in_range(window_first, window_last)

    self.logger.info(
        "%s: 提取数据,月份范围 %s",
        self.get_task_code(), [str(m) for m in month_list]
    )

    aggregates = self._extract_daily_aggregates(store, month_list)
    assistants = self._extract_assistant_info(store)
    self.load_config_cache()

    return {
        'daily_aggregates': aggregates,
        'assistant_info': assistants,
        'months': month_list,
        'site_id': store
    }
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """Build the monthly summary records for every extracted month.

    Args:
        extracted: The dict produced by ``extract``.
        context: Task context (not read here).

    Returns:
        The concatenation of ``_process_month`` results, in month order.
    """
    aggregates = extracted['daily_aggregates']
    assistants = extracted['assistant_info']
    month_list = extracted['months']
    store = extracted['site_id']

    self.logger.info(
        "%s: 转换数据,%d 个月份,%d 条聚合记录",
        self.get_task_code(), len(month_list), len(aggregates)
    )

    monthly_rows: List[Dict[str, Any]] = []
    for month_start in month_list:
        monthly_rows += self._process_month(aggregates, assistants, month_start, store)
    return monthly_rows
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """Write the monthly records using delete-then-insert.

    Rows already stored for the months present in ``transformed`` are
    deleted first, then the new rows are bulk-inserted.

    Returns:
        A dict with a ``counts`` section; when rows were written it also
        carries ``extra`` with the number of deleted rows.
    """
    if not transformed:
        self.logger.info("%s: 无数据需要写入", self.get_task_code())
        return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

    # Delete first so a re-run for the same month does not duplicate rows.
    removed = self._delete_by_months(context, transformed)
    written = self.bulk_insert(transformed)

    self.logger.info(
        "%s: 加载完成,删除 %d 行,插入 %d 行",
        self.get_task_code(), removed, written
    )

    counts = {
        "fetched": len(transformed),
        "inserted": written,
        "updated": 0,
        "skipped": 0,
        "errors": 0
    }
    return {"counts": counts, "extra": {"deleted": removed}}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _get_months_in_range(self, start_date: date, end_date: date) -> List[date]:
|
||||
"""
|
||||
获取日期范围内的所有月份(月第一天)
|
||||
"""
|
||||
months = []
|
||||
current = start_date.replace(day=1)
|
||||
end_month = end_date.replace(day=1)
|
||||
|
||||
while current <= end_month:
|
||||
months.append(current)
|
||||
# 下个月
|
||||
if current.month == 12:
|
||||
current = current.replace(year=current.year + 1, month=1)
|
||||
else:
|
||||
current = current.replace(month=current.month + 1)
|
||||
|
||||
return months
|
||||
|
||||
def _extract_daily_aggregates(
|
||||
self,
|
||||
site_id: int,
|
||||
months: List[date]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
从日度明细表提取并按月聚合
|
||||
"""
|
||||
if not months:
|
||||
return []
|
||||
|
||||
# 构建月份条件
|
||||
month_conditions = []
|
||||
for month in months:
|
||||
next_month = (month.replace(day=28) + timedelta(days=4)).replace(day=1)
|
||||
month_conditions.append(f"(stat_date >= '{month}' AND stat_date < '{next_month}')")
|
||||
|
||||
month_where = " OR ".join(month_conditions)
|
||||
|
||||
sql = f"""
|
||||
SELECT
|
||||
assistant_id,
|
||||
assistant_nickname,
|
||||
assistant_level_code,
|
||||
assistant_level_name,
|
||||
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
|
||||
COUNT(DISTINCT stat_date) AS work_days,
|
||||
SUM(total_service_count) AS total_service_count,
|
||||
SUM(base_service_count) AS base_service_count,
|
||||
SUM(bonus_service_count) AS bonus_service_count,
|
||||
SUM(total_hours) AS total_hours,
|
||||
SUM(base_hours) AS base_hours,
|
||||
SUM(bonus_hours) AS bonus_hours,
|
||||
SUM(total_ledger_amount) AS total_ledger_amount,
|
||||
SUM(base_ledger_amount) AS base_ledger_amount,
|
||||
SUM(bonus_ledger_amount) AS bonus_ledger_amount,
|
||||
SUM(unique_customers) AS total_unique_customers,
|
||||
SUM(unique_tables) AS total_unique_tables,
|
||||
SUM(trashed_seconds) AS trashed_seconds,
|
||||
SUM(trashed_count) AS trashed_count
|
||||
FROM billiards_dws.dws_assistant_daily_detail
|
||||
WHERE site_id = %s AND ({month_where})
|
||||
GROUP BY assistant_id, assistant_nickname, assistant_level_code, assistant_level_name,
|
||||
DATE_TRUNC('month', stat_date)
|
||||
"""
|
||||
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_assistant_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
    """Read assistant attributes from ``billiards_dwd.dim_assistant``.

    Only rows whose ``valid_to`` is NULL are selected.

    Returns:
        Mapping of ``assistant_id`` to the row dict (``assistant_id``,
        ``nickname``, ``assistant_level``, ``hire_date``).  If several rows
        share an id, the last one returned by the query wins.
    """
    sql = """
        SELECT
            site_assistant_id AS assistant_id,
            nickname,
            assistant_level,
            entry_date AS hire_date
        FROM billiards_dwd.dim_assistant
        WHERE site_id = %s
          AND valid_to IS NULL -- 当前有效记录
    """
    fetched = self.db.query(sql, (site_id,))
    return {entry['assistant_id']: entry for entry in map(dict, fetched or [])}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据转换方法
|
||||
# ==========================================================================
|
||||
|
||||
def _process_month(
    self,
    daily_aggregates: List[Dict[str, Any]],
    assistant_info: Dict[int, Dict[str, Any]],
    month: date,
    site_id: int
) -> List[Dict[str, Any]]:
    """Build the summary records for a single month.

    For every aggregate row whose ``stat_month`` equals ``month`` this
    computes effective hours (total minus trashed), decides whether the
    assistant is a new hire for that month, matches a performance tier and
    assembles the output record.  Ranks are filled in at the end by
    ``_calculate_ranks``.

    Args:
        daily_aggregates: Rows produced by ``_extract_daily_aggregates``.
        assistant_info: Mapping produced by ``_extract_assistant_info``.
        month: First day of the month being processed.
        site_id: Site written into every record.

    Returns:
        The records for ``month``; an empty list when no aggregate matches.
    """
    rows_for_month = [row for row in daily_aggregates if row.get('stat_month') == month]
    if not rows_for_month:
        return []

    records = []
    for row in rows_for_month:
        assistant_id = row.get('assistant_id')
        profile = assistant_info.get(assistant_id, {})

        # Effective performance = total hours minus trashed hours.
        total_hours = self.safe_decimal(row.get('total_hours', 0))
        trashed_hours = self.seconds_to_hours(self.safe_int(row.get('trashed_seconds', 0)))
        effective_hours = total_hours - trashed_hours

        # New-hire check; datetime values are narrowed to a date first.
        hire_date = profile.get('hire_date')
        is_new_hire = False
        if hire_date:
            hire_date = hire_date.date() if isinstance(hire_date, datetime) else hire_date
            is_new_hire = self.is_new_hire_in_month(hire_date, month)

        # New hires are tiered on projected hours; hires after the 25th are
        # additionally capped at tier level 3.
        tier_hours, tier_cap = effective_hours, None
        if is_new_hire:
            tier_hours = self._calc_new_hire_tier_hours(
                effective_hours, self.safe_int(row.get('work_days', 0))
            )
            if hire_date and hire_date.day > 25:
                tier_cap = 3
        tier = self.get_performance_tier(
            tier_hours,
            is_new_hire,
            effective_date=month,
            max_tier_level=tier_cap
        )

        # Level as of month end; the aggregate's own level is the fallback.
        level_info = self.get_assistant_level_asof(assistant_id, self._get_month_end(month))
        if level_info:
            level_code = level_info.get('level_code')
            level_name = level_info.get('level_name')
        else:
            level_code = row.get('assistant_level_code')
            level_name = row.get('assistant_level_name')

        tier_fields = tier or {}

        records.append({
            'site_id': site_id,
            'tenant_id': self.config.get("app.tenant_id", site_id),
            'assistant_id': assistant_id,
            'assistant_nickname': row.get('assistant_nickname'),
            'stat_month': month,
            'assistant_level_code': level_code,
            'assistant_level_name': level_name,
            'hire_date': hire_date,
            'is_new_hire': is_new_hire,
            'work_days': self.safe_int(row.get('work_days', 0)),
            'total_service_count': self.safe_int(row.get('total_service_count', 0)),
            'base_service_count': self.safe_int(row.get('base_service_count', 0)),
            'bonus_service_count': self.safe_int(row.get('bonus_service_count', 0)),
            'total_hours': total_hours,
            'base_hours': self.safe_decimal(row.get('base_hours', 0)),
            'bonus_hours': self.safe_decimal(row.get('bonus_hours', 0)),
            'effective_hours': effective_hours,
            'trashed_hours': trashed_hours,
            'total_ledger_amount': self.safe_decimal(row.get('total_ledger_amount', 0)),
            'base_ledger_amount': self.safe_decimal(row.get('base_ledger_amount', 0)),
            'bonus_ledger_amount': self.safe_decimal(row.get('bonus_ledger_amount', 0)),
            'unique_customers': self.safe_int(row.get('total_unique_customers', 0)),
            'unique_tables': self.safe_int(row.get('total_unique_tables', 0)),
            'avg_service_seconds': self._calc_avg_service_seconds(row),
            'tier_id': tier_fields.get('tier_id'),
            'tier_code': tier_fields.get('tier_code'),
            'tier_name': tier_fields.get('tier_name'),
            'rank_by_hours': None,  # filled in by _calculate_ranks below
            'rank_with_ties': None,  # filled in by _calculate_ranks below
        })

    self._calculate_ranks(records)
    return records
|
||||
|
||||
def _get_month_end(self, month: date) -> date:
|
||||
"""
|
||||
获取月末日期
|
||||
"""
|
||||
if month.month == 12:
|
||||
next_month = month.replace(year=month.year + 1, month=1, day=1)
|
||||
else:
|
||||
next_month = month.replace(month=month.month + 1, day=1)
|
||||
return next_month - timedelta(days=1)
|
||||
|
||||
def _calc_avg_service_seconds(self, agg: Dict[str, Any]) -> Decimal:
    """Return the average duration of one service, in seconds.

    Computed as ``total_hours * 3600 / total_service_count``; zero is
    returned when the service count is zero.
    """
    service_count = self.safe_int(agg.get('total_service_count', 0))
    if service_count == 0:
        # Avoid dividing by zero when nothing was served.
        return Decimal('0')

    seconds_served = self.safe_decimal(agg.get('total_hours', 0)) * Decimal('3600')
    return seconds_served / Decimal(str(service_count))
|
||||
|
||||
def _calc_new_hire_tier_hours(self, effective_hours: Decimal, work_days: int) -> Decimal:
|
||||
"""
|
||||
新入职定档:日均 * 30(仅用于定档,不影响奖金与排名)
|
||||
"""
|
||||
if work_days <= 0:
|
||||
return Decimal('0')
|
||||
return (effective_hours / Decimal(str(work_days))) * Decimal('30')
|
||||
|
||||
def _calculate_ranks(self, records: List[Dict[str, Any]]) -> None:
|
||||
"""
|
||||
计算排名(考虑并列)
|
||||
|
||||
Top3排名口径:按有效业绩总小时数排名,
|
||||
如遇并列则都算,比如2个第一,则记为2个第一,一个第三
|
||||
"""
|
||||
if not records:
|
||||
return
|
||||
|
||||
# 按有效业绩降序排序
|
||||
sorted_records = sorted(
|
||||
records,
|
||||
key=lambda x: x.get('effective_hours', Decimal('0')),
|
||||
reverse=True
|
||||
)
|
||||
|
||||
# 计算考虑并列的排名
|
||||
values = [
|
||||
(r.get('assistant_id'), r.get('effective_hours', Decimal('0')))
|
||||
for r in sorted_records
|
||||
]
|
||||
ranked = self.calculate_rank_with_ties(values)
|
||||
|
||||
# 创建排名映射
|
||||
rank_map = {
|
||||
assistant_id: (rank, dense_rank)
|
||||
for assistant_id, rank, dense_rank in ranked
|
||||
}
|
||||
|
||||
# 更新记录
|
||||
for record in records:
|
||||
assistant_id = record.get('assistant_id')
|
||||
if assistant_id in rank_map:
|
||||
rank, _ = rank_map[assistant_id]
|
||||
record['rank_by_hours'] = rank
|
||||
record['rank_with_ties'] = rank # 使用考虑并列的排名
|
||||
|
||||
def _delete_by_months(
|
||||
self,
|
||||
context: TaskContext,
|
||||
records: List[Dict[str, Any]]
|
||||
) -> int:
|
||||
"""
|
||||
按月份删除已存在的数据
|
||||
"""
|
||||
# 获取涉及的月份
|
||||
months = set(r.get('stat_month') for r in records if r.get('stat_month'))
|
||||
|
||||
if not months:
|
||||
return 0
|
||||
|
||||
target_table = self.get_target_table()
|
||||
full_table = f"{self.DWS_SCHEMA}.{target_table}"
|
||||
|
||||
total_deleted = 0
|
||||
with self.db.conn.cursor() as cur:
|
||||
for month in months:
|
||||
sql = f"""
|
||||
DELETE FROM {full_table}
|
||||
WHERE site_id = %s AND stat_month = %s
|
||||
"""
|
||||
cur.execute(sql, (context.store_id, month))
|
||||
total_deleted += cur.rowcount
|
||||
|
||||
return total_deleted
|
||||
|
||||
|
||||
# 便于外部导入
|
||||
__all__ = ['AssistantMonthlyTask']
|
||||
403
etl_billiards/tasks/dws/assistant_salary_task.py
Normal file
403
etl_billiards/tasks/dws/assistant_salary_task.py
Normal file
@@ -0,0 +1,403 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
助教工资计算任务
|
||||
|
||||
功能说明:
|
||||
以"助教+月份"为粒度,计算月度工资明细
|
||||
|
||||
数据来源:
|
||||
- dws_assistant_monthly_summary: 月度业绩汇总
|
||||
- dws_assistant_recharge_commission: 充值提成(Excel导入)
|
||||
- cfg_performance_tier: 绩效档位配置
|
||||
- cfg_assistant_level_price: 等级定价配置
|
||||
- cfg_bonus_rules: 奖金规则配置
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_assistant_salary_calc
|
||||
|
||||
更新策略:
|
||||
- 更新频率:月初计算上月工资
|
||||
- 幂等方式:delete-before-insert(按月份)
|
||||
|
||||
业务规则(来自DWS数据库处理需求.md):
|
||||
- 基础课收入 = 基础课小时数 × (客户支付价格 - 专业课抽成)
|
||||
例:中级助教基础课170小时,3档 = 170 × (108 - 13) = 16,150元
|
||||
- 附加课收入 = 附加课小时数 × 附加课价格 × (1 - 打赏课抽成比例)
|
||||
例:附加课15小时,3档 = 15 × 190 × (1 - 0.35) = 1,852.5元
|
||||
- 冲刺奖金:H>=190:300, H>=220:800(不累计,取最高档)
|
||||
- Top3奖金:1st:1000, 2nd:600, 3rd:400(并列都算)
|
||||
- 充值提成:来自dws_assistant_recharge_commission
|
||||
- SCD2口径:等级定价使用月份对应的历史值
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class AssistantSalaryTask(BaseDwsTask):
|
||||
"""
|
||||
助教工资计算任务
|
||||
|
||||
计算每个助教每月的工资明细:
|
||||
- 课时收入(基础课+附加课)
|
||||
- 扣款(档位扣款+其他)
|
||||
- 奖金(档位奖金+冲刺+Top3+充值提成+其他)
|
||||
- 应发工资
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the code that identifies the assistant salary task."""
    task_code = "DWS_ASSISTANT_SALARY"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the unqualified name of the table this task writes to."""
    table_name = "dws_assistant_salary_calc"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names that make up the target table's primary key."""
    key_columns = ("site_id", "assistant_id", "salary_month")
    return list(key_columns)
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
# ==========================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """Gather the inputs for the salary calculation.

    The salary month is derived from the end of the task window (see
    ``_get_salary_month``); the monthly summary and recharge commission
    rows for that month are then queried and the configuration cache is
    loaded.

    Returns:
        Dict with keys ``monthly_summary``, ``recharge_commission``,
        ``salary_month`` and ``site_id``.
    """
    window_end = context.window_end
    # datetime-like values are narrowed to their date part.
    end_date = window_end.date() if hasattr(window_end, 'date') else window_end
    salary_month = self._get_salary_month(end_date)
    store = context.store_id

    self.logger.info(
        "%s: 提取数据,工资月份 %s",
        self.get_task_code(), salary_month
    )

    summaries = self._extract_monthly_summary(store, salary_month)
    commissions = self._extract_recharge_commission(store, salary_month)
    self.load_config_cache()

    return {
        'monthly_summary': summaries,
        'recharge_commission': commissions,
        'salary_month': salary_month,
        'site_id': store
    }
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """Compute one salary record per monthly summary row.

    Recharge commission rows are first summed per assistant (rows with a
    falsy ``assistant_id`` are ignored); each summary row is then passed to
    ``_calculate_salary``.

    Args:
        extracted: The dict produced by ``extract``.
        context: Task context (not read here).
    """
    summaries = extracted['monthly_summary']
    commissions = extracted['recharge_commission']
    salary_month = extracted['salary_month']
    store = extracted['site_id']

    self.logger.info(
        "%s: 转换数据,%d 条月度汇总记录",
        self.get_task_code(), len(summaries)
    )

    # Total commission per assistant.
    commission_by_assistant = {}
    for entry in commissions:
        owner = entry.get('assistant_id')
        if not owner:
            continue
        running_total = commission_by_assistant.get(owner, Decimal('0'))
        commission_by_assistant[owner] = running_total + self.safe_decimal(
            entry.get('commission_amount', 0)
        )

    return [
        self._calculate_salary(summary, commission_by_assistant, salary_month, store)
        for summary in summaries
    ]
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """Write the salary records using delete-then-insert.

    Rows already stored for the salary months present in ``transformed``
    are deleted first, then the new rows are bulk-inserted.

    Returns:
        A dict with a ``counts`` section; when rows were written it also
        carries ``extra`` with the number of deleted rows.
    """
    if not transformed:
        self.logger.info("%s: 无数据需要写入", self.get_task_code())
        return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

    # Delete first so a re-run for the same month does not duplicate rows.
    removed = self._delete_by_month(context, transformed)
    written = self.bulk_insert(transformed)

    self.logger.info(
        "%s: 加载完成,删除 %d 行,插入 %d 行",
        self.get_task_code(), removed, written
    )

    counts = {
        "fetched": len(transformed),
        "inserted": written,
        "updated": 0,
        "skipped": 0,
        "errors": 0
    }
    return {"counts": counts, "extra": {"deleted": removed}}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _get_salary_month(self, end_date: date) -> date:
|
||||
"""
|
||||
获取工资月份(默认为上月)
|
||||
"""
|
||||
# 如果是月初,计算上月工资
|
||||
if end_date.day <= 5:
|
||||
if end_date.month == 1:
|
||||
return date(end_date.year - 1, 12, 1)
|
||||
else:
|
||||
return date(end_date.year, end_date.month - 1, 1)
|
||||
else:
|
||||
# 否则计算当月(可能是调整)
|
||||
return end_date.replace(day=1)
|
||||
|
||||
def _extract_monthly_summary(
    self,
    site_id: int,
    salary_month: date
) -> List[Dict[str, Any]]:
    """Read the monthly summary rows for one site and month.

    Rows come from ``billiards_dws.dws_assistant_monthly_summary`` where
    ``stat_month`` equals ``salary_month``.

    Returns:
        One dict per row; an empty list when the query returns nothing.
    """
    sql = """
        SELECT
            assistant_id,
            assistant_nickname,
            stat_month,
            assistant_level_code,
            assistant_level_name,
            hire_date,
            is_new_hire,
            effective_hours,
            base_hours,
            bonus_hours,
            tier_id,
            tier_code,
            tier_name,
            rank_with_ties
        FROM billiards_dws.dws_assistant_monthly_summary
        WHERE site_id = %s AND stat_month = %s
    """
    fetched = self.db.query(sql, (site_id, salary_month))
    if not fetched:
        return []
    return list(map(dict, fetched))
|
||||
|
||||
def _extract_recharge_commission(
    self,
    site_id: int,
    salary_month: date
) -> List[Dict[str, Any]]:
    """Read the recharge commission rows for one site and month.

    Rows come from ``billiards_dws.dws_assistant_recharge_commission``
    where ``commission_month`` equals ``salary_month``.

    Returns:
        One dict per row (``assistant_id``, ``commission_amount``); an
        empty list when the query returns nothing.
    """
    sql = """
        SELECT
            assistant_id,
            commission_amount
        FROM billiards_dws.dws_assistant_recharge_commission
        WHERE site_id = %s AND commission_month = %s
    """
    fetched = self.db.query(sql, (site_id, salary_month))
    if not fetched:
        return []
    return list(map(dict, fetched))
|
||||
|
||||
# ==========================================================================
|
||||
# 工资计算方法
|
||||
# ==========================================================================
|
||||
|
||||
def _calculate_salary(
    self,
    summary: Dict[str, Any],
    commission_index: Dict[int, Decimal],
    salary_month: date,
    site_id: int
) -> Dict[str, Any]:
    """Compute the salary record of one assistant for one month.

    Formulas implemented here:

    * base income  = base_hours * (base_course_price - base_deduction)
    * bonus income = bonus_hours * bonus_course_price * (1 - bonus_deduction_ratio)
    * total bonus  = sprint bonus + top-rank bonus + recharge commission + other bonus
    * gross salary = base income + bonus income + total bonus

    Prices come from ``get_level_price`` (defaults 98 / 190 when a value is
    missing).  The tier is looked up by the summary's ``tier_id`` and, if
    that yields nothing, re-matched from the effective hours.  Without any
    tier the deduction defaults are 18 per hour and a ratio of 0.40.

    Args:
        summary: One row of the monthly summary.
        commission_index: Total recharge commission per assistant id.
        salary_month: First day of the salary month.
        site_id: Site written into the record.

    Returns:
        The record to insert into the salary table.
    """
    assistant_id = summary.get('assistant_id')
    level_code = summary.get('assistant_level_code')
    effective_hours = self.safe_decimal(summary.get('effective_hours', 0))
    base_hours = self.safe_decimal(summary.get('base_hours', 0))
    bonus_hours = self.safe_decimal(summary.get('bonus_hours', 0))
    is_new_hire = summary.get('is_new_hire', False)
    rank = summary.get('rank_with_ties')

    # Level pricing for the salary month, with per-field defaults.
    price_cfg = self.get_level_price(level_code, salary_month) or {}
    base_course_price = self.safe_decimal(price_cfg.get('base_course_price', 98))
    bonus_course_price = self.safe_decimal(price_cfg.get('bonus_course_price', 190))

    # Tier: prefer the id stored on the summary, otherwise re-match by hours.
    # NOTE(review): the record below keeps the summary's tier_id even when
    # this fallback supplies tier_code/tier_name — confirm that is intended.
    tier = self.get_performance_tier_by_id(summary.get('tier_id'), salary_month)
    if not tier:
        tier = self.get_performance_tier(
            effective_hours,
            is_new_hire,
            effective_date=salary_month
        )

    if tier:
        base_deduction = self.safe_decimal(tier.get('base_deduction', 18))
        bonus_deduction_ratio = self.safe_decimal(tier.get('bonus_deduction_ratio', 0.40))
        vacation_days = tier.get('vacation_days', 0)
        vacation_unlimited = tier.get('vacation_unlimited', False)
        tier_code = tier.get('tier_code')
        tier_name = tier.get('tier_name')
    else:
        base_deduction = Decimal('18')
        bonus_deduction_ratio = Decimal('0.40')
        vacation_days = 0
        vacation_unlimited = False
        tier_code = None
        tier_name = None

    # Course income.
    base_income = base_hours * (base_course_price - base_deduction)
    bonus_income = bonus_hours * bonus_course_price * (Decimal('1') - bonus_deduction_ratio)
    total_course_income = base_income + bonus_income

    # Bonuses.
    sprint_bonus = self.calculate_sprint_bonus(effective_hours, salary_month)
    if rank and rank <= 3:
        top_rank_bonus = self.calculate_top_rank_bonus(rank, salary_month)
    else:
        top_rank_bonus = Decimal('0')
    recharge_commission = commission_index.get(assistant_id, Decimal('0'))
    other_bonus = Decimal('0')  # reserved for other bonuses
    total_bonus = sprint_bonus + top_rank_bonus + recharge_commission + other_bonus

    gross_salary = total_course_income + total_bonus

    return {
        'site_id': site_id,
        'tenant_id': self.config.get("app.tenant_id", site_id),
        'assistant_id': assistant_id,
        'assistant_nickname': summary.get('assistant_nickname'),
        'salary_month': salary_month,
        'assistant_level_code': level_code,
        'assistant_level_name': summary.get('assistant_level_name'),
        'hire_date': summary.get('hire_date'),
        'is_new_hire': is_new_hire,
        'effective_hours': effective_hours,
        'base_hours': base_hours,
        'bonus_hours': bonus_hours,
        'tier_id': summary.get('tier_id'),
        'tier_code': tier_code,
        'tier_name': tier_name,
        'rank_with_ties': rank,
        # pricing
        'base_course_price': base_course_price,
        'bonus_course_price': bonus_course_price,
        'base_deduction': base_deduction,
        'bonus_deduction_ratio': bonus_deduction_ratio,
        # income breakdown
        'base_income': base_income,
        'bonus_income': bonus_income,
        'total_course_income': total_course_income,
        # bonus breakdown
        'sprint_bonus': sprint_bonus,
        'top_rank_bonus': top_rank_bonus,
        'recharge_commission': recharge_commission,
        'other_bonus': other_bonus,
        'total_bonus': total_bonus,
        # gross salary
        'gross_salary': gross_salary,
        # vacation
        'vacation_days': vacation_days,
        'vacation_unlimited': vacation_unlimited,
        'calc_notes': self._build_calc_notes(summary, tier, sprint_bonus, top_rank_bonus),
    }
|
||||
|
||||
def _build_calc_notes(
|
||||
self,
|
||||
summary: Dict[str, Any],
|
||||
tier: Optional[Dict[str, Any]],
|
||||
sprint_bonus: Decimal,
|
||||
top_rank_bonus: Decimal
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
构建计算备注
|
||||
"""
|
||||
notes = []
|
||||
|
||||
if summary.get('is_new_hire'):
|
||||
notes.append("新入职首月")
|
||||
|
||||
if tier:
|
||||
notes.append(f"档位: {tier.get('tier_name', 'N/A')}")
|
||||
|
||||
if sprint_bonus > 0:
|
||||
notes.append(f"冲刺奖金: {sprint_bonus}")
|
||||
|
||||
if top_rank_bonus > 0:
|
||||
rank = summary.get('rank_with_ties')
|
||||
notes.append(f"Top{rank}奖金: {top_rank_bonus}")
|
||||
|
||||
return "; ".join(notes) if notes else None
|
||||
|
||||
def _delete_by_month(
|
||||
self,
|
||||
context: TaskContext,
|
||||
records: List[Dict[str, Any]]
|
||||
) -> int:
|
||||
"""
|
||||
按月份删除已存在的数据
|
||||
"""
|
||||
months = set(r.get('salary_month') for r in records if r.get('salary_month'))
|
||||
|
||||
if not months:
|
||||
return 0
|
||||
|
||||
target_table = self.get_target_table()
|
||||
full_table = f"{self.DWS_SCHEMA}.{target_table}"
|
||||
|
||||
total_deleted = 0
|
||||
with self.db.conn.cursor() as cur:
|
||||
for month in months:
|
||||
sql = f"""
|
||||
DELETE FROM {full_table}
|
||||
WHERE site_id = %s AND salary_month = %s
|
||||
"""
|
||||
cur.execute(sql, (context.store_id, month))
|
||||
total_deleted += cur.rowcount
|
||||
|
||||
return total_deleted
|
||||
|
||||
|
||||
# 便于外部导入
|
||||
__all__ = ['AssistantSalaryTask']
|
||||
1223
etl_billiards/tasks/dws/base_dws_task.py
Normal file
1223
etl_billiards/tasks/dws/base_dws_task.py
Normal file
File diff suppressed because it is too large
Load Diff
574
etl_billiards/tasks/dws/finance_daily_task.py
Normal file
574
etl_billiards/tasks/dws/finance_daily_task.py
Normal file
@@ -0,0 +1,574 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
财务日度汇总任务
|
||||
|
||||
功能说明:
|
||||
以"日期"为粒度,汇总当日财务数据
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表
|
||||
- dwd_groupbuy_redemption: 团购核销
|
||||
- dwd_recharge_order: 充值订单
|
||||
- dws_finance_expense_summary: 支出汇总(Excel导入)
|
||||
- dws_platform_settlement: 平台回款/服务费(Excel导入)
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_finance_daily_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每小时更新当日数据
|
||||
- 幂等方式:delete-before-insert(按日期)
|
||||
|
||||
业务规则:
|
||||
- 发生额:table_charge_money + goods_money + assistant_pd_money + assistant_cx_money
|
||||
- 团购优惠:coupon_amount - 团购支付金额
|
||||
- 团购支付:pl_coupon_sale_amount 或关联 groupbuy_redemption.ledger_unit_price
|
||||
- 首充/续充:通过 is_first 字段区分
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import calendar
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class FinanceDailyTask(BaseDwsTask):
|
||||
"""
|
||||
财务日度汇总任务
|
||||
|
||||
汇总每日的:
|
||||
- 发生额(正价)
|
||||
- 优惠拆分
|
||||
- 确认收入
|
||||
- 现金流(流入/流出)
|
||||
- 充值统计(首充/续充)
|
||||
- 订单统计
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the code that identifies the finance daily summary task."""
    task_code = "DWS_FINANCE_DAILY"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the unqualified name of the table this task writes to."""
    table_name = "dws_finance_daily_summary"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names that make up the target table's primary key."""
    key_columns = ("site_id", "stat_date")
    return list(key_columns)
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
# ==========================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """Gather the per-day inputs for the finance daily summary.

    The window bounds on ``context`` are reduced to dates (when they expose
    a ``date()`` method) and six per-day summaries are queried for that
    range: settlements, group-buy redemptions, recharges, expenses,
    platform settlements and big-customer discounts.

    Returns:
        Dict holding the six summaries plus ``start_date``, ``end_date``
        and ``site_id``.
    """
    def _as_date(value):
        # datetime-like values are narrowed to their date part.
        return value.date() if hasattr(value, 'date') else value

    first_day = _as_date(context.window_start)
    last_day = _as_date(context.window_end)
    store = context.store_id

    self.logger.info(
        "%s: 提取数据,日期范围 %s ~ %s",
        self.get_task_code(), first_day, last_day
    )

    query_args = (store, first_day, last_day)
    settlements = self._extract_settlement_summary(*query_args)
    groupbuys = self._extract_groupbuy_summary(*query_args)
    recharges = self._extract_recharge_summary(*query_args)
    expenses = self._extract_expense_summary(*query_args)
    platforms = self._extract_platform_summary(*query_args)
    # Used to split manual discounts.
    big_customers = self._extract_big_customer_discounts(*query_args)

    return {
        'settlement_summary': settlements,
        'groupbuy_summary': groupbuys,
        'recharge_summary': recharges,
        'expense_summary': expenses,
        'platform_summary': platforms,
        'big_customer_summary': big_customers,
        'start_date': first_day,
        'end_date': last_day,
        'site_id': store
    }
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """Merge the per-day summaries into one record per date.

    The set of dates is taken from the settlement, recharge, expense and
    platform summaries; group-buy and big-customer rows are only looked up
    for those dates.  A source without a row for a date contributes an
    empty dict to ``_build_daily_record``.

    Args:
        extracted: The dict produced by ``extract``.
        context: Task context (not read here).

    Returns:
        Daily records in ascending date order.
    """
    settlements = extracted['settlement_summary']
    groupbuys = extracted['groupbuy_summary']
    recharges = extracted['recharge_summary']
    expenses = extracted['expense_summary']
    platforms = extracted['platform_summary']
    big_customers = extracted['big_customer_summary']
    store = extracted['site_id']

    self.logger.info(
        "%s: 转换数据,%d 天结账数据,%d 天充值数据",
        self.get_task_code(), len(settlements), len(recharges)
    )

    # Dates that appear in any of the four driving sources.
    all_dates = set()
    for entry in settlements + recharges + expenses + platforms:
        day = entry.get('stat_date')
        if day:
            all_dates.add(day)

    def _by_date(rows):
        # Later rows replace earlier ones for the same date.
        return {row['stat_date']: row for row in rows}

    settle_by_date = _by_date(settlements)
    groupbuy_by_date = _by_date(groupbuys)
    recharge_by_date = _by_date(recharges)
    expense_by_date = _by_date(expenses)
    platform_by_date = _by_date(platforms)
    big_customer_by_date = _by_date(big_customers)

    return [
        self._build_daily_record(
            day,
            settle_by_date.get(day, {}),
            groupbuy_by_date.get(day, {}),
            recharge_by_date.get(day, {}),
            expense_by_date.get(day, {}),
            platform_by_date.get(day, {}),
            big_customer_by_date.get(day, {}),
            store
        )
        for day in sorted(all_dates)
    ]
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """
    Write the daily records using delete-before-insert.

    Args:
        transformed: Records produced by ``transform``.
        context: Task context, forwarded to ``delete_existing_data``.

    Returns:
        ``{"counts": {...}}`` with all-zero counters when there is nothing to
        write; otherwise the counters plus ``"extra": {"deleted": n}``.
    """
    counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}

    if not transformed:
        # Nothing to write: existing rows are left untouched.
        self.logger.info("%s: 无数据需要写入", self.get_task_code())
        return {"counts": counts}

    deleted = self.delete_existing_data(context, date_col="stat_date")
    inserted = self.bulk_insert(transformed)

    self.logger.info(
        "%s: 加载完成,删除 %d 行,插入 %d 行",
        self.get_task_code(), deleted, inserted
    )

    counts["fetched"] = len(transformed)
    counts["inserted"] = inserted
    return {"counts": counts, "extra": {"deleted": deleted}}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _extract_settlement_summary(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取结账单日汇总
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(create_time) AS stat_date,
|
||||
COUNT(*) AS order_count,
|
||||
COUNT(CASE WHEN member_id != 0 AND member_id IS NOT NULL THEN 1 END) AS member_order_count,
|
||||
COUNT(CASE WHEN member_id = 0 OR member_id IS NULL THEN 1 END) AS guest_order_count,
|
||||
-- 发生额(正价)
|
||||
SUM(table_charge_money) AS table_fee_amount,
|
||||
SUM(goods_money) AS goods_amount,
|
||||
SUM(assistant_pd_money) AS assistant_pd_amount,
|
||||
SUM(assistant_cx_money) AS assistant_cx_amount,
|
||||
SUM(table_charge_money + goods_money + assistant_pd_money + assistant_cx_money) AS gross_amount,
|
||||
-- 支付
|
||||
SUM(pay_amount) AS cash_pay_amount,
|
||||
SUM(recharge_card_amount) AS card_pay_amount,
|
||||
SUM(balance_amount) AS balance_pay_amount,
|
||||
SUM(gift_card_amount) AS gift_card_pay_amount,
|
||||
-- 优惠
|
||||
SUM(coupon_amount) AS coupon_amount,
|
||||
SUM(adjust_amount) AS adjust_amount,
|
||||
SUM(member_discount_amount) AS member_discount_amount,
|
||||
SUM(rounding_amount) AS rounding_amount,
|
||||
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount,
|
||||
-- 消费金额
|
||||
SUM(consume_money) AS total_consume
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND DATE(create_time) >= %s
|
||||
AND DATE(create_time) <= %s
|
||||
GROUP BY DATE(create_time)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_groupbuy_summary(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取团购核销日汇总
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(redeem_time) AS stat_date,
|
||||
COUNT(*) AS groupbuy_count,
|
||||
SUM(ledger_unit_price) AS groupbuy_pay_total
|
||||
FROM billiards_dwd.dwd_groupbuy_redemption
|
||||
WHERE site_id = %s
|
||||
AND DATE(redeem_time) >= %s
|
||||
AND DATE(redeem_time) <= %s
|
||||
GROUP BY DATE(redeem_time)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_recharge_summary(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取充值日汇总
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(create_time) AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_money + gift_money) AS recharge_total,
|
||||
SUM(pay_money) AS recharge_cash,
|
||||
SUM(gift_money) AS recharge_gift,
|
||||
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
|
||||
COUNT(CASE WHEN is_first = 0 OR is_first IS NULL THEN 1 END) AS renewal_count,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count
|
||||
FROM billiards_dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND DATE(create_time) >= %s
|
||||
AND DATE(create_time) <= %s
|
||||
GROUP BY DATE(create_time)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_expense_summary(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取支出汇总(来自导入表,按月分摊到日)
|
||||
"""
|
||||
if start_date > end_date:
|
||||
return []
|
||||
|
||||
start_month = start_date.replace(day=1)
|
||||
end_month = end_date.replace(day=1)
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
expense_month,
|
||||
SUM(expense_amount) AS expense_amount
|
||||
FROM billiards_dws.dws_finance_expense_summary
|
||||
WHERE site_id = %s
|
||||
AND expense_month >= %s
|
||||
AND expense_month <= %s
|
||||
GROUP BY expense_month
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_month, end_month))
|
||||
if not rows:
|
||||
return []
|
||||
|
||||
daily_totals: Dict[date, Decimal] = {}
|
||||
for row in rows:
|
||||
row_dict = dict(row)
|
||||
month_date = row_dict.get('expense_month')
|
||||
if not month_date:
|
||||
continue
|
||||
amount = self.safe_decimal(row_dict.get('expense_amount', 0))
|
||||
days_in_month = calendar.monthrange(month_date.year, month_date.month)[1]
|
||||
daily_amount = amount / Decimal(str(days_in_month)) if days_in_month > 0 else Decimal('0')
|
||||
|
||||
for day in range(1, days_in_month + 1):
|
||||
stat_date = date(month_date.year, month_date.month, day)
|
||||
if stat_date < start_date or stat_date > end_date:
|
||||
continue
|
||||
daily_totals[stat_date] = daily_totals.get(stat_date, Decimal('0')) + daily_amount
|
||||
|
||||
return [
|
||||
{'stat_date': stat_date, 'expense_amount': amount}
|
||||
for stat_date, amount in sorted(daily_totals.items())
|
||||
]
|
||||
|
||||
def _extract_platform_summary(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取平台回款/服务费汇总(来自导入表)
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
settlement_date AS stat_date,
|
||||
SUM(settlement_amount) AS settlement_amount,
|
||||
SUM(commission_amount) AS commission_amount,
|
||||
SUM(service_fee) AS service_fee
|
||||
FROM billiards_dws.dws_platform_settlement
|
||||
WHERE site_id = %s
|
||||
AND settlement_date >= %s
|
||||
AND settlement_date <= %s
|
||||
GROUP BY settlement_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_big_customer_discounts(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取大客户优惠(用于拆分手动调整)
|
||||
"""
|
||||
member_ids = self._parse_id_list(self.config.get("dws.discount.big_customer_member_ids"))
|
||||
order_ids = self._parse_id_list(self.config.get("dws.discount.big_customer_order_ids"))
|
||||
if not member_ids and not order_ids:
|
||||
return []
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
adjust_amount
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND pay_time >= %s
|
||||
AND pay_time < %s + INTERVAL '1 day'
|
||||
AND adjust_amount != 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
if not rows:
|
||||
return []
|
||||
|
||||
result: Dict[date, Dict[str, Any]] = {}
|
||||
for row in rows:
|
||||
row_dict = dict(row)
|
||||
stat_date = row_dict.get('stat_date')
|
||||
if not stat_date:
|
||||
continue
|
||||
order_id = row_dict.get('order_settle_id')
|
||||
member_id = row_dict.get('member_id')
|
||||
if order_id not in order_ids and member_id not in member_ids:
|
||||
continue
|
||||
amount = abs(self.safe_decimal(row_dict.get('adjust_amount', 0)))
|
||||
entry = result.setdefault(stat_date, {'stat_date': stat_date, 'big_customer_amount': Decimal('0'), 'big_customer_count': 0})
|
||||
entry['big_customer_amount'] += amount
|
||||
entry['big_customer_count'] += 1
|
||||
|
||||
return list(result.values())
|
||||
|
||||
def _parse_id_list(self, value: Any) -> set:
|
||||
if not value:
|
||||
return set()
|
||||
if isinstance(value, str):
|
||||
items = [v.strip() for v in value.split(",") if v.strip()]
|
||||
return {int(v) for v in items if v.isdigit()}
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
result = set()
|
||||
for item in value:
|
||||
if item is None:
|
||||
continue
|
||||
try:
|
||||
result.add(int(item))
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
return result
|
||||
return set()
|
||||
|
||||
# ==========================================================================
|
||||
# 数据转换方法
|
||||
# ==========================================================================
|
||||
|
||||
def _build_daily_record(
|
||||
self,
|
||||
stat_date: date,
|
||||
settle: Dict[str, Any],
|
||||
groupbuy: Dict[str, Any],
|
||||
recharge: Dict[str, Any],
|
||||
expense: Dict[str, Any],
|
||||
platform: Dict[str, Any],
|
||||
big_customer: Dict[str, Any],
|
||||
site_id: int
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
构建日度财务记录
|
||||
"""
|
||||
# 发生额
|
||||
gross_amount = self.safe_decimal(settle.get('gross_amount', 0))
|
||||
table_fee_amount = self.safe_decimal(settle.get('table_fee_amount', 0))
|
||||
goods_amount = self.safe_decimal(settle.get('goods_amount', 0))
|
||||
assistant_pd_amount = self.safe_decimal(settle.get('assistant_pd_amount', 0))
|
||||
assistant_cx_amount = self.safe_decimal(settle.get('assistant_cx_amount', 0))
|
||||
|
||||
# 支付
|
||||
cash_pay_amount = self.safe_decimal(settle.get('cash_pay_amount', 0))
|
||||
card_pay_amount = self.safe_decimal(settle.get('card_pay_amount', 0))
|
||||
balance_pay_amount = self.safe_decimal(settle.get('balance_pay_amount', 0))
|
||||
gift_card_pay_amount = self.safe_decimal(settle.get('gift_card_pay_amount', 0))
|
||||
|
||||
# 优惠
|
||||
coupon_amount = self.safe_decimal(settle.get('coupon_amount', 0))
|
||||
pl_coupon_sale = self.safe_decimal(settle.get('pl_coupon_sale_amount', 0))
|
||||
groupbuy_pay = self.safe_decimal(groupbuy.get('groupbuy_pay_total', 0))
|
||||
|
||||
# 团购支付金额:优先使用pl_coupon_sale_amount,否则使用groupbuy核销金额
|
||||
if pl_coupon_sale > 0:
|
||||
groupbuy_pay_amount = pl_coupon_sale
|
||||
else:
|
||||
groupbuy_pay_amount = groupbuy_pay
|
||||
|
||||
# 团购优惠 = 团购抵消台费 - 团购支付金额
|
||||
discount_groupbuy = coupon_amount - groupbuy_pay_amount
|
||||
if discount_groupbuy < 0:
|
||||
discount_groupbuy = Decimal('0')
|
||||
|
||||
adjust_amount = self.safe_decimal(settle.get('adjust_amount', 0))
|
||||
member_discount = self.safe_decimal(settle.get('member_discount_amount', 0))
|
||||
rounding_amount = self.safe_decimal(settle.get('rounding_amount', 0))
|
||||
big_customer_amount = self.safe_decimal(big_customer.get('big_customer_amount', 0))
|
||||
other_discount = adjust_amount - big_customer_amount
|
||||
if other_discount < 0:
|
||||
other_discount = Decimal('0')
|
||||
|
||||
# 优惠合计
|
||||
discount_total = discount_groupbuy + member_discount + gift_card_pay_amount + adjust_amount + rounding_amount
|
||||
|
||||
# 确认收入
|
||||
confirmed_income = gross_amount - discount_total
|
||||
|
||||
# 现金流
|
||||
platform_settlement_amount = self.safe_decimal(platform.get('settlement_amount', 0))
|
||||
platform_fee_amount = (
|
||||
self.safe_decimal(platform.get('commission_amount', 0))
|
||||
+ self.safe_decimal(platform.get('service_fee', 0))
|
||||
)
|
||||
recharge_cash_inflow = self.safe_decimal(recharge.get('recharge_cash', 0))
|
||||
platform_inflow = platform_settlement_amount if platform_settlement_amount > 0 else groupbuy_pay_amount
|
||||
cash_inflow_total = cash_pay_amount + platform_inflow + recharge_cash_inflow
|
||||
|
||||
cash_outflow_total = self.safe_decimal(expense.get('expense_amount', 0)) + platform_fee_amount
|
||||
cash_balance_change = cash_inflow_total - cash_outflow_total
|
||||
|
||||
# 卡消费
|
||||
cash_card_consume = card_pay_amount + balance_pay_amount
|
||||
gift_card_consume = gift_card_pay_amount
|
||||
card_consume_total = cash_card_consume + gift_card_consume
|
||||
|
||||
# 充值统计
|
||||
recharge_count = self.safe_int(recharge.get('recharge_count', 0))
|
||||
recharge_total = self.safe_decimal(recharge.get('recharge_total', 0))
|
||||
recharge_cash = self.safe_decimal(recharge.get('recharge_cash', 0))
|
||||
recharge_gift = self.safe_decimal(recharge.get('recharge_gift', 0))
|
||||
first_recharge_count = self.safe_int(recharge.get('first_recharge_count', 0))
|
||||
first_recharge_amount = self.safe_decimal(recharge.get('first_recharge_total', 0))
|
||||
renewal_count = self.safe_int(recharge.get('renewal_count', 0))
|
||||
renewal_amount = self.safe_decimal(recharge.get('renewal_total', 0))
|
||||
|
||||
# 订单统计
|
||||
order_count = self.safe_int(settle.get('order_count', 0))
|
||||
member_order_count = self.safe_int(settle.get('member_order_count', 0))
|
||||
guest_order_count = self.safe_int(settle.get('guest_order_count', 0))
|
||||
avg_order_amount = gross_amount / order_count if order_count > 0 else Decimal('0')
|
||||
|
||||
return {
|
||||
'site_id': site_id,
|
||||
'tenant_id': self.config.get("app.tenant_id", site_id),
|
||||
'stat_date': stat_date,
|
||||
# 发生额
|
||||
'gross_amount': gross_amount,
|
||||
'table_fee_amount': table_fee_amount,
|
||||
'goods_amount': goods_amount,
|
||||
'assistant_pd_amount': assistant_pd_amount,
|
||||
'assistant_cx_amount': assistant_cx_amount,
|
||||
# 优惠
|
||||
'discount_total': discount_total,
|
||||
'discount_groupbuy': discount_groupbuy,
|
||||
'discount_vip': member_discount,
|
||||
'discount_gift_card': gift_card_pay_amount,
|
||||
'discount_manual': adjust_amount,
|
||||
'discount_rounding': rounding_amount,
|
||||
'discount_other': other_discount,
|
||||
# 确认收入
|
||||
'confirmed_income': confirmed_income,
|
||||
# 现金流
|
||||
'cash_inflow_total': cash_inflow_total,
|
||||
'cash_pay_amount': cash_pay_amount,
|
||||
'groupbuy_pay_amount': groupbuy_pay_amount,
|
||||
'platform_settlement_amount': platform_settlement_amount,
|
||||
'platform_fee_amount': platform_fee_amount,
|
||||
'recharge_cash_inflow': recharge_cash_inflow,
|
||||
'card_consume_total': card_consume_total,
|
||||
'cash_card_consume': cash_card_consume,
|
||||
'gift_card_consume': gift_card_consume,
|
||||
'cash_outflow_total': cash_outflow_total,
|
||||
'cash_balance_change': cash_balance_change,
|
||||
# 充值统计
|
||||
'recharge_count': recharge_count,
|
||||
'recharge_total': recharge_total,
|
||||
'recharge_cash': recharge_cash,
|
||||
'recharge_gift': recharge_gift,
|
||||
'first_recharge_count': first_recharge_count,
|
||||
'first_recharge_amount': first_recharge_amount,
|
||||
'renewal_count': renewal_count,
|
||||
'renewal_amount': renewal_amount,
|
||||
# 订单统计
|
||||
'order_count': order_count,
|
||||
'member_order_count': member_order_count,
|
||||
'guest_order_count': guest_order_count,
|
||||
'avg_order_amount': avg_order_amount,
|
||||
}
|
||||
|
||||
|
||||
# 便于外部导入
|
||||
# Public API of this module: the only name exported by a star-import.
__all__ = ['FinanceDailyTask']
|
||||
410
etl_billiards/tasks/dws/finance_discount_task.py
Normal file
410
etl_billiards/tasks/dws/finance_discount_task.py
Normal file
@@ -0,0 +1,410 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
优惠明细分析任务
|
||||
|
||||
功能说明:
|
||||
以"日期+优惠类型"为粒度,分析优惠构成
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表(优惠字段)
|
||||
- dwd_groupbuy_redemption: 团购核销(团购实付金额)
|
||||
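- dwd_member_balance_change: 余额变动(赠送卡消费) (NOTE: not queried by the extract methods of this task; gift-card amounts are read from dwd_settlement_head.gift_card_amount)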
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_finance_discount_detail
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按日期)
|
||||
|
||||
业务规则:
|
||||
- 团购优惠 (GROUPBUY): coupon_amount - 团购实付金额
|
||||
- 会员折扣 (VIP): member_discount_amount
|
||||
- 赠送卡抵扣 (GIFT_CARD): gift_card_amount
|
||||
- 抹零 (ROUNDING): rounding_amount
|
||||
- 大客户优惠 (BIG_CUSTOMER): 手动调整中标记的大客户订单
|
||||
- 其他优惠 (OTHER): 手动调整中除大客户外的部分
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class FinanceDiscountDetailTask(BaseDwsTask):
|
||||
"""
|
||||
优惠明细分析任务
|
||||
|
||||
分析各类优惠的使用情况:
|
||||
- 团购优惠
|
||||
- 会员折扣
|
||||
- 赠送卡抵扣
|
||||
- 手动调整
|
||||
- 抹零
|
||||
- 其他优惠
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code that identifies this DWS task."""
    task_code = "DWS_FINANCE_DISCOUNT_DETAIL"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the table this task writes to."""
    target_table = "dws_finance_discount_detail"
    return target_table
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the columns that uniquely identify a row of the target table."""
    key_columns = ("site_id", "stat_date", "discount_type_code")
    return list(key_columns)
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Collect the raw inputs for the discount breakdown.

    Args:
        context: Task context providing ``window_start``, ``window_end``
            (date or datetime) and ``store_id``.

    Returns:
        A dict with ``discount_summary`` (daily discount totals from the
        settlement head), ``groupbuy_payments`` (amount paid for group-buy
        per day) and ``big_customer_summary`` (big-customer share of the
        manual adjustments).
    """
    def as_date(value):
        # datetime values are truncated to their date; dates pass through.
        return value.date() if hasattr(value, 'date') else value

    window = (as_date(context.window_start), as_date(context.window_end))
    site_id = context.store_id

    extracted: Dict[str, Any] = {}
    extracted['discount_summary'] = self._extract_discount_summary(site_id, *window)
    extracted['groupbuy_payments'] = self._extract_groupbuy_payments(site_id, *window)
    extracted['big_customer_summary'] = self._extract_big_customer_discounts(site_id, *window)
    return extracted
|
||||
|
||||
def _extract_discount_summary(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
从结账单头表抽取优惠汇总
|
||||
|
||||
字段说明:
|
||||
- coupon_amount: 团购抵消台费金额
|
||||
- adjust_amount: 手动调整金额(台费打折)
|
||||
- member_discount_amount: 会员折扣
|
||||
- rounding_amount: 抹零金额
|
||||
- gift_card_amount: 赠送卡支付
|
||||
- pl_coupon_sale_amount: 平台券销售金额(团购实付路径1)
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
-- 团购相关
|
||||
COALESCE(SUM(coupon_amount), 0) AS coupon_amount_total,
|
||||
COALESCE(SUM(pl_coupon_sale_amount), 0) AS pl_coupon_sale_total,
|
||||
COUNT(CASE WHEN coupon_amount > 0 THEN 1 END) AS coupon_order_count,
|
||||
-- 手动调整
|
||||
COALESCE(SUM(adjust_amount), 0) AS adjust_amount_total,
|
||||
COUNT(CASE WHEN adjust_amount != 0 THEN 1 END) AS adjust_order_count,
|
||||
-- 会员折扣
|
||||
COALESCE(SUM(member_discount_amount), 0) AS member_discount_total,
|
||||
COUNT(CASE WHEN member_discount_amount > 0 THEN 1 END) AS member_discount_order_count,
|
||||
-- 抹零
|
||||
COALESCE(SUM(rounding_amount), 0) AS rounding_amount_total,
|
||||
COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS rounding_order_count,
|
||||
-- 赠送卡
|
||||
COALESCE(SUM(gift_card_amount), 0) AS gift_card_amount_total,
|
||||
COUNT(CASE WHEN gift_card_amount > 0 THEN 1 END) AS gift_card_order_count,
|
||||
-- 总订单数
|
||||
COUNT(*) AS total_orders
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND settle_status = 1 -- 已结账
|
||||
GROUP BY pay_time::DATE
|
||||
ORDER BY stat_date
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
})
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_groupbuy_payments(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> Dict[date, Decimal]:
|
||||
"""
|
||||
从团购核销表获取团购实付金额
|
||||
|
||||
团购实付金额计算:
|
||||
- 若 pl_coupon_sale_amount > 0,使用该值
|
||||
- 否则使用 groupbuy_redemption.ledger_unit_price
|
||||
|
||||
返回:{日期: 团购实付总额}
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
sh.pay_time::DATE AS stat_date,
|
||||
SUM(
|
||||
CASE
|
||||
WHEN sh.pl_coupon_sale_amount > 0 THEN sh.pl_coupon_sale_amount
|
||||
ELSE COALESCE(gr.ledger_unit_price, 0)
|
||||
END
|
||||
) AS groupbuy_payment
|
||||
FROM billiards_dwd.dwd_settlement_head sh
|
||||
LEFT JOIN billiards_dwd.dwd_groupbuy_redemption gr
|
||||
ON gr.order_settle_id = sh.order_settle_id
|
||||
WHERE sh.site_id = %(site_id)s
|
||||
AND sh.pay_time >= %(start_date)s
|
||||
AND sh.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND sh.settle_status = 1
|
||||
AND sh.coupon_amount > 0 -- 只统计有团购的订单
|
||||
GROUP BY sh.pay_time::DATE
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
})
|
||||
|
||||
result = {}
|
||||
if rows:
|
||||
for row in rows:
|
||||
result[row['stat_date']] = self.safe_decimal(row.get('groupbuy_payment', 0))
|
||||
return result
|
||||
|
||||
def transform(self, data: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Turn the daily discount totals into one record per date and discount type.

    For every day in ``discount_summary`` six records are emitted, in this
    order: GROUPBUY, VIP, ROUNDING, GIFT_CARD, BIG_CUSTOMER, OTHER.

    * GROUPBUY = coupon offset minus the amount paid for group-buy, floored
      at zero.
    * VIP / ROUNDING / GIFT_CARD use the absolute value of the daily total.
    * The manual-adjustment total is split into BIG_CUSTOMER and the
      remainder OTHER (both floored at zero).
    * ``discount_ratio`` is each amount's share of the day's total, rounded
      to four decimals.

    Args:
        data: Output of ``extract``.
        context: Task context providing ``store_id``.

    Returns:
        The list of records ready for ``load``.
    """
    site_id = context.store_id
    tenant_id = self.config.get("app.tenant_id", site_id)
    zero = Decimal('0')

    daily_rows = data.get('discount_summary', [])
    paid_by_date = data.get('groupbuy_payments', {})
    big_by_date = {row['stat_date']: row for row in data.get('big_customer_summary', [])}

    # (type_code, type_name, amount_field, count_field) for the types whose
    # amount is taken directly from the summary row.
    direct_types = (
        ('VIP', '会员折扣', 'member_discount_total', 'member_discount_order_count'),
        ('ROUNDING', '抹零', 'rounding_amount_total', 'rounding_order_count'),
        ('GIFT_CARD', '赠送卡抵扣', 'gift_card_amount_total', 'gift_card_order_count'),
    )

    records: List[Dict[str, Any]] = []
    for daily in daily_rows:
        stat_date = daily.get('stat_date')

        coupon_total = self.safe_decimal(daily.get('coupon_amount_total', 0))
        groupbuy_discount = max(coupon_total - paid_by_date.get(stat_date, zero), zero)
        breakdown = {
            'GROUPBUY': {
                'type_name': '团购优惠',
                'amount': groupbuy_discount,
                'count': daily.get('coupon_order_count', 0) or 0,
            }
        }
        for code, label, amount_field, count_field in direct_types:
            breakdown[code] = {
                'type_name': label,
                'amount': abs(self.safe_decimal(daily.get(amount_field, 0))),
                'count': daily.get(count_field, 0) or 0,
            }
        day_total = sum((entry['amount'] for entry in breakdown.values()), zero)

        # Split the manual adjustments into big-customer and everything else.
        adjust_total = abs(self.safe_decimal(daily.get('adjust_amount_total', 0)))
        adjust_orders = daily.get('adjust_order_count', 0) or 0
        big_row = big_by_date.get(stat_date, {})
        big_amount = self.safe_decimal(big_row.get('big_customer_amount', 0))
        big_orders = big_row.get('big_customer_count', 0) or 0
        other_amount = max(adjust_total - big_amount, zero)
        other_orders = max(adjust_orders - big_orders, 0)

        breakdown['BIG_CUSTOMER'] = {
            'type_name': '大客户优惠',
            'amount': big_amount,
            'count': big_orders,
        }
        breakdown['OTHER'] = {
            'type_name': '其他优惠',
            'amount': other_amount,
            'count': other_orders,
        }
        day_total += big_amount + other_amount

        for code, entry in breakdown.items():
            amount = entry['amount']
            orders = entry['count']
            # Guard against division by zero on days without any discount.
            share = amount / day_total if day_total > 0 else zero
            records.append({
                'site_id': site_id,
                'tenant_id': tenant_id,
                'stat_date': stat_date,
                'discount_type_code': code,
                'discount_type_name': entry['type_name'],
                'discount_amount': amount,
                'discount_ratio': round(share, 4),
                'usage_count': orders,
                'affected_orders': orders,  # simplification: usages == orders
            })

    return records
|
||||
|
||||
def _extract_big_customer_discounts(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取大客户优惠(基于手动调整)
|
||||
"""
|
||||
member_ids = self._parse_id_list(self.config.get("dws.discount.big_customer_member_ids"))
|
||||
order_ids = self._parse_id_list(self.config.get("dws.discount.big_customer_order_ids"))
|
||||
if not member_ids and not order_ids:
|
||||
return []
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
adjust_amount
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND adjust_amount != 0
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
})
|
||||
if not rows:
|
||||
return []
|
||||
|
||||
result: Dict[date, Dict[str, Any]] = {}
|
||||
for row in rows:
|
||||
row_dict = dict(row)
|
||||
stat_date = row_dict.get('stat_date')
|
||||
if not stat_date:
|
||||
continue
|
||||
order_id = row_dict.get('order_settle_id')
|
||||
member_id = row_dict.get('member_id')
|
||||
if order_id not in order_ids and member_id not in member_ids:
|
||||
continue
|
||||
amount = abs(self.safe_decimal(row_dict.get('adjust_amount', 0)))
|
||||
entry = result.setdefault(stat_date, {'stat_date': stat_date, 'big_customer_amount': Decimal('0'), 'big_customer_count': 0})
|
||||
entry['big_customer_amount'] += amount
|
||||
entry['big_customer_count'] += 1
|
||||
|
||||
return list(result.values())
|
||||
|
||||
def _parse_id_list(self, value: Any) -> set:
|
||||
if not value:
|
||||
return set()
|
||||
if isinstance(value, str):
|
||||
items = [v.strip() for v in value.split(",") if v.strip()]
|
||||
return {int(v) for v in items if v.isdigit()}
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
result = set()
|
||||
for item in value:
|
||||
if item is None:
|
||||
continue
|
||||
try:
|
||||
result.add(int(item))
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
return result
|
||||
return set()
|
||||
|
||||
def load(self, records: List[Dict[str, Any]], context: TaskContext) -> Dict[str, Any]:
    """
    Write the discount records using delete-before-insert over the window.

    All existing rows of the site whose ``stat_date`` lies in the task window
    are deleted, then every record is inserted.

    Args:
        records: Records produced by ``transform``.
        context: Task context providing ``store_id``, ``window_start`` and
            ``window_end`` (date or datetime).

    Returns:
        A dict with the legacy keys ``deleted`` and ``inserted`` plus
        ``counts`` / ``extra`` in the same shape the daily finance task
        returns, so callers can read either form.
    """
    counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}

    if not records:
        # Nothing to write: existing rows are left untouched.
        return {'inserted': 0, 'deleted': 0, 'counts': counts, 'extra': {'deleted': 0}}

    def as_date(value):
        # datetime values are truncated to their date; dates pass through.
        return value.date() if hasattr(value, 'date') else value

    site_id = context.store_id
    start_date = as_date(context.window_start)
    end_date = as_date(context.window_end)

    # Remove the rows previously written for this window.
    delete_sql = """
        DELETE FROM billiards_dws.dws_finance_discount_detail
        WHERE site_id = %(site_id)s
          AND stat_date >= %(start_date)s
          AND stat_date <= %(end_date)s
    """
    # NOTE(review): execute() is assumed to return the affected row count or
    # None; None is reported as 0.
    deleted = self.db.execute(delete_sql, {
        'site_id': site_id,
        'start_date': start_date,
        'end_date': end_date,
    }) or 0

    insert_sql = """
        INSERT INTO billiards_dws.dws_finance_discount_detail (
            site_id, tenant_id, stat_date,
            discount_type_code, discount_type_name,
            discount_amount, discount_ratio,
            usage_count, affected_orders,
            created_at, updated_at
        ) VALUES (
            %(site_id)s, %(tenant_id)s, %(stat_date)s,
            %(discount_type_code)s, %(discount_type_name)s,
            %(discount_amount)s, %(discount_ratio)s,
            %(usage_count)s, %(affected_orders)s,
            NOW(), NOW()
        )
    """
    inserted = 0
    for record in records:
        self.db.execute(insert_sql, record)
        inserted += 1

    counts["fetched"] = len(records)
    counts["inserted"] = inserted
    return {
        'deleted': deleted,
        'inserted': inserted,
        'counts': counts,
        'extra': {'deleted': deleted},
    }
|
||||
437
etl_billiards/tasks/dws/finance_income_task.py
Normal file
437
etl_billiards/tasks/dws/finance_income_task.py
Normal file
@@ -0,0 +1,437 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
收入结构分析任务
|
||||
|
||||
功能说明:
|
||||
以"日期+区域/类型"为粒度,分析收入结构
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表(台费、商品、助教正价)
|
||||
- dwd_table_fee_log: 台费流水(区域关联)
|
||||
- dwd_assistant_service_log: 助教服务流水(区域关联)
|
||||
- cfg_area_category: 区域分类映射
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_finance_income_structure
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按日期+类型)
|
||||
|
||||
业务规则:
|
||||
- 结构类型1(INCOME_TYPE):按收入类型分析(台费/商品/助教基础课/助教附加课)
|
||||
- 结构类型2(AREA):按区域分析(普通台球区/VIP包厢/斯诺克/麻将/KTV等)
|
||||
- 区域映射使用cfg_area_category配置
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class FinanceIncomeStructureTask(BaseDwsTask):
|
||||
"""
|
||||
收入结构分析任务
|
||||
|
||||
分析收入的两种维度:
|
||||
1. INCOME_TYPE: 按收入类型(台费/商品/助教基础课/助教附加课)
|
||||
2. AREA: 按区域(使用cfg_area_category映射)
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code that identifies this DWS task."""
    task_code = "DWS_FINANCE_INCOME_STRUCTURE"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the table this task writes to."""
    target_table = "dws_finance_income_structure"
    return target_table
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the columns that uniquely identify a row of the target table."""
    key_columns = ("site_id", "stat_date", "structure_type", "category_code")
    return list(key_columns)
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Collect the raw inputs for the income-structure analysis.

    Args:
        context: Task context providing ``window_start``, ``window_end``
            (date or datetime) and ``store_id``.

    Returns:
        A dict with ``income_by_type`` (result of
        ``_extract_income_by_type``) and ``income_by_area`` (result of
        ``_extract_income_by_area``).
    """
    def as_date(value):
        # datetime values are truncated to their date; dates pass through.
        return value.date() if hasattr(value, 'date') else value

    window = (as_date(context.window_start), as_date(context.window_end))
    site_id = context.store_id

    extracted: Dict[str, Any] = {}
    extracted['income_by_type'] = self._extract_income_by_type(site_id, *window)
    extracted['income_by_area'] = self._extract_income_by_area(site_id, *window)
    return extracted
|
||||
|
||||
def _extract_income_by_type(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
按收入类型汇总
|
||||
|
||||
收入类型分类:
|
||||
- TABLE_FEE: 台费收入 (table_charge_money)
|
||||
- GOODS: 商品收入 (goods_money)
|
||||
- ASSISTANT_BASE: 助教基础课 (assistant_pd_money)
|
||||
- ASSISTANT_BONUS: 助教附加课 (assistant_cx_money)
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
-- 台费收入
|
||||
COALESCE(SUM(table_charge_money), 0) AS table_fee_income,
|
||||
COUNT(CASE WHEN table_charge_money > 0 THEN 1 END) AS table_fee_orders,
|
||||
-- 商品收入
|
||||
COALESCE(SUM(goods_money), 0) AS goods_income,
|
||||
COUNT(CASE WHEN goods_money > 0 THEN 1 END) AS goods_orders,
|
||||
-- 助教基础课收入(PD=陪打)
|
||||
COALESCE(SUM(assistant_pd_money), 0) AS assistant_base_income,
|
||||
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_base_orders,
|
||||
-- 助教附加课收入(CX=超休/促销)
|
||||
COALESCE(SUM(assistant_cx_money), 0) AS assistant_bonus_income,
|
||||
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_bonus_orders,
|
||||
-- 总订单数
|
||||
COUNT(*) AS total_orders
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND settle_status = 1 -- 已结账
|
||||
GROUP BY pay_time::DATE
|
||||
ORDER BY stat_date
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
})
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_income_by_area(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
按区域汇总收入
|
||||
|
||||
关联dim_table获取区域名称,再映射到cfg_area_category
|
||||
"""
|
||||
sql = """
|
||||
WITH
|
||||
-- 台费按区域汇总
|
||||
table_fee_by_area AS (
|
||||
SELECT
|
||||
tfl.pay_time::DATE AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
COALESCE(SUM(tfl.ledger_amount), 0) AS income_amount,
|
||||
COALESCE(SUM(tfl.ledger_time_seconds), 0) AS duration_seconds,
|
||||
COUNT(DISTINCT tfl.order_settle_id) AS order_count
|
||||
FROM billiards_dwd.dwd_table_fee_log tfl
|
||||
LEFT JOIN billiards_dwd.dim_table dt
|
||||
ON dt.site_table_id = tfl.site_table_id
|
||||
WHERE tfl.site_id = %(site_id)s
|
||||
AND tfl.pay_time >= %(start_date)s
|
||||
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
GROUP BY tfl.pay_time::DATE, dt.site_table_area_name
|
||||
),
|
||||
-- 助教服务按区域汇总
|
||||
assistant_by_area AS (
|
||||
SELECT
|
||||
asl.start_use_time::DATE AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
COALESCE(SUM(asl.ledger_amount), 0) AS income_amount,
|
||||
COALESCE(SUM(asl.income_seconds), 0) AS duration_seconds,
|
||||
COUNT(DISTINCT asl.order_settle_id) AS order_count
|
||||
FROM billiards_dwd.dwd_assistant_service_log asl
|
||||
LEFT JOIN billiards_dwd.dim_table dt
|
||||
ON dt.site_table_id = asl.site_table_id
|
||||
WHERE asl.site_id = %(site_id)s
|
||||
AND asl.start_use_time >= %(start_date)s
|
||||
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'
|
||||
GROUP BY asl.start_use_time::DATE, dt.site_table_area_name
|
||||
)
|
||||
-- 合并台费和助教服务
|
||||
SELECT
|
||||
COALESCE(t.stat_date, a.stat_date) AS stat_date,
|
||||
COALESCE(t.area_name, a.area_name) AS area_name,
|
||||
COALESCE(t.income_amount, 0) + COALESCE(a.income_amount, 0) AS income_amount,
|
||||
COALESCE(t.duration_seconds, 0) + COALESCE(a.duration_seconds, 0) AS duration_seconds,
|
||||
GREATEST(COALESCE(t.order_count, 0), COALESCE(a.order_count, 0)) AS order_count
|
||||
FROM table_fee_by_area t
|
||||
FULL OUTER JOIN assistant_by_area a
|
||||
ON t.stat_date = a.stat_date AND t.area_name = a.area_name
|
||||
ORDER BY stat_date, area_name
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
})
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def transform(self, data: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Turn the extracted aggregates into target-table records.

    The income-type records come first, followed by the area records; both
    helpers also compute the income ratio of each record.

    Args:
        data: output of ``extract`` (keys ``income_by_type`` / ``income_by_area``;
            a missing key is treated as an empty list).
        context: task context; ``store_id`` is used as the site id and as the
            fallback tenant id when ``app.tenant_id`` is not configured.

    Returns:
        The combined list of records.
    """
    store = context.store_id
    tenant = self.config.get("app.tenant_id", store)

    by_type = self._transform_income_by_type(
        data.get('income_by_type', []), store, tenant
    )
    by_area = self._transform_income_by_area(
        data.get('income_by_area', []), store, tenant
    )

    return [*by_type, *by_area]
|
||||
|
||||
def _transform_income_by_type(
|
||||
self,
|
||||
income_data: List[Dict[str, Any]],
|
||||
site_id: int,
|
||||
tenant_id: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
转换按收入类型的数据
|
||||
|
||||
将每日汇总数据展开为4条记录(台费/商品/基础课/附加课)
|
||||
"""
|
||||
# 收入类型定义
|
||||
income_types = [
|
||||
('TABLE_FEE', '台费收入', 'table_fee_income', 'table_fee_orders'),
|
||||
('GOODS', '商品收入', 'goods_income', 'goods_orders'),
|
||||
('ASSISTANT_BASE', '助教基础课', 'assistant_base_income', 'assistant_base_orders'),
|
||||
('ASSISTANT_BONUS', '助教附加课', 'assistant_bonus_income', 'assistant_bonus_orders'),
|
||||
]
|
||||
|
||||
records = []
|
||||
|
||||
for daily_data in income_data:
|
||||
stat_date = daily_data.get('stat_date')
|
||||
|
||||
# 计算当日总收入(用于计算占比)
|
||||
total_income = sum(
|
||||
self.safe_decimal(daily_data.get(field, 0))
|
||||
for _, _, field, _ in income_types
|
||||
)
|
||||
|
||||
# 为每种收入类型生成一条记录
|
||||
for type_code, type_name, income_field, order_field in income_types:
|
||||
income_amount = self.safe_decimal(daily_data.get(income_field, 0))
|
||||
order_count = daily_data.get(order_field, 0) or 0
|
||||
|
||||
# 计算占比(避免除零)
|
||||
income_ratio = (income_amount / total_income) if total_income > 0 else Decimal('0')
|
||||
|
||||
records.append({
|
||||
'site_id': site_id,
|
||||
'tenant_id': tenant_id,
|
||||
'stat_date': stat_date,
|
||||
'structure_type': 'INCOME_TYPE',
|
||||
'category_code': type_code,
|
||||
'category_name': type_name,
|
||||
'income_amount': income_amount,
|
||||
'income_ratio': round(income_ratio, 4),
|
||||
'order_count': order_count,
|
||||
'duration_minutes': 0, # 收入类型维度不统计时长
|
||||
})
|
||||
|
||||
return records
|
||||
|
||||
def _transform_income_by_area(
|
||||
self,
|
||||
area_data: List[Dict[str, Any]],
|
||||
site_id: int,
|
||||
tenant_id: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
转换按区域的数据
|
||||
|
||||
将区域名称映射到cfg_area_category的category_code
|
||||
"""
|
||||
records = []
|
||||
|
||||
# 加载区域分类配置
|
||||
area_categories = self._get_config_cache().get('area_categories', {})
|
||||
|
||||
# 按日期分组计算总收入(用于计算占比)
|
||||
daily_totals = {}
|
||||
for row in area_data:
|
||||
stat_date = row.get('stat_date')
|
||||
income = self.safe_decimal(row.get('income_amount', 0))
|
||||
daily_totals[stat_date] = daily_totals.get(stat_date, Decimal('0')) + income
|
||||
|
||||
# 按日期+区域聚合(相同category_code需要合并)
|
||||
aggregated = {}
|
||||
|
||||
for row in area_data:
|
||||
stat_date = row.get('stat_date')
|
||||
area_name = row.get('area_name') or '未知区域'
|
||||
income_amount = self.safe_decimal(row.get('income_amount', 0))
|
||||
duration_seconds = row.get('duration_seconds', 0) or 0
|
||||
order_count = row.get('order_count', 0) or 0
|
||||
|
||||
# 映射区域名称到分类代码
|
||||
category = self._map_area_to_category(area_name, area_categories)
|
||||
category_code = category.get('category_code', 'OTHER')
|
||||
category_name = category.get('category_name', '其他区域')
|
||||
|
||||
# 聚合键
|
||||
key = (stat_date, category_code)
|
||||
|
||||
if key not in aggregated:
|
||||
aggregated[key] = {
|
||||
'stat_date': stat_date,
|
||||
'category_code': category_code,
|
||||
'category_name': category_name,
|
||||
'income_amount': Decimal('0'),
|
||||
'duration_seconds': 0,
|
||||
'order_count': 0,
|
||||
}
|
||||
|
||||
aggregated[key]['income_amount'] += income_amount
|
||||
aggregated[key]['duration_seconds'] += duration_seconds
|
||||
aggregated[key]['order_count'] += order_count
|
||||
|
||||
# 生成记录
|
||||
for key, agg_data in aggregated.items():
|
||||
stat_date = agg_data['stat_date']
|
||||
total_income = daily_totals.get(stat_date, Decimal('1'))
|
||||
income_amount = agg_data['income_amount']
|
||||
|
||||
# 计算占比
|
||||
income_ratio = (income_amount / total_income) if total_income > 0 else Decimal('0')
|
||||
|
||||
records.append({
|
||||
'site_id': site_id,
|
||||
'tenant_id': tenant_id,
|
||||
'stat_date': stat_date,
|
||||
'structure_type': 'AREA',
|
||||
'category_code': agg_data['category_code'],
|
||||
'category_name': agg_data['category_name'],
|
||||
'income_amount': income_amount,
|
||||
'income_ratio': round(income_ratio, 4),
|
||||
'order_count': agg_data['order_count'],
|
||||
'duration_minutes': agg_data['duration_seconds'] // 60,
|
||||
})
|
||||
|
||||
return records
|
||||
|
||||
def _map_area_to_category(
|
||||
self,
|
||||
area_name: str,
|
||||
area_categories: Dict[str, Dict[str, Any]]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
将区域名称映射到分类
|
||||
|
||||
匹配规则:
|
||||
1. 精确匹配 match_pattern
|
||||
2. 模糊匹配(LIKE)
|
||||
3. 默认返回 OTHER
|
||||
"""
|
||||
if not area_name:
|
||||
return {'category_code': 'OTHER', 'category_name': '其他区域'}
|
||||
|
||||
# 遍历配置查找匹配
|
||||
for pattern, category in area_categories.items():
|
||||
match_type = category.get('match_type', 'exact')
|
||||
|
||||
if match_type == 'exact':
|
||||
if area_name == pattern:
|
||||
return category
|
||||
elif match_type == 'like':
|
||||
# 简单的模糊匹配(包含关系)
|
||||
if pattern.replace('%', '') in area_name:
|
||||
return category
|
||||
|
||||
# 默认分类
|
||||
return {'category_code': 'OTHER', 'category_name': '其他区域'}
|
||||
|
||||
def load(self, records: List[Dict[str, Any]], context: TaskContext) -> Dict[str, Any]:
    """
    Write the records to ``billiards_dws.dws_finance_income_structure``.

    Delete-before-insert: all rows of the site whose ``stat_date`` lies in
    the task window (both ends inclusive) are deleted, then every record is
    inserted one by one. When ``records`` is empty nothing is deleted or
    inserted.

    Args:
        records: output of ``transform``.
        context: task context providing ``store_id`` and the window bounds.

    Returns:
        ``{'deleted': <delete result or 0>, 'inserted': <number of inserts>}``.
    """
    if not records:
        return {'inserted': 0, 'deleted': 0}

    store = context.store_id

    window_start = context.window_start
    if hasattr(window_start, 'date'):
        window_start = window_start.date()

    window_end = context.window_end
    if hasattr(window_end, 'date'):
        window_end = window_end.date()

    # Remove the existing rows of the window first.
    delete_sql = """
        DELETE FROM billiards_dws.dws_finance_income_structure
        WHERE site_id = %(site_id)s
            AND stat_date >= %(start_date)s
            AND stat_date <= %(end_date)s
    """
    window_params = {
        'site_id': store,
        'start_date': window_start,
        'end_date': window_end,
    }
    removed = self.db.execute(delete_sql, window_params)

    # Insert the fresh rows.
    insert_sql = """
        INSERT INTO billiards_dws.dws_finance_income_structure (
            site_id, tenant_id, stat_date,
            structure_type, category_code, category_name,
            income_amount, income_ratio,
            order_count, duration_minutes,
            created_at, updated_at
        ) VALUES (
            %(site_id)s, %(tenant_id)s, %(stat_date)s,
            %(structure_type)s, %(category_code)s, %(category_name)s,
            %(income_amount)s, %(income_ratio)s,
            %(order_count)s, %(duration_minutes)s,
            NOW(), NOW()
        )
    """

    written = 0
    for written, row in enumerate(records, start=1):
        self.db.execute(insert_sql, row)

    return {'deleted': removed or 0, 'inserted': written}
|
||||
172
etl_billiards/tasks/dws/finance_recharge_task.py
Normal file
172
etl_billiards/tasks/dws/finance_recharge_task.py
Normal file
@@ -0,0 +1,172 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
充值统计任务
|
||||
|
||||
功能说明:
|
||||
以"日期"为粒度,统计充值数据
|
||||
|
||||
数据来源:
|
||||
- dwd_recharge_order: 充值订单
|
||||
- dim_member_card_account: 会员卡账户(余额快照)
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_finance_recharge_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按日期)
|
||||
|
||||
业务规则:
|
||||
- 首充/续充:通过 is_first 字段区分
|
||||
- 现金/赠送:通过 pay_money/gift_money 区分
|
||||
- 卡余额:区分储值卡和赠送卡
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class FinanceRechargeTask(BaseDwsTask):
|
||||
"""
|
||||
充值统计任务
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code string of the recharge statistics task."""
    return "DWS_FINANCE_RECHARGE"
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the target table (without schema prefix)."""
    return "dws_finance_recharge_summary"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names that form the primary key of the target table."""
    return ["site_id", "stat_date"]
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Extract the daily recharge summary and the card balances.

    Args:
        context: task context; ``window_start`` / ``window_end`` may be
            ``datetime`` (reduced to their date part) or plain ``date``
            values, and ``store_id`` is used as the site id.

    Returns:
        Dict with ``recharge_summary``, ``card_balances``, the resolved
        ``start_date`` / ``end_date`` and ``site_id``.
    """
    window_start = context.window_start
    if hasattr(window_start, 'date'):
        window_start = window_start.date()

    window_end = context.window_end
    if hasattr(window_end, 'date'):
        window_end = window_end.date()

    store = context.store_id

    summary_rows = self._extract_recharge_summary(store, window_start, window_end)
    balances = self._extract_card_balances(store, window_end)

    return {
        'recharge_summary': summary_rows,
        'card_balances': balances,
        'start_date': window_start,
        'end_date': window_end,
        'site_id': store
    }
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Build one target record per daily recharge summary row.

    Count columns go through ``safe_int`` and amount columns through
    ``safe_decimal``. The card-balance values are attached only to the row
    whose ``stat_date`` equals ``extracted['end_date']``; every other row
    gets zero balances.

    Args:
        extracted: output of ``extract``.
        context: task context (not read here).

    Returns:
        Records in the order of ``extracted['recharge_summary']``.
    """
    summary_rows = extracted['recharge_summary']
    balances = extracted['card_balances']
    store = extracted['site_id']

    # Recharge columns in target order, each with its converter.
    recharge_columns = (
        ('recharge_count', self.safe_int),
        ('recharge_total', self.safe_decimal),
        ('recharge_cash', self.safe_decimal),
        ('recharge_gift', self.safe_decimal),
        ('first_recharge_count', self.safe_int),
        ('first_recharge_cash', self.safe_decimal),
        ('first_recharge_gift', self.safe_decimal),
        ('first_recharge_total', self.safe_decimal),
        ('renewal_count', self.safe_int),
        ('renewal_cash', self.safe_decimal),
        ('renewal_gift', self.safe_decimal),
        ('renewal_total', self.safe_decimal),
        ('recharge_member_count', self.safe_int),
        ('new_member_count', self.safe_int),
    )
    # (target column, key in the balance snapshot)
    balance_columns = (
        ('total_card_balance', 'total_balance'),
        ('cash_card_balance', 'cash_balance'),
        ('gift_card_balance', 'gift_balance'),
    )

    output = []
    for summary in summary_rows:
        day = summary.get('stat_date')

        # The balance snapshot only applies to the last day of the window.
        snapshot = balances if day == extracted['end_date'] else {}

        record = {
            'site_id': store,
            'tenant_id': self.config.get("app.tenant_id", store),
            'stat_date': day,
        }
        for column, convert in recharge_columns:
            record[column] = convert(summary.get(column, 0))
        for column, source_key in balance_columns:
            record[column] = self.safe_decimal(snapshot.get(source_key, 0))

        output.append(record)

    return output
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """
    Replace the window's rows in the target table with the new records.

    Existing rows are removed via ``delete_existing_data`` (keyed on
    ``stat_date``) and the new rows written via ``bulk_insert``. With no
    records, nothing is deleted or inserted.

    Returns:
        ``{"counts": {...}}`` with fetched/inserted/updated/skipped/errors,
        plus ``"extra": {"deleted": ...}`` when records were written.
    """
    counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
    if not transformed:
        return {"counts": counts}

    removed = self.delete_existing_data(context, date_col="stat_date")
    written = self.bulk_insert(transformed)

    counts["fetched"] = len(transformed)
    counts["inserted"] = written
    return {"counts": counts, "extra": {"deleted": removed}}
|
||||
|
||||
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(create_time) AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_money + gift_money) AS recharge_total,
|
||||
SUM(pay_money) AS recharge_cash,
|
||||
SUM(gift_money) AS recharge_gift,
|
||||
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
|
||||
COUNT(CASE WHEN is_first != 1 OR is_first IS NULL THEN 1 END) AS renewal_count,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count,
|
||||
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
|
||||
FROM billiards_dwd.dwd_recharge_order
|
||||
WHERE site_id = %s AND DATE(create_time) >= %s AND DATE(create_time) <= %s
|
||||
GROUP BY DATE(create_time)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_card_balances(self, site_id: int, stat_date: date) -> Dict[str, Decimal]:
|
||||
CASH_CARD_TYPE_ID = 2793249295533893
|
||||
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
|
||||
|
||||
sql = """
|
||||
SELECT card_type_id, SUM(balance) AS total_balance
|
||||
FROM billiards_dwd.dim_member_card_account
|
||||
WHERE site_id = %s AND valid_to IS NULL
|
||||
GROUP BY card_type_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
cash_balance = Decimal('0')
|
||||
gift_balance = Decimal('0')
|
||||
|
||||
for row in (rows or []):
|
||||
card_type_id = row['card_type_id']
|
||||
balance = self.safe_decimal(row['total_balance'])
|
||||
if card_type_id == CASH_CARD_TYPE_ID:
|
||||
cash_balance += balance
|
||||
elif card_type_id in GIFT_CARD_TYPE_IDS:
|
||||
gift_balance += balance
|
||||
|
||||
return {
|
||||
'cash_balance': cash_balance,
|
||||
'gift_balance': gift_balance,
|
||||
'total_balance': cash_balance + gift_balance
|
||||
}
|
||||
|
||||
|
||||
__all__ = ['FinanceRechargeTask']
|
||||
16
etl_billiards/tasks/dws/index/__init__.py
Normal file
16
etl_billiards/tasks/dws/index/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
指数算法任务模块
|
||||
|
||||
包含:
|
||||
- RecallIndexTask: 客户召回指数计算任务
|
||||
- IntimacyIndexTask: 客户-助教亲密指数计算任务
|
||||
"""
|
||||
|
||||
from .recall_index_task import RecallIndexTask
|
||||
from .intimacy_index_task import IntimacyIndexTask
|
||||
|
||||
__all__ = [
|
||||
'RecallIndexTask',
|
||||
'IntimacyIndexTask',
|
||||
]
|
||||
518
etl_billiards/tasks/dws/index/base_index_task.py
Normal file
518
etl_billiards/tasks/dws/index/base_index_task.py
Normal file
@@ -0,0 +1,518 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
指数算法任务基类
|
||||
|
||||
功能说明:
|
||||
- 提供半衰期时间衰减函数
|
||||
- 提供分位数计算和分位截断
|
||||
- 提供0-10映射方法
|
||||
- 提供算法参数加载
|
||||
- 提供分位点历史记录(用于EWMA平滑)
|
||||
|
||||
算法原理:
|
||||
1. 时间衰减函数(半衰期模型):decay(d; h) = exp(-ln(2) * d / h)
|
||||
当 d=h 时权重衰减到 0.5,越近权重越大
|
||||
|
||||
2. 0-10映射流程:
|
||||
Raw Score → Winsorize(P5, P95) → [可选Log压缩] → MinMax(0, 10)
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-03
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from ..base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class IndexParameters:
    """Index algorithm parameters together with the time they were loaded."""
    params: Dict[str, float]  # parameter name -> parameter value
    loaded_at: datetime  # time the parameters were loaded
|
||||
|
||||
|
||||
@dataclass
class PercentileHistory:
    """One percentile history record."""
    percentile_5: float  # raw 5th percentile
    percentile_95: float  # raw 95th percentile
    percentile_5_smoothed: float  # smoothed 5th percentile
    percentile_95_smoothed: float  # smoothed 95th percentile
    record_count: int  # number of records
    calc_time: datetime  # calculation time of the record
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 指数任务基类
|
||||
# =============================================================================
|
||||
|
||||
class BaseIndexTask(BaseDwsTask):
|
||||
"""
|
||||
指数算法任务基类
|
||||
|
||||
提供指数计算通用功能:
|
||||
1. 半衰期时间衰减函数
|
||||
2. 分位数计算与截断
|
||||
3. 0-10归一化映射
|
||||
4. 算法参数加载
|
||||
5. 分位点历史管理(EWMA平滑)
|
||||
"""
|
||||
|
||||
# 子类需要定义的指数类型
|
||||
INDEX_TYPE: str = ""

# Parameter cache
_index_params_cache: Optional[IndexParameters] = None
_index_params_ttl: int = 300  # cache lifetime in seconds

# Default parameters
DEFAULT_LOOKBACK_DAYS = 60
DEFAULT_PERCENTILE_LOWER = 5
DEFAULT_PERCENTILE_UPPER = 95
DEFAULT_EWMA_ALPHA = 0.2
|
||||
|
||||
# ==========================================================================
|
||||
# 抽象方法(子类需实现)
|
||||
# ==========================================================================
|
||||
|
||||
@abstractmethod
def get_index_type(self) -> str:
    """Return the index type (RECALL/INTIMACY); must be implemented by subclasses."""
    raise NotImplementedError
|
||||
|
||||
# ==========================================================================
|
||||
# 时间衰减函数
|
||||
# ==========================================================================
|
||||
|
||||
def decay(self, days: float, halflife: float) -> float:
    """
    Half-life decay weight.

    Formula: decay(d; h) = exp(-ln(2) * d / h)

    The weight halves every ``halflife`` days, so recent events weigh more.
    Negative ``days`` are treated as 0.

    Args:
        days: age of the event in days.
        halflife: half-life in days, must be > 0.

    Returns:
        The decayed weight, in (0, 1].

    Raises:
        ValueError: if ``halflife`` is not positive.

    Examples:
        >>> decay(0, 7)   # today
        1.0
        >>> decay(7, 7)   # one half-life ago
        0.5
        >>> decay(14, 7)  # two half-lives ago
        0.25
    """
    if halflife <= 0:
        raise ValueError("半衰期必须大于0")
    elapsed = max(days, 0)
    return math.exp(-math.log(2) * elapsed / halflife)
|
||||
|
||||
# ==========================================================================
|
||||
# 分位数计算
|
||||
# ==========================================================================
|
||||
|
||||
def calculate_percentiles(
    self,
    scores: List[float],
    lower: int = 5,
    upper: int = 95
) -> Tuple[float, float]:
    """
    Pick the lower and upper percentile values from a list of scores.

    The scores are sorted and the values at index
    ``int(n * lower / 100) - 1`` and ``int(n * upper / 100)`` are returned,
    with both indexes clamped to the valid range.

    Args:
        scores: list of scores.
        lower: lower percentile in percent (default 5).
        upper: upper percentile in percent (default 95).

    Returns:
        ``(lower value, upper value)``; ``(0.0, 0.0)`` for an empty list.
    """
    if not scores:
        return 0.0, 0.0

    ordered = sorted(scores)
    count = len(ordered)

    low_pos = int(count * lower / 100) - 1
    if low_pos < 0:
        low_pos = 0

    high_pos = int(count * upper / 100)
    if high_pos > count - 1:
        high_pos = count - 1

    return ordered[low_pos], ordered[high_pos]
|
||||
|
||||
def winsorize(self, value: float, lower: float, upper: float) -> float:
    """
    Winsorize (clip) a value into the range [lower, upper].

    Args:
        value: raw value.
        lower: lower bound (the P5 percentile).
        upper: upper bound (the P95 percentile).

    Returns:
        The clipped value.
    """
    # Raise to the lower bound first, then cap at the upper bound.
    raised = lower if lower > value else value
    return upper if upper < raised else raised
|
||||
|
||||
# ==========================================================================
|
||||
# 0-10映射
|
||||
# ==========================================================================
|
||||
|
||||
def normalize_to_display(
    self,
    value: float,
    min_val: float,
    max_val: float,
    use_log: bool = False,
    epsilon: float = 1e-6
) -> float:
    """
    Normalize a value to a 0-10 score.

    Steps:
    1. [optional] log compression: y = ln(1 + x), applied to the value and
       to both bounds;
    2. min-max mapping: score = 10 * (y - min) / (max - min), clamped to
       [0, 10].

    Args:
        value: raw value (already winsorized).
        min_val: minimum (usually P5).
        max_val: maximum (usually P95).
        use_log: whether to apply log compression.
        epsilon: smallest range treated as non-degenerate.

    Returns:
        A score in [0, 10]; 5.0 when the range is smaller than ``epsilon``.
    """
    if use_log:
        value, min_val, max_val = (
            math.log1p(item) for item in (value, min_val, max_val)
        )

    span = max_val - min_val
    if span < epsilon:
        # Nearly everyone has the same value: use the midpoint.
        return 5.0

    scaled = 10.0 * (value - min_val) / span

    # Keep the result inside the 0-10 range.
    return max(0.0, min(10.0, scaled))
|
||||
|
||||
def batch_normalize_to_display(
    self,
    raw_scores: List[Tuple[Any, float]],  # [(entity_id, raw_score), ...]
    use_log: bool = False,
    percentile_lower: int = 5,
    percentile_upper: int = 95,
    use_smoothing: bool = False,
    site_id: Optional[int] = None
) -> List[Tuple[Any, float, float]]:
    """
    Normalize a batch of raw scores to display scores.

    Steps:
    1. collect all raw scores;
    2. compute the percentile bounds (optionally EWMA-smoothed);
    3. winsorize each score to the bounds;
    4. min-max map to 0-10 and round to two decimals.

    Args:
        raw_scores: list of ``(entity_id, raw_score)`` tuples.
        use_log: whether to apply log compression.
        percentile_lower: lower percentile in percent.
        percentile_upper: upper percentile in percent.
        use_smoothing: whether to EWMA-smooth the percentile bounds.
        site_id: site id; smoothing is applied only when it is given.

    Returns:
        List of ``(entity_id, raw_score, display_score)`` tuples in input
        order; empty list for empty input.
    """
    if not raw_scores:
        return []

    values = [raw for _entity, raw in raw_scores]
    low_bound, high_bound = self.calculate_percentiles(
        values, percentile_lower, percentile_upper
    )

    if use_smoothing and site_id is not None:
        low_bound, high_bound = self._apply_ewma_smoothing(site_id, low_bound, high_bound)

    def to_display(raw: float) -> float:
        clipped = self.winsorize(raw, low_bound, high_bound)
        return round(self.normalize_to_display(clipped, low_bound, high_bound, use_log), 2)

    return [(entity, raw, to_display(raw)) for entity, raw in raw_scores]
|
||||
|
||||
# ==========================================================================
|
||||
# 算法参数加载
|
||||
# ==========================================================================
|
||||
|
||||
def load_index_parameters(
    self,
    index_type: Optional[str] = None,
    force_reload: bool = False
) -> Dict[str, float]:
    """
    Load the algorithm parameters of an index type.

    Parameters are read from ``billiards_dws.cfg_index_parameters`` (rows
    effective today) and cached on the instance for ``_index_params_ttl``
    seconds. The cache is only reused for the index type it was loaded for,
    so asking for a different type always queries the database.

    Args:
        index_type: index type; defaults to ``get_index_type()``.
        force_reload: bypass the cache and reload.

    Returns:
        Dict of parameter name -> parameter value (as float).
    """
    if index_type is None:
        index_type = self.get_index_type()

    now = datetime.now(self.tz)

    # Reuse the cache only if it is fresh AND belongs to the requested type.
    cached = self._index_params_cache
    cached_type = getattr(self, '_index_params_cache_type', None)
    if (
        not force_reload
        and cached is not None
        and cached_type == index_type
        and (now - cached.loaded_at).total_seconds() < self._index_params_ttl
    ):
        return cached.params

    self.logger.debug("加载指数算法参数: %s", index_type)

    sql = """
        SELECT param_name, param_value
        FROM billiards_dws.cfg_index_parameters
        WHERE index_type = %s
            AND effective_from <= CURRENT_DATE
            AND (effective_to IS NULL OR effective_to >= CURRENT_DATE)
        ORDER BY effective_from DESC
    """

    rows = self.db.query(sql, (index_type,))

    # Rows arrive newest-first, so the first value seen for a name wins.
    params: Dict[str, float] = {}
    for row in (rows or []):
        row_dict = dict(row)
        name = row_dict['param_name']
        if name not in params:
            params[name] = float(row_dict['param_value'])

    self._index_params_cache = IndexParameters(
        params=params,
        loaded_at=now
    )
    self._index_params_cache_type = index_type

    return params
|
||||
|
||||
def get_param(self, name: str, default: float = 0.0) -> float:
    """
    Get a single parameter value.

    Args:
        name: parameter name.
        default: value returned when the parameter is not configured.

    Returns:
        The parameter value, or ``default``.
    """
    return self.load_index_parameters().get(name, default)
|
||||
|
||||
# ==========================================================================
|
||||
# 分位点历史管理(EWMA平滑)
|
||||
# ==========================================================================
|
||||
|
||||
def get_last_percentile_history(
    self,
    site_id: int,
    index_type: Optional[str] = None
) -> Optional[PercentileHistory]:
    """
    Fetch the most recent percentile history record.

    Args:
        site_id: site id.
        index_type: index type; defaults to ``get_index_type()``.

    Returns:
        The newest ``PercentileHistory`` by ``calc_time`` (NULL numeric
        columns are read as 0), or None when there is no record.
    """
    resolved_type = self.get_index_type() if index_type is None else index_type

    query_text = """
        SELECT
            percentile_5, percentile_95,
            percentile_5_smoothed, percentile_95_smoothed,
            record_count, calc_time
        FROM billiards_dws.dws_index_percentile_history
        WHERE site_id = %s AND index_type = %s
        ORDER BY calc_time DESC
        LIMIT 1
    """

    found = self.db.query(query_text, (site_id, resolved_type))
    if not found:
        return None

    latest = dict(found[0])
    p5 = float(latest['percentile_5'] or 0)
    p95 = float(latest['percentile_95'] or 0)
    p5_smoothed = float(latest['percentile_5_smoothed'] or 0)
    p95_smoothed = float(latest['percentile_95_smoothed'] or 0)
    count = int(latest['record_count'] or 0)

    return PercentileHistory(
        percentile_5=p5,
        percentile_95=p95,
        percentile_5_smoothed=p5_smoothed,
        percentile_95_smoothed=p95_smoothed,
        record_count=count,
        calc_time=latest['calc_time']
    )
|
||||
|
||||
def save_percentile_history(
    self,
    site_id: int,
    percentile_5: float,
    percentile_95: float,
    percentile_5_smoothed: float,
    percentile_95_smoothed: float,
    record_count: int,
    min_raw: float,
    max_raw: float,
    avg_raw: float,
    index_type: Optional[str] = None
) -> None:
    """
    Insert one percentile history row and commit.

    The row is stamped with the database's ``NOW()`` as ``calc_time``.

    Args:
        site_id: site id.
        percentile_5: raw 5th percentile.
        percentile_95: raw 95th percentile.
        percentile_5_smoothed: smoothed 5th percentile.
        percentile_95_smoothed: smoothed 95th percentile.
        record_count: number of records.
        min_raw: minimum raw score.
        max_raw: maximum raw score.
        avg_raw: average raw score.
        index_type: index type; defaults to ``get_index_type()``.
    """
    resolved_type = self.get_index_type() if index_type is None else index_type

    insert_sql = """
        INSERT INTO billiards_dws.dws_index_percentile_history (
            site_id, index_type, calc_time,
            percentile_5, percentile_95,
            percentile_5_smoothed, percentile_95_smoothed,
            record_count, min_raw_score, max_raw_score, avg_raw_score
        ) VALUES (%s, %s, NOW(), %s, %s, %s, %s, %s, %s, %s, %s)
    """
    values = (
        site_id, resolved_type,
        percentile_5, percentile_95,
        percentile_5_smoothed, percentile_95_smoothed,
        record_count, min_raw, max_raw, avg_raw
    )

    connection = self.db.conn
    with connection.cursor() as cursor:
        cursor.execute(insert_sql, values)
    connection.commit()
|
||||
|
||||
def _apply_ewma_smoothing(
|
||||
self,
|
||||
site_id: int,
|
||||
current_p5: float,
|
||||
current_p95: float,
|
||||
alpha: Optional[float] = None
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
应用EWMA平滑到分位点
|
||||
|
||||
公式: Q_t = (1 - α) * Q_{t-1} + α * Q_now
|
||||
|
||||
Args:
|
||||
site_id: 门店ID
|
||||
current_p5: 当前5分位
|
||||
current_p95: 当前95分位
|
||||
alpha: 平滑系数(默认0.2)
|
||||
|
||||
Returns:
|
||||
(平滑后的P5, 平滑后的P95)
|
||||
"""
|
||||
if alpha is None:
|
||||
alpha = self.get_param('ewma_alpha', self.DEFAULT_EWMA_ALPHA)
|
||||
|
||||
history = self.get_last_percentile_history(site_id)
|
||||
|
||||
if history is None:
|
||||
# 首次计算,不平滑
|
||||
return current_p5, current_p95
|
||||
|
||||
smoothed_p5 = (1 - alpha) * history.percentile_5_smoothed + alpha * current_p5
|
||||
smoothed_p95 = (1 - alpha) * history.percentile_95_smoothed + alpha * current_p95
|
||||
|
||||
return smoothed_p5, smoothed_p95
|
||||
|
||||
# ==========================================================================
|
||||
# 统计工具方法
|
||||
# ==========================================================================
|
||||
|
||||
def calculate_median(self, values: List[float]) -> float:
    """Return the median of ``values``, or 0.0 when the list is empty.

    For an even number of items the two middle values are averaged.
    """
    if not values:
        return 0.0
    ordered = sorted(values)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2
|
||||
|
||||
def calculate_mad(self, values: List[float]) -> float:
    """
    Return the median absolute deviation (MAD) of ``values``.

    MAD = median(|x - median(x)|)

    MAD is a dispersion measure that is more robust than the standard
    deviation because extreme values do not dominate it. An empty list
    yields 0.0.
    """
    if not values:
        return 0.0
    center = self.calculate_median(values)
    return self.calculate_median([abs(item - center) for item in values])
|
||||
|
||||
def safe_log(self, value: float, default: float = 0.0) -> float:
    """Return the natural log of ``value``, or ``default`` when ``value`` <= 0."""
    return default if value <= 0 else math.log(value)
|
||||
|
||||
def safe_ln1p(self, value: float) -> float:
    """Return ln(1 + value), or 0.0 when the logarithm is undefined.

    ``math.log1p`` raises ``ValueError`` for every ``value <= -1``
    (including exactly -1, where the argument of the log is 0), so the
    whole closed range is mapped to 0.0.
    """
    if value <= -1:
        return 0.0
    return math.log1p(value)
|
||||
688
etl_billiards/tasks/dws/index/intimacy_index_task.py
Normal file
688
etl_billiards/tasks/dws/index/intimacy_index_task.py
Normal file
@@ -0,0 +1,688 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
客户-助教亲密指数计算任务
|
||||
|
||||
功能说明:
|
||||
- 衡量客户与助教的关系强度和近期温度
|
||||
- 用于助教约课精力分配和约课成功率预估
|
||||
- 附加课权重 = 基础课的1.5倍
|
||||
- 检测频率激增并放大权重
|
||||
|
||||
算法公式:
|
||||
Raw Score = (w_F × F + w_R × R + w_M × M + w_D × D) × mult
|
||||
|
||||
其中:
|
||||
- F = Σ(τ_i × decay(d_i, h_sess)) # 频次强度
|
||||
- R = decay(d_last, h_last) # 最近温度
|
||||
- M = Σ(ln(1+amt/A0) × decay(d_r, h_pay)) # 归因充值强度
|
||||
- D = Σ(sqrt(dur/60) × τ × decay(d, h)) # 时长贡献
|
||||
- mult = 1 + γ × burst # 激增放大
|
||||
- burst = max(0, ln(1 + (F_short/F_long - 1)))
|
||||
|
||||
特殊逻辑:
|
||||
- 会话合并:同一客人对同一助教,间隔<4小时算同次服务
|
||||
- 充值归因:服务结束后1小时内的充值算做该助教贡献
|
||||
|
||||
数据来源:
|
||||
- dwd_assistant_service_log: 服务记录
|
||||
- dwd_recharge_order: 充值记录
|
||||
|
||||
更新频率:每4小时
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-03
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_index_task import BaseIndexTask, PercentileHistory
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class ServiceSession:
    """One service session obtained by merging adjacent service records."""

    # Start time of the first merged record.
    session_start: datetime
    # End time of the session (latest end among the merged records).
    session_end: datetime
    # Sum of the merged records' durations, in minutes.
    total_duration_minutes: int = 0
    # Course weight: 1.0 for a basic course, 1.5 (by default) for an incentive course.
    course_weight: float = 1.0
    # True when at least one merged record was an incentive course.
    is_incentive: bool = False
|
||||
|
||||
|
||||
@dataclass
class AttributedRecharge:
    """A recharge payment that has been attributed to an assistant."""

    # Time the recharge was paid.
    pay_time: datetime
    # Amount paid.
    pay_amount: float
    # Age of the recharge relative to the calculation time, in (fractional) days.
    days_ago: float
|
||||
|
||||
|
||||
@dataclass
class MemberAssistantIntimacyData:
    """Features and scores for one (member, assistant) pair."""

    member_id: int
    # Assistant staff number kept as a string, e.g. "1", "2", "15".
    assistant_no: str
    # Assistant nickname.
    assistant_nickname: str
    site_id: int
    tenant_id: int

    # --- Input features ---
    session_count: int = 0
    total_duration_minutes: int = 0
    basic_session_count: int = 0
    incentive_session_count: int = 0
    days_since_last_session: Optional[int] = None
    attributed_recharge_count: int = 0
    attributed_recharge_amount: float = 0.0

    # --- Component scores ---
    score_frequency: float = 0.0
    score_recency: float = 0.0
    score_recharge: float = 0.0
    score_duration: float = 0.0
    burst_multiplier: float = 1.0

    # --- Final scores ---
    raw_score: float = 0.0
    display_score: float = 0.0

    # --- Intermediate data (a fresh list per instance) ---
    sessions: List[ServiceSession] = field(default_factory=list)
    recharges: List[AttributedRecharge] = field(default_factory=list)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 亲密指数任务
|
||||
# =============================================================================
|
||||
|
||||
class IntimacyIndexTask(BaseIndexTask):
    """
    Member-assistant intimacy index calculation task.

    Calculation flow:
    1. Extract assistant service records for the lookback window (60 days by default)
    2. Group by (member_id, assistant_no) and merge services that are at most
       ``session_merge_hours`` (4 by default) apart
    3. Extract attributed recharges (paid within 1 hour after a service ends, by default)
    4. Compute the five component scores (frequency, recency, recharge, duration, burst)
    5. Aggregate the raw score
    6. Percentile clipping + log compression + min-max mapping to 0-10
    7. Write to the DWS table
    """

    INDEX_TYPE = "INTIMACY"

    # Skill ID mapping
    SKILL_ID_BASIC = 2790683529513797  # basic course
    SKILL_ID_INCENTIVE = 2790683529513798  # additional / incentive course
    SKILL_ID_BOX = 3039912271463941  # private-room course

    # Default parameters; values returned by load_index_parameters() override these.
    DEFAULT_PARAMS = {
        'lookback_days': 60,
        'halflife_session': 14.0,
        'halflife_last': 10.0,
        'halflife_recharge': 21.0,
        'halflife_short': 7.0,
        'halflife_long': 30.0,
        'amount_base': 500.0,
        'incentive_weight': 1.5,
        'session_merge_hours': 4,
        'recharge_attribute_hours': 1,
        'weight_frequency': 2.0,
        'weight_recency': 1.5,
        'weight_recharge': 2.0,
        'weight_duration': 0.5,
        'burst_gamma': 0.6,
        'percentile_lower': 5,
        'percentile_upper': 95,
    }
|
||||
|
||||
# ==========================================================================
|
||||
# 抽象方法实现
|
||||
# ==========================================================================
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code of the intimacy index task."""
    task_code = "DWS_INTIMACY_INDEX"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the DWS table this task writes to."""
    table_name = "dws_member_assistant_intimacy"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the primary-key column names of the target table."""
    key_columns = ['site_id', 'member_id', 'assistant_id']
    return key_columns
|
||||
|
||||
def get_index_type(self) -> str:
    """Return the index type identifier (the ``INDEX_TYPE`` attribute)."""
    index_type = self.INDEX_TYPE
    return index_type
|
||||
|
||||
# ==========================================================================
|
||||
# 任务执行
|
||||
# ==========================================================================
|
||||
|
||||
def execute(self, context: Optional[TaskContext]) -> Dict[str, Any]:
    """
    Run the intimacy index calculation end to end.

    Steps: resolve site/tenant, load parameters, extract and merge service
    records, attribute recharges, score every (member, assistant) pair,
    normalize to a display score, store the percentile history and write
    the results to the DWS table.

    Args:
        context: Task context; only used to resolve the site ID.

    Returns:
        ``{'status': 'skipped', 'reason': 'no_data'}`` when no service
        record exists in the lookback window, otherwise a dict with
        ``status='success'``, ``pair_count`` and ``records_inserted``.
    """
    self.logger.info("开始计算客户-助教亲密指数")

    # Resolve store and tenant.
    site_id = self._get_site_id(context)
    tenant_id = self._get_tenant_id()

    # Load parameters.
    params = self._load_params()
    lookback_days = int(params['lookback_days'])

    # Reference time and start of the lookback window.
    now = datetime.now(self.tz)
    start_datetime = now - timedelta(days=lookback_days)

    self.logger.info(
        "参数: lookback=%d天, h_sess=%.1f, h_last=%.1f, h_pay=%.1f, γ=%.2f",
        lookback_days, params['halflife_session'], params['halflife_last'],
        params['halflife_recharge'], params['burst_gamma']
    )

    # 1. Extract service records.
    raw_services = self._extract_service_records(site_id, start_datetime, now)
    self.logger.info("提取到 %d 条原始服务记录", len(raw_services))

    if not raw_services:
        self.logger.warning("没有服务记录,跳过计算")
        return {'status': 'skipped', 'reason': 'no_data'}

    # 2. Group by (member_id, assistant_no) and merge sessions.
    pair_data = self._group_and_merge_sessions(raw_services, params, now)
    self.logger.info("合并为 %d 个客户-助教对", len(pair_data))

    # 3. Attribute recharges to pairs (mutates the entries of pair_data).
    self._extract_attributed_recharges(site_id, pair_data, params, now)

    # 4. Score every pair.
    intimacy_data_list: List[MemberAssistantIntimacyData] = []
    for data in pair_data.values():
        data.site_id = site_id
        data.tenant_id = tenant_id

        self._calculate_component_scores(data, params, now)

        # Weighted sum of the components, amplified by the burst multiplier.
        base_score = (
            params['weight_frequency'] * data.score_frequency +
            params['weight_recency'] * data.score_recency +
            params['weight_recharge'] * data.score_recharge +
            params['weight_duration'] * data.score_duration
        )
        data.raw_score = base_score * data.burst_multiplier

        intimacy_data_list.append(data)

    self.logger.info("计算完成 %d 个pair的Raw Score", len(intimacy_data_list))

    # 5. Normalize to the display score (with log compression).
    percentile_lower = int(params['percentile_lower'])
    percentile_upper = int(params['percentile_upper'])
    raw_scores = [((d.member_id, d.assistant_no), d.raw_score) for d in intimacy_data_list]
    normalized = self.batch_normalize_to_display(
        raw_scores,
        use_log=True,
        percentile_lower=percentile_lower,
        percentile_upper=percentile_upper,
        use_smoothing=True,
        site_id=site_id
    )

    # Copy the display scores back onto the data objects.
    display_by_key = {key: display for key, _raw, display in normalized}
    for data in intimacy_data_list:
        key = (data.member_id, data.assistant_no)
        if key in display_by_key:
            data.display_score = display_by_key[key]

    # 6. Store the percentile history for this run.
    if intimacy_data_list:
        all_raw = [d.raw_score for d in intimacy_data_list]
        q_l, q_u = self.calculate_percentiles(all_raw, percentile_lower, percentile_upper)
        smoothed_l, smoothed_u = self._apply_ewma_smoothing(site_id, q_l, q_u)

        self.save_percentile_history(
            site_id=site_id,
            percentile_5=q_l,
            percentile_95=q_u,
            percentile_5_smoothed=smoothed_l,
            percentile_95_smoothed=smoothed_u,
            record_count=len(all_raw),
            min_raw=min(all_raw),
            max_raw=max(all_raw),
            avg_raw=sum(all_raw) / len(all_raw)
        )

    # 7. Write to the DWS table.
    inserted = self._save_intimacy_data(intimacy_data_list)

    self.logger.info("亲密指数计算完成,写入 %d 条记录", inserted)

    return {
        'status': 'success',
        'pair_count': len(intimacy_data_list),
        'records_inserted': inserted
    }
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _extract_service_records(
|
||||
self,
|
||||
site_id: int,
|
||||
start_datetime: datetime,
|
||||
end_datetime: datetime
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取服务记录
|
||||
|
||||
注意: 使用 assistant_no (助教工号) 作为助教标识,而不是 site_assistant_id
|
||||
因为 site_assistant_id 在数据中是每次服务的唯一ID,不是助教的唯一标识
|
||||
|
||||
Returns:
|
||||
[{'member_id', 'assistant_no', 'assistant_nickname', 'start_time', 'end_time', 'duration_minutes', 'skill_id'}, ...]
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
tenant_member_id AS member_id,
|
||||
assistant_no,
|
||||
nickname AS assistant_nickname,
|
||||
start_use_time,
|
||||
last_use_time,
|
||||
COALESCE(income_seconds, 0) / 60 AS duration_minutes,
|
||||
skill_id
|
||||
FROM billiards_dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id > 0 -- 排除散客
|
||||
AND is_delete = 0
|
||||
AND assistant_no IS NOT NULL -- 确保有助教工号
|
||||
AND last_use_time >= %s
|
||||
AND last_use_time < %s
|
||||
ORDER BY tenant_member_id, assistant_no, start_use_time
|
||||
"""
|
||||
|
||||
rows = self.db.query(sql, (site_id, start_datetime, end_datetime))
|
||||
|
||||
result = []
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
# 使用 assistant_no 作为助教标识
|
||||
assistant_no = row_dict['assistant_no']
|
||||
if assistant_no:
|
||||
result.append({
|
||||
'member_id': int(row_dict['member_id']),
|
||||
'assistant_no': str(assistant_no), # 助教工号(字符串)
|
||||
'assistant_nickname': row_dict['assistant_nickname'] or '',
|
||||
'start_time': row_dict['start_use_time'],
|
||||
'end_time': row_dict['last_use_time'],
|
||||
'duration_minutes': int(row_dict['duration_minutes'] or 0),
|
||||
'skill_id': int(row_dict['skill_id'] or 0)
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
def _group_and_merge_sessions(
    self,
    raw_services: List[Dict[str, Any]],
    params: Dict[str, float],
    now: datetime
) -> Dict[Tuple[int, str], MemberAssistantIntimacyData]:
    """
    Group service records by (member_id, assistant_no) and merge them into sessions.

    Within a pair, records are processed in start-time order; a record is
    merged into the open session when it starts at most
    ``session_merge_hours`` after that session's current end. A merged
    session keeps the highest course weight of its records and counts as
    incentive if any of its records is an incentive course.

    Args:
        raw_services: Records as produced by ``_extract_service_records``.
        params: Parameters; reads ``session_merge_hours`` and ``incentive_weight``.
        now: Reference time used for ``days_since_last_session``.

    Returns:
        Mapping of (member_id, assistant_no) to the pair's data object with
        sessions and session statistics filled in; ``site_id`` and
        ``tenant_id`` are left at 0 for the caller to set.
    """
    max_gap = timedelta(hours=int(params['session_merge_hours']))
    incentive_weight = params['incentive_weight']

    # Bucket the raw records per pair.
    grouped: Dict[Tuple[int, str], List[Dict[str, Any]]] = {}
    for record in raw_services:
        grouped.setdefault((record['member_id'], record['assistant_no']), []).append(record)

    result: Dict[Tuple[int, str], MemberAssistantIntimacyData] = {}

    for pair_key, records in grouped.items():
        member_id, assistant_no = pair_key
        entry = MemberAssistantIntimacyData(
            member_id=member_id,
            assistant_no=assistant_no,
            # The nickname is taken from the pair's first record.
            assistant_nickname=records[0]['assistant_nickname'] if records else '',
            site_id=0,  # filled in later by the caller
            tenant_id=0
        )

        open_session: Optional[ServiceSession] = None
        for record in sorted(records, key=lambda item: item['start_time']):
            begins = record['start_time']
            ends = record['end_time']
            minutes = record['duration_minutes']

            # Course type decides the weight.
            is_incentive = (record['skill_id'] == self.SKILL_ID_INCENTIVE)
            weight = incentive_weight if is_incentive else 1.0

            if open_session is not None and begins - open_session.session_end <= max_gap:
                # Close enough to the open session: extend it.
                open_session.session_end = max(open_session.session_end, ends)
                open_session.total_duration_minutes += minutes
                open_session.course_weight = max(open_session.course_weight, weight)
                open_session.is_incentive = open_session.is_incentive or is_incentive
                continue

            # Gap too large (or no session yet): close the open one and start anew.
            if open_session is not None:
                entry.sessions.append(open_session)
            open_session = ServiceSession(
                session_start=begins,
                session_end=ends,
                total_duration_minutes=minutes,
                course_weight=weight,
                is_incentive=is_incentive
            )

        if open_session is not None:
            entry.sessions.append(open_session)

        # Session statistics.
        merged = entry.sessions
        incentive_total = sum(1 for s in merged if s.is_incentive)
        entry.session_count = len(merged)
        entry.total_duration_minutes = sum(s.total_duration_minutes for s in merged)
        entry.basic_session_count = len(merged) - incentive_total
        entry.incentive_session_count = incentive_total

        # Whole days since the most recent session ended.
        if merged:
            latest_end = max(s.session_end for s in merged)
            entry.days_since_last_session = (now - latest_end).days

        result[pair_key] = entry

    return result
|
||||
|
||||
def _extract_attributed_recharges(
    self,
    site_id: int,
    pair_data: Dict[Tuple[int, str], MemberAssistantIntimacyData],
    params: Dict[str, float],
    now: datetime
) -> None:
    """
    Attribute recharge payments to (member, assistant) pairs, in place.

    A recharge qualifies for a session when it was paid at or after the
    session's end and no more than ``recharge_attribute_hours`` later.
    Each recharge is credited to exactly ONE assistant: the one whose
    qualifying session ended closest to the payment (the first one found
    wins a tie). The previous implementation only broke out of the inner
    session loop, so a single recharge could be credited to several
    assistants of the same member.

    Args:
        site_id: Store (site) ID.
        pair_data: Pairs keyed by (member_id, assistant_no); the winning
            entry's ``attributed_recharge_count``,
            ``attributed_recharge_amount`` and ``recharges`` are updated.
        params: Parameters; reads ``recharge_attribute_hours`` and ``lookback_days``.
        now: Reference time for the lookback window and ``days_ago``.
    """
    attribution_window = timedelta(hours=int(params['recharge_attribute_hours']))

    # Index the pairs per member so each recharge only scans that member's pairs.
    pairs_by_member: Dict[int, List[MemberAssistantIntimacyData]] = {}
    for (member_id, _assistant_no), data in pair_data.items():
        pairs_by_member.setdefault(member_id, []).append(data)
    if not pairs_by_member:
        return

    # Member IDs are bound as query parameters instead of being formatted
    # into the SQL text; only the placeholder list is generated.
    member_ids = sorted(pairs_by_member)
    placeholders = ','.join(['%s'] * len(member_ids))
    sql = f"""
        SELECT
            member_id,
            pay_time,
            pay_amount
        FROM billiards_dwd.dwd_recharge_order
        WHERE site_id = %s
          AND member_id IN ({placeholders})
          AND settle_type = 5 -- 充值订单
          AND pay_time >= %s
    """

    start_datetime = now - timedelta(days=int(params['lookback_days']))
    rows = self.db.query(sql, (site_id, *member_ids, start_datetime))

    no_gap = timedelta(0)
    for row in (rows or []):
        record = dict(row)
        pay_amount = float(record['pay_amount'] or 0)
        if pay_amount <= 0:
            continue
        pay_time = record['pay_time']

        # Find the pair whose session ended closest before the payment.
        winner = None
        winner_gap = None
        for data in pairs_by_member.get(int(record['member_id']), ()):
            for session in data.sessions:
                gap = pay_time - session.session_end
                if gap < no_gap or gap > attribution_window:
                    continue
                if winner_gap is None or gap < winner_gap:
                    winner, winner_gap = data, gap

        if winner is None:
            continue

        winner.attributed_recharge_count += 1
        winner.attributed_recharge_amount += pay_amount
        winner.recharges.append(AttributedRecharge(
            pay_time=pay_time,
            pay_amount=pay_amount,
            days_ago=(now - pay_time).total_seconds() / 86400
        ))
|
||||
|
||||
# ==========================================================================
|
||||
# 分数计算方法
|
||||
# ==========================================================================
|
||||
|
||||
def _calculate_component_scores(
|
||||
self,
|
||||
data: MemberAssistantIntimacyData,
|
||||
params: Dict[str, float],
|
||||
now: datetime
|
||||
) -> None:
|
||||
"""计算5项分数"""
|
||||
epsilon = 1e-6
|
||||
|
||||
h_sess = params['halflife_session']
|
||||
h_last = params['halflife_last']
|
||||
h_pay = params['halflife_recharge']
|
||||
h_short = params['halflife_short']
|
||||
h_long = params['halflife_long']
|
||||
A0 = params['amount_base']
|
||||
gamma = params['burst_gamma']
|
||||
|
||||
# 1. 频次强度 F = Σ(τ_i × decay(d_i, h_sess))
|
||||
F = 0.0
|
||||
for session in data.sessions:
|
||||
days_ago = (now - session.session_end).total_seconds() / 86400
|
||||
F += session.course_weight * self.decay(days_ago, h_sess)
|
||||
data.score_frequency = F
|
||||
|
||||
# 2. 最近温度 R = decay(d_last, h_last)
|
||||
if data.days_since_last_session is not None:
|
||||
data.score_recency = self.decay(data.days_since_last_session, h_last)
|
||||
else:
|
||||
data.score_recency = 0.0
|
||||
|
||||
# 3. 归因充值强度 M = Σ(ln(1+amt/A0) × decay(d_r, h_pay))
|
||||
M = 0.0
|
||||
for recharge in data.recharges:
|
||||
m_amt = math.log1p(recharge.pay_amount / A0)
|
||||
M += m_amt * self.decay(recharge.days_ago, h_pay)
|
||||
data.score_recharge = M
|
||||
|
||||
# 4. 时长贡献 D = Σ(sqrt(dur/60) × τ × decay(d, h_sess))
|
||||
D = 0.0
|
||||
for session in data.sessions:
|
||||
days_ago = (now - session.session_end).total_seconds() / 86400
|
||||
dur_hours = session.total_duration_minutes / 60.0
|
||||
D += math.sqrt(dur_hours) * session.course_weight * self.decay(days_ago, h_sess)
|
||||
data.score_duration = D
|
||||
|
||||
# 5. 频率激增放大 mult = 1 + γ × burst
|
||||
# F_short = Σ(τ × decay(d, h_short))
|
||||
# F_long = Σ(τ × decay(d, h_long))
|
||||
F_short = 0.0
|
||||
F_long = 0.0
|
||||
for session in data.sessions:
|
||||
days_ago = (now - session.session_end).total_seconds() / 86400
|
||||
F_short += session.course_weight * self.decay(days_ago, h_short)
|
||||
F_long += session.course_weight * self.decay(days_ago, h_long)
|
||||
|
||||
# burst = max(0, ln(1 + (F_short/F_long - 1)))
|
||||
ratio = F_short / (F_long + epsilon)
|
||||
if ratio > 1:
|
||||
burst = self.safe_ln1p(ratio - 1)
|
||||
else:
|
||||
burst = 0.0
|
||||
|
||||
data.burst_multiplier = 1 + gamma * burst
|
||||
|
||||
# ==========================================================================
|
||||
# 数据保存方法
|
||||
# ==========================================================================
|
||||
|
||||
def _save_intimacy_data(self, data_list: List[MemberAssistantIntimacyData]) -> int:
|
||||
"""保存亲密数据到DWS表"""
|
||||
if not data_list:
|
||||
return 0
|
||||
|
||||
# 先删除已存在的记录
|
||||
site_id = data_list[0].site_id
|
||||
|
||||
# 构建删除条件(使用assistant_no)
|
||||
# 注意:assistant_id字段在数据库中存储assistant_no的整数形式
|
||||
keys = [(d.member_id, d.assistant_no) for d in data_list]
|
||||
conditions = " OR ".join(
|
||||
f"(member_id = {m} AND assistant_id = {int(a)})" for m, a in keys
|
||||
)
|
||||
|
||||
delete_sql = f"""
|
||||
DELETE FROM billiards_dws.dws_member_assistant_intimacy
|
||||
WHERE site_id = %s AND ({conditions})
|
||||
"""
|
||||
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(delete_sql, (site_id,))
|
||||
|
||||
# 插入新记录
|
||||
# 使用assistant_no的整数值作为assistant_id
|
||||
insert_sql = """
|
||||
INSERT INTO billiards_dws.dws_member_assistant_intimacy (
|
||||
site_id, tenant_id, member_id, assistant_id,
|
||||
session_count, total_duration_minutes,
|
||||
basic_session_count, incentive_session_count,
|
||||
days_since_last_session,
|
||||
attributed_recharge_count, attributed_recharge_amount,
|
||||
score_frequency, score_recency, score_recharge, score_duration,
|
||||
burst_multiplier, raw_score, display_score,
|
||||
calc_time, created_at, updated_at
|
||||
) VALUES (
|
||||
%s, %s, %s, %s,
|
||||
%s, %s,
|
||||
%s, %s,
|
||||
%s,
|
||||
%s, %s,
|
||||
%s, %s, %s, %s,
|
||||
%s, %s, %s,
|
||||
NOW(), NOW(), NOW()
|
||||
)
|
||||
"""
|
||||
|
||||
inserted = 0
|
||||
with self.db.conn.cursor() as cur:
|
||||
for data in data_list:
|
||||
# 将assistant_no转为整数作为assistant_id
|
||||
assistant_id = int(data.assistant_no) if data.assistant_no.isdigit() else 0
|
||||
cur.execute(insert_sql, (
|
||||
data.site_id, data.tenant_id, data.member_id, assistant_id,
|
||||
data.session_count, data.total_duration_minutes,
|
||||
data.basic_session_count, data.incentive_session_count,
|
||||
data.days_since_last_session,
|
||||
data.attributed_recharge_count, data.attributed_recharge_amount,
|
||||
data.score_frequency, data.score_recency, data.score_recharge, data.score_duration,
|
||||
data.burst_multiplier, data.raw_score, data.display_score
|
||||
))
|
||||
inserted += cur.rowcount
|
||||
|
||||
# 提交事务
|
||||
self.db.conn.commit()
|
||||
|
||||
return inserted
|
||||
|
||||
# ==========================================================================
|
||||
# 辅助方法
|
||||
# ==========================================================================
|
||||
|
||||
def _load_params(self) -> Dict[str, float]:
|
||||
"""加载参数,缺失时使用默认值"""
|
||||
params = self.load_index_parameters()
|
||||
result = dict(self.DEFAULT_PARAMS)
|
||||
result.update(params)
|
||||
return result
|
||||
|
||||
def _get_site_id(self, context: Optional[TaskContext]) -> int:
|
||||
"""获取门店ID"""
|
||||
if context and hasattr(context, 'store_id') and context.store_id:
|
||||
return context.store_id
|
||||
|
||||
site_id = self.config.get('app.default_site_id')
|
||||
if site_id:
|
||||
return int(site_id)
|
||||
|
||||
sql = "SELECT DISTINCT site_id FROM billiards_dwd.dwd_assistant_service_log LIMIT 1"
|
||||
rows = self.db.query(sql)
|
||||
if rows:
|
||||
return int(dict(rows[0])['site_id'])
|
||||
|
||||
raise ValueError("无法确定门店ID")
|
||||
|
||||
def _get_tenant_id(self) -> int:
|
||||
"""获取租户ID"""
|
||||
tenant_id = self.config.get('app.tenant_id')
|
||||
if tenant_id:
|
||||
return int(tenant_id)
|
||||
|
||||
sql = "SELECT DISTINCT tenant_id FROM billiards_dwd.dwd_assistant_service_log LIMIT 1"
|
||||
rows = self.db.query(sql)
|
||||
if rows:
|
||||
return int(dict(rows[0])['tenant_id'])
|
||||
|
||||
return 0
|
||||
564
etl_billiards/tasks/dws/index/recall_index_task.py
Normal file
564
etl_billiards/tasks/dws/index/recall_index_task.py
Normal file
@@ -0,0 +1,564 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
客户召回指数计算任务
|
||||
|
||||
功能说明:
|
||||
- 衡量客户召回的必要性和紧急程度
|
||||
- 尊重客户个人到店周期(μ=中位数, σ=MAD)
|
||||
- 对新客户、刚充值客户增加召回倾向
|
||||
- 检测"热了又断"的情况
|
||||
|
||||
算法公式:
|
||||
Raw Score = w_over × overdue + w_new × new_bonus + w_re × re_bonus + w_hot × hot_drop
|
||||
|
||||
其中:
|
||||
- overdue = 1 - exp(-max(0, (t-μ)/σ)) # 超期紧急性
|
||||
- new_bonus = decay(d_first, h_new) # 新客户加分
|
||||
- re_bonus = decay(d_recharge, h_re) # 刚充值加分
|
||||
- hot_drop = max(0, ln(1 + (r14/r60 - 1))) # 热度断档加分
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 会员到店记录
|
||||
- dwd_recharge_order: 充值记录
|
||||
- dim_member: 首访时间
|
||||
|
||||
更新频率:每2小时
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-03
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_index_task import BaseIndexTask, PercentileHistory
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class MemberRecallData:
    """Features and scores used to compute one member's recall index."""

    member_id: int
    site_id: int
    tenant_id: int

    # --- Input features ---
    days_since_last_visit: Optional[int] = None
    visit_interval_median: Optional[float] = None
    visit_interval_mad: Optional[float] = None
    days_since_first_visit: Optional[int] = None
    days_since_last_recharge: Optional[int] = None
    visits_last_14_days: int = 0
    visits_last_60_days: int = 0

    # --- Component scores ---
    score_overdue: float = 0.0
    score_new_bonus: float = 0.0
    score_recharge_bonus: float = 0.0
    score_hot_drop: float = 0.0

    # --- Final scores ---
    raw_score: float = 0.0
    display_score: float = 0.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 召回指数任务
|
||||
# =============================================================================
|
||||
|
||||
class RecallIndexTask(BaseIndexTask):
    """
    Member recall index calculation task.

    Calculation flow:
    1. Extract members with visit records in the lookback window (60 days by default)
    2. Compute each member's visit-interval features (median, MAD)
    3. Compute the four component scores (overdue, new member, recharge, hot drop)
    4. Aggregate the raw score
    5. Percentile clipping + min-max mapping to 0-10
    6. Write to the DWS table
    """

    INDEX_TYPE = "RECALL"

    # Default parameters
    DEFAULT_PARAMS = {
        'lookback_days': 60,
        'sigma_min': 2.0,
        'halflife_new': 7.0,
        'halflife_recharge': 10.0,
        'weight_overdue': 3.0,
        'weight_new': 1.0,
        'weight_recharge': 1.0,
        'weight_hot': 1.0,
        'percentile_lower': 5,
        'percentile_upper': 95,
    }
|
||||
|
||||
# ==========================================================================
|
||||
# 抽象方法实现
|
||||
# ==========================================================================
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code of the recall index task."""
    task_code = "DWS_RECALL_INDEX"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the DWS table this task writes to."""
    table_name = "dws_member_recall_index"
    return table_name
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the primary-key column names of the target table."""
    key_columns = ['site_id', 'member_id']
    return key_columns
|
||||
|
||||
def get_index_type(self) -> str:
    """Return the index type identifier (the ``INDEX_TYPE`` attribute)."""
    index_type = self.INDEX_TYPE
    return index_type
|
||||
|
||||
# ==========================================================================
|
||||
# 任务执行
|
||||
# ==========================================================================
|
||||
|
||||
def execute(self, context: Optional[TaskContext]) -> Dict[str, Any]:
    """
    Run the recall index calculation end to end.

    Steps: resolve site/tenant, load parameters, extract visits, recharges
    and first-visit dates, score every member, normalize to a display
    score, store the percentile history and write the results.

    The reference date is taken in the task's configured time zone
    (``self.tz``), the same clock the intimacy index task uses, rather
    than the host's local date.

    Args:
        context: Task context; only used to resolve the site ID.

    Returns:
        ``{'status': 'skipped', 'reason': 'no_data'}`` when no member
        visit exists in the lookback window, otherwise a dict with
        ``status='success'``, ``member_count`` and ``records_inserted``.
    """
    self.logger.info("开始计算客户召回指数")

    # Resolve store and tenant.
    site_id = self._get_site_id(context)
    tenant_id = self._get_tenant_id()

    # Load parameters.
    params = self._load_params()
    lookback_days = int(params['lookback_days'])

    # Reference date (in the task time zone) and start of the lookback window.
    base_date = datetime.now(self.tz).date()
    start_date = base_date - timedelta(days=lookback_days)

    self.logger.info(
        "参数: lookback=%d天, sigma_min=%.1f, h_new=%.1f, h_re=%.1f",
        lookback_days, params['sigma_min'], params['halflife_new'], params['halflife_recharge']
    )

    # 1. Extract member visits.
    member_visits = self._extract_member_visits(site_id, start_date, base_date)
    self.logger.info("提取到 %d 个会员的到店记录", len(member_visits))

    if not member_visits:
        self.logger.warning("没有会员到店记录,跳过计算")
        return {'status': 'skipped', 'reason': 'no_data'}

    # 2. Extract the latest recharge date per member.
    recharge_data = self._extract_recharge_data(site_id, start_date, base_date)
    self.logger.info("提取到 %d 个会员的充值记录", len(recharge_data))

    # 3. Extract first-visit dates.
    first_visit_data = self._extract_first_visit_data(site_id, list(member_visits.keys()))
    self.logger.info("提取到 %d 个会员的首访时间", len(first_visit_data))

    # 4. Build and score the recall data of every member.
    recall_data_list: List[MemberRecallData] = []

    for member_id, visit_dates in member_visits.items():
        data = MemberRecallData(
            member_id=member_id,
            site_id=site_id,
            tenant_id=tenant_id
        )

        # Visit features.
        self._calculate_visit_features(data, visit_dates, base_date, params)

        # Recharge feature.
        if member_id in recharge_data:
            last_recharge_date = recharge_data[member_id]
            data.days_since_last_recharge = (base_date - last_recharge_date).days

        # First-visit feature.
        if member_id in first_visit_data:
            first_visit_date = first_visit_data[member_id]
            data.days_since_first_visit = (base_date - first_visit_date).days

        # Component scores.
        self._calculate_component_scores(data, params)

        # Weighted sum of the components.
        data.raw_score = (
            params['weight_overdue'] * data.score_overdue +
            params['weight_new'] * data.score_new_bonus +
            params['weight_recharge'] * data.score_recharge_bonus +
            params['weight_hot'] * data.score_hot_drop
        )

        recall_data_list.append(data)

    self.logger.info("计算完成 %d 个会员的Raw Score", len(recall_data_list))

    # 5. Normalize to the display score.
    percentile_lower = int(params['percentile_lower'])
    percentile_upper = int(params['percentile_upper'])
    raw_scores = [(d.member_id, d.raw_score) for d in recall_data_list]
    normalized = self.batch_normalize_to_display(
        raw_scores,
        use_log=False,
        percentile_lower=percentile_lower,
        percentile_upper=percentile_upper,
        use_smoothing=True,
        site_id=site_id
    )

    # Copy the display scores back onto the data objects.
    display_by_member = {member_id: display for member_id, _raw, display in normalized}
    for data in recall_data_list:
        if data.member_id in display_by_member:
            data.display_score = display_by_member[data.member_id]

    # 6. Store the percentile history for this run.
    if recall_data_list:
        all_raw = [d.raw_score for d in recall_data_list]
        q_l, q_u = self.calculate_percentiles(all_raw, percentile_lower, percentile_upper)
        smoothed_l, smoothed_u = self._apply_ewma_smoothing(site_id, q_l, q_u)

        self.save_percentile_history(
            site_id=site_id,
            percentile_5=q_l,
            percentile_95=q_u,
            percentile_5_smoothed=smoothed_l,
            percentile_95_smoothed=smoothed_u,
            record_count=len(all_raw),
            min_raw=min(all_raw),
            max_raw=max(all_raw),
            avg_raw=sum(all_raw) / len(all_raw)
        )

    # 7. Write to the DWS table.
    inserted = self._save_recall_data(recall_data_list)

    self.logger.info("召回指数计算完成,写入 %d 条记录", inserted)

    return {
        'status': 'success',
        'member_count': len(recall_data_list),
        'records_inserted': inserted
    }
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _extract_member_visits(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> Dict[int, List[date]]:
|
||||
"""
|
||||
提取会员到店记录
|
||||
|
||||
Returns:
|
||||
{member_id: [visit_date1, visit_date2, ...]}
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
member_id,
|
||||
DATE(pay_time) AS visit_date
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND member_id > 0 -- 排除散客
|
||||
AND settle_type = 1 -- 台桌结账
|
||||
AND pay_time >= %s
|
||||
AND pay_time < %s + INTERVAL '1 day'
|
||||
GROUP BY member_id, DATE(pay_time)
|
||||
ORDER BY member_id, visit_date
|
||||
"""
|
||||
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
result: Dict[int, List[date]] = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
member_id = int(row_dict['member_id'])
|
||||
visit_date = row_dict['visit_date']
|
||||
|
||||
if member_id not in result:
|
||||
result[member_id] = []
|
||||
result[member_id].append(visit_date)
|
||||
|
||||
return result
|
||||
|
||||
def _extract_recharge_data(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> Dict[int, date]:
|
||||
"""
|
||||
提取最近充值记录
|
||||
|
||||
Returns:
|
||||
{member_id: last_recharge_date}
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
member_id,
|
||||
MAX(DATE(pay_time)) AS last_recharge_date
|
||||
FROM billiards_dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND member_id > 0
|
||||
AND settle_type = 5 -- 充值订单
|
||||
AND pay_time >= %s
|
||||
AND pay_time < %s + INTERVAL '1 day'
|
||||
GROUP BY member_id
|
||||
"""
|
||||
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
result: Dict[int, date] = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
result[int(row_dict['member_id'])] = row_dict['last_recharge_date']
|
||||
|
||||
return result
|
||||
|
||||
def _extract_first_visit_data(
|
||||
self,
|
||||
site_id: int,
|
||||
member_ids: List[int]
|
||||
) -> Dict[int, date]:
|
||||
"""
|
||||
提取首访时间
|
||||
|
||||
优先使用dim_member.create_time,如果没有则使用dwd_settlement_head中的首次消费时间
|
||||
|
||||
Returns:
|
||||
{member_id: first_visit_date}
|
||||
"""
|
||||
if not member_ids:
|
||||
return {}
|
||||
|
||||
# 使用dim_member的create_time作为首访时间
|
||||
member_ids_str = ','.join(str(m) for m in member_ids)
|
||||
sql = f"""
|
||||
SELECT
|
||||
member_id,
|
||||
DATE(create_time) AS first_visit_date
|
||||
FROM billiards_dwd.dim_member
|
||||
WHERE member_id IN ({member_ids_str})
|
||||
AND scd2_is_current = 1
|
||||
"""
|
||||
|
||||
rows = self.db.query(sql)
|
||||
|
||||
result: Dict[int, date] = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
member_id = int(row_dict['member_id'])
|
||||
first_date = row_dict['first_visit_date']
|
||||
if first_date:
|
||||
result[member_id] = first_date
|
||||
|
||||
return result
|
||||
|
||||
# ==========================================================================
|
||||
# 特征计算方法
|
||||
# ==========================================================================
|
||||
|
||||
def _calculate_visit_features(
|
||||
self,
|
||||
data: MemberRecallData,
|
||||
visit_dates: List[date],
|
||||
base_date: date,
|
||||
params: Dict[str, float]
|
||||
) -> None:
|
||||
"""计算到店特征"""
|
||||
if not visit_dates:
|
||||
return
|
||||
|
||||
# 最近一次到店
|
||||
last_visit = max(visit_dates)
|
||||
data.days_since_last_visit = (base_date - last_visit).days
|
||||
|
||||
# 到店间隔
|
||||
sorted_dates = sorted(visit_dates)
|
||||
intervals = []
|
||||
for i in range(1, len(sorted_dates)):
|
||||
interval = (sorted_dates[i] - sorted_dates[i-1]).days
|
||||
intervals.append(float(interval))
|
||||
|
||||
if intervals:
|
||||
# 中位数(μ)
|
||||
data.visit_interval_median = self.calculate_median(intervals)
|
||||
|
||||
# MAD(σ),下限为sigma_min
|
||||
mad = self.calculate_mad(intervals)
|
||||
data.visit_interval_mad = max(mad, params['sigma_min'])
|
||||
else:
|
||||
# 只有一次到店,使用默认值
|
||||
data.visit_interval_median = 7.0 # 默认周期7天
|
||||
data.visit_interval_mad = params['sigma_min']
|
||||
|
||||
# 近14天/60天到店次数
|
||||
days_14_ago = base_date - timedelta(days=14)
|
||||
days_60_ago = base_date - timedelta(days=60)
|
||||
|
||||
data.visits_last_14_days = sum(1 for d in visit_dates if d >= days_14_ago)
|
||||
data.visits_last_60_days = sum(1 for d in visit_dates if d >= days_60_ago)
|
||||
|
||||
def _calculate_component_scores(
|
||||
self,
|
||||
data: MemberRecallData,
|
||||
params: Dict[str, float]
|
||||
) -> None:
|
||||
"""计算4项分数"""
|
||||
|
||||
# 1. 超期紧急性
|
||||
if data.days_since_last_visit is not None and data.visit_interval_median is not None:
|
||||
t = data.days_since_last_visit
|
||||
mu = data.visit_interval_median
|
||||
sigma = data.visit_interval_mad or params['sigma_min']
|
||||
|
||||
# z = max(0, (t - μ) / σ)
|
||||
z = max(0.0, (t - mu) / sigma)
|
||||
# overdue = 1 - exp(-z)
|
||||
data.score_overdue = 1.0 - math.exp(-z)
|
||||
|
||||
# 2. 新客户加分
|
||||
lookback_days = int(params['lookback_days'])
|
||||
if data.days_since_first_visit is not None and data.days_since_first_visit <= lookback_days:
|
||||
data.score_new_bonus = self.decay(
|
||||
data.days_since_first_visit,
|
||||
params['halflife_new']
|
||||
)
|
||||
|
||||
# 3. 刚充值加分
|
||||
if data.days_since_last_recharge is not None and data.days_since_last_recharge <= lookback_days:
|
||||
data.score_recharge_bonus = self.decay(
|
||||
data.days_since_last_recharge,
|
||||
params['halflife_recharge']
|
||||
)
|
||||
|
||||
# 4. 热度断档加分
|
||||
epsilon = 1e-6
|
||||
n14 = data.visits_last_14_days
|
||||
n60 = data.visits_last_60_days
|
||||
|
||||
r14 = n14 / 14.0
|
||||
r60 = (n60 + 1) / 60.0 # +1 平滑
|
||||
|
||||
hot_ratio = r14 / (r60 + epsilon)
|
||||
|
||||
# hot_drop = max(0, ln(1 + (hot_ratio - 1)))
|
||||
if hot_ratio > 1:
|
||||
data.score_hot_drop = self.safe_ln1p(hot_ratio - 1)
|
||||
else:
|
||||
data.score_hot_drop = 0.0
|
||||
|
||||
# ==========================================================================
|
||||
# 数据保存方法
|
||||
# ==========================================================================
|
||||
|
||||
def _save_recall_data(self, data_list: List[MemberRecallData]) -> int:
|
||||
"""保存召回数据到DWS表"""
|
||||
if not data_list:
|
||||
return 0
|
||||
|
||||
# 先删除已存在的记录
|
||||
site_id = data_list[0].site_id
|
||||
member_ids = [d.member_id for d in data_list]
|
||||
|
||||
member_ids_str = ','.join(str(m) for m in member_ids)
|
||||
delete_sql = f"""
|
||||
DELETE FROM billiards_dws.dws_member_recall_index
|
||||
WHERE site_id = %s AND member_id IN ({member_ids_str})
|
||||
"""
|
||||
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(delete_sql, (site_id,))
|
||||
|
||||
# 插入新记录
|
||||
insert_sql = """
|
||||
INSERT INTO billiards_dws.dws_member_recall_index (
|
||||
site_id, tenant_id, member_id,
|
||||
days_since_last_visit, visit_interval_median, visit_interval_mad,
|
||||
days_since_first_visit, days_since_last_recharge,
|
||||
visits_last_14_days, visits_last_60_days,
|
||||
score_overdue, score_new_bonus, score_recharge_bonus, score_hot_drop,
|
||||
raw_score, display_score,
|
||||
calc_time, created_at, updated_at
|
||||
) VALUES (
|
||||
%s, %s, %s,
|
||||
%s, %s, %s,
|
||||
%s, %s,
|
||||
%s, %s,
|
||||
%s, %s, %s, %s,
|
||||
%s, %s,
|
||||
NOW(), NOW(), NOW()
|
||||
)
|
||||
"""
|
||||
|
||||
inserted = 0
|
||||
with self.db.conn.cursor() as cur:
|
||||
for data in data_list:
|
||||
cur.execute(insert_sql, (
|
||||
data.site_id, data.tenant_id, data.member_id,
|
||||
data.days_since_last_visit, data.visit_interval_median, data.visit_interval_mad,
|
||||
data.days_since_first_visit, data.days_since_last_recharge,
|
||||
data.visits_last_14_days, data.visits_last_60_days,
|
||||
data.score_overdue, data.score_new_bonus, data.score_recharge_bonus, data.score_hot_drop,
|
||||
data.raw_score, data.display_score
|
||||
))
|
||||
inserted += cur.rowcount
|
||||
|
||||
# 提交事务
|
||||
self.db.conn.commit()
|
||||
|
||||
return inserted
|
||||
|
||||
# ==========================================================================
|
||||
# 辅助方法
|
||||
# ==========================================================================
|
||||
|
||||
def _load_params(self) -> Dict[str, float]:
|
||||
"""加载参数,缺失时使用默认值"""
|
||||
params = self.load_index_parameters()
|
||||
result = dict(self.DEFAULT_PARAMS)
|
||||
result.update(params)
|
||||
return result
|
||||
|
||||
def _get_site_id(self, context: Optional[TaskContext]) -> int:
|
||||
"""获取门店ID"""
|
||||
if context and hasattr(context, 'store_id') and context.store_id:
|
||||
return context.store_id
|
||||
|
||||
# 从配置获取默认门店ID
|
||||
site_id = self.config.get('app.default_site_id')
|
||||
if site_id:
|
||||
return int(site_id)
|
||||
|
||||
# 查询数据库获取第一个门店
|
||||
sql = "SELECT DISTINCT site_id FROM billiards_dwd.dwd_settlement_head LIMIT 1"
|
||||
rows = self.db.query(sql)
|
||||
if rows:
|
||||
return int(dict(rows[0])['site_id'])
|
||||
|
||||
raise ValueError("无法确定门店ID")
|
||||
|
||||
def _get_tenant_id(self) -> int:
|
||||
"""获取租户ID"""
|
||||
tenant_id = self.config.get('app.tenant_id')
|
||||
if tenant_id:
|
||||
return int(tenant_id)
|
||||
|
||||
sql = "SELECT DISTINCT tenant_id FROM billiards_dwd.dwd_settlement_head LIMIT 1"
|
||||
rows = self.db.query(sql)
|
||||
if rows:
|
||||
return int(dict(rows[0])['tenant_id'])
|
||||
|
||||
return 0
|
||||
368
etl_billiards/tasks/dws/member_consumption_task.py
Normal file
368
etl_billiards/tasks/dws/member_consumption_task.py
Normal file
@@ -0,0 +1,368 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
会员消费汇总任务
|
||||
|
||||
功能说明:
|
||||
以"会员"为粒度,统计消费行为和滚动窗口指标
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表
|
||||
- dim_member: 会员维度
|
||||
- dim_member_card_account: 会员卡账户
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_member_consumption_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:delete-before-insert(按统计日期)
|
||||
|
||||
业务规则:
|
||||
- 散客处理:member_id=0 不进入此表
|
||||
- 滚动窗口:7/10/15/30/60/90天
|
||||
- 卡余额:区分储值卡(现金卡)和赠送卡
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class MemberConsumptionTask(BaseDwsTask):
    """
    Member consumption summary task.

    Produces one row per member and statistics date with:
    - first / most recent consumption date
    - lifetime consumption totals
    - rolling-window statistics (7/10/15/30/60/90 days)
    - a card balance snapshot
    - activity flags and a customer tier
    """

    # Card-type IDs used to split balances into the cash (stored-value) card
    # and gift cards. Balances of any other card type are ignored.
    _CASH_CARD_TYPE_ID = 2793249295533893
    _GIFT_CARD_TYPE_IDS = frozenset(
        {2791990152417157, 2793266846533445, 2794699703437125}
    )

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_MEMBER_CONSUMPTION"

    def get_target_table(self) -> str:
        """Return the name of the DWS table this task writes."""
        return "dws_member_consumption_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the primary-key columns of the target table."""
        return ["site_id", "member_id", "stat_date"]

    # ==========================================================================
    # ETL main flow
    # ==========================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """
        Read the source data for the statistics date.

        The statistics date is ``context.window_end`` reduced to a date when
        it exposes ``.date()``.

        Returns:
            Dict with ``consumption_stats``, ``member_info``,
            ``card_balances``, ``stat_date`` and ``site_id``.
        """
        window_end = context.window_end
        stat_date = window_end.date() if hasattr(window_end, 'date') else window_end
        site_id = context.store_id

        self.logger.info(
            "%s: 提取数据,统计日期 %s",
            self.get_task_code(), stat_date
        )

        # 1. Per-member consumption statistics (including rolling windows)
        consumption_stats = self._extract_consumption_stats(site_id, stat_date)

        # 2. Member attributes
        member_info = self._extract_member_info(site_id)

        # 3. Member card balances
        card_balances = self._extract_card_balances(site_id)

        return {
            'consumption_stats': consumption_stats,
            'member_info': member_info,
            'card_balances': card_balances,
            'stat_date': stat_date,
            'site_id': site_id
        }

    def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
        """
        Build one target-table record per non-guest member.

        Args:
            extracted: Result of :meth:`extract`.
            context: Task context (not used here).

        Returns:
            List of records keyed by target column name.
        """
        consumption_stats = extracted['consumption_stats']
        member_info = extracted['member_info']
        card_balances = extracted['card_balances']
        stat_date = extracted['stat_date']
        site_id = extracted['site_id']

        self.logger.info(
            "%s: 转换数据,%d 条会员消费记录",
            self.get_task_code(), len(consumption_stats)
        )

        results = []

        for stats in consumption_stats:
            member_id = stats.get('member_id')

            # Guests never enter this table.
            if self.is_guest(member_id):
                continue

            memb_info = member_info.get(member_id, {})
            balance = card_balances.get(member_id, {})

            # Recency and customer tier
            days_since_last = self._calc_days_since(stat_date, stats.get('last_consume_date'))
            customer_tier = self._calculate_customer_tier(stats, days_since_last)

            record = {
                'site_id': site_id,
                'tenant_id': self.config.get("app.tenant_id", site_id),
                'member_id': member_id,
                'stat_date': stat_date,
                # Member attributes
                'member_nickname': memb_info.get('nickname'),
                'member_mobile': self._mask_mobile(memb_info.get('mobile')),
                'card_grade_name': memb_info.get('member_card_grade_name'),
                'register_date': memb_info.get('register_date'),
                # Lifetime totals
                'first_consume_date': stats.get('first_consume_date'),
                'last_consume_date': stats.get('last_consume_date'),
                'total_visit_count': self.safe_int(stats.get('total_visit_count', 0)),
                'total_consume_amount': self.safe_decimal(stats.get('total_consume_amount', 0)),
                'total_recharge_amount': self.safe_decimal(memb_info.get('recharge_money_sum', 0)),
                'total_table_fee': self.safe_decimal(stats.get('total_table_fee', 0)),
                'total_goods_amount': self.safe_decimal(stats.get('total_goods_amount', 0)),
                'total_assistant_amount': self.safe_decimal(stats.get('total_assistant_amount', 0)),
                # Rolling windows
                'visit_count_7d': self.safe_int(stats.get('visit_count_7d', 0)),
                'visit_count_10d': self.safe_int(stats.get('visit_count_10d', 0)),
                'visit_count_15d': self.safe_int(stats.get('visit_count_15d', 0)),
                'visit_count_30d': self.safe_int(stats.get('visit_count_30d', 0)),
                'visit_count_60d': self.safe_int(stats.get('visit_count_60d', 0)),
                'visit_count_90d': self.safe_int(stats.get('visit_count_90d', 0)),
                'consume_amount_7d': self.safe_decimal(stats.get('consume_amount_7d', 0)),
                'consume_amount_10d': self.safe_decimal(stats.get('consume_amount_10d', 0)),
                'consume_amount_15d': self.safe_decimal(stats.get('consume_amount_15d', 0)),
                'consume_amount_30d': self.safe_decimal(stats.get('consume_amount_30d', 0)),
                'consume_amount_60d': self.safe_decimal(stats.get('consume_amount_60d', 0)),
                'consume_amount_90d': self.safe_decimal(stats.get('consume_amount_90d', 0)),
                # Card balances
                'cash_card_balance': self.safe_decimal(balance.get('cash_balance', 0)),
                'gift_card_balance': self.safe_decimal(balance.get('gift_balance', 0)),
                'total_card_balance': self.safe_decimal(balance.get('total_balance', 0)),
                # Activity flags
                'days_since_last': days_since_last,
                'is_active_7d': self.safe_int(stats.get('visit_count_7d', 0)) > 0,
                'is_active_30d': self.safe_int(stats.get('visit_count_30d', 0)) > 0,
                'is_active_90d': self.safe_int(stats.get('visit_count_90d', 0)) > 0,
                # Customer tier
                'customer_tier': customer_tier,
            }
            results.append(record)

        return results

    def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
        """
        Write the records with delete-before-insert on ``stat_date``.

        Nothing is deleted or written when ``transformed`` is empty.

        Returns:
            Dict with a ``counts`` section and, when rows were written, an
            ``extra`` section holding the number of deleted rows.
        """
        if not transformed:
            self.logger.info("%s: 无数据需要写入", self.get_task_code())
            return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

        deleted = self.delete_existing_data(context, date_col="stat_date")
        inserted = self.bulk_insert(transformed)

        self.logger.info(
            "%s: 加载完成,删除 %d 行,插入 %d 行",
            self.get_task_code(), deleted, inserted
        )

        return {
            "counts": {
                "fetched": len(transformed),
                "inserted": inserted,
                "updated": 0,
                "skipped": 0,
                "errors": 0
            },
            "extra": {"deleted": deleted}
        }

    # ==========================================================================
    # Extraction helpers
    # ==========================================================================

    def _extract_consumption_stats(
        self,
        site_id: int,
        stat_date: date
    ) -> List[Dict[str, Any]]:
        """
        Aggregate per-member consumption as of ``stat_date``.

        Only settlements dated on or before ``stat_date`` are considered, so
        that re-running the task for a past date does not count later
        consumption in the totals or rolling windows. Each N-day window covers
        ``stat_date`` and the N-1 days before it.

        Returns:
            One dict per member with the first/last consumption date,
            lifetime totals and the 7/10/15/30/60/90-day visit counts and
            consumption amounts.
        """
        sql = """
            WITH consume_base AS (
                SELECT
                    member_id,
                    DATE(create_time) AS consume_date,
                    consume_money,
                    table_charge_money,
                    goods_money,
                    assistant_pd_money + assistant_cx_money AS assistant_amount
                FROM billiards_dwd.dwd_settlement_head
                WHERE site_id = %s
                  AND member_id IS NOT NULL
                  AND member_id != 0
                  AND DATE(create_time) <= %s
            )
            SELECT
                member_id,
                MIN(consume_date) AS first_consume_date,
                MAX(consume_date) AS last_consume_date,
                -- 全量累计
                COUNT(*) AS total_visit_count,
                SUM(consume_money) AS total_consume_amount,
                SUM(table_charge_money) AS total_table_fee,
                SUM(goods_money) AS total_goods_amount,
                SUM(assistant_amount) AS total_assistant_amount,
                -- 滚动窗口
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN 1 END) AS visit_count_7d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN 1 END) AS visit_count_10d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN 1 END) AS visit_count_15d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
                COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
                SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
            FROM consume_base
            GROUP BY member_id
        """
        # site filter + as-of bound in the CTE, then one stat_date per window
        # expression (6 visit counts + 6 amounts).
        params = (site_id, stat_date) + (stat_date,) * 12
        rows = self.db.query(sql, params)
        return [dict(row) for row in rows] if rows else []

    def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """
        Load member attributes of the site from ``dim_member``.

        Returns:
            {member_id: row dict} with nickname, mobile, card grade name,
            register date and recharge sum.
        """
        # NOTE(review): no current-version filter is applied; if dim_member
        # keeps several versions per member, the last row returned wins —
        # confirm against the table's history handling.
        sql = """
            SELECT
                member_id,
                nickname,
                mobile,
                member_card_grade_name,
                DATE(create_time) AS register_date,
                recharge_money_sum
            FROM billiards_dwd.dim_member
            WHERE site_id = %s
        """
        rows = self.db.query(sql, (site_id,))

        result = {}
        for row in (rows or []):
            row_dict = dict(row)
            result[row_dict['member_id']] = row_dict
        return result

    def _extract_card_balances(self, site_id: int) -> Dict[int, Dict[str, Decimal]]:
        """
        Sum the card balances of each member, split by card kind.

        Only accounts with ``valid_to IS NULL`` are read. Balances of card
        types that are neither the cash card nor a gift card are ignored, so
        ``total_balance`` is always ``cash_balance + gift_balance``.

        Returns:
            {member_id: {'cash_balance', 'gift_balance', 'total_balance'}}
        """
        sql = """
            SELECT
                tenant_member_id AS member_id,
                card_type_id,
                balance
            FROM billiards_dwd.dim_member_card_account
            WHERE site_id = %s
              AND valid_to IS NULL
        """
        rows = self.db.query(sql, (site_id,))

        result: Dict[int, Dict[str, Decimal]] = {}
        for row in (rows or []):
            row_dict = dict(row)
            member_id = row_dict.get('member_id')
            card_type_id = row_dict.get('card_type_id')
            balance = self.safe_decimal(row_dict.get('balance', 0))

            if member_id not in result:
                result[member_id] = {
                    'cash_balance': Decimal('0'),
                    'gift_balance': Decimal('0'),
                    'total_balance': Decimal('0')
                }
            totals = result[member_id]

            if card_type_id == self._CASH_CARD_TYPE_ID:
                totals['cash_balance'] += balance
            elif card_type_id in self._GIFT_CARD_TYPE_IDS:
                totals['gift_balance'] += balance

            totals['total_balance'] = totals['cash_balance'] + totals['gift_balance']

        return result

    # ==========================================================================
    # Utilities
    # ==========================================================================

    def _mask_mobile(self, mobile: Optional[str]) -> Optional[str]:
        """
        Mask a mobile number, keeping the first 3 and last 4 characters.

        Empty values and numbers shorter than 7 characters are returned
        unchanged.
        """
        if not mobile or len(mobile) < 7:
            return mobile
        return mobile[:3] + "****" + mobile[-4:]

    def _calc_days_since(self, stat_date: date, last_date: Optional[date]) -> Optional[int]:
        """
        Return the days between the last consumption and ``stat_date``.

        Returns ``None`` when ``last_date`` is missing; a ``datetime`` is
        reduced to its date first.
        """
        if not last_date:
            return None
        if isinstance(last_date, datetime):
            last_date = last_date.date()
        return (stat_date - last_date).days

    def _calculate_customer_tier(
        self,
        stats: Dict[str, Any],
        days_since_last: Optional[int]
    ) -> str:
        """
        Classify the member into a customer tier.

        Rules, checked in this order:
        - "高价值" (high value): at least 3 visits and a consumption amount of
          at least 1000 within 90 days
        - "中等" (medium): any visit within 30 days
        - "低活跃" (low activity): a visit within 90 days but none within 30
        - "流失" (churned): no visit within 90 days

        Args:
            stats: Per-member statistics row.
            days_since_last: Accepted for interface compatibility; the rules
                above do not use it.
        """
        visit_90d = self.safe_int(stats.get('visit_count_90d', 0))
        visit_30d = self.safe_int(stats.get('visit_count_30d', 0))
        amount_90d = self.safe_decimal(stats.get('consume_amount_90d', 0))

        if visit_90d >= 3 and amount_90d >= 1000:
            return "高价值"
        if visit_30d > 0:
            return "中等"
        if visit_90d > 0:
            return "低活跃"
        return "流失"
|
||||
|
||||
|
||||
# Public export list, for convenient external imports
__all__ = ['MemberConsumptionTask']
|
||||
386
etl_billiards/tasks/dws/member_visit_task.py
Normal file
386
etl_billiards/tasks/dws/member_visit_task.py
Normal file
@@ -0,0 +1,386 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
会员来店明细任务
|
||||
|
||||
功能说明:
|
||||
以"会员+订单"为粒度,记录每次来店消费明细
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表
|
||||
- dwd_assistant_service_log: 助教服务流水
|
||||
- dim_member: 会员维度
|
||||
- dim_table: 台桌维度
|
||||
- cfg_area_category: 区域分类映射
|
||||
|
||||
目标表:
|
||||
billiards_dws.dws_member_visit_detail
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日增量更新
|
||||
- 幂等方式:delete-before-insert(按日期窗口)
|
||||
|
||||
业务规则:
|
||||
- 散客处理:member_id=0 不进入此表
|
||||
- 区域分类:使用cfg_area_category映射
|
||||
- 助教服务:以JSON格式存储多个助教的服务明细
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
会员来店明细任务
|
||||
|
||||
记录每个会员每次来店的:
|
||||
- 台桌信息和区域分类
|
||||
- 消费金额明细
|
||||
- 支付方式明细
|
||||
- 助教服务明细(JSON格式)
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code of the member visit detail task."""
    task_code = "DWS_MEMBER_VISIT"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the DWS table this task writes."""
    target_table = "dws_member_visit_detail"
    return target_table
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the primary-key columns of the target table."""
    key_columns = ("site_id", "member_id", "order_settle_id")
    return list(key_columns)
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
# ==========================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """
    Read the source data for the task's date window.

    The window bounds come from ``context.window_start`` /
    ``context.window_end``, reduced to dates when they expose ``.date()``.
    The configuration cache is loaded after the source queries.

    Returns:
        Dict with ``settlements``, ``assistant_services``, ``member_info``,
        ``table_info``, ``start_date``, ``end_date`` and ``site_id``.
    """
    window_start = context.window_start
    window_end = context.window_end
    start_date = window_start.date() if hasattr(window_start, 'date') else window_start
    end_date = window_end.date() if hasattr(window_end, 'date') else window_end
    site_id = context.store_id

    self.logger.info(
        "%s: 提取数据,日期范围 %s ~ %s",
        self.get_task_code(), start_date, end_date
    )

    payload: Dict[str, Any] = {}
    # 1. Settlement heads
    payload['settlements'] = self._extract_settlements(site_id, start_date, end_date)
    # 2. Assistant service details
    payload['assistant_services'] = self._extract_assistant_services(site_id, start_date, end_date)
    # 3. Member attributes
    payload['member_info'] = self._extract_member_info(site_id)
    # 4. Table attributes
    payload['table_info'] = self._extract_table_info(site_id)

    # 5. Configuration cache
    self.load_config_cache()

    payload['start_date'] = start_date
    payload['end_date'] = end_date
    payload['site_id'] = site_id
    return payload
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
    """
    Build one visit-detail record per non-guest settlement.

    Each settlement is enriched with member and table attributes, the area
    category of its table, and the assistant services attached to the same
    ``order_settle_id`` (serialized as JSON).

    Args:
        extracted: Result of :meth:`extract`.
        context: Task context (not used here).

    Returns:
        List of records keyed by target column name.
    """
    settlements = extracted['settlements']
    assistant_services = extracted['assistant_services']
    member_info = extracted['member_info']
    table_info = extracted['table_info']
    site_id = extracted['site_id']

    self.logger.info(
        "%s: 转换数据,%d 条结账单",
        self.get_task_code(), len(settlements)
    )

    # order_settle_id -> [assistant services of that order]
    services_by_order = self._build_service_index(assistant_services)
    to_decimal = self.safe_decimal

    records = []
    for settle in settlements:
        member_id = settle.get('member_id')

        # Guests never enter this table.
        if self.is_guest(member_id):
            continue

        order_settle_id = settle.get('order_settle_id')
        table_id = settle.get('table_id')

        member = member_info.get(member_id, {})
        table = table_info.get(table_id, {})
        order_services = services_by_order.get(order_settle_id, [])

        # Area category of the table
        area_name = table.get('area_name')
        area_category = self.get_area_category(area_name)

        # Assistant services as JSON
        services_json = self._build_assistant_services_json(order_services)

        # Durations; assistant seconds are summed first, then floored to minutes
        table_minutes = self._calc_table_duration(settle)
        assistant_seconds = sum(
            self.safe_int(svc.get('income_seconds', 0))
            for svc in order_services
        )
        assistant_minutes = assistant_seconds // 60

        records.append({
            'site_id': site_id,
            'tenant_id': self.config.get("app.tenant_id", site_id),
            'member_id': member_id,
            'order_settle_id': order_settle_id,
            'visit_date': settle.get('visit_date'),
            'visit_time': settle.get('create_time'),
            # Member attributes
            'member_nickname': member.get('nickname'),
            'member_mobile': self._mask_mobile(member.get('mobile')),
            'member_birthday': member.get('birthday'),
            # Table attributes
            'table_id': table_id,
            'table_name': table.get('table_name'),
            'area_name': area_name,
            'area_category': area_category.get('category_name'),
            # Consumption amounts
            'table_fee': to_decimal(settle.get('table_charge_money', 0)),
            'goods_amount': to_decimal(settle.get('goods_money', 0)),
            'assistant_amount': (
                to_decimal(settle.get('assistant_pd_money', 0))
                + to_decimal(settle.get('assistant_cx_money', 0))
            ),
            'total_consume': to_decimal(settle.get('consume_money', 0)),
            'total_discount': self._calc_total_discount(settle),
            'actual_pay': to_decimal(settle.get('pay_amount', 0)),
            # Payment methods
            'cash_pay': to_decimal(settle.get('pay_amount', 0)),
            'cash_card_pay': to_decimal(settle.get('balance_amount', 0)),
            'gift_card_pay': to_decimal(settle.get('gift_card_amount', 0)),
            'groupbuy_pay': to_decimal(settle.get('coupon_amount', 0)),
            # Durations
            'table_duration_min': table_minutes,
            'assistant_duration_min': assistant_minutes,
            # Assistant service details
            'assistant_services': services_json,
        })

    return records
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
    """
    Write the records with delete-before-insert on ``visit_date``.

    Nothing is deleted or written when ``transformed`` is empty.

    Returns:
        Dict with a ``counts`` section and, when rows were written, an
        ``extra`` section holding the number of deleted rows.
    """
    if not transformed:
        self.logger.info("%s: 无数据需要写入", self.get_task_code())
        return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

    # Clear the window first so a re-run does not duplicate rows.
    deleted = self.delete_existing_data(context, date_col="visit_date")
    inserted = self.bulk_insert(transformed)

    self.logger.info(
        "%s: 加载完成,删除 %d 行,插入 %d 行",
        self.get_task_code(), deleted, inserted
    )

    counts = {
        "fetched": len(transformed),
        "inserted": inserted,
        "updated": 0,
        "skipped": 0,
        "errors": 0
    }
    return {"counts": counts, "extra": {"deleted": deleted}}
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
# ==========================================================================
|
||||
|
||||
def _extract_settlements(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取结账单
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
table_id,
|
||||
member_id,
|
||||
create_time,
|
||||
DATE(create_time) AS visit_date,
|
||||
consume_money,
|
||||
pay_amount,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
assistant_pd_money,
|
||||
assistant_cx_money,
|
||||
coupon_amount,
|
||||
adjust_amount,
|
||||
member_discount_amount,
|
||||
rounding_amount,
|
||||
gift_card_amount,
|
||||
balance_amount,
|
||||
recharge_card_amount
|
||||
FROM billiards_dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND DATE(create_time) >= %s
|
||||
AND DATE(create_time) <= %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_assistant_services(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取助教服务明细
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
order_settle_id,
|
||||
site_assistant_id AS assistant_id,
|
||||
nickname AS assistant_nickname,
|
||||
income_seconds,
|
||||
ledger_amount
|
||||
FROM billiards_dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND DATE(start_use_time) >= %s
|
||||
AND DATE(start_use_time) <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
提取会员信息
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile,
|
||||
birthday
|
||||
FROM billiards_dwd.dim_member
|
||||
WHERE site_id = %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
return {r['member_id']: dict(r) for r in (rows or [])}
|
||||
|
||||
def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
提取台桌信息
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
site_table_id AS table_id,
|
||||
site_table_name AS table_name,
|
||||
site_table_area_name AS area_name
|
||||
FROM billiards_dwd.dim_table
|
||||
WHERE site_id = %s
|
||||
AND valid_to IS NULL
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
return {r['table_id']: dict(r) for r in (rows or [])}
|
||||
|
||||
# ==========================================================================
|
||||
# 工具方法
|
||||
# ==========================================================================
|
||||
|
||||
def _build_service_index(
|
||||
self,
|
||||
services: List[Dict[str, Any]]
|
||||
) -> Dict[int, List[Dict[str, Any]]]:
|
||||
"""
|
||||
构建助教服务索引
|
||||
"""
|
||||
index: Dict[int, List[Dict[str, Any]]] = {}
|
||||
for service in services:
|
||||
order_id = service.get('order_settle_id')
|
||||
if order_id:
|
||||
if order_id not in index:
|
||||
index[order_id] = []
|
||||
index[order_id].append(service)
|
||||
return index
|
||||
|
||||
def _build_assistant_services_json(
|
||||
self,
|
||||
services: List[Dict[str, Any]]
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
构建助教服务JSON
|
||||
"""
|
||||
if not services:
|
||||
return None
|
||||
|
||||
json_data = []
|
||||
for s in services:
|
||||
json_data.append({
|
||||
'assistant_id': s.get('assistant_id'),
|
||||
'nickname': s.get('assistant_nickname'),
|
||||
'duration_min': self.safe_int(s.get('income_seconds', 0)) // 60,
|
||||
'amount': float(self.safe_decimal(s.get('ledger_amount', 0)))
|
||||
})
|
||||
|
||||
return json.dumps(json_data, ensure_ascii=False)
|
||||
|
||||
def _calc_table_duration(self, settle: Dict[str, Any]) -> int:
|
||||
"""
|
||||
计算台桌使用时长(分钟)
|
||||
简化处理:根据台费和假设单价估算
|
||||
"""
|
||||
table_fee = self.safe_decimal(settle.get('table_charge_money', 0))
|
||||
if table_fee <= 0:
|
||||
return 0
|
||||
# 假设平均台费单价为0.5元/分钟
|
||||
return int(table_fee / Decimal('0.5'))
|
||||
|
||||
def _calc_total_discount(self, settle: Dict[str, Any]) -> Decimal:
|
||||
"""
|
||||
计算总优惠
|
||||
"""
|
||||
adjust = self.safe_decimal(settle.get('adjust_amount', 0))
|
||||
member_discount = self.safe_decimal(settle.get('member_discount_amount', 0))
|
||||
rounding = self.safe_decimal(settle.get('rounding_amount', 0))
|
||||
return adjust + member_discount + rounding
|
||||
|
||||
def _mask_mobile(self, mobile: Optional[str]) -> Optional[str]:
|
||||
"""手机号脱敏"""
|
||||
if not mobile or len(mobile) < 7:
|
||||
return mobile
|
||||
return mobile[:3] + "****" + mobile[-4:]
|
||||
|
||||
|
||||
# Exported for convenient import by other modules
__all__ = ['MemberVisitTask']
|
||||
161
etl_billiards/tasks/dws/retention_cleanup_task.py
Normal file
161
etl_billiards/tasks/dws/retention_cleanup_task.py
Normal file
@@ -0,0 +1,161 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 时间分层清理任务
|
||||
|
||||
功能说明:
|
||||
按配置的时间分层范围,对 DWS 表执行历史数据清理。
|
||||
该任务默认不启用,需通过配置显式开启。
|
||||
|
||||
配置示例(.env / settings):
|
||||
DWS_RETENTION_ENABLED=true
|
||||
DWS_RETENTION_LAYER=LAST_3_MONTHS
|
||||
DWS_RETENTION_TABLES=dws_finance_daily_summary,dws_assistant_daily_detail
|
||||
DWS_RETENTION_TABLE_LAYERS={"dws_finance_expense_summary":"ALL"}
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-03
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext, TimeLayer
|
||||
|
||||
|
||||
class DwsRetentionCleanupTask(BaseDwsTask):
    """
    Retention cleanup task for time-layered DWS tables.

    For each selected DWS table, deletes the rows of the current store whose
    date column is older than the start of the configured time layer.
    Nothing is deleted unless the feature is explicitly switched on.

    Config keys read:
        dws.retention.enabled: Master switch; a bool or a string such as
            "true" / "false".
        dws.retention.layer: Name of the default ``TimeLayer`` member
            (defaults to "ALL", which means "delete nothing").
        dws.retention.tables: Optional comma-separated string or iterable
            of table names; restricts cleanup to the matching entries of
            ``DEFAULT_TABLES``.
        dws.retention.table_layers: Optional dict, or JSON object string,
            mapping a table name to the layer name to use for that table.
    """

    # Tables eligible for cleanup and the column their cutoff is compared to.
    DEFAULT_TABLES = [
        {"table": "dws_assistant_daily_detail", "date_col": "stat_date"},
        {"table": "dws_assistant_monthly_summary", "date_col": "stat_month"},
        {"table": "dws_assistant_customer_stats", "date_col": "stat_date"},
        {"table": "dws_assistant_salary_calc", "date_col": "salary_month"},
        {"table": "dws_assistant_recharge_commission", "date_col": "commission_month"},
        {"table": "dws_assistant_finance_analysis", "date_col": "stat_date"},
        {"table": "dws_member_consumption_summary", "date_col": "stat_date"},
        {"table": "dws_member_visit_detail", "date_col": "visit_date"},
        {"table": "dws_finance_daily_summary", "date_col": "stat_date"},
        {"table": "dws_finance_income_structure", "date_col": "stat_date"},
        {"table": "dws_finance_discount_detail", "date_col": "stat_date"},
        {"table": "dws_finance_recharge_summary", "date_col": "stat_date"},
        {"table": "dws_finance_expense_summary", "date_col": "expense_month"},
        {"table": "dws_platform_settlement", "date_col": "settlement_date"},
    ]

    # Date columns whose cutoff is snapped to the first day of the month.
    _MONTHLY_DATE_COLS = frozenset(
        {"stat_month", "salary_month", "commission_month", "expense_month"}
    )

    # String spellings accepted as "enabled" for the master switch.
    _TRUE_STRINGS = frozenset({"1", "true", "t", "yes", "y", "on"})

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_RETENTION_CLEANUP"

    def get_target_table(self) -> str:
        """Return a nominal target table; the task itself cleans several tables."""
        return "dws_finance_daily_summary"

    def get_primary_keys(self) -> List[str]:
        """Return no primary keys; this task only deletes rows."""
        return []

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Return the store id of the run; no data is read here."""
        return {"site_id": context.store_id}

    def transform(self, extracted: Dict[str, Any], context: TaskContext) -> Dict[str, Any]:
        """Pass the extracted payload through unchanged."""
        return extracted

    def load(self, transformed: Dict[str, Any], context: TaskContext) -> Dict:
        """
        Run the cleanup for every selected table.

        Args:
            transformed: Unused payload from ``transform``.
            context: Task context; ``window_end`` is the reference date for
                the time layer and ``store_id`` scopes the deletes.

        Returns:
            ``{"counts": {"cleaned": <rows deleted>}}``, plus
            ``"extra": {"details": [...]}`` (table, deleted, cutoff) when the
            cleanup actually ran.
        """
        task_code = self.get_task_code()

        if not self._is_retention_enabled():
            self.logger.info("%s: 未启用清理配置,跳过", task_code)
            return {"counts": {"cleaned": 0}}

        # window_end may be a datetime or already a date.
        base_date = context.window_end.date() if hasattr(context.window_end, "date") else context.window_end
        default_layer = self._get_retention_layer(self.config.get("dws.retention.layer", "ALL"))
        if default_layer is None:
            self.logger.warning("%s: 未识别的清理层级,跳过", task_code)
            return {"counts": {"cleaned": 0}}

        target_tables = self._resolve_target_tables()
        if not target_tables:
            self.logger.info("%s: 未配置需要清理的表,跳过", task_code)
            return {"counts": {"cleaned": 0}}

        table_layers = self._resolve_table_layers()

        total_deleted = 0
        details = []
        for item in target_tables:
            table = item["table"]
            date_col = item["date_col"]

            if table in table_layers:
                layer = self._get_retention_layer(table_layers[table])
                if layer is None:
                    # A mistyped override must not fall back to deleting with
                    # the default layer; skip the table, but say so.
                    self.logger.warning(
                        "%s: 表 %s 的清理层级 %s 未识别,跳过",
                        task_code, table, table_layers[table],
                    )
                    continue
            else:
                # Use the resolved member directly. Going back through
                # `.value` and a by-name lookup would skip every table
                # whenever a member's value differs from its name.
                layer = default_layer

            if layer == TimeLayer.ALL:
                # "ALL" keeps the full history: nothing to delete.
                continue

            time_range = self.get_time_layer_range(layer, base_date)
            cutoff = self._normalize_cutoff(date_col, time_range.start)
            deleted = self._cleanup_table(table, date_col, cutoff, context.store_id)
            total_deleted += deleted
            details.append({"table": table, "deleted": deleted, "cutoff": str(cutoff)})

        self.logger.info("%s: 清理完成,总删除 %d 行", task_code, total_deleted)
        return {"counts": {"cleaned": total_deleted}, "extra": {"details": details}}

    def _is_retention_enabled(self) -> bool:
        """
        Tell whether cleanup is switched on in the configuration.

        String values are parsed explicitly because ``bool("false")`` is
        ``True``; an env-style setting of "false" or "0" must not enable a
        task that deletes data.
        """
        raw = self.config.get("dws.retention.enabled", False)
        if isinstance(raw, str):
            return raw.strip().lower() in self._TRUE_STRINGS
        return bool(raw)

    def _get_retention_layer(self, layer_name: Optional[str]) -> Optional[TimeLayer]:
        """
        Resolve a layer name (case-insensitive) to a ``TimeLayer`` member.

        Returns:
            The member whose name matches, or ``None`` for an empty or
            unknown name.
        """
        if not layer_name:
            return None
        try:
            return TimeLayer[str(layer_name).upper()]
        except KeyError:
            return None

    def _resolve_target_tables(self) -> List[Dict[str, str]]:
        """
        Pick the tables to clean.

        Returns:
            ``DEFAULT_TABLES`` when ``dws.retention.tables`` is unset;
            otherwise the entries of ``DEFAULT_TABLES`` whose table name is
            listed there (names not in ``DEFAULT_TABLES`` are ignored, so
            only known tables and columns ever reach the DELETE statement).
        """
        table_list = self.config.get("dws.retention.tables")
        if not table_list:
            return self.DEFAULT_TABLES

        if isinstance(table_list, str):
            names = [t.strip() for t in table_list.split(",") if t.strip()]
        else:
            names = list(table_list)

        return [item for item in self.DEFAULT_TABLES if item["table"] in names]

    def _resolve_table_layers(self) -> Dict[str, str]:
        """
        Read the per-table layer overrides.

        Returns:
            Mapping of table name to layer name, taken from a dict or from a
            JSON object string; empty when the setting is missing, is not
            valid JSON, or is not an object.
        """
        raw = self.config.get("dws.retention.table_layers")
        if not raw:
            return {}

        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except json.JSONDecodeError:
                return {}

        if isinstance(raw, dict):
            return {str(k): str(v) for k, v in raw.items()}
        return {}

    def _normalize_cutoff(self, date_col: str, cutoff: date) -> date:
        """Snap the cutoff to the first of its month for month-typed columns."""
        if date_col in self._MONTHLY_DATE_COLS:
            return cutoff.replace(day=1)
        return cutoff

    def _cleanup_table(self, table: str, date_col: str, cutoff: date, site_id: int) -> int:
        """
        Delete the rows of one site that are older than the cutoff.

        Args:
            table: Table name without schema; prefixed with ``DWS_SCHEMA``.
            date_col: Column compared against the cutoff.
            cutoff: Rows with ``date_col`` strictly before this are deleted.
            site_id: Site whose rows are deleted.

        Returns:
            The cursor's ``rowcount`` after the DELETE.
        """
        # Identifiers cannot be bound as parameters. Within this class they
        # only come from DEFAULT_TABLES, never from raw configuration text.
        full_table = f"{self.DWS_SCHEMA}.{table}"
        sql = f"DELETE FROM {full_table} WHERE site_id = %s AND {date_col} < %s"
        # NOTE(review): no commit is issued here; presumably the surrounding
        # task framework owns the transaction - confirm.
        with self.db.conn.cursor() as cur:
            cur.execute(sql, (site_id, cutoff))
            return cur.rowcount
|
||||
|
||||
|
||||
__all__ = ["DwsRetentionCleanupTask"]
|
||||
Reference in New Issue
Block a user