Files
ZQYY.FQ-ETL/tasks/dws/assistant_monthly_task.py

601 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
助教月度业绩汇总任务
功能说明:
"助教+月份"为粒度,汇总月度业绩及档位计算
数据来源:
- dws_assistant_daily_detail: 日度明细(聚合)
- dim_assistant: 助教维度(入职日期、等级)
- cfg_performance_tier: 绩效档位配置
目标表:
billiards_dws.dws_assistant_monthly_summary
更新策略:
- 更新频率:每日更新当月数据
- 幂等方式delete-before-insert按月份
业务规则:
- 新入职判断入职日期在月1日0点之后则为新入职
- 有效业绩total_hours - trashed_hours
- 档位匹配根据有效业绩小时数匹配cfg_performance_tier
- 排名计算按有效业绩小时数降序考虑并列如2个第一则都是1下一个是3
作者ETL团队
创建日期2026-02-01
"""
from __future__ import annotations
from datetime import date, datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Set, Tuple
from .base_dws_task import BaseDwsTask, TaskContext
class AssistantMonthlyTask(BaseDwsTask):
"""
助教月度业绩汇总任务
汇总每个助教每月的:
- 工作天数、服务次数、时长
- 有效业绩(扣除废除记录后)
- 档位匹配
- 月度排名用于Top3奖金
"""
def get_task_code(self) -> str:
return "DWS_ASSISTANT_MONTHLY"
def get_target_table(self) -> str:
return "dws_assistant_monthly_summary"
def get_primary_keys(self) -> List[str]:
return ["site_id", "assistant_id", "stat_month"]
# ==========================================================================
# ETL主流程
# ==========================================================================
def extract(self, context: TaskContext) -> Dict[str, Any]:
"""
提取数据:从日度明细表聚合
"""
# 确定月份范围
start_date = context.window_start.date() if hasattr(context.window_start, 'date') else context.window_start
end_date = context.window_end.date() if hasattr(context.window_end, 'date') else context.window_end
site_id = context.store_id
# 获取涉及的月份列表
months = self._get_months_in_range(start_date, end_date)
months = self._filter_months_for_schedule(months, end_date)
self.logger.info(
"%s: 提取数据,月份范围 %s",
self.get_task_code(), [str(m) for m in months]
)
if not months:
self.logger.info("%s: 无需处理月份,跳过", self.get_task_code())
return {
'daily_aggregates': [],
'monthly_uniques': [],
'assistant_info': {},
'months': [],
'site_id': site_id
}
# 1. 获取日度明细聚合数据
daily_aggregates = self._extract_daily_aggregates(site_id, months)
# 1.1 获取月度去重客户/台桌统计从DWD直接去重
monthly_uniques = self._extract_monthly_uniques(site_id, months)
# 2. 获取助教基本信息
assistant_info = self._extract_assistant_info(site_id)
# 3. 加载配置缓存
self.load_config_cache()
return {
'daily_aggregates': daily_aggregates,
'monthly_uniques': monthly_uniques,
'assistant_info': assistant_info,
'months': months,
'site_id': site_id
}
def transform(self, extracted: Dict[str, Any], context: TaskContext) -> List[Dict[str, Any]]:
"""
转换数据:计算月度汇总、档位匹配、排名
"""
daily_aggregates = extracted['daily_aggregates']
monthly_uniques = extracted['monthly_uniques']
assistant_info = extracted['assistant_info']
months = extracted['months']
site_id = extracted['site_id']
self.logger.info(
"%s: 转换数据,%d 个月份,%d 条聚合记录",
self.get_task_code(), len(months), len(daily_aggregates)
)
# 月度去重索引
monthly_unique_index = {
(row.get('assistant_id'), row.get('stat_month')): row
for row in (monthly_uniques or [])
if row.get('assistant_id') and row.get('stat_month')
}
# 按月份处理
all_results = []
for month in months:
month_results = self._process_month(
daily_aggregates,
assistant_info,
monthly_unique_index,
month,
site_id
)
all_results.extend(month_results)
return all_results
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> Dict:
"""
加载数据写入DWS表
"""
if not transformed:
self.logger.info("%s: 无数据需要写入", self.get_task_code())
return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}
# 删除已存在的数据(按月份)
deleted = self._delete_by_months(context, transformed)
# 批量插入
inserted = self.bulk_insert(transformed)
self.logger.info(
"%s: 加载完成,删除 %d 行,插入 %d",
self.get_task_code(), deleted, inserted
)
return {
"counts": {
"fetched": len(transformed),
"inserted": inserted,
"updated": 0,
"skipped": 0,
"errors": 0
},
"extra": {"deleted": deleted}
}
# ==========================================================================
# 数据提取方法
# ==========================================================================
def _get_months_in_range(self, start_date: date, end_date: date) -> List[date]:
"""
获取日期范围内的所有月份(月第一天)
"""
months = []
current = start_date.replace(day=1)
end_month = end_date.replace(day=1)
while current <= end_month:
months.append(current)
# 下个月
if current.month == 12:
current = current.replace(year=current.year + 1, month=1)
else:
current = current.replace(month=current.month + 1)
return months
def _filter_months_for_schedule(self, months: List[date], end_date: date) -> List[date]:
"""
按调度口径过滤历史月份(默认仅当月,月初可包含上月)
"""
if not months:
return []
history_months = self.safe_int(self.config.get("dws.monthly.history_months", 0))
if history_months > 0:
current_month = self.get_month_first_day(end_date)
allowed = {current_month}
for offset in range(1, history_months + 1):
allowed.add(self.get_month_first_day(self._shift_months(current_month, -offset)))
filtered = [m for m in months if m in allowed]
skipped = [m for m in months if m not in allowed]
if skipped:
self.logger.info(
"%s: 跳过历史月份 %s",
self.get_task_code(),
[str(m) for m in skipped]
)
return filtered
allow_history = bool(self.config.get("dws.monthly.allow_history", False))
if allow_history:
return months
current_month = self.get_month_first_day(end_date)
allowed = {current_month}
grace_days = self.safe_int(self.config.get("dws.monthly.prev_month_grace_days", 5))
if grace_days > 0 and end_date.day <= grace_days:
prev_month = self.get_month_first_day(self._shift_months(current_month, -1))
allowed.add(prev_month)
filtered = [m for m in months if m in allowed]
skipped = [m for m in months if m not in allowed]
if skipped:
self.logger.info(
"%s: 跳过历史月份 %s",
self.get_task_code(),
[str(m) for m in skipped]
)
return filtered
def _extract_daily_aggregates(
self,
site_id: int,
months: List[date]
) -> List[Dict[str, Any]]:
"""
从日度明细表提取并按月聚合
"""
if not months:
return []
# 构建月份条件
month_conditions = []
for month in months:
next_month = (month.replace(day=28) + timedelta(days=4)).replace(day=1)
month_conditions.append(f"(stat_date >= '{month}' AND stat_date < '{next_month}')")
month_where = " OR ".join(month_conditions)
sql = f"""
SELECT
assistant_id,
assistant_nickname,
assistant_level_code,
assistant_level_name,
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
COUNT(DISTINCT stat_date) AS work_days,
SUM(total_service_count) AS total_service_count,
SUM(base_service_count) AS base_service_count,
SUM(bonus_service_count) AS bonus_service_count,
SUM(room_service_count) AS room_service_count,
SUM(total_hours) AS total_hours,
SUM(base_hours) AS base_hours,
SUM(bonus_hours) AS bonus_hours,
SUM(room_hours) AS room_hours,
SUM(total_ledger_amount) AS total_ledger_amount,
SUM(base_ledger_amount) AS base_ledger_amount,
SUM(bonus_ledger_amount) AS bonus_ledger_amount,
SUM(room_ledger_amount) AS room_ledger_amount,
SUM(unique_customers) AS total_unique_customers,
SUM(unique_tables) AS total_unique_tables,
SUM(trashed_seconds) AS trashed_seconds,
SUM(trashed_count) AS trashed_count
FROM billiards_dws.dws_assistant_daily_detail
WHERE site_id = %s AND ({month_where})
GROUP BY assistant_id, assistant_nickname, assistant_level_code, assistant_level_name,
DATE_TRUNC('month', stat_date)
"""
rows = self.db.query(sql, (site_id,))
return [dict(row) for row in rows] if rows else []
def _extract_monthly_uniques(
self,
site_id: int,
months: List[date]
) -> List[Dict[str, Any]]:
"""
从DWD按月直接去重客户与台桌
"""
if not months:
return []
start_month = min(months)
end_month = max(months)
next_month = (end_month.replace(day=28) + timedelta(days=4)).replace(day=1)
sql = """
SELECT
site_assistant_id AS assistant_id,
DATE_TRUNC('month', start_use_time)::DATE AS stat_month,
COUNT(DISTINCT CASE WHEN tenant_member_id > 0 THEN tenant_member_id END) AS unique_customers,
COUNT(DISTINCT site_table_id) AS unique_tables
FROM billiards_dwd.dwd_assistant_service_log
WHERE site_id = %s
AND start_use_time >= %s
AND start_use_time < %s
AND is_delete = 0
GROUP BY site_assistant_id, DATE_TRUNC('month', start_use_time)
"""
rows = self.db.query(sql, (site_id, start_month, next_month))
return [dict(row) for row in rows] if rows else []
def _extract_assistant_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取助教基本信息
"""
sql = """
SELECT
assistant_id,
nickname,
level AS assistant_level,
entry_time AS hire_date
FROM billiards_dwd.dim_assistant
WHERE site_id = %s
AND scd2_is_current = 1 -- 当前有效记录
"""
rows = self.db.query(sql, (site_id,))
result = {}
for row in (rows or []):
row_dict = dict(row)
result[row_dict['assistant_id']] = row_dict
return result
# ==========================================================================
# 数据转换方法
# ==========================================================================
def _process_month(
self,
daily_aggregates: List[Dict[str, Any]],
assistant_info: Dict[int, Dict[str, Any]],
monthly_unique_index: Dict[Tuple[int, date], Dict[str, Any]],
month: date,
site_id: int
) -> List[Dict[str, Any]]:
"""
处理单个月份的数据
"""
# 筛选该月份的数据
month_data = [
agg for agg in daily_aggregates
if agg.get('stat_month') == month
]
if not month_data:
return []
# 构建月度汇总记录
month_records = []
for agg in month_data:
assistant_id = agg.get('assistant_id')
asst_info = assistant_info.get(assistant_id, {})
# 计算有效业绩
total_hours = self.safe_decimal(agg.get('total_hours', 0))
trashed_hours = self.seconds_to_hours(self.safe_int(agg.get('trashed_seconds', 0)))
effective_hours = total_hours - trashed_hours
# 判断是否新入职
hire_date = asst_info.get('hire_date')
is_new_hire = False
if hire_date:
if isinstance(hire_date, datetime):
hire_date = hire_date.date()
is_new_hire = self.is_new_hire_in_month(hire_date, month)
# 匹配档位
tier_hours = effective_hours
max_tier_level = None
if is_new_hire:
tier_hours = self._calc_new_hire_tier_hours(effective_hours, self.safe_int(agg.get('work_days', 0)))
if self._should_apply_new_hire_tier_cap(month, hire_date):
max_tier_level = self._get_new_hire_max_tier_level()
tier = self.get_performance_tier(
tier_hours,
is_new_hire,
effective_date=month,
max_tier_level=max_tier_level
)
# 获取月末的等级信息(用于记录)
month_end = self._get_month_end(month)
level_info = self.get_assistant_level_asof(assistant_id, month_end)
# 月度去重客户/台桌从DWD直接去重
unique_info = monthly_unique_index.get((assistant_id, month), {})
unique_customers = self.safe_int(
unique_info.get('unique_customers', agg.get('total_unique_customers', 0))
)
unique_tables = self.safe_int(
unique_info.get('unique_tables', agg.get('total_unique_tables', 0))
)
record = {
'site_id': site_id,
'tenant_id': self.config.get("app.tenant_id", site_id),
'assistant_id': assistant_id,
'assistant_nickname': agg.get('assistant_nickname'),
'stat_month': month,
'assistant_level_code': level_info.get('level_code') if level_info else agg.get('assistant_level_code'),
'assistant_level_name': level_info.get('level_name') if level_info else agg.get('assistant_level_name'),
'hire_date': hire_date,
'is_new_hire': is_new_hire,
'work_days': self.safe_int(agg.get('work_days', 0)),
'total_service_count': self.safe_int(agg.get('total_service_count', 0)),
'base_service_count': self.safe_int(agg.get('base_service_count', 0)),
'bonus_service_count': self.safe_int(agg.get('bonus_service_count', 0)),
'room_service_count': self.safe_int(agg.get('room_service_count', 0)),
'total_hours': total_hours,
'base_hours': self.safe_decimal(agg.get('base_hours', 0)),
'bonus_hours': self.safe_decimal(agg.get('bonus_hours', 0)),
'room_hours': self.safe_decimal(agg.get('room_hours', 0)),
'effective_hours': effective_hours,
'trashed_hours': trashed_hours,
'total_ledger_amount': self.safe_decimal(agg.get('total_ledger_amount', 0)),
'base_ledger_amount': self.safe_decimal(agg.get('base_ledger_amount', 0)),
'bonus_ledger_amount': self.safe_decimal(agg.get('bonus_ledger_amount', 0)),
'room_ledger_amount': self.safe_decimal(agg.get('room_ledger_amount', 0)),
'unique_customers': unique_customers,
'unique_tables': unique_tables,
'avg_service_seconds': self._calc_avg_service_seconds(agg),
'tier_id': tier.get('tier_id') if tier else None,
'tier_code': tier.get('tier_code') if tier else None,
'tier_name': tier.get('tier_name') if tier else None,
'rank_by_hours': None, # 后面计算
'rank_with_ties': None, # 后面计算
}
month_records.append(record)
# 计算排名
self._calculate_ranks(month_records)
return month_records
def _get_month_end(self, month: date) -> date:
"""
获取月末日期
"""
if month.month == 12:
next_month = month.replace(year=month.year + 1, month=1, day=1)
else:
next_month = month.replace(month=month.month + 1, day=1)
return next_month - timedelta(days=1)
def _calc_avg_service_seconds(self, agg: Dict[str, Any]) -> Decimal:
"""
计算平均单次服务时长
"""
total_count = self.safe_int(agg.get('total_service_count', 0))
if total_count == 0:
return Decimal('0')
total_hours = self.safe_decimal(agg.get('total_hours', 0))
total_seconds = total_hours * Decimal('3600')
return total_seconds / Decimal(str(total_count))
def _calc_new_hire_tier_hours(self, effective_hours: Decimal, work_days: int) -> Decimal:
"""
新入职定档:日均 * 30仅用于定档不影响奖金与排名
"""
if work_days <= 0:
return Decimal('0')
return (effective_hours / Decimal(str(work_days))) * Decimal('30')
def _should_apply_new_hire_tier_cap(self, stat_month: date, hire_date: Optional[date]) -> bool:
"""
新入职封顶规则是否生效:
- 仅在规则生效月及之后(默认 2026-03-01 起)
- 仅当入职日期晚于封顶日(默认当月 25 日)
"""
if not hire_date:
return False
effective_from = self._get_new_hire_cap_effective_from()
cap_day = self._get_new_hire_cap_day()
return stat_month >= effective_from and hire_date.day > cap_day
def _get_new_hire_cap_effective_from(self) -> date:
"""
获取新入职封顶规则生效月份(默认 2026-03-01
"""
raw_value = self.config.get("dws.monthly.new_hire_cap_effective_from", "2026-03-01")
if isinstance(raw_value, datetime):
return raw_value.date()
if isinstance(raw_value, date):
return raw_value
if isinstance(raw_value, str):
try:
return datetime.strptime(raw_value.strip(), "%Y-%m-%d").date()
except ValueError:
pass
return date(2026, 3, 1)
def _get_new_hire_cap_day(self) -> int:
"""
获取新入职封顶日(默认 25
"""
value = self.safe_int(self.config.get("dws.monthly.new_hire_cap_day", 25))
return min(max(value, 1), 31)
def _get_new_hire_max_tier_level(self) -> int:
"""
获取新入职封顶档位等级(默认 2 档)
"""
value = self.safe_int(self.config.get("dws.monthly.new_hire_max_tier_level", 2))
return max(value, 0)
def _calculate_ranks(self, records: List[Dict[str, Any]]) -> None:
"""
计算排名(考虑并列)
Top3排名口径按有效业绩总小时数排名
如遇并列则都算比如2个第一则记为2个第一一个第三
"""
if not records:
return
# 按有效业绩降序排序
sorted_records = sorted(
records,
key=lambda x: x.get('effective_hours', Decimal('0')),
reverse=True
)
# 计算考虑并列的排名
values = [
(r.get('assistant_id'), r.get('effective_hours', Decimal('0')))
for r in sorted_records
]
ranked = self.calculate_rank_with_ties(values)
# 创建排名映射
rank_map = {
assistant_id: (rank, dense_rank)
for assistant_id, rank, dense_rank in ranked
}
# 更新记录
for record in records:
assistant_id = record.get('assistant_id')
if assistant_id in rank_map:
rank, _ = rank_map[assistant_id]
record['rank_by_hours'] = rank
record['rank_with_ties'] = rank # 使用考虑并列的排名
def _delete_by_months(
self,
context: TaskContext,
records: List[Dict[str, Any]]
) -> int:
"""
按月份删除已存在的数据
"""
# 获取涉及的月份
months = set(r.get('stat_month') for r in records if r.get('stat_month'))
if not months:
return 0
target_table = self.get_target_table()
full_table = f"{self.DWS_SCHEMA}.{target_table}"
total_deleted = 0
with self.db.conn.cursor() as cur:
for month in months:
sql = f"""
DELETE FROM {full_table}
WHERE site_id = %s AND stat_month = %s
"""
cur.execute(sql, (context.store_id, month))
total_deleted += cur.rowcount
return total_deleted
# 便于外部导入
__all__ = ['AssistantMonthlyTask']