feat: 2026-04-15~04-20 累积变更基线 — 多主线合流

主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
  - 新增 GET /xcx/coaches/{id}/banner 轻量接口
  - performance/records 加 coach_id 参数 + view_board_coach 权限分流
  - coach/customer/performance/board/task 服务层重构
  - fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
  - task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
  - recall_detector settle_type=3 双重限制 + 门店级 resolved

主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
  - perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
  - isScattered 散客标记端到端
  - foodDetail/phoneFull/creator* 字段透传

主线 3: P19 指数回测框架 Phase 1+2
  - 3 个指数表 stat_date 日快照模式
  - 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
  - task_engine 升级 HTTP 实时 + 推演回测双模式

主线 4: Core 维度层启用
  - 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
  - 修复 app 视图空查询问题

主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口

主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
  - schema 基线与 DDL 快照同步

主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)

附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
      backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具

合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-20 06:32:07 +08:00
parent 79d3c2e97e
commit 2a7a5d68aa
157 changed files with 14304 additions and 3717 deletions

View File

@@ -31,6 +31,7 @@ from .finance_board_cache import FinanceBoardCacheTask
from .coach_area_hours_task import CoachAreaHoursTask
from .finance_base_task import FinanceBaseTask
from .maintenance_task import DwsMaintenanceTask
from .core_dim_sync_task import CoreDimSyncTask
from .goods_stock_daily_task import GoodsStockDailyTask
from .goods_stock_weekly_task import GoodsStockWeeklyTask
from .goods_stock_monthly_task import GoodsStockMonthlyTask
@@ -73,6 +74,7 @@ __all__ = [
"FinanceDiscountDetailTask",
"CoachAreaHoursTask",
"DwsMaintenanceTask",
"CoreDimSyncTask",
# 库存维度
"GoodsStockDailyTask",
"GoodsStockWeeklyTask",

View File

@@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
"""
Core dimension sync task.

Purpose:
    Copy the current-version (scd2_is_current = 1) dimension rows from the
    DWD layer into the core layer.  The core layer serves as the unified,
    cross-platform dimension layer, hiding ODS/DWD multi-source differences
    from downstream consumers.

Synced tables:
    - core.dim_assistant <- dwd.dim_assistant
    - core.dim_member    <- dwd.dim_member
    - core.dim_site      <- dwd.dim_site
    - core.dim_table     <- dwd.dim_table

Refresh strategy:
    TRUNCATE + INSERT full refresh (dimension tables are small, so a full
    reload is cheap).

Author: ETL team
Created: 2026-04-15
"""
from __future__ import annotations
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
# Sync mappings: one entry per (core table, DWD source table, column mapping).
# Each column mapping entry is a (core_col, dwd_expr) pair: the expression is
# selected from the DWD source and written into the named core column.


def _table_mapping(core_table, dwd_source, columns):
    """Build one sync-mapping entry for SYNC_TABLES."""
    return {
        "core_table": core_table,
        "dwd_source": dwd_source,
        "columns": columns,
    }


SYNC_TABLES = [
    _table_mapping(
        "core.dim_assistant",
        "dwd.dim_assistant",
        [
            ("assistant_id", "assistant_id"),
            ("tenant_id", "tenant_id"),
            ("site_id", "site_id"),
            ("real_name", "real_name"),
            ("nickname", "nickname"),
            ("mobile", "mobile"),
            ("level", "level"),
            ("assistant_status", "assistant_status"),
            ("leave_status", "leave_status"),
        ],
    ),
    _table_mapping(
        "core.dim_member",
        "dwd.dim_member",
        [
            ("member_id", "member_id"),
            ("system_member_id", "system_member_id"),
            ("tenant_id", "tenant_id"),
            ("register_site_id", "register_site_id"),
            ("mobile", "mobile"),
            ("nickname", "nickname"),
            ("member_card_grade_name", "member_card_grade_name"),
            # DWD has no status column; scd2_is_current = 1 already means
            # "active", so a constant 1 is selected into core.status.
            ("status", "1"),
        ],
    ),
    _table_mapping(
        "core.dim_site",
        "dwd.dim_site",
        [
            ("site_id", "site_id"),
            ("tenant_id", "tenant_id"),
            ("shop_name", "shop_name"),
            ("site_label", "site_label"),
            ("shop_status", "shop_status"),
        ],
    ),
    _table_mapping(
        "core.dim_table",
        "dwd.dim_table",
        [
            ("table_id", "table_id"),
            ("site_id", "site_id"),
            ("table_name", "table_name"),
            ("site_table_area_name", "site_table_area_name"),
            ("table_price", "table_price"),
        ],
    ),
]
class CoreDimSyncTask(BaseDwsTask):
    """Core dimension sync task: DWD -> core full refresh.

    Iterates over SYNC_TABLES, reads the current-version rows
    (scd2_is_current = 1) from each DWD source, and rewrites the matching
    core table via TRUNCATE + INSERT.
    """

    # No date column: every run is a full refresh rather than an
    # incremental, date-partitioned load.
    DATE_COL = None

    def get_task_code(self) -> str:
        return "CORE_DIM_SYNC"

    def get_target_table(self) -> str:
        # Multiple tables are synced, so this value is informational only
        # and never used to build SQL.
        return "core_dim_sync"

    def get_primary_keys(self) -> List[str]:
        # Full refresh: no key-based upsert, hence no primary keys.
        return []

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Fetch current-version (scd2_is_current = 1) rows from each DWD source.

        Returns:
            {"tables": {core_table: {"rows": [dict], "columns": [str]}},
             "site_id": context.store_id}
        """
        result: Dict[str, Any] = {}
        for table_def in SYNC_TABLES:
            core_table = table_def["core_table"]
            dwd_source = table_def["dwd_source"]
            # Table names and column expressions come from the static
            # SYNC_TABLES constant, never from user input, so f-string
            # interpolation into SQL is safe here.
            select_clause = ", ".join(col[1] for col in table_def["columns"])
            sql = f"SELECT {select_clause} FROM {dwd_source} WHERE scd2_is_current = 1"
            with self.db.conn.cursor() as cur:
                cur.execute(sql)
                rows = cur.fetchall()
            core_cols = [col[0] for col in table_def["columns"]]
            result[core_table] = {
                "rows": [dict(zip(core_cols, row)) for row in rows],
                "columns": core_cols,
            }
            self.logger.info(
                "%s: %s <- %s: %d rows",
                self.get_task_code(), core_table, dwd_source, len(rows),
            )
        return {"tables": result, "site_id": context.store_id}

    def transform(self, extracted: Dict[str, Any], context: TaskContext) -> Dict[str, Any]:
        """Pass-through: the extracted rows are already in core shape."""
        return extracted

    def load(self, transformed: Dict[str, Any], context: TaskContext) -> dict:
        """TRUNCATE + INSERT full refresh for every target core table.

        Returns:
            Standard counts dict plus {"extra": {"tables_synced": n}}.
        """
        tables = transformed.get("tables", {})
        total_inserted = 0
        for core_table, data in tables.items():
            rows = data["rows"]
            columns = data["columns"]
            with self.db.conn.cursor() as cur:
                # TRUNCATE even when there are no source rows, so a table
                # that emptied out upstream is also emptied in core.
                cur.execute(f"TRUNCATE {core_table}")
                self.logger.info("%s: TRUNCATE %s", self.get_task_code(), core_table)
                if rows:
                    cols_str = ", ".join(columns)
                    placeholders = ", ".join(["%s"] * len(columns))
                    insert_sql = (
                        f"INSERT INTO {core_table} ({cols_str}) VALUES ({placeholders})"
                    )
                    # Batch insert instead of one execute() per row — fewer
                    # round-trips, consistent with the batched writes used by
                    # the other DWS tasks.
                    cur.executemany(
                        insert_sql,
                        [[row.get(col) for col in columns] for row in rows],
                    )
                    total_inserted += len(rows)
                    self.logger.info(
                        "%s: INSERT %s: %d rows",
                        self.get_task_code(), core_table, len(rows),
                    )
        # NOTE(review): no explicit self.db.conn.commit() here — presumably
        # the base task commits after load(); confirm against BaseDwsTask.
        return {
            "counts": {
                "fetched": total_inserted,
                "inserted": total_inserted,
                "updated": 0,
                "skipped": 0,
                "errors": 0,
            },
            "extra": {"tables_synced": len(tables)},
        }

View File

@@ -530,10 +530,12 @@ class MemberIndexBaseTask(BaseIndexTask):
enable_stop_exception = int(params.get('enable_stop_high_balance_exception', 0)) == 1
high_balance_threshold = float(params.get('high_balance_threshold', 1000))
# CHANGE 2026-04-12 | STOP 不再排除:超出 recency 窗口的老客归入 OLD 继续计算
# WBI 衰减公式自然给出高分,避免最需要召回的客户被遗漏
if data.t_a >= recency_days:
if enable_stop_exception and data.sv_balance >= high_balance_threshold:
return "STOP", "STOP_HIGH_BALANCE", True
return "STOP", "STOP", False
return "OLD", "STOP_OVERDUE", True
new_visit_threshold = int(params.get('new_visit_threshold', 2))
new_days_threshold = int(params.get('new_days_threshold', 30))

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import math
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional
from .member_index_base import MemberActivityData, MemberIndexBaseTask
@@ -202,9 +203,10 @@ class NewconvIndexTask(MemberIndexBaseTask):
avg_raw=sum(all_raw) / len(all_raw)
)
# P19: 回测模式传入 calc_time
calc_time = (context.as_of_date if context and context.as_of_date else None)
inserted = self._save_newconv_data(newconv_list, calc_time=calc_time)
# 日快照模式:始终按 stat_date 写入
now = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
stat_date = now.date() if hasattr(now, 'date') else now
inserted = self._save_newconv_data(newconv_list, stat_date=stat_date)
self.logger.info("NCI calculation finished, inserted %d rows", inserted)
return {
@@ -288,30 +290,23 @@ class NewconvIndexTask(MemberIndexBaseTask):
if data.raw_score < 0:
data.raw_score = 0.0
def _save_newconv_data(self, data_list: List[MemberNewconvData], *, calc_time=None) -> int:
"""保存 NCI 数据"""
def _save_newconv_data(self, data_list: List[MemberNewconvData], *, stat_date) -> int:
"""日快照模式:按 (site_id, stat_date) 删除后插入。"""
if not data_list:
return 0
site_id = data_list[0].activity.site_id
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
use_param_time = calc_time is not None
with self.db.conn.cursor() as cur:
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s",
(site_id,),
)
from datetime import date as date_type
if not isinstance(stat_date, date_type):
stat_date = stat_date.date() if hasattr(stat_date, 'date') else stat_date
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
insert_sql = f"""
site_id = data_list[0].activity.site_id
with self.db.conn.cursor() as cur:
cur.execute(
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND stat_date = %s",
(site_id, stat_date),
)
insert_sql = """
INSERT INTO dws.dws_member_newconv_index (
site_id, tenant_id, member_id,
status, segment,
@@ -325,7 +320,7 @@ class NewconvIndexTask(MemberIndexBaseTask):
raw_score_welcome, raw_score_convert, raw_score,
display_score_welcome, display_score_convert, display_score,
last_wechat_touch_time,
calc_time, created_at, updated_at
calc_time, created_at, updated_at, stat_date
) VALUES (
%s, %s, %s,
%s, %s,
@@ -339,32 +334,40 @@ class NewconvIndexTask(MemberIndexBaseTask):
%s, %s, %s,
%s, %s, %s,
%s,
{time_placeholder}
NOW(), NOW(), NOW(), %s
)
"""
inserted = 0
# 批量写入executemany 替代逐行 execute
batch_params = []
for data in data_list:
activity = data.activity
batch_params.append((
activity.site_id, activity.tenant_id, activity.member_id,
data.status, data.segment,
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
activity.t_v, activity.t_r, activity.t_a,
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
activity.interval_count,
data.need_new, data.salvage_new, data.recharge_new, data.value_new,
data.welcome_new,
data.raw_score_welcome, data.raw_score_convert, data.raw_score,
data.display_score_welcome, data.display_score_convert, data.display_score,
None,
stat_date,
))
from psycopg2.extras import execute_batch
with self.db.conn.cursor() as cur:
for data in data_list:
activity = data.activity
params = (
activity.site_id, activity.tenant_id, activity.member_id,
data.status, data.segment,
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
activity.t_v, activity.t_r, activity.t_a,
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
activity.interval_count,
data.need_new, data.salvage_new, data.recharge_new, data.value_new,
data.welcome_new,
data.raw_score_welcome, data.raw_score_convert, data.raw_score,
data.display_score_welcome, data.display_score_convert, data.display_score,
None,
)
if use_param_time:
params = params + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params)
inserted += cur.rowcount
execute_batch(cur, insert_sql, batch_params, page_size=200)
inserted = len(batch_params)
# 保留策略:清理 365 天前的快照
cur.execute(
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND stat_date < CURRENT_DATE - INTERVAL '365 days'",
(site_id,),
)
self.db.conn.commit()
return inserted

View File

@@ -180,9 +180,9 @@ class RelationIndexTask(BaseIndexTask):
self._apply_display_scores(pair_map, params_rs, params_ms, params_ml, site_id)
# P19: 仅回测模式传 calc_time按 calc_time 删除保留其他快照),正常模式传 None按 site_id 全量刷新)
backtest_calc_time = now if (context and context.as_of_date) else None
inserted = self._save_relation_rows(site_id, list(pair_map.values()), calc_time=backtest_calc_time)
# 日快照模式:始终按 stat_date 写入/覆盖,支持多日快照共存
stat_date = now.date() if hasattr(now, 'date') else now
inserted = self._save_relation_rows(site_id, list(pair_map.values()), stat_date=stat_date)
self.logger.info("关系指数计算完成,写入 %d 条记录", inserted)
return {
@@ -585,27 +585,23 @@ class RelationIndexTask(BaseIndexTask):
return "asinh"
return "none"
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics], *, calc_time: Optional[datetime] = None) -> int:
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics], *, stat_date) -> int:
"""日快照模式:始终按 (site_id, stat_date) 删除后插入,支持多日快照共存。"""
from datetime import date as date_type
if not isinstance(stat_date, date_type):
stat_date = stat_date.date() if hasattr(stat_date, 'date') else stat_date
with self.db.conn.cursor() as cur:
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s",
(site_id,),
)
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND stat_date = %s",
(site_id, stat_date),
)
if not rows:
self.db.conn.commit()
return 0
insert_sql = f"""
insert_sql = """
INSERT INTO dws.dws_member_assistant_relation_index (
site_id, tenant_id, member_id, assistant_id,
session_count, total_duration_minutes, basic_session_count, incentive_session_count,
@@ -614,7 +610,7 @@ class RelationIndexTask(BaseIndexTask):
os_share, os_label, os_rank,
ms_f_short, ms_f_long, ms_raw, ms_display,
ml_order_count, ml_allocated_amount, ml_raw, ml_display,
calc_time, created_at, updated_at
calc_time, created_at, updated_at, stat_date
) VALUES (
%s, %s, %s, %s,
%s, %s, %s, %s,
@@ -623,42 +619,34 @@ class RelationIndexTask(BaseIndexTask):
%s, %s, %s,
%s, %s, %s, %s,
%s, %s, %s, %s,
{('%s, %s, %s' if use_param_time else 'NOW(), NOW(), NOW()')}
NOW(), NOW(), NOW(), %s
)
"""
inserted = 0
for row in rows:
params = (
row.site_id,
row.tenant_id,
row.member_id,
row.assistant_id,
row.session_count,
row.total_duration_minutes,
row.basic_session_count,
row.incentive_session_count,
# 批量写入executemany 替代逐行 execute
batch_params = [
(
row.site_id, row.tenant_id, row.member_id, row.assistant_id,
row.session_count, row.total_duration_minutes,
row.basic_session_count, row.incentive_session_count,
row.days_since_last_session,
row.rs_f,
row.rs_d,
row.rs_r,
row.rs_raw,
row.rs_display,
row.os_share,
row.os_label,
row.os_rank,
row.ms_f_short,
row.ms_f_long,
row.ms_raw,
row.ms_display,
row.ml_order_count,
row.ml_allocated_amount,
row.ml_raw,
row.ml_display,
row.rs_f, row.rs_d, row.rs_r, row.rs_raw, row.rs_display,
row.os_share, row.os_label, row.os_rank,
row.ms_f_short, row.ms_f_long, row.ms_raw, row.ms_display,
row.ml_order_count, row.ml_allocated_amount, row.ml_raw, row.ml_display,
stat_date,
)
if use_param_time:
params = params + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params)
inserted += max(cur.rowcount, 0)
for row in rows
]
from psycopg2.extras import execute_batch
execute_batch(cur, insert_sql, batch_params, page_size=200)
inserted = len(batch_params)
# 保留策略:清理 365 天前的快照
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND stat_date < CURRENT_DATE - INTERVAL '365 days'",
(site_id,),
)
self.db.conn.commit()
return inserted

View File

@@ -178,9 +178,10 @@ class WinbackIndexTask(MemberIndexBaseTask):
avg_raw=sum(all_raw) / len(all_raw)
)
# P19: 回测模式传入 calc_time
calc_time = (context.as_of_date if context and context.as_of_date else None)
inserted = self._save_winback_data(winback_list, calc_time=calc_time)
# 日快照模式:始终按 stat_date 写入
now = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
stat_date = now.date() if hasattr(now, 'date') else now
inserted = self._save_winback_data(winback_list, stat_date=stat_date)
self.logger.info("WBI calculation finished, inserted %d rows", inserted)
return {
@@ -341,29 +342,23 @@ class WinbackIndexTask(MemberIndexBaseTask):
if data.raw_score < 0:
data.raw_score = 0.0
def _save_winback_data(self, data_list: List[MemberWinbackData], *, calc_time: Optional[datetime] = None) -> int:
"""保存 WBI 数据"""
def _save_winback_data(self, data_list: List[MemberWinbackData], *, stat_date) -> int:
"""日快照模式:按 (site_id, stat_date) 删除后插入。"""
if not data_list:
return 0
site_id = data_list[0].activity.site_id
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
with self.db.conn.cursor() as cur:
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s",
(site_id,),
)
from datetime import date as date_type
if not isinstance(stat_date, date_type):
stat_date = stat_date.date() if hasattr(stat_date, 'date') else stat_date
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
insert_sql = f"""
site_id = data_list[0].activity.site_id
with self.db.conn.cursor() as cur:
cur.execute(
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND stat_date = %s",
(site_id, stat_date),
)
insert_sql = """
INSERT INTO dws.dws_member_winback_index (
site_id, tenant_id, member_id,
status, segment,
@@ -376,7 +371,7 @@ class WinbackIndexTask(MemberIndexBaseTask):
ideal_interval_days, ideal_next_visit_date,
raw_score, display_score,
last_wechat_touch_time,
calc_time, created_at, updated_at
calc_time, created_at, updated_at, stat_date
) VALUES (
%s, %s, %s,
%s, %s,
@@ -389,31 +384,39 @@ class WinbackIndexTask(MemberIndexBaseTask):
%s, %s,
%s, %s,
%s,
{time_placeholder}
NOW(), NOW(), NOW(), %s
)
"""
inserted = 0
# 批量写入executemany 替代逐行 execute
batch_params = []
for data in data_list:
activity = data.activity
batch_params.append((
activity.site_id, activity.tenant_id, activity.member_id,
data.status, data.segment,
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
activity.t_v, activity.t_r, activity.t_a,
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
activity.interval_count,
data.overdue_old, data.overdue_cdf_p, data.drop_old, data.recharge_old, data.value_old,
data.ideal_interval_days, data.ideal_next_visit_date,
data.raw_score, data.display_score,
None,
stat_date,
))
from psycopg2.extras import execute_batch
with self.db.conn.cursor() as cur:
for data in data_list:
activity = data.activity
params = (
activity.site_id, activity.tenant_id, activity.member_id,
data.status, data.segment,
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
activity.t_v, activity.t_r, activity.t_a,
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
activity.interval_count,
data.overdue_old, data.overdue_cdf_p, data.drop_old, data.recharge_old, data.value_old,
data.ideal_interval_days, data.ideal_next_visit_date,
data.raw_score, data.display_score,
None,
)
if use_param_time:
params = params + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params)
inserted += cur.rowcount
execute_batch(cur, insert_sql, batch_params, page_size=200)
inserted = len(batch_params)
# 保留策略:清理 365 天前的快照
cur.execute(
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND stat_date < CURRENT_DATE - INTERVAL '365 days'",
(site_id,),
)
self.db.conn.commit()
return inserted

View File

@@ -2,11 +2,17 @@
"""
DWS 客户项目标签任务
时间窗口计算每位客户在四大项目BILLIARD/SNOOKER/MAHJONG/KTV
消费时长占比占比≥25% 则分配标签。散客member_id=0不参与。
按每位客户最近 N 次消费(开台记录)计算四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)
消费时长占比,占比≥25% 则分配标签。散客(member_id=0)不参与。
设计思路:
不按固定日期窗口30天/60天而按每位客户最近的消费记录数量取数
避免长期未到店或来店频率不稳定的客户标签丢失。
数据链路:
dwd_table_fee_log (ledger_count)
→ ROW_NUMBER() OVER (PARTITION BY member_id ORDER BY ledger_end_time DESC)
→ 取最近 LAST_N_VISITS 条记录
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name)
→ 按 category_code 汇总 → 计算占比 → 写入 dws_member_project_tag
@@ -15,25 +21,23 @@ DWS 客户项目标签任务
dws.dws_member_project_tag
更新策略:
全量删除重建(按 site_id 删除后重新插入所有时间窗口
全量删除重建(按 site_id 删除后重新插入)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from tasks.dws.base_dws_task import BaseDwsTask
# 只计算四大项目
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
# 客户看板的 2 个时间窗口
MEMBER_WINDOWS = [
TimeWindow.LAST_30_DAYS,
TimeWindow.LAST_60_DAYS,
]
# 取每位客户最近 30 次消费(开台记录)
LAST_N_VISITS = 30
# 写入 time_window 字段的枚举值
TIME_WINDOW_VALUE = "LAST_30_VISITS"
TAG_THRESHOLD = Decimal("0.25")
@@ -52,21 +56,15 @@ class MemberProjectTagTask(BaseDwsTask):
def extract(self, context) -> Dict[str, Any]:
site_id = context.store_id
self.logger.info("%s: 提取客户台费时长数据", self.get_task_code())
self.logger.info("%s: 提取客户最近 %d 次消费的台费时长数据",
self.get_task_code(), LAST_N_VISITS)
self.load_config_cache()
table_info = self._extract_table_info(site_id)
window_data: Dict[str, List[Dict]] = {}
for window in MEMBER_WINDOWS:
time_range = self.get_time_window_range(window)
rows = self._extract_member_durations(
site_id, time_range.start, time_range.end
)
window_data[window.value] = rows
rows = self._extract_member_durations(site_id)
return {
"window_data": window_data,
"rows": rows,
"table_info": table_info,
"site_id": site_id,
}
@@ -81,30 +79,37 @@ class MemberProjectTagTask(BaseDwsTask):
rows = self.db.query(sql, (site_id,))
return {r["table_id"]: dict(r) for r in (rows or [])}
def _extract_member_durations(
self, site_id: int, start_date: date, end_date: date
) -> List[Dict[str, Any]]:
"""提取客户台费时长明细(按客户+台桌聚合),排除散客"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("tfl.ledger_end_time", cutoff)
sql = f"""
SELECT
tfl.member_id,
tfl.site_table_id AS table_id,
COALESCE(SUM(tfl.ledger_count), 0) AS duration_seconds
FROM dwd.dwd_table_fee_log tfl
WHERE tfl.site_id = %(site_id)s
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND COALESCE(tfl.is_delete, 0) = 0
AND tfl.member_id IS NOT NULL
AND tfl.member_id != 0
GROUP BY tfl.member_id, tfl.site_table_id
def _extract_member_durations(self, site_id: int) -> List[Dict[str, Any]]:
"""按每位客户最近 N 次消费提取台费时长明细,排除散客。
使用 ROW_NUMBER() 按 member_id 分区、ledger_end_time 倒序排名,
取最近 LAST_N_VISITS 条记录后再按 (member_id, table_id) 聚合。
"""
sql = """
WITH ranked AS (
SELECT tfl.member_id,
tfl.site_table_id AS table_id,
tfl.ledger_count AS duration_seconds,
ROW_NUMBER() OVER (
PARTITION BY tfl.member_id
ORDER BY tfl.ledger_end_time DESC
) AS rn
FROM dwd.dwd_table_fee_log tfl
WHERE tfl.site_id = %(site_id)s
AND COALESCE(tfl.is_delete, 0) = 0
AND tfl.member_id IS NOT NULL
AND tfl.member_id != 0
)
SELECT member_id,
table_id,
COALESCE(SUM(duration_seconds), 0) AS duration_seconds
FROM ranked
WHERE rn <= %(last_n)s
GROUP BY member_id, table_id
"""
rows = self.db.query(sql, {
"site_id": site_id,
"start_date": start_date,
"end_date": end_date,
"last_n": LAST_N_VISITS,
})
return [dict(r) for r in rows] if rows else []
@@ -114,59 +119,59 @@ class MemberProjectTagTask(BaseDwsTask):
tenant_id = getattr(context, "tenant_id", 0) or 0
results: List[Dict[str, Any]] = []
for window_value, rows in extracted["window_data"].items():
# member_id → category_code → seconds
member_cats: Dict[int, Dict[str, int]] = {}
# member_id → category_code → seconds
member_cats: Dict[int, Dict[str, int]] = {}
for row in rows:
mid = row["member_id"]
tid = row["table_id"]
secs = self.safe_int(row["duration_seconds"])
if secs <= 0:
continue
for row in extracted["rows"]:
mid = row["member_id"]
tid = row["table_id"]
secs = self.safe_int(row["duration_seconds"])
if secs <= 0:
continue
tinfo = table_info.get(tid, {})
area_name = tinfo.get("area_name")
table_name = tinfo.get("table_name")
cat = self.get_area_category(area_name, table_name)
code = cat.get("category_code", "OTHER")
tinfo = table_info.get(tid, {})
area_name = tinfo.get("area_name")
table_name = tinfo.get("table_name")
cat = self.get_area_category(area_name, table_name)
code = cat.get("category_code", "OTHER")
if code not in VALID_CATEGORIES:
continue
if code not in VALID_CATEGORIES:
continue
if mid not in member_cats:
member_cats[mid] = {}
member_cats[mid][code] = member_cats[mid].get(code, 0) + secs
if mid not in member_cats:
member_cats[mid] = {}
member_cats[mid][code] = member_cats[mid].get(code, 0) + secs
for mid, cats in member_cats.items():
total = sum(cats.values())
if total <= 0:
continue
for mid, cats in member_cats.items():
total = sum(cats.values())
if total <= 0:
continue
for code, secs in cats.items():
pct = Decimal(str(secs)) / Decimal(str(total))
pct = pct.quantize(Decimal("0.0001"))
cat_info = self._get_category_display(code)
for code, secs in cats.items():
pct = Decimal(str(secs)) / Decimal(str(total))
pct = pct.quantize(Decimal("0.0001"))
cat_info = self._get_category_display(code)
results.append({
"site_id": site_id,
"tenant_id": tenant_id,
"member_id": mid,
"time_window": window_value,
"category_code": code,
"category_name": cat_info["category_name"],
"short_name": cat_info["short_name"],
"duration_seconds": secs,
"total_seconds": total,
"percentage": float(pct),
"is_tagged": pct >= TAG_THRESHOLD,
})
results.append({
"site_id": site_id,
"tenant_id": tenant_id,
"member_id": mid,
"time_window": TIME_WINDOW_VALUE,
"category_code": code,
"category_name": cat_info["category_name"],
"short_name": cat_info["short_name"],
"duration_seconds": secs,
"total_seconds": total,
"percentage": float(pct),
"is_tagged": pct >= TAG_THRESHOLD,
})
self.logger.info(
"%s: 生成 %d 条标签记录(其中 %d 条达标)",
"%s: 生成 %d 条标签记录(其中 %d 条达标),基于每客户最近 %d 次消费",
self.get_task_code(),
len(results),
sum(1 for r in results if r["is_tagged"]),
LAST_N_VISITS,
)
return results

View File

@@ -2,96 +2,130 @@
# - 2026-03-29 | Prompt: DWS_TASK_ENGINE ETL 任务 | 新建文件。
# 编排任务引擎全流程:完成检查 → 过期检查 → 任务生成。
# 通过 HTTP 调用后端 POST /api/internal/run-job 按 job_name 执行。
# - 2026-04-12 | 合并 DWS_TASK_SIMULATION有时间窗口时走推演模式
# 无时间窗口时走原来的 HTTP 模式。
# -*- coding: utf-8 -*-
"""
DWS 任务引擎编排任务DWS_TASK_ENGINE
在 DWS 指数计算完成后执行,按顺序调用后端任务引擎的各个步骤
1. recall_completion_check — 检测召回是否完成,生成回访任务
2. task_expiry_check — 标记超时未处理的任务
3. task_generator — 根据 WBI/NCI/RS 指数生成/替换任务
双模式
- 无时间窗口(日常 Flow通过 HTTP 调用后端任务引擎
1. recall_completion_check — 检测召回完成
2. task_expiry_check — 标记超时任务
3. task_generator — 根据指数生成/替换任务
通过 HTTP 调用后端 POST /api/internal/run-jobInternal-Token 认证),
每步失败仅记录日志,不中断后续步骤。
- 有时间窗口(历史推演):基于指数日快照逐天重放任务生命周期
需先运行 DWS_INDEX_BACKFILL 生成历史快照
"""
from __future__ import annotations
import json
import logging
import os
import sys
import time
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Any
from typing import Any, Dict, Optional
from zoneinfo import ZoneInfo
import requests
from dotenv import load_dotenv
from ..base_task import BaseTask, TaskContext
# 加载根 .envBACKEND_API_URL / INTERNAL_API_TOKEN 不在 AppConfig 映射中)
# task_engine.py → dws/ → tasks/ → feiqiu/ → connectors/ → etl/ → apps/ → root
# 加载根 .env
_REPO_ROOT = Path(__file__).resolve().parents[6]
load_dotenv(_REPO_ROOT / ".env", override=False)
logger = logging.getLogger(__name__)
_TIMEOUT = (5, 30) # 连接 5s读取 30s任务执行可能较慢
_TIMEOUT = (5, 30)
# 按顺序执行的后端任务列表
# HTTP 模式:按顺序执行的后端任务
_JOB_SEQUENCE = [
"recall_completion_check",
"task_expiry_check",
"task_generator",
]
# 推演模式:导入 task_generator 纯函数
_BACKEND = _REPO_ROOT / "apps" / "backend"
if str(_BACKEND) not in sys.path:
sys.path.insert(0, str(_BACKEND))
try:
from app.services.task_generator import (
IndexData,
determine_task_type,
should_replace_task,
)
_SIMULATION_AVAILABLE = True
except ImportError:
_SIMULATION_AVAILABLE = False
# 推演截止日期(现有 active 任务从 03-29 开始)
CUTOFF_DATE = date(2026, 3, 28)
FOLLOW_UP_HOURS = 72
# ── HTTP 模式辅助 ──
def _run_backend_job(backend_url: str, token: str, job_name: str) -> dict:
"""调用后端 POST /api/internal/run-job 执行指定任务。
Returns:
{"success": bool, "message": str} 或 {"success": False, "message": error}
"""
url = f"{backend_url}/api/internal/run-job"
headers = {
"Authorization": f"Internal-Token {token}",
"Content-Type": "application/json",
}
body = {"job_name": job_name}
try:
resp = requests.post(url, json=body, headers=headers, timeout=_TIMEOUT)
resp = requests.post(url, json={"job_name": job_name}, headers=headers, timeout=_TIMEOUT)
if resp.status_code == 200:
data = resp.json()
# 后端 ResponseWrapperMiddleware 包装:{"code": 0, "data": {...}}
inner = data.get("data", data)
return {
"success": inner.get("success", False),
"message": inner.get("message", ""),
}
else:
return {
"success": False,
"message": f"HTTP {resp.status_code}: {resp.text[:200]}",
}
return {"success": inner.get("success", False), "message": inner.get("message", "")}
return {"success": False, "message": f"HTTP {resp.status_code}: {resp.text[:200]}"}
except requests.RequestException as exc:
return {"success": False, "message": str(exc)}
class DwsTaskEngineTask(BaseTask):
"""DWS 任务引擎编排任务
"""DWS 任务引擎(双模式)
不读写 DWS 表,仅通过 HTTP 调用后端执行任务引擎步骤。
继承 BaseTask 而非 BaseDwsTask因为不需要 DWS 层的数据操作方法。
无时间窗口 → HTTP 模式(日常 Flow
有时间窗口 → 推演模式(历史回填)
"""
def get_task_code(self) -> str:
return "DWS_TASK_ENGINE"
def extract(self, context: TaskContext) -> dict[str, Any]:
"""无需提取数据,返回空上下文。"""
return {}
def execute(self, context=None) -> Dict[str, Any]:
"""直接 override execute(),绕过 BaseTask 的 E/T/L 模板。
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
"""按顺序调用后端任务引擎的各个步骤。"""
根据是否有时间窗口决定模式:
- 有窗口 → 推演模式(逐天生成+完成任务)
- 无窗口 → HTTP 模式(调用后端执行当天任务引擎)
"""
if self._has_window(context):
return self._run_simulation_mode(context)
return self._run_http_mode()
def _has_window(self, context=None) -> bool:
"""检查是否指定了时间窗口config 或 context 均可)。"""
# 优先从 configCLI --window-start/--window-end
wo = self.config.get("run.window_override") or {}
if wo.get("start") and wo.get("end"):
return True
# 其次从 contexttask_executor 构建的)
if context and hasattr(context, 'window_start') and hasattr(context, 'window_end'):
if context.window_start and context.window_end and context.window_start != context.window_end:
return True
return False
# ── HTTP 模式(日常) ──
def _run_http_mode(self) -> dict[str, Any]:
backend_url = os.environ.get("BACKEND_API_URL", "").rstrip("/")
token = os.environ.get("INTERNAL_API_TOKEN", "")
@@ -103,22 +137,667 @@ class DwsTaskEngineTask(BaseTask):
return {"skipped": True, "reason": "INTERNAL_API_TOKEN 未配置"}
results: dict[str, Any] = {}
for job_name in _JOB_SEQUENCE:
self.logger.info("DWS_TASK_ENGINE: 执行 %s ...", job_name)
result = _run_backend_job(backend_url, token, job_name)
success = result.get("success", False)
message = result.get("message", "")
results[job_name] = {"success": success, "message": message}
if success:
self.logger.info(
"DWS_TASK_ENGINE: %s 成功 — %s", job_name, message
)
self.logger.info("DWS_TASK_ENGINE: %s 成功 — %s", job_name, message)
else:
self.logger.warning(
"DWS_TASK_ENGINE: %s 失败 — %s", job_name, message
)
self.logger.warning("DWS_TASK_ENGINE: %s 失败 — %s", job_name, message)
return results
# ── simulation mode (historical replay) ──
def _run_simulation_mode(self, context: Optional[TaskContext]) -> dict[str, Any]:
    """Replay task generation/completion day by day over a historical window.

    Reads daily index snapshots and settlement facts from the ETL DB, writes
    simulated tasks/events into the app DB (APP_DB_DSN), then triggers the
    normal daily HTTP flow once so the latest visit data settles the backlog.

    Raises:
        RuntimeError: simulation helpers could not be imported.
        ValueError: APP_DB_DSN env var is missing.
    """
    if not _SIMULATION_AVAILABLE:
        raise RuntimeError("推演模式不可用:无法导入 app.services.task_generator")
    import psycopg2
    start_date, end_date = self._parse_date_range(context)
    tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
    etl_conn = self.db.conn
    app_dsn = os.environ.get("APP_DB_DSN")
    if not app_dsn:
        raise ValueError("推演模式需要 APP_DB_DSN 环境变量")
    app_conn = psycopg2.connect(app_dsn)
    app_conn.set_client_encoding("UTF8")
    site_id = self._get_site_id(etl_conn)
    total_days = (end_date - start_date).days + 1
    self.logger.info(
        "DWS_TASK_ENGINE [推演模式]: %s ~ %s (%d天), site_id=%s",
        start_date, end_date, total_days, site_id,
    )
    # Delete old data inside the range only; rows outside the range are kept.
    self._clean_date_range(app_conn, tz, start_date, end_date)
    # Load active tasks that predate the range (never "future" tasks after it).
    active_tasks = self._load_existing_active_tasks(app_conn, site_id, before_date=start_date)
    self.logger.info("DWS_TASK_ENGINE [推演]: 范围前已有 active 任务 %d", len(active_tasks))
    stats = {
        "created": 0, "completed": 0, "resolved": 0, "overridden": 0,
        "expired": 0, "follow_up_created": 0,
        "recall_events": 0, "skipped_no_snapshot": 0,
    }
    # Preload everything sliced by day (5 bulk queries instead of ~255 x 5
    # per-day queries).
    self.logger.info("DWS_TASK_ENGINE [推演]: 预加载快照 + 结算数据 ...")
    snapshots_by_date = self._bulk_load_snapshots(etl_conn, site_id, start_date, end_date)
    settlements_by_date = self._bulk_load_settlements(etl_conn, site_id, start_date, end_date, tz)
    member_visits_by_date = self._bulk_load_member_visits(etl_conn, site_id, start_date, end_date, tz)
    self.logger.info(
        "DWS_TASK_ENGINE [推演]: 预加载完成, %d 天有快照, %d 天有助教结算, %d 天有到店记录",
        len(snapshots_by_date), len(settlements_by_date), len(member_visits_by_date),
    )
    # Thresholds come from config so the simulation matches the daily task_generator.
    task_params = self._load_task_generator_params(app_conn, site_id)
    self.logger.info(
        "DWS_TASK_ENGINE [推演]: 任务阈值 high=%.1f, normal=%.1f, rs=[%.1f, %.1f)",
        task_params["high_threshold"], task_params["normal_threshold"],
        task_params["rs_min"], task_params["rs_max"],
    )
    t0 = time.time()
    current = start_date
    while current <= end_date:
        snapshot = snapshots_by_date.get(current, {"relation": {}, "wbi": {}, "nci": {}})
        if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
            # No snapshot for this day — nothing to replay.
            stats["skipped_no_snapshot"] += 1
            current += timedelta(days=1)
            continue
        day_settlements = settlements_by_date.get(current, {})
        day_visits = member_visits_by_date.get(current, {})
        self._simulate_day(app_conn, etl_conn, site_id, current, tz, snapshot, active_tasks, stats,
                           preloaded_settlements=day_settlements, preloaded_visits=day_visits,
                           task_params=task_params)
        day_num = (current - start_date).days + 1
        # Progress log every 30 days and on the final day.
        if day_num % 30 == 0 or current == end_date:
            elapsed = time.time() - t0
            self.logger.info(
                "DWS_TASK_ENGINE [推演]: %s (%d/%d) 创建=%d 完成=%d 解除=%d 覆盖=%d 过期=%d %.0fs",
                current, day_num, total_days,
                stats["created"], stats["completed"], stats["resolved"],
                stats["overridden"], stats["expired"], elapsed,
            )
        current += timedelta(days=1)
    # Final sweep: expire tasks still active whose expires_at has already passed.
    now_dt = datetime.now(tz)
    cleanup_count = 0
    with app_conn.cursor() as cur:
        cur.execute(
            """UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s
               WHERE site_id = %s AND status = 'active'
               AND expires_at IS NOT NULL AND expires_at < %s
               RETURNING id, task_type""",
            (now_dt, site_id, now_dt),
        )
        for task_id, task_type in cur.fetchall():
            self._history(cur, task_id, "expired", "active", "expired",
                          task_type, task_type, {"reason": "post_simulation_cleanup"})
            cleanup_count += 1
    app_conn.commit()
    stats["expired"] += cleanup_count
    if cleanup_count:
        self.logger.info("DWS_TASK_ENGINE [推演]: 收尾清理 %d 个已过期任务", cleanup_count)
    total_elapsed = time.time() - t0
    self.logger.info(
        "DWS_TASK_ENGINE [推演] 完成: %.0fs, 创建=%d 完成=%d 解除=%d 覆盖=%d 过期=%d 回访=%d 事件=%d 跳过=%d active=%d",
        total_elapsed, stats["created"], stats["completed"], stats["resolved"],
        stats["overridden"], stats["expired"],
        stats["follow_up_created"], stats["recall_events"],
        stats["skipped_no_snapshot"], len(active_tasks),
    )
    app_conn.close()
    # After replay, trigger the daily flow (recall_detector + task_generator) so
    # fresh visit data can auto-complete recalls and POOL filtering prunes stock.
    self.logger.info("DWS_TASK_ENGINE [推演]: 触发日常流程 ...")
    try:
        http_result = self._run_http_mode()
        self.logger.info("DWS_TASK_ENGINE [推演]: 日常流程完成 %s", http_result)
    except Exception:
        # Best-effort: a failed daily trigger must not invalidate the replay.
        self.logger.exception("DWS_TASK_ENGINE [推演]: 日常流程触发失败(不影响推演结果)")
    return {
        "status": "SUCCESS",
        "counts": {
            # Framework summary fields (shown in the run summary box).
            "inserted": stats["created"],
            "updated": stats["overridden"],
            "skipped": stats["skipped_no_snapshot"],
            "errors": 0,
            # Raw detail counters.
            **stats,
        },
    }
# ── simulation helpers ──
def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
    """Resolve the (start, end) date pair: config override first, then context."""
    override = self.config.get("run.window_override") or {}
    start_raw, end_raw = override.get("start"), override.get("end")
    if start_raw and end_raw:
        return self._parse_date(start_raw), self._parse_date(end_raw)
    if context and context.window_start and context.window_end:
        return context.window_start.date(), context.window_end.date()
    raise ValueError("推演模式需要指定时间窗口")
@staticmethod
def _parse_date(s) -> date:
    """Coerce a date / datetime / ISO-ish string (first 10 chars) into a date."""
    if isinstance(s, datetime):
        return s.date()
    if isinstance(s, date):
        return s
    return date.fromisoformat(str(s).strip()[:10])
def _get_site_id(self, etl_conn) -> int:
    """Fetch the single site_id present in the relation index table.

    Raises RuntimeError when the table is empty (backfill not run yet).
    """
    with etl_conn.cursor() as cur:
        cur.execute("SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1")
        row = cur.fetchone()
    etl_conn.commit()
    if row is None:
        raise RuntimeError("relation_index 表为空,请先运行 DWS_INDEX_BACKFILL")
    return row[0]
def _load_snapshot(self, etl_conn, site_id: int, stat_date: date) -> dict:
    """Load one day's snapshot from the three index tables.

    Returns {"relation": {(assistant_id, member_id): {rs, os_label, session_count}},
             "wbi": {member_id: Decimal}, "nci": {member_id: Decimal}}.
    """
    result = {"relation": {}, "wbi": {}, "nci": {}}
    with etl_conn.cursor() as cur:
        cur.execute(
            """SELECT assistant_id, member_id, rs_display, os_label, session_count
               FROM dws.dws_member_assistant_relation_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for r in cur.fetchall():
            result["relation"][(r[0], r[1])] = {
                "rs": Decimal(str(r[2])), "os_label": r[3], "session_count": r[4],
            }
        # WBI: also record status so stale NCI scores of OLD members can be dropped.
        old_members = set()
        cur.execute(
            """SELECT member_id, display_score, status FROM dws.dws_member_winback_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for r in cur.fetchall():
            result["wbi"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
            if r[2] == "OLD":
                old_members.add(r[0])
        # NCI: exclude members already converted to OLD (avoid stale high scores).
        cur.execute(
            """SELECT member_id, display_score FROM dws.dws_member_newconv_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for r in cur.fetchall():
            if r[0] not in old_members:
                result["nci"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
    etl_conn.commit()
    return result
def _load_settlements(self, etl_conn, site_id: int, d: date) -> dict:
    """Assistant-level settlements for day ``d``.

    settle_type=1 counts in full; settle_type=3 only when the service row is
    BONUS (order_assistant_type = 2).
    Returns {(site_assistant_id, member_id): latest pay_time}.
    """
    tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
    day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
    day_end = day_start + timedelta(days=1)
    settlements = {}
    with etl_conn.cursor() as cur:
        cur.execute(
            """SELECT sl.site_assistant_id, sh.member_id, MAX(sh.pay_time)
               FROM dwd.dwd_settlement_head sh
               JOIN dwd.dwd_assistant_service_log sl
               ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
               WHERE sh.site_id = %s
               AND (sh.settle_type = 1 OR (sh.settle_type = 3 AND sl.order_assistant_type = 2))
               AND sh.pay_time >= %s AND sh.pay_time < %s
               GROUP BY sl.site_assistant_id, sh.member_id""",
            (site_id, day_start, day_end),
        )
        for r in cur.fetchall():
            if r[0] and r[1]:  # guard against NULL ids
                settlements[(r[0], r[1])] = r[2]
    etl_conn.commit()
    return settlements
def _load_member_visits(self, etl_conn, site_id: int, d: date) -> dict:
    """Store-level visit detection for day ``d``: includes settle_type=1 rows
    even without an assistant service; used for the 'resolved' decision.

    Returns {member_id: latest pay_time}.
    """
    tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
    day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
    day_end = day_start + timedelta(days=1)
    visits = {}
    with etl_conn.cursor() as cur:
        cur.execute(
            """SELECT sh.member_id, MAX(sh.pay_time)
               FROM dwd.dwd_settlement_head sh
               WHERE sh.site_id = %s
               AND (
                   sh.settle_type = 1
                   OR (sh.settle_type = 3 AND EXISTS (
                       SELECT 1 FROM dwd.dwd_assistant_service_log sl
                       WHERE sl.order_settle_id = sh.order_settle_id
                       AND sl.is_delete = 0
                       AND sl.order_assistant_type = 2
                   ))
               )
               AND sh.pay_time >= %s AND sh.pay_time < %s
               GROUP BY sh.member_id""",
            (site_id, day_start, day_end),
        )
        for r in cur.fetchall():
            if r[0]:  # skip NULL member ids
                visits[r[0]] = r[1]
    etl_conn.commit()
    return visits
def _bulk_load_settlements(self, etl_conn, site_id: int, start: date, end: date, tz) -> dict:
    """One bulk query of assistant-level settlements, sliced by local day.

    Returns {date: {(site_assistant_id, member_id): latest pay_time}}.
    """
    from collections import defaultdict
    day_start = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=tz)
    day_end = datetime(end.year, end.month, end.day, 0, 0, 0, tzinfo=tz) + timedelta(days=1)
    result = defaultdict(dict)
    with etl_conn.cursor() as cur:
        cur.execute(
            """SELECT sl.site_assistant_id, sh.member_id, sh.pay_time
               FROM dwd.dwd_settlement_head sh
               JOIN dwd.dwd_assistant_service_log sl
               ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
               WHERE sh.site_id = %s
               AND (sh.settle_type = 1 OR (sh.settle_type = 3 AND sl.order_assistant_type = 2))
               AND sh.pay_time >= %s AND sh.pay_time < %s""",
            (site_id, day_start, day_end),
        )
        for aid, mid, pay_time in cur.fetchall():
            if aid and mid:
                # Slice by local calendar day; keep the latest pay_time per pair.
                d_key = pay_time.astimezone(tz).date()
                existing = result[d_key].get((aid, mid))
                if existing is None or pay_time > existing:
                    result[d_key][(aid, mid)] = pay_time
    etl_conn.commit()
    return dict(result)
def _bulk_load_member_visits(self, etl_conn, site_id: int, start: date, end: date, tz) -> dict:
    """One bulk query of store-level visits, sliced by local day.

    Returns {date: {member_id: latest pay_time}}.
    """
    from collections import defaultdict
    day_start = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=tz)
    day_end = datetime(end.year, end.month, end.day, 0, 0, 0, tzinfo=tz) + timedelta(days=1)
    result = defaultdict(dict)
    with etl_conn.cursor() as cur:
        cur.execute(
            """SELECT sh.member_id, sh.pay_time
               FROM dwd.dwd_settlement_head sh
               WHERE sh.site_id = %s
               AND (
                   sh.settle_type = 1
                   OR (sh.settle_type = 3 AND EXISTS (
                       SELECT 1 FROM dwd.dwd_assistant_service_log sl
                       WHERE sl.order_settle_id = sh.order_settle_id
                       AND sl.is_delete = 0
                       AND sl.order_assistant_type = 2
                   ))
               )
               AND sh.pay_time >= %s AND sh.pay_time < %s""",
            (site_id, day_start, day_end),
        )
        for mid, pay_time in cur.fetchall():
            if mid:
                # Slice by local calendar day; keep the latest pay_time per member.
                d_key = pay_time.astimezone(tz).date()
                existing = result[d_key].get(mid)
                if existing is None or pay_time > existing:
                    result[d_key][mid] = pay_time
    etl_conn.commit()
    return dict(result)
@staticmethod
def _load_task_generator_params(app_conn, site_id: int) -> dict:
    """Load task-generation thresholds from cfg_task_generator_params,
    falling back to hard defaults, so simulation matches the daily generator."""
    params = {
        "high_threshold": 7.5,
        "normal_threshold": 4.0,
        "rs_min": 1.0,
        "rs_max": 6.0,
    }
    # Map long DB param keys onto the short internal names.
    alias = {
        "high_priority_recall_threshold": "high_threshold",
        "priority_recall_threshold": "normal_threshold",
        "rs_min_for_relationship": "rs_min",
        "rs_max_for_relationship": "rs_max",
    }
    with app_conn.cursor() as cur:
        cur.execute("SELECT param_key, param_value FROM biz.cfg_task_generator_params")
        rows = cur.fetchall()
    app_conn.commit()
    for raw_key, raw_value in rows:
        short = alias.get(raw_key)
        if short is not None:
            params[short] = float(raw_value)
    return params
def _bulk_load_snapshots(self, etl_conn, site_id: int, start: date, end: date) -> dict:
    """One bulk query per index table (relation/wbi/nci), sliced by stat_date.

    Returns {stat_date: {"relation": ..., "wbi": ..., "nci": ...}} with the
    same per-day shape as ``_load_snapshot``.
    """
    from collections import defaultdict
    result = defaultdict(lambda: {"relation": {}, "wbi": {}, "nci": {}})
    with etl_conn.cursor() as cur:
        # relation_index
        cur.execute(
            """SELECT stat_date, assistant_id, member_id, rs_display, os_label, session_count
               FROM dws.dws_member_assistant_relation_index
               WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
            (site_id, start, end),
        )
        for sd, aid, mid, rs, os_label, sc in cur.fetchall():
            result[sd]["relation"][(aid, mid)] = {
                "rs": Decimal(str(rs)), "os_label": os_label, "session_count": sc,
            }
        # WBI: also collect per-day OLD members so stale NCI scores are filtered.
        old_members_by_date = defaultdict(set)
        cur.execute(
            """SELECT stat_date, member_id, display_score, status
               FROM dws.dws_member_winback_index
               WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
            (site_id, start, end),
        )
        for sd, mid, score, status in cur.fetchall():
            result[sd]["wbi"][mid] = Decimal(str(score)) if score else Decimal(0)
            if status == "OLD":
                old_members_by_date[sd].add(mid)
        # NCI: exclude members already converted to OLD on that day.
        cur.execute(
            """SELECT stat_date, member_id, display_score
               FROM dws.dws_member_newconv_index
               WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
            (site_id, start, end),
        )
        for sd, mid, score in cur.fetchall():
            if mid not in old_members_by_date.get(sd, set()):
                result[sd]["nci"][mid] = Decimal(str(score)) if score else Decimal(0)
    etl_conn.commit()
    return dict(result)
def _simulate_day(self, app_conn, etl_conn, site_id, d, tz, snapshot, active_tasks, stats,
                  *, preloaded_settlements=None, preloaded_visits=None, task_params=None):
    """Replay one simulated day: expire, generate, then complete/resolve tasks.

    Mutates ``active_tasks`` ({(assistant_id, member_id): task dict}) and the
    ``stats`` counters in place; commits once on the app connection at the end.
    """
    # All simulated writes are stamped at 07:00 local time on the replayed day.
    day_dt = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=tz)
    # 1. Expiry sweep: close active tasks whose expires_at passed before today.
    expired_keys = [k for k, t in active_tasks.items() if t.get("expires_at") and t["expires_at"] < day_dt]
    for key in expired_keys:
        task = active_tasks.pop(key)
        stats["expired"] += 1
        with app_conn.cursor() as cur:
            cur.execute("UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s", (day_dt, task["id"]))
            self._history(cur, task["id"], "expired", "active", "expired", task["task_type"], task["task_type"], {"simulated": True})
    # 2. Task generation (conflict/override strategy).
    #    (The original comment here was mojibake-garbled; restored from context.)
    relation = snapshot["relation"]
    wbi_map = snapshot["wbi"]
    nci_map = snapshot["nci"]
    # Only pairs the assistant actually "owns": MAIN/COMANAGE with sessions.
    ownership_pairs = [
        (aid, mid, info)
        for (aid, mid), info in relation.items()
        if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
    ]
    for aid, mid, info in ownership_pairs:
        wbi = wbi_map.get(mid, Decimal(0))
        nci = nci_map.get(mid, Decimal(0))
        rs = info["rs"]
        # Parameterised decision, mirroring the daily task_generator._process_pair.
        priority_score = max(wbi, nci)
        if task_params:
            ht = Decimal(str(task_params["high_threshold"]))
            nt = Decimal(str(task_params["normal_threshold"]))
            rs_min = Decimal(str(task_params["rs_min"]))
            rs_max = Decimal(str(task_params["rs_max"]))
        else:
            # Fallback defaults when no params were preloaded.
            ht, nt, rs_min, rs_max = Decimal(7), Decimal(5), Decimal(1), Decimal(6)
        if priority_score > ht:
            new_type = "high_priority_recall"
        elif priority_score > nt:
            new_type = "priority_recall"
        elif rs > rs_min and rs < rs_max:
            new_type = "relationship_building"
        else:
            new_type = None
        if not new_type:
            continue
        key = (aid, mid)
        existing = active_tasks.get(key)
        # Recall tasks are ranked by max(wbi, nci); relationship tasks by rs.
        priority = float(max(wbi, nci)) if new_type in ("high_priority_recall", "priority_recall") else float(rs)
        if existing:
            if existing["task_type"] == new_type:
                # Same type already active — nothing to do.
                continue
            if existing["task_type"] == "follow_up_visit":
                # follow_up_visit keeps a 72h grace window while the new
                # higher-priority task is created alongside it.
                with app_conn.cursor() as cur:
                    if not existing.get("expires_at"):
                        cur.execute(
                            "UPDATE biz.coach_tasks SET expires_at = created_at + INTERVAL '72 hours', updated_at = %s WHERE id = %s",
                            (day_dt, existing["id"]),
                        )
                        self._history(cur, existing["id"], "expires_at_filled", "active", "active",
                                      "follow_up_visit", "follow_up_visit",
                                      {"reason": "higher_priority_task_created", "simulated": True})
                    cur.execute(
                        """INSERT INTO biz.coach_tasks
                           (site_id, assistant_id, member_id, task_type, status,
                           priority_score, parent_task_id, created_at, updated_at)
                           VALUES (%s, %s, %s, %s, 'active', %s, %s, %s, %s)
                           ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
                           DO UPDATE SET priority_score = EXCLUDED.priority_score, updated_at = EXCLUDED.updated_at
                           RETURNING id""",
                        (site_id, aid, mid, new_type, priority, existing["id"], day_dt, day_dt),
                    )
                    new_id = cur.fetchone()[0]
                    self._history(cur, new_id, "created", None, "active", "follow_up_visit", new_type, {"simulated": True})
                active_tasks[key] = {"id": new_id, "task_type": new_type, "created_at": day_dt, "expires_at": None, "priority": priority}
                stats["created"] += 1
            else:
                # Non-follow-up: override the existing task's type in place.
                with app_conn.cursor() as cur:
                    # First close any conflicting active row of the same new_type
                    # (avoids the partial unique-index collision).
                    cur.execute(
                        """UPDATE biz.coach_tasks SET status = 'inactive', updated_at = %s
                           WHERE site_id = %s AND assistant_id = %s AND member_id = %s
                           AND task_type = %s AND status = 'active' AND id != %s""",
                        (day_dt, site_id, aid, mid, new_type, existing["id"]),
                    )
                    cur.execute(
                        "UPDATE biz.coach_tasks SET task_type = %s, priority_score = %s, updated_at = %s WHERE id = %s AND status = 'active'",
                        (new_type, priority, day_dt, existing["id"]),
                    )
                    self._history(cur, existing["id"], "type_override", "active", "active",
                                  existing["task_type"], new_type,
                                  {"old_priority": existing.get("priority"), "simulated": True})
                existing["task_type"] = new_type
                existing["priority"] = priority
                stats["overridden"] += 1
        else:
            # New task: UPSERT (refresh priority when a same-type active row exists).
            with app_conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO biz.coach_tasks
                       (site_id, assistant_id, member_id, task_type, status,
                       priority_score, created_at, updated_at)
                       VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                       ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
                       DO UPDATE SET priority_score = EXCLUDED.priority_score, updated_at = EXCLUDED.updated_at
                       RETURNING id""",
                    (site_id, aid, mid, new_type, priority, day_dt, day_dt),
                )
                task_id = cur.fetchone()[0]
                self._history(cur, task_id, "created", None, "active", None, new_type, {"simulated": True})
            active_tasks[key] = {"id": task_id, "task_type": new_type, "created_at": day_dt, "expires_at": None, "priority": priority}
            stats["created"] += 1
    # 3. Recall detection (preloaded data preferred over a per-day query).
    settlements = preloaded_settlements if preloaded_settlements is not None else self._load_settlements(etl_conn, site_id, d)
    for (aid, mid), pay_time in settlements.items():
        key = (aid, mid)
        task = active_tasks.get(key)
        with app_conn.cursor() as cur:
            try:
                cur.execute(
                    """INSERT INTO biz.recall_events
                       (site_id, assistant_id, member_id, pay_time, task_id, task_type, created_at)
                       VALUES (%s, %s, %s, %s, %s, %s, %s)
                       ON CONFLICT (site_id, assistant_id, member_id,
                       (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                       DO NOTHING RETURNING id""",
                    (site_id, aid, mid, pay_time, task["id"] if task else None, task["task_type"] if task else None, day_dt),
                )
                if cur.fetchone():
                    stats["recall_events"] += 1
            except Exception:
                # Best-effort event logging — a failed insert must not stop replay.
                pass
        # Only recall-type tasks created before the service auto-complete.
        if not task or task["task_type"] not in ("high_priority_recall", "priority_recall") or pay_time <= task["created_at"]:
            continue
        with app_conn.cursor() as cur:
            cur.execute(
                """UPDATE biz.coach_tasks SET status = 'completed', completed_at = %s,
                   completed_task_type = %s, completion_type = 'auto', updated_at = %s
                   WHERE id = %s AND status = 'active'""",
                (pay_time, task["task_type"], day_dt, task["id"]),
            )
            self._history(cur, task["id"], "completed", "active", "completed",
                          task["task_type"], task["task_type"],
                          {"service_time": str(pay_time), "simulated": True})
        stats["completed"] += 1
        # A completed recall spawns a follow_up_visit with a FOLLOW_UP_HOURS window.
        expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
        with app_conn.cursor() as cur:
            cur.execute(
                """INSERT INTO biz.coach_tasks
                   (site_id, assistant_id, member_id, task_type, status, expires_at, created_at, updated_at)
                   VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                   ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
                   DO UPDATE SET expires_at = EXCLUDED.expires_at, updated_at = EXCLUDED.updated_at
                   RETURNING id""",
                (site_id, aid, mid, expires_at, day_dt, day_dt),
            )
            fu_id = cur.fetchone()[0]
            self._history(cur, fu_id, "created", None, "active", None, "follow_up_visit",
                          {"reason": "recall_completed", "simulated": True})
        active_tasks[key] = {"id": fu_id, "task_type": "follow_up_visit", "created_at": day_dt, "expires_at": expires_at}
        stats["follow_up_created"] += 1
    # 3b. Store-level resolution: the member visited the store, so recall tasks
    #     held by assistants who did NOT serve them are marked resolved.
    member_visits = preloaded_visits if preloaded_visits is not None else self._load_member_visits(etl_conn, site_id, d)
    resolved_keys = [
        k for k, t in active_tasks.items()
        if k[1] in member_visits
        and t["task_type"] in ("high_priority_recall", "priority_recall")
        and member_visits[k[1]] > t["created_at"]
    ]
    for key in resolved_keys:
        task = active_tasks.pop(key)
        pay_time = member_visits[key[1]]
        with app_conn.cursor() as cur:
            cur.execute(
                """UPDATE biz.coach_tasks SET status = 'resolved', updated_at = %s
                   WHERE id = %s AND status = 'active'""",
                (day_dt, task["id"]),
            )
            self._history(cur, task["id"], "customer_returned", "active", "resolved",
                          task["task_type"], task["task_type"],
                          {"service_time": str(pay_time), "simulated": True})
        stats["resolved"] += 1
    app_conn.commit()
@staticmethod
def _history(cur, task_id, action, old_status, new_status, old_task_type, new_task_type, detail=None):
    """Append one audit row to biz.coach_task_history; no-op when task_id is None."""
    if task_id is None:
        return
    payload = json.dumps(detail) if detail else None
    cur.execute(
        """INSERT INTO biz.coach_task_history
           (task_id, action, old_status, new_status, old_task_type, new_task_type, detail)
           VALUES (%s, %s, %s, %s, %s, %s, %s)""",
        (task_id, action, old_status, new_status, old_task_type, new_task_type, payload),
    )
def _clean_date_range(self, app_conn, tz, start_date: date, end_date: date):
    """Delete old task data created inside [start_date, end_date] only.

    Deletion order: history rows first (they reference coach_tasks via
    task_id), then recall events, then the tasks themselves.
    """
    range_start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0, tzinfo=tz)
    range_end = datetime(end_date.year, end_date.month, end_date.day, 23, 59, 59, tzinfo=tz)
    with app_conn.cursor() as cur:
        cur.execute(
            "DELETE FROM biz.coach_task_history WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at >= %s AND created_at <= %s)",
            (range_start, range_end),
        )
        h = cur.rowcount
        cur.execute(
            "DELETE FROM biz.recall_events WHERE created_at >= %s AND created_at <= %s",
            (range_start, range_end),
        )
        e = cur.rowcount
        cur.execute(
            "DELETE FROM biz.coach_tasks WHERE created_at >= %s AND created_at <= %s",
            (range_start, range_end),
        )
        t = cur.rowcount
    app_conn.commit()
    if t > 0 or e > 0:
        self.logger.info("DWS_TASK_ENGINE [推演]: 清理 %s~%s 旧数据: %d history, %d events, %d tasks", start_date, end_date, h, e, t)
def _load_existing_active_tasks(self, app_conn, site_id: int, before_date: date = None) -> dict:
    """Load existing active tasks into {(assistant_id, member_id): task dict}.

    before_date: only tasks with created_at < before_date (local midnight) are
    loaded, so "future" tasks created after the simulation window are skipped.
    """
    active_tasks = {}
    tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
    with app_conn.cursor() as cur:
        if before_date:
            cutoff = datetime(before_date.year, before_date.month, before_date.day, 0, 0, 0, tzinfo=tz)
            cur.execute(
                """SELECT id, assistant_id, member_id, task_type, created_at, expires_at, priority_score
                   FROM biz.coach_tasks
                   WHERE site_id = %s AND status = 'active' AND created_at < %s""",
                (site_id, cutoff),
            )
        else:
            cur.execute(
                """SELECT id, assistant_id, member_id, task_type, created_at, expires_at, priority_score
                   FROM biz.coach_tasks
                   WHERE site_id = %s AND status = 'active'""",
                (site_id,),
            )
        for row in cur.fetchall():
            key = (row[1], row[2])
            active_tasks[key] = {
                "id": row[0], "task_type": row[3],
                "created_at": row[4], "expires_at": row[5],
                "priority": float(row[6]) if row[6] else 0,
            }
    app_conn.commit()
    return active_tasks

View File

@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
"""
指数日快照回填任务DWS_INDEX_BACKFILL
逐天调用 RelationIndexTask / WinbackIndexTask / NewconvIndexTask
为 3 张指数表生成历史日快照。
CHANGE 2026-04-12 | 性能优化:
- 任务实例复用(创建 1 次,循环 N 天复用)
- 减少 765 次 Task 初始化和参数表查询
CLI 用法:
python -m cli.main --tasks DWS_INDEX_BACKFILL \\
--window-start 2025-08-01 --window-end 2026-04-11
admin-web在 ETL 任务配置页面选择 DWS_INDEX_BACKFILL设置时间窗口。
"""
from __future__ import annotations
import time
from datetime import date, datetime, timedelta
from typing import Any, Dict, Optional
from ..base_task import BaseTask, TaskContext
from ..dws.index.relation_index_task import RelationIndexTask
from ..dws.index.winback_index_task import WinbackIndexTask
from ..dws.index.newconv_index_task import NewconvIndexTask
class IndexBackfillTask(BaseTask):
    """Index daily-snapshot backfill utility task (DWS_INDEX_BACKFILL).

    Runs RelationIndexTask / WinbackIndexTask / NewconvIndexTask once per day
    over the requested window to produce historical daily snapshots for the
    three index tables. Task instances are created once and reused per day.
    """

    def get_task_code(self) -> str:
        """Stable task code used by the scheduler/CLI to select this task."""
        return "DWS_INDEX_BACKFILL"

    def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
        """Main flow: resolve the date range, then run the 3 index tasks per day."""
        start_date, end_date = self._parse_date_range(context)
        store_id = self._resolve_store_id(context)
        total_days = (end_date - start_date).days + 1
        self.logger.info(
            "DWS_INDEX_BACKFILL: %s ~ %s (%d天), store_id=%s",
            start_date, end_date, total_days, store_id,
        )
        # Create instances once and reuse across the loop (avoids repeated
        # __init__ + parameter-table queries for every task/day combination).
        task_instances = [
            RelationIndexTask(self.config, self.db, self.api, self.logger),
            WinbackIndexTask(self.config, self.db, self.api, self.logger),
            NewconvIndexTask(self.config, self.db, self.api, self.logger),
        ]
        task_names = ["RS", "WBI", "NCI"]
        completed = 0
        errors = 0
        t0 = time.time()
        current = start_date
        while current <= end_date:
            ctx = self._build_day_context(current, store_id)
            day_num = (current - start_date).days + 1
            day_t0 = time.time()
            for i, task in enumerate(task_instances):
                try:
                    task.execute(ctx)
                    completed += 1
                except Exception:
                    # Log and continue: one failed day must not abort the backfill.
                    self.logger.exception(
                        "DWS_INDEX_BACKFILL: %s %s 失败",
                        task.__class__.__name__, current,
                    )
                    errors += 1
                self.logger.info(
                    "DWS_INDEX_BACKFILL: %s [%d/%d] %s (%d/3)",
                    current, day_num, total_days, task_names[i], i + 1,
                )
            # Per-day timing + rough ETA for the remaining days.
            elapsed = time.time() - day_t0
            total_elapsed_so_far = time.time() - t0
            avg_per_day = total_elapsed_so_far / day_num
            eta = avg_per_day * (total_days - day_num)
            self.logger.info(
                "DWS_INDEX_BACKFILL: %s [%d/%d %.0f%%] %.1fs/天 ETA %.0fs",
                current, day_num, total_days, day_num / total_days * 100,
                elapsed, eta,
            )
            current += timedelta(days=1)
        total_elapsed = time.time() - t0
        self.logger.info(
            "DWS_INDEX_BACKFILL 完成: %d/%d 成功, %d 失败, %.0fs",
            completed, total_days * 3, errors, total_elapsed,
        )
        return {
            "status": "SUCCESS" if errors == 0 else "PARTIAL",
            "counts": {
                "days": total_days,
                "completed": completed,
                "errors": errors,
                "elapsed_sec": round(total_elapsed, 1),
            },
        }

    def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
        """Resolve (start, end) from the config window override, else from context."""
        wo = self.config.get("run.window_override") or {}
        start_str = wo.get("start")
        end_str = wo.get("end")
        if start_str and end_str:
            return self._parse_date(start_str), self._parse_date(end_str)
        if context and context.window_start and context.window_end:
            return context.window_start.date(), context.window_end.date()
        raise ValueError(
            "DWS_INDEX_BACKFILL 需要指定日期范围。"
            "CLI: --window-start 2025-08-01 --window-end 2026-04-11"
        )

    @staticmethod
    def _parse_date(s) -> date:
        """Coerce a date / datetime / ISO-ish string (first 10 chars) into a date."""
        if isinstance(s, date) and not isinstance(s, datetime):
            return s
        if isinstance(s, datetime):
            return s.date()
        return date.fromisoformat(str(s).strip()[:10])

    def _resolve_store_id(self, context: Optional[TaskContext]) -> int:
        """Store id from context first, then app.store_id config; required."""
        if context and getattr(context, "store_id", None):
            return int(context.store_id)
        sid = self.config.get("app.store_id")
        if sid:
            return int(sid)
        raise ValueError("DWS_INDEX_BACKFILL 需要 store_id")

    def _build_day_context(self, d: date, store_id: int) -> TaskContext:
        """Build a per-day TaskContext: as-of 23:59 local with a 90-day lookback."""
        from zoneinfo import ZoneInfo
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        as_of = datetime(d.year, d.month, d.day, 23, 59, 0, tzinfo=tz)
        window_start = as_of - timedelta(days=90)
        return TaskContext(
            store_id=store_id,
            window_start=window_start,
            window_end=as_of,
            window_minutes=int((as_of - window_start).total_seconds() / 60),
            as_of_date=as_of,
        )

View File

@@ -0,0 +1,473 @@
# -*- coding: utf-8 -*-
"""
历史任务推演任务DWS_TASK_SIMULATION
基于指数日快照,逐天重放 task_generator + recall_detector 逻辑,
还原完整的任务生命周期。
CLI 用法:
python -m cli.main --tasks DWS_TASK_SIMULATION \\
--window-start 2025-08-01 --window-end 2026-03-28
admin-web在 ETL 任务配置页面选择 DWS_TASK_SIMULATION设置时间窗口。
"""
from __future__ import annotations
import json
import logging
import os
import sys
import time
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, Optional
from zoneinfo import ZoneInfo
import psycopg2
from ..base_task import BaseTask, TaskContext
# Import task_generator pure functions (backend code): put the backend package
# on sys.path so the simulation reuses exactly the daily generator's logic.
_BACKEND = Path(__file__).resolve().parents[5] / "backend"
if str(_BACKEND) not in sys.path:
    sys.path.insert(0, str(_BACKEND))
from app.services.task_generator import (
    IndexData,
    determine_task_type,
    should_replace_task,
)
logger = logging.getLogger(__name__)
# Simulation cutoff (real active tasks exist from 03-29 onward).
CUTOFF_DATE = date(2026, 3, 28)
# Hours of validity for follow_up_visit tasks spawned by a completed recall.
FOLLOW_UP_HOURS = 48
class TaskSimulationTask(BaseTask):
"""历史任务推演工具任务。"""
def get_task_code(self) -> str:
    """Stable identifier used by the scheduler/CLI to select this task."""
    return "DWS_TASK_SIMULATION"
def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
    """Main flow: resolve the date range, then replay tasks day by day.

    Raises ValueError when APP_DB_DSN is not set.
    """
    start_date, end_date = self._parse_date_range(context)
    # Never simulate past the cutoff — real active tasks start after it.
    if end_date > CUTOFF_DATE:
        self.logger.warning(
            "end_date %s 超过截止日期 %s,自动截断", end_date, CUTOFF_DATE
        )
        end_date = CUTOFF_DATE
    tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
    # ETL DB: reuse the framework connection.
    etl_conn = self.db.conn
    # Business DB needs its own connection (separate DSN).
    app_dsn = os.environ.get("APP_DB_DSN")
    if not app_dsn:
        raise ValueError("DWS_TASK_SIMULATION 需要 APP_DB_DSN 环境变量")
    app_conn = psycopg2.connect(app_dsn)
    app_conn.set_client_encoding("UTF8")
    site_id = self._get_site_id(etl_conn)
    total_days = (end_date - start_date).days + 1
    self.logger.info(
        "DWS_TASK_SIMULATION: %s ~ %s (%d天), site_id=%s",
        start_date, end_date, total_days, site_id,
    )
    # Simulated rows before the cutoff are safe to wipe and regenerate.
    self._clean_before_cutoff(app_conn, CUTOFF_DATE)
    # In-memory view of active tasks keyed by (assistant_id, member_id).
    active_tasks: dict[tuple[int, int], dict] = {}
    stats = {
        "created": 0, "completed": 0, "overridden": 0,
        "expired": 0, "follow_up_created": 0,
        "recall_events": 0, "skipped_no_snapshot": 0,
    }
    t0 = time.time()
    current = start_date
    while current <= end_date:
        snapshot = self._load_snapshot(etl_conn, site_id, current)
        if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
            # No snapshot for this day — skip it.
            stats["skipped_no_snapshot"] += 1
            current += timedelta(days=1)
            continue
        self._simulate_day(
            app_conn, etl_conn, site_id, current, tz,
            snapshot, active_tasks, stats,
        )
        day_num = (current - start_date).days + 1
        # Progress log every 30 days and on the final day.
        if day_num % 30 == 0 or current == end_date:
            elapsed = time.time() - t0
            self.logger.info(
                "DWS_TASK_SIMULATION: %s (%d/%d) 创建=%d 完成=%d 覆盖=%d 过期=%d %.0fs",
                current, day_num, total_days,
                stats["created"], stats["completed"],
                stats["overridden"], stats["expired"], elapsed,
            )
        current += timedelta(days=1)
    total_elapsed = time.time() - t0
    self.logger.info(
        "DWS_TASK_SIMULATION 完成: %.0fs, 创建=%d 完成=%d 覆盖=%d 过期=%d 回访=%d 事件=%d 跳过=%d active=%d",
        total_elapsed, stats["created"], stats["completed"],
        stats["overridden"], stats["expired"],
        stats["follow_up_created"], stats["recall_events"],
        stats["skipped_no_snapshot"], len(active_tasks),
    )
    app_conn.close()
    return {
        "status": "SUCCESS",
        "counts": stats,
    }
# ── date parsing ──
def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
    """Resolve the (start, end) date pair: config override first, then context."""
    override = self.config.get("run.window_override") or {}
    start_raw, end_raw = override.get("start"), override.get("end")
    if start_raw and end_raw:
        return self._parse_date(start_raw), self._parse_date(end_raw)
    if context and context.window_start and context.window_end:
        return context.window_start.date(), context.window_end.date()
    raise ValueError(
        "DWS_TASK_SIMULATION 需要指定日期范围。"
        "CLI: --window-start 2025-08-01 --window-end 2026-03-28"
    )
@staticmethod
def _parse_date(s) -> date:
    """Coerce a date / datetime / ISO-ish string (first 10 chars) into a date."""
    if isinstance(s, datetime):
        return s.date()
    if isinstance(s, date):
        return s
    return date.fromisoformat(str(s).strip()[:10])
def _get_site_id(self, etl_conn) -> int:
    """Fetch the single site_id present in the relation index table.

    Raises RuntimeError when the table is empty (backfill not run yet).
    """
    with etl_conn.cursor() as cur:
        cur.execute(
            "SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1"
        )
        row = cur.fetchone()
    etl_conn.commit()
    if row is None:
        raise RuntimeError("relation_index 表为空,请先运行 DWS_INDEX_BACKFILL")
    return row[0]
# ── data loading ──
def _load_snapshot(self, etl_conn, site_id: int, stat_date: date) -> dict:
    """Load one day's snapshot from the three index tables.

    Returns {"relation": {(assistant_id, member_id): {rs, os_label, session_count}},
             "wbi": {member_id: Decimal}, "nci": {member_id: Decimal}}.
    """
    result = {"relation": {}, "wbi": {}, "nci": {}}
    with etl_conn.cursor() as cur:
        cur.execute(
            """SELECT assistant_id, member_id, rs_display, os_label, session_count
               FROM dws.dws_member_assistant_relation_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for r in cur.fetchall():
            result["relation"][(r[0], r[1])] = {
                "rs": Decimal(str(r[2])), "os_label": r[3], "session_count": r[4],
            }
        # NULL scores are normalised to Decimal(0) below.
        cur.execute(
            """SELECT member_id, display_score FROM dws.dws_member_winback_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for r in cur.fetchall():
            result["wbi"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
        cur.execute(
            """SELECT member_id, display_score FROM dws.dws_member_newconv_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for r in cur.fetchall():
            result["nci"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
    etl_conn.commit()
    return result
def _load_settlements(self, etl_conn, site_id: int, d: date) -> dict:
"""加载当天结算 → {(assistant_id, member_id): pay_time}"""
tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
day_end = day_start + timedelta(days=1)
settlements = {}
with etl_conn.cursor() as cur:
cur.execute(
"""SELECT sl.site_assistant_id, sh.member_id, MAX(sh.pay_time)
FROM dwd.dwd_settlement_head sh
JOIN dwd.dwd_assistant_service_log sl
ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
WHERE sh.site_id = %s AND sh.settle_type IN (1,3)
AND sh.pay_time >= %s AND sh.pay_time < %s
GROUP BY sl.site_assistant_id, sh.member_id""",
(site_id, day_start, day_end),
)
for r in cur.fetchall():
if r[0] and r[1]:
settlements[(r[0], r[1])] = r[2]
etl_conn.commit()
return settlements
# ── 模拟逻辑 ──
    def _simulate_day(
        self, app_conn, etl_conn, site_id, d, tz,
        snapshot, active_tasks, stats,
    ):
        """Replay one simulated day of the coach-task lifecycle.

        Three ordered phases against the app DB: (1) expire active tasks
        whose ``expires_at`` has passed, (2) create or override tasks from
        the day's index snapshot, (3) detect recalls from the day's
        settlements, completing recall tasks and spawning follow-up
        visits. ``active_tasks`` (keyed by ``(assistant_id, member_id)``)
        and ``stats`` are mutated in place; all app-DB writes are
        committed once at the end of the day.
        """
        # Simulated "run moment" for this day: 07:00 local time.
        day_dt = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=tz)
        # 1. Expiry detection
        expired_keys = [
            k for k, t in active_tasks.items()
            if t.get("expires_at") and t["expires_at"] < day_dt
        ]
        for key in expired_keys:
            task = active_tasks.pop(key)
            stats["expired"] += 1
            with app_conn.cursor() as cur:
                cur.execute(
                    "UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s",
                    (day_dt, task["id"]),
                )
                self._history(cur, task["id"], "expired", "active", "expired",
                              task["task_type"], task["task_type"],
                              {"simulated": True})
        # 2. Task generation (conflict override)
        relation = snapshot["relation"]
        wbi_map = snapshot["wbi"]
        nci_map = snapshot["nci"]
        # Only MAIN / COMANAGE ownership pairs with at least one session qualify.
        ownership_pairs = [
            (aid, mid, info)
            for (aid, mid), info in relation.items()
            if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
        ]
        for aid, mid, info in ownership_pairs:
            wbi = wbi_map.get(mid, Decimal(0))
            nci = nci_map.get(mid, Decimal(0))
            rs = info["rs"]
            # The simulation has no live recall / follow-up-note context,
            # hence both flags are passed as False.
            new_type = determine_task_type(IndexData(
                site_id=site_id, assistant_id=aid, member_id=mid,
                wbi=wbi, nci=nci, rs=rs,
                has_active_recall=False, has_follow_up_note=False,
            ))
            if not new_type:
                continue
            key = (aid, mid)
            existing = active_tasks.get(key)
            # Recall-type tasks rank by the stronger of WBI/NCI; others by RS.
            priority = float(max(wbi, nci)) if new_type in (
                "high_priority_recall", "priority_recall"
            ) else float(rs)
            if existing:
                if existing["task_type"] == new_type:
                    continue  # same task type: nothing to change
                if existing["task_type"] == "follow_up_visit":
                    # follow_up_visit keeps its grace period: backfill expires_at
                    # once, then create the higher-priority task alongside it.
                    with app_conn.cursor() as cur:
                        if not existing.get("expires_at"):
                            cur.execute(
                                """UPDATE biz.coach_tasks
                               SET expires_at = created_at + INTERVAL '48 hours', updated_at = %s
                               WHERE id = %s""",
                                (day_dt, existing["id"]),
                            )
                            self._history(cur, existing["id"], "expires_at_filled",
                                          "active", "active",
                                          "follow_up_visit", "follow_up_visit",
                                          {"reason": "higher_priority_task_created", "simulated": True})
                        # Create the new higher-priority task
                        cur.execute(
                            """INSERT INTO biz.coach_tasks
                            (site_id, assistant_id, member_id, task_type, status,
                             priority_score, parent_task_id, created_at, updated_at)
                            VALUES (%s, %s, %s, %s, 'active', %s, %s, %s, %s)
                            RETURNING id""",
                            (site_id, aid, mid, new_type, priority,
                             existing["id"], day_dt, day_dt),
                        )
                        new_id = cur.fetchone()[0]
                        self._history(cur, new_id, "created", None, "active",
                                      "follow_up_visit", new_type, {"simulated": True})
                    # The map tracks one task per pair, so the new task replaces
                    # the still-active follow_up_visit in the in-memory view.
                    active_tasks[key] = {
                        "id": new_id, "task_type": new_type,
                        "created_at": day_dt, "expires_at": None,
                        "priority": priority,
                    }
                    stats["created"] += 1
                else:
                    # Not a follow_up_visit: override the task in place.
                    with app_conn.cursor() as cur:
                        cur.execute(
                            """UPDATE biz.coach_tasks
                            SET task_type = %s, priority_score = %s, updated_at = %s
                            WHERE id = %s AND status = 'active'""",
                            (new_type, priority, day_dt, existing["id"]),
                        )
                        self._history(cur, existing["id"], "type_override", "active", "active",
                                      existing["task_type"], new_type,
                                      {"old_priority": existing.get("priority"), "simulated": True})
                    existing["task_type"] = new_type
                    existing["priority"] = priority
                    stats["overridden"] += 1
            else:
                # No existing task for this pair: create a fresh one.
                with app_conn.cursor() as cur:
                    cur.execute(
                        """INSERT INTO biz.coach_tasks
                        (site_id, assistant_id, member_id, task_type, status,
                         priority_score, created_at, updated_at)
                        VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                        RETURNING id""",
                        (site_id, aid, mid, new_type, priority, day_dt, day_dt),
                    )
                    task_id = cur.fetchone()[0]
                    self._history(cur, task_id, "created", None, "active",
                                  None, new_type, {"simulated": True})
                active_tasks[key] = {
                    "id": task_id, "task_type": new_type,
                    "created_at": day_dt, "expires_at": None,
                    "priority": priority,
                }
                stats["created"] += 1
        # 3. Recall detection
        settlements = self._load_settlements(etl_conn, site_id, d)
        for (aid, mid), pay_time in settlements.items():
            key = (aid, mid)
            task = active_tasks.get(key)
            # Record the recall event (idempotent per pair per local day
            # via the ON CONFLICT expression index).
            with app_conn.cursor() as cur:
                try:
                    cur.execute(
                        """INSERT INTO biz.recall_events
                        (site_id, assistant_id, member_id, pay_time,
                         task_id, task_type, created_at)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (site_id, assistant_id, member_id,
                        (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                        DO NOTHING RETURNING id""",
                        (site_id, aid, mid, pay_time,
                         task["id"] if task else None,
                         task["task_type"] if task else None,
                         day_dt),
                    )
                    if cur.fetchone():
                        stats["recall_events"] += 1
                except Exception:
                    # NOTE(review): best-effort swallow. If this INSERT fails for
                    # any reason other than the conflict path, psycopg-style
                    # drivers leave the transaction in a failed state and the
                    # rest of this day's statements would error — confirm the
                    # driver/autocommit behavior used here.
                    pass
            if not task:
                continue
            if task["task_type"] not in ("high_priority_recall", "priority_recall"):
                continue
            if pay_time <= task["created_at"]:
                # Only settlements strictly after task creation count as a recall.
                continue
            # Complete the recall task
            with app_conn.cursor() as cur:
                cur.execute(
                    """UPDATE biz.coach_tasks
                    SET status = 'completed', completed_at = %s,
                    completed_task_type = %s, completion_type = 'auto', updated_at = %s
                    WHERE id = %s AND status = 'active'""",
                    (pay_time, task["task_type"], day_dt, task["id"]),
                )
                self._history(cur, task["id"], "completed", "active", "completed",
                              task["task_type"], task["task_type"],
                              {"service_time": str(pay_time), "simulated": True})
            stats["completed"] += 1
            # Spawn the follow-up visit; FOLLOW_UP_HOURS (module-level
            # constant) sets its expiry window from the pay time.
            expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
            with app_conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO biz.coach_tasks
                    (site_id, assistant_id, member_id, task_type, status,
                     expires_at, created_at, updated_at)
                    VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                    RETURNING id""",
                    (site_id, aid, mid, expires_at, day_dt, day_dt),
                )
                fu_id = cur.fetchone()[0]
                self._history(cur, fu_id, "created", None, "active",
                              None, "follow_up_visit",
                              {"reason": "recall_completed", "simulated": True})
            active_tasks[key] = {
                "id": fu_id, "task_type": "follow_up_visit",
                "created_at": day_dt, "expires_at": expires_at,
            }
            stats["follow_up_created"] += 1
        app_conn.commit()
# ── 辅助 ──
@staticmethod
def _history(cur, task_id, action, old_status, new_status,
old_task_type, new_task_type, detail=None):
if task_id is None:
return
cur.execute(
"""INSERT INTO biz.coach_task_history
(task_id, action, old_status, new_status,
old_task_type, new_task_type, detail)
VALUES (%s, %s, %s, %s, %s, %s, %s)""",
(task_id, action, old_status, new_status,
old_task_type, new_task_type,
json.dumps(detail) if detail else None),
)
def _clean_before_cutoff(self, app_conn, cutoff: date):
"""清理截止日期前的数据(安全:只删模拟产生的历史数据)。"""
tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
cutoff_dt = datetime(cutoff.year, cutoff.month, cutoff.day, 23, 59, 59, tzinfo=tz)
with app_conn.cursor() as cur:
cur.execute(
"""DELETE FROM biz.coach_task_history
WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at < %s)""",
(cutoff_dt,),
)
h = cur.rowcount
cur.execute(
"DELETE FROM biz.recall_events WHERE created_at < %s",
(cutoff_dt,),
)
e = cur.rowcount
cur.execute(
"DELETE FROM biz.coach_tasks WHERE created_at < %s",
(cutoff_dt,),
)
t = cur.rowcount
app_conn.commit()
if t > 0:
self.logger.info(
"DWS_TASK_SIMULATION: 清理旧数据 %d history, %d events, %d tasks",
h, e, t,
)