feat: 2026-04-15~04-20 累积变更基线 — 多主线合流
主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
- 新增 GET /xcx/coaches/{id}/banner 轻量接口
- performance/records 加 coach_id 参数 + view_board_coach 权限分流
- coach/customer/performance/board/task 服务层重构
- fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
- task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
- recall_detector settle_type=3 双重限制 + 门店级 resolved
主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
- perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
- isScattered 散客标记端到端
- foodDetail/phoneFull/creator* 字段透传
主线 3: P19 指数回测框架 Phase 1+2
- 3 个指数表 stat_date 日快照模式
- 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
- task_engine 升级 HTTP 实时 + 推演回测双模式
主线 4: Core 维度层启用
- 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
- 修复 app 视图空查询问题
主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口
主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
- schema 基线与 DDL 快照同步
主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)
附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具
合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,6 +31,7 @@ from .finance_board_cache import FinanceBoardCacheTask
|
||||
from .coach_area_hours_task import CoachAreaHoursTask
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
from .maintenance_task import DwsMaintenanceTask
|
||||
from .core_dim_sync_task import CoreDimSyncTask
|
||||
from .goods_stock_daily_task import GoodsStockDailyTask
|
||||
from .goods_stock_weekly_task import GoodsStockWeeklyTask
|
||||
from .goods_stock_monthly_task import GoodsStockMonthlyTask
|
||||
@@ -73,6 +74,7 @@ __all__ = [
|
||||
"FinanceDiscountDetailTask",
|
||||
"CoachAreaHoursTask",
|
||||
"DwsMaintenanceTask",
|
||||
"CoreDimSyncTask",
|
||||
# 库存维度
|
||||
"GoodsStockDailyTask",
|
||||
"GoodsStockWeeklyTask",
|
||||
|
||||
173
apps/etl/connectors/feiqiu/tasks/dws/core_dim_sync_task.py
Normal file
173
apps/etl/connectors/feiqiu/tasks/dws/core_dim_sync_task.py
Normal file
@@ -0,0 +1,173 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Core 维度同步任务
|
||||
|
||||
功能说明:
|
||||
将 DWD 层当前版本(scd2_is_current=1)的维度数据同步到 core 层。
|
||||
core 层作为跨平台统一维度层,屏蔽 ODS/DWD 多数据源差异。
|
||||
|
||||
同步表:
|
||||
- core.dim_assistant <- dwd.dim_assistant
|
||||
- core.dim_member <- dwd.dim_member
|
||||
- core.dim_site <- dwd.dim_site
|
||||
- core.dim_table <- dwd.dim_table
|
||||
|
||||
更新策略:
|
||||
TRUNCATE + INSERT 全量刷新(维度表数据量小,全量代价低)
|
||||
|
||||
作者:ETL 团队
|
||||
创建日期:2026-04-15
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
# Sync mapping: one entry per (core table, DWD source table, column mapping).
# Each "columns" item is a (core_col, dwd_expr) pair; dwd_expr is usually a
# plain column name but may be a SQL literal/expression evaluated against
# the DWD source (see core.dim_member "status" below).
SYNC_TABLES = [
    {
        "core_table": "core.dim_assistant",
        "dwd_source": "dwd.dim_assistant",
        "columns": [
            ("assistant_id", "assistant_id"),
            ("tenant_id", "tenant_id"),
            ("site_id", "site_id"),
            ("real_name", "real_name"),
            ("nickname", "nickname"),
            ("mobile", "mobile"),
            ("level", "level"),
            ("assistant_status", "assistant_status"),
            ("leave_status", "leave_status"),
        ],
    },
    {
        "core_table": "core.dim_member",
        "dwd_source": "dwd.dim_member",
        "columns": [
            ("member_id", "member_id"),
            ("system_member_id", "system_member_id"),
            ("tenant_id", "tenant_id"),
            ("register_site_id", "register_site_id"),
            ("mobile", "mobile"),
            ("nickname", "nickname"),
            ("member_card_grade_name", "member_card_grade_name"),
            # DWD has no status column; rows with scd2_is_current = 1 are
            # the active versions, hence the constant SQL literal "1".
            ("status", "1"),
        ],
    },
    {
        "core_table": "core.dim_site",
        "dwd_source": "dwd.dim_site",
        "columns": [
            ("site_id", "site_id"),
            ("tenant_id", "tenant_id"),
            ("shop_name", "shop_name"),
            ("site_label", "site_label"),
            ("shop_status", "shop_status"),
        ],
    },
    {
        "core_table": "core.dim_table",
        "dwd_source": "dwd.dim_table",
        "columns": [
            ("table_id", "table_id"),
            ("site_id", "site_id"),
            ("table_name", "table_name"),
            ("site_table_area_name", "site_table_area_name"),
            ("table_price", "table_price"),
        ],
    },
]
|
||||
|
||||
|
||||
class CoreDimSyncTask(BaseDwsTask):
    """Core dimension sync: full refresh of core.* from dwd.* (DWD -> core).

    Copies the current-version (scd2_is_current = 1) rows of each DWD
    dimension table listed in SYNC_TABLES into the corresponding core table
    via TRUNCATE + INSERT. Dimension tables are small, so a full rewrite is
    cheaper and simpler than a diff-based merge.
    """

    # No date column: every run is a full refresh, not an incremental window.
    DATE_COL = None

    def get_task_code(self) -> str:
        return "CORE_DIM_SYNC"

    def get_target_table(self) -> str:
        # Multi-table sync; this value is informational only and is not used
        # as a real single load target.
        return "core_dim_sync"

    def get_primary_keys(self) -> List[str]:
        # Full refresh — no key-based upsert, so no primary keys are needed.
        return []

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Extract current-version dimension rows from DWD.

        Returns:
            {"tables": {core_table: {"rows": [row_dict, ...],
                                     "columns": [core_col, ...]}},
             "site_id": context.store_id}
        """
        result: Dict[str, Dict[str, Any]] = {}
        for table_def in SYNC_TABLES:
            core_table = table_def["core_table"]
            dwd_source = table_def["dwd_source"]
            dwd_exprs = [expr for _, expr in table_def["columns"]]

            # Identifiers come from the trusted SYNC_TABLES constant, so
            # f-string interpolation (not user input) is safe here.
            sql = (
                f"SELECT {', '.join(dwd_exprs)} "
                f"FROM {dwd_source} WHERE scd2_is_current = 1"
            )

            with self.db.conn.cursor() as cur:
                cur.execute(sql)
                rows = cur.fetchall()

            core_cols = [col for col, _ in table_def["columns"]]
            result[core_table] = {
                "rows": [dict(zip(core_cols, row)) for row in rows],
                "columns": core_cols,
            }
            self.logger.info(
                "%s: %s <- %s: %d rows",
                self.get_task_code(), core_table, dwd_source, len(rows),
            )

        return {"tables": result, "site_id": context.store_id}

    def transform(self, extracted: Dict[str, Any], context: TaskContext) -> Dict[str, Any]:
        """Pass-through: no transformation is needed for dimension sync."""
        return extracted

    def load(self, transformed: Dict[str, Any], context: TaskContext) -> dict:
        """TRUNCATE + INSERT full refresh of every extracted core table.

        Returns the standard counts dict plus the number of tables synced.
        """
        tables = transformed.get("tables", {})
        total_inserted = 0

        for core_table, data in tables.items():
            rows = data["rows"]
            columns = data["columns"]

            with self.db.conn.cursor() as cur:
                cur.execute(f"TRUNCATE {core_table}")
                self.logger.info("%s: TRUNCATE %s", self.get_task_code(), core_table)

                if rows:
                    cols_str = ", ".join(columns)
                    placeholders = ", ".join(["%s"] * len(columns))
                    insert_sql = (
                        f"INSERT INTO {core_table} ({cols_str}) "
                        f"VALUES ({placeholders})"
                    )
                    # Batch insert instead of one execute() per row: fewer
                    # round-trips, identical resulting table contents.
                    cur.executemany(
                        insert_sql,
                        [[row.get(col) for col in columns] for row in rows],
                    )

                total_inserted += len(rows)
                self.logger.info(
                    "%s: INSERT %s: %d rows",
                    self.get_task_code(), core_table, len(rows),
                )

        # Sibling DWS tasks in this codebase commit explicitly after writing;
        # do the same so TRUNCATE + INSERT is not left in an open transaction.
        # (If the base task also commits, the extra commit is a no-op.)
        self.db.conn.commit()

        return {
            "counts": {
                "fetched": total_inserted,
                "inserted": total_inserted,
                "updated": 0,
                "skipped": 0,
                "errors": 0,
            },
            "extra": {"tables_synced": len(tables)},
        }
|
||||
@@ -530,10 +530,12 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
enable_stop_exception = int(params.get('enable_stop_high_balance_exception', 0)) == 1
|
||||
high_balance_threshold = float(params.get('high_balance_threshold', 1000))
|
||||
|
||||
# CHANGE 2026-04-12 | STOP 不再排除:超出 recency 窗口的老客归入 OLD 继续计算
|
||||
# WBI 衰减公式自然给出高分,避免最需要召回的客户被遗漏
|
||||
if data.t_a >= recency_days:
|
||||
if enable_stop_exception and data.sv_balance >= high_balance_threshold:
|
||||
return "STOP", "STOP_HIGH_BALANCE", True
|
||||
return "STOP", "STOP", False
|
||||
return "OLD", "STOP_OVERDUE", True
|
||||
|
||||
new_visit_threshold = int(params.get('new_visit_threshold', 2))
|
||||
new_days_threshold = int(params.get('new_days_threshold', 30))
|
||||
|
||||
@@ -5,6 +5,7 @@ from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .member_index_base import MemberActivityData, MemberIndexBaseTask
|
||||
@@ -202,9 +203,10 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
avg_raw=sum(all_raw) / len(all_raw)
|
||||
)
|
||||
|
||||
# P19: 回测模式传入 calc_time
|
||||
calc_time = (context.as_of_date if context and context.as_of_date else None)
|
||||
inserted = self._save_newconv_data(newconv_list, calc_time=calc_time)
|
||||
# 日快照模式:始终按 stat_date 写入
|
||||
now = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
|
||||
stat_date = now.date() if hasattr(now, 'date') else now
|
||||
inserted = self._save_newconv_data(newconv_list, stat_date=stat_date)
|
||||
self.logger.info("NCI calculation finished, inserted %d rows", inserted)
|
||||
|
||||
return {
|
||||
@@ -288,30 +290,23 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
if data.raw_score < 0:
|
||||
data.raw_score = 0.0
|
||||
|
||||
def _save_newconv_data(self, data_list: List[MemberNewconvData], *, calc_time=None) -> int:
|
||||
"""保存 NCI 数据"""
|
||||
def _save_newconv_data(self, data_list: List[MemberNewconvData], *, stat_date) -> int:
|
||||
"""日快照模式:按 (site_id, stat_date) 删除后插入。"""
|
||||
if not data_list:
|
||||
return 0
|
||||
|
||||
site_id = data_list[0].activity.site_id
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
use_param_time = calc_time is not None
|
||||
with self.db.conn.cursor() as cur:
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
from datetime import date as date_type
|
||||
if not isinstance(stat_date, date_type):
|
||||
stat_date = stat_date.date() if hasattr(stat_date, 'date') else stat_date
|
||||
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
|
||||
insert_sql = f"""
|
||||
site_id = data_list[0].activity.site_id
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND stat_date = %s",
|
||||
(site_id, stat_date),
|
||||
)
|
||||
|
||||
insert_sql = """
|
||||
INSERT INTO dws.dws_member_newconv_index (
|
||||
site_id, tenant_id, member_id,
|
||||
status, segment,
|
||||
@@ -325,7 +320,7 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
raw_score_welcome, raw_score_convert, raw_score,
|
||||
display_score_welcome, display_score_convert, display_score,
|
||||
last_wechat_touch_time,
|
||||
calc_time, created_at, updated_at
|
||||
calc_time, created_at, updated_at, stat_date
|
||||
) VALUES (
|
||||
%s, %s, %s,
|
||||
%s, %s,
|
||||
@@ -339,32 +334,40 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
%s, %s, %s,
|
||||
%s, %s, %s,
|
||||
%s,
|
||||
{time_placeholder}
|
||||
NOW(), NOW(), NOW(), %s
|
||||
)
|
||||
"""
|
||||
|
||||
inserted = 0
|
||||
# 批量写入(executemany 替代逐行 execute)
|
||||
batch_params = []
|
||||
for data in data_list:
|
||||
activity = data.activity
|
||||
batch_params.append((
|
||||
activity.site_id, activity.tenant_id, activity.member_id,
|
||||
data.status, data.segment,
|
||||
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
|
||||
activity.t_v, activity.t_r, activity.t_a,
|
||||
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
|
||||
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
|
||||
activity.interval_count,
|
||||
data.need_new, data.salvage_new, data.recharge_new, data.value_new,
|
||||
data.welcome_new,
|
||||
data.raw_score_welcome, data.raw_score_convert, data.raw_score,
|
||||
data.display_score_welcome, data.display_score_convert, data.display_score,
|
||||
None,
|
||||
stat_date,
|
||||
))
|
||||
|
||||
from psycopg2.extras import execute_batch
|
||||
with self.db.conn.cursor() as cur:
|
||||
for data in data_list:
|
||||
activity = data.activity
|
||||
params = (
|
||||
activity.site_id, activity.tenant_id, activity.member_id,
|
||||
data.status, data.segment,
|
||||
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
|
||||
activity.t_v, activity.t_r, activity.t_a,
|
||||
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
|
||||
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
|
||||
activity.interval_count,
|
||||
data.need_new, data.salvage_new, data.recharge_new, data.value_new,
|
||||
data.welcome_new,
|
||||
data.raw_score_welcome, data.raw_score_convert, data.raw_score,
|
||||
data.display_score_welcome, data.display_score_convert, data.display_score,
|
||||
None,
|
||||
)
|
||||
if use_param_time:
|
||||
params = params + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params)
|
||||
inserted += cur.rowcount
|
||||
execute_batch(cur, insert_sql, batch_params, page_size=200)
|
||||
inserted = len(batch_params)
|
||||
|
||||
# 保留策略:清理 365 天前的快照
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND stat_date < CURRENT_DATE - INTERVAL '365 days'",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
self.db.conn.commit()
|
||||
return inserted
|
||||
|
||||
@@ -180,9 +180,9 @@ class RelationIndexTask(BaseIndexTask):
|
||||
|
||||
self._apply_display_scores(pair_map, params_rs, params_ms, params_ml, site_id)
|
||||
|
||||
# P19: 仅回测模式传 calc_time(按 calc_time 删除保留其他快照),正常模式传 None(按 site_id 全量刷新)
|
||||
backtest_calc_time = now if (context and context.as_of_date) else None
|
||||
inserted = self._save_relation_rows(site_id, list(pair_map.values()), calc_time=backtest_calc_time)
|
||||
# 日快照模式:始终按 stat_date 写入/覆盖,支持多日快照共存
|
||||
stat_date = now.date() if hasattr(now, 'date') else now
|
||||
inserted = self._save_relation_rows(site_id, list(pair_map.values()), stat_date=stat_date)
|
||||
self.logger.info("关系指数计算完成,写入 %d 条记录", inserted)
|
||||
|
||||
return {
|
||||
@@ -585,27 +585,23 @@ class RelationIndexTask(BaseIndexTask):
|
||||
return "asinh"
|
||||
return "none"
|
||||
|
||||
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics], *, calc_time: Optional[datetime] = None) -> int:
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics], *, stat_date) -> int:
|
||||
"""日快照模式:始终按 (site_id, stat_date) 删除后插入,支持多日快照共存。"""
|
||||
from datetime import date as date_type
|
||||
if not isinstance(stat_date, date_type):
|
||||
stat_date = stat_date.date() if hasattr(stat_date, 'date') else stat_date
|
||||
|
||||
with self.db.conn.cursor() as cur:
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND stat_date = %s",
|
||||
(site_id, stat_date),
|
||||
)
|
||||
|
||||
if not rows:
|
||||
self.db.conn.commit()
|
||||
return 0
|
||||
|
||||
insert_sql = f"""
|
||||
insert_sql = """
|
||||
INSERT INTO dws.dws_member_assistant_relation_index (
|
||||
site_id, tenant_id, member_id, assistant_id,
|
||||
session_count, total_duration_minutes, basic_session_count, incentive_session_count,
|
||||
@@ -614,7 +610,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
os_share, os_label, os_rank,
|
||||
ms_f_short, ms_f_long, ms_raw, ms_display,
|
||||
ml_order_count, ml_allocated_amount, ml_raw, ml_display,
|
||||
calc_time, created_at, updated_at
|
||||
calc_time, created_at, updated_at, stat_date
|
||||
) VALUES (
|
||||
%s, %s, %s, %s,
|
||||
%s, %s, %s, %s,
|
||||
@@ -623,42 +619,34 @@ class RelationIndexTask(BaseIndexTask):
|
||||
%s, %s, %s,
|
||||
%s, %s, %s, %s,
|
||||
%s, %s, %s, %s,
|
||||
{('%s, %s, %s' if use_param_time else 'NOW(), NOW(), NOW()')}
|
||||
NOW(), NOW(), NOW(), %s
|
||||
)
|
||||
"""
|
||||
inserted = 0
|
||||
for row in rows:
|
||||
params = (
|
||||
row.site_id,
|
||||
row.tenant_id,
|
||||
row.member_id,
|
||||
row.assistant_id,
|
||||
row.session_count,
|
||||
row.total_duration_minutes,
|
||||
row.basic_session_count,
|
||||
row.incentive_session_count,
|
||||
# 批量写入(executemany 替代逐行 execute)
|
||||
batch_params = [
|
||||
(
|
||||
row.site_id, row.tenant_id, row.member_id, row.assistant_id,
|
||||
row.session_count, row.total_duration_minutes,
|
||||
row.basic_session_count, row.incentive_session_count,
|
||||
row.days_since_last_session,
|
||||
row.rs_f,
|
||||
row.rs_d,
|
||||
row.rs_r,
|
||||
row.rs_raw,
|
||||
row.rs_display,
|
||||
row.os_share,
|
||||
row.os_label,
|
||||
row.os_rank,
|
||||
row.ms_f_short,
|
||||
row.ms_f_long,
|
||||
row.ms_raw,
|
||||
row.ms_display,
|
||||
row.ml_order_count,
|
||||
row.ml_allocated_amount,
|
||||
row.ml_raw,
|
||||
row.ml_display,
|
||||
row.rs_f, row.rs_d, row.rs_r, row.rs_raw, row.rs_display,
|
||||
row.os_share, row.os_label, row.os_rank,
|
||||
row.ms_f_short, row.ms_f_long, row.ms_raw, row.ms_display,
|
||||
row.ml_order_count, row.ml_allocated_amount, row.ml_raw, row.ml_display,
|
||||
stat_date,
|
||||
)
|
||||
if use_param_time:
|
||||
params = params + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params)
|
||||
inserted += max(cur.rowcount, 0)
|
||||
for row in rows
|
||||
]
|
||||
from psycopg2.extras import execute_batch
|
||||
execute_batch(cur, insert_sql, batch_params, page_size=200)
|
||||
inserted = len(batch_params)
|
||||
|
||||
# 保留策略:清理 365 天前的快照
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND stat_date < CURRENT_DATE - INTERVAL '365 days'",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
self.db.conn.commit()
|
||||
return inserted
|
||||
|
||||
|
||||
@@ -178,9 +178,10 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
avg_raw=sum(all_raw) / len(all_raw)
|
||||
)
|
||||
|
||||
# P19: 回测模式传入 calc_time
|
||||
calc_time = (context.as_of_date if context and context.as_of_date else None)
|
||||
inserted = self._save_winback_data(winback_list, calc_time=calc_time)
|
||||
# 日快照模式:始终按 stat_date 写入
|
||||
now = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
|
||||
stat_date = now.date() if hasattr(now, 'date') else now
|
||||
inserted = self._save_winback_data(winback_list, stat_date=stat_date)
|
||||
self.logger.info("WBI calculation finished, inserted %d rows", inserted)
|
||||
|
||||
return {
|
||||
@@ -341,29 +342,23 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
if data.raw_score < 0:
|
||||
data.raw_score = 0.0
|
||||
|
||||
def _save_winback_data(self, data_list: List[MemberWinbackData], *, calc_time: Optional[datetime] = None) -> int:
|
||||
"""保存 WBI 数据"""
|
||||
def _save_winback_data(self, data_list: List[MemberWinbackData], *, stat_date) -> int:
|
||||
"""日快照模式:按 (site_id, stat_date) 删除后插入。"""
|
||||
if not data_list:
|
||||
return 0
|
||||
|
||||
site_id = data_list[0].activity.site_id
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
with self.db.conn.cursor() as cur:
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
from datetime import date as date_type
|
||||
if not isinstance(stat_date, date_type):
|
||||
stat_date = stat_date.date() if hasattr(stat_date, 'date') else stat_date
|
||||
|
||||
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
|
||||
insert_sql = f"""
|
||||
site_id = data_list[0].activity.site_id
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND stat_date = %s",
|
||||
(site_id, stat_date),
|
||||
)
|
||||
|
||||
insert_sql = """
|
||||
INSERT INTO dws.dws_member_winback_index (
|
||||
site_id, tenant_id, member_id,
|
||||
status, segment,
|
||||
@@ -376,7 +371,7 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
ideal_interval_days, ideal_next_visit_date,
|
||||
raw_score, display_score,
|
||||
last_wechat_touch_time,
|
||||
calc_time, created_at, updated_at
|
||||
calc_time, created_at, updated_at, stat_date
|
||||
) VALUES (
|
||||
%s, %s, %s,
|
||||
%s, %s,
|
||||
@@ -389,31 +384,39 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
%s, %s,
|
||||
%s, %s,
|
||||
%s,
|
||||
{time_placeholder}
|
||||
NOW(), NOW(), NOW(), %s
|
||||
)
|
||||
"""
|
||||
|
||||
inserted = 0
|
||||
# 批量写入(executemany 替代逐行 execute)
|
||||
batch_params = []
|
||||
for data in data_list:
|
||||
activity = data.activity
|
||||
batch_params.append((
|
||||
activity.site_id, activity.tenant_id, activity.member_id,
|
||||
data.status, data.segment,
|
||||
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
|
||||
activity.t_v, activity.t_r, activity.t_a,
|
||||
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
|
||||
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
|
||||
activity.interval_count,
|
||||
data.overdue_old, data.overdue_cdf_p, data.drop_old, data.recharge_old, data.value_old,
|
||||
data.ideal_interval_days, data.ideal_next_visit_date,
|
||||
data.raw_score, data.display_score,
|
||||
None,
|
||||
stat_date,
|
||||
))
|
||||
|
||||
from psycopg2.extras import execute_batch
|
||||
with self.db.conn.cursor() as cur:
|
||||
for data in data_list:
|
||||
activity = data.activity
|
||||
params = (
|
||||
activity.site_id, activity.tenant_id, activity.member_id,
|
||||
data.status, data.segment,
|
||||
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
|
||||
activity.t_v, activity.t_r, activity.t_a,
|
||||
activity.visits_14d, activity.visits_30d, activity.visits_60d, activity.visits_total,
|
||||
activity.spend_30d, activity.spend_180d, activity.sv_balance, activity.recharge_60d_amt,
|
||||
activity.interval_count,
|
||||
data.overdue_old, data.overdue_cdf_p, data.drop_old, data.recharge_old, data.value_old,
|
||||
data.ideal_interval_days, data.ideal_next_visit_date,
|
||||
data.raw_score, data.display_score,
|
||||
None,
|
||||
)
|
||||
if use_param_time:
|
||||
params = params + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params)
|
||||
inserted += cur.rowcount
|
||||
execute_batch(cur, insert_sql, batch_params, page_size=200)
|
||||
inserted = len(batch_params)
|
||||
|
||||
# 保留策略:清理 365 天前的快照
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND stat_date < CURRENT_DATE - INTERVAL '365 days'",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
self.db.conn.commit()
|
||||
return inserted
|
||||
|
||||
@@ -2,11 +2,17 @@
|
||||
"""
|
||||
DWS 客户项目标签任务
|
||||
|
||||
按时间窗口计算每位客户在四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)的
|
||||
消费时长占比,占比≥25% 则分配标签。散客(member_id=0)不参与。
|
||||
按每位客户最近 N 次消费(开台记录)计算四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)
|
||||
的消费时长占比,占比≥25% 则分配标签。散客(member_id=0)不参与。
|
||||
|
||||
设计思路:
|
||||
不按固定日期窗口(30天/60天),而按每位客户最近的消费记录数量取数,
|
||||
避免长期未到店或来店频率不稳定的客户标签丢失。
|
||||
|
||||
数据链路:
|
||||
dwd_table_fee_log (ledger_count)
|
||||
→ ROW_NUMBER() OVER (PARTITION BY member_id ORDER BY ledger_end_time DESC)
|
||||
→ 取最近 LAST_N_VISITS 条记录
|
||||
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name)
|
||||
→ 按 category_code 汇总 → 计算占比 → 写入 dws_member_project_tag
|
||||
@@ -15,25 +21,23 @@ DWS 客户项目标签任务
|
||||
dws.dws_member_project_tag
|
||||
|
||||
更新策略:
|
||||
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
|
||||
全量删除重建(按 site_id 删除后重新插入)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
from tasks.dws.base_dws_task import BaseDwsTask
|
||||
|
||||
# 只计算四大项目
|
||||
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
|
||||
|
||||
# 客户看板的 2 个时间窗口
|
||||
MEMBER_WINDOWS = [
|
||||
TimeWindow.LAST_30_DAYS,
|
||||
TimeWindow.LAST_60_DAYS,
|
||||
]
|
||||
# 取每位客户最近 30 次消费(开台记录)
|
||||
LAST_N_VISITS = 30
|
||||
|
||||
# 写入 time_window 字段的枚举值
|
||||
TIME_WINDOW_VALUE = "LAST_30_VISITS"
|
||||
|
||||
TAG_THRESHOLD = Decimal("0.25")
|
||||
|
||||
@@ -52,21 +56,15 @@ class MemberProjectTagTask(BaseDwsTask):
|
||||
|
||||
def extract(self, context) -> Dict[str, Any]:
|
||||
site_id = context.store_id
|
||||
self.logger.info("%s: 提取客户台费时长数据", self.get_task_code())
|
||||
self.logger.info("%s: 提取客户最近 %d 次消费的台费时长数据",
|
||||
self.get_task_code(), LAST_N_VISITS)
|
||||
|
||||
self.load_config_cache()
|
||||
table_info = self._extract_table_info(site_id)
|
||||
|
||||
window_data: Dict[str, List[Dict]] = {}
|
||||
for window in MEMBER_WINDOWS:
|
||||
time_range = self.get_time_window_range(window)
|
||||
rows = self._extract_member_durations(
|
||||
site_id, time_range.start, time_range.end
|
||||
)
|
||||
window_data[window.value] = rows
|
||||
rows = self._extract_member_durations(site_id)
|
||||
|
||||
return {
|
||||
"window_data": window_data,
|
||||
"rows": rows,
|
||||
"table_info": table_info,
|
||||
"site_id": site_id,
|
||||
}
|
||||
@@ -81,30 +79,37 @@ class MemberProjectTagTask(BaseDwsTask):
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
return {r["table_id"]: dict(r) for r in (rows or [])}
|
||||
|
||||
def _extract_member_durations(
|
||||
self, site_id: int, start_date: date, end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""提取客户台费时长明细(按客户+台桌聚合),排除散客"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("tfl.ledger_end_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
tfl.member_id,
|
||||
tfl.site_table_id AS table_id,
|
||||
COALESCE(SUM(tfl.ledger_count), 0) AS duration_seconds
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
WHERE tfl.site_id = %(site_id)s
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND COALESCE(tfl.is_delete, 0) = 0
|
||||
AND tfl.member_id IS NOT NULL
|
||||
AND tfl.member_id != 0
|
||||
GROUP BY tfl.member_id, tfl.site_table_id
|
||||
def _extract_member_durations(self, site_id: int) -> List[Dict[str, Any]]:
|
||||
"""按每位客户最近 N 次消费提取台费时长明细,排除散客。
|
||||
|
||||
使用 ROW_NUMBER() 按 member_id 分区、ledger_end_time 倒序排名,
|
||||
取最近 LAST_N_VISITS 条记录后再按 (member_id, table_id) 聚合。
|
||||
"""
|
||||
sql = """
|
||||
WITH ranked AS (
|
||||
SELECT tfl.member_id,
|
||||
tfl.site_table_id AS table_id,
|
||||
tfl.ledger_count AS duration_seconds,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY tfl.member_id
|
||||
ORDER BY tfl.ledger_end_time DESC
|
||||
) AS rn
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
WHERE tfl.site_id = %(site_id)s
|
||||
AND COALESCE(tfl.is_delete, 0) = 0
|
||||
AND tfl.member_id IS NOT NULL
|
||||
AND tfl.member_id != 0
|
||||
)
|
||||
SELECT member_id,
|
||||
table_id,
|
||||
COALESCE(SUM(duration_seconds), 0) AS duration_seconds
|
||||
FROM ranked
|
||||
WHERE rn <= %(last_n)s
|
||||
GROUP BY member_id, table_id
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
"site_id": site_id,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
"last_n": LAST_N_VISITS,
|
||||
})
|
||||
return [dict(r) for r in rows] if rows else []
|
||||
|
||||
@@ -114,59 +119,59 @@ class MemberProjectTagTask(BaseDwsTask):
|
||||
tenant_id = getattr(context, "tenant_id", 0) or 0
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
||||
for window_value, rows in extracted["window_data"].items():
|
||||
# member_id → category_code → seconds
|
||||
member_cats: Dict[int, Dict[str, int]] = {}
|
||||
# member_id → category_code → seconds
|
||||
member_cats: Dict[int, Dict[str, int]] = {}
|
||||
|
||||
for row in rows:
|
||||
mid = row["member_id"]
|
||||
tid = row["table_id"]
|
||||
secs = self.safe_int(row["duration_seconds"])
|
||||
if secs <= 0:
|
||||
continue
|
||||
for row in extracted["rows"]:
|
||||
mid = row["member_id"]
|
||||
tid = row["table_id"]
|
||||
secs = self.safe_int(row["duration_seconds"])
|
||||
if secs <= 0:
|
||||
continue
|
||||
|
||||
tinfo = table_info.get(tid, {})
|
||||
area_name = tinfo.get("area_name")
|
||||
table_name = tinfo.get("table_name")
|
||||
cat = self.get_area_category(area_name, table_name)
|
||||
code = cat.get("category_code", "OTHER")
|
||||
tinfo = table_info.get(tid, {})
|
||||
area_name = tinfo.get("area_name")
|
||||
table_name = tinfo.get("table_name")
|
||||
cat = self.get_area_category(area_name, table_name)
|
||||
code = cat.get("category_code", "OTHER")
|
||||
|
||||
if code not in VALID_CATEGORIES:
|
||||
continue
|
||||
if code not in VALID_CATEGORIES:
|
||||
continue
|
||||
|
||||
if mid not in member_cats:
|
||||
member_cats[mid] = {}
|
||||
member_cats[mid][code] = member_cats[mid].get(code, 0) + secs
|
||||
if mid not in member_cats:
|
||||
member_cats[mid] = {}
|
||||
member_cats[mid][code] = member_cats[mid].get(code, 0) + secs
|
||||
|
||||
for mid, cats in member_cats.items():
|
||||
total = sum(cats.values())
|
||||
if total <= 0:
|
||||
continue
|
||||
for mid, cats in member_cats.items():
|
||||
total = sum(cats.values())
|
||||
if total <= 0:
|
||||
continue
|
||||
|
||||
for code, secs in cats.items():
|
||||
pct = Decimal(str(secs)) / Decimal(str(total))
|
||||
pct = pct.quantize(Decimal("0.0001"))
|
||||
cat_info = self._get_category_display(code)
|
||||
for code, secs in cats.items():
|
||||
pct = Decimal(str(secs)) / Decimal(str(total))
|
||||
pct = pct.quantize(Decimal("0.0001"))
|
||||
cat_info = self._get_category_display(code)
|
||||
|
||||
results.append({
|
||||
"site_id": site_id,
|
||||
"tenant_id": tenant_id,
|
||||
"member_id": mid,
|
||||
"time_window": window_value,
|
||||
"category_code": code,
|
||||
"category_name": cat_info["category_name"],
|
||||
"short_name": cat_info["short_name"],
|
||||
"duration_seconds": secs,
|
||||
"total_seconds": total,
|
||||
"percentage": float(pct),
|
||||
"is_tagged": pct >= TAG_THRESHOLD,
|
||||
})
|
||||
results.append({
|
||||
"site_id": site_id,
|
||||
"tenant_id": tenant_id,
|
||||
"member_id": mid,
|
||||
"time_window": TIME_WINDOW_VALUE,
|
||||
"category_code": code,
|
||||
"category_name": cat_info["category_name"],
|
||||
"short_name": cat_info["short_name"],
|
||||
"duration_seconds": secs,
|
||||
"total_seconds": total,
|
||||
"percentage": float(pct),
|
||||
"is_tagged": pct >= TAG_THRESHOLD,
|
||||
})
|
||||
|
||||
self.logger.info(
|
||||
"%s: 生成 %d 条标签记录(其中 %d 条达标)",
|
||||
"%s: 生成 %d 条标签记录(其中 %d 条达标),基于每客户最近 %d 次消费",
|
||||
self.get_task_code(),
|
||||
len(results),
|
||||
sum(1 for r in results if r["is_tagged"]),
|
||||
LAST_N_VISITS,
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
@@ -2,96 +2,130 @@
|
||||
# - 2026-03-29 | Prompt: DWS_TASK_ENGINE ETL 任务 | 新建文件。
|
||||
# 编排任务引擎全流程:完成检查 → 过期检查 → 任务生成。
|
||||
# 通过 HTTP 调用后端 POST /api/internal/run-job 按 job_name 执行。
|
||||
# - 2026-04-12 | 合并 DWS_TASK_SIMULATION:有时间窗口时走推演模式,
|
||||
# 无时间窗口时走原来的 HTTP 模式。
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 任务引擎编排任务(DWS_TASK_ENGINE)
|
||||
|
||||
在 DWS 指数计算完成后执行,按顺序调用后端任务引擎的各个步骤:
|
||||
1. recall_completion_check — 检测召回是否完成,生成回访任务
|
||||
2. task_expiry_check — 标记超时未处理的任务
|
||||
3. task_generator — 根据 WBI/NCI/RS 指数生成/替换任务
|
||||
双模式:
|
||||
- 无时间窗口(日常 Flow):通过 HTTP 调用后端任务引擎
|
||||
1. recall_completion_check — 检测召回完成
|
||||
2. task_expiry_check — 标<><E6A087>超时任务
|
||||
3. task_generator — 根据指数生成/替换任务
|
||||
|
||||
通过 HTTP 调用后端 POST /api/internal/run-job(Internal-Token 认证),
|
||||
每步失败仅记录日志,不中断后续步骤。
|
||||
- 有时间窗口(历史推演):基于指数日快照逐天重放任务生命周期
|
||||
需先运行 DWS_INDEX_BACKFILL 生成历史快照
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Any, Dict, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from ..base_task import BaseTask, TaskContext
|
||||
|
||||
# 加载根 .env(BACKEND_API_URL / INTERNAL_API_TOKEN 不在 AppConfig 映射中)
|
||||
# task_engine.py → dws/ → tasks/ → feiqiu/ → connectors/ → etl/ → apps/ → root
|
||||
# 加载根 .env
|
||||
_REPO_ROOT = Path(__file__).resolve().parents[6]
|
||||
load_dotenv(_REPO_ROOT / ".env", override=False)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TIMEOUT = (5, 30) # 连接 5s,读取 30s(任务执行可能较慢)
|
||||
_TIMEOUT = (5, 30)
|
||||
|
||||
# 按顺序执行的后端任务列表
|
||||
# HTTP 模式<E6A8A1><E5BC8F><EFBFBD>按顺序执行的后端任务
|
||||
_JOB_SEQUENCE = [
|
||||
"recall_completion_check",
|
||||
"task_expiry_check",
|
||||
"task_generator",
|
||||
]
|
||||
|
||||
# 推演模式:导<EFBC9A><E5AFBC> task_generator 纯函数
|
||||
_BACKEND = _REPO_ROOT / "apps" / "backend"
|
||||
if str(_BACKEND) not in sys.path:
|
||||
sys.path.insert(0, str(_BACKEND))
|
||||
|
||||
try:
|
||||
from app.services.task_generator import (
|
||||
IndexData,
|
||||
determine_task_type,
|
||||
should_replace_task,
|
||||
)
|
||||
_SIMULATION_AVAILABLE = True
|
||||
except ImportError:
|
||||
_SIMULATION_AVAILABLE = False
|
||||
|
||||
# 推演截止日期(现有 active 任务从 03-29 开始)
|
||||
CUTOFF_DATE = date(2026, 3, 28)
|
||||
FOLLOW_UP_HOURS = 72
|
||||
|
||||
|
||||
# ── HTTP <20><>式辅助 ──
|
||||
|
||||
def _run_backend_job(backend_url: str, token: str, job_name: str) -> dict:
|
||||
"""调用后端 POST /api/internal/run-job 执行指定任务。
|
||||
|
||||
Returns:
|
||||
{"success": bool, "message": str} 或 {"success": False, "message": error}
|
||||
"""
|
||||
url = f"{backend_url}/api/internal/run-job"
|
||||
headers = {
|
||||
"Authorization": f"Internal-Token {token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
body = {"job_name": job_name}
|
||||
|
||||
try:
|
||||
resp = requests.post(url, json=body, headers=headers, timeout=_TIMEOUT)
|
||||
resp = requests.post(url, json={"job_name": job_name}, headers=headers, timeout=_TIMEOUT)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
# 后端 ResponseWrapperMiddleware 包装:{"code": 0, "data": {...}}
|
||||
inner = data.get("data", data)
|
||||
return {
|
||||
"success": inner.get("success", False),
|
||||
"message": inner.get("message", ""),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"message": f"HTTP {resp.status_code}: {resp.text[:200]}",
|
||||
}
|
||||
return {"success": inner.get("success", False), "message": inner.get("message", "")}
|
||||
return {"success": False, "message": f"HTTP {resp.status_code}: {resp.text[:200]}"}
|
||||
except requests.RequestException as exc:
|
||||
return {"success": False, "message": str(exc)}
|
||||
|
||||
|
||||
class DwsTaskEngineTask(BaseTask):
|
||||
"""DWS 任务引擎编排任务。
|
||||
"""DWS 任务引擎(双模式)。
|
||||
|
||||
不读写 DWS 表,仅通过 HTTP 调用后端执行任务引擎步骤。
|
||||
继承 BaseTask 而非 BaseDwsTask,因为不需要 DWS 层的数据操作方法。
|
||||
无时间窗口 → HTTP 模式(日常 Flow)
|
||||
有时间窗口 → 推演模式(历史回填)
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_TASK_ENGINE"
|
||||
|
||||
def extract(self, context: TaskContext) -> dict[str, Any]:
|
||||
"""无需提取数据,返回空上下文。"""
|
||||
return {}
|
||||
def execute(self, context=None) -> Dict[str, Any]:
|
||||
"""直接 override execute(),绕过 BaseTask 的 E/T/L 模板。
|
||||
|
||||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
|
||||
"""按顺序调用后端任务引擎的各个步骤。"""
|
||||
根据是否有时间窗口决定模式:
|
||||
- 有窗口 → 推演模式(逐天生成+完成任务)
|
||||
- 无窗口 → HTTP 模式(调用后端执行当天任务引擎)
|
||||
"""
|
||||
if self._has_window(context):
|
||||
return self._run_simulation_mode(context)
|
||||
return self._run_http_mode()
|
||||
|
||||
def _has_window(self, context=None) -> bool:
|
||||
"""检查是否指定了时间窗口(config 或 context 均可)。"""
|
||||
# 优先从 config(CLI --window-start/--window-end)
|
||||
wo = self.config.get("run.window_override") or {}
|
||||
if wo.get("start") and wo.get("end"):
|
||||
return True
|
||||
# 其次从 context(task_executor 构建的)
|
||||
if context and hasattr(context, 'window_start') and hasattr(context, 'window_end'):
|
||||
if context.window_start and context.window_end and context.window_start != context.window_end:
|
||||
return True
|
||||
return False
|
||||
|
||||
# ── HTTP 模式(日常) ──
|
||||
|
||||
def _run_http_mode(self) -> dict[str, Any]:
|
||||
backend_url = os.environ.get("BACKEND_API_URL", "").rstrip("/")
|
||||
token = os.environ.get("INTERNAL_API_TOKEN", "")
|
||||
|
||||
@@ -103,22 +137,667 @@ class DwsTaskEngineTask(BaseTask):
|
||||
return {"skipped": True, "reason": "INTERNAL_API_TOKEN 未配置"}
|
||||
|
||||
results: dict[str, Any] = {}
|
||||
|
||||
for job_name in _JOB_SEQUENCE:
|
||||
self.logger.info("DWS_TASK_ENGINE: 执行 %s ...", job_name)
|
||||
result = _run_backend_job(backend_url, token, job_name)
|
||||
success = result.get("success", False)
|
||||
message = result.get("message", "")
|
||||
|
||||
results[job_name] = {"success": success, "message": message}
|
||||
|
||||
if success:
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE: %s 成功 — %s", job_name, message
|
||||
)
|
||||
self.logger.info("DWS_TASK_ENGINE: %s 成功 — %s", job_name, message)
|
||||
else:
|
||||
self.logger.warning(
|
||||
"DWS_TASK_ENGINE: %s 失败 — %s", job_name, message
|
||||
)
|
||||
self.logger.warning("DWS_TASK_ENGINE: %s 失败 — %s", job_name, message)
|
||||
|
||||
return results
|
||||
|
||||
# ── 推演模式(历史) ──
|
||||
|
||||
def _run_simulation_mode(self, context: Optional[TaskContext]) -> dict[str, Any]:
|
||||
if not _SIMULATION_AVAILABLE:
|
||||
raise RuntimeError("推演模式不可用:无法导入 app.services.task_generator")
|
||||
|
||||
import psycopg2
|
||||
|
||||
start_date, end_date = self._parse_date_range(context)
|
||||
|
||||
tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
|
||||
etl_conn = self.db.conn
|
||||
|
||||
app_dsn = os.environ.get("APP_DB_DSN")
|
||||
if not app_dsn:
|
||||
raise ValueError("推演模式需要 APP_DB_DSN 环境变量")
|
||||
app_conn = psycopg2.connect(app_dsn)
|
||||
app_conn.set_client_encoding("UTF8")
|
||||
|
||||
site_id = self._get_site_id(etl_conn)
|
||||
total_days = (end_date - start_date).days + 1
|
||||
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE [推演模式]: %s ~ %s (%d天), site_id=%s",
|
||||
start_date, end_date, total_days, site_id,
|
||||
)
|
||||
|
||||
# 清理指定范围内的旧数据,保留范围外的
|
||||
self._clean_date_range(app_conn, tz, start_date, end_date)
|
||||
|
||||
# 加载推演范围之前就存在的 active 任务(不加载范围之后的"未来"任务)
|
||||
active_tasks = self._load_existing_active_tasks(app_conn, site_id, before_date=start_date)
|
||||
self.logger.info("DWS_TASK_ENGINE [推演]: 范围前已有 active 任务 %d 个", len(active_tasks))
|
||||
|
||||
stats = {
|
||||
"created": 0, "completed": 0, "resolved": 0, "overridden": 0,
|
||||
"expired": 0, "follow_up_created": 0,
|
||||
"recall_events": 0, "skipped_no_snapshot": 0,
|
||||
}
|
||||
|
||||
# 预加载全量数据,按日分片(避免 255 x 5 次逐日查询 -> 5 次全量查询)
|
||||
self.logger.info("DWS_TASK_ENGINE [推演]: 预加载快照 + 结算数据 ...")
|
||||
snapshots_by_date = self._bulk_load_snapshots(etl_conn, site_id, start_date, end_date)
|
||||
settlements_by_date = self._bulk_load_settlements(etl_conn, site_id, start_date, end_date, tz)
|
||||
member_visits_by_date = self._bulk_load_member_visits(etl_conn, site_id, start_date, end_date, tz)
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE [推演]: 预加载完成, %d 天有快照, %d 天有助教结算, %d 天有到店记录",
|
||||
len(snapshots_by_date), len(settlements_by_date), len(member_visits_by_date),
|
||||
)
|
||||
|
||||
# 加载任务生成参数(与日常 task_generator 保持一致)
|
||||
task_params = self._load_task_generator_params(app_conn, site_id)
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE [推演]: 任务阈值 high=%.1f, normal=%.1f, rs=[%.1f, %.1f)",
|
||||
task_params["high_threshold"], task_params["normal_threshold"],
|
||||
task_params["rs_min"], task_params["rs_max"],
|
||||
)
|
||||
|
||||
t0 = time.time()
|
||||
current = start_date
|
||||
while current <= end_date:
|
||||
snapshot = snapshots_by_date.get(current, {"relation": {}, "wbi": {}, "nci": {}})
|
||||
if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
|
||||
stats["skipped_no_snapshot"] += 1
|
||||
current += timedelta(days=1)
|
||||
continue
|
||||
|
||||
day_settlements = settlements_by_date.get(current, {})
|
||||
day_visits = member_visits_by_date.get(current, {})
|
||||
self._simulate_day(app_conn, etl_conn, site_id, current, tz, snapshot, active_tasks, stats,
|
||||
preloaded_settlements=day_settlements, preloaded_visits=day_visits,
|
||||
task_params=task_params)
|
||||
|
||||
day_num = (current - start_date).days + 1
|
||||
if day_num % 30 == 0 or current == end_date:
|
||||
elapsed = time.time() - t0
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE [推演]: %s (%d/%d) 创建=%d 完成=%d 解除=%d 覆盖=%d 过期=%d %.0fs",
|
||||
current, day_num, total_days,
|
||||
stats["created"], stats["completed"], stats["resolved"],
|
||||
stats["overridden"], stats["expired"], elapsed,
|
||||
)
|
||||
|
||||
current += timedelta(days=1)
|
||||
|
||||
# 收尾:清理推演结束后仍 active 但 expires_at 已过期的任务
|
||||
now_dt = datetime.now(tz)
|
||||
cleanup_count = 0
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s
|
||||
WHERE site_id = %s AND status = 'active'
|
||||
AND expires_at IS NOT NULL AND expires_at < %s
|
||||
RETURNING id, task_type""",
|
||||
(now_dt, site_id, now_dt),
|
||||
)
|
||||
for task_id, task_type in cur.fetchall():
|
||||
self._history(cur, task_id, "expired", "active", "expired",
|
||||
task_type, task_type, {"reason": "post_simulation_cleanup"})
|
||||
cleanup_count += 1
|
||||
app_conn.commit()
|
||||
stats["expired"] += cleanup_count
|
||||
if cleanup_count:
|
||||
self.logger.info("DWS_TASK_ENGINE [推演]: 收尾清理 %d 个已过期任务", cleanup_count)
|
||||
|
||||
total_elapsed = time.time() - t0
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE [推演] 完成: %.0fs, 创建=%d 完成=%d 解除=%d 覆盖=%d 过期=%d 回访=%d 事件=%d 跳过=%d active=%d",
|
||||
total_elapsed, stats["created"], stats["completed"], stats["resolved"],
|
||||
stats["overridden"], stats["expired"],
|
||||
stats["follow_up_created"], stats["recall_events"],
|
||||
stats["skipped_no_snapshot"], len(active_tasks),
|
||||
)
|
||||
|
||||
app_conn.close()
|
||||
|
||||
# 推演完成后触发日常流程(recall_detector + task_generator)
|
||||
# 让最新到店数据触发召回完成、POOL 过滤清理存量
|
||||
self.logger.info("DWS_TASK_ENGINE [推演]: 触发日常流程 ...")
|
||||
try:
|
||||
http_result = self._run_http_mode()
|
||||
self.logger.info("DWS_TASK_ENGINE [推演]: 日常流程完成 %s", http_result)
|
||||
except Exception:
|
||||
self.logger.exception("DWS_TASK_ENGINE [推演]: 日常流程触发失败(不影响推演结果)")
|
||||
|
||||
return {
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
# 框架标准字段(总结框显示用)
|
||||
"inserted": stats["created"],
|
||||
"updated": stats["overridden"],
|
||||
"skipped": stats["skipped_no_snapshot"],
|
||||
"errors": 0,
|
||||
# 原始明细
|
||||
**stats,
|
||||
},
|
||||
}
|
||||
|
||||
# ── 推演辅助方法 ──
|
||||
|
||||
def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
|
||||
wo = self.config.get("run.window_override") or {}
|
||||
start_str = wo.get("start")
|
||||
end_str = wo.get("end")
|
||||
if start_str and end_str:
|
||||
return self._parse_date(start_str), self._parse_date(end_str)
|
||||
if context and context.window_start and context.window_end:
|
||||
return context.window_start.date(), context.window_end.date()
|
||||
raise ValueError("推演模式需要指定时间窗口")
|
||||
|
||||
@staticmethod
|
||||
def _parse_date(s) -> date:
|
||||
if isinstance(s, date) and not isinstance(s, datetime):
|
||||
return s
|
||||
if isinstance(s, datetime):
|
||||
return s.date()
|
||||
return date.fromisoformat(str(s).strip()[:10])
|
||||
|
||||
def _get_site_id(self, etl_conn) -> int:
|
||||
with etl_conn.cursor() as cur:
|
||||
cur.execute("SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1")
|
||||
row = cur.fetchone()
|
||||
etl_conn.commit()
|
||||
if not row:
|
||||
raise RuntimeError("relation_index 表为空,请先运行 DWS_INDEX_BACKFILL")
|
||||
return row[0]
|
||||
|
||||
def _load_snapshot(self, etl_conn, site_id: int, stat_date: date) -> dict:
|
||||
result = {"relation": {}, "wbi": {}, "nci": {}}
|
||||
with etl_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT assistant_id, member_id, rs_display, os_label, session_count
|
||||
FROM dws.dws_member_assistant_relation_index
|
||||
WHERE site_id = %s AND stat_date = %s""",
|
||||
(site_id, stat_date),
|
||||
)
|
||||
for r in cur.fetchall():
|
||||
result["relation"][(r[0], r[1])] = {
|
||||
"rs": Decimal(str(r[2])), "os_label": r[3], "session_count": r[4],
|
||||
}
|
||||
# WBI(同时记录 status 用于过滤老客 NCI)
|
||||
old_members = set()
|
||||
cur.execute(
|
||||
"""SELECT member_id, display_score, status FROM dws.dws_member_winback_index
|
||||
WHERE site_id = %s AND stat_date = %s""",
|
||||
(site_id, stat_date),
|
||||
)
|
||||
for r in cur.fetchall():
|
||||
result["wbi"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
|
||||
if r[2] == "OLD":
|
||||
old_members.add(r[0])
|
||||
# NCI(排除已转老客,避免使用过时高分)
|
||||
cur.execute(
|
||||
"""SELECT member_id, display_score FROM dws.dws_member_newconv_index
|
||||
WHERE site_id = %s AND stat_date = %s""",
|
||||
(site_id, stat_date),
|
||||
)
|
||||
for r in cur.fetchall():
|
||||
if r[0] not in old_members:
|
||||
result["nci"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
|
||||
etl_conn.commit()
|
||||
return result
|
||||
|
||||
def _load_settlements(self, etl_conn, site_id: int, d: date) -> dict:
|
||||
"""助教级结算:settle_type=1 全部计入,settle_type=3 仅 BONUS 服务。"""
|
||||
tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
|
||||
day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
|
||||
day_end = day_start + timedelta(days=1)
|
||||
settlements = {}
|
||||
with etl_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT sl.site_assistant_id, sh.member_id, MAX(sh.pay_time)
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
JOIN dwd.dwd_assistant_service_log sl
|
||||
ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
|
||||
WHERE sh.site_id = %s
|
||||
AND (sh.settle_type = 1 OR (sh.settle_type = 3 AND sl.order_assistant_type = 2))
|
||||
AND sh.pay_time >= %s AND sh.pay_time < %s
|
||||
GROUP BY sl.site_assistant_id, sh.member_id""",
|
||||
(site_id, day_start, day_end),
|
||||
)
|
||||
for r in cur.fetchall():
|
||||
if r[0] and r[1]:
|
||||
settlements[(r[0], r[1])] = r[2]
|
||||
etl_conn.commit()
|
||||
return settlements
|
||||
|
||||
def _load_member_visits(self, etl_conn, site_id: int, d: date) -> dict:
|
||||
"""门店级到店检测:含无助教服务的 settle_type=1,用于 resolved 判定。"""
|
||||
tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
|
||||
day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
|
||||
day_end = day_start + timedelta(days=1)
|
||||
visits = {}
|
||||
with etl_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT sh.member_id, MAX(sh.pay_time)
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
WHERE sh.site_id = %s
|
||||
AND (
|
||||
sh.settle_type = 1
|
||||
OR (sh.settle_type = 3 AND EXISTS (
|
||||
SELECT 1 FROM dwd.dwd_assistant_service_log sl
|
||||
WHERE sl.order_settle_id = sh.order_settle_id
|
||||
AND sl.is_delete = 0
|
||||
AND sl.order_assistant_type = 2
|
||||
))
|
||||
)
|
||||
AND sh.pay_time >= %s AND sh.pay_time < %s
|
||||
GROUP BY sh.member_id""",
|
||||
(site_id, day_start, day_end),
|
||||
)
|
||||
for r in cur.fetchall():
|
||||
if r[0]:
|
||||
visits[r[0]] = r[1]
|
||||
etl_conn.commit()
|
||||
return visits
|
||||
|
||||
def _bulk_load_settlements(self, etl_conn, site_id: int, start: date, end: date, tz) -> dict:
|
||||
"""一次查全量助教级结算,按日分片返回 {date: {(aid,mid): pay_time}}。"""
|
||||
from collections import defaultdict
|
||||
day_start = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=tz)
|
||||
day_end = datetime(end.year, end.month, end.day, 0, 0, 0, tzinfo=tz) + timedelta(days=1)
|
||||
result = defaultdict(dict)
|
||||
with etl_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT sl.site_assistant_id, sh.member_id, sh.pay_time
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
JOIN dwd.dwd_assistant_service_log sl
|
||||
ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
|
||||
WHERE sh.site_id = %s
|
||||
AND (sh.settle_type = 1 OR (sh.settle_type = 3 AND sl.order_assistant_type = 2))
|
||||
AND sh.pay_time >= %s AND sh.pay_time < %s""",
|
||||
(site_id, day_start, day_end),
|
||||
)
|
||||
for aid, mid, pay_time in cur.fetchall():
|
||||
if aid and mid:
|
||||
d_key = pay_time.astimezone(tz).date()
|
||||
existing = result[d_key].get((aid, mid))
|
||||
if existing is None or pay_time > existing:
|
||||
result[d_key][(aid, mid)] = pay_time
|
||||
etl_conn.commit()
|
||||
return dict(result)
|
||||
|
||||
def _bulk_load_member_visits(self, etl_conn, site_id: int, start: date, end: date, tz) -> dict:
|
||||
"""一次查全量门店级到店,按日分片返回 {date: {mid: pay_time}}。"""
|
||||
from collections import defaultdict
|
||||
day_start = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=tz)
|
||||
day_end = datetime(end.year, end.month, end.day, 0, 0, 0, tzinfo=tz) + timedelta(days=1)
|
||||
result = defaultdict(dict)
|
||||
with etl_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT sh.member_id, sh.pay_time
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
WHERE sh.site_id = %s
|
||||
AND (
|
||||
sh.settle_type = 1
|
||||
OR (sh.settle_type = 3 AND EXISTS (
|
||||
SELECT 1 FROM dwd.dwd_assistant_service_log sl
|
||||
WHERE sl.order_settle_id = sh.order_settle_id
|
||||
AND sl.is_delete = 0
|
||||
AND sl.order_assistant_type = 2
|
||||
))
|
||||
)
|
||||
AND sh.pay_time >= %s AND sh.pay_time < %s""",
|
||||
(site_id, day_start, day_end),
|
||||
)
|
||||
for mid, pay_time in cur.fetchall():
|
||||
if mid:
|
||||
d_key = pay_time.astimezone(tz).date()
|
||||
existing = result[d_key].get(mid)
|
||||
if existing is None or pay_time > existing:
|
||||
result[d_key][mid] = pay_time
|
||||
etl_conn.commit()
|
||||
return dict(result)
|
||||
|
||||
@staticmethod
|
||||
def _load_task_generator_params(app_conn, site_id: int) -> dict:
|
||||
"""从 cfg_task_generator_params 加载任务生成阈值,与日常 task_generator 保持一致。"""
|
||||
defaults = {
|
||||
"high_threshold": 7.5,
|
||||
"normal_threshold": 4.0,
|
||||
"rs_min": 1.0,
|
||||
"rs_max": 6.0,
|
||||
}
|
||||
key_map = {
|
||||
"high_priority_recall_threshold": "high_threshold",
|
||||
"priority_recall_threshold": "normal_threshold",
|
||||
"rs_min_for_relationship": "rs_min",
|
||||
"rs_max_for_relationship": "rs_max",
|
||||
}
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute("SELECT param_key, param_value FROM biz.cfg_task_generator_params")
|
||||
for param_key, param_value in cur.fetchall():
|
||||
mapped = key_map.get(param_key)
|
||||
if mapped:
|
||||
defaults[mapped] = float(param_value)
|
||||
app_conn.commit()
|
||||
return defaults
|
||||
|
||||
def _bulk_load_snapshots(self, etl_conn, site_id: int, start: date, end: date) -> dict:
|
||||
"""一次查全量指数快照(relation/wbi/nci),按日分片返回 {date: snapshot_dict}。"""
|
||||
from collections import defaultdict
|
||||
result = defaultdict(lambda: {"relation": {}, "wbi": {}, "nci": {}})
|
||||
|
||||
with etl_conn.cursor() as cur:
|
||||
# relation_index
|
||||
cur.execute(
|
||||
"""SELECT stat_date, assistant_id, member_id, rs_display, os_label, session_count
|
||||
FROM dws.dws_member_assistant_relation_index
|
||||
WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
|
||||
(site_id, start, end),
|
||||
)
|
||||
for sd, aid, mid, rs, os_label, sc in cur.fetchall():
|
||||
result[sd]["relation"][(aid, mid)] = {
|
||||
"rs": Decimal(str(rs)), "os_label": os_label, "session_count": sc,
|
||||
}
|
||||
|
||||
# WBI(同时收集 OLD 状态用于过滤 NCI)
|
||||
old_members_by_date = defaultdict(set)
|
||||
cur.execute(
|
||||
"""SELECT stat_date, member_id, display_score, status
|
||||
FROM dws.dws_member_winback_index
|
||||
WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
|
||||
(site_id, start, end),
|
||||
)
|
||||
for sd, mid, score, status in cur.fetchall():
|
||||
result[sd]["wbi"][mid] = Decimal(str(score)) if score else Decimal(0)
|
||||
if status == "OLD":
|
||||
old_members_by_date[sd].add(mid)
|
||||
|
||||
# NCI(排除已转老客)
|
||||
cur.execute(
|
||||
"""SELECT stat_date, member_id, display_score
|
||||
FROM dws.dws_member_newconv_index
|
||||
WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
|
||||
(site_id, start, end),
|
||||
)
|
||||
for sd, mid, score in cur.fetchall():
|
||||
if mid not in old_members_by_date.get(sd, set()):
|
||||
result[sd]["nci"][mid] = Decimal(str(score)) if score else Decimal(0)
|
||||
|
||||
etl_conn.commit()
|
||||
return dict(result)
|
||||
|
||||
def _simulate_day(self, app_conn, etl_conn, site_id, d, tz, snapshot, active_tasks, stats,
|
||||
*, preloaded_settlements=None, preloaded_visits=None, task_params=None):
|
||||
day_dt = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=tz)
|
||||
|
||||
# 1. 过期检测
|
||||
expired_keys = [k for k, t in active_tasks.items() if t.get("expires_at") and t["expires_at"] < day_dt]
|
||||
for key in expired_keys:
|
||||
task = active_tasks.pop(key)
|
||||
stats["expired"] += 1
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute("UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s", (day_dt, task["id"]))
|
||||
self._history(cur, task["id"], "expired", "active", "expired", task["task_type"], task["task_type"], {"simulated": True})
|
||||
|
||||
# 2. 任务生成(混<EFBC88><E6B7B7><EFBFBD>冲突策略)
|
||||
relation = snapshot["relation"]
|
||||
wbi_map = snapshot["wbi"]
|
||||
nci_map = snapshot["nci"]
|
||||
|
||||
ownership_pairs = [
|
||||
(aid, mid, info)
|
||||
for (aid, mid), info in relation.items()
|
||||
if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
|
||||
]
|
||||
|
||||
for aid, mid, info in ownership_pairs:
|
||||
wbi = wbi_map.get(mid, Decimal(0))
|
||||
nci = nci_map.get(mid, Decimal(0))
|
||||
rs = info["rs"]
|
||||
|
||||
# 参数化任务判定(与日常 task_generator._process_pair 保持一致)
|
||||
priority_score = max(wbi, nci)
|
||||
if task_params:
|
||||
ht = Decimal(str(task_params["high_threshold"]))
|
||||
nt = Decimal(str(task_params["normal_threshold"]))
|
||||
rs_min = Decimal(str(task_params["rs_min"]))
|
||||
rs_max = Decimal(str(task_params["rs_max"]))
|
||||
else:
|
||||
ht, nt, rs_min, rs_max = Decimal(7), Decimal(5), Decimal(1), Decimal(6)
|
||||
|
||||
if priority_score > ht:
|
||||
new_type = "high_priority_recall"
|
||||
elif priority_score > nt:
|
||||
new_type = "priority_recall"
|
||||
elif rs > rs_min and rs < rs_max:
|
||||
new_type = "relationship_building"
|
||||
else:
|
||||
new_type = None
|
||||
|
||||
if not new_type:
|
||||
continue
|
||||
|
||||
key = (aid, mid)
|
||||
existing = active_tasks.get(key)
|
||||
priority = float(max(wbi, nci)) if new_type in ("high_priority_recall", "priority_recall") else float(rs)
|
||||
|
||||
if existing:
|
||||
if existing["task_type"] == new_type:
|
||||
continue
|
||||
|
||||
if existing["task_type"] == "follow_up_visit":
|
||||
# follow_up_visit 保留宽限期 + 新建高优先任务
|
||||
with app_conn.cursor() as cur:
|
||||
if not existing.get("expires_at"):
|
||||
cur.execute(
|
||||
"UPDATE biz.coach_tasks SET expires_at = created_at + INTERVAL '72 hours', updated_at = %s WHERE id = %s",
|
||||
(day_dt, existing["id"]),
|
||||
)
|
||||
self._history(cur, existing["id"], "expires_at_filled", "active", "active",
|
||||
"follow_up_visit", "follow_up_visit",
|
||||
{"reason": "higher_priority_task_created", "simulated": True})
|
||||
cur.execute(
|
||||
"""INSERT INTO biz.coach_tasks
|
||||
(site_id, assistant_id, member_id, task_type, status,
|
||||
priority_score, parent_task_id, created_at, updated_at)
|
||||
VALUES (%s, %s, %s, %s, 'active', %s, %s, %s, %s)
|
||||
ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
|
||||
DO UPDATE SET priority_score = EXCLUDED.priority_score, updated_at = EXCLUDED.updated_at
|
||||
RETURNING id""",
|
||||
(site_id, aid, mid, new_type, priority, existing["id"], day_dt, day_dt),
|
||||
)
|
||||
new_id = cur.fetchone()[0]
|
||||
self._history(cur, new_id, "created", None, "active", "follow_up_visit", new_type, {"simulated": True})
|
||||
active_tasks[key] = {"id": new_id, "task_type": new_type, "created_at": day_dt, "expires_at": None, "priority": priority}
|
||||
stats["created"] += 1
|
||||
else:
|
||||
# 非 follow_up:原地覆盖
|
||||
with app_conn.cursor() as cur:
|
||||
# 先关闭可能冲突的同 new_type active 记录(避免唯一约束冲突)
|
||||
cur.execute(
|
||||
"""UPDATE biz.coach_tasks SET status = 'inactive', updated_at = %s
|
||||
WHERE site_id = %s AND assistant_id = %s AND member_id = %s
|
||||
AND task_type = %s AND status = 'active' AND id != %s""",
|
||||
(day_dt, site_id, aid, mid, new_type, existing["id"]),
|
||||
)
|
||||
cur.execute(
|
||||
"UPDATE biz.coach_tasks SET task_type = %s, priority_score = %s, updated_at = %s WHERE id = %s AND status = 'active'",
|
||||
(new_type, priority, day_dt, existing["id"]),
|
||||
)
|
||||
self._history(cur, existing["id"], "type_override", "active", "active",
|
||||
existing["task_type"], new_type,
|
||||
{"old_priority": existing.get("priority"), "simulated": True})
|
||||
existing["task_type"] = new_type
|
||||
existing["priority"] = priority
|
||||
stats["overridden"] += 1
|
||||
else:
|
||||
# 新建任务(upsert:若同类型 active 已存在则更新 priority)
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""INSERT INTO biz.coach_tasks
|
||||
(site_id, assistant_id, member_id, task_type, status,
|
||||
priority_score, created_at, updated_at)
|
||||
VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
|
||||
ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
|
||||
DO UPDATE SET priority_score = EXCLUDED.priority_score, updated_at = EXCLUDED.updated_at
|
||||
RETURNING id""",
|
||||
(site_id, aid, mid, new_type, priority, day_dt, day_dt),
|
||||
)
|
||||
task_id = cur.fetchone()[0]
|
||||
self._history(cur, task_id, "created", None, "active", None, new_type, {"simulated": True})
|
||||
active_tasks[key] = {"id": task_id, "task_type": new_type, "created_at": day_dt, "expires_at": None, "priority": priority}
|
||||
stats["created"] += 1
|
||||
|
||||
# 3. 召回检测(优先使用预加载数据)
|
||||
settlements = preloaded_settlements if preloaded_settlements is not None else self._load_settlements(etl_conn, site_id, d)
|
||||
for (aid, mid), pay_time in settlements.items():
|
||||
key = (aid, mid)
|
||||
task = active_tasks.get(key)
|
||||
with app_conn.cursor() as cur:
|
||||
try:
|
||||
cur.execute(
|
||||
"""INSERT INTO biz.recall_events
|
||||
(site_id, assistant_id, member_id, pay_time, task_id, task_type, created_at)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (site_id, assistant_id, member_id,
|
||||
(date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
|
||||
DO NOTHING RETURNING id""",
|
||||
(site_id, aid, mid, pay_time, task["id"] if task else None, task["task_type"] if task else None, day_dt),
|
||||
)
|
||||
if cur.fetchone():
|
||||
stats["recall_events"] += 1
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not task or task["task_type"] not in ("high_priority_recall", "priority_recall") or pay_time <= task["created_at"]:
|
||||
continue
|
||||
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""UPDATE biz.coach_tasks SET status = 'completed', completed_at = %s,
|
||||
completed_task_type = %s, completion_type = 'auto', updated_at = %s
|
||||
WHERE id = %s AND status = 'active'""",
|
||||
(pay_time, task["task_type"], day_dt, task["id"]),
|
||||
)
|
||||
self._history(cur, task["id"], "completed", "active", "completed",
|
||||
task["task_type"], task["task_type"],
|
||||
{"service_time": str(pay_time), "simulated": True})
|
||||
stats["completed"] += 1
|
||||
|
||||
expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""INSERT INTO biz.coach_tasks
|
||||
(site_id, assistant_id, member_id, task_type, status, expires_at, created_at, updated_at)
|
||||
VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
|
||||
ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
|
||||
DO UPDATE SET expires_at = EXCLUDED.expires_at, updated_at = EXCLUDED.updated_at
|
||||
RETURNING id""",
|
||||
(site_id, aid, mid, expires_at, day_dt, day_dt),
|
||||
)
|
||||
fu_id = cur.fetchone()[0]
|
||||
self._history(cur, fu_id, "created", None, "active", None, "follow_up_visit",
|
||||
{"reason": "recall_completed", "simulated": True})
|
||||
active_tasks[key] = {"id": fu_id, "task_type": "follow_up_visit", "created_at": day_dt, "expires_at": expires_at}
|
||||
stats["follow_up_created"] += 1
|
||||
|
||||
# 3b. 门店级召回解除:客户到店后,未被服务的助教任务标记 resolved
|
||||
member_visits = preloaded_visits if preloaded_visits is not None else self._load_member_visits(etl_conn, site_id, d)
|
||||
resolved_keys = [
|
||||
k for k, t in active_tasks.items()
|
||||
if k[1] in member_visits
|
||||
and t["task_type"] in ("high_priority_recall", "priority_recall")
|
||||
and member_visits[k[1]] > t["created_at"]
|
||||
]
|
||||
for key in resolved_keys:
|
||||
task = active_tasks.pop(key)
|
||||
pay_time = member_visits[key[1]]
|
||||
with app_conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""UPDATE biz.coach_tasks SET status = 'resolved', updated_at = %s
|
||||
WHERE id = %s AND status = 'active'""",
|
||||
(day_dt, task["id"]),
|
||||
)
|
||||
self._history(cur, task["id"], "customer_returned", "active", "resolved",
|
||||
task["task_type"], task["task_type"],
|
||||
{"service_time": str(pay_time), "simulated": True})
|
||||
stats["resolved"] += 1
|
||||
|
||||
app_conn.commit()
|
||||
|
||||
@staticmethod
|
||||
def _history(cur, task_id, action, old_status, new_status, old_task_type, new_task_type, detail=None):
|
||||
if task_id is None:
|
||||
return
|
||||
cur.execute(
|
||||
"""INSERT INTO biz.coach_task_history
|
||||
(task_id, action, old_status, new_status, old_task_type, new_task_type, detail)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s)""",
|
||||
(task_id, action, old_status, new_status, old_task_type, new_task_type,
|
||||
json.dumps(detail) if detail else None),
|
||||
)
|
||||
|
||||
    def _clean_date_range(self, app_conn, tz, start_date: date, end_date: date):
        """Delete task data created inside [start_date, end_date]; rows outside
        the range are preserved.

        Args:
            app_conn: psycopg2 connection to the business database.
            tz: tzinfo used to anchor the day boundaries in local time.
            start_date / end_date: inclusive calendar range to wipe.
        """
        # Full local-time day boundaries: 00:00:00 through 23:59:59.
        range_start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0, tzinfo=tz)
        range_end = datetime(end_date.year, end_date.month, end_date.day, 23, 59, 59, tzinfo=tz)
        with app_conn.cursor() as cur:
            # Delete dependents first: history rows reference coach_tasks ids.
            cur.execute(
                "DELETE FROM biz.coach_task_history WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at >= %s AND created_at <= %s)",
                (range_start, range_end),
            )
            h = cur.rowcount
            cur.execute(
                "DELETE FROM biz.recall_events WHERE created_at >= %s AND created_at <= %s",
                (range_start, range_end),
            )
            e = cur.rowcount
            cur.execute(
                "DELETE FROM biz.coach_tasks WHERE created_at >= %s AND created_at <= %s",
                (range_start, range_end),
            )
            t = cur.rowcount
        app_conn.commit()
        # Only log when something was actually removed.
        if t > 0 or e > 0:
            self.logger.info("DWS_TASK_ENGINE [推演]: 清理 %s~%s 旧数据: %d history, %d events, %d tasks", start_date, end_date, h, e, t)
|
||||
|
||||
    def _load_existing_active_tasks(self, app_conn, site_id: int, before_date: date = None) -> dict:
        """Load the database's existing 'active' tasks into an in-memory dict.

        Args:
            app_conn: psycopg2 connection to the business database.
            site_id: site whose tasks are loaded.
            before_date: when given, only tasks with created_at < before_date
                (local midnight) are loaded, so "future" tasks created after
                the simulation window are not pulled in.

        Returns:
            {(assistant_id, member_id): {"id", "task_type", "created_at",
             "expires_at", "priority"}}
        """
        active_tasks = {}
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        with app_conn.cursor() as cur:
            if before_date:
                # Local-midnight cutoff for the created_at < %s filter.
                cutoff = datetime(before_date.year, before_date.month, before_date.day, 0, 0, 0, tzinfo=tz)
                cur.execute(
                    """SELECT id, assistant_id, member_id, task_type, created_at, expires_at, priority_score
                       FROM biz.coach_tasks
                       WHERE site_id = %s AND status = 'active' AND created_at < %s""",
                    (site_id, cutoff),
                )
            else:
                cur.execute(
                    """SELECT id, assistant_id, member_id, task_type, created_at, expires_at, priority_score
                       FROM biz.coach_tasks
                       WHERE site_id = %s AND status = 'active'""",
                    (site_id,),
                )
            for row in cur.fetchall():
                # Key mirrors the simulation's (assistant_id, member_id) pairs.
                key = (row[1], row[2])
                active_tasks[key] = {
                    "id": row[0], "task_type": row[3],
                    "created_at": row[4], "expires_at": row[5],
                    # NULL priority_score is normalized to 0.
                    "priority": float(row[6]) if row[6] else 0,
                }
        # Commit ends the implicit read transaction opened by the SELECT.
        app_conn.commit()
        return active_tasks
|
||||
|
||||
151
apps/etl/connectors/feiqiu/tasks/utility/index_backfill_task.py
Normal file
151
apps/etl/connectors/feiqiu/tasks/utility/index_backfill_task.py
Normal file
@@ -0,0 +1,151 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
指数日快照回填任务(DWS_INDEX_BACKFILL)
|
||||
|
||||
逐天调用 RelationIndexTask / WinbackIndexTask / NewconvIndexTask,
|
||||
为 3 张指数表生成历史日快照。
|
||||
|
||||
CHANGE 2026-04-12 | 性能优化:
|
||||
- 任务实例复用(创建 1 次,循环 N 天复用)
|
||||
- 减少 765 次 Task 初始化和参数表查询
|
||||
|
||||
CLI 用法:
|
||||
python -m cli.main --tasks DWS_INDEX_BACKFILL \\
|
||||
--window-start 2025-08-01 --window-end 2026-04-11
|
||||
|
||||
admin-web:在 ETL 任务配置页面选择 DWS_INDEX_BACKFILL,设置时间窗口。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from datetime import date, datetime, timedelta
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ..base_task import BaseTask, TaskContext
|
||||
from ..dws.index.relation_index_task import RelationIndexTask
|
||||
from ..dws.index.winback_index_task import WinbackIndexTask
|
||||
from ..dws.index.newconv_index_task import NewconvIndexTask
|
||||
|
||||
|
||||
class IndexBackfillTask(BaseTask):
    """Utility task that backfills daily index snapshots (DWS_INDEX_BACKFILL).

    Replays RelationIndexTask / WinbackIndexTask / NewconvIndexTask once per
    day across the requested window, producing one historical snapshot per
    day in each of the three index tables.
    """

    def get_task_code(self) -> str:
        # Code the CLI / admin-web scheduler uses to select this task.
        return "DWS_INDEX_BACKFILL"

    def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
        """Resolve the date window, then run the three index tasks day by day.

        Returns a dict with "status" ("SUCCESS" / "PARTIAL") and counters.
        """
        first_day, last_day = self._parse_date_range(context)
        store_id = self._resolve_store_id(context)

        day_count = (last_day - first_day).days + 1
        self.logger.info(
            "DWS_INDEX_BACKFILL: %s ~ %s (%d天), store_id=%s",
            first_day, last_day, day_count, store_id,
        )

        # Instantiate each index task exactly once and reuse it for every
        # day, avoiding repeated __init__ and parameter-table lookups.
        runners = [
            RelationIndexTask(self.config, self.db, self.api, self.logger),
            WinbackIndexTask(self.config, self.db, self.api, self.logger),
            NewconvIndexTask(self.config, self.db, self.api, self.logger),
        ]
        short_names = ["RS", "WBI", "NCI"]

        ok_runs = 0
        failed_runs = 0
        started = time.time()

        for offset in range(day_count):
            day = first_day + timedelta(days=offset)
            ctx = self._build_day_context(day, store_id)
            seq = offset + 1
            day_started = time.time()

            for idx, runner in enumerate(runners):
                try:
                    runner.execute(ctx)
                    ok_runs += 1
                except Exception:
                    # One failed (task, day) pair must not abort the backfill.
                    self.logger.exception(
                        "DWS_INDEX_BACKFILL: %s %s 失败",
                        runner.__class__.__name__, day,
                    )
                    failed_runs += 1
                # Per-task progress line (emitted on success and failure alike).
                self.logger.info(
                    "DWS_INDEX_BACKFILL: %s [%d/%d] %s (%d/3)",
                    day, seq, day_count, short_names[idx], idx + 1,
                )

            # Per-day timing plus a rough ETA from the running average.
            day_elapsed = time.time() - day_started
            running_total = time.time() - started
            per_day = running_total / seq
            remaining = per_day * (day_count - seq)
            self.logger.info(
                "DWS_INDEX_BACKFILL: %s [%d/%d %.0f%%] %.1fs/天 ETA %.0fs",
                day, seq, day_count, seq / day_count * 100,
                day_elapsed, remaining,
            )

        wall_clock = time.time() - started
        self.logger.info(
            "DWS_INDEX_BACKFILL 完成: %d/%d 成功, %d 失败, %.0fs",
            ok_runs, day_count * 3, failed_runs, wall_clock,
        )

        return {
            "status": "SUCCESS" if failed_runs == 0 else "PARTIAL",
            "counts": {
                "days": day_count,
                "completed": ok_runs,
                "errors": failed_runs,
                "elapsed_sec": round(wall_clock, 1),
            },
        }

    def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
        """Resolve (start, end) from run.window_override, falling back to context."""
        override = self.config.get("run.window_override") or {}
        raw_start = override.get("start")
        raw_end = override.get("end")

        if raw_start and raw_end:
            return self._parse_date(raw_start), self._parse_date(raw_end)

        if context and context.window_start and context.window_end:
            return context.window_start.date(), context.window_end.date()

        raise ValueError(
            "DWS_INDEX_BACKFILL 需要指定日期范围。"
            "CLI: --window-start 2025-08-01 --window-end 2026-04-11"
        )

    @staticmethod
    def _parse_date(s) -> date:
        """Coerce a date / datetime / ISO-like string (first 10 chars) to a date."""
        if isinstance(s, datetime):
            return s.date()
        if isinstance(s, date):
            return s
        return date.fromisoformat(str(s).strip()[:10])

    def _resolve_store_id(self, context: Optional[TaskContext]) -> int:
        """Pick store_id from the context when present, else from app config."""
        ctx_store = getattr(context, "store_id", None) if context else None
        if ctx_store:
            return int(ctx_store)
        configured = self.config.get("app.store_id")
        if configured:
            return int(configured)
        raise ValueError("DWS_INDEX_BACKFILL 需要 store_id")

    def _build_day_context(self, d: date, store_id: int) -> TaskContext:
        """Build a per-day TaskContext: 90-day lookback ending at 23:59 local time."""
        from zoneinfo import ZoneInfo
        zone = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        day_end = datetime(d.year, d.month, d.day, 23, 59, 0, tzinfo=zone)
        lookback_start = day_end - timedelta(days=90)
        return TaskContext(
            store_id=store_id,
            window_start=lookback_start,
            window_end=day_end,
            window_minutes=int((day_end - lookback_start).total_seconds() / 60),
            as_of_date=day_end,
        )
|
||||
473
apps/etl/connectors/feiqiu/tasks/utility/task_simulation_task.py
Normal file
473
apps/etl/connectors/feiqiu/tasks/utility/task_simulation_task.py
Normal file
@@ -0,0 +1,473 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
历史任务推演任务(DWS_TASK_SIMULATION)
|
||||
|
||||
基于指数日快照,逐天重放 task_generator + recall_detector 逻辑,
|
||||
还原完整的任务生命周期。
|
||||
|
||||
CLI 用法:
|
||||
python -m cli.main --tasks DWS_TASK_SIMULATION \\
|
||||
--window-start 2025-08-01 --window-end 2026-03-28
|
||||
|
||||
admin-web:在 ETL 任务配置页面选择 DWS_TASK_SIMULATION,设置时间窗口。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import psycopg2
|
||||
|
||||
from ..base_task import BaseTask, TaskContext
|
||||
|
||||
# Import the pure task_generator functions from the backend code base.
# parents[5] climbs from .../connectors/feiqiu/tasks/utility/<this file>
# up to the apps/ directory, then appends "backend" — TODO confirm the
# depth stays correct if this file is ever moved.
_BACKEND = Path(__file__).resolve().parents[5] / "backend"
if str(_BACKEND) not in sys.path:
    sys.path.insert(0, str(_BACKEND))
|
||||
|
||||
from app.services.task_generator import (
|
||||
IndexData,
|
||||
determine_task_type,
|
||||
should_replace_task,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Simulation cutoff date (real active tasks exist from 03-29 onward);
# execute() truncates any requested end_date to this value.
CUTOFF_DATE = date(2026, 3, 28)
# Lifetime (hours) of an auto-created follow_up_visit task:
# expires_at = pay_time + FOLLOW_UP_HOURS.
FOLLOW_UP_HOURS = 48
|
||||
|
||||
|
||||
class TaskSimulationTask(BaseTask):
    """Historical task simulation utility (DWS_TASK_SIMULATION).

    Replays the task_generator + recall_detector logic day by day on top of
    the daily index snapshots, reconstructing the full task lifecycle
    (create / override / expire / complete / follow-up) before CUTOFF_DATE.
    """

    def get_task_code(self) -> str:
        # Code the CLI / admin-web scheduler uses to select this task.
        return "DWS_TASK_SIMULATION"

    def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
        """Main flow: resolve the date range, then simulate each day in order.

        Returns {"status": "SUCCESS", "counts": <stats dict>}.  Raises
        ValueError when no date range or APP_DB_DSN is available.
        """
        start_date, end_date = self._parse_date_range(context)

        # Never simulate past the cutoff: real active tasks start 03-29.
        if end_date > CUTOFF_DATE:
            self.logger.warning(
                "end_date %s 超过截止日期 %s,自动截断", end_date, CUTOFF_DATE
            )
            end_date = CUTOFF_DATE

        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))

        # The ETL database reuses the framework's existing connection.
        etl_conn = self.db.conn

        # The business database needs its own connection, configured via the
        # APP_DB_DSN environment variable.
        app_dsn = os.environ.get("APP_DB_DSN")
        if not app_dsn:
            raise ValueError("DWS_TASK_SIMULATION 需要 APP_DB_DSN 环境变量")
        app_conn = psycopg2.connect(app_dsn)
        app_conn.set_client_encoding("UTF8")

        site_id = self._get_site_id(etl_conn)
        total_days = (end_date - start_date).days + 1

        self.logger.info(
            "DWS_TASK_SIMULATION: %s ~ %s (%d天), site_id=%s",
            start_date, end_date, total_days, site_id,
        )

        # Remove previously simulated data before the cutoff (safe to delete).
        self._clean_before_cutoff(app_conn, CUTOFF_DATE)

        # In-memory mirror of active tasks keyed by (assistant_id, member_id).
        active_tasks: dict[tuple[int, int], dict] = {}
        stats = {
            "created": 0, "completed": 0, "overridden": 0,
            "expired": 0, "follow_up_created": 0,
            "recall_events": 0, "skipped_no_snapshot": 0,
        }

        t0 = time.time()
        current = start_date
        while current <= end_date:
            snapshot = self._load_snapshot(etl_conn, site_id, current)
            # Days with no snapshot rows at all are skipped (and counted).
            if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
                stats["skipped_no_snapshot"] += 1
                current += timedelta(days=1)
                continue

            self._simulate_day(
                app_conn, etl_conn, site_id, current, tz,
                snapshot, active_tasks, stats,
            )

            # Progress log every 30 simulated days and on the final day.
            day_num = (current - start_date).days + 1
            if day_num % 30 == 0 or current == end_date:
                elapsed = time.time() - t0
                self.logger.info(
                    "DWS_TASK_SIMULATION: %s (%d/%d) 创建=%d 完成=%d 覆盖=%d 过期=%d %.0fs",
                    current, day_num, total_days,
                    stats["created"], stats["completed"],
                    stats["overridden"], stats["expired"], elapsed,
                )

            current += timedelta(days=1)

        total_elapsed = time.time() - t0
        self.logger.info(
            "DWS_TASK_SIMULATION 完成: %.0fs, 创建=%d 完成=%d 覆盖=%d 过期=%d 回访=%d 事件=%d 跳过=%d active=%d",
            total_elapsed, stats["created"], stats["completed"],
            stats["overridden"], stats["expired"],
            stats["follow_up_created"], stats["recall_events"],
            stats["skipped_no_snapshot"], len(active_tasks),
        )

        app_conn.close()

        return {
            "status": "SUCCESS",
            "counts": stats,
        }

    # ── Date parsing ──

    def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
        """Resolve (start, end) from run.window_override, else from context."""
        wo = self.config.get("run.window_override") or {}
        start_str = wo.get("start")
        end_str = wo.get("end")
        if start_str and end_str:
            return self._parse_date(start_str), self._parse_date(end_str)
        if context and context.window_start and context.window_end:
            return context.window_start.date(), context.window_end.date()
        raise ValueError(
            "DWS_TASK_SIMULATION 需要指定日期范围。"
            "CLI: --window-start 2025-08-01 --window-end 2026-03-28"
        )

    @staticmethod
    def _parse_date(s) -> date:
        """Coerce a date / datetime / ISO-like string (first 10 chars) to a date."""
        if isinstance(s, date) and not isinstance(s, datetime):
            return s
        if isinstance(s, datetime):
            return s.date()
        return date.fromisoformat(str(s).strip()[:10])

    def _get_site_id(self, etl_conn) -> int:
        """Read a site_id from the relation-index table.

        NOTE(review): LIMIT 1 silently picks one site if several exist —
        presumably a single-site deployment; confirm.
        """
        with etl_conn.cursor() as cur:
            cur.execute(
                "SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1"
            )
            row = cur.fetchone()
        etl_conn.commit()
        if not row:
            raise RuntimeError("relation_index 表为空,请先运行 DWS_INDEX_BACKFILL")
        return row[0]

    # ── Data loading ──

    def _load_snapshot(self, etl_conn, site_id: int, stat_date: date) -> dict:
        """Load the three index snapshots for one day.

        Returns:
            {"relation": {(assistant_id, member_id): {"rs", "os_label",
             "session_count"}}, "wbi": {member_id: Decimal},
             "nci": {member_id: Decimal}}
        """
        result = {"relation": {}, "wbi": {}, "nci": {}}
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT assistant_id, member_id, rs_display, os_label, session_count
                   FROM dws.dws_member_assistant_relation_index
                   WHERE site_id = %s AND stat_date = %s""",
                (site_id, stat_date),
            )
            for r in cur.fetchall():
                result["relation"][(r[0], r[1])] = {
                    "rs": Decimal(str(r[2])), "os_label": r[3], "session_count": r[4],
                }
            cur.execute(
                """SELECT member_id, display_score FROM dws.dws_member_winback_index
                   WHERE site_id = %s AND stat_date = %s""",
                (site_id, stat_date),
            )
            for r in cur.fetchall():
                # NULL / falsy scores are normalized to Decimal(0).
                result["wbi"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
            cur.execute(
                """SELECT member_id, display_score FROM dws.dws_member_newconv_index
                   WHERE site_id = %s AND stat_date = %s""",
                (site_id, stat_date),
            )
            for r in cur.fetchall():
                result["nci"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
        etl_conn.commit()
        return result

    def _load_settlements(self, etl_conn, site_id: int, d: date) -> dict:
        """Load the day's settlements → {(assistant_id, member_id): pay_time}."""
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
        day_end = day_start + timedelta(days=1)
        settlements = {}
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT sl.site_assistant_id, sh.member_id, MAX(sh.pay_time)
                   FROM dwd.dwd_settlement_head sh
                   JOIN dwd.dwd_assistant_service_log sl
                     ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
                   WHERE sh.site_id = %s AND sh.settle_type IN (1,3)
                     AND sh.pay_time >= %s AND sh.pay_time < %s
                   GROUP BY sl.site_assistant_id, sh.member_id""",
                (site_id, day_start, day_end),
            )
            for r in cur.fetchall():
                # Skip rows with a missing assistant or member id.
                if r[0] and r[1]:
                    settlements[(r[0], r[1])] = r[2]
        etl_conn.commit()
        return settlements

    # ── Simulation logic ──

    def _simulate_day(
        self, app_conn, etl_conn, site_id, d, tz,
        snapshot, active_tasks, stats,
    ):
        """Simulate one day: expire, generate/override, then detect recalls.

        Mutates active_tasks and stats in place; commits app_conn at the end
        so each simulated day is one transaction.
        """
        # All same-day writes are timestamped 07:00 local time.
        day_dt = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=tz)

        # 1. Expiry detection: drop tasks whose expires_at is behind today.
        expired_keys = [
            k for k, t in active_tasks.items()
            if t.get("expires_at") and t["expires_at"] < day_dt
        ]
        for key in expired_keys:
            task = active_tasks.pop(key)
            stats["expired"] += 1
            with app_conn.cursor() as cur:
                cur.execute(
                    "UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s",
                    (day_dt, task["id"]),
                )
                self._history(cur, task["id"], "expired", "active", "expired",
                              task["task_type"], task["task_type"],
                              {"simulated": True})

        # 2. Task generation (with conflict override).
        relation = snapshot["relation"]
        wbi_map = snapshot["wbi"]
        nci_map = snapshot["nci"]

        # Only pairs where the assistant "owns" the member and has sessions.
        ownership_pairs = [
            (aid, mid, info)
            for (aid, mid), info in relation.items()
            if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
        ]

        for aid, mid, info in ownership_pairs:
            wbi = wbi_map.get(mid, Decimal(0))
            nci = nci_map.get(mid, Decimal(0))
            rs = info["rs"]

            # Delegate classification to the shared backend pure function.
            new_type = determine_task_type(IndexData(
                site_id=site_id, assistant_id=aid, member_id=mid,
                wbi=wbi, nci=nci, rs=rs,
                has_active_recall=False, has_follow_up_note=False,
            ))
            if not new_type:
                continue

            key = (aid, mid)
            existing = active_tasks.get(key)
            # Recall tasks rank by the stronger of WBI/NCI; others by RS.
            priority = float(max(wbi, nci)) if new_type in (
                "high_priority_recall", "priority_recall"
            ) else float(rs)

            if existing:
                if existing["task_type"] == new_type:
                    continue  # same type: nothing to do

                if existing["task_type"] == "follow_up_visit":
                    # follow_up_visit keeps its grace period: backfill its
                    # expires_at, then create the higher-priority task
                    # alongside it (linked via parent_task_id).
                    with app_conn.cursor() as cur:
                        if not existing.get("expires_at"):
                            cur.execute(
                                """UPDATE biz.coach_tasks
                                   SET expires_at = created_at + INTERVAL '48 hours', updated_at = %s
                                   WHERE id = %s""",
                                (day_dt, existing["id"]),
                            )
                            self._history(cur, existing["id"], "expires_at_filled",
                                          "active", "active",
                                          "follow_up_visit", "follow_up_visit",
                                          {"reason": "higher_priority_task_created", "simulated": True})
                        # Create the new higher-priority task.
                        cur.execute(
                            """INSERT INTO biz.coach_tasks
                               (site_id, assistant_id, member_id, task_type, status,
                                priority_score, parent_task_id, created_at, updated_at)
                               VALUES (%s, %s, %s, %s, 'active', %s, %s, %s, %s)
                               RETURNING id""",
                            (site_id, aid, mid, new_type, priority,
                             existing["id"], day_dt, day_dt),
                        )
                        new_id = cur.fetchone()[0]
                        self._history(cur, new_id, "created", None, "active",
                                      "follow_up_visit", new_type, {"simulated": True})

                    # The new task replaces the follow-up in the memory view.
                    active_tasks[key] = {
                        "id": new_id, "task_type": new_type,
                        "created_at": day_dt, "expires_at": None,
                        "priority": priority,
                    }
                    stats["created"] += 1
                else:
                    # Non-follow-up: override the task type in place.
                    with app_conn.cursor() as cur:
                        cur.execute(
                            """UPDATE biz.coach_tasks
                               SET task_type = %s, priority_score = %s, updated_at = %s
                               WHERE id = %s AND status = 'active'""",
                            (new_type, priority, day_dt, existing["id"]),
                        )
                        self._history(cur, existing["id"], "type_override", "active", "active",
                                      existing["task_type"], new_type,
                                      {"old_priority": existing.get("priority"), "simulated": True})

                    existing["task_type"] = new_type
                    existing["priority"] = priority
                    stats["overridden"] += 1
            else:
                # Brand-new task for this (assistant, member) pair.
                with app_conn.cursor() as cur:
                    cur.execute(
                        """INSERT INTO biz.coach_tasks
                           (site_id, assistant_id, member_id, task_type, status,
                            priority_score, created_at, updated_at)
                           VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                           RETURNING id""",
                        (site_id, aid, mid, new_type, priority, day_dt, day_dt),
                    )
                    task_id = cur.fetchone()[0]
                    self._history(cur, task_id, "created", None, "active",
                                  None, new_type, {"simulated": True})

                active_tasks[key] = {
                    "id": task_id, "task_type": new_type,
                    "created_at": day_dt, "expires_at": None,
                    "priority": priority,
                }
                stats["created"] += 1

        # 3. Recall detection: settlements mean the member actually came back.
        settlements = self._load_settlements(etl_conn, site_id, d)

        for (aid, mid), pay_time in settlements.items():
            key = (aid, mid)
            task = active_tasks.get(key)

            # Record the recall event (idempotent per day via ON CONFLICT).
            with app_conn.cursor() as cur:
                try:
                    cur.execute(
                        """INSERT INTO biz.recall_events
                           (site_id, assistant_id, member_id, pay_time,
                            task_id, task_type, created_at)
                           VALUES (%s, %s, %s, %s, %s, %s, %s)
                           ON CONFLICT (site_id, assistant_id, member_id,
                                        (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                           DO NOTHING RETURNING id""",
                        (site_id, aid, mid, pay_time,
                         task["id"] if task else None,
                         task["task_type"] if task else None,
                         day_dt),
                    )
                    if cur.fetchone():
                        stats["recall_events"] += 1
                except Exception:
                    # NOTE(review): a failed statement aborts the psycopg2
                    # transaction until rollback; swallowing here assumes the
                    # only expected failure is covered by ON CONFLICT — confirm.
                    pass

            if not task:
                continue
            if task["task_type"] not in ("high_priority_recall", "priority_recall"):
                continue
            # Only settlements strictly after task creation count as a recall.
            if pay_time <= task["created_at"]:
                continue

            # Complete the recall task.
            with app_conn.cursor() as cur:
                cur.execute(
                    """UPDATE biz.coach_tasks
                       SET status = 'completed', completed_at = %s,
                           completed_task_type = %s, completion_type = 'auto', updated_at = %s
                       WHERE id = %s AND status = 'active'""",
                    (pay_time, task["task_type"], day_dt, task["id"]),
                )
                self._history(cur, task["id"], "completed", "active", "completed",
                              task["task_type"], task["task_type"],
                              {"service_time": str(pay_time), "simulated": True})
            stats["completed"] += 1

            # Spawn the follow-up visit task with a 48h lifetime.
            expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
            with app_conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO biz.coach_tasks
                       (site_id, assistant_id, member_id, task_type, status,
                        expires_at, created_at, updated_at)
                       VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                       RETURNING id""",
                    (site_id, aid, mid, expires_at, day_dt, day_dt),
                )
                fu_id = cur.fetchone()[0]
                self._history(cur, fu_id, "created", None, "active",
                              None, "follow_up_visit",
                              {"reason": "recall_completed", "simulated": True})

            active_tasks[key] = {
                "id": fu_id, "task_type": "follow_up_visit",
                "created_at": day_dt, "expires_at": expires_at,
            }
            stats["follow_up_created"] += 1

        # One commit per simulated day.
        app_conn.commit()

    # ── Helpers ──

    @staticmethod
    def _history(cur, task_id, action, old_status, new_status,
                 old_task_type, new_task_type, detail=None):
        """Append one transition row to biz.coach_task_history.

        No-op when task_id is None; falsy detail is stored as NULL.
        """
        if task_id is None:
            return
        cur.execute(
            """INSERT INTO biz.coach_task_history
               (task_id, action, old_status, new_status,
                old_task_type, new_task_type, detail)
               VALUES (%s, %s, %s, %s, %s, %s, %s)""",
            (task_id, action, old_status, new_status,
             old_task_type, new_task_type,
             json.dumps(detail) if detail else None),
        )

    def _clean_before_cutoff(self, app_conn, cutoff: date):
        """Delete pre-cutoff data (safe: only previously simulated history)."""
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        cutoff_dt = datetime(cutoff.year, cutoff.month, cutoff.day, 23, 59, 59, tzinfo=tz)
        with app_conn.cursor() as cur:
            # Children first (history references tasks), then events, then tasks.
            cur.execute(
                """DELETE FROM biz.coach_task_history
                   WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at < %s)""",
                (cutoff_dt,),
            )
            h = cur.rowcount
            cur.execute(
                "DELETE FROM biz.recall_events WHERE created_at < %s",
                (cutoff_dt,),
            )
            e = cur.rowcount
            cur.execute(
                "DELETE FROM biz.coach_tasks WHERE created_at < %s",
                (cutoff_dt,),
            )
            t = cur.rowcount
        app_conn.commit()
        if t > 0:
            self.logger.info(
                "DWS_TASK_SIMULATION: 清理旧数据 %d history, %d events, %d tasks",
                h, e, t,
            )
|
||||
Reference in New Issue
Block a user