Files
Neo-ZQYY/apps/etl/connectors/feiqiu/tasks/dws/task_engine.py
Neo 2a7a5d68aa feat: 2026-04-15~04-20 累积变更基线 — 多主线合流
主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
  - 新增 GET /xcx/coaches/{id}/banner 轻量接口
  - performance/records 加 coach_id 参数 + view_board_coach 权限分流
  - coach/customer/performance/board/task 服务层重构
  - fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
  - task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
  - recall_detector settle_type=3 双重限制 + 门店级 resolved

主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
  - perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
  - isScattered 散客标记端到端
  - foodDetail/phoneFull/creator* 字段透传

主线 3: P19 指数回测框架 Phase 1+2
  - 3 个指数表 stat_date 日快照模式
  - 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
  - task_engine 升级 HTTP 实时 + 推演回测双模式

主线 4: Core 维度层启用
  - 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
  - 修复 app 视图空查询问题

主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口

主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
  - schema 基线与 DDL 快照同步

主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)

附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
      backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具

合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 06:32:07 +08:00

804 lines
37 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# AI_CHANGELOG
# - 2026-03-29 | Prompt: DWS_TASK_ENGINE ETL 任务 | 新建文件。
# 编排任务引擎全流程:完成检查 → 过期检查 → 任务生成。
# 通过 HTTP 调用后端 POST /api/internal/run-job 按 job_name 执行。
# - 2026-04-12 | 合并 DWS_TASK_SIMULATION:有时间窗口时走推演模式,
# 无时间窗口时走原来的 HTTP 模式。
# -*- coding: utf-8 -*-
"""
DWS 任务引擎编排任务DWS_TASK_ENGINE
双模式:
- 无时间窗口(日常 Flow通过 HTTP 调用后端任务引擎
1. recall_completion_check — 检测召回完成
2. task_expiry_check — 标记超时任务
3. task_generator — 根据指数生成/替换任务
- 有时间窗口(历史推演):基于指数日快照逐天重放任务生命周期
需先运行 DWS_INDEX_BACKFILL 生成历史快照
"""
from __future__ import annotations
import json
import logging
import os
import sys
import time
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, Optional
from zoneinfo import ZoneInfo
import requests
from dotenv import load_dotenv
from ..base_task import BaseTask, TaskContext
# Load the repository-root .env (6 levels up from this file).
_REPO_ROOT = Path(__file__).resolve().parents[6]
load_dotenv(_REPO_ROOT / ".env", override=False)

logger = logging.getLogger(__name__)

# (connect, read) timeout in seconds for backend HTTP calls.
_TIMEOUT = (5, 30)

# HTTP mode: backend jobs executed in this order.
_JOB_SEQUENCE = [
    "recall_completion_check",
    "task_expiry_check",
    "task_generator",
]

# Simulation mode: import the task_generator pure functions from the backend app.
# The backend package is made importable by pushing its path onto sys.path;
# if the import fails, simulation mode is disabled (flag checked at runtime).
_BACKEND = _REPO_ROOT / "apps" / "backend"
if str(_BACKEND) not in sys.path:
    sys.path.insert(0, str(_BACKEND))
try:
    from app.services.task_generator import (
        IndexData,
        determine_task_type,
        should_replace_task,
    )
    _SIMULATION_AVAILABLE = True
except ImportError:
    _SIMULATION_AVAILABLE = False

# Simulation cutoff date (pre-existing active tasks start from 03-29).
CUTOFF_DATE = date(2026, 3, 28)
# Grace period (hours) for follow_up_visit tasks created after a recall completes.
FOLLOW_UP_HOURS = 72

# ── HTTP mode helpers ──
def _run_backend_job(backend_url: str, token: str, job_name: str) -> dict:
    """POST a single job to the backend's internal run-job endpoint.

    Args:
        backend_url: backend base URL, no trailing slash.
        token: internal API token for the ``Internal-Token`` auth scheme.
        job_name: backend job identifier to execute.

    Returns:
        ``{"success": bool, "message": str}``. Never raises: HTTP errors and
        transport failures are folded into a failure dict.
    """
    endpoint = f"{backend_url}/api/internal/run-job"
    req_headers = {
        "Authorization": f"Internal-Token {token}",
        "Content-Type": "application/json",
    }
    try:
        resp = requests.post(endpoint, json={"job_name": job_name}, headers=req_headers, timeout=_TIMEOUT)
        if resp.status_code != 200:
            return {"success": False, "message": f"HTTP {resp.status_code}: {resp.text[:200]}"}
        payload = resp.json()
        # Backend may wrap the result in a "data" envelope; unwrap if present.
        inner = payload.get("data", payload)
        return {"success": inner.get("success", False), "message": inner.get("message", "")}
    except requests.RequestException as exc:
        return {"success": False, "message": str(exc)}
class DwsTaskEngineTask(BaseTask):
    """DWS task engine (dual mode).

    No time window   -> HTTP mode (daily Flow).
    With time window -> simulation mode (historical backfill).
    """

    def get_task_code(self) -> str:
        return "DWS_TASK_ENGINE"

    def execute(self, context=None) -> Dict[str, Any]:
        """Override execute() directly, bypassing BaseTask's E/T/L template.

        Mode is chosen by the presence of a time window:
        - window present -> simulation mode (generate/complete tasks day by day)
        - no window      -> HTTP mode (call the backend to run today's engine)
        """
        if self._has_window(context):
            return self._run_simulation_mode(context)
        return self._run_http_mode()

    def _has_window(self, context=None) -> bool:
        """Return True when a time window is specified (config or context)."""
        # Prefer config (CLI --window-start/--window-end).
        wo = self.config.get("run.window_override") or {}
        if wo.get("start") and wo.get("end"):
            return True
        # Fall back to the context built by task_executor; a degenerate window
        # where start == end counts as "no window".
        if context and hasattr(context, 'window_start') and hasattr(context, 'window_end'):
            if context.window_start and context.window_end and context.window_start != context.window_end:
                return True
        return False

    # ── HTTP mode (daily) ──

    def _run_http_mode(self) -> dict[str, Any]:
        """Run the three backend jobs in sequence over HTTP.

        Missing configuration is downgraded to a skip (not a failure); a failed
        job is logged but does not stop the remaining jobs.
        """
        backend_url = os.environ.get("BACKEND_API_URL", "").rstrip("/")
        token = os.environ.get("INTERNAL_API_TOKEN", "")
        if not backend_url:
            self.logger.error("DWS_TASK_ENGINE 跳过BACKEND_API_URL 未配置")
            return {"skipped": True, "reason": "BACKEND_API_URL 未配置"}
        if not token:
            self.logger.error("DWS_TASK_ENGINE 跳过INTERNAL_API_TOKEN 未配置")
            return {"skipped": True, "reason": "INTERNAL_API_TOKEN 未配置"}
        results: dict[str, Any] = {}
        for job_name in _JOB_SEQUENCE:
            self.logger.info("DWS_TASK_ENGINE: 执行 %s ...", job_name)
            result = _run_backend_job(backend_url, token, job_name)
            success = result.get("success", False)
            message = result.get("message", "")
            results[job_name] = {"success": success, "message": message}
            if success:
                self.logger.info("DWS_TASK_ENGINE: %s 成功 — %s", job_name, message)
            else:
                self.logger.warning("DWS_TASK_ENGINE: %s 失败 — %s", job_name, message)
        return results

    # ── Simulation mode (historical) ──

    def _run_simulation_mode(self, context: Optional[TaskContext]) -> dict[str, Any]:
        """Replay the task lifecycle day by day over historical index snapshots.

        Requires DWS_INDEX_BACKFILL to have populated the daily snapshots first.
        Reads from the ETL warehouse connection, writes tasks/events into the
        app database (APP_DB_DSN). Finishes by triggering the normal HTTP flow.
        """
        if not _SIMULATION_AVAILABLE:
            raise RuntimeError("推演模式不可用:无法导入 app.services.task_generator")
        # Imported lazily so HTTP-only deployments don't need psycopg2.
        import psycopg2
        start_date, end_date = self._parse_date_range(context)
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        etl_conn = self.db.conn
        app_dsn = os.environ.get("APP_DB_DSN")
        if not app_dsn:
            raise ValueError("推演模式需要 APP_DB_DSN 环境变量")
        app_conn = psycopg2.connect(app_dsn)
        app_conn.set_client_encoding("UTF8")
        site_id = self._get_site_id(etl_conn)
        total_days = (end_date - start_date).days + 1
        self.logger.info(
            "DWS_TASK_ENGINE [推演模式]: %s ~ %s (%d天), site_id=%s",
            start_date, end_date, total_days, site_id,
        )
        # Wipe old data inside the simulated range; data outside it is kept.
        self._clean_date_range(app_conn, tz, start_date, end_date)
        # Load active tasks that predate the range (never "future" tasks).
        active_tasks = self._load_existing_active_tasks(app_conn, site_id, before_date=start_date)
        self.logger.info("DWS_TASK_ENGINE [推演]: 范围前已有 active 任务 %d", len(active_tasks))
        stats = {
            "created": 0, "completed": 0, "resolved": 0, "overridden": 0,
            "expired": 0, "follow_up_created": 0,
            "recall_events": 0, "skipped_no_snapshot": 0,
        }
        # Preload everything up front, sliced per day
        # (5 bulk queries instead of 255 x 5 per-day queries).
        self.logger.info("DWS_TASK_ENGINE [推演]: 预加载快照 + 结算数据 ...")
        snapshots_by_date = self._bulk_load_snapshots(etl_conn, site_id, start_date, end_date)
        settlements_by_date = self._bulk_load_settlements(etl_conn, site_id, start_date, end_date, tz)
        member_visits_by_date = self._bulk_load_member_visits(etl_conn, site_id, start_date, end_date, tz)
        self.logger.info(
            "DWS_TASK_ENGINE [推演]: 预加载完成, %d 天有快照, %d 天有助教结算, %d 天有到店记录",
            len(snapshots_by_date), len(settlements_by_date), len(member_visits_by_date),
        )
        # Load generation thresholds (kept consistent with the daily task_generator).
        task_params = self._load_task_generator_params(app_conn, site_id)
        self.logger.info(
            "DWS_TASK_ENGINE [推演]: 任务阈值 high=%.1f, normal=%.1f, rs=[%.1f, %.1f)",
            task_params["high_threshold"], task_params["normal_threshold"],
            task_params["rs_min"], task_params["rs_max"],
        )
        t0 = time.time()
        current = start_date
        while current <= end_date:
            snapshot = snapshots_by_date.get(current, {"relation": {}, "wbi": {}, "nci": {}})
            # Days with no snapshot at all are skipped entirely.
            if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
                stats["skipped_no_snapshot"] += 1
                current += timedelta(days=1)
                continue
            day_settlements = settlements_by_date.get(current, {})
            day_visits = member_visits_by_date.get(current, {})
            self._simulate_day(app_conn, etl_conn, site_id, current, tz, snapshot, active_tasks, stats,
                               preloaded_settlements=day_settlements, preloaded_visits=day_visits,
                               task_params=task_params)
            # Progress log every 30 simulated days and on the final day.
            day_num = (current - start_date).days + 1
            if day_num % 30 == 0 or current == end_date:
                elapsed = time.time() - t0
                self.logger.info(
                    "DWS_TASK_ENGINE [推演]: %s (%d/%d) 创建=%d 完成=%d 解除=%d 覆盖=%d 过期=%d %.0fs",
                    current, day_num, total_days,
                    stats["created"], stats["completed"], stats["resolved"],
                    stats["overridden"], stats["expired"], elapsed,
                )
            current += timedelta(days=1)
        # Wrap-up: expire tasks still active whose expires_at has already passed.
        now_dt = datetime.now(tz)
        cleanup_count = 0
        with app_conn.cursor() as cur:
            cur.execute(
                """UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s
                   WHERE site_id = %s AND status = 'active'
                   AND expires_at IS NOT NULL AND expires_at < %s
                   RETURNING id, task_type""",
                (now_dt, site_id, now_dt),
            )
            for task_id, task_type in cur.fetchall():
                self._history(cur, task_id, "expired", "active", "expired",
                              task_type, task_type, {"reason": "post_simulation_cleanup"})
                cleanup_count += 1
        app_conn.commit()
        stats["expired"] += cleanup_count
        if cleanup_count:
            self.logger.info("DWS_TASK_ENGINE [推演]: 收尾清理 %d 个已过期任务", cleanup_count)
        total_elapsed = time.time() - t0
        self.logger.info(
            "DWS_TASK_ENGINE [推演] 完成: %.0fs, 创建=%d 完成=%d 解除=%d 覆盖=%d 过期=%d 回访=%d 事件=%d 跳过=%d active=%d",
            total_elapsed, stats["created"], stats["completed"], stats["resolved"],
            stats["overridden"], stats["expired"],
            stats["follow_up_created"], stats["recall_events"],
            stats["skipped_no_snapshot"], len(active_tasks),
        )
        app_conn.close()
        # After simulation, trigger the daily flow (recall_detector + task_generator)
        # so the latest visit data completes recalls and POOL filtering cleans stock.
        self.logger.info("DWS_TASK_ENGINE [推演]: 触发日常流程 ...")
        try:
            http_result = self._run_http_mode()
            self.logger.info("DWS_TASK_ENGINE [推演]: 日常流程完成 %s", http_result)
        except Exception:
            self.logger.exception("DWS_TASK_ENGINE [推演]: 日常流程触发失败(不影响推演结果)")
        return {
            "status": "SUCCESS",
            "counts": {
                # Framework standard fields (shown in the summary box).
                "inserted": stats["created"],
                "updated": stats["overridden"],
                "skipped": stats["skipped_no_snapshot"],
                "errors": 0,
                # Raw detail counters.
                **stats,
            },
        }

    # ── Simulation helpers ──

    def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
        """Resolve the (start, end) date range from config, then context."""
        wo = self.config.get("run.window_override") or {}
        start_str = wo.get("start")
        end_str = wo.get("end")
        if start_str and end_str:
            return self._parse_date(start_str), self._parse_date(end_str)
        if context and context.window_start and context.window_end:
            return context.window_start.date(), context.window_end.date()
        raise ValueError("推演模式需要指定时间窗口")

    @staticmethod
    def _parse_date(s) -> date:
        """Coerce a date, datetime, or ISO string prefix into a date."""
        if isinstance(s, date) and not isinstance(s, datetime):
            return s
        if isinstance(s, datetime):
            return s.date()
        return date.fromisoformat(str(s).strip()[:10])

    def _get_site_id(self, etl_conn) -> int:
        """Return the (single) site_id present in the relation index table.

        Raises RuntimeError when the table is empty, i.e. DWS_INDEX_BACKFILL
        has not been run yet.
        """
        with etl_conn.cursor() as cur:
            cur.execute("SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1")
            row = cur.fetchone()
        etl_conn.commit()
        if not row:
            raise RuntimeError("relation_index 表为空,请先运行 DWS_INDEX_BACKFILL")
        return row[0]

    def _load_snapshot(self, etl_conn, site_id: int, stat_date: date) -> dict:
        """Load one day's index snapshot: relation pairs, WBI and NCI scores."""
        result = {"relation": {}, "wbi": {}, "nci": {}}
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT assistant_id, member_id, rs_display, os_label, session_count
                   FROM dws.dws_member_assistant_relation_index
                   WHERE site_id = %s AND stat_date = %s""",
                (site_id, stat_date),
            )
            for r in cur.fetchall():
                result["relation"][(r[0], r[1])] = {
                    "rs": Decimal(str(r[2])), "os_label": r[3], "session_count": r[4],
                }
            # WBI: also record status to filter converted (OLD) members out of NCI.
            old_members = set()
            cur.execute(
                """SELECT member_id, display_score, status FROM dws.dws_member_winback_index
                   WHERE site_id = %s AND stat_date = %s""",
                (site_id, stat_date),
            )
            for r in cur.fetchall():
                result["wbi"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
                if r[2] == "OLD":
                    old_members.add(r[0])
            # NCI: exclude members already converted to OLD (avoid stale high scores).
            cur.execute(
                """SELECT member_id, display_score FROM dws.dws_member_newconv_index
                   WHERE site_id = %s AND stat_date = %s""",
                (site_id, stat_date),
            )
            for r in cur.fetchall():
                if r[0] not in old_members:
                    result["nci"][r[0]] = Decimal(str(r[1])) if r[1] else Decimal(0)
        etl_conn.commit()
        return result

    def _load_settlements(self, etl_conn, site_id: int, d: date) -> dict:
        """Assistant-level settlements for one day.

        settle_type=1 counts unconditionally; settle_type=3 only for BONUS
        services (order_assistant_type = 2).
        Returns {(assistant_id, member_id): latest pay_time}.
        """
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
        day_end = day_start + timedelta(days=1)
        settlements = {}
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT sl.site_assistant_id, sh.member_id, MAX(sh.pay_time)
                   FROM dwd.dwd_settlement_head sh
                   JOIN dwd.dwd_assistant_service_log sl
                     ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
                   WHERE sh.site_id = %s
                   AND (sh.settle_type = 1 OR (sh.settle_type = 3 AND sl.order_assistant_type = 2))
                   AND sh.pay_time >= %s AND sh.pay_time < %s
                   GROUP BY sl.site_assistant_id, sh.member_id""",
                (site_id, day_start, day_end),
            )
            for r in cur.fetchall():
                if r[0] and r[1]:
                    settlements[(r[0], r[1])] = r[2]
        etl_conn.commit()
        return settlements

    def _load_member_visits(self, etl_conn, site_id: int, d: date) -> dict:
        """Store-level visit detection for one day, used for resolved marking.

        Includes settle_type=1 even without any assistant service log.
        Returns {member_id: latest pay_time}.
        """
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
        day_end = day_start + timedelta(days=1)
        visits = {}
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT sh.member_id, MAX(sh.pay_time)
                   FROM dwd.dwd_settlement_head sh
                   WHERE sh.site_id = %s
                   AND (
                       sh.settle_type = 1
                       OR (sh.settle_type = 3 AND EXISTS (
                           SELECT 1 FROM dwd.dwd_assistant_service_log sl
                           WHERE sl.order_settle_id = sh.order_settle_id
                           AND sl.is_delete = 0
                           AND sl.order_assistant_type = 2
                       ))
                   )
                   AND sh.pay_time >= %s AND sh.pay_time < %s
                   GROUP BY sh.member_id""",
                (site_id, day_start, day_end),
            )
            for r in cur.fetchall():
                if r[0]:
                    visits[r[0]] = r[1]
        etl_conn.commit()
        return visits

    def _bulk_load_settlements(self, etl_conn, site_id: int, start: date, end: date, tz) -> dict:
        """One query for all assistant-level settlements, sliced per day.

        Returns {date: {(assistant_id, member_id): latest pay_time}}.
        """
        from collections import defaultdict
        day_start = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=tz)
        day_end = datetime(end.year, end.month, end.day, 0, 0, 0, tzinfo=tz) + timedelta(days=1)
        result = defaultdict(dict)
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT sl.site_assistant_id, sh.member_id, sh.pay_time
                   FROM dwd.dwd_settlement_head sh
                   JOIN dwd.dwd_assistant_service_log sl
                     ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
                   WHERE sh.site_id = %s
                   AND (sh.settle_type = 1 OR (sh.settle_type = 3 AND sl.order_assistant_type = 2))
                   AND sh.pay_time >= %s AND sh.pay_time < %s""",
                (site_id, day_start, day_end),
            )
            for aid, mid, pay_time in cur.fetchall():
                if aid and mid:
                    # Day bucket by local timezone; keep the latest pay_time per pair.
                    d_key = pay_time.astimezone(tz).date()
                    existing = result[d_key].get((aid, mid))
                    if existing is None or pay_time > existing:
                        result[d_key][(aid, mid)] = pay_time
        etl_conn.commit()
        return dict(result)

    def _bulk_load_member_visits(self, etl_conn, site_id: int, start: date, end: date, tz) -> dict:
        """One query for all store-level visits, sliced per day.

        Returns {date: {member_id: latest pay_time}}.
        """
        from collections import defaultdict
        day_start = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=tz)
        day_end = datetime(end.year, end.month, end.day, 0, 0, 0, tzinfo=tz) + timedelta(days=1)
        result = defaultdict(dict)
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT sh.member_id, sh.pay_time
                   FROM dwd.dwd_settlement_head sh
                   WHERE sh.site_id = %s
                   AND (
                       sh.settle_type = 1
                       OR (sh.settle_type = 3 AND EXISTS (
                           SELECT 1 FROM dwd.dwd_assistant_service_log sl
                           WHERE sl.order_settle_id = sh.order_settle_id
                           AND sl.is_delete = 0
                           AND sl.order_assistant_type = 2
                       ))
                   )
                   AND sh.pay_time >= %s AND sh.pay_time < %s""",
                (site_id, day_start, day_end),
            )
            for mid, pay_time in cur.fetchall():
                if mid:
                    d_key = pay_time.astimezone(tz).date()
                    existing = result[d_key].get(mid)
                    if existing is None or pay_time > existing:
                        result[d_key][mid] = pay_time
        etl_conn.commit()
        return dict(result)

    @staticmethod
    def _load_task_generator_params(app_conn, site_id: int) -> dict:
        """Load generation thresholds from cfg_task_generator_params.

        Falls back to hard-coded defaults for any missing key, keeping behavior
        consistent with the daily task_generator.
        NOTE(review): site_id is accepted but the query is not site-filtered —
        presumably params are global; confirm against the backend.
        """
        defaults = {
            "high_threshold": 7.5,
            "normal_threshold": 4.0,
            "rs_min": 1.0,
            "rs_max": 6.0,
        }
        # DB param_key -> local key.
        key_map = {
            "high_priority_recall_threshold": "high_threshold",
            "priority_recall_threshold": "normal_threshold",
            "rs_min_for_relationship": "rs_min",
            "rs_max_for_relationship": "rs_max",
        }
        with app_conn.cursor() as cur:
            cur.execute("SELECT param_key, param_value FROM biz.cfg_task_generator_params")
            for param_key, param_value in cur.fetchall():
                mapped = key_map.get(param_key)
                if mapped:
                    defaults[mapped] = float(param_value)
        app_conn.commit()
        return defaults

    def _bulk_load_snapshots(self, etl_conn, site_id: int, start: date, end: date) -> dict:
        """One query per index table (relation/wbi/nci), sliced per day.

        Returns {date: {"relation": ..., "wbi": ..., "nci": ...}}.
        """
        from collections import defaultdict
        result = defaultdict(lambda: {"relation": {}, "wbi": {}, "nci": {}})
        with etl_conn.cursor() as cur:
            # relation_index
            cur.execute(
                """SELECT stat_date, assistant_id, member_id, rs_display, os_label, session_count
                   FROM dws.dws_member_assistant_relation_index
                   WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
                (site_id, start, end),
            )
            for sd, aid, mid, rs, os_label, sc in cur.fetchall():
                result[sd]["relation"][(aid, mid)] = {
                    "rs": Decimal(str(rs)), "os_label": os_label, "session_count": sc,
                }
            # WBI: also collect per-day OLD members to filter NCI below.
            old_members_by_date = defaultdict(set)
            cur.execute(
                """SELECT stat_date, member_id, display_score, status
                   FROM dws.dws_member_winback_index
                   WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
                (site_id, start, end),
            )
            for sd, mid, score, status in cur.fetchall():
                result[sd]["wbi"][mid] = Decimal(str(score)) if score else Decimal(0)
                if status == "OLD":
                    old_members_by_date[sd].add(mid)
            # NCI: exclude members already converted to OLD on that day.
            cur.execute(
                """SELECT stat_date, member_id, display_score
                   FROM dws.dws_member_newconv_index
                   WHERE site_id = %s AND stat_date >= %s AND stat_date <= %s""",
                (site_id, start, end),
            )
            for sd, mid, score in cur.fetchall():
                if mid not in old_members_by_date.get(sd, set()):
                    result[sd]["nci"][mid] = Decimal(str(score)) if score else Decimal(0)
        etl_conn.commit()
        return dict(result)

    def _simulate_day(self, app_conn, etl_conn, site_id, d, tz, snapshot, active_tasks, stats,
                      *, preloaded_settlements=None, preloaded_visits=None, task_params=None):
        """Replay one simulated day: expiry -> generation -> recall -> resolved.

        Mutates `active_tasks` ({(assistant_id, member_id): task dict}) and
        `stats` in place; commits the app connection once at the end.
        """
        # The simulated "engine run" timestamp for this day is 07:00 local.
        day_dt = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=tz)
        # 1. Expiry check: drop active tasks whose expires_at is before this run.
        expired_keys = [k for k, t in active_tasks.items() if t.get("expires_at") and t["expires_at"] < day_dt]
        for key in expired_keys:
            task = active_tasks.pop(key)
            stats["expired"] += 1
            with app_conn.cursor() as cur:
                cur.execute("UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s", (day_dt, task["id"]))
                self._history(cur, task["id"], "expired", "active", "expired", task["task_type"], task["task_type"], {"simulated": True})
        # 2. Task generation (with conflict/override strategy).
        relation = snapshot["relation"]
        wbi_map = snapshot["wbi"]
        nci_map = snapshot["nci"]
        # Only pairs where the assistant owns the member (MAIN/COMANAGE) and
        # has actually had sessions are eligible.
        ownership_pairs = [
            (aid, mid, info)
            for (aid, mid), info in relation.items()
            if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
        ]
        for aid, mid, info in ownership_pairs:
            wbi = wbi_map.get(mid, Decimal(0))
            nci = nci_map.get(mid, Decimal(0))
            rs = info["rs"]
            # Parameterized task typing (mirrors daily task_generator._process_pair).
            priority_score = max(wbi, nci)
            if task_params:
                ht = Decimal(str(task_params["high_threshold"]))
                nt = Decimal(str(task_params["normal_threshold"]))
                rs_min = Decimal(str(task_params["rs_min"]))
                rs_max = Decimal(str(task_params["rs_max"]))
            else:
                # Fallback thresholds when no params were loaded.
                ht, nt, rs_min, rs_max = Decimal(7), Decimal(5), Decimal(1), Decimal(6)
            if priority_score > ht:
                new_type = "high_priority_recall"
            elif priority_score > nt:
                new_type = "priority_recall"
            elif rs > rs_min and rs < rs_max:
                new_type = "relationship_building"
            else:
                new_type = None
            if not new_type:
                continue
            key = (aid, mid)
            existing = active_tasks.get(key)
            # Recall tasks are prioritized by the recall score, relationship
            # tasks by the relation score.
            priority = float(max(wbi, nci)) if new_type in ("high_priority_recall", "priority_recall") else float(rs)
            if existing:
                if existing["task_type"] == new_type:
                    continue
                if existing["task_type"] == "follow_up_visit":
                    # follow_up_visit: keep it with a grace period and create the
                    # higher-priority task alongside it.
                    with app_conn.cursor() as cur:
                        if not existing.get("expires_at"):
                            cur.execute(
                                "UPDATE biz.coach_tasks SET expires_at = created_at + INTERVAL '72 hours', updated_at = %s WHERE id = %s",
                                (day_dt, existing["id"]),
                            )
                            self._history(cur, existing["id"], "expires_at_filled", "active", "active",
                                          "follow_up_visit", "follow_up_visit",
                                          {"reason": "higher_priority_task_created", "simulated": True})
                        cur.execute(
                            """INSERT INTO biz.coach_tasks
                               (site_id, assistant_id, member_id, task_type, status,
                                priority_score, parent_task_id, created_at, updated_at)
                               VALUES (%s, %s, %s, %s, 'active', %s, %s, %s, %s)
                               ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
                               DO UPDATE SET priority_score = EXCLUDED.priority_score, updated_at = EXCLUDED.updated_at
                               RETURNING id""",
                            (site_id, aid, mid, new_type, priority, existing["id"], day_dt, day_dt),
                        )
                        new_id = cur.fetchone()[0]
                        self._history(cur, new_id, "created", None, "active", "follow_up_visit", new_type, {"simulated": True})
                    active_tasks[key] = {"id": new_id, "task_type": new_type, "created_at": day_dt, "expires_at": None, "priority": priority}
                    stats["created"] += 1
                else:
                    # Non-follow_up: override the existing row in place.
                    with app_conn.cursor() as cur:
                        # First deactivate any conflicting active row of the same
                        # new_type to avoid the partial unique constraint.
                        cur.execute(
                            """UPDATE biz.coach_tasks SET status = 'inactive', updated_at = %s
                               WHERE site_id = %s AND assistant_id = %s AND member_id = %s
                               AND task_type = %s AND status = 'active' AND id != %s""",
                            (day_dt, site_id, aid, mid, new_type, existing["id"]),
                        )
                        cur.execute(
                            "UPDATE biz.coach_tasks SET task_type = %s, priority_score = %s, updated_at = %s WHERE id = %s AND status = 'active'",
                            (new_type, priority, day_dt, existing["id"]),
                        )
                        self._history(cur, existing["id"], "type_override", "active", "active",
                                      existing["task_type"], new_type,
                                      {"old_priority": existing.get("priority"), "simulated": True})
                    existing["task_type"] = new_type
                    existing["priority"] = priority
                    stats["overridden"] += 1
            else:
                # New task (upsert: refresh priority if same-type active exists).
                with app_conn.cursor() as cur:
                    cur.execute(
                        """INSERT INTO biz.coach_tasks
                           (site_id, assistant_id, member_id, task_type, status,
                            priority_score, created_at, updated_at)
                           VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                           ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
                           DO UPDATE SET priority_score = EXCLUDED.priority_score, updated_at = EXCLUDED.updated_at
                           RETURNING id""",
                        (site_id, aid, mid, new_type, priority, day_dt, day_dt),
                    )
                    task_id = cur.fetchone()[0]
                    self._history(cur, task_id, "created", None, "active", None, new_type, {"simulated": True})
                active_tasks[key] = {"id": task_id, "task_type": new_type, "created_at": day_dt, "expires_at": None, "priority": priority}
                stats["created"] += 1
        # 3. Recall detection (preloaded data preferred; falls back to per-day query).
        settlements = preloaded_settlements if preloaded_settlements is not None else self._load_settlements(etl_conn, site_id, d)
        for (aid, mid), pay_time in settlements.items():
            key = (aid, mid)
            task = active_tasks.get(key)
            with app_conn.cursor() as cur:
                try:
                    # Idempotent per-day event insert; the ON CONFLICT target is
                    # the per-day expression index, DO NOTHING dedupes replays.
                    cur.execute(
                        """INSERT INTO biz.recall_events
                           (site_id, assistant_id, member_id, pay_time, task_id, task_type, created_at)
                           VALUES (%s, %s, %s, %s, %s, %s, %s)
                           ON CONFLICT (site_id, assistant_id, member_id,
                                        (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                           DO NOTHING RETURNING id""",
                        (site_id, aid, mid, pay_time, task["id"] if task else None, task["task_type"] if task else None, day_dt),
                    )
                    if cur.fetchone():
                        stats["recall_events"] += 1
                except Exception:
                    # Best-effort event logging; never block the simulation.
                    pass
            # Only recall-type tasks created before the service complete.
            if not task or task["task_type"] not in ("high_priority_recall", "priority_recall") or pay_time <= task["created_at"]:
                continue
            with app_conn.cursor() as cur:
                cur.execute(
                    """UPDATE biz.coach_tasks SET status = 'completed', completed_at = %s,
                       completed_task_type = %s, completion_type = 'auto', updated_at = %s
                       WHERE id = %s AND status = 'active'""",
                    (pay_time, task["task_type"], day_dt, task["id"]),
                )
                self._history(cur, task["id"], "completed", "active", "completed",
                              task["task_type"], task["task_type"],
                              {"service_time": str(pay_time), "simulated": True})
            stats["completed"] += 1
            # A completed recall spawns a follow_up_visit with a 72h grace window.
            expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
            with app_conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO biz.coach_tasks
                       (site_id, assistant_id, member_id, task_type, status, expires_at, created_at, updated_at)
                       VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                       ON CONFLICT (site_id, assistant_id, member_id, task_type) WHERE (status = 'active')
                       DO UPDATE SET expires_at = EXCLUDED.expires_at, updated_at = EXCLUDED.updated_at
                       RETURNING id""",
                    (site_id, aid, mid, expires_at, day_dt, day_dt),
                )
                fu_id = cur.fetchone()[0]
                self._history(cur, fu_id, "created", None, "active", None, "follow_up_visit",
                              {"reason": "recall_completed", "simulated": True})
            active_tasks[key] = {"id": fu_id, "task_type": "follow_up_visit", "created_at": day_dt, "expires_at": expires_at}
            stats["follow_up_created"] += 1
        # 3b. Store-level recall resolution: when the member visited the store,
        # recall tasks of assistants who did NOT serve them are marked resolved.
        member_visits = preloaded_visits if preloaded_visits is not None else self._load_member_visits(etl_conn, site_id, d)
        resolved_keys = [
            k for k, t in active_tasks.items()
            if k[1] in member_visits
            and t["task_type"] in ("high_priority_recall", "priority_recall")
            and member_visits[k[1]] > t["created_at"]
        ]
        for key in resolved_keys:
            task = active_tasks.pop(key)
            pay_time = member_visits[key[1]]
            with app_conn.cursor() as cur:
                cur.execute(
                    """UPDATE biz.coach_tasks SET status = 'resolved', updated_at = %s
                       WHERE id = %s AND status = 'active'""",
                    (day_dt, task["id"]),
                )
                self._history(cur, task["id"], "customer_returned", "active", "resolved",
                              task["task_type"], task["task_type"],
                              {"service_time": str(pay_time), "simulated": True})
            stats["resolved"] += 1
        app_conn.commit()

    @staticmethod
    def _history(cur, task_id, action, old_status, new_status, old_task_type, new_task_type, detail=None):
        """Append one row to coach_task_history on the caller's cursor.

        No-op when task_id is None (e.g. an event without an associated task).
        """
        if task_id is None:
            return
        cur.execute(
            """INSERT INTO biz.coach_task_history
               (task_id, action, old_status, new_status, old_task_type, new_task_type, detail)
               VALUES (%s, %s, %s, %s, %s, %s, %s)""",
            (task_id, action, old_status, new_status, old_task_type, new_task_type,
             json.dumps(detail) if detail else None),
        )

    def _clean_date_range(self, app_conn, tz, start_date: date, end_date: date):
        """Delete task data created within [start_date, end_date]; keep the rest.

        Order matters: history first (FK to tasks), then events, then tasks.
        """
        range_start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0, tzinfo=tz)
        range_end = datetime(end_date.year, end_date.month, end_date.day, 23, 59, 59, tzinfo=tz)
        with app_conn.cursor() as cur:
            cur.execute(
                "DELETE FROM biz.coach_task_history WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at >= %s AND created_at <= %s)",
                (range_start, range_end),
            )
            h = cur.rowcount
            cur.execute(
                "DELETE FROM biz.recall_events WHERE created_at >= %s AND created_at <= %s",
                (range_start, range_end),
            )
            e = cur.rowcount
            cur.execute(
                "DELETE FROM biz.coach_tasks WHERE created_at >= %s AND created_at <= %s",
                (range_start, range_end),
            )
            t = cur.rowcount
        app_conn.commit()
        if t > 0 or e > 0:
            self.logger.info("DWS_TASK_ENGINE [推演]: 清理 %s~%s 旧数据: %d history, %d events, %d tasks", start_date, end_date, h, e, t)

    def _load_existing_active_tasks(self, app_conn, site_id: int, before_date: date = None) -> dict:
        """Load existing active tasks into an in-memory dict.

        before_date: only load tasks with created_at < before_date, so "future"
        tasks after the simulated range are never loaded.
        Returns {(assistant_id, member_id): task dict}.
        """
        active_tasks = {}
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        with app_conn.cursor() as cur:
            if before_date:
                cutoff = datetime(before_date.year, before_date.month, before_date.day, 0, 0, 0, tzinfo=tz)
                cur.execute(
                    """SELECT id, assistant_id, member_id, task_type, created_at, expires_at, priority_score
                       FROM biz.coach_tasks
                       WHERE site_id = %s AND status = 'active' AND created_at < %s""",
                    (site_id, cutoff),
                )
            else:
                cur.execute(
                    """SELECT id, assistant_id, member_id, task_type, created_at, expires_at, priority_score
                       FROM biz.coach_tasks
                       WHERE site_id = %s AND status = 'active'""",
                    (site_id,),
                )
            for row in cur.fetchall():
                key = (row[1], row[2])
                active_tasks[key] = {
                    "id": row[0], "task_type": row[3],
                    "created_at": row[4], "expires_at": row[5],
                    "priority": float(row[6]) if row[6] else 0,
                }
        app_conn.commit()
        return active_tasks