feat: 2026-04-15~04-20 累积变更基线 — 多主线合流

主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
  - 新增 GET /xcx/coaches/{id}/banner 轻量接口
  - performance/records 加 coach_id 参数 + view_board_coach 权限分流
  - coach/customer/performance/board/task 服务层重构
  - fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
  - task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
  - recall_detector settle_type=3 双重限制 + 门店级 resolved

主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
  - perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
  - isScattered 散客标记端到端
  - foodDetail/phoneFull/creator* 字段透传

主线 3: P19 指数回测框架 Phase 1+2
  - 3 个指数表 stat_date 日快照模式
  - 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
  - task_engine 升级 HTTP 实时 + 推演回测双模式

主线 4: Core 维度层启用
  - 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
  - 修复 app 视图空查询问题

主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口

主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
  - schema 基线与 DDL 快照同步

主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)

附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
      backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具

合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-20 06:32:07 +08:00
parent 79d3c2e97e
commit 2a7a5d68aa
157 changed files with 14304 additions and 3717 deletions

View File

@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
"""
指数日快照回填任务(DWS_INDEX_BACKFILL)。
逐天调用 RelationIndexTask / WinbackIndexTask / NewconvIndexTask
为 3 张指数表生成历史日快照。
CHANGE 2026-04-12 | 性能优化:
- 任务实例复用(创建 1 次,循环 N 天复用)
- 减少 765 次 Task 初始化和参数表查询
CLI 用法:
python -m cli.main --tasks DWS_INDEX_BACKFILL \\
--window-start 2025-08-01 --window-end 2026-04-11
admin-web:在 ETL 任务配置页面选择 DWS_INDEX_BACKFILL,设置时间窗口。
"""
from __future__ import annotations
import time
from datetime import date, datetime, timedelta
from typing import Any, Dict, Optional
from ..base_task import BaseTask, TaskContext
from ..dws.index.relation_index_task import RelationIndexTask
from ..dws.index.winback_index_task import WinbackIndexTask
from ..dws.index.newconv_index_task import NewconvIndexTask
class IndexBackfillTask(BaseTask):
    """Utility task that backfills daily snapshots for the index tables.

    For each calendar day in the requested range, the task runs
    RelationIndexTask, WinbackIndexTask and NewconvIndexTask (in that order)
    against a per-day TaskContext. The three task instances are created once
    and reused for every day.
    """

    def get_task_code(self) -> str:
        """Return the task code string of this task."""
        return "DWS_INDEX_BACKFILL"

    def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
        """Run the three index tasks for every day of the resolved range.

        A failure of one index task on one day is logged and counted; it does
        not stop the remaining tasks or days.

        Returns:
            A dict with ``status`` ("SUCCESS" when no task run failed,
            otherwise "PARTIAL") and ``counts`` (days, completed, errors,
            elapsed_sec).

        Raises:
            ValueError: if no date range or no store id can be resolved.
        """
        start_date, end_date = self._parse_date_range(context)
        store_id = self._resolve_store_id(context)
        total_days = (end_date - start_date).days + 1
        self.logger.info(
            "DWS_INDEX_BACKFILL: %s ~ %s (%d天), store_id=%s",
            start_date, end_date, total_days, store_id,
        )

        # Instantiate each index task once; the same objects serve every day.
        runners = [
            (label, task_cls(self.config, self.db, self.api, self.logger))
            for label, task_cls in (
                ("RS", RelationIndexTask),
                ("WBI", WinbackIndexTask),
                ("NCI", NewconvIndexTask),
            )
        ]

        succeeded = 0
        failed = 0
        run_started = time.time()

        # range() is empty when total_days <= 0, so an inverted range is a no-op.
        for offset in range(total_days):
            day = start_date + timedelta(days=offset)
            day_ctx = self._build_day_context(day, store_id)
            day_num = offset + 1
            day_started = time.time()

            for step, (label, task) in enumerate(runners, start=1):
                try:
                    task.execute(day_ctx)
                except Exception:
                    self.logger.exception(
                        "DWS_INDEX_BACKFILL: %s %s 失败",
                        task.__class__.__name__, day,
                    )
                    failed += 1
                else:
                    succeeded += 1
                # Progress line is emitted whether the step succeeded or not.
                self.logger.info(
                    "DWS_INDEX_BACKFILL: %s [%d/%d] %s (%d/3)",
                    day, day_num, total_days, label, step,
                )

            day_elapsed = time.time() - day_started
            run_elapsed = time.time() - run_started
            # ETA = average seconds per finished day * days still to go.
            remaining_sec = (run_elapsed / day_num) * (total_days - day_num)
            self.logger.info(
                "DWS_INDEX_BACKFILL: %s [%d/%d %.0f%%] %.1fs/天 ETA %.0fs",
                day, day_num, total_days, day_num / total_days * 100,
                day_elapsed, remaining_sec,
            )

        total_elapsed = time.time() - run_started
        self.logger.info(
            "DWS_INDEX_BACKFILL 完成: %d/%d 成功, %d 失败, %.0fs",
            succeeded, total_days * 3, failed, total_elapsed,
        )

        status = "PARTIAL" if failed else "SUCCESS"
        return {
            "status": status,
            "counts": {
                "days": total_days,
                "completed": succeeded,
                "errors": failed,
                "elapsed_sec": round(total_elapsed, 1),
            },
        }

    def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
        """Resolve the (start, end) dates of the backfill.

        The ``run.window_override`` config entry wins when both its ``start``
        and ``end`` are set; otherwise the context's window_start/window_end
        are used.

        Raises:
            ValueError: if neither source provides a complete range.
        """
        override = self.config.get("run.window_override") or {}
        raw_start, raw_end = override.get("start"), override.get("end")
        if raw_start and raw_end:
            return self._parse_date(raw_start), self._parse_date(raw_end)

        if context and context.window_start and context.window_end:
            return context.window_start.date(), context.window_end.date()

        raise ValueError(
            "DWS_INDEX_BACKFILL 需要指定日期范围。"
            "CLI: --window-start 2025-08-01 --window-end 2026-04-11"
        )

    @staticmethod
    def _parse_date(s) -> date:
        """Coerce a date, datetime or ISO-formatted value to a ``date``.

        Anything that is not a date/datetime is stringified, stripped, and its
        first 10 characters are parsed with ``date.fromisoformat``.
        """
        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(s, datetime):
            return s.date()
        if isinstance(s, date):
            return s
        text = str(s).strip()
        return date.fromisoformat(text[:10])

    def _resolve_store_id(self, context: Optional[TaskContext]) -> int:
        """Return the store id from the context, else from ``app.store_id``.

        Raises:
            ValueError: if neither source yields a truthy store id.
        """
        from_context = getattr(context, "store_id", None) if context else None
        if from_context:
            return int(from_context)

        configured = self.config.get("app.store_id")
        if not configured:
            raise ValueError("DWS_INDEX_BACKFILL 需要 store_id")
        return int(configured)

    def _build_day_context(self, d: date, store_id: int) -> TaskContext:
        """Build the TaskContext used to compute the snapshot of day ``d``.

        The as-of instant is 23:59:00 of ``d`` in the configured timezone
        (``app.timezone``, default "Asia/Shanghai"); the window starts 90 days
        before that instant.
        """
        from zoneinfo import ZoneInfo

        zone = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        as_of = datetime(d.year, d.month, d.day, 23, 59, 0, tzinfo=zone)
        window_start = as_of - timedelta(days=90)
        span = as_of - window_start
        return TaskContext(
            store_id=store_id,
            window_start=window_start,
            window_end=as_of,
            window_minutes=int(span.total_seconds() / 60),
            as_of_date=as_of,
        )

View File

@@ -0,0 +1,473 @@
# -*- coding: utf-8 -*-
"""
历史任务推演任务(DWS_TASK_SIMULATION)。
基于指数日快照,逐天重放 task_generator + recall_detector 逻辑,
还原完整的任务生命周期。
CLI 用法:
python -m cli.main --tasks DWS_TASK_SIMULATION \\
--window-start 2025-08-01 --window-end 2026-03-28
admin-web:在 ETL 任务配置页面选择 DWS_TASK_SIMULATION,设置时间窗口。
"""
from __future__ import annotations
import json
import logging
import os
import sys
import time
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, Optional
from zoneinfo import ZoneInfo
import psycopg2
from ..base_task import BaseTask, TaskContext
# Import the pure functions of task_generator (backend code).
# The backend root is taken to be the "backend" directory under parents[5] of
# this file's resolved path.
# NOTE(review): the ancestor depth depends on the repository layout — confirm.
_BACKEND = Path(__file__).resolve().parents[5] / "backend"
# Prepend the backend root only if it is not already on sys.path; presumably
# this must run before the `app.services` import below can resolve.
if str(_BACKEND) not in sys.path:
sys.path.insert(0, str(_BACKEND))
from app.services.task_generator import (
IndexData,
determine_task_type,
should_replace_task,
)
logger = logging.getLogger(__name__)
# Simulation cutoff date (existing active tasks start from 03-29).
CUTOFF_DATE = date(2026, 3, 28)
# Hours added to a settlement's pay_time to get a follow-up task's expires_at.
FOLLOW_UP_HOURS = 48
class TaskSimulationTask(BaseTask):
    """Utility task that replays the coach-task lifecycle day by day.

    For each day in the requested range it loads that day's index snapshot
    from the ETL database, then expires, generates/overrides and
    auto-completes tasks, writing rows to ``biz.coach_tasks``,
    ``biz.coach_task_history`` and ``biz.recall_events`` in the business
    database. Currently tracked tasks are kept in an in-memory dict keyed by
    ``(assistant_id, member_id)``.
    """

    # Task types whose priority comes from max(WBI, NCI) and which a later
    # settlement can auto-complete.
    _RECALL_TASK_TYPES = ("high_priority_recall", "priority_recall")
    # Savepoint name used to isolate the best-effort recall_events insert.
    _RECALL_EVENT_SAVEPOINT = "recall_event_insert"

    def get_task_code(self) -> str:
        """Return the task code string of this task."""
        return "DWS_TASK_SIMULATION"

    def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
        """Resolve the date range and simulate every day in it.

        An end date later than ``CUTOFF_DATE`` is truncated to the cutoff.
        Rows created before the end of the cutoff day are deleted from the
        business database before the replay starts.

        Returns:
            ``{"status": "SUCCESS", "counts": stats}`` where ``stats`` holds
            the per-event counters.

        Raises:
            ValueError: if no date range can be resolved, the (truncated)
                range is empty, or the ``APP_DB_DSN`` environment variable is
                not set.
            RuntimeError: if the relation index table has no rows.
        """
        start_date, end_date = self._parse_date_range(context)
        if end_date > CUTOFF_DATE:
            self.logger.warning(
                "end_date %s 超过截止日期 %s,自动截断", end_date, CUTOFF_DATE
            )
            end_date = CUTOFF_DATE
        # Reject an empty range up front: otherwise the destructive cleanup
        # below would run and nothing would be simulated afterwards.
        if start_date > end_date:
            raise ValueError(
                "DWS_TASK_SIMULATION 日期范围无效: "
                f"起始 {start_date} 晚于结束 {end_date}"
            )

        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        # The ETL database connection is shared with the framework.
        etl_conn = self.db.conn
        # The business database needs its own connection.
        app_dsn = os.environ.get("APP_DB_DSN")
        if not app_dsn:
            raise ValueError("DWS_TASK_SIMULATION 需要 APP_DB_DSN 环境变量")

        app_conn = psycopg2.connect(app_dsn)
        try:
            app_conn.set_client_encoding("UTF8")
            site_id = self._get_site_id(etl_conn)
            total_days = (end_date - start_date).days + 1
            self.logger.info(
                "DWS_TASK_SIMULATION: %s ~ %s (%d天), site_id=%s",
                start_date, end_date, total_days, site_id,
            )

            # Remove earlier output created before the end of the cutoff day.
            self._clean_before_cutoff(app_conn, CUTOFF_DATE)

            active_tasks: dict[tuple[int, int], dict] = {}
            stats = {
                "created": 0, "completed": 0, "overridden": 0,
                "expired": 0, "follow_up_created": 0,
                "recall_events": 0, "skipped_no_snapshot": 0,
            }
            t0 = time.time()

            for offset in range(total_days):
                current = start_date + timedelta(days=offset)
                snapshot = self._load_snapshot(etl_conn, site_id, current)
                if not (snapshot["relation"] or snapshot["wbi"] or snapshot["nci"]):
                    # No index rows at all for this day: nothing to replay.
                    stats["skipped_no_snapshot"] += 1
                    continue

                self._simulate_day(
                    app_conn, etl_conn, site_id, current, tz,
                    snapshot, active_tasks, stats,
                )

                day_num = offset + 1
                if day_num % 30 == 0 or current == end_date:
                    elapsed = time.time() - t0
                    self.logger.info(
                        "DWS_TASK_SIMULATION: %s (%d/%d) 创建=%d 完成=%d 覆盖=%d 过期=%d %.0fs",
                        current, day_num, total_days,
                        stats["created"], stats["completed"],
                        stats["overridden"], stats["expired"], elapsed,
                    )

            total_elapsed = time.time() - t0
            self.logger.info(
                "DWS_TASK_SIMULATION 完成: %.0fs, 创建=%d 完成=%d 覆盖=%d 过期=%d 回访=%d 事件=%d 跳过=%d active=%d",
                total_elapsed, stats["created"], stats["completed"],
                stats["overridden"], stats["expired"],
                stats["follow_up_created"], stats["recall_events"],
                stats["skipped_no_snapshot"], len(active_tasks),
            )
        finally:
            # Close the dedicated connection even when a day fails midway.
            app_conn.close()

        return {
            "status": "SUCCESS",
            "counts": stats,
        }

    # ── Date parsing ──

    def _parse_date_range(self, context: Optional[TaskContext]) -> tuple[date, date]:
        """Resolve the (start, end) dates of the simulation.

        The ``run.window_override`` config entry wins when both its ``start``
        and ``end`` are set; otherwise the context's window_start/window_end
        are used.

        Raises:
            ValueError: if neither source provides a complete range.
        """
        wo = self.config.get("run.window_override") or {}
        start_str = wo.get("start")
        end_str = wo.get("end")
        if start_str and end_str:
            return self._parse_date(start_str), self._parse_date(end_str)
        if context and context.window_start and context.window_end:
            return context.window_start.date(), context.window_end.date()
        raise ValueError(
            "DWS_TASK_SIMULATION 需要指定日期范围。"
            "CLI: --window-start 2025-08-01 --window-end 2026-03-28"
        )

    @staticmethod
    def _parse_date(s) -> date:
        """Coerce a date, datetime or ISO-formatted value to a ``date``."""
        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(s, datetime):
            return s.date()
        if isinstance(s, date):
            return s
        return date.fromisoformat(str(s).strip()[:10])

    def _get_site_id(self, etl_conn) -> int:
        """Return one site_id found in the relation index table.

        Raises:
            RuntimeError: if the table has no rows.
        """
        with etl_conn.cursor() as cur:
            cur.execute(
                "SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1"
            )
            row = cur.fetchone()
        etl_conn.commit()
        if not row:
            raise RuntimeError("relation_index 表为空,请先运行 DWS_INDEX_BACKFILL")
        return row[0]

    # ── Data loading ──

    @staticmethod
    def _to_decimal(value) -> Decimal:
        """Convert a database score to Decimal, mapping NULL to 0."""
        if value is None:
            return Decimal(0)
        return Decimal(str(value))

    def _load_snapshot(self, etl_conn, site_id: int, stat_date: date) -> dict:
        """Load the index snapshot of one day.

        Returns:
            A dict with three maps:
            ``relation``: (assistant_id, member_id) -> {rs, os_label, session_count};
            ``wbi``: member_id -> winback display score;
            ``nci``: member_id -> newconv display score.
            NULL scores are read as ``Decimal(0)``.
        """
        result = {"relation": {}, "wbi": {}, "nci": {}}
        params = (site_id, stat_date)
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT assistant_id, member_id, rs_display, os_label, session_count
                   FROM dws.dws_member_assistant_relation_index
                   WHERE site_id = %s AND stat_date = %s""",
                params,
            )
            for r in cur.fetchall():
                result["relation"][(r[0], r[1])] = {
                    "rs": self._to_decimal(r[2]),
                    "os_label": r[3],
                    "session_count": r[4],
                }

            cur.execute(
                """SELECT member_id, display_score FROM dws.dws_member_winback_index
                   WHERE site_id = %s AND stat_date = %s""",
                params,
            )
            for r in cur.fetchall():
                result["wbi"][r[0]] = self._to_decimal(r[1])

            cur.execute(
                """SELECT member_id, display_score FROM dws.dws_member_newconv_index
                   WHERE site_id = %s AND stat_date = %s""",
                params,
            )
            for r in cur.fetchall():
                result["nci"][r[0]] = self._to_decimal(r[1])
        etl_conn.commit()
        return result

    def _load_settlements(self, etl_conn, site_id: int, d: date) -> dict:
        """Load the settlements paid on day ``d``.

        Returns:
            ``{(assistant_id, member_id): latest pay_time}`` for settlements
            with settle_type 1 or 3 whose pay_time falls within the local
            calendar day; rows with a falsy assistant or member id are dropped.
        """
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=tz)
        day_end = day_start + timedelta(days=1)
        settlements = {}
        with etl_conn.cursor() as cur:
            cur.execute(
                """SELECT sl.site_assistant_id, sh.member_id, MAX(sh.pay_time)
                   FROM dwd.dwd_settlement_head sh
                   JOIN dwd.dwd_assistant_service_log sl
                     ON sl.order_settle_id = sh.order_settle_id AND sl.is_delete = 0
                   WHERE sh.site_id = %s AND sh.settle_type IN (1,3)
                     AND sh.pay_time >= %s AND sh.pay_time < %s
                   GROUP BY sl.site_assistant_id, sh.member_id""",
                (site_id, day_start, day_end),
            )
            for r in cur.fetchall():
                if r[0] and r[1]:
                    settlements[(r[0], r[1])] = r[2]
        etl_conn.commit()
        return settlements

    # ── Simulation logic ──

    def _simulate_day(
        self, app_conn, etl_conn, site_id, d, tz,
        snapshot, active_tasks, stats,
    ):
        """Replay one day: expire, generate/override, detect recalls, commit.

        Every row written for the day is stamped with 07:00:00 local time of
        ``d``. ``active_tasks`` and ``stats`` are mutated in place.
        """
        day_dt = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=tz)
        self._expire_tasks(app_conn, day_dt, active_tasks, stats)
        self._generate_tasks(app_conn, site_id, day_dt, snapshot, active_tasks, stats)
        self._detect_recalls(app_conn, etl_conn, site_id, d, day_dt, active_tasks, stats)
        app_conn.commit()

    def _expire_tasks(self, app_conn, day_dt, active_tasks, stats):
        """Step 1: mark tracked tasks whose expires_at is before ``day_dt`` as expired."""
        expired_keys = [
            k for k, t in active_tasks.items()
            if t.get("expires_at") and t["expires_at"] < day_dt
        ]
        for key in expired_keys:
            task = active_tasks.pop(key)
            stats["expired"] += 1
            with app_conn.cursor() as cur:
                cur.execute(
                    "UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s",
                    (day_dt, task["id"]),
                )
                self._history(cur, task["id"], "expired", "active", "expired",
                              task["task_type"], task["task_type"],
                              {"simulated": True})

    def _generate_tasks(self, app_conn, site_id, day_dt, snapshot, active_tasks, stats):
        """Step 2: derive a task type per owned pair and create or override tasks."""
        wbi_map = snapshot["wbi"]
        nci_map = snapshot["nci"]
        for (aid, mid), info in snapshot["relation"].items():
            # Only pairs labelled MAIN/COMANAGE with at least one session count.
            if not (info["os_label"] in ("MAIN", "COMANAGE")
                    and info["session_count"] > 0):
                continue

            wbi = wbi_map.get(mid, Decimal(0))
            nci = nci_map.get(mid, Decimal(0))
            rs = info["rs"]
            new_type = determine_task_type(IndexData(
                site_id=site_id, assistant_id=aid, member_id=mid,
                wbi=wbi, nci=nci, rs=rs,
                has_active_recall=False, has_follow_up_note=False,
            ))
            if not new_type:
                continue

            if new_type in self._RECALL_TASK_TYPES:
                priority = float(max(wbi, nci))
            else:
                priority = float(rs)

            key = (aid, mid)
            existing = active_tasks.get(key)
            if not existing:
                self._create_task(app_conn, site_id, key, new_type, priority,
                                  day_dt, active_tasks, stats)
            elif existing["task_type"] == new_type:
                continue  # same type already tracked: nothing to do
            elif existing["task_type"] == "follow_up_visit":
                self._supersede_follow_up(app_conn, site_id, key, existing, new_type,
                                          priority, day_dt, active_tasks, stats)
            else:
                self._override_task_type(app_conn, existing, new_type,
                                         priority, day_dt, stats)

    def _supersede_follow_up(self, app_conn, site_id, key, existing, new_type,
                             priority, day_dt, active_tasks, stats):
        """Keep the follow-up row and create a new task that replaces it in tracking.

        The follow-up row stays in the database (its expires_at is filled in
        when missing) and is referenced as ``parent_task_id`` of the new row.
        NOTE(review): the follow-up is no longer tracked in ``active_tasks``
        afterwards, so the simulation never marks it expired — confirm intended.
        """
        aid, mid = key
        with app_conn.cursor() as cur:
            if not existing.get("expires_at"):
                # Grace period comes from FOLLOW_UP_HOURS; psycopg2 adapts the
                # timedelta to a PostgreSQL interval.
                cur.execute(
                    """UPDATE biz.coach_tasks
                       SET expires_at = created_at + %s, updated_at = %s
                       WHERE id = %s""",
                    (timedelta(hours=FOLLOW_UP_HOURS), day_dt, existing["id"]),
                )
                self._history(cur, existing["id"], "expires_at_filled",
                              "active", "active",
                              "follow_up_visit", "follow_up_visit",
                              {"reason": "higher_priority_task_created", "simulated": True})
            cur.execute(
                """INSERT INTO biz.coach_tasks
                   (site_id, assistant_id, member_id, task_type, status,
                    priority_score, parent_task_id, created_at, updated_at)
                   VALUES (%s, %s, %s, %s, 'active', %s, %s, %s, %s)
                   RETURNING id""",
                (site_id, aid, mid, new_type, priority,
                 existing["id"], day_dt, day_dt),
            )
            new_id = cur.fetchone()[0]
            self._history(cur, new_id, "created", None, "active",
                          "follow_up_visit", new_type, {"simulated": True})
        active_tasks[key] = {
            "id": new_id, "task_type": new_type,
            "created_at": day_dt, "expires_at": None,
            "priority": priority,
        }
        stats["created"] += 1

    def _override_task_type(self, app_conn, existing, new_type, priority, day_dt, stats):
        """Change the type and priority of an existing non-follow-up task in place."""
        with app_conn.cursor() as cur:
            cur.execute(
                """UPDATE biz.coach_tasks
                   SET task_type = %s, priority_score = %s, updated_at = %s
                   WHERE id = %s AND status = 'active'""",
                (new_type, priority, day_dt, existing["id"]),
            )
            self._history(cur, existing["id"], "type_override", "active", "active",
                          existing["task_type"], new_type,
                          {"old_priority": existing.get("priority"), "simulated": True})
        existing["task_type"] = new_type
        existing["priority"] = priority
        stats["overridden"] += 1

    def _create_task(self, app_conn, site_id, key, new_type, priority,
                     day_dt, active_tasks, stats):
        """Insert a new active task for a pair that has no tracked task."""
        aid, mid = key
        with app_conn.cursor() as cur:
            cur.execute(
                """INSERT INTO biz.coach_tasks
                   (site_id, assistant_id, member_id, task_type, status,
                    priority_score, created_at, updated_at)
                   VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                   RETURNING id""",
                (site_id, aid, mid, new_type, priority, day_dt, day_dt),
            )
            task_id = cur.fetchone()[0]
            self._history(cur, task_id, "created", None, "active",
                          None, new_type, {"simulated": True})
        active_tasks[key] = {
            "id": task_id, "task_type": new_type,
            "created_at": day_dt, "expires_at": None,
            "priority": priority,
        }
        stats["created"] += 1

    def _detect_recalls(self, app_conn, etl_conn, site_id, d, day_dt, active_tasks, stats):
        """Step 3: record the day's settlements and auto-complete recall tasks.

        A recall task is completed only when the settlement's pay_time is
        later than the task's created_at; a follow-up task expiring
        ``FOLLOW_UP_HOURS`` after pay_time then takes its place.
        """
        settlements = self._load_settlements(etl_conn, site_id, d)
        for (aid, mid), pay_time in settlements.items():
            key = (aid, mid)
            task = active_tasks.get(key)
            self._record_recall_event(app_conn, site_id, aid, mid, pay_time,
                                      task, day_dt, stats)

            if not task:
                continue
            if task["task_type"] not in self._RECALL_TASK_TYPES:
                continue
            if pay_time <= task["created_at"]:
                continue

            # Complete the recall task.
            with app_conn.cursor() as cur:
                cur.execute(
                    """UPDATE biz.coach_tasks
                       SET status = 'completed', completed_at = %s,
                           completed_task_type = %s, completion_type = 'auto', updated_at = %s
                       WHERE id = %s AND status = 'active'""",
                    (pay_time, task["task_type"], day_dt, task["id"]),
                )
                self._history(cur, task["id"], "completed", "active", "completed",
                              task["task_type"], task["task_type"],
                              {"service_time": str(pay_time), "simulated": True})
            stats["completed"] += 1

            # Create the follow-up visit task.
            expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
            with app_conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO biz.coach_tasks
                       (site_id, assistant_id, member_id, task_type, status,
                        expires_at, created_at, updated_at)
                       VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                       RETURNING id""",
                    (site_id, aid, mid, expires_at, day_dt, day_dt),
                )
                fu_id = cur.fetchone()[0]
                self._history(cur, fu_id, "created", None, "active",
                              None, "follow_up_visit",
                              {"reason": "recall_completed", "simulated": True})
            active_tasks[key] = {
                "id": fu_id, "task_type": "follow_up_visit",
                "created_at": day_dt, "expires_at": expires_at,
            }
            stats["follow_up_created"] += 1

    def _record_recall_event(self, app_conn, site_id, aid, mid, pay_time,
                             task, day_dt, stats):
        """Best-effort insert of one recall_events row.

        The insert runs inside a savepoint so that a database error can be
        rolled back and logged without aborting the day's transaction.
        ``stats["recall_events"]`` is incremented only when a row was inserted
        (ON CONFLICT DO NOTHING returns no row for duplicates).
        """
        inserted = False
        with app_conn.cursor() as cur:
            cur.execute(f"SAVEPOINT {self._RECALL_EVENT_SAVEPOINT}")
            try:
                cur.execute(
                    """INSERT INTO biz.recall_events
                       (site_id, assistant_id, member_id, pay_time,
                        task_id, task_type, created_at)
                       VALUES (%s, %s, %s, %s, %s, %s, %s)
                       ON CONFLICT (site_id, assistant_id, member_id,
                           (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                       DO NOTHING RETURNING id""",
                    (site_id, aid, mid, pay_time,
                     task["id"] if task else None,
                     task["task_type"] if task else None,
                     day_dt),
                )
                inserted = cur.fetchone() is not None
            except psycopg2.Error:
                # Undo only the failed insert; earlier writes of the day stay.
                cur.execute(f"ROLLBACK TO SAVEPOINT {self._RECALL_EVENT_SAVEPOINT}")
                self.logger.warning(
                    "DWS_TASK_SIMULATION: recall_event 写入失败 "
                    "assistant_id=%s member_id=%s pay_time=%s",
                    aid, mid, pay_time, exc_info=True,
                )
            cur.execute(f"RELEASE SAVEPOINT {self._RECALL_EVENT_SAVEPOINT}")
        if inserted:
            stats["recall_events"] += 1

    # ── Helpers ──

    @staticmethod
    def _history(cur, task_id, action, old_status, new_status,
                 old_task_type, new_task_type, detail=None):
        """Insert one biz.coach_task_history row; no-op when ``task_id`` is None.

        ``detail`` is stored as a JSON string, or NULL when it is empty/None.
        """
        if task_id is None:
            return
        cur.execute(
            """INSERT INTO biz.coach_task_history
               (task_id, action, old_status, new_status,
                old_task_type, new_task_type, detail)
               VALUES (%s, %s, %s, %s, %s, %s, %s)""",
            (task_id, action, old_status, new_status,
             old_task_type, new_task_type,
             json.dumps(detail) if detail else None),
        )

    def _clean_before_cutoff(self, app_conn, cutoff: date):
        """Delete tasks, their history and recall events created before the cutoff.

        The threshold is 23:59:59 of ``cutoff`` in the configured timezone.
        NOTE(review): rows are selected by ``created_at`` only; there is no
        filter on a "simulated" marker, so this assumes every row before the
        cutoff is simulation output — confirm before running against a
        database that holds real tasks in that period.
        """
        tz = ZoneInfo(self.config.get("app.timezone", "Asia/Shanghai"))
        cutoff_dt = datetime(cutoff.year, cutoff.month, cutoff.day, 23, 59, 59, tzinfo=tz)
        with app_conn.cursor() as cur:
            # History rows go first because they are selected via coach_tasks.
            cur.execute(
                """DELETE FROM biz.coach_task_history
                   WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at < %s)""",
                (cutoff_dt,),
            )
            h = cur.rowcount
            cur.execute(
                "DELETE FROM biz.recall_events WHERE created_at < %s",
                (cutoff_dt,),
            )
            e = cur.rowcount
            cur.execute(
                "DELETE FROM biz.coach_tasks WHERE created_at < %s",
                (cutoff_dt,),
            )
            t = cur.rowcount
        app_conn.commit()
        if t > 0:
            self.logger.info(
                "DWS_TASK_SIMULATION: 清理旧数据 %d history, %d events, %d tasks",
                h, e, t,
            )