feat: 2026-04-15~04-20 累积变更基线 — 多主线合流
主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
- 新增 GET /xcx/coaches/{id}/banner 轻量接口
- performance/records 加 coach_id 参数 + view_board_coach 权限分流
- coach/customer/performance/board/task 服务层重构
- fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
- task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
- recall_detector settle_type=3 双重限制 + 门店级 resolved
主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
- perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
- isScattered 散客标记端到端
- foodDetail/phoneFull/creator* 字段透传
主线 3: P19 指数回测框架 Phase 1+2
- 3 个指数表 stat_date 日快照模式
- 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
- task_engine 升级 HTTP 实时 + 推演回测双模式
主线 4: Core 维度层启用
- 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
- 修复 app 视图空查询问题
主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口
主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
- schema 基线与 DDL 快照同步
主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)
附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具
合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"""历史数据回填脚本。
|
||||
|
||||
用法:
|
||||
cd C:\\NeoZQYY
|
||||
cd C:\\Project\\NeoZQYY
|
||||
uv run python scripts/ops/ai_backfill.py [--dry-run] [--batch-size 10] [--interval 5]
|
||||
|
||||
功能:
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
对已有月份批量从 DWD 服务记录聚合助教区域课时,写入 dws_coach_area_hours。
|
||||
|
||||
用法:
|
||||
cd C:\\NeoZQYY
|
||||
cd C:\\Project\\NeoZQYY
|
||||
uv run python scripts/ops/backfill_coach_area_hours.py \\
|
||||
--site-id 2790685415443269 --start-month 2025-07-01 --end-month 2026-03-01
|
||||
"""
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
回填完成后触发 DWS_FINANCE_BOARD_CACHE 重算所有已完成周期缓存。
|
||||
|
||||
用法:
|
||||
cd C:\\NeoZQYY
|
||||
cd C:\\Project\\NeoZQYY
|
||||
uv run python scripts/ops/backfill_finance_area_daily.py \\
|
||||
--site-id 1 --start-date 2025-07-16 --end-date 2026-03-28
|
||||
|
||||
|
||||
210
scripts/ops/backfill_index_snapshots.py
Normal file
210
scripts/ops/backfill_index_snapshots.py
Normal file
@@ -0,0 +1,210 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
回填脚本:为 3 张指数表生成历史日快照
|
||||
|
||||
按日期升序逐天执行 RelationIndexTask / WinbackIndexTask / NewconvIndexTask,
|
||||
写入 stat_date 列,支持多日快照共存。
|
||||
|
||||
用法:
|
||||
cd C:\\Project\\NeoZQYY
|
||||
uv run python scripts/ops/backfill_index_snapshots.py \\
|
||||
--start 2025-08-01 --end 2026-04-11
|
||||
|
||||
# 仅回填 relation_index
|
||||
uv run python scripts/ops/backfill_index_snapshots.py \\
|
||||
--start 2025-08-01 --end 2026-04-11 --tasks RELATION
|
||||
|
||||
# 干跑模式(不写数据库)
|
||||
uv run python scripts/ops/backfill_index_snapshots.py \\
|
||||
--start 2025-08-01 --end 2025-08-03 --dry-run
|
||||
"""
|
||||
from __future__ import annotations

import argparse
import logging
import sys
import time
from datetime import date, datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo

# ── Environment bootstrap ──
# Make sibling ops-script helpers importable, then pin the process to the repo root.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from _env_paths import ensure_repo_root

ensure_repo_root()

# Switch CWD into the ETL connector directory so its task modules (which use
# relative config/resource paths) can be imported and executed.
import os
_ETL_ROOT = Path(__file__).resolve().parents[2] / "apps" / "etl" / "connectors" / "feiqiu"
sys.path.insert(0, str(_ETL_ROOT))
os.chdir(_ETL_ROOT)

from dotenv import load_dotenv
_ROOT = Path(__file__).resolve().parents[2]
# Repo-root .env first, then the ETL-local .env; neither overrides variables
# already present in the process environment.
load_dotenv(_ROOT / ".env", override=False)
load_dotenv(_ETL_ROOT / ".env", override=False)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("backfill_index")

# ── ETL components (importable only after the sys.path / chdir setup above) ──
from config.settings import AppConfig
from database.connection import DatabaseConnection
from tasks.base_task import TaskContext
from tasks.dws.index.relation_index_task import RelationIndexTask
from tasks.dws.index.winback_index_task import WinbackIndexTask
from tasks.dws.index.newconv_index_task import NewconvIndexTask

# All snapshot timestamps are interpreted in China Standard Time.
TZ = ZoneInfo("Asia/Shanghai")

# Maps the --tasks CLI keys to the ETL task classes they select.
TASK_MAP = {
    "RELATION": RelationIndexTask,
    "WINBACK": WinbackIndexTask,
    "NEWCONV": NewconvIndexTask,
}
|
||||
|
||||
|
||||
class DummyAPI:
    """Placeholder API client.

    The index tasks never touch the API layer, but their constructor
    signature requires an API argument, so this empty stand-in is passed.
    """
|
||||
|
||||
|
||||
def build_context(d: date, store_id: int) -> TaskContext:
    """Build a TaskContext whose 90-day lookback window ends at 23:59 on *d*.

    The as-of instant doubles as the window end, and window_minutes is
    derived from the same lookback delta, so the three fields always agree.
    """
    snapshot_end = datetime(d.year, d.month, d.day, 23, 59, 0, tzinfo=TZ)
    lookback = timedelta(days=90)
    return TaskContext(
        store_id=store_id,
        window_start=snapshot_end - lookback,
        window_end=snapshot_end,
        window_minutes=int(lookback.total_seconds() // 60),
        as_of_date=snapshot_end,
    )
|
||||
|
||||
|
||||
def run_backfill(
    start_date: date,
    end_date: date,
    task_keys: list[str],
    dry_run: bool = False,
):
    """Run the snapshot backfill day by day, oldest date first.

    For every calendar day in [start_date, end_date], each task named in
    *task_keys* (keys of TASK_MAP) is executed with a context whose window
    ends at 23:59 of that day, so each run writes that day's stat_date rows.

    Args:
        start_date: first snapshot date (inclusive).
        end_date: last snapshot date (inclusive).
        task_keys: pre-validated TASK_MAP keys, e.g. ["RELATION", "WINBACK"].
        dry_run: when True, log what would run but execute nothing
            (no database writes).

    Returns:
        True when every task run succeeded, False if any raised.
    """
    # Load configuration
    config = AppConfig.load()

    # Database connection shared by all task runs (rebuilt on failure below)
    db_conn = DatabaseConnection(
        dsn=config.config["db"]["dsn"],
        session=config.config["db"].get("session"),
        connect_timeout=config.config["db"].get("connect_timeout_sec"),
    )

    store_id = config.config["app"]["store_id"]
    total_days = (end_date - start_date).days + 1
    total_tasks = total_days * len(task_keys)

    logger.info(
        "回填参数: %s ~ %s (%d天), 任务=%s, store_id=%s, dry_run=%s",
        start_date, end_date, total_days, task_keys, store_id, dry_run,
    )

    if dry_run:
        logger.info("[DRY RUN] 仅计算不写入数据库")

    completed = 0  # counts dry-run skips as well as real successes
    errors = 0
    t0 = time.time()

    current = start_date
    while current <= end_date:
        ctx = build_context(current, store_id)
        day_t0 = time.time()

        for key in task_keys:
            task_cls = TASK_MAP[key]
            # Fresh task per (day, key) pair; picks up db_conn as rebound by
            # the reconnect logic below.
            task = task_cls(config, db_conn, DummyAPI(), logger)

            if dry_run:
                logger.info("[DRY RUN] 跳过 %s %s", key, current)
                completed += 1
                continue

            try:
                result = task.execute(ctx)
                # Tasks report either records_inserted or member_count;
                # fall back to "?" when neither key is present.
                records = result.get("records_inserted", result.get("member_count", "?"))
                logger.info(
                    " %s %s → %s 条记录",
                    key, current, records,
                )
                completed += 1
            except Exception:
                logger.exception(" %s %s 失败", key, current)
                errors += 1
                # Try to recover the connection so subsequent runs can proceed
                try:
                    db_conn.close()
                    db_conn = DatabaseConnection(
                        dsn=config.config["db"]["dsn"],
                        session=config.config["db"].get("session"),
                        connect_timeout=config.config["db"].get("connect_timeout_sec"),
                    )
                    # NOTE(review): the failed `task` still holds the old
                    # connection, but it is never reused — the next loop
                    # iteration constructs a new task with the fresh db_conn.
                except Exception:
                    logger.exception("数据库重连失败")

        elapsed = time.time() - day_t0
        progress = completed / total_tasks * 100 if total_tasks else 0
        logger.info(
            "✓ %s 完成 (%.1fs) [%d/%d %.0f%%]",
            current, elapsed, completed, total_tasks, progress,
        )

        current += timedelta(days=1)

    total_elapsed = time.time() - t0
    logger.info(
        "回填完成: %d/%d 成功, %d 失败, 耗时 %.1f 秒 (%.1f 分钟)",
        completed, total_tasks, errors, total_elapsed, total_elapsed / 60,
    )

    db_conn.close()
    return errors == 0
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse and validate arguments, then run the backfill."""
    arg_parser = argparse.ArgumentParser(description="回填指数表历史日快照")
    arg_parser.add_argument("--start", required=True, help="起始日期 (YYYY-MM-DD)")
    arg_parser.add_argument("--end", required=True, help="结束日期 (YYYY-MM-DD)")
    arg_parser.add_argument(
        "--tasks",
        default="RELATION,WINBACK,NEWCONV",
        help="任务列表,逗号分隔 (RELATION/WINBACK/NEWCONV)",
    )
    arg_parser.add_argument("--dry-run", action="store_true", help="干跑模式,不写数据库")
    opts = arg_parser.parse_args()

    first_day = date.fromisoformat(opts.start)
    last_day = date.fromisoformat(opts.end)
    selected = [k.strip().upper() for k in opts.tasks.split(",")]

    # Reject any task key that TASK_MAP does not know about.
    for k in selected:
        if k not in TASK_MAP:
            print(f"错误:未知任务 {k},可选: {list(TASK_MAP.keys())}", file=sys.stderr)
            sys.exit(1)

    if first_day > last_day:
        print("错误:start 必须 <= end", file=sys.stderr)
        sys.exit(1)

    ok = run_backfill(first_day, last_day, selected, dry_run=opts.dry_run)
    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(r"C:\NeoZQYY")
|
||||
ROOT = Path(r"C:\Project\NeoZQYY")
|
||||
ETL_AUDIT = ROOT / "apps" / "etl" / "pipelines" / "feiqiu" / "docs" / "audit" / "changes"
|
||||
ROOT_AUDIT = ROOT / "docs" / "audit" / "changes"
|
||||
|
||||
|
||||
1137
scripts/ops/churned_customer_report.py
Normal file
1137
scripts/ops/churned_customer_report.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -13,7 +13,7 @@ ETL 统一分析 — 编排入口
|
||||
--source etl-log 切换为读 ETL 落盘 JSON(一致性检查模式)
|
||||
|
||||
用法:
|
||||
cd C:\\NeoZQYY
|
||||
cd C:\\Project\\NeoZQYY
|
||||
uv run python scripts/ops/etl_unified_analysis.py
|
||||
uv run python scripts/ops/etl_unified_analysis.py --mode consistency --source etl-log
|
||||
uv run python scripts/ops/etl_unified_analysis.py --mode structure --date-from 2026-01-01
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# 生成 ETL 全流程联调综合报告
|
||||
# 输出路径:{SYSTEM_LOG_ROOT}/{date}__etl_integration_report.md
|
||||
# 环境变量 SYSTEM_LOG_ROOT 缺失时报错终止。
|
||||
# 用法:cd C:\NeoZQYY && python scripts/ops/gen_integration_report.py
|
||||
# 用法:cd C:\Project\NeoZQYY && python scripts/ops/gen_integration_report.py
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
5. 输出可用于测试的 JWT token
|
||||
|
||||
使用方式:
|
||||
cd C:\\NeoZQYY
|
||||
cd C:\\Project\\NeoZQYY
|
||||
python scripts/ops/init_test_user.py [--openid <openid>] [--role <role_code>] [--reset]
|
||||
|
||||
环境要求:
|
||||
|
||||
524
scripts/ops/simulate_historical_tasks.py
Normal file
524
scripts/ops/simulate_historical_tasks.py
Normal file
@@ -0,0 +1,524 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
历史任务推演脚本:基于日快照指数重放 task_generator + recall_detector 逻辑
|
||||
|
||||
前置条件:先运行 backfill_index_snapshots.py 生成历史指数快照。
|
||||
|
||||
用法:
|
||||
cd C:\\Project\\NeoZQYY
|
||||
uv run python scripts/ops/simulate_historical_tasks.py \\
|
||||
--start 2025-08-01 --end 2026-03-28
|
||||
|
||||
# 干跑模式
|
||||
uv run python scripts/ops/simulate_historical_tasks.py \\
|
||||
--start 2025-08-01 --end 2026-03-28 --dry-run
|
||||
|
||||
# 清理之前的模拟数据后重跑
|
||||
uv run python scripts/ops/simulate_historical_tasks.py \\
|
||||
--start 2025-08-01 --end 2026-03-28 --clean
|
||||
"""
|
||||
from __future__ import annotations

import argparse
import json
import logging
import sys
import time
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from zoneinfo import ZoneInfo

# ── Environment bootstrap ──
# Make sibling ops-script helpers importable, then pin the process to the repo root.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from _env_paths import ensure_repo_root

ensure_repo_root()

import os
from dotenv import load_dotenv

_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)

# The task_generator pure functions live under the backend app directory.
_BACKEND = _ROOT / "apps" / "backend"
sys.path.insert(0, str(_BACKEND))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("simulate_tasks")

import psycopg2
import psycopg2.extras

# All simulated timestamps are interpreted in China Standard Time.
TZ = ZoneInfo("Asia/Shanghai")

# Pure decision functions reused from the production task generator
# (importable only after _BACKEND was added to sys.path above).
from app.services.task_generator import (
    IndexData,
    determine_task_type,
    should_replace_task,
    TASK_TYPE_PRIORITY,
)

# Simulation cutoff (real production active tasks start on 03-29)
CUTOFF_DATE = date(2026, 3, 28)
# Follow-up task expiry window, in hours
FOLLOW_UP_HOURS = 48
|
||||
|
||||
|
||||
def get_connections():
    """Open and return (etl_conn, app_conn) psycopg2 connections.

    DSNs come from the PG_DSN / APP_DB_DSN environment variables (loaded
    from .env earlier); the process exits with status 1 when either is
    missing. Both connections are forced to UTF-8 client encoding.
    """
    dsns = (os.environ.get("PG_DSN"), os.environ.get("APP_DB_DSN"))
    if not all(dsns):
        print("错误:PG_DSN 和 APP_DB_DSN 必须在 .env 中配置", file=sys.stderr)
        sys.exit(1)

    connections = []
    for dsn in dsns:
        conn = psycopg2.connect(dsn)
        conn.set_client_encoding("UTF8")
        connections.append(conn)
    return tuple(connections)
|
||||
|
||||
|
||||
def load_index_snapshot(etl_conn, site_id: int, stat_date: date) -> dict:
    """Load the three index snapshots for one site and one stat_date.

    Returns a dict of the shape:
        {
            "relation": {(assistant_id, member_id): {rs, os_label, os_share, session_count}},
            "wbi": {member_id: Decimal},   # win-back index display score
            "nci": {member_id: Decimal},   # new-conversion index display score
        }
    Days with no snapshot rows simply leave the corresponding maps empty.
    """
    result = {"relation": {}, "wbi": {}, "nci": {}}

    with etl_conn.cursor() as cur:
        # Relation index: one row per (assistant, member) pair
        cur.execute(
            """SELECT assistant_id, member_id, rs_display, os_label, os_share, session_count
               FROM dws.dws_member_assistant_relation_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for row in cur.fetchall():
            # Round-trip through str() so any driver numeric type lands as an
            # exact Decimal.
            result["relation"][(row[0], row[1])] = {
                "rs": Decimal(str(row[2])),
                "os_label": row[3],
                "os_share": Decimal(str(row[4])),
                "session_count": row[5],
            }

        # WBI (win-back index)
        cur.execute(
            """SELECT member_id, display_score
               FROM dws.dws_member_winback_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for row in cur.fetchall():
            # NULL (and zero) scores collapse to Decimal(0)
            result["wbi"][row[0]] = Decimal(str(row[1])) if row[1] else Decimal(0)

        # NCI (new-conversion index)
        cur.execute(
            """SELECT member_id, display_score
               FROM dws.dws_member_newconv_index
               WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for row in cur.fetchall():
            result["nci"][row[0]] = Decimal(str(row[1])) if row[1] else Decimal(0)

    # Commit to end the read transaction (avoids idle-in-transaction sessions)
    etl_conn.commit()
    return result
|
||||
|
||||
|
||||
def load_settlements_for_day(etl_conn, site_id: int, d: date) -> dict:
    """Load paid settlements for one calendar day (Asia/Shanghai local time).

    Only settle_type 1 and 3 heads are considered, joined to non-deleted
    assistant service logs so each settlement is attributed to an assistant.

    Returns:
        {(assistant_id, member_id): latest_pay_time} — at most one entry per
        pair, keeping the latest pay_time of that day.
    """
    settlements = {}
    day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=TZ)
    day_end = day_start + timedelta(days=1)

    with etl_conn.cursor() as cur:
        cur.execute(
            """
            SELECT sl.site_assistant_id AS assistant_id,
                   sh.member_id,
                   MAX(sh.pay_time) AS latest_pay_time
            FROM dwd.dwd_settlement_head sh
            JOIN dwd.dwd_assistant_service_log sl
              ON sl.order_settle_id = sh.order_settle_id
             AND sl.is_delete = 0
            WHERE sh.site_id = %s
              AND sh.settle_type IN (1, 3)
              AND sh.pay_time >= %s AND sh.pay_time < %s
            GROUP BY sl.site_assistant_id, sh.member_id
            """,
            (site_id, day_start, day_end),
        )
        for row in cur.fetchall():
            # Skip rows where the assistant or member id is NULL/0
            if row[0] and row[1]:
                settlements[(row[0], row[1])] = row[2]

    # Commit to end the read transaction
    etl_conn.commit()
    return settlements
|
||||
|
||||
|
||||
def simulate_day(
    app_conn,
    etl_conn,
    site_id: int,
    d: date,
    snapshot: dict,
    active_tasks: dict,
    stats: dict,
    dry_run: bool = False,
):
    """Simulate one day of task generation and recall detection.

    Replays the production task_generator / recall_detector decision logic
    against the index snapshot for day *d*, mutating *active_tasks* and
    *stats* in place and (unless dry_run) persisting to biz.coach_tasks,
    biz.recall_events and biz.coach_task_history.

    Args:
        active_tasks: {(assistant_id, member_id): {"id": int, "task_type": str,
            "created_at": datetime, "expires_at": datetime|None}} —
            in-memory set of currently active tasks, carried across days.
        stats: running counters (created / completed / expired / ...).
        dry_run: when True, update only the in-memory state, write nothing.
    """
    # All of this day's writes are stamped at 07:00 local time
    day_datetime = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=TZ)

    # ── 1. Expiry: follow-up tasks whose expires_at is before today ──
    expired_keys = []
    for key, task in active_tasks.items():
        if task.get("expires_at") and task["expires_at"] < day_datetime:
            expired_keys.append(key)
    for key in expired_keys:
        task = active_tasks.pop(key)
        stats["expired"] += 1
        if not dry_run:
            with app_conn.cursor() as cur:
                cur.execute(
                    "UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s",
                    (day_datetime, task["id"]),
                )
                _insert_history(cur, task["id"], "expired", "active", "expired",
                                task["task_type"], task["task_type"],
                                {"reason": "follow_up_expired", "simulated": True})

    # ── 2. Task generation from the index snapshot ──
    relation = snapshot["relation"]
    wbi_map = snapshot["wbi"]
    nci_map = snapshot["nci"]

    # Only MAIN/COMANAGE ownership pairs with at least one session qualify
    ownership_pairs = [
        (aid, mid, info)
        for (aid, mid), info in relation.items()
        if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
    ]

    for aid, mid, info in ownership_pairs:
        wbi = wbi_map.get(mid, Decimal(0))
        nci = nci_map.get(mid, Decimal(0))
        rs = info["rs"]

        index_data = IndexData(
            site_id=site_id,
            assistant_id=aid,
            member_id=mid,
            wbi=wbi,
            nci=nci,
            rs=rs,
            has_active_recall=False,
            has_follow_up_note=False,
        )
        new_type = determine_task_type(index_data)
        if not new_type:
            continue

        key = (aid, mid)
        existing = active_tasks.get(key)

        if existing:
            if existing["task_type"] == new_type:
                continue  # same type: nothing to do
            if not should_replace_task(existing["task_type"], new_type):
                continue
            # Close the superseded task before creating the replacement
            if not dry_run:
                with app_conn.cursor() as cur:
                    cur.execute(
                        "UPDATE biz.coach_tasks SET status = 'inactive', updated_at = %s WHERE id = %s",
                        (day_datetime, existing["id"]),
                    )
                    _insert_history(cur, existing["id"], "type_change_close", "active", "inactive",
                                    existing["task_type"], new_type,
                                    {"reason": "replaced_by_simulation", "simulated": True})

        # Recall tasks rank by the stronger of WBI/NCI; others by relation score
        priority_score = float(max(wbi, nci)) if new_type in ("high_priority_recall", "priority_recall") else float(rs)

        if dry_run:
            active_tasks[key] = {
                "id": None,
                "task_type": new_type,
                "created_at": day_datetime,
                "expires_at": None,
            }
            stats["created"] += 1
            continue

        with app_conn.cursor() as cur:
            cur.execute(
                """INSERT INTO biz.coach_tasks
                   (site_id, assistant_id, member_id, task_type, status, priority_score, created_at, updated_at)
                   VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                   RETURNING id""",
                (site_id, aid, mid, new_type, priority_score, day_datetime, day_datetime),
            )
            task_id = cur.fetchone()[0]
            _insert_history(cur, task_id, "created", None, "active", None, new_type,
                            {"reason": "simulation_generated", "simulated": True})

        active_tasks[key] = {
            "id": task_id,
            "task_type": new_type,
            "created_at": day_datetime,
            "expires_at": None,
        }
        stats["created"] += 1

    # ── 3. Recall detection from the day's settlement records ──
    settlements = load_settlements_for_day(etl_conn, site_id, d)

    for (aid, mid), pay_time in settlements.items():
        key = (aid, mid)
        task = active_tasks.get(key)

        # Record the recall event (deduplicated per local calendar day)
        if not dry_run:
            with app_conn.cursor() as cur:
                try:
                    cur.execute(
                        """INSERT INTO biz.recall_events
                           (site_id, assistant_id, member_id, pay_time, task_id, task_type, created_at)
                           VALUES (%s, %s, %s, %s, %s, %s, %s)
                           ON CONFLICT (site_id, assistant_id, member_id,
                                        (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                           DO NOTHING RETURNING id""",
                        (site_id, aid, mid, pay_time,
                         task["id"] if task else None,
                         task["task_type"] if task else None,
                         day_datetime),
                    )
                    inserted = cur.fetchone()
                    if inserted:
                        stats["recall_events"] += 1
                except Exception:
                    # Best-effort: tolerate dedup conflicts and similar errors.
                    # NOTE(review): after a real SQL error psycopg2 leaves the
                    # transaction aborted until commit/rollback — confirm later
                    # statements in this day aren't silently failing too.
                    pass

        if not task:
            continue

        # Auto-complete an active recall task once payment arrives after it
        if task["task_type"] in ("high_priority_recall", "priority_recall"):
            if pay_time > task["created_at"]:
                stats["completed"] += 1
                if not dry_run:
                    with app_conn.cursor() as cur:
                        cur.execute(
                            """UPDATE biz.coach_tasks
                               SET status = 'completed', completed_at = %s, completed_task_type = %s,
                                   completion_type = 'auto', updated_at = %s
                               WHERE id = %s AND status = 'active'""",
                            (pay_time, task["task_type"], day_datetime, task["id"]),
                        )
                        _insert_history(cur, task["id"], "completed", "active", "completed",
                                        task["task_type"], task["task_type"],
                                        {"service_time": str(pay_time), "simulated": True})

                # Spawn the follow-up visit task that replaces the recall task
                expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
                if not dry_run:
                    with app_conn.cursor() as cur:
                        cur.execute(
                            """INSERT INTO biz.coach_tasks
                               (site_id, assistant_id, member_id, task_type, status, expires_at, created_at, updated_at)
                               VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                               RETURNING id""",
                            (site_id, aid, mid, expires_at, day_datetime, day_datetime),
                        )
                        fu_id = cur.fetchone()[0]
                        _insert_history(cur, fu_id, "created", None, "active", None, "follow_up_visit",
                                        {"reason": "recall_completed", "simulated": True})

                    active_tasks[key] = {
                        "id": fu_id,
                        "task_type": "follow_up_visit",
                        "created_at": day_datetime,
                        "expires_at": expires_at,
                    }
                else:
                    active_tasks[key] = {
                        "id": None,
                        "task_type": "follow_up_visit",
                        "created_at": day_datetime,
                        "expires_at": expires_at,
                    }
                stats["follow_up_created"] += 1

    if not dry_run:
        app_conn.commit()
|
||||
|
||||
|
||||
def _insert_history(cur, task_id, action, old_status, new_status, old_task_type, new_task_type, detail=None):
|
||||
"""在 coach_task_history 中记录变更。"""
|
||||
if task_id is None:
|
||||
return
|
||||
cur.execute(
|
||||
"""INSERT INTO biz.coach_task_history
|
||||
(task_id, action, old_status, new_status, old_task_type, new_task_type, detail)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s)""",
|
||||
(task_id, action, old_status, new_status, old_task_type, new_task_type,
|
||||
json.dumps(detail) if detail else None),
|
||||
)
|
||||
|
||||
|
||||
def get_site_id(etl_conn) -> int:
    """Return the (single) site_id present in the relation index table.

    Raises:
        RuntimeError: when the table is empty, i.e. backfill_index_snapshots.py
            has not been run yet.
    """
    with etl_conn.cursor() as cur:
        cur.execute("SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1")
        row = cur.fetchone()
    if not row:
        raise RuntimeError("relation_index 表为空,请先运行 backfill_index_snapshots.py")
    # BUGFIX: the commit used to sit *after* the return statement and was
    # unreachable, leaving the read transaction open; end it here like the
    # other snapshot loaders do.
    etl_conn.commit()
    return row[0]
|
||||
|
||||
|
||||
def clean_simulated_data(app_conn, cutoff: date):
    """Delete all simulation output created before end-of-day *cutoff*.

    Deletion order respects foreign keys: history rows first, then recall
    events, then the tasks themselves. Real production tasks are unaffected
    because they only start after CUTOFF_DATE.
    """
    cutoff_dt = datetime(cutoff.year, cutoff.month, cutoff.day, 23, 59, 59, tzinfo=TZ)
    with app_conn.cursor() as cur:
        # History first (FK references coach_tasks)
        cur.execute(
            """DELETE FROM biz.coach_task_history
               WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at < %s)""",
            (cutoff_dt,),
        )
        history_count = cur.rowcount

        # Then recall events
        cur.execute(
            "DELETE FROM biz.recall_events WHERE created_at < %s",
            (cutoff_dt,),
        )
        events_count = cur.rowcount

        # Finally the tasks themselves
        cur.execute(
            "DELETE FROM biz.coach_tasks WHERE created_at < %s",
            (cutoff_dt,),
        )
        tasks_count = cur.rowcount

    app_conn.commit()
    logger.info(
        "清理完成: %d 条 history, %d 条 recall_events, %d 条 coach_tasks",
        history_count, events_count, tasks_count,
    )
|
||||
|
||||
|
||||
def run_simulation(start_date: date, end_date: date, dry_run: bool = False, clean: bool = False):
    """Drive the day-by-day historical task simulation.

    Walks every day in [start_date, end_date] (capped at CUTOFF_DATE),
    loading that day's index snapshot and replaying task generation and
    recall detection via simulate_day(). Days with no snapshot at all are
    skipped and counted separately.

    Args:
        dry_run: compute everything but write nothing to the app DB.
        clean: first purge simulation data created before CUTOFF_DATE.
    """
    if end_date > CUTOFF_DATE:
        logger.warning("end_date %s 超过截止日期 %s,自动截断", end_date, CUTOFF_DATE)
        end_date = CUTOFF_DATE

    etl_conn, app_conn = get_connections()
    site_id = get_site_id(etl_conn)

    if clean:
        logger.info("清理 %s 之前的模拟数据...", CUTOFF_DATE)
        clean_simulated_data(app_conn, CUTOFF_DATE)

    total_days = (end_date - start_date).days + 1
    logger.info(
        "推演参数: %s ~ %s (%d天), site_id=%s, dry_run=%s",
        start_date, end_date, total_days, site_id, dry_run,
    )

    # The set of active tasks is carried across days entirely in memory
    active_tasks: dict[tuple[int, int], dict] = {}
    stats = {
        "created": 0, "completed": 0, "expired": 0,
        "follow_up_created": 0, "recall_events": 0, "skipped_no_snapshot": 0,
    }

    t0 = time.time()
    current = start_date
    while current <= end_date:
        snapshot = load_index_snapshot(etl_conn, site_id, current)

        # No snapshot for this day at all → nothing to replay
        if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
            stats["skipped_no_snapshot"] += 1
            current += timedelta(days=1)
            continue

        simulate_day(app_conn, etl_conn, site_id, current, snapshot, active_tasks, stats, dry_run)

        # Progress log every 30 simulated days and on the final day
        day_num = (current - start_date).days + 1
        if day_num % 30 == 0 or current == end_date:
            elapsed = time.time() - t0
            logger.info(
                "进度: %s (%d/%d) | 已创建=%d 已完成=%d 已过期=%d 回访=%d 事件=%d | %.0fs",
                current, day_num, total_days,
                stats["created"], stats["completed"], stats["expired"],
                stats["follow_up_created"], stats["recall_events"], elapsed,
            )

        current += timedelta(days=1)

    total_elapsed = time.time() - t0
    logger.info("=" * 60)
    logger.info("推演完成: %.1f 秒 (%.1f 分钟)", total_elapsed, total_elapsed / 60)
    logger.info(" 任务创建: %d", stats["created"])
    logger.info(" 任务完成: %d", stats["completed"])
    logger.info(" 任务过期: %d", stats["expired"])
    logger.info(" 回访生成: %d", stats["follow_up_created"])
    logger.info(" 召回事件: %d", stats["recall_events"])
    logger.info(" 跳过(无快照): %d", stats["skipped_no_snapshot"])
    logger.info(" 推演结束时 active 任务数: %d", len(active_tasks))

    # Distribution of task types still active at the end of the run
    type_dist = {}
    for task in active_tasks.values():
        tt = task["task_type"]
        type_dist[tt] = type_dist.get(tt, 0) + 1
    for tt, cnt in sorted(type_dist.items()):
        logger.info(" %s: %d", tt, cnt)

    etl_conn.close()
    app_conn.close()
|
||||
|
||||
|
||||
def main():
    """CLI entry point for the historical task simulation."""
    cli = argparse.ArgumentParser(description="历史任务推演(基于指数日快照)")
    cli.add_argument("--start", required=True, help="起始日期 (YYYY-MM-DD)")
    cli.add_argument("--end", required=True, help="结束日期 (YYYY-MM-DD)")
    cli.add_argument("--dry-run", action="store_true", help="干跑模式")
    cli.add_argument("--clean", action="store_true", help="清理之前的模拟数据后重跑")
    opts = cli.parse_args()

    first_day = date.fromisoformat(opts.start)
    last_day = date.fromisoformat(opts.end)

    if first_day > last_day:
        print("错误:start 必须 <= end", file=sys.stderr)
        sys.exit(1)

    run_simulation(first_day, last_day, dry_run=opts.dry_run, clean=opts.clean)


if __name__ == "__main__":
    main()
|
||||
@@ -6,7 +6,7 @@ $ErrorActionPreference = "Stop"
|
||||
|
||||
try {
|
||||
# CHANGE 2026-03-07 | 定位项目根目录:从 bat 启动目录推算,不穿透 junction
|
||||
# 背景:C:\NeoZQYY 是 junction → D:\NeoZQYY\...\repo,
|
||||
# 背景:C:\Project\NeoZQYY 是 junction → D:\NeoZQYY\...\repo,
|
||||
# $MyInvocation.MyCommand.Path 和 Split-Path 都会穿透 junction 解析到 D 盘,
|
||||
# 导致后端 CWD、.venv python、.env 全部指向 D 盘副本。
|
||||
# 解决:优先用环境变量 NEOZQYY_ROOT;其次用 bat 传入的 %~dp0(不穿透 junction);
|
||||
|
||||
Reference in New Issue
Block a user