feat: 2026-04-15~04-20 累积变更基线 — 多主线合流

主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
  - 新增 GET /xcx/coaches/{id}/banner 轻量接口
  - performance/records 加 coach_id 参数 + view_board_coach 权限分流
  - coach/customer/performance/board/task 服务层重构
  - fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
  - task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
  - recall_detector settle_type=3 双重限制 + 门店级 resolved

主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
  - perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
  - isScattered 散客标记端到端
  - foodDetail/phoneFull/creator* 字段透传

主线 3: P19 指数回测框架 Phase 1+2
  - 3 个指数表 stat_date 日快照模式
  - 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
  - task_engine 升级 HTTP 实时 + 推演回测双模式

主线 4: Core 维度层启用
  - 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
  - 修复 app 视图空查询问题

主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口

主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
  - schema 基线与 DDL 快照同步

主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)

附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
      backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具

合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-20 06:32:07 +08:00
parent 79d3c2e97e
commit 2a7a5d68aa
157 changed files with 14304 additions and 3717 deletions

View File

@@ -2,7 +2,7 @@
"""历史数据回填脚本。
用法:
cd C:\\NeoZQYY
cd C:\\Project\\NeoZQYY
uv run python scripts/ops/ai_backfill.py [--dry-run] [--batch-size 10] [--interval 5]
功能:

View File

@@ -5,7 +5,7 @@
对已有月份批量从 DWD 服务记录聚合助教区域课时,写入 dws_coach_area_hours。
用法:
cd C:\\NeoZQYY
cd C:\\Project\\NeoZQYY
uv run python scripts/ops/backfill_coach_area_hours.py \\
--site-id 2790685415443269 --start-month 2025-07-01 --end-month 2026-03-01
"""

View File

@@ -6,7 +6,7 @@
回填完成后触发 DWS_FINANCE_BOARD_CACHE 重算所有已完成周期缓存。
用法:
cd C:\\NeoZQYY
cd C:\\Project\\NeoZQYY
uv run python scripts/ops/backfill_finance_area_daily.py \\
--site-id 1 --start-date 2025-07-16 --end-date 2026-03-28

View File

@@ -0,0 +1,210 @@
# -*- coding: utf-8 -*-
"""
回填脚本:为 3 张指数表生成历史日快照
按日期升序逐天执行 RelationIndexTask / WinbackIndexTask / NewconvIndexTask
写入 stat_date 列,支持多日快照共存。
用法:
cd C:\\Project\\NeoZQYY
uv run python scripts/ops/backfill_index_snapshots.py \\
--start 2025-08-01 --end 2026-04-11
# 仅回填 relation_index
uv run python scripts/ops/backfill_index_snapshots.py \\
--start 2025-08-01 --end 2026-04-11 --tasks RELATION
# 干跑模式(不写数据库)
uv run python scripts/ops/backfill_index_snapshots.py \\
--start 2025-08-01 --end 2025-08-03 --dry-run
"""
from __future__ import annotations
import argparse
import logging
import sys
import time
from datetime import date, datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo

# ── Environment bootstrap ──
# Make sibling ops helpers importable, then pin the repo root.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from _env_paths import ensure_repo_root
ensure_repo_root()

# Switch into the ETL connector directory so its task modules import cleanly
# (the ETL code presumably relies on CWD-relative paths — TODO confirm).
import os
_ETL_ROOT = Path(__file__).resolve().parents[2] / "apps" / "etl" / "connectors" / "feiqiu"
sys.path.insert(0, str(_ETL_ROOT))
os.chdir(_ETL_ROOT)

from dotenv import load_dotenv

# Repo-root .env first, then the ETL-local .env; neither overrides vars
# already present in the environment.
_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)
load_dotenv(_ETL_ROOT / ".env", override=False)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("backfill_index")

# ── ETL components (importable only after the sys.path/chdir setup above) ──
from config.settings import AppConfig
from database.connection import DatabaseConnection
from tasks.base_task import TaskContext
from tasks.dws.index.relation_index_task import RelationIndexTask
from tasks.dws.index.winback_index_task import WinbackIndexTask
from tasks.dws.index.newconv_index_task import NewconvIndexTask

TZ = ZoneInfo("Asia/Shanghai")

# CLI task code → task class.
TASK_MAP = {
    "RELATION": RelationIndexTask,
    "WINBACK": WinbackIndexTask,
    "NEWCONV": NewconvIndexTask,
}
class DummyAPI:
    """Stand-in API object: the index tasks never call the API, so an
    empty placeholder satisfies the task constructor signature."""
def build_context(d: date, store_id: int) -> TaskContext:
    """Build a TaskContext whose 90-day window ends at 23:59 local time on *d*."""
    window_end = datetime(d.year, d.month, d.day, 23, 59, 0, tzinfo=TZ)
    window_start = window_end - timedelta(days=90)
    # Whole minutes in the window (exact: the window is a multiple of a minute).
    minutes = (window_end - window_start) // timedelta(minutes=1)
    return TaskContext(
        store_id=store_id,
        window_start=window_start,
        window_end=window_end,
        window_minutes=minutes,
        as_of_date=window_end,
    )
def run_backfill(
    start_date: date,
    end_date: date,
    task_keys: list[str],
    dry_run: bool = False,
):
    """Run the selected index tasks once per day over [start_date, end_date].

    For each day, builds a 90-day TaskContext ending at that day 23:59 and
    executes each requested task class, producing one stat_date snapshot in
    the corresponding index table.

    Args:
        start_date: first snapshot date (inclusive).
        end_date: last snapshot date (inclusive).
        task_keys: subset of TASK_MAP keys, e.g. ["RELATION"].
        dry_run: when True, log only; no task is executed, nothing is written.

    Returns:
        True when every day/task run succeeded, False if any failed.
    """
    # Load ETL configuration (DSN, session settings, store_id).
    config = AppConfig.load()
    # One DB connection shared by all task instances (rebound on reconnect).
    db_conn = DatabaseConnection(
        dsn=config.config["db"]["dsn"],
        session=config.config["db"].get("session"),
        connect_timeout=config.config["db"].get("connect_timeout_sec"),
    )
    store_id = config.config["app"]["store_id"]
    total_days = (end_date - start_date).days + 1
    total_tasks = total_days * len(task_keys)
    logger.info(
        "回填参数: %s ~ %s (%d天), 任务=%s, store_id=%s, dry_run=%s",
        start_date, end_date, total_days, task_keys, store_id, dry_run,
    )
    if dry_run:
        logger.info("[DRY RUN] 仅计算不写入数据库")
    completed = 0
    errors = 0
    t0 = time.time()
    current = start_date
    while current <= end_date:
        ctx = build_context(current, store_id)
        day_t0 = time.time()
        for key in task_keys:
            task_cls = TASK_MAP[key]
            # Fresh task instance per day/key so each run binds the current
            # (possibly reconnected) db_conn.
            task = task_cls(config, db_conn, DummyAPI(), logger)
            if dry_run:
                logger.info("[DRY RUN] 跳过 %s %s", key, current)
                completed += 1
                continue
            try:
                result = task.execute(ctx)
                # Tasks report either records_inserted or member_count —
                # TODO confirm against the task base class.
                records = result.get("records_inserted", result.get("member_count", "?"))
                logger.info(
                    " %s %s%s 条记录",
                    key, current, records,
                )
                completed += 1
            except Exception:
                logger.exception(" %s %s 失败", key, current)
                errors += 1
                # Best-effort reconnect so later days are not doomed by a
                # dead connection; the failed day is not retried.
                try:
                    db_conn.close()
                    db_conn = DatabaseConnection(
                        dsn=config.config["db"]["dsn"],
                        session=config.config["db"].get("session"),
                        connect_timeout=config.config["db"].get("connect_timeout_sec"),
                    )
                    # Next loop iteration builds tasks against the new connection.
                except Exception:
                    logger.exception("数据库重连失败")
        elapsed = time.time() - day_t0
        progress = completed / total_tasks * 100 if total_tasks else 0
        logger.info(
            "%s 完成 (%.1fs) [%d/%d %.0f%%]",
            current, elapsed, completed, total_tasks, progress,
        )
        current += timedelta(days=1)
    total_elapsed = time.time() - t0
    logger.info(
        "回填完成: %d/%d 成功, %d 失败, 耗时 %.1f 秒 (%.1f 分钟)",
        completed, total_tasks, errors, total_elapsed, total_elapsed / 60,
    )
    db_conn.close()
    return errors == 0
def main():
    """CLI entry point: parse and validate arguments, then run the backfill."""
    parser = argparse.ArgumentParser(description="回填指数表历史日快照")
    parser.add_argument("--start", required=True, help="起始日期 (YYYY-MM-DD)")
    parser.add_argument("--end", required=True, help="结束日期 (YYYY-MM-DD)")
    parser.add_argument(
        "--tasks",
        default="RELATION,WINBACK,NEWCONV",
        help="任务列表,逗号分隔 (RELATION/WINBACK/NEWCONV)",
    )
    parser.add_argument("--dry-run", action="store_true", help="干跑模式,不写数据库")
    args = parser.parse_args()

    start_date, end_date = date.fromisoformat(args.start), date.fromisoformat(args.end)
    task_keys = [part.strip().upper() for part in args.tasks.split(",")]

    # Reject the first unknown task code, mirroring the validation loop.
    bad = next((k for k in task_keys if k not in TASK_MAP), None)
    if bad is not None:
        print(f"错误:未知任务 {bad},可选: {list(TASK_MAP.keys())}", file=sys.stderr)
        sys.exit(1)
    if start_date > end_date:
        # NOTE(review): message seems to be missing a colon after "错误";
        # reproduced verbatim to keep output unchanged.
        print("错误start 必须 <= end", file=sys.stderr)
        sys.exit(1)

    ok = run_backfill(start_date, end_date, task_keys, dry_run=args.dry_run)
    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()

View File

@@ -5,7 +5,7 @@ import os
import shutil
from pathlib import Path
ROOT = Path(r"C:\NeoZQYY")
ROOT = Path(r"C:\Project\NeoZQYY")
ETL_AUDIT = ROOT / "apps" / "etl" / "pipelines" / "feiqiu" / "docs" / "audit" / "changes"
ROOT_AUDIT = ROOT / "docs" / "audit" / "changes"

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,7 @@ ETL 统一分析 — 编排入口
--source etl-log 切换为读 ETL 落盘 JSON一致性检查模式
用法:
cd C:\\NeoZQYY
cd C:\\Project\\NeoZQYY
uv run python scripts/ops/etl_unified_analysis.py
uv run python scripts/ops/etl_unified_analysis.py --mode consistency --source etl-log
uv run python scripts/ops/etl_unified_analysis.py --mode structure --date-from 2026-01-01

View File

@@ -1,7 +1,7 @@
# 生成 ETL 全流程联调综合报告
# 输出路径:{SYSTEM_LOG_ROOT}/{date}__etl_integration_report.md
# 环境变量 SYSTEM_LOG_ROOT 缺失时报错终止。
# 用法cd C:\NeoZQYY && python scripts/ops/gen_integration_report.py
# 用法cd C:\Project\NeoZQYY && python scripts/ops/gen_integration_report.py
import os
import sys

View File

@@ -14,7 +14,7 @@
5. 输出可用于测试的 JWT token
使用方式:
cd C:\\NeoZQYY
cd C:\\Project\\NeoZQYY
python scripts/ops/init_test_user.py [--openid <openid>] [--role <role_code>] [--reset]
环境要求:

View File

@@ -0,0 +1,524 @@
# -*- coding: utf-8 -*-
"""
历史任务推演脚本:基于日快照指数重放 task_generator + recall_detector 逻辑
前置条件:先运行 backfill_index_snapshots.py 生成历史指数快照。
用法:
cd C:\\Project\\NeoZQYY
uv run python scripts/ops/simulate_historical_tasks.py \\
--start 2025-08-01 --end 2026-03-28
# 干跑模式
uv run python scripts/ops/simulate_historical_tasks.py \\
--start 2025-08-01 --end 2026-03-28 --dry-run
# 清理之前的模拟数据后重跑
uv run python scripts/ops/simulate_historical_tasks.py \\
--start 2025-08-01 --end 2026-03-28 --clean
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
import time
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from zoneinfo import ZoneInfo

# ── Environment bootstrap ──
# Make sibling ops helpers importable, then pin the repo root.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from _env_paths import ensure_repo_root
ensure_repo_root()

import os
from dotenv import load_dotenv

# Repo-root .env; does not override vars already in the environment.
_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)

# task_generator's pure functions live under apps/backend.
_BACKEND = _ROOT / "apps" / "backend"
sys.path.insert(0, str(_BACKEND))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("simulate_tasks")

import psycopg2
import psycopg2.extras

TZ = ZoneInfo("Asia/Shanghai")

# Pure decision functions reused from the production task generator so the
# simulation matches live behavior.
from app.services.task_generator import (
    IndexData,
    determine_task_type,
    should_replace_task,
    TASK_TYPE_PRIORITY,
)

# Simulation hard stop: real active tasks exist from 2026-03-29 onwards.
CUTOFF_DATE = date(2026, 3, 28)
# Follow-up task expiry window, in hours.
# NOTE(review): the commit message mentions a 72h follow-up grace period in
# task_generator; this simulation uses 48 — confirm which value is intended.
FOLLOW_UP_HOURS = 48
def get_connections():
    """Open and return (etl_conn, app_conn) from the PG_DSN / APP_DB_DSN env vars.

    Exits the process with status 1 when either DSN is missing.
    """
    dsns = (os.environ.get("PG_DSN"), os.environ.get("APP_DB_DSN"))
    if not all(dsns):
        print("错误PG_DSN 和 APP_DB_DSN 必须在 .env 中配置", file=sys.stderr)
        sys.exit(1)
    connections = []
    for dsn in dsns:
        conn = psycopg2.connect(dsn)
        conn.set_client_encoding("UTF8")
        connections.append(conn)
    return tuple(connections)
def load_index_snapshot(etl_conn, site_id: int, stat_date: date) -> dict:
    """Load the three index snapshots for one (site_id, stat_date).

    Returns:
        {
            "relation": {(assistant_id, member_id): {rs, os_label, os_share, session_count}},
            "wbi": {member_id: Decimal},  # winback display score
            "nci": {member_id: Decimal},  # new-conversion display score
        }
        All maps are empty when no snapshot exists for the date.
    """
    result = {"relation": {}, "wbi": {}, "nci": {}}
    with etl_conn.cursor() as cur:
        # Relation index: one row per assistant/member pair.
        cur.execute(
            """SELECT assistant_id, member_id, rs_display, os_label, os_share, session_count
            FROM dws.dws_member_assistant_relation_index
            WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for row in cur.fetchall():
            result["relation"][(row[0], row[1])] = {
                # Scores go through str() → Decimal to avoid float artifacts.
                "rs": Decimal(str(row[2])),
                "os_label": row[3],
                "os_share": Decimal(str(row[4])),
                "session_count": row[5],
            }
        # WBI; NULL/zero scores are coerced to Decimal(0).
        cur.execute(
            """SELECT member_id, display_score
            FROM dws.dws_member_winback_index
            WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for row in cur.fetchall():
            result["wbi"][row[0]] = Decimal(str(row[1])) if row[1] else Decimal(0)
        # NCI; same NULL handling as WBI.
        cur.execute(
            """SELECT member_id, display_score
            FROM dws.dws_member_newconv_index
            WHERE site_id = %s AND stat_date = %s""",
            (site_id, stat_date),
        )
        for row in cur.fetchall():
            result["nci"][row[0]] = Decimal(str(row[1])) if row[1] else Decimal(0)
    # End the read transaction explicitly.
    etl_conn.commit()
    return result
def load_settlements_for_day(etl_conn, site_id: int, d: date) -> dict:
    """Load settlement records whose pay_time falls on local day *d*.

    Only settle_type 1 and 3 rows joined to non-deleted assistant service
    logs are counted.

    Returns:
        {(assistant_id, member_id): latest_pay_time} — rows with a NULL
        assistant or member id are dropped.
    """
    settlements = {}
    # Local-midnight-to-midnight window for the day.
    day_start = datetime(d.year, d.month, d.day, 0, 0, 0, tzinfo=TZ)
    day_end = day_start + timedelta(days=1)
    with etl_conn.cursor() as cur:
        cur.execute(
            """
            SELECT sl.site_assistant_id AS assistant_id,
            sh.member_id,
            MAX(sh.pay_time) AS latest_pay_time
            FROM dwd.dwd_settlement_head sh
            JOIN dwd.dwd_assistant_service_log sl
            ON sl.order_settle_id = sh.order_settle_id
            AND sl.is_delete = 0
            WHERE sh.site_id = %s
            AND sh.settle_type IN (1, 3)
            AND sh.pay_time >= %s AND sh.pay_time < %s
            GROUP BY sl.site_assistant_id, sh.member_id
            """,
            (site_id, day_start, day_end),
        )
        for row in cur.fetchall():
            # Skip rows missing either side of the pair.
            if row[0] and row[1]:
                settlements[(row[0], row[1])] = row[2]
    # End the read transaction explicitly.
    etl_conn.commit()
    return settlements
def simulate_day(
    app_conn,
    etl_conn,
    site_id: int,
    d: date,
    snapshot: dict,
    active_tasks: dict,
    stats: dict,
    dry_run: bool = False,
):
    """Simulate one day: expire, generate, and recall-complete coach tasks.

    Args:
        app_conn: business DB connection (written to unless dry_run).
        etl_conn: ETL DB connection (settlements are read from it).
        site_id: site being simulated.
        d: the simulated calendar day.
        snapshot: index snapshot for *d* from load_index_snapshot().
        active_tasks: in-memory active-task state carried across days:
            {(assistant_id, member_id):
                {"id": int|None, "task_type": str,
                 "created_at": datetime, "expires_at": datetime|None}}
            Mutated in place.
        stats: cumulative counters, mutated in place.
        dry_run: when True, only the in-memory state is updated; no DB writes.
    """
    # The simulated "now" for this day is 07:00 local time.
    day_datetime = datetime(d.year, d.month, d.day, 7, 0, 0, tzinfo=TZ)
    # ── 1. Expiry: drop follow-up tasks whose expires_at has passed ──
    expired_keys = []
    for key, task in active_tasks.items():
        if task.get("expires_at") and task["expires_at"] < day_datetime:
            expired_keys.append(key)
    for key in expired_keys:
        task = active_tasks.pop(key)
        stats["expired"] += 1
        if not dry_run:
            with app_conn.cursor() as cur:
                cur.execute(
                    "UPDATE biz.coach_tasks SET status = 'expired', updated_at = %s WHERE id = %s",
                    (day_datetime, task["id"]),
                )
                _insert_history(cur, task["id"], "expired", "active", "expired",
                                task["task_type"], task["task_type"],
                                {"reason": "follow_up_expired", "simulated": True})
    # ── 2. Task generation from the index snapshot ──
    relation = snapshot["relation"]
    wbi_map = snapshot["wbi"]
    nci_map = snapshot["nci"]
    # Only MAIN/COMANAGE ownership pairs with at least one session qualify.
    ownership_pairs = [
        (aid, mid, info)
        for (aid, mid), info in relation.items()
        if info["os_label"] in ("MAIN", "COMANAGE") and info["session_count"] > 0
    ]
    for aid, mid, info in ownership_pairs:
        wbi = wbi_map.get(mid, Decimal(0))
        nci = nci_map.get(mid, Decimal(0))
        rs = info["rs"]
        index_data = IndexData(
            site_id=site_id,
            assistant_id=aid,
            member_id=mid,
            wbi=wbi,
            nci=nci,
            rs=rs,
            has_active_recall=False,
            has_follow_up_note=False,
        )
        # Same pure decision function used by the production generator.
        new_type = determine_task_type(index_data)
        if not new_type:
            continue
        key = (aid, mid)
        existing = active_tasks.get(key)
        if existing:
            if existing["task_type"] == new_type:
                continue  # same type: keep the existing task
            if not should_replace_task(existing["task_type"], new_type):
                continue
            # Close the lower-priority task before creating its replacement.
            if not dry_run:
                with app_conn.cursor() as cur:
                    cur.execute(
                        "UPDATE biz.coach_tasks SET status = 'inactive', updated_at = %s WHERE id = %s",
                        (day_datetime, existing["id"]),
                    )
                    _insert_history(cur, existing["id"], "type_change_close", "active", "inactive",
                                    existing["task_type"], new_type,
                                    {"reason": "replaced_by_simulation", "simulated": True})
        # Recall tasks rank by the stronger of WBI/NCI; others by relation score.
        priority_score = float(max(wbi, nci)) if new_type in ("high_priority_recall", "priority_recall") else float(rs)
        if dry_run:
            active_tasks[key] = {
                "id": None,  # no DB row exists in dry-run mode
                "task_type": new_type,
                "created_at": day_datetime,
                "expires_at": None,
            }
            stats["created"] += 1
            continue
        with app_conn.cursor() as cur:
            cur.execute(
                """INSERT INTO biz.coach_tasks
                (site_id, assistant_id, member_id, task_type, status, priority_score, created_at, updated_at)
                VALUES (%s, %s, %s, %s, 'active', %s, %s, %s)
                RETURNING id""",
                (site_id, aid, mid, new_type, priority_score, day_datetime, day_datetime),
            )
            task_id = cur.fetchone()[0]
            _insert_history(cur, task_id, "created", None, "active", None, new_type,
                            {"reason": "simulation_generated", "simulated": True})
        active_tasks[key] = {
            "id": task_id,
            "task_type": new_type,
            "created_at": day_datetime,
            "expires_at": None,
        }
        stats["created"] += 1
    # ── 3. Recall detection from the day's settlement records ──
    settlements = load_settlements_for_day(etl_conn, site_id, d)
    for (aid, mid), pay_time in settlements.items():
        key = (aid, mid)
        task = active_tasks.get(key)
        # Record a recall event; ON CONFLICT deduplicates per local day.
        if not dry_run:
            with app_conn.cursor() as cur:
                try:
                    cur.execute(
                        """INSERT INTO biz.recall_events
                        (site_id, assistant_id, member_id, pay_time, task_id, task_type, created_at)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (site_id, assistant_id, member_id,
                        (date_trunc('day', pay_time AT TIME ZONE 'Asia/Shanghai')))
                        DO NOTHING RETURNING id""",
                        (site_id, aid, mid, pay_time,
                         task["id"] if task else None,
                         task["task_type"] if task else None,
                         day_datetime),
                    )
                    # fetchone() is None when DO NOTHING suppressed the insert.
                    inserted = cur.fetchone()
                    if inserted:
                        stats["recall_events"] += 1
                except Exception:
                    # NOTE(review): broad swallow — intended for dedupe
                    # conflicts, but it also hides genuine insert errors.
                    pass  # 去重冲突或其他
        if not task:
            continue
        # A settlement after the task's creation completes an active recall...
        if task["task_type"] in ("high_priority_recall", "priority_recall"):
            if pay_time > task["created_at"]:
                stats["completed"] += 1
                if not dry_run:
                    with app_conn.cursor() as cur:
                        cur.execute(
                            """UPDATE biz.coach_tasks
                            SET status = 'completed', completed_at = %s, completed_task_type = %s,
                            completion_type = 'auto', updated_at = %s
                            WHERE id = %s AND status = 'active'""",
                            (pay_time, task["task_type"], day_datetime, task["id"]),
                        )
                        _insert_history(cur, task["id"], "completed", "active", "completed",
                                        task["task_type"], task["task_type"],
                                        {"service_time": str(pay_time), "simulated": True})
                # ...and spawns a follow-up visit task with an expiry window.
                expires_at = pay_time + timedelta(hours=FOLLOW_UP_HOURS)
                if not dry_run:
                    with app_conn.cursor() as cur:
                        cur.execute(
                            """INSERT INTO biz.coach_tasks
                            (site_id, assistant_id, member_id, task_type, status, expires_at, created_at, updated_at)
                            VALUES (%s, %s, %s, 'follow_up_visit', 'active', %s, %s, %s)
                            RETURNING id""",
                            (site_id, aid, mid, expires_at, day_datetime, day_datetime),
                        )
                        fu_id = cur.fetchone()[0]
                        _insert_history(cur, fu_id, "created", None, "active", None, "follow_up_visit",
                                        {"reason": "recall_completed", "simulated": True})
                    active_tasks[key] = {
                        "id": fu_id,
                        "task_type": "follow_up_visit",
                        "created_at": day_datetime,
                        "expires_at": expires_at,
                    }
                else:
                    active_tasks[key] = {
                        "id": None,
                        "task_type": "follow_up_visit",
                        "created_at": day_datetime,
                        "expires_at": expires_at,
                    }
                stats["follow_up_created"] += 1
    # One commit per simulated day keeps the day's writes atomic.
    if not dry_run:
        app_conn.commit()
def _insert_history(cur, task_id, action, old_status, new_status, old_task_type, new_task_type, detail=None):
"""在 coach_task_history 中记录变更。"""
if task_id is None:
return
cur.execute(
"""INSERT INTO biz.coach_task_history
(task_id, action, old_status, new_status, old_task_type, new_task_type, detail)
VALUES (%s, %s, %s, %s, %s, %s, %s)""",
(task_id, action, old_status, new_status, old_task_type, new_task_type,
json.dumps(detail) if detail else None),
)
def get_site_id(etl_conn) -> int:
    """Return the site_id present in the relation-index table.

    Args:
        etl_conn: open connection to the ETL database.

    Returns:
        The first distinct ``site_id`` found.

    Raises:
        RuntimeError: when the table is empty (snapshots not yet backfilled).
    """
    with etl_conn.cursor() as cur:
        cur.execute("SELECT DISTINCT site_id FROM dws.dws_member_assistant_relation_index LIMIT 1")
        row = cur.fetchone()
    # BUGFIX: the commit used to sit *after* the return statement and was
    # unreachable dead code; commit here to end the read transaction.
    etl_conn.commit()
    if not row:
        raise RuntimeError("relation_index 表为空,请先运行 backfill_index_snapshots.py")
    return row[0]
def clean_simulated_data(app_conn, cutoff: date):
    """Delete task/recall rows created before end-of-day *cutoff*.

    NOTE(review): this deletes ALL coach_task_history / recall_events /
    coach_tasks rows with created_at < cutoff, not only rows whose history
    detail carries "simulated": True — confirm no real production rows
    exist before the cutoff before running.
    """
    cutoff_dt = datetime(cutoff.year, cutoff.month, cutoff.day, 23, 59, 59, tzinfo=TZ)
    with app_conn.cursor() as cur:
        # History first: it references coach_tasks via foreign key.
        cur.execute(
            """DELETE FROM biz.coach_task_history
            WHERE task_id IN (SELECT id FROM biz.coach_tasks WHERE created_at < %s)""",
            (cutoff_dt,),
        )
        history_count = cur.rowcount
        # Then recall events.
        cur.execute(
            "DELETE FROM biz.recall_events WHERE created_at < %s",
            (cutoff_dt,),
        )
        events_count = cur.rowcount
        # Finally the tasks themselves.
        cur.execute(
            "DELETE FROM biz.coach_tasks WHERE created_at < %s",
            (cutoff_dt,),
        )
        tasks_count = cur.rowcount
    app_conn.commit()
    logger.info(
        "清理完成: %d 条 history, %d 条 recall_events, %d 条 coach_tasks",
        history_count, events_count, tasks_count,
    )
def run_simulation(start_date: date, end_date: date, dry_run: bool = False, clean: bool = False):
    """Replay task generation day by day over [start_date, end_date].

    Dates past CUTOFF_DATE are truncated (real active tasks exist after it).
    Days with no index snapshot are skipped and counted. Active-task state
    is kept in memory across days; final stats and the surviving task-type
    distribution are logged at the end.

    Args:
        start_date: first simulated day (inclusive).
        end_date: last simulated day (inclusive, capped at CUTOFF_DATE).
        dry_run: when True, no DB writes — in-memory replay only.
        clean: when True, purge pre-CUTOFF_DATE rows before simulating.
    """
    if end_date > CUTOFF_DATE:
        logger.warning("end_date %s 超过截止日期 %s,自动截断", end_date, CUTOFF_DATE)
        end_date = CUTOFF_DATE
    etl_conn, app_conn = get_connections()
    site_id = get_site_id(etl_conn)
    if clean:
        logger.info("清理 %s 之前的模拟数据...", CUTOFF_DATE)
        clean_simulated_data(app_conn, CUTOFF_DATE)
    total_days = (end_date - start_date).days + 1
    logger.info(
        "推演参数: %s ~ %s (%d天), site_id=%s, dry_run=%s",
        start_date, end_date, total_days, site_id, dry_run,
    )
    # In-memory active-task set, threaded through simulate_day each day.
    active_tasks: dict[tuple[int, int], dict] = {}
    stats = {
        "created": 0, "completed": 0, "expired": 0,
        "follow_up_created": 0, "recall_events": 0, "skipped_no_snapshot": 0,
    }
    t0 = time.time()
    current = start_date
    while current <= end_date:
        snapshot = load_index_snapshot(etl_conn, site_id, current)
        # Skip days that were never backfilled (all three maps empty).
        if not snapshot["relation"] and not snapshot["wbi"] and not snapshot["nci"]:
            stats["skipped_no_snapshot"] += 1
            current += timedelta(days=1)
            continue
        simulate_day(app_conn, etl_conn, site_id, current, snapshot, active_tasks, stats, dry_run)
        day_num = (current - start_date).days + 1
        # Progress line every 30 simulated days and on the final day.
        if day_num % 30 == 0 or current == end_date:
            elapsed = time.time() - t0
            logger.info(
                "进度: %s (%d/%d) | 已创建=%d 已完成=%d 已过期=%d 回访=%d 事件=%d | %.0fs",
                current, day_num, total_days,
                stats["created"], stats["completed"], stats["expired"],
                stats["follow_up_created"], stats["recall_events"], elapsed,
            )
        current += timedelta(days=1)
    total_elapsed = time.time() - t0
    logger.info("=" * 60)
    logger.info("推演完成: %.1f 秒 (%.1f 分钟)", total_elapsed, total_elapsed / 60)
    logger.info(" 任务创建: %d", stats["created"])
    logger.info(" 任务完成: %d", stats["completed"])
    logger.info(" 任务过期: %d", stats["expired"])
    logger.info(" 回访生成: %d", stats["follow_up_created"])
    logger.info(" 召回事件: %d", stats["recall_events"])
    logger.info(" 跳过(无快照): %d", stats["skipped_no_snapshot"])
    logger.info(" 推演结束时 active 任务数: %d", len(active_tasks))
    # Distribution of task types still active at the end of the run.
    type_dist = {}
    for task in active_tasks.values():
        tt = task["task_type"]
        type_dist[tt] = type_dist.get(tt, 0) + 1
    for tt, cnt in sorted(type_dist.items()):
        logger.info(" %s: %d", tt, cnt)
    etl_conn.close()
    app_conn.close()
def main():
    """CLI entry point for the historical-task simulation."""
    cli = argparse.ArgumentParser(description="历史任务推演(基于指数日快照)")
    cli.add_argument("--start", required=True, help="起始日期 (YYYY-MM-DD)")
    cli.add_argument("--end", required=True, help="结束日期 (YYYY-MM-DD)")
    cli.add_argument("--dry-run", action="store_true", help="干跑模式")
    cli.add_argument("--clean", action="store_true", help="清理之前的模拟数据后重跑")
    opts = cli.parse_args()

    first_day = date.fromisoformat(opts.start)
    last_day = date.fromisoformat(opts.end)
    if first_day > last_day:
        # NOTE(review): message appears to be missing a colon after "错误";
        # reproduced verbatim to keep output unchanged.
        print("错误start 必须 <= end", file=sys.stderr)
        sys.exit(1)

    run_simulation(first_day, last_day, dry_run=opts.dry_run, clean=opts.clean)


if __name__ == "__main__":
    main()

View File

@@ -6,7 +6,7 @@ $ErrorActionPreference = "Stop"
try {
# CHANGE 2026-03-07 | 定位项目根目录:从 bat 启动目录推算,不穿透 junction
# 背景C:\NeoZQYY 是 junction → D:\NeoZQYY\...\repo
# 背景C:\Project\NeoZQYY 是 junction → D:\NeoZQYY\...\repo
# $MyInvocation.MyCommand.Path 和 Split-Path 都会穿透 junction 解析到 D 盘,
# 导致后端 CWD、.venv python、.env 全部指向 D 盘副本。
# 解决:优先用环境变量 NEOZQYY_ROOT其次用 bat 传入的 %~dp0不穿透 junction