feat: 2026-04-15~05-02 累积变更基线 — AI 重构 + Runtime Context + DWS 修复

涵盖（每条对应已存的审计记录）： - AI 模块拆分：apps/backend/app/ai/apps -> prompts/（8 个 APP + app2a 派生） audit: 2026-04-20__ai-module-complete.md - admin-web AI 管理套件：AIDashboard / AIOperations / AIRunLogs / AITriggers / TriggerManager audit: 2026-04-21__admin-web-ai-management-suite.md - App2 财务洞察 prompt v3 -> v5.1 + 小程序 AI 接入（chat / board-finance） audit: 2026-04-22__app2_prompt_v5_1_and_miniprogram_ai_insight.md - App2 prewarm 全过滤器 + AI 触发器 cron reschedule audit: 2026-04-21__app2-finance-prewarm-all-filters.md migration: 20260420_ai_trigger_jobs_and_app2_prewarm.sql / 20260421_app2_prewarm_cron_reschedule.sql - AppType 联合类型对齐 + adminAiAppTypes.test.ts audit: 2026-04-30__admin_web_ai_app_type_alignment.md - DashScope tokens_used 提取修复 audit: 2026-04-30__backend_dashscope_tokens_used_extraction.md - App3 线索完整详情 prompt audit: 2026-05-01__backend_app3_full_detail_prompt.md - Runtime Context 沙箱（5-1~5-2 主线）： - 后端 schema/service + admin_runtime_context / xcx_runtime_clock 两个 router - admin-web RuntimeContext.tsx + miniprogram runtime-clock.ts - migration: 20260501__runtime_context_sandbox.sql - tools/db/verify_admin_web_sandbox.py + verify_sandbox_end_to_end.py - database/changes: 7 份 sandbox_* 验证报告 - 飞球 DWS 修复：finance_area_daily 区域汇总 + task_engine 调整 + RLS 视图业务日上界（migration 20260502 + scripts/ops/gen_rls_business_date_migration.py）合规： - .gitignore 启用 tmp/ 排除 - 不入仓：apps/etl/connectors/feiqiu/.env（API_TOKEN secret，本地修改保留）待验证清单： - docs/audit/changes/2026-05-04__cumulative_baseline_pending_verification.md 每个主题的功能完整性 / 上线验证几乎都未收口，按优先级 P0~P3 逐一处理
2026-05-04 02:30:19 +08:00
parent 2010034840
commit caf179a5da
130 changed files with 14543 additions and 2717 deletions
--- a/scripts/analyze_ab_content_quality.py
+++ b/scripts/analyze_ab_content_quality.py
@@ -0,0 +1,336 @@
+"""App2 A/B 测试 · 内容质量深度分析器。
+
+围绕"实际信息质量"评估，非表现形式（加粗/时长）。
+
+分析维度（按板块）：
+- 板块 A（seq 1-2）：客单价环比是否原字段引用（非推测）、是否识别"对比口径"
+- 板块 B（seq 3-4）：最大优惠来源是否点明、手动调整是否合规表述（禁用"抹零/免单 X 元"）
+- 板块 C（seq 5-6）：是否引用权威字段"储值卡余额变化"（期初/期末/其他调整）
+- 板块 D（seq 7-8）：支出完整性 + 人力成本占比
+- 板块 E（seq 9-10）：seq 9 是否含"旺淡倍率"；seq 10 是否标"同周X均值/期均"基线
+- 板块 F（seq 11-12）：seq 11 三色灯 + top 2 原因；seq 12 跟踪节奏 + 触发动作 + 阈值
+
+违规检测：
+- 禁用行业数字（payload 未提供）：警戒线/均值/参考值/30%/40% 等无锚点百分比
+- 禁用单期推测（"提升/下降/显著增长"但未引用 _环比）
+- 编造字段（payload 不含的字段名）
+
+用法：
+  PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --dir export/ai-ab-test/round_a
+  # 对比两轮：
+  PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --compare export/ai-ab-test/round_a export/ai-ab-test/round_b
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+from statistics import mean, stdev
+
+
+def _iter_rounds(dir_path: Path) -> list[dict]:
+    """读取目录下 round_XX.json 按 round_idx 排序。"""
+    files = sorted(dir_path.glob('round_*.json'))
+    out = []
+    for f in files:
+        data = json.loads(f.read_text(encoding='utf-8'))
+        data['_file'] = f.name
+        out.append(data)
+    return out
+
+
+def _get_seq(insights: list, seq: int) -> dict | None:
+    for ins in insights:
+        if isinstance(ins, dict) and ins.get('seq') == seq:
+            return ins
+    return None
+
+
+def _has_number(text: str) -> bool:
+    """content 是否含 ≥1 个具体数字或百分比。"""
+    return bool(re.search(r'\d+(\.\d+)?%?', text))
+
+
+def analyze_round(parsed: dict | None) -> dict:
+    """Score the content quality of one parsed model response.
+
+    Reads ``parsed['insights']`` (a list whose dict entries carry ``seq`` and
+    ``content``) and returns a dict of 19 metrics: basic counts, keyword /
+    regex checks for sections A-F (seq 1-12), and two violation counters.
+    All checks are text heuristics over the ``content`` strings.
+
+    When *parsed* is falsy, or ``insights`` is not a list, the default
+    metrics (zeros / ``False`` / ``'unknown'``) are returned.
+    """
+    metrics: dict = {
+        # Basics
+        'count': 0,
+        'seq_complete': False,
+        'has_number_rate': 0.0,       # share of insights containing a number (target 100%)
+        'avg_content_len': 0,
+        # Section A
+        'A_unit_econ_ref': False,      # seq 1-2 cite a unit-economics field (avg ticket / member share / daily orders)
+        'A_env_bi_ref': False,         # cites an actual value of a "_环比" (period-over-period) field
+        'A_calib_ref': False,          # cites the comparison basis (e.g. "同天数对齐" / "同期")
+        # Section B
+        'B_top_source': False,         # names the largest discount source
+        'B_manual_violation': False,   # violation: states "抹零 X 元" / "免单 X 元" with an amount
+        # Section C
+        'C_balance_change_ref': False, # cites stored-value balance change wording (opening / closing / other adjustments)
+        # Section D
+        'D_labor_ratio_ref': False,    # labor-cost ratio
+        'D_zero_expense_flag': False,  # flags zero expenses or missing data
+        # Section E
+        'E_weekday_ratio': False,      # seq 9 contains a peak/off-peak multiple ("X 倍" / "X.XX 倍")
+        'E_anomaly_baseline': False,   # seq 10 names the baseline type ("同周" / "期均" / "基线")
+        # Section F
+        'F_light': 'unknown',
+        'F_top2_reasons': False,       # seq 11 lists >=2 reasons (1)...2)... / 原因一...原因二)
+        'F_tracking_trigger': False,   # seq 12 has tracking cadence + trigger action + threshold
+        # Violations
+        'V_industry_number': 0,        # number of fabricated industry-figure mentions
+        'V_speculation': 0,            # single-period speculation (trend wording without citing a _环比 value)
+    }
+    if not parsed:
+        return metrics
+    insights = parsed.get('insights') or []
+    if not isinstance(insights, list):
+        return metrics
+    metrics['count'] = len(insights)
+    seqs = [ins.get('seq') for ins in insights if isinstance(ins, dict)]
+    # Complete means the integer seq values are exactly 1..12, each once.
+    metrics['seq_complete'] = sorted([s for s in seqs if isinstance(s, int)]) == list(range(1, 13))
+
+    total_len = 0
+    with_number = 0
+    for ins in insights:
+        if not isinstance(ins, dict):
+            continue
+        body = (ins.get('content') or '')
+        total_len += len(body)
+        if _has_number(body):
+            with_number += 1
+    if insights:
+        # Denominator is every list entry, including non-dict ones skipped above.
+        metrics['has_number_rate'] = round(with_number / len(insights), 2)
+        metrics['avg_content_len'] = round(total_len / len(insights))
+
+    # Section A
+    a_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (1, 2))
+    metrics['A_unit_econ_ref'] = any(kw in a_texts for kw in ('客单价', '会员订单占比', '会员占比', '日均订单'))
+    metrics['A_env_bi_ref'] = '_环比' in a_texts or bool(re.search(r'环比[^字段][^"]*?[+-]?\d+\.?\d*%', a_texts))
+    metrics['A_calib_ref'] = any(kw in a_texts for kw in ('对比口径', '同天数对齐', '同期', '同日数', '截断到', '对比期'))
+
+    # Section B
+    b_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (3, 4))
+    metrics['B_top_source'] = any(kw in b_texts for kw in ('最大', '主导', '占比最高', '占比超', '团购优惠', '主要来源'))
+    # Violation: an amount attached directly to "抹零"/"免单" (the bare category name alone is fine)
+    metrics['B_manual_violation'] = bool(re.search(r'(抹零|免单)\s*\d+[\.\d]*\s*元', b_texts))
+
+    # Section C
+    c_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (5, 6))
+    metrics['C_balance_change_ref'] = any(kw in c_texts for kw in ('期初', '期末', '余额变化', '其他调整', '非充值/消耗'))
+
+    # Section D
+    d_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (7, 8))
+    metrics['D_labor_ratio_ref'] = any(kw in d_texts for kw in ('人力成本', '助教成本', '占成交收入', '占比'))
+    metrics['D_zero_expense_flag'] = any(kw in d_texts for kw in ('支出为 0', '支出全 0', '支出全0', '支出为0', '0 元', '0元', '数据缺失', '数据不完整', '数据完整性', '未录入'))
+
+    # Section E
+    seq9 = (_get_seq(insights, 9) or {}).get('content') or ''
+    seq10 = (_get_seq(insights, 10) or {}).get('content') or ''
+    metrics['E_weekday_ratio'] = bool(re.search(r'\d+\.?\d*\s*倍|比.*\d+\.?\d*', seq9))
+    metrics['E_anomaly_baseline'] = any(kw in seq10 for kw in ('同周', '期均', '基线', '同星期'))
+
+    # Section F
+    seq11 = (_get_seq(insights, 11) or {}).get('content') or ''
+    seq12 = (_get_seq(insights, 12) or {}).get('content') or ''
+    # First matching colour wins, checked in the order red, yellow, green.
+    if re.search(r'🔴|红灯', seq11):
+        metrics['F_light'] = 'red'
+    elif re.search(r'🟡|黄灯', seq11):
+        metrics['F_light'] = 'yellow'
+    elif re.search(r'🟢|绿灯', seq11):
+        metrics['F_light'] = 'green'
+    # Matches "原因1:" / "原因 1：" / "1)" / "1." / "1、" / "①" / "原因一" / "其一"
+    metrics['F_top2_reasons'] = bool(re.search(r'原因\s*1|1\s*[\)）\.、:：]|①|原因一|其一', seq11)) and \
+                                 bool(re.search(r'原因\s*2|2\s*[\)）\.、:：]|②|原因二|其二', seq11))
+    metrics['F_tracking_trigger'] = any(kw in seq12 for kw in ('启动', '触发', '召回', '立即')) and \
+                                    bool(re.search(r'(每周|每月|每日|每天|每\s*\d+|周期性|定期)', seq12)) and \
+                                    bool(re.search(r'<|>|≥|≤|低于|超过|达到|阈值', seq12))
+
+    # Violation: industry figures (original note: the payload only provides the
+    # mid-week traffic pattern; every other industry figure is banned).
+    # Typical wording: "行业警戒线" "行业均值" "行业标准" "行业参考" followed by a number
+    all_text = ' '.join((ins.get('content') or '') for ins in insights if isinstance(ins, dict))
+    metrics['V_industry_number'] = len(re.findall(r'行业(警戒线|均值|标准|参考值|基线|基准|水平|经验值|通常|一般)[^，。；,]*\d+\.?\d*%?', all_text))
+
+    # Violation: single-period speculation (a sentence uses trend wording such as
+    # "提升/下降/显著/大幅" without citing a _环比 value).
+    # Heuristic: the sentence has a trend word but no number followed by %.
+    speculation_hits = 0
+    for sent in re.split(r'[。；\n]', all_text):
+        if not sent.strip():
+            continue
+        has_trend_word = bool(re.search(r'(提升|下降|上升|下滑|显著|大幅|明显)', sent))
+        has_pct_number = bool(re.search(r'[+-]?\d+\.?\d*%', sent))
+        if has_trend_word and not has_pct_number:
+            # Let through meta statements such as "不推测" / "禁止推测";
+            # note this skips any sentence containing "推测", "不" or "禁".
+            if re.search(r'(推测|不|禁)', sent):
+                continue
+            speculation_hits += 1
+    metrics['V_speculation'] = speculation_hits
+
+    return metrics
+
+
+def summarize(rounds: list[dict], label: str) -> dict:
+    """聚合 10 次的内容质量分布。"""
+    per = [analyze_round(r.get('parsed')) for r in rounds]
+    # 汇总
+    def _rate(key: str) -> float:
+        vals = [1 if p.get(key) else 0 for p in per]
+        return round(sum(vals) / len(vals), 2) if vals else 0.0
+
+    def _avg(key: str) -> float:
+        vals = [p.get(key, 0) for p in per]
+        return round(mean(vals), 2) if vals else 0.0
+
+    lights: dict[str, int] = {}
+    for p in per:
+        l = p.get('F_light', 'unknown')
+        lights[l] = lights.get(l, 0) + 1
+
+    summary = {
+        'label': label,
+        'n': len(rounds),
+        'rates': {
+            'seq_complete': _rate('seq_complete'),
+            'has_number': _avg('has_number_rate'),
+            'A_unit_econ_ref': _rate('A_unit_econ_ref'),
+            'A_env_bi_ref': _rate('A_env_bi_ref'),
+            'A_calib_ref': _rate('A_calib_ref'),
+            'B_top_source': _rate('B_top_source'),
+            'C_balance_change_ref': _rate('C_balance_change_ref'),
+            'D_labor_ratio_ref': _rate('D_labor_ratio_ref'),
+            'D_zero_expense_flag': _rate('D_zero_expense_flag'),
+            'E_weekday_ratio': _rate('E_weekday_ratio'),
+            'E_anomaly_baseline': _rate('E_anomaly_baseline'),
+            'F_top2_reasons': _rate('F_top2_reasons'),
+            'F_tracking_trigger': _rate('F_tracking_trigger'),
+        },
+        'violations': {
+            'B_manual': sum(1 for p in per if p.get('B_manual_violation')),
+            'industry_number_total': sum(p.get('V_industry_number', 0) for p in per),
+            'speculation_total': sum(p.get('V_speculation', 0) for p in per),
+        },
+        'light_distribution': lights,
+        'avg_content_len': _avg('avg_content_len'),
+        'per_round': per,
+    }
+    return summary
+
+
+def print_summary(s: dict) -> None:
+    print(f"\n=== Round {s['label'].upper()} 内容质量汇总（n={s['n']}）===")
+    print(f"  结构完整性:")
+    print(f"    seq 1-12 完整率:     {s['rates']['seq_complete']:.0%}")
+    print(f"    每条含数字比例:       {s['rates']['has_number']:.0%}")
+    print(f"    平均 content 字数:    {s['avg_content_len']:.0f}")
+    print(f"  板块 A · 收入:")
+    print(f"    引用单位经济字段:     {s['rates']['A_unit_econ_ref']:.0%}")
+    print(f"    引用 _环比 真实值:    {s['rates']['A_env_bi_ref']:.0%}")
+    print(f"    引用对比口径:         {s['rates']['A_calib_ref']:.0%}  ★ v4 新增规则的关键指标")
+    print(f"  板块 B · 优惠:")
+    print(f"    点明最大来源:         {s['rates']['B_top_source']:.0%}")
+    print(f"  板块 C · 储值卡:")
+    print(f"    引用余额变化字段:     {s['rates']['C_balance_change_ref']:.0%}")
+    print(f"  板块 D · 成本:")
+    print(f"    引用人力成本占比:     {s['rates']['D_labor_ratio_ref']:.0%}")
+    print(f"    标注 0 支出/数据缺失: {s['rates']['D_zero_expense_flag']:.0%}")
+    print(f"  板块 E · 时间规律:")
+    print(f"    seq 9 含旺/淡倍率:    {s['rates']['E_weekday_ratio']:.0%}")
+    print(f"    seq 10 标注基线类型:  {s['rates']['E_anomaly_baseline']:.0%}")
+    print(f"  板块 F · 综合:")
+    print(f"    三色灯分布:           {s['light_distribution']}")
+    print(f"    seq 11 列 top 2 原因: {s['rates']['F_top2_reasons']:.0%}")
+    print(f"    seq 12 节奏+触发+阈值:{s['rates']['F_tracking_trigger']:.0%}")
+    print(f"  违规统计（越低越好）:")
+    print(f"    手动调整违规次数:     {s['violations']['B_manual']} / {s['n']}")
+    print(f"    行业数字编造总计:     {s['violations']['industry_number_total']}")
+    print(f"    单期推测总计:         {s['violations']['speculation_total']}")
+
+
+def print_compare(*summaries: dict) -> None:
+    labels = [s['label'].upper() for s in summaries]
+    header = ' vs '.join(labels)
+    print(f"\n======= {header} 多方对比表 =======")
+    col_w = 10
+    print(f"{'指标':<34}" + ''.join(f"{l:>{col_w}}" for l in labels))
+    print('-' * (34 + col_w * len(labels)))
+
+    def _row(name: str, values: list, fmt: str = 'percent') -> None:
+        cells = []
+        for v in values:
+            if fmt == 'percent':
+                cells.append(f'{v:.0%}')
+            else:
+                cells.append(str(v))
+        print(f"{name:<34}" + ''.join(f"{c:>{col_w}}" for c in cells))
+
+    for k, name in (
+        ('seq_complete',         'seq 1-12 完整率'),
+        ('has_number',           '每条含数字比例'),
+        ('A_unit_econ_ref',      'A 引用单位经济字段'),
+        ('A_env_bi_ref',         'A 引用 _环比 真实值'),
+        ('A_calib_ref',          'A 引用对比口径 ★'),
+        ('B_top_source',         'B 点明最大优惠来源'),
+        ('C_balance_change_ref', 'C 引用余额变化字段'),
+        ('D_labor_ratio_ref',    'D 引用人力成本占比'),
+        ('D_zero_expense_flag',  'D 标注 0 支出'),
+        ('E_weekday_ratio',      'E seq 9 含倍率'),
+        ('E_anomaly_baseline',   'E seq 10 标注基线'),
+        ('F_top2_reasons',       'F seq 11 列 top 2 原因 ★'),
+        ('F_tracking_trigger',   'F seq 12 节奏+触发+阈值'),
+    ):
+        _row(name, [s['rates'][k] for s in summaries], 'percent')
+
+    print('-' * (34 + col_w * len(summaries)))
+    print('违规次数（越低越好）:')
+    _row('  手动调整违规',       [s['violations']['B_manual'] for s in summaries], 'int')
+    _row('  行业数字编造',       [s['violations']['industry_number_total'] for s in summaries], 'int')
+    _row('  单期推测',           [s['violations']['speculation_total'] for s in summaries], 'int')
+    print('-' * (34 + col_w * len(summaries)))
+    _row('  平均字数',           [f"{s['avg_content_len']:.0f}" for s in summaries], 'int')
+    _row('  样本数',             [s['n'] for s in summaries], 'int')
+    print()
+    for s in summaries:
+        print(f"  {s['label'].upper()} 三色灯分布: {s['light_distribution']}")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dir', help='单目录分析：如 export/ai-ab-test/round_a')
+    parser.add_argument('--compare', nargs='+', metavar='DIR', help='多轮对比（2-4 个目录）')
+    args = parser.parse_args()
+
+    if args.compare:
+        dirs = [Path(d) for d in args.compare]
+        if len(dirs) < 2:
+            sys.exit('--compare 至少 2 个目录')
+        summaries = []
+        for d in dirs:
+            rounds = _iter_rounds(d)
+            if not rounds:
+                sys.exit(f'目录无 round_*.json：{d}')
+            label = d.name.replace('round_', '')
+            summaries.append(summarize(rounds, label))
+        for s in summaries:
+            print_summary(s)
+        print_compare(*summaries)
+        # 存档对比 JSON
+        tag = '_'.join(s['label'] for s in summaries)
+        out_path = Path(f'export/ai-ab-test/_compare_{tag}.json')
+        out_path.write_text(json.dumps({s['label']: s for s in summaries}, ensure_ascii=False, indent=2), encoding='utf-8')
+        print(f'\n[done] 对比 JSON 已存: {out_path}')
+    elif args.dir:
+        rounds = _iter_rounds(Path(args.dir))
+        if not rounds:
+            sys.exit(f'目录无 round_*.json：{args.dir}')
+        label = Path(args.dir).name.replace('round_', '')
+        s = summarize(rounds, label)
+        print_summary(s)
+    else:
+        parser.error('需指定 --dir 或 --compare')
+
+
+# Run the CLI only when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()