feat: 2026-04-15~05-02 累积变更基线 — AI 重构 + Runtime Context + DWS 修复

涵盖(每条对应已存的审计记录):
- AI 模块拆分:apps/backend/app/ai/apps -> prompts/(8 个 APP + app2a 派生)
  audit: 2026-04-20__ai-module-complete.md
- admin-web AI 管理套件:AIDashboard / AIOperations / AIRunLogs / AITriggers / TriggerManager
  audit: 2026-04-21__admin-web-ai-management-suite.md
- App2 财务洞察 prompt v3 -> v5.1 + 小程序 AI 接入(chat / board-finance)
  audit: 2026-04-22__app2_prompt_v5_1_and_miniprogram_ai_insight.md
- App2 prewarm 全过滤器 + AI 触发器 cron reschedule
  audit: 2026-04-21__app2-finance-prewarm-all-filters.md
  migration: 20260420_ai_trigger_jobs_and_app2_prewarm.sql / 20260421_app2_prewarm_cron_reschedule.sql
- AppType 联合类型对齐 + adminAiAppTypes.test.ts
  audit: 2026-04-30__admin_web_ai_app_type_alignment.md
- DashScope tokens_used 提取修复
  audit: 2026-04-30__backend_dashscope_tokens_used_extraction.md
- App3 线索完整详情 prompt
  audit: 2026-05-01__backend_app3_full_detail_prompt.md
- Runtime Context 沙箱(5-1~5-2 主线):
  - 后端 schema/service + admin_runtime_context / xcx_runtime_clock 两个 router
  - admin-web RuntimeContext.tsx + miniprogram runtime-clock.ts
  - migration: 20260501__runtime_context_sandbox.sql
  - tools/db/verify_admin_web_sandbox.py + verify_sandbox_end_to_end.py
  - database/changes: 7 份 sandbox_* 验证报告
- 飞球 DWS 修复:finance_area_daily 区域汇总 + task_engine 调整
  + RLS 视图业务日上界(migration 20260502 + scripts/ops/gen_rls_business_date_migration.py)

合规:
- .gitignore 启用 tmp/ 排除
- 不入仓:apps/etl/connectors/feiqiu/.env(API_TOKEN secret,本地修改保留)

待验证清单:
- docs/audit/changes/2026-05-04__cumulative_baseline_pending_verification.md
  每个主题的功能完整性 / 上线验证几乎都未收口,按优先级 P0~P3 逐一处理
This commit is contained in:
Neo
2026-05-04 02:30:19 +08:00
parent 2010034840
commit caf179a5da
130 changed files with 14543 additions and 2717 deletions

View File

@@ -0,0 +1,336 @@
"""App2 A/B 测试 · 内容质量深度分析器。
围绕"实际信息质量"评估,非表现形式(加粗/时长)。
分析维度(按板块):
- 板块 A(seq 1-2):客单价环比是否原字段引用(非推测)、是否识别"对比口径"
- 板块 B(seq 3-4):最大优惠来源是否点明、手动调整是否合规表述(禁用"抹零/免单 X 元")
- 板块 C(seq 5-6):是否引用权威字段"储值卡余额变化"(期初/期末/其他调整)
- 板块 D(seq 7-8):支出完整性 + 人力成本占比
- 板块 E(seq 9-10):seq 9 是否含"旺淡倍率";seq 10 是否标"同周X均值/期均"基线
- 板块 F(seq 11-12):seq 11 三色灯 + top 2 原因;seq 12 跟踪节奏 + 触发动作 + 阈值
违规检测:
- 禁用行业数字(payload 未提供):警戒线/均值/参考值/30%/40% 等无锚点百分比
- 禁用单期推测("提升/下降/显著增长"但未引用 _环比)
- 编造字段(payload 不含的字段名)
用法:
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --dir export/ai-ab-test/round_a
# 对比两轮:
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --compare export/ai-ab-test/round_a export/ai-ab-test/round_b
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
from statistics import mean, stdev
def _iter_rounds(dir_path: Path) -> list[dict]:
    """Load every ``round_*.json`` file in *dir_path*, ordered by round number.

    Files are ordered by the first integer found in the file stem, so
    ``round_2.json`` comes before ``round_10.json`` even when the names are
    not zero-padded (a plain lexicographic sort would reverse them). Files
    whose stem contains no number sort last, by name.

    Args:
        dir_path: Directory to scan (non-recursive).

    Returns:
        One decoded JSON object per matching file, each with an added
        ``'_file'`` key holding the file name. Empty when nothing matches.

    Raises:
        json.JSONDecodeError: If a matching file does not contain valid JSON.
    """
    def _round_order(path: Path) -> tuple[int, int, str]:
        # (0, index, name) for numbered files, (1, 0, name) for the rest so
        # that unnumbered files never interleave with numbered ones.
        found = re.search(r'[0-9]+', path.stem)
        if found is None:
            return (1, 0, path.name)
        return (0, int(found.group()), path.name)

    rounds = []
    for path in sorted(dir_path.glob('round_*.json'), key=_round_order):
        data = json.loads(path.read_text(encoding='utf-8'))
        # Remember the source file so later reports can be traced back to it.
        data['_file'] = path.name
        rounds.append(data)
    return rounds
def _get_seq(insights: list, seq: int) -> dict | None:
    """Return the first dict in *insights* whose ``'seq'`` equals *seq*.

    Entries that are not dicts are skipped. Returns ``None`` when no entry
    matches.
    """
    dict_entries = (item for item in insights if isinstance(item, dict))
    return next((item for item in dict_entries if item.get('seq') == seq), None)
def _has_number(text: str) -> bool:
    """Tell whether *text* contains at least one number or percentage."""
    found = re.search(r'\d+(\.\d+)?%?', text)
    return found is not None
def analyze_round(parsed: dict | None) -> dict:
    """Score the content quality of one parsed model response.

    Every check is a keyword/regex heuristic over the ``content`` text of the
    entries in ``parsed['insights']``; section checks look entries up by
    their ``seq`` value (1-12).

    Args:
        parsed: Decoded response, expected to hold an ``'insights'`` list of
            dicts carrying ``'seq'`` and ``'content'``. If it is not a
            non-empty dict, or ``'insights'`` is not a list, the default
            metrics are returned unchanged.

    Returns:
        A dict with 19 metrics: 4 structural values (``count``,
        ``seq_complete``, ``has_number_rate``, ``avg_content_len``), 13
        per-section flags prefixed ``A_`` .. ``F_`` (``F_light`` is one of
        ``'red'``/``'yellow'``/``'green'``/``'unknown'``), and 2 violation
        counters prefixed ``V_``.

    Malformed model output is tolerated: a ``content`` value that is not a
    string is treated as empty text instead of raising.
    """
    metrics: dict = {
        # Basics
        'count': 0,
        'seq_complete': False,
        'has_number_rate': 0.0,  # share of insights containing a number (target: 100%)
        'avg_content_len': 0,
        # Section A
        'A_unit_econ_ref': False,  # seq 1-2 cite a unit-economics field
        'A_env_bi_ref': False,  # cite an actual value of a "_环比" field
        'A_calib_ref': False,  # cite the comparison basis
        # Section B
        'B_top_source': False,  # name the largest discount source
        'B_manual_violation': False,  # violation: states an amount for 抹零 / 免单
        # Section C
        'C_balance_change_ref': False,  # cite the stored-value balance-change fields
        # Section D
        'D_labor_ratio_ref': False,  # labour-cost ratio
        'D_zero_expense_flag': False,  # flags zero expense or missing data
        # Section E
        'E_weekday_ratio': False,  # seq 9 contains a peak/off-peak multiple
        'E_anomaly_baseline': False,  # seq 10 names the baseline type
        # Section F
        'F_light': 'unknown',
        'F_top2_reasons': False,  # seq 11 lists at least two reasons
        'F_tracking_trigger': False,  # seq 12 has cadence + trigger + threshold
        # Violations
        'V_industry_number': 0,  # mentions of made-up industry figures
        'V_speculation': 0,  # trend claims without a percentage to back them
    }
    # A failed parse can leave None, or a non-dict JSON value, here.
    if not parsed or not isinstance(parsed, dict):
        return metrics
    insights = parsed.get('insights') or []
    if not isinstance(insights, list):
        return metrics

    def _content(entry: object) -> str:
        # Only real strings count as text; None, numbers or nested structures
        # are treated as "no content" rather than crashing len()/join().
        if not isinstance(entry, dict):
            return ''
        value = entry.get('content')
        return value if isinstance(value, str) else ''

    def _section(*seq_numbers: int) -> str:
        # Concatenated text of the given seq entries (missing ones contribute '').
        return ' '.join(_content(_get_seq(insights, s)) for s in seq_numbers)

    metrics['count'] = len(insights)
    seqs = [ins.get('seq') for ins in insights if isinstance(ins, dict)]
    metrics['seq_complete'] = sorted(s for s in seqs if isinstance(s, int)) == list(range(1, 13))

    bodies = [_content(ins) for ins in insights if isinstance(ins, dict)]
    total_len = sum(len(body) for body in bodies)
    with_number = sum(1 for body in bodies if _has_number(body))
    if insights:
        # The denominator is every entry (non-dict entries included), matching 'count'.
        metrics['has_number_rate'] = round(with_number / len(insights), 2)
        metrics['avg_content_len'] = round(total_len / len(insights))

    # Section A
    a_texts = _section(1, 2)
    metrics['A_unit_econ_ref'] = any(kw in a_texts for kw in ('客单价', '会员订单占比', '会员占比', '日均订单'))
    metrics['A_env_bi_ref'] = '_环比' in a_texts or bool(re.search(r'环比[^字段][^"]*?[+-]?\d+\.?\d*%', a_texts))
    metrics['A_calib_ref'] = any(kw in a_texts for kw in ('对比口径', '同天数对齐', '同期', '同日数', '截断到', '对比期'))

    # Section B
    b_texts = _section(3, 4)
    metrics['B_top_source'] = any(kw in b_texts for kw in ('最大', '主导', '占比最高', '占比超', '团购优惠', '主要来源'))
    # Violation: stating an amount for 抹零 / 免单, as opposed to merely
    # naming the category.
    metrics['B_manual_violation'] = bool(re.search(r'(抹零|免单)\s*\d+[\.\d]*\s*元', b_texts))

    # Section C
    c_texts = _section(5, 6)
    metrics['C_balance_change_ref'] = any(kw in c_texts for kw in ('期初', '期末', '余额变化', '其他调整', '非充值/消耗'))

    # Section D
    d_texts = _section(7, 8)
    metrics['D_labor_ratio_ref'] = any(kw in d_texts for kw in ('人力成本', '助教成本', '占成交收入', '占比'))
    metrics['D_zero_expense_flag'] = any(kw in d_texts for kw in ('支出为 0', '支出全 0', '支出全0', '支出为0', '0 元', '0元', '数据缺失', '数据不完整', '数据完整性', '未录入'))

    # Section E
    seq9 = _section(9)
    seq10 = _section(10)
    metrics['E_weekday_ratio'] = bool(re.search(r'\d+\.?\d*\s*倍|比.*\d+\.?\d*', seq9))
    metrics['E_anomaly_baseline'] = any(kw in seq10 for kw in ('同周', '期均', '基线', '同星期'))

    # Section F
    seq11 = _section(11)
    seq12 = _section(12)
    # First matching colour wins, checked in red -> yellow -> green order.
    if re.search(r'🔴|红灯', seq11):
        metrics['F_light'] = 'red'
    elif re.search(r'🟡|黄灯', seq11):
        metrics['F_light'] = 'yellow'
    elif re.search(r'🟢|绿灯', seq11):
        metrics['F_light'] = 'green'
    # Accepts several enumeration styles for reason 1 and reason 2
    # (e.g. "原因1", "1)", "1.", "1、", "①", "原因一", "其一").
    metrics['F_top2_reasons'] = (
        bool(re.search(r'原因\s*1|1\s*[\)\.、:]|①|原因一|其一', seq11))
        and bool(re.search(r'原因\s*2|2\s*[\)\.、:]|②|原因二|其二', seq11))
    )
    # Needs all three: an action word, a cadence, and a threshold marker.
    metrics['F_tracking_trigger'] = (
        any(kw in seq12 for kw in ('启动', '触发', '召回', '立即'))
        and bool(re.search(r'(每周|每月|每日|每天|每\s*\d+|周期性|定期)', seq12))
        and bool(re.search(r'<|>|≥|≤|低于|超过|达到|阈值', seq12))
    )

    # Violation: industry figures. Per the original note, the payload only
    # supplies the weekday traffic pattern, so any other "industry X" wording
    # followed by a number counts as fabricated.
    all_text = ' '.join(bodies)
    metrics['V_industry_number'] = len(re.findall(r'行业(警戒线|均值|标准|参考值|基线|基准|水平|经验值|通常|一般)[^,。;,]*\d+\.?\d*%?', all_text))

    # Violation: single-period speculation. Heuristic: a sentence uses a trend
    # word but contains no percentage figure.
    speculation_hits = 0
    for sent in re.split(r'[。;\n]', all_text):
        if not sent.strip():
            continue
        has_trend_word = bool(re.search(r'(提升|下降|上升|下滑|显著|大幅|明显)', sent))
        has_pct_number = bool(re.search(r'[+-]?\d+\.?\d*%', sent))
        if has_trend_word and not has_pct_number:
            # Sentences that negate or disclaim speculation are exempt.
            if re.search(r'(推测|不|禁)', sent):
                continue
            speculation_hits += 1
    metrics['V_speculation'] = speculation_hits
    return metrics
def summarize(rounds: list[dict], label: str) -> dict:
    """Aggregate the per-round content-quality metrics of one test arm.

    Args:
        rounds: Round records; each record's ``'parsed'`` value is analysed.
        label: Name of the arm, stored verbatim in the summary.

    Returns:
        A dict with ``label``, ``n``, ``rates`` (share of rounds where each
        flag is truthy, plus the mean number rate), ``violations`` (totals),
        ``light_distribution`` (count per light colour), ``avg_content_len``
        and ``per_round`` (the raw metrics of every round).
    """
    per = [analyze_round(entry.get('parsed')) for entry in rounds]

    def _rate(key: str) -> float:
        # Fraction of rounds in which the metric is truthy.
        if not per:
            return 0.0
        hits = sum(1 for round_metrics in per if round_metrics.get(key))
        return round(hits / len(per), 2)

    def _avg(key: str) -> float:
        # Arithmetic mean of a numeric metric across rounds.
        if not per:
            return 0.0
        return round(mean([round_metrics.get(key, 0) for round_metrics in per]), 2)

    lights: dict[str, int] = {}
    for round_metrics in per:
        colour = round_metrics.get('F_light', 'unknown')
        lights.setdefault(colour, 0)
        lights[colour] += 1

    flag_keys = (
        'A_unit_econ_ref',
        'A_env_bi_ref',
        'A_calib_ref',
        'B_top_source',
        'C_balance_change_ref',
        'D_labor_ratio_ref',
        'D_zero_expense_flag',
        'E_weekday_ratio',
        'E_anomaly_baseline',
        'F_top2_reasons',
        'F_tracking_trigger',
    )
    # Key order is part of the output (it drives the archived JSON layout).
    rates = {
        'seq_complete': _rate('seq_complete'),
        'has_number': _avg('has_number_rate'),
    }
    rates.update({key: _rate(key) for key in flag_keys})

    violations = {
        'B_manual': sum(1 for round_metrics in per if round_metrics.get('B_manual_violation')),
        'industry_number_total': sum(round_metrics.get('V_industry_number', 0) for round_metrics in per),
        'speculation_total': sum(round_metrics.get('V_speculation', 0) for round_metrics in per),
    }

    return {
        'label': label,
        'n': len(rounds),
        'rates': rates,
        'violations': violations,
        'light_distribution': lights,
        'avg_content_len': _avg('avg_content_len'),
        'per_round': per,
    }
def print_summary(s: dict) -> None:
    """Print the human-readable quality report for one summary.

    Args:
        s: A summary dict providing ``label``, ``n``, ``rates``,
            ``violations``, ``light_distribution`` and ``avg_content_len``.
    """
    rates = s['rates']
    violations = s['violations']

    def _pct(key: str) -> str:
        # Render a 0..1 rate as a whole-number percentage.
        return format(rates[key], '.0%')

    print(f"\n=== Round {s['label'].upper()} 内容质量汇总n={s['n']}===")
    print(" 结构完整性:")
    print(" seq 1-12 完整率: " + _pct('seq_complete'))
    print(" 每条含数字比例: " + _pct('has_number'))
    print(f" 平均 content 字数: {s['avg_content_len']:.0f}")
    print(" 板块 A · 收入:")
    print(" 引用单位经济字段: " + _pct('A_unit_econ_ref'))
    print(" 引用 _环比 真实值: " + _pct('A_env_bi_ref'))
    print(" 引用对比口径: " + _pct('A_calib_ref') + " ★ v4 新增规则的关键指标")
    print(" 板块 B · 优惠:")
    print(" 点明最大来源: " + _pct('B_top_source'))
    print(" 板块 C · 储值卡:")
    print(" 引用余额变化字段: " + _pct('C_balance_change_ref'))
    print(" 板块 D · 成本:")
    print(" 引用人力成本占比: " + _pct('D_labor_ratio_ref'))
    print(" 标注 0 支出/数据缺失: " + _pct('D_zero_expense_flag'))
    print(" 板块 E · 时间规律:")
    print(" seq 9 含旺/淡倍率: " + _pct('E_weekday_ratio'))
    print(" seq 10 标注基线类型: " + _pct('E_anomaly_baseline'))
    print(" 板块 F · 综合:")
    print(f" 三色灯分布: {s['light_distribution']}")
    print(" seq 11 列 top 2 原因: " + _pct('F_top2_reasons'))
    print(" seq 12 节奏+触发+阈值:" + _pct('F_tracking_trigger'))
    print(" 违规统计(越低越好):")
    print(f" 手动调整违规次数: {violations['B_manual']} / {s['n']}")
    print(f" 行业数字编造总计: {violations['industry_number_total']}")
    print(f" 单期推测总计: {violations['speculation_total']}")
def print_compare(*summaries: dict) -> None:
    """Print a side-by-side comparison table for several summaries.

    One column is printed per summary, in the order given: the rate rows
    first, then the violation counts, average length and sample size, and
    finally each arm's light distribution.

    Args:
        *summaries: Summary dicts providing ``label``, ``n``, ``rates``,
            ``violations``, ``light_distribution`` and ``avg_content_len``.
    """
    name_w = 34
    col_w = 10
    labels = [summary['label'].upper() for summary in summaries]
    divider = '-' * (name_w + col_w * len(summaries))

    def _row(name: str, values: list, fmt: str = 'percent') -> None:
        # Left-aligned row title followed by one right-aligned cell per arm.
        if fmt == 'percent':
            cells = [f'{value:.0%}' for value in values]
        else:
            cells = [str(value) for value in values]
        print(name.ljust(name_w) + ''.join(cell.rjust(col_w) for cell in cells))

    print(f"\n======= {' vs '.join(labels)} 多方对比表 =======")
    print('指标'.ljust(name_w) + ''.join(label.rjust(col_w) for label in labels))
    print(divider)

    rate_rows = (
        ('seq_complete', 'seq 1-12 完整率'),
        ('has_number', '每条含数字比例'),
        ('A_unit_econ_ref', 'A 引用单位经济字段'),
        ('A_env_bi_ref', 'A 引用 _环比 真实值'),
        ('A_calib_ref', 'A 引用对比口径 ★'),
        ('B_top_source', 'B 点明最大优惠来源'),
        ('C_balance_change_ref', 'C 引用余额变化字段'),
        ('D_labor_ratio_ref', 'D 引用人力成本占比'),
        ('D_zero_expense_flag', 'D 标注 0 支出'),
        ('E_weekday_ratio', 'E seq 9 含倍率'),
        ('E_anomaly_baseline', 'E seq 10 标注基线'),
        ('F_top2_reasons', 'F seq 11 列 top 2 原因 ★'),
        ('F_tracking_trigger', 'F seq 12 节奏+触发+阈值'),
    )
    for key, title in rate_rows:
        _row(title, [summary['rates'][key] for summary in summaries], 'percent')
    print(divider)

    print('违规次数(越低越好):')
    violation_rows = (
        ('B_manual', ' 手动调整违规'),
        ('industry_number_total', ' 行业数字编造'),
        ('speculation_total', ' 单期推测'),
    )
    for key, title in violation_rows:
        _row(title, [summary['violations'][key] for summary in summaries], 'int')
    print(divider)

    # Average length is pre-formatted so it prints without decimals.
    _row(' 平均字数', [f"{summary['avg_content_len']:.0f}" for summary in summaries], 'int')
    _row(' 样本数', [summary['n'] for summary in summaries], 'int')
    print()
    for summary in summaries:
        print(f" {summary['label'].upper()} 三色灯分布: {summary['light_distribution']}")
def main() -> None:
    """Command-line entry point.

    ``--dir DIR`` analyses one directory of ``round_*.json`` files and prints
    its summary. ``--compare DIR DIR [...]`` summarises each directory,
    prints every summary plus a comparison table, and archives the combined
    result as JSON under ``export/ai-ab-test/``.

    Exits with an error message when a directory holds no round files, when
    fewer than two directories are given to ``--compare``, or when neither
    option is supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', help='单目录分析:如 export/ai-ab-test/round_a')
    parser.add_argument('--compare', nargs='+', metavar='DIR', help='多轮对比2-4 个目录)')
    args = parser.parse_args()

    if args.compare:
        dirs = [Path(d) for d in args.compare]
        if len(dirs) < 2:
            sys.exit('--compare 至少 2 个目录')
        summaries = []
        for d in dirs:
            rounds = _iter_rounds(d)
            if not rounds:
                sys.exit(f'目录无 round_*.json{d}')
            # The label is the directory name without its "round_" marker.
            label = d.name.replace('round_', '')
            summaries.append(summarize(rounds, label))
        for s in summaries:
            print_summary(s)
        print_compare(*summaries)
        # Archive the comparison as JSON.
        tag = '_'.join(s['label'] for s in summaries)
        out_path = Path(f'export/ai-ab-test/_compare_{tag}.json')
        # The input directories may live elsewhere, so the archive directory
        # is not guaranteed to exist relative to the working directory.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): directories sharing the same name collapse into one
        # key here, so only the last such summary is archived.
        out_path.write_text(
            json.dumps({s['label']: s for s in summaries}, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )
        print(f'\n[done] 对比 JSON 已存: {out_path}')
    elif args.dir:
        rounds = _iter_rounds(Path(args.dir))
        if not rounds:
            sys.exit(f'目录无 round_*.json{args.dir}')
        label = Path(args.dir).name.replace('round_', '')
        s = summarize(rounds, label)
        print_summary(s)
    else:
        parser.error('需指定 --dir 或 --compare')


# Run the CLI only when the file is executed as a script.
if __name__ == '__main__':
    main()