feat: 2026-04-15~05-02 累积变更基线 — AI 重构 + Runtime Context + DWS 修复
涵盖(每条对应已存的审计记录): - AI 模块拆分:apps/backend/app/ai/apps -> prompts/(8 个 APP + app2a 派生) audit: 2026-04-20__ai-module-complete.md - admin-web AI 管理套件:AIDashboard / AIOperations / AIRunLogs / AITriggers / TriggerManager audit: 2026-04-21__admin-web-ai-management-suite.md - App2 财务洞察 prompt v3 -> v5.1 + 小程序 AI 接入(chat / board-finance) audit: 2026-04-22__app2_prompt_v5_1_and_miniprogram_ai_insight.md - App2 prewarm 全过滤器 + AI 触发器 cron reschedule audit: 2026-04-21__app2-finance-prewarm-all-filters.md migration: 20260420_ai_trigger_jobs_and_app2_prewarm.sql / 20260421_app2_prewarm_cron_reschedule.sql - AppType 联合类型对齐 + adminAiAppTypes.test.ts audit: 2026-04-30__admin_web_ai_app_type_alignment.md - DashScope tokens_used 提取修复 audit: 2026-04-30__backend_dashscope_tokens_used_extraction.md - App3 线索完整详情 prompt audit: 2026-05-01__backend_app3_full_detail_prompt.md - Runtime Context 沙箱(5-1~5-2 主线): - 后端 schema/service + admin_runtime_context / xcx_runtime_clock 两个 router - admin-web RuntimeContext.tsx + miniprogram runtime-clock.ts - migration: 20260501__runtime_context_sandbox.sql - tools/db/verify_admin_web_sandbox.py + verify_sandbox_end_to_end.py - database/changes: 7 份 sandbox_* 验证报告 - 飞球 DWS 修复:finance_area_daily 区域汇总 + task_engine 调整 + RLS 视图业务日上界(migration 20260502 + scripts/ops/gen_rls_business_date_migration.py) 合规: - .gitignore 启用 tmp/ 排除 - 不入仓:apps/etl/connectors/feiqiu/.env(API_TOKEN secret,本地修改保留) 待验证清单: - docs/audit/changes/2026-05-04__cumulative_baseline_pending_verification.md 每个主题的功能完整性 / 上线验证几乎都未收口,按优先级 P0~P3 逐一处理
This commit is contained in:
336
scripts/analyze_ab_content_quality.py
Normal file
336
scripts/analyze_ab_content_quality.py
Normal file
@@ -0,0 +1,336 @@
|
||||
"""App2 A/B 测试 · 内容质量深度分析器。
|
||||
|
||||
围绕"实际信息质量"评估,非表现形式(加粗/时长)。
|
||||
|
||||
分析维度(按板块):
|
||||
- 板块 A(seq 1-2):客单价环比是否原字段引用(非推测)、是否识别"对比口径"
|
||||
- 板块 B(seq 3-4):最大优惠来源是否点明、手动调整是否合规表述(禁用"抹零/免单 X 元")
|
||||
- 板块 C(seq 5-6):是否引用权威字段"储值卡余额变化"(期初/期末/其他调整)
|
||||
- 板块 D(seq 7-8):支出完整性 + 人力成本占比
|
||||
- 板块 E(seq 9-10):seq 9 是否含"旺淡倍率";seq 10 是否标"同周X均值/期均"基线
|
||||
- 板块 F(seq 11-12):seq 11 三色灯 + top 2 原因;seq 12 跟踪节奏 + 触发动作 + 阈值
|
||||
|
||||
违规检测:
|
||||
- 禁用行业数字(payload 未提供):警戒线/均值/参考值/30%/40% 等无锚点百分比
|
||||
- 禁用单期推测("提升/下降/显著增长"但未引用 _环比)
|
||||
- 编造字段(payload 不含的字段名)
|
||||
|
||||
用法:
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --dir export/ai-ab-test/round_a
|
||||
# 对比两轮:
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --compare export/ai-ab-test/round_a export/ai-ab-test/round_b
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from statistics import mean, stdev
|
||||
|
||||
|
||||
def _iter_rounds(dir_path: Path) -> list[dict]:
|
||||
"""读取目录下 round_XX.json 按 round_idx 排序。"""
|
||||
files = sorted(dir_path.glob('round_*.json'))
|
||||
out = []
|
||||
for f in files:
|
||||
data = json.loads(f.read_text(encoding='utf-8'))
|
||||
data['_file'] = f.name
|
||||
out.append(data)
|
||||
return out
|
||||
|
||||
|
||||
def _get_seq(insights: list, seq: int) -> dict | None:
|
||||
for ins in insights:
|
||||
if isinstance(ins, dict) and ins.get('seq') == seq:
|
||||
return ins
|
||||
return None
|
||||
|
||||
|
||||
def _has_number(text: str) -> bool:
|
||||
"""content 是否含 ≥1 个具体数字或百分比。"""
|
||||
return bool(re.search(r'\d+(\.\d+)?%?', text))
|
||||
|
||||
|
||||
def analyze_round(parsed: dict | None) -> dict:
    """Score the content quality of one parsed model response.

    The function reads ``parsed['insights']``, a list whose dict items carry
    ``seq`` and ``content`` keys. Every check is a keyword / regex heuristic
    over the ``content`` text of specific ``seq`` numbers.

    Args:
        parsed: Decoded response payload. ``None``, an empty or non-dict
            value, or a non-list ``insights`` all yield the default metrics.
            Items whose ``content`` is missing or not a string are treated
            as empty text instead of raising.

    Returns:
        A dict with 19 metrics: 4 structural ones (``count``,
        ``seq_complete``, ``has_number_rate``, ``avg_content_len``), 13
        per-section flags prefixed ``A_`` to ``F_`` (``F_light`` is one of
        ``'red'``, ``'yellow'``, ``'green'``, ``'unknown'``), and 2 violation
        counters prefixed ``V_``.
    """
    metrics: dict = {
        # Basics
        'count': 0,
        'seq_complete': False,
        'has_number_rate': 0.0,  # share of insights containing a number (target 100%)
        'avg_content_len': 0,
        # Section A
        'A_unit_econ_ref': False,  # seq 1-2 cite a unit-economics field
        'A_env_bi_ref': False,  # cites a real value of a period-over-period field
        'A_calib_ref': False,  # cites the comparison basis
        # Section B
        'B_top_source': False,  # names the largest discount source
        'B_manual_violation': False,  # violation: states a round-off / waiver amount
        # Section C
        'C_balance_change_ref': False,  # cites the stored-value balance change field
        # Section D
        'D_labor_ratio_ref': False,  # labor cost ratio
        'D_zero_expense_flag': False,  # flags zero expense or missing data
        # Section E
        'E_weekday_ratio': False,  # seq 9 contains a peak/off-peak multiple
        'E_anomaly_baseline': False,  # seq 10 labels the baseline type
        # Section F
        'F_light': 'unknown',
        'F_top2_reasons': False,  # seq 11 lists at least two reasons
        'F_tracking_trigger': False,  # seq 12 has cadence + trigger action + threshold
        # Violations
        'V_industry_number': 0,  # mentions of made-up industry figures
        'V_speculation': 0,  # trend wording without a percentage to back it
    }
    # Payloads come straight from JSON files, so anything may show up here.
    if not parsed or not isinstance(parsed, dict):
        return metrics
    insights = parsed.get('insights') or []
    if not isinstance(insights, list):
        return metrics

    def _content(item: object) -> str:
        """Return the item's content text, or '' when missing or not a string."""
        value = item.get('content') if isinstance(item, dict) else None
        return value if isinstance(value, str) else ''

    def _section(*seq_ids: int) -> str:
        """Join the content of the given seq numbers with single spaces."""
        return ' '.join(_content(_get_seq(insights, seq)) for seq in seq_ids)

    metrics['count'] = len(insights)
    seqs = [ins.get('seq') for ins in insights if isinstance(ins, dict)]
    metrics['seq_complete'] = sorted(s for s in seqs if isinstance(s, int)) == list(range(1, 13))

    bodies = [_content(ins) for ins in insights if isinstance(ins, dict)]
    if insights:
        # The denominator is every list item, including non-dict ones.
        with_number = sum(1 for body in bodies if _has_number(body))
        total_len = sum(len(body) for body in bodies)
        metrics['has_number_rate'] = round(with_number / len(insights), 2)
        metrics['avg_content_len'] = round(total_len / len(insights))

    # Section A
    a_texts = _section(1, 2)
    metrics['A_unit_econ_ref'] = any(kw in a_texts for kw in ('客单价', '会员订单占比', '会员占比', '日均订单'))
    metrics['A_env_bi_ref'] = '_环比' in a_texts or bool(re.search(r'环比[^字段][^"]*?[+-]?\d+\.?\d*%', a_texts))
    metrics['A_calib_ref'] = any(kw in a_texts for kw in ('对比口径', '同天数对齐', '同期', '同日数', '截断到', '对比期'))

    # Section B
    b_texts = _section(3, 4)
    metrics['B_top_source'] = any(kw in b_texts for kw in ('最大', '主导', '占比最高', '占比超', '团购优惠', '主要来源'))
    # Violation: an explicit amount after the category word, not the bare category name.
    metrics['B_manual_violation'] = bool(re.search(r'(抹零|免单)\s*\d+[\.\d]*\s*元', b_texts))

    # Section C
    c_texts = _section(5, 6)
    metrics['C_balance_change_ref'] = any(kw in c_texts for kw in ('期初', '期末', '余额变化', '其他调整', '非充值/消耗'))

    # Section D
    d_texts = _section(7, 8)
    metrics['D_labor_ratio_ref'] = any(kw in d_texts for kw in ('人力成本', '助教成本', '占成交收入', '占比'))
    # A standalone zero amount only: a plain '0元' substring test would also
    # fire on any amount that merely ends in zero, such as '1200元'.
    zero_amount = re.search(r'(?<![\d.,,])0(?:\.0+)?\s*元', d_texts)
    metrics['D_zero_expense_flag'] = bool(zero_amount) or any(
        kw in d_texts
        for kw in ('支出为 0', '支出全 0', '支出全0', '支出为0', '数据缺失', '数据不完整', '数据完整性', '未录入')
    )

    # Section E
    seq9 = _section(9)
    seq10 = _section(10)
    metrics['E_weekday_ratio'] = bool(re.search(r'\d+\.?\d*\s*倍|比.*\d+\.?\d*', seq9))
    metrics['E_anomaly_baseline'] = any(kw in seq10 for kw in ('同周', '期均', '基线', '同星期'))

    # Section F
    seq11 = _section(11)
    seq12 = _section(12)
    # First match wins, in red > yellow > green priority.
    for pattern, color in ((r'🔴|红灯', 'red'), (r'🟡|黄灯', 'yellow'), (r'🟢|绿灯', 'green')):
        if re.search(pattern, seq11):
            metrics['F_light'] = color
            break
    # Accept several numbering styles for the first and the second reason.
    has_first = bool(re.search(r'原因\s*1|1\s*[\))\.、::]|①|原因一|其一', seq11))
    has_second = bool(re.search(r'原因\s*2|2\s*[\))\.、::]|②|原因二|其二', seq11))
    metrics['F_top2_reasons'] = has_first and has_second
    has_action = any(kw in seq12 for kw in ('启动', '触发', '召回', '立即'))
    has_cadence = bool(re.search(r'(每周|每月|每日|每天|每\s*\d+|周期性|定期)', seq12))
    has_threshold = bool(re.search(r'<|>|≥|≤|低于|超过|达到|阈值', seq12))
    metrics['F_tracking_trigger'] = has_action and has_cadence and has_threshold

    # Violation: an "industry ..." benchmark phrase followed by a number in the same clause.
    all_text = ' '.join(bodies)
    metrics['V_industry_number'] = len(re.findall(r'行业(警戒线|均值|标准|参考值|基线|基准|水平|经验值|通常|一般)[^,。;,]*\d+\.?\d*%?', all_text))

    # Violation: single-period speculation. Heuristic: the sentence has a
    # trend word but no percentage figure in it.
    speculation_hits = 0
    for sent in re.split(r'[。;\n]', all_text):
        if not sent.strip():
            continue
        has_trend_word = bool(re.search(r'(提升|下降|上升|下滑|显著|大幅|明显)', sent))
        has_pct_number = bool(re.search(r'[+-]?\d+\.?\d*%', sent))
        if not has_trend_word or has_pct_number:
            continue
        # Sentences that talk about not speculating are meta-instructions, not violations.
        if re.search(r'(推测|不|禁)', sent):
            continue
        speculation_hits += 1
    metrics['V_speculation'] = speculation_hits

    return metrics
|
||||
|
||||
|
||||
def summarize(rounds: list[dict], label: str) -> dict:
    """Aggregate the per-round quality metrics of one set of rounds.

    Each round's ``parsed`` value is scored with ``analyze_round``; boolean
    metrics become hit rates, violation metrics become totals, and the
    traffic-light values are counted per color.

    Args:
        rounds: Round records, each read through ``.get('parsed')``.
        label: Name of this set, stored as-is in the result.

    Returns:
        A dict with ``label``, ``n``, ``rates``, ``violations``,
        ``light_distribution``, ``avg_content_len`` and ``per_round``.
    """
    scored = [analyze_round(item.get('parsed')) for item in rounds]

    def _rate(key: str) -> float:
        # Share of rounds whose metric is truthy, to two decimals.
        if not scored:
            return 0.0
        hits = sum(1 if row.get(key) else 0 for row in scored)
        return round(hits / len(scored), 2)

    def _avg(key: str) -> float:
        # Mean of a numeric metric, to two decimals.
        if not scored:
            return 0.0
        return round(mean([row.get(key, 0) for row in scored]), 2)

    light_counts: dict[str, int] = {}
    for row in scored:
        color = row.get('F_light', 'unknown')
        light_counts[color] = light_counts.get(color, 0) + 1

    flag_keys = (
        'A_unit_econ_ref',
        'A_env_bi_ref',
        'A_calib_ref',
        'B_top_source',
        'C_balance_change_ref',
        'D_labor_ratio_ref',
        'D_zero_expense_flag',
        'E_weekday_ratio',
        'E_anomaly_baseline',
        'F_top2_reasons',
        'F_tracking_trigger',
    )
    rates = {
        'seq_complete': _rate('seq_complete'),
        'has_number': _avg('has_number_rate'),
    }
    for key in flag_keys:
        rates[key] = _rate(key)

    violations = {
        'B_manual': sum(1 for row in scored if row.get('B_manual_violation')),
        'industry_number_total': sum(row.get('V_industry_number', 0) for row in scored),
        'speculation_total': sum(row.get('V_speculation', 0) for row in scored),
    }

    return {
        'label': label,
        'n': len(rounds),
        'rates': rates,
        'violations': violations,
        'light_distribution': light_counts,
        'avg_content_len': _avg('avg_content_len'),
        'per_round': scored,
    }
|
||||
|
||||
|
||||
def print_summary(s: dict) -> None:
|
||||
print(f"\n=== Round {s['label'].upper()} 内容质量汇总(n={s['n']})===")
|
||||
print(f" 结构完整性:")
|
||||
print(f" seq 1-12 完整率: {s['rates']['seq_complete']:.0%}")
|
||||
print(f" 每条含数字比例: {s['rates']['has_number']:.0%}")
|
||||
print(f" 平均 content 字数: {s['avg_content_len']:.0f}")
|
||||
print(f" 板块 A · 收入:")
|
||||
print(f" 引用单位经济字段: {s['rates']['A_unit_econ_ref']:.0%}")
|
||||
print(f" 引用 _环比 真实值: {s['rates']['A_env_bi_ref']:.0%}")
|
||||
print(f" 引用对比口径: {s['rates']['A_calib_ref']:.0%} ★ v4 新增规则的关键指标")
|
||||
print(f" 板块 B · 优惠:")
|
||||
print(f" 点明最大来源: {s['rates']['B_top_source']:.0%}")
|
||||
print(f" 板块 C · 储值卡:")
|
||||
print(f" 引用余额变化字段: {s['rates']['C_balance_change_ref']:.0%}")
|
||||
print(f" 板块 D · 成本:")
|
||||
print(f" 引用人力成本占比: {s['rates']['D_labor_ratio_ref']:.0%}")
|
||||
print(f" 标注 0 支出/数据缺失: {s['rates']['D_zero_expense_flag']:.0%}")
|
||||
print(f" 板块 E · 时间规律:")
|
||||
print(f" seq 9 含旺/淡倍率: {s['rates']['E_weekday_ratio']:.0%}")
|
||||
print(f" seq 10 标注基线类型: {s['rates']['E_anomaly_baseline']:.0%}")
|
||||
print(f" 板块 F · 综合:")
|
||||
print(f" 三色灯分布: {s['light_distribution']}")
|
||||
print(f" seq 11 列 top 2 原因: {s['rates']['F_top2_reasons']:.0%}")
|
||||
print(f" seq 12 节奏+触发+阈值:{s['rates']['F_tracking_trigger']:.0%}")
|
||||
print(f" 违规统计(越低越好):")
|
||||
print(f" 手动调整违规次数: {s['violations']['B_manual']} / {s['n']}")
|
||||
print(f" 行业数字编造总计: {s['violations']['industry_number_total']}")
|
||||
print(f" 单期推测总计: {s['violations']['speculation_total']}")
|
||||
|
||||
|
||||
def print_compare(*summaries: dict) -> None:
    """Print a side-by-side table of several summaries, one column each.

    Args:
        *summaries: Summary dicts with ``label``, ``n``, ``rates``,
            ``violations``, ``light_distribution`` and ``avg_content_len``.
    """
    name_w = 34
    col_w = 10
    labels = [item['label'].upper() for item in summaries]
    rule = '-' * (name_w + col_w * len(summaries))

    def _emit(name: str, cells: list) -> None:
        # Left-align the metric name, right-align one cell per summary.
        print(f"{name:<34}" + ''.join(f"{cell:>{col_w}}" for cell in cells))

    def _row(name: str, values: list, fmt: str = 'percent') -> None:
        if fmt == 'percent':
            rendered = [f'{value:.0%}' for value in values]
        else:
            rendered = [str(value) for value in values]
        _emit(name, rendered)

    print(f"\n======= {' vs '.join(labels)} 多方对比表 =======")
    _emit('指标', labels)
    print(rule)

    rate_rows = (
        ('seq_complete', 'seq 1-12 完整率'),
        ('has_number', '每条含数字比例'),
        ('A_unit_econ_ref', 'A 引用单位经济字段'),
        ('A_env_bi_ref', 'A 引用 _环比 真实值'),
        ('A_calib_ref', 'A 引用对比口径 ★'),
        ('B_top_source', 'B 点明最大优惠来源'),
        ('C_balance_change_ref', 'C 引用余额变化字段'),
        ('D_labor_ratio_ref', 'D 引用人力成本占比'),
        ('D_zero_expense_flag', 'D 标注 0 支出'),
        ('E_weekday_ratio', 'E seq 9 含倍率'),
        ('E_anomaly_baseline', 'E seq 10 标注基线'),
        ('F_top2_reasons', 'F seq 11 列 top 2 原因 ★'),
        ('F_tracking_trigger', 'F seq 12 节奏+触发+阈值'),
    )
    for key, title in rate_rows:
        _row(title, [item['rates'][key] for item in summaries], 'percent')

    print(rule)
    print('违规次数(越低越好):')
    violation_rows = (
        ('B_manual', ' 手动调整违规'),
        ('industry_number_total', ' 行业数字编造'),
        ('speculation_total', ' 单期推测'),
    )
    for key, title in violation_rows:
        _row(title, [item['violations'][key] for item in summaries], 'int')
    print(rule)
    _row(' 平均字数', [f"{item['avg_content_len']:.0f}" for item in summaries], 'int')
    _row(' 样本数', [item['n'] for item in summaries], 'int')
    print()
    for item in summaries:
        print(f" {item['label'].upper()} 三色灯分布: {item['light_distribution']}")
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: analyze one directory or compare several.

    ``--dir DIR`` prints the summary for a single directory of
    ``round_*.json`` files. ``--compare DIR DIR [...]`` prints every summary,
    then a side-by-side table, and archives the combined result to
    ``export/ai-ab-test/_compare_<labels>.json`` (relative to the current
    working directory). When both options are given, ``--compare`` wins.

    Exits with an error message when a directory has no ``round_*.json``
    files, when ``--compare`` receives fewer than two directories, or when
    neither option is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', help='单目录分析:如 export/ai-ab-test/round_a')
    parser.add_argument('--compare', nargs='+', metavar='DIR', help='多轮对比(2-4 个目录)')
    args = parser.parse_args()

    if args.compare:
        dirs = [Path(d) for d in args.compare]
        if len(dirs) < 2:
            sys.exit('--compare 至少 2 个目录')
        summaries = []
        for d in dirs:
            rounds = _iter_rounds(d)
            if not rounds:
                sys.exit(f'目录无 round_*.json:{d}')
            label = d.name.replace('round_', '')
            summaries.append(summarize(rounds, label))
        for s in summaries:
            print_summary(s)
        print_compare(*summaries)
        # Archive the comparison as JSON.
        tag = '_'.join(s['label'] for s in summaries)
        out_path = Path(f'export/ai-ab-test/_compare_{tag}.json')
        # The export directory may not exist yet (different cwd, or inputs
        # that live elsewhere); create it so the archive step cannot fail
        # after all the analysis has already run.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps({s['label']: s for s in summaries}, ensure_ascii=False, indent=2), encoding='utf-8')
        print(f'\n[done] 对比 JSON 已存: {out_path}')
    elif args.dir:
        rounds = _iter_rounds(Path(args.dir))
        if not rounds:
            sys.exit(f'目录无 round_*.json:{args.dir}')
        label = Path(args.dir).name.replace('round_', '')
        s = summarize(rounds, label)
        print_summary(s)
    else:
        parser.error('需指定 --dir 或 --compare')
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user