feat: 2026-04-15~05-02 累积变更基线 — AI 重构 + Runtime Context + DWS 修复
涵盖(每条对应已存的审计记录): - AI 模块拆分:apps/backend/app/ai/apps -> prompts/(8 个 APP + app2a 派生) audit: 2026-04-20__ai-module-complete.md - admin-web AI 管理套件:AIDashboard / AIOperations / AIRunLogs / AITriggers / TriggerManager audit: 2026-04-21__admin-web-ai-management-suite.md - App2 财务洞察 prompt v3 -> v5.1 + 小程序 AI 接入(chat / board-finance) audit: 2026-04-22__app2_prompt_v5_1_and_miniprogram_ai_insight.md - App2 prewarm 全过滤器 + AI 触发器 cron reschedule audit: 2026-04-21__app2-finance-prewarm-all-filters.md migration: 20260420_ai_trigger_jobs_and_app2_prewarm.sql / 20260421_app2_prewarm_cron_reschedule.sql - AppType 联合类型对齐 + adminAiAppTypes.test.ts audit: 2026-04-30__admin_web_ai_app_type_alignment.md - DashScope tokens_used 提取修复 audit: 2026-04-30__backend_dashscope_tokens_used_extraction.md - App3 线索完整详情 prompt audit: 2026-05-01__backend_app3_full_detail_prompt.md - Runtime Context 沙箱(5-1~5-2 主线): - 后端 schema/service + admin_runtime_context / xcx_runtime_clock 两个 router - admin-web RuntimeContext.tsx + miniprogram runtime-clock.ts - migration: 20260501__runtime_context_sandbox.sql - tools/db/verify_admin_web_sandbox.py + verify_sandbox_end_to_end.py - database/changes: 7 份 sandbox_* 验证报告 - 飞球 DWS 修复:finance_area_daily 区域汇总 + task_engine 调整 + RLS 视图业务日上界(migration 20260502 + scripts/ops/gen_rls_business_date_migration.py) 合规: - .gitignore 启用 tmp/ 排除 - 不入仓:apps/etl/connectors/feiqiu/.env(API_TOKEN secret,本地修改保留) 待验证清单: - docs/audit/changes/2026-05-04__cumulative_baseline_pending_verification.md 每个主题的功能完整性 / 上线验证几乎都未收口,按优先级 P0~P3 逐一处理
This commit is contained in:
223
scripts/ab_test_app2_prompt.py
Normal file
223
scripts/ab_test_app2_prompt.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""App2 财务洞察 system prompt A/B 测试脚本。
|
||||
|
||||
流程:
|
||||
- 对同一 payload 连续调用百炼 N 次(默认 10 次),绕过 AI cache
|
||||
- 存档每次原始 JSON 到 export/ai-ab-test/round_<label>/
|
||||
- 输出稳定性汇总:长度分布、12 条齐整率、三色灯分布、加粗使用、关键字段命中率
|
||||
|
||||
用法:
|
||||
# Round A:当前百炼上的 system prompt(调用前用户已确认未替换)
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/ab_test_app2_prompt.py --label a --rounds 10
|
||||
# Round B:用户替换为 v4 concise 后执行
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/ab_test_app2_prompt.py --label b --rounds 10
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, 'apps/backend')
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(dotenv_path=os.path.join(os.getcwd(), '.env'))
|
||||
|
||||
from app.ai.config import AIConfig
|
||||
from app.ai.dashscope_client import DashScopeClient
|
||||
from app.ai.prompts.app2_finance_prompt import build_prompt
|
||||
|
||||
|
||||
SITE_ID = 2790685415443269
|
||||
TIME_DIMENSION = 'this_month'
|
||||
AREA = 'all'
|
||||
OUT_ROOT = Path('export/ai-ab-test')
|
||||
|
||||
|
||||
async def run_one(client: DashScopeClient, app_id: str, prompt: str, round_idx: int) -> dict:
    """Call the DashScope app once and return a structured result.

    The returned dict always carries the same keys ('ok', 'round_idx',
    'duration_s', 'tokens', 'parsed', 'error'); on failure the exception
    is captured as a string instead of propagating, so one bad round
    cannot kill the whole batch.
    """
    started = time.monotonic()
    try:
        parsed, tokens, _ = await client.call_app(app_id=app_id, prompt=prompt)
    except Exception as e:  # deliberate catch-all: archive the failure and keep going
        return {
            'ok': False,
            'round_idx': round_idx,
            'duration_s': round(time.monotonic() - started, 2),
            'tokens': 0,
            'parsed': None,
            'error': f'{type(e).__name__}: {e}',
        }
    return {
        'ok': True,
        'round_idx': round_idx,
        'duration_s': round(time.monotonic() - started, 2),
        'tokens': tokens,
        'parsed': parsed,
        'error': None,
    }
|
||||
|
||||
|
||||
def classify_light(content: str) -> str:
    """Return 'red'/'yellow'/'green' for the traffic-light marker in *content*.

    Red wins over yellow, yellow over green; both the emoji and the
    Chinese label are recognised. Returns 'unknown' when nothing matches.
    """
    for label, pattern in (('red', r'🔴|红灯'), ('yellow', r'🟡|黄灯'), ('green', r'🟢|绿灯')):
        if re.search(pattern, content):
            return label
    return 'unknown'


def analyze_insights(parsed: dict | None) -> dict:
    """Score the structural quality of one parsed AI response.

    Returns the insight count, whether exactly 12 items came back,
    whether 'seq' covers 1..12 with no gaps, the seq-11 light colour,
    and the total number of **bold** spans across all items.
    """
    empty = {'insights_count': 0, 'has_12': False, 'light': 'unknown', 'bold_count': 0, 'seq_complete': False}
    if not parsed:
        return empty
    insights = parsed.get('insights') or []
    if not isinstance(insights, list):
        return empty

    rows = [item for item in insights if isinstance(item, dict)]
    numeric_seqs = sorted(s for s in (row.get('seq') for row in rows) if isinstance(s, int))
    seq11 = next((row for row in rows if row.get('seq') == 11), None)
    total_bold = sum(len(re.findall(r'\*\*[^*]+\*\*', row.get('content') or '')) for row in rows)

    return {
        'insights_count': len(insights),
        'has_12': len(insights) == 12,
        'seq_complete': numeric_seqs == list(range(1, 13)),
        'light': classify_light(seq11.get('content') or '') if seq11 else 'unknown',
        'bold_count': total_bold,
    }
|
||||
|
||||
|
||||
async def main() -> None:
    """CLI entry point: call DashScope N times with one fixed payload and archive results.

    Flow: build the prompt once, call the app --rounds times with --delay
    seconds between calls (to dodge rate limits), write one round_XX.json
    per call plus a prompt snapshot, then print a stability summary and
    write/append _summary.csv under export/ai-ab-test/round_<label>/.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--label', required=True, help='测试轮次标识(a/b/v5 等)')
    parser.add_argument('--rounds', type=int, default=10, help='本次调用次数')
    parser.add_argument('--delay', type=float, default=1.5, help='每次调用间延时(秒),避免限流')
    parser.add_argument('--resume', action='store_true', help='断点续跑:起始 idx = 已有 round_*.json 数量 + 1')
    args = parser.parse_args()

    cfg = AIConfig.from_env()
    client = DashScopeClient(api_key=cfg.api_key, workspace_id=cfg.workspace_id)
    app_id = cfg.app_id_2_finance

    # Build the prompt ONCE: every round reuses the same payload, so the
    # only varying factor across rounds is the model itself.
    prompt = await build_prompt({
        'site_id': SITE_ID,
        'time_dimension': TIME_DIMENSION,
        'area': AREA,
    })
    print(f'[setup] prompt 长度 = {len(prompt)} 字符')
    print(f'[setup] app_id = {app_id}')
    print(f'[setup] label = {args.label.upper()}, rounds = {args.rounds}, delay = {args.delay}s')
    print()

    out_dir = OUT_ROOT / f'round_{args.label}'
    out_dir.mkdir(parents=True, exist_ok=True)
    # Archive the exact prompt used this run as an audit trail.
    # NOTE(review): assumes build_prompt returns a JSON string — json.loads
    # below will raise otherwise; confirm against build_prompt's contract.
    (out_dir / '_prompt_snapshot.json').write_text(
        json.dumps(json.loads(prompt), ensure_ascii=False, indent=2),
        encoding='utf-8',
    )

    # Resume support: count existing round_*.json files in the output dir
    # and continue numbering after them.
    if args.resume:
        existing = len(list(out_dir.glob('round_*.json')))
        start_idx = existing + 1
        end_idx = start_idx + args.rounds - 1
        print(f'[resume] 已有 {existing} 份,本次追加 idx {start_idx}~{end_idx}')
    else:
        start_idx = 1
        end_idx = args.rounds

    summary: list[dict] = []
    for i in range(start_idx, end_idx + 1):
        print(f'[round {args.label.upper()} · {i:02d}/{end_idx}] 调用中...', end=' ', flush=True)
        result = await run_one(client, app_id, prompt, i)
        analysis = analyze_insights(result['parsed'])
        row = {
            'round_idx': i,
            'ok': result['ok'],
            'duration_s': result['duration_s'],
            'tokens': result['tokens'],
            'error': result['error'],
            **analysis,
        }
        summary.append(row)

        # Archive this round: raw parsed payload plus the derived analysis.
        snapshot = {
            'meta': {
                'label': args.label,
                'round_idx': i,
                'time_dimension': TIME_DIMENSION,
                'area': AREA,
                'duration_s': result['duration_s'],
                'tokens': result['tokens'],
                'ok': result['ok'],
                'error': result['error'],
                **analysis,
            },
            'parsed': result['parsed'],
        }
        path = out_dir / f'round_{i:02d}.json'
        path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2), encoding='utf-8')

        if result['ok']:
            print(f'ok · {result["duration_s"]}s · tokens={result["tokens"]} · count={analysis["insights_count"]} · light={analysis["light"]} · bold={analysis["bold_count"]}')
        else:
            print(f'FAIL · {result["duration_s"]}s · {result["error"]}')

        # Throttle between calls, but not after the final one.
        if i < end_idx:
            await asyncio.sleep(args.delay)

    # Aggregate stats over successful rounds only.
    ok_rows = [r for r in summary if r['ok']]
    print()
    print(f'=== Round {args.label.upper()} 汇总({len(ok_rows)}/{args.rounds} 成功)===')
    if ok_rows:
        durations = [r['duration_s'] for r in ok_rows]
        tokens = [r['tokens'] for r in ok_rows]
        has_12_rate = sum(1 for r in ok_rows if r['has_12']) / len(ok_rows)
        seq_complete_rate = sum(1 for r in ok_rows if r['seq_complete']) / len(ok_rows)
        bold_avg = sum(r['bold_count'] for r in ok_rows) / len(ok_rows)
        lights: dict[str, int] = {}
        for r in ok_rows:
            lights[r['light']] = lights.get(r['light'], 0) + 1
        print(f' 时长: min={min(durations):.1f}s / max={max(durations):.1f}s / avg={sum(durations)/len(durations):.1f}s')
        print(f' tokens: min={min(tokens)} / max={max(tokens)} / avg={sum(tokens)/len(tokens):.0f}')
        print(f' 12 条齐整率: {has_12_rate:.0%}')
        print(f' seq 1-12 完整率: {seq_complete_rate:.0%}')
        print(f' 平均加粗数: {bold_avg:.1f} 次/次')
        print(f' 三色灯分布: {lights}')

    # Summary CSV: append in resume mode (header already written),
    # otherwise write fresh with a header row.
    import csv
    csv_path = out_dir / '_summary.csv'
    mode = 'a' if args.resume and csv_path.exists() else 'w'
    with csv_path.open(mode, encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(summary[0].keys()) if summary else [])
        if mode == 'w':
            writer.writeheader()
        writer.writerows(summary)
    print(f'[done] 存档到: {out_dir}')
    print(f'[done] CSV 汇总: {csv_path}')


if __name__ == '__main__':
    asyncio.run(main())
|
||||
336
scripts/analyze_ab_content_quality.py
Normal file
336
scripts/analyze_ab_content_quality.py
Normal file
@@ -0,0 +1,336 @@
|
||||
"""App2 A/B 测试 · 内容质量深度分析器。
|
||||
|
||||
围绕"实际信息质量"评估,非表现形式(加粗/时长)。
|
||||
|
||||
分析维度(按板块):
|
||||
- 板块 A(seq 1-2):客单价环比是否原字段引用(非推测)、是否识别"对比口径"
|
||||
- 板块 B(seq 3-4):最大优惠来源是否点明、手动调整是否合规表述(禁用"抹零/免单 X 元")
|
||||
- 板块 C(seq 5-6):是否引用权威字段"储值卡余额变化"(期初/期末/其他调整)
|
||||
- 板块 D(seq 7-8):支出完整性 + 人力成本占比
|
||||
- 板块 E(seq 9-10):seq 9 是否含"旺淡倍率";seq 10 是否标"同周X均值/期均"基线
|
||||
- 板块 F(seq 11-12):seq 11 三色灯 + top 2 原因;seq 12 跟踪节奏 + 触发动作 + 阈值
|
||||
|
||||
违规检测:
|
||||
- 禁用行业数字(payload 未提供):警戒线/均值/参考值/30%/40% 等无锚点百分比
|
||||
- 禁用单期推测("提升/下降/显著增长"但未引用 _环比)
|
||||
- 编造字段(payload 不含的字段名)
|
||||
|
||||
用法:
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --dir export/ai-ab-test/round_a
|
||||
# 对比两轮:
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_ab_content_quality.py --compare export/ai-ab-test/round_a export/ai-ab-test/round_b
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from statistics import mean, stdev
|
||||
|
||||
|
||||
def _iter_rounds(dir_path: Path) -> list[dict]:
|
||||
"""读取目录下 round_XX.json 按 round_idx 排序。"""
|
||||
files = sorted(dir_path.glob('round_*.json'))
|
||||
out = []
|
||||
for f in files:
|
||||
data = json.loads(f.read_text(encoding='utf-8'))
|
||||
data['_file'] = f.name
|
||||
out.append(data)
|
||||
return out
|
||||
|
||||
|
||||
def _get_seq(insights: list, seq: int) -> dict | None:
|
||||
for ins in insights:
|
||||
if isinstance(ins, dict) and ins.get('seq') == seq:
|
||||
return ins
|
||||
return None
|
||||
|
||||
|
||||
def _has_number(text: str) -> bool:
|
||||
"""content 是否含 ≥1 个具体数字或百分比。"""
|
||||
return bool(re.search(r'\d+(\.\d+)?%?', text))
|
||||
|
||||
|
||||
def analyze_round(parsed: dict | None) -> dict:
    """Score the content quality of one parsed response; returns 17 metrics.

    Keys are grouped by report block (A revenue, B discounts, C stored-value
    cards, D costs, E time patterns, F overall) plus two violation counters
    (V_*). All heuristics are keyword/regex matches against the Chinese
    insight text; an empty/malformed payload returns the all-false defaults.
    """
    metrics: dict = {
        # Basics
        'count': 0,
        'seq_complete': False,
        'has_number_rate': 0.0,  # share of insights containing a number (target: 100%)
        'avg_content_len': 0,
        # Block A
        'A_unit_econ_ref': False,  # seq 1-2 reference unit-economics fields (avg ticket / member share / daily orders)
        'A_env_bi_ref': False,  # cites a real month-over-month (_环比) value
        'A_calib_ref': False,  # cites the comparison caliber ("same-day-count alignment" / "same period")
        # Block B
        'B_top_source': False,  # names the biggest discount source
        'B_manual_violation': False,  # violation: spelled out "抹零 X 元" / "免单 X 元" amounts
        # Block C
        'C_balance_change_ref': False,  # cites the authoritative stored-value balance-change fields
        # Block D
        'D_labor_ratio_ref': False,  # labor-cost share
        'D_zero_expense_flag': False,  # flags zero expense or missing data
        # Block E
        'E_weekday_ratio': False,  # seq 9 contains a peak/off-peak multiplier ("X 倍")
        'E_anomaly_baseline': False,  # seq 10 names the baseline type ("同周" / "期均" / "基线")
        # Block F
        'F_light': 'unknown',
        'F_top2_reasons': False,  # seq 11 lists >= 2 reasons (1)...2)... / 原因一...原因二)
        'F_tracking_trigger': False,  # seq 12 has cadence + trigger action
        # Violations
        'V_industry_number': 0,  # count of invented industry-benchmark numbers
        'V_speculation': 0,  # single-period speculation (trend claim without a cited _环比 value)
    }
    if not parsed:
        return metrics
    insights = parsed.get('insights') or []
    if not isinstance(insights, list):
        return metrics
    metrics['count'] = len(insights)
    seqs = [ins.get('seq') for ins in insights if isinstance(ins, dict)]
    metrics['seq_complete'] = sorted([s for s in seqs if isinstance(s, int)]) == list(range(1, 13))

    # Per-insight length and digit coverage.
    total_len = 0
    with_number = 0
    for ins in insights:
        if not isinstance(ins, dict):
            continue
        body = (ins.get('content') or '')
        total_len += len(body)
        if _has_number(body):
            with_number += 1
    if insights:
        metrics['has_number_rate'] = round(with_number / len(insights), 2)
        metrics['avg_content_len'] = round(total_len / len(insights))

    # Block A: revenue (seq 1-2)
    a_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (1, 2))
    metrics['A_unit_econ_ref'] = any(kw in a_texts for kw in ('客单价', '会员订单占比', '会员占比', '日均订单'))
    metrics['A_env_bi_ref'] = '_环比' in a_texts or bool(re.search(r'环比[^字段][^"]*?[+-]?\d+\.?\d*%', a_texts))
    metrics['A_calib_ref'] = any(kw in a_texts for kw in ('对比口径', '同天数对齐', '同期', '同日数', '截断到', '对比期'))

    # Block B: discounts (seq 3-4)
    b_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (3, 4))
    metrics['B_top_source'] = any(kw in b_texts for kw in ('最大', '主导', '占比最高', '占比超', '团购优惠', '主要来源'))
    # Violation only when a concrete amount is attached ("抹零 X 元"), not the
    # bare category name.
    metrics['B_manual_violation'] = bool(re.search(r'(抹零|免单)\s*\d+[\.\d]*\s*元', b_texts))

    # Block C: stored-value cards (seq 5-6)
    c_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (5, 6))
    metrics['C_balance_change_ref'] = any(kw in c_texts for kw in ('期初', '期末', '余额变化', '其他调整', '非充值/消耗'))

    # Block D: costs (seq 7-8)
    d_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (7, 8))
    metrics['D_labor_ratio_ref'] = any(kw in d_texts for kw in ('人力成本', '助教成本', '占成交收入', '占比'))
    metrics['D_zero_expense_flag'] = any(kw in d_texts for kw in ('支出为 0', '支出全 0', '支出全0', '支出为0', '0 元', '0元', '数据缺失', '数据不完整', '数据完整性', '未录入'))

    # Block E: time patterns (seq 9-10)
    seq9 = (_get_seq(insights, 9) or {}).get('content') or ''
    seq10 = (_get_seq(insights, 10) or {}).get('content') or ''
    metrics['E_weekday_ratio'] = bool(re.search(r'\d+\.?\d*\s*倍|比.*\d+\.?\d*', seq9))
    metrics['E_anomaly_baseline'] = any(kw in seq10 for kw in ('同周', '期均', '基线', '同星期'))

    # Block F: overall (seq 11-12)
    seq11 = (_get_seq(insights, 11) or {}).get('content') or ''
    seq12 = (_get_seq(insights, 12) or {}).get('content') or ''
    if re.search(r'🔴|红灯', seq11):
        metrics['F_light'] = 'red'
    elif re.search(r'🟡|黄灯', seq11):
        metrics['F_light'] = 'yellow'
    elif re.search(r'🟢|绿灯', seq11):
        metrics['F_light'] = 'green'
    # Matches "原因1:" / "原因 1:" / "1)" / "1." / "1、" / "①" / "原因一" / "其一"
    metrics['F_top2_reasons'] = bool(re.search(r'原因\s*1|1\s*[\))\.、::]|①|原因一|其一', seq11)) and \
                                bool(re.search(r'原因\s*2|2\s*[\))\.、::]|②|原因二|其二', seq11))
    metrics['F_tracking_trigger'] = any(kw in seq12 for kw in ('启动', '触发', '召回', '立即')) and \
                                    bool(re.search(r'(每周|每月|每日|每天|每\s*\d+|周期性|定期)', seq12)) and \
                                    bool(re.search(r'<|>|≥|≤|低于|超过|达到|阈值', seq12))

    # Violation: industry numbers (the payload only supplies the weekday
    # traffic pattern — any other "industry benchmark" number is invented).
    # Typical phrasing: "行业警戒线" / "行业均值" / "行业标准" + a number.
    all_text = ' '.join((ins.get('content') or '') for ins in insights if isinstance(ins, dict))
    metrics['V_industry_number'] = len(re.findall(r'行业(警戒线|均值|标准|参考值|基线|基准|水平|经验值|通常|一般)[^,。;,]*\d+\.?\d*%?', all_text))

    # Violation: single-period speculation — heuristic: a sentence carries a
    # trend word ("提升/下降/显著/大幅/明显") but no percentage number.
    speculation_hits = 0
    for sent in re.split(r'[。;\n]', all_text):
        if not sent.strip():
            continue
        has_trend_word = bool(re.search(r'(提升|下降|上升|下滑|显著|大幅|明显)', sent))
        has_pct_number = bool(re.search(r'[+-]?\d+\.?\d*%', sent))
        if has_trend_word and not has_pct_number:
            # Meta-instructions like "不推测"/"禁止推测" are not violations.
            if re.search(r'(推测|不|禁)', sent):
                continue
            speculation_hits += 1
    metrics['V_speculation'] = speculation_hits

    return metrics
|
||||
|
||||
|
||||
def summarize(rounds: list[dict], label: str) -> dict:
    """Aggregate per-round content-quality metrics into one summary dict.

    *rounds* are the raw round_*.json payloads; each is scored with
    analyze_round, then boolean metrics become hit rates, violation
    counters are summed, and seq-11 light colours are tallied.
    """
    scored = [analyze_round(rd.get('parsed')) for rd in rounds]

    def _rate(key: str) -> float:
        # Share of rounds where the boolean metric is truthy.
        if not scored:
            return 0.0
        return round(sum(1 for m in scored if m.get(key)) / len(scored), 2)

    def _avg(key: str) -> float:
        # Mean of a numeric metric across rounds.
        if not scored:
            return 0.0
        return round(mean(m.get(key, 0) for m in scored), 2)

    lights: dict[str, int] = {}
    for m in scored:
        colour = m.get('F_light', 'unknown')
        lights[colour] = lights.get(colour, 0) + 1

    # Keep key order stable ('has_number' second) so archived JSON diffs cleanly.
    rates = {
        'seq_complete': _rate('seq_complete'),
        'has_number': _avg('has_number_rate'),
    }
    for key in ('A_unit_econ_ref', 'A_env_bi_ref', 'A_calib_ref', 'B_top_source',
                'C_balance_change_ref', 'D_labor_ratio_ref', 'D_zero_expense_flag',
                'E_weekday_ratio', 'E_anomaly_baseline', 'F_top2_reasons',
                'F_tracking_trigger'):
        rates[key] = _rate(key)

    return {
        'label': label,
        'n': len(rounds),
        'rates': rates,
        'violations': {
            'B_manual': sum(1 for m in scored if m.get('B_manual_violation')),
            'industry_number_total': sum(m.get('V_industry_number', 0) for m in scored),
            'speculation_total': sum(m.get('V_speculation', 0) for m in scored),
        },
        'light_distribution': lights,
        'avg_content_len': _avg('avg_content_len'),
        'per_round': scored,
    }
|
||||
|
||||
|
||||
def print_summary(s: dict) -> None:
    """Pretty-print one summarize() result to stdout, grouped by report block.

    Rates are printed as percentages; violation counters last (lower is
    better). Output text is user-facing and must stay byte-stable.
    """
    print(f"\n=== Round {s['label'].upper()} 内容质量汇总(n={s['n']})===")
    print(f" 结构完整性:")
    print(f" seq 1-12 完整率: {s['rates']['seq_complete']:.0%}")
    print(f" 每条含数字比例: {s['rates']['has_number']:.0%}")
    print(f" 平均 content 字数: {s['avg_content_len']:.0f}")
    print(f" 板块 A · 收入:")
    print(f" 引用单位经济字段: {s['rates']['A_unit_econ_ref']:.0%}")
    print(f" 引用 _环比 真实值: {s['rates']['A_env_bi_ref']:.0%}")
    print(f" 引用对比口径: {s['rates']['A_calib_ref']:.0%} ★ v4 新增规则的关键指标")
    print(f" 板块 B · 优惠:")
    print(f" 点明最大来源: {s['rates']['B_top_source']:.0%}")
    print(f" 板块 C · 储值卡:")
    print(f" 引用余额变化字段: {s['rates']['C_balance_change_ref']:.0%}")
    print(f" 板块 D · 成本:")
    print(f" 引用人力成本占比: {s['rates']['D_labor_ratio_ref']:.0%}")
    print(f" 标注 0 支出/数据缺失: {s['rates']['D_zero_expense_flag']:.0%}")
    print(f" 板块 E · 时间规律:")
    print(f" seq 9 含旺/淡倍率: {s['rates']['E_weekday_ratio']:.0%}")
    print(f" seq 10 标注基线类型: {s['rates']['E_anomaly_baseline']:.0%}")
    print(f" 板块 F · 综合:")
    print(f" 三色灯分布: {s['light_distribution']}")
    print(f" seq 11 列 top 2 原因: {s['rates']['F_top2_reasons']:.0%}")
    print(f" seq 12 节奏+触发+阈值:{s['rates']['F_tracking_trigger']:.0%}")
    print(f" 违规统计(越低越好):")
    print(f" 手动调整违规次数: {s['violations']['B_manual']} / {s['n']}")
    print(f" 行业数字编造总计: {s['violations']['industry_number_total']}")
    print(f" 单期推测总计: {s['violations']['speculation_total']}")
|
||||
|
||||
|
||||
def print_compare(*summaries: dict) -> None:
    """Print a fixed-width side-by-side comparison table for 2+ summaries.

    One column per round label; rate rows render as percentages, violation
    and count rows as plain integers. Output is user-facing text.
    """
    labels = [s['label'].upper() for s in summaries]
    header = ' vs '.join(labels)
    print(f"\n======= {header} 多方对比表 =======")
    col_w = 10  # column width for each round's value cell
    print(f"{'指标':<34}" + ''.join(f"{l:>{col_w}}" for l in labels))
    print('-' * (34 + col_w * len(labels)))

    def _row(name: str, values: list, fmt: str = 'percent') -> None:
        # Render one table row; fmt selects percentage vs raw str() cells.
        cells = []
        for v in values:
            if fmt == 'percent':
                cells.append(f'{v:.0%}')
            else:
                cells.append(str(v))
        print(f"{name:<34}" + ''.join(f"{c:>{col_w}}" for c in cells))

    for k, name in (
        ('seq_complete', 'seq 1-12 完整率'),
        ('has_number', '每条含数字比例'),
        ('A_unit_econ_ref', 'A 引用单位经济字段'),
        ('A_env_bi_ref', 'A 引用 _环比 真实值'),
        ('A_calib_ref', 'A 引用对比口径 ★'),
        ('B_top_source', 'B 点明最大优惠来源'),
        ('C_balance_change_ref', 'C 引用余额变化字段'),
        ('D_labor_ratio_ref', 'D 引用人力成本占比'),
        ('D_zero_expense_flag', 'D 标注 0 支出'),
        ('E_weekday_ratio', 'E seq 9 含倍率'),
        ('E_anomaly_baseline', 'E seq 10 标注基线'),
        ('F_top2_reasons', 'F seq 11 列 top 2 原因 ★'),
        ('F_tracking_trigger', 'F seq 12 节奏+触发+阈值'),
    ):
        _row(name, [s['rates'][k] for s in summaries], 'percent')

    print('-' * (34 + col_w * len(summaries)))
    print('违规次数(越低越好):')
    _row(' 手动调整违规', [s['violations']['B_manual'] for s in summaries], 'int')
    _row(' 行业数字编造', [s['violations']['industry_number_total'] for s in summaries], 'int')
    _row(' 单期推测', [s['violations']['speculation_total'] for s in summaries], 'int')
    print('-' * (34 + col_w * len(summaries)))
    # avg_content_len is pre-formatted to a string here, so the 'int' path
    # (plain str()) just passes it through.
    _row(' 平均字数', [f"{s['avg_content_len']:.0f}" for s in summaries], 'int')
    _row(' 样本数', [s['n'] for s in summaries], 'int')
    print()
    for s in summaries:
        print(f" {s['label'].upper()} 三色灯分布: {s['light_distribution']}")
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: analyse one round directory or compare several.

    --dir prints a single-round quality summary; --compare (2+ dirs)
    prints each summary plus a side-by-side table and archives the
    comparison JSON under export/ai-ab-test/.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', help='单目录分析:如 export/ai-ab-test/round_a')
    parser.add_argument('--compare', nargs='+', metavar='DIR', help='多轮对比(2-4 个目录)')
    args = parser.parse_args()

    if args.compare:
        dirs = [Path(d) for d in args.compare]
        if len(dirs) < 2:
            sys.exit('--compare 至少 2 个目录')
        summaries = []
        for d in dirs:
            rounds = _iter_rounds(d)
            if not rounds:
                sys.exit(f'目录无 round_*.json:{d}')
            # Round label is the directory name minus the 'round_' prefix.
            label = d.name.replace('round_', '')
            summaries.append(summarize(rounds, label))
        for s in summaries:
            print_summary(s)
        print_compare(*summaries)
        # Archive the comparison JSON, tagged with all labels joined by '_'.
        tag = '_'.join(s['label'] for s in summaries)
        out_path = Path(f'export/ai-ab-test/_compare_{tag}.json')
        out_path.write_text(json.dumps({s['label']: s for s in summaries}, ensure_ascii=False, indent=2), encoding='utf-8')
        print(f'\n[done] 对比 JSON 已存: {out_path}')
    elif args.dir:
        rounds = _iter_rounds(Path(args.dir))
        if not rounds:
            sys.exit(f'目录无 round_*.json:{args.dir}')
        label = Path(args.dir).name.replace('round_', '')
        s = summarize(rounds, label)
        print_summary(s)
    else:
        parser.error('需指定 --dir 或 --compare')


if __name__ == '__main__':
    main()
|
||||
531
scripts/analyze_store_manager_quality.py
Normal file
531
scripts/analyze_store_manager_quality.py
Normal file
@@ -0,0 +1,531 @@
|
||||
"""App2 财务洞察 · 店长视角评分方法 v2。
|
||||
|
||||
围绕"店长读了这 12 条能做什么 / 学到什么"做评估,三大维度:
|
||||
|
||||
【1】准确性(客观正确 · 越高越好)
|
||||
- 数字取自 payload,无编造
|
||||
- 引用权威字段(非原始指标兜底)
|
||||
- 遵守业务规则(手动调整不拆明细 / 禁行业数字 / 禁单期推测 / 对比口径)
|
||||
- 数据缺失/0 支出主动标注
|
||||
|
||||
【2】洞察深度(业务价值 · 越高越好)
|
||||
- 非显性信号挖掘("消耗>充值=存量消费"/"差值=平均让利"等解读)
|
||||
- 多指标协同分析(seq 11 结构失衡 + 原因互相印证)
|
||||
- 根因而非表象(如"数据录入缺失"而不只"支出为 0")
|
||||
- seq 12 跟踪 4 要素齐全(指标 / 阈值 / 节奏 / 触发动作)
|
||||
- 避免空洞表达("关注 XX" / "加强 XX" 被扣分)
|
||||
|
||||
【3】稳定性(可靠性 · 越高越好;稳定 ≠ 僵化)
|
||||
- 健康度评级方向一致(同数据下 10 次评级应相似,灯色众数占比 ≥ 80%)
|
||||
- 关键原因收敛(seq 11 原因 1/2 所引 key signal 的 IoU ≥ 60%)
|
||||
- seq 12 跟踪指标选择一致(10 次中 TOP 指标命中率高)
|
||||
- 字数/时长波动小(内容饱满但不冗余 · CV 低)
|
||||
|
||||
用法:
|
||||
PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe scripts/analyze_store_manager_quality.py \\
|
||||
--compare export/ai-ab-test/round_a export/ai-ab-test/round_b \\
|
||||
export/ai-ab-test/round_v5 export/ai-ab-test/round_v5_1
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from statistics import mean, stdev
|
||||
|
||||
|
||||
# ===== 核心业务关键词库(店长语汇) =====
|
||||
|
||||
# 挖掘深度关键信号
|
||||
DEEP_SIGNALS = [
|
||||
# 储值卡负债动态
|
||||
('消耗.*充值|充值.*消耗', '储值卡充消对比'),
|
||||
('存量消费|复购乏力|复购.*收缩|复购.*减弱', '复购解读'),
|
||||
('负债.*累积|兑付压力|负债.*减轻', '负债解读'),
|
||||
# 客单价双口径差值
|
||||
('按成交收入.*按发生额|按发生额.*按成交收入', '客单价双口径对比'),
|
||||
('每单.*让利|让利.*量级|让利.*金额', '让利量化'),
|
||||
# 业态归因
|
||||
('业态|定位|散客|团购为主|车站|商场', '业态归因'),
|
||||
# 数据完整性
|
||||
('数据.*缺失|数据.*完整|录入.*缺失|支出.*全.*0|支出.*为.*0|虚高风险|净利.*可信', '数据质量质疑'),
|
||||
# 协同恶化/结构失衡
|
||||
('结构失衡|协同|多指标.*同|同向恶化', '结构性洞察'),
|
||||
# 时间规律深度
|
||||
('周六.*周.*倍|周末.*工作日|旺淡日.*倍', '周规律倍率'),
|
||||
('同周|同星期|同周基线', '基线识别'),
|
||||
]
|
||||
|
||||
# 空洞表达(应被扣分)
|
||||
HOLLOW_PATTERNS = [
|
||||
r'^关注\s*[^,。]+$',
|
||||
r'建议关注',
|
||||
r'加强\s*(\w+)?运营',
|
||||
r'提升\s*(\w+)?管理',
|
||||
r'需要重视',
|
||||
r'应当注意',
|
||||
]
|
||||
|
||||
# 趋势词 · 必须同句内有 % 或绝对值数字(否则单期推测违规)
|
||||
TREND_WORDS = ['下滑', '下降', '上升', '提升', '收缩', '萎缩', '承压', '走弱', '走强',
|
||||
'加剧', '恶化', '持续', '显著', '大幅', '明显', '锐减', '攀升']
|
||||
|
||||
|
||||
def _iter_rounds(dir_path: Path) -> list[dict]:
|
||||
files = sorted(dir_path.glob('round_*.json'))
|
||||
out = []
|
||||
for f in files:
|
||||
data = json.loads(f.read_text(encoding='utf-8'))
|
||||
data['_file'] = f.name
|
||||
out.append(data)
|
||||
return out
|
||||
|
||||
|
||||
def _get_seq(insights: list, seq: int) -> dict | None:
|
||||
for ins in insights:
|
||||
if isinstance(ins, dict) and ins.get('seq') == seq:
|
||||
return ins
|
||||
return None
|
||||
|
||||
|
||||
def analyze_accuracy(insights: list) -> dict:
|
||||
"""准确性评分(每项 0/1)。"""
|
||||
a_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (1, 2))
|
||||
b_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (3, 4))
|
||||
c_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (5, 6))
|
||||
d_texts = ' '.join((_get_seq(insights, s) or {}).get('content') or '' for s in (7, 8))
|
||||
seq9 = (_get_seq(insights, 9) or {}).get('content') or ''
|
||||
seq10 = (_get_seq(insights, 10) or {}).get('content') or ''
|
||||
all_text = ' '.join((ins.get('content') or '') for ins in insights if isinstance(ins, dict))
|
||||
|
||||
# 1. 对比口径显式引用(V5.1 H1 硬性要求)
|
||||
calib_explicit = bool(re.search(r'对比口径|同期对齐|同天数对齐|当期\s*\d+\s*天.*上期|\d+\s*天同期', a_texts))
|
||||
|
||||
# 2. 权威字段引用(C 储值卡余额变化 / A 单位经济环比)
|
||||
authority_c = any(k in c_texts for k in ('期初', '期末', '余额变化', '其他调整'))
|
||||
authority_a = bool(re.search(r'[+-]?\d+\.?\d*%', a_texts)) # 有实际环比数字
|
||||
|
||||
# 3. 规则合规
|
||||
no_industry_number = not bool(re.search(r'行业(警戒线|均值|标准|参考|基准|水平|经验值|通常).*\d+\.?\d*%?', all_text))
|
||||
no_manual_detail = not bool(re.search(r'(抹零|免单)\s*\d+[\.\d]*\s*元', b_texts))
|
||||
|
||||
# 4. 单期推测违规计数(趋势词同句内是否有数字锚点)
|
||||
speculation_count = 0
|
||||
for sent in re.split(r'[。;\n]', all_text):
|
||||
if not sent.strip():
|
||||
continue
|
||||
has_trend_word = any(w in sent for w in TREND_WORDS)
|
||||
has_number = bool(re.search(r'[+-]?\d+\.?\d*%|\d{3,}元|\d+\s*元|\-\d+', sent))
|
||||
if has_trend_word and not has_number:
|
||||
# 允许"禁止推测"等元指令跳过
|
||||
if '推测' in sent or '禁' in sent or '不能' in sent:
|
||||
continue
|
||||
speculation_count += 1
|
||||
|
||||
# 5. 数据完整性标注(D 板块 0 支出主动指出)
|
||||
data_integrity_flagged = any(k in d_texts for k in
|
||||
('支出.*0', '数据缺失', '录入', '不完整', '虚高', '无法评估', '可信度'))
|
||||
|
||||
# 6. 每条含数字
|
||||
ins_with_number = sum(1 for ins in insights if isinstance(ins, dict) and
|
||||
re.search(r'\d+', ins.get('content') or ''))
|
||||
number_rate = ins_with_number / len(insights) if insights else 0
|
||||
|
||||
return {
|
||||
'calib_explicit': int(calib_explicit),
|
||||
'authority_c': int(authority_c),
|
||||
'authority_a_env_bi': int(authority_a),
|
||||
'no_industry_number': int(no_industry_number),
|
||||
'no_manual_detail': int(no_manual_detail),
|
||||
'speculation_count': speculation_count, # 越低越好
|
||||
'data_integrity_flagged': int(data_integrity_flagged),
|
||||
'number_rate': round(number_rate, 2),
|
||||
}
|
||||
|
||||
|
||||
def analyze_depth(insights: list) -> dict:
|
||||
"""洞察深度评分。"""
|
||||
all_text = ' '.join((ins.get('content') or '') for ins in insights if isinstance(ins, dict))
|
||||
|
||||
# 1. 深度信号命中数(10 类里命中几类)
|
||||
deep_hits: list[str] = []
|
||||
for pattern, name in DEEP_SIGNALS:
|
||||
if re.search(pattern, all_text):
|
||||
deep_hits.append(name)
|
||||
deep_hit_count = len(deep_hits)
|
||||
|
||||
# 2. 空洞表达计数(每条按句扫描)
|
||||
hollow_count = 0
|
||||
for ins in insights:
|
||||
if not isinstance(ins, dict):
|
||||
continue
|
||||
body = ins.get('content') or ''
|
||||
for pattern in HOLLOW_PATTERNS:
|
||||
if re.search(pattern, body):
|
||||
hollow_count += 1
|
||||
break
|
||||
|
||||
# 3. seq 11 top 2 原因结构 + 每个原因是否有意义解读(不只数字堆砌)
|
||||
seq11 = (_get_seq(insights, 11) or {}).get('content') or ''
|
||||
has_r1 = bool(re.search(r'原因\s*1|1\s*[\))\.、::]|①|原因一', seq11))
|
||||
has_r2 = bool(re.search(r'原因\s*2|2\s*[\))\.、::]|②|原因二', seq11))
|
||||
f_top2 = int(has_r1 and has_r2)
|
||||
# 意义解读:原因文字里是否有解读词("收缩/虚高/失衡/风险/无法评估"等因果意义)
|
||||
has_semantic = bool(re.search(r'收缩|虚高|失衡|风险|无法评估|可信|压力|乏力|不足|崩塌|疲态|暴露',
|
||||
seq11))
|
||||
f_top2_semantic = int(has_r1 and has_r2 and has_semantic)
|
||||
|
||||
# 4. seq 12 跟踪 4 要素齐全(指标/阈值/节奏/动作)
|
||||
seq12 = (_get_seq(insights, 12) or {}).get('content') or ''
|
||||
has_indicator = bool(re.search(r'[储值卡余额|会员|客单价|成交收入|现金流入|现金流出|支出|占比|助教|订单]', seq12))
|
||||
has_rhythm = bool(re.search(r'(每[周月日天]|每\s*\d+|双周|旬|定期|周期性|连续)', seq12))
|
||||
has_threshold = bool(re.search(r'<\s*-?\d+|>\s*-?\d+|≥|≤|达到|跌破|超过|低于|目标', seq12))
|
||||
has_action = bool(re.search(r'(启动|触发|召回|发起|立即|核查|补录|校准|活动|赠金|审批|预算)', seq12))
|
||||
seq12_4elem = int(has_indicator and has_rhythm and has_threshold and has_action)
|
||||
|
||||
# 5. 协同分析识别(seq 11 是否讲到多指标联动)
|
||||
collaborative = bool(re.search(r'(\+|与|及|和|协同|同时|叠加|共同|双收缩|双下降).*(\+|与|及|和)', seq11)) or \
|
||||
bool(re.search(r'\d+.*\d+.*\d+', seq11)) # 至少 3 个数字说明多信号
|
||||
|
||||
return {
|
||||
'deep_hit_count': deep_hit_count, # 0-10
|
||||
'deep_hits': deep_hits,
|
||||
'hollow_count': hollow_count, # 越低越好
|
||||
'f_top2_structure': f_top2,
|
||||
'f_top2_with_semantic': f_top2_semantic,
|
||||
'seq12_4elem_complete': seq12_4elem,
|
||||
'collaborative_analysis': int(collaborative),
|
||||
}
|
||||
|
||||
|
||||
def analyze_stability_round(insights: list) -> dict:
    """Extract a single round's "fingerprint" for the stability aggregation.

    Returns the health-light color, the set of business key signals matched
    in seq 11, the tracking indicator chosen in seq 12, and the mean content
    length across all insight entries.
    """
    # 1. Health-light color derived from seq 11 emoji / wording.
    seq11 = (_get_seq(insights, 11) or {}).get('content') or ''
    light_rules = [
        ('red', r'🔴|红灯'),
        ('yellow', r'🟡|黄灯'),
        ('green', r'🟢|绿灯'),
    ]
    light = next(
        (color for color, pat in light_rules if re.search(pat, seq11)),
        'unknown',
    )

    # 2. Key business signals hit in seq 11 (regex vocabulary).
    KEY_SIGNAL_VOCAB = [
        '会员占比', '会员订单占比',
        '客单价',
        '储值卡余额', '储值卡',
        '支出.*0', '支出缺失', '数据缺失',
        '复购',
        '优惠',
        '助教成本', '人力成本',
        '成交收入', '现金流入',
    ]
    signals = {kw for kw in KEY_SIGNAL_VOCAB if re.search(kw, seq11)}

    # 3. Tracking indicator selected in seq 12 (title + body scanned together,
    #    first matching rule wins).
    seq12_entry = _get_seq(insights, 12) or {}
    haystack = (seq12_entry.get('title') or '') + ' ' + (seq12_entry.get('content') or '')
    indicator_rules = [
        ('储值卡余额', '储值卡余额'),
        ('会员占比|会员订单占比', '会员占比'),
        ('客单价', '客单价'),
        ('现金流出|支出', '现金流出/支出'),
        ('成交收入', '成交收入'),
        ('助教|人力成本', '助教/人力成本'),
    ]
    tracking_indicator = next(
        (label for kw, label in indicator_rules if re.search(kw, haystack)),
        'unknown',
    )

    # 4. Mean content length over all dict-typed entries.
    if insights:
        avg_len = sum(
            len(ins.get('content') or '') for ins in insights if isinstance(ins, dict)
        ) / len(insights)
    else:
        avg_len = 0

    return {
        'light': light,
        'seq11_signals': signals,
        'tracking_indicator': tracking_indicator,
        'avg_content_len': round(avg_len, 1),
    }
|
||||
|
||||
|
||||
def summarize(rounds_data: list[dict], label: str, perf: list[dict] | None = None) -> dict:
    """Aggregate the three-layer (accuracy / depth / stability) score over n rounds.

    Args:
        rounds_data: raw ``round_*.json`` payloads (each carrying a ``parsed`` key).
        label: round label echoed back in the returned summary.
        perf: externally supplied performance records (``duration_s``, ``tokens``),
            typically pulled from each round's ``meta``.

    Returns:
        Per-dimension metric breakdowns plus a weighted 0-100 composite, or
        ``{'label': ..., 'n': 0}`` when no round yielded usable insights.
    """
    accuracies = []
    depths = []
    stab_fingerprints = []

    for rd in rounds_data:
        insights = (rd.get('parsed') or {}).get('insights') or []
        # Rounds with a failed parse or empty insights are excluded from n.
        if not isinstance(insights, list) or not insights:
            continue
        accuracies.append(analyze_accuracy(insights))
        depths.append(analyze_depth(insights))
        stab_fingerprints.append(analyze_stability_round(insights))

    n = len(accuracies)
    if n == 0:
        return {'label': label, 'n': 0}

    # === Accuracy aggregation ===
    acc_scores = {
        'calib_explicit_rate': mean(a['calib_explicit'] for a in accuracies),
        'authority_c_rate': mean(a['authority_c'] for a in accuracies),
        'authority_a_env_bi_rate': mean(a['authority_a_env_bi'] for a in accuracies),
        'no_industry_number_rate': mean(a['no_industry_number'] for a in accuracies),
        'no_manual_detail_rate': mean(a['no_manual_detail'] for a in accuracies),
        'speculation_avg': mean(a['speculation_count'] for a in accuracies),
        'data_integrity_flagged_rate': mean(a['data_integrity_flagged'] for a in accuracies),
        'number_rate_avg': mean(a['number_rate'] for a in accuracies),
    }

    # === Insight-depth aggregation ===
    depth_scores = {
        'deep_hit_avg': mean(d['deep_hit_count'] for d in depths),
        'deep_hit_union': len(set.union(*[set(d['deep_hits']) for d in depths])) if depths else 0,
        'hollow_avg': mean(d['hollow_count'] for d in depths),
        'f_top2_rate': mean(d['f_top2_structure'] for d in depths),
        'f_top2_with_semantic_rate': mean(d['f_top2_with_semantic'] for d in depths),
        'seq12_4elem_rate': mean(d['seq12_4elem_complete'] for d in depths),
        'collab_analysis_rate': mean(d['collaborative_analysis'] for d in depths),
    }

    # === Stability aggregation (core ideas: mode share / IoU / CV) ===
    # Share of rounds agreeing with the modal light color.
    light_counter = Counter(s['light'] for s in stab_fingerprints)
    light_mode_rate = light_counter.most_common(1)[0][1] / n

    # seq 11 signals: mean pairwise IoU across the n rounds.
    iou_scores = []
    for i in range(n):
        for j in range(i + 1, n):
            s1, s2 = stab_fingerprints[i]['seq11_signals'], stab_fingerprints[j]['seq11_signals']
            union = s1 | s2
            if not union:
                continue
            iou_scores.append(len(s1 & s2) / len(union))
    seq11_iou = mean(iou_scores) if iou_scores else 0

    # Tracking-indicator consistency (mode share).
    ti_counter = Counter(s['tracking_indicator'] for s in stab_fingerprints)
    tracking_mode_rate = ti_counter.most_common(1)[0][1] / n

    # Content-length coefficient of variation (lower = more stable).
    content_lens = [s['avg_content_len'] for s in stab_fingerprints]
    content_cv = (stdev(content_lens) / mean(content_lens)) if len(content_lens) > 1 and mean(content_lens) > 0 else 0

    # Performance CV (duration / tokens may be missing per round).
    perf_data = perf or []
    durations = [p['duration_s'] for p in perf_data if p.get('duration_s') is not None]
    tokens = [p['tokens'] for p in perf_data if p.get('tokens') is not None]
    duration_cv = (stdev(durations) / mean(durations)) if len(durations) > 1 and mean(durations) > 0 else 0
    tokens_cv = (stdev(tokens) / mean(tokens)) if len(tokens) > 1 and mean(tokens) > 0 else 0

    stab_scores = {
        'light_mode': light_counter.most_common(1)[0][0],
        'light_mode_rate': light_mode_rate,
        'light_distribution': dict(light_counter),
        'seq11_signal_iou': seq11_iou,
        'tracking_mode': ti_counter.most_common(1)[0][0],
        'tracking_mode_rate': tracking_mode_rate,
        'content_len_cv': content_cv,
        'duration_cv': duration_cv,
        'tokens_cv': tokens_cv,
        'duration_mean': mean(durations) if durations else 0,
        'tokens_mean': mean(tokens) if tokens else 0,
    }

    # === Composite (0-100 scale, three-dimension weighting) ===
    # accuracy 40% + insight depth 35% + stability 25%
    acc_composite = (
        acc_scores['calib_explicit_rate'] * 0.25 +
        acc_scores['authority_c_rate'] * 0.15 +
        acc_scores['authority_a_env_bi_rate'] * 0.10 +
        acc_scores['no_industry_number_rate'] * 0.15 +
        acc_scores['no_manual_detail_rate'] * 0.10 +
        max(0, 1 - acc_scores['speculation_avg'] / 5) * 0.15 +  # 5 speculations scores 0
        acc_scores['data_integrity_flagged_rate'] * 0.10
    )
    depth_composite = (
        min(depth_scores['deep_hit_avg'] / 5, 1) * 0.30 +  # full marks at >= 5 deep-signal classes
        max(0, 1 - depth_scores['hollow_avg'] / 3) * 0.15 +  # 3 hollow phrases scores 0
        depth_scores['f_top2_with_semantic_rate'] * 0.25 +
        depth_scores['seq12_4elem_rate'] * 0.20 +
        depth_scores['collab_analysis_rate'] * 0.10
    )
    stab_composite = (
        stab_scores['light_mode_rate'] * 0.30 +  # same-light-color share
        stab_scores['seq11_signal_iou'] * 0.25 +  # reason-signal IoU
        stab_scores['tracking_mode_rate'] * 0.25 +  # tracking-indicator consistency
        max(0, 1 - stab_scores['content_len_cv'] * 2) * 0.10 +  # CV of 0.5 scores 0
        max(0, 1 - stab_scores['duration_cv'] * 2) * 0.10
    )
    overall = acc_composite * 0.4 + depth_composite * 0.35 + stab_composite * 0.25

    return {
        'label': label,
        'n': n,
        'accuracy': acc_scores,
        'depth': depth_scores,
        'stability': stab_scores,
        'composite': {
            'accuracy': round(acc_composite * 100, 1),
            'depth': round(depth_composite * 100, 1),
            'stability': round(stab_composite * 100, 1),
            'overall': round(overall * 100, 1),
        },
    }
|
||||
|
||||
|
||||
def _load_perf(dir_path: Path) -> list[dict]:
    """Pull duration/tokens from the ``meta`` block of every round_*.json."""
    def _perf_entry(path: Path) -> dict:
        # A missing or empty meta degrades gracefully to None values.
        meta = json.loads(path.read_text(encoding='utf-8')).get('meta') or {}
        return {'duration_s': meta.get('duration_s'), 'tokens': meta.get('tokens')}

    return [_perf_entry(p) for p in sorted(dir_path.glob('round_*.json'))]
|
||||
|
||||
|
||||
def print_summary(s: dict) -> None:
    """Pretty-print one round summary (manager-view scorecard) to stdout."""
    if s['n'] == 0:
        print(f"\n=== {s['label'].upper()} 数据为空 ===")
        return

    comp = s['composite']
    print(f"\n=== Round {s['label'].upper()} · 店长视角评分(n={s['n']})===")
    print(f" 【综合评分】 {comp['overall']:.1f} / 100")
    print(f" 准确性 {comp['accuracy']:.1f} | 洞察深度 {comp['depth']:.1f} | 稳定性 {comp['stability']:.1f}")

    # Accuracy detail section.
    acc = s['accuracy']
    print("\n -- 准确性明细 --")
    print(f" 对比口径显式引用: {acc['calib_explicit_rate']:.0%}")
    print(f" C 权威字段(余额变化): {acc['authority_c_rate']:.0%}")
    print(f" A 权威字段(环比数字): {acc['authority_a_env_bi_rate']:.0%}")
    print(f" 禁行业数字合规: {acc['no_industry_number_rate']:.0%}")
    print(f" 禁手动调整拆明细合规: {acc['no_manual_detail_rate']:.0%}")
    print(f" 单期推测违规/次 (低好): {acc['speculation_avg']:.1f}")
    print(f" 数据完整性标注: {acc['data_integrity_flagged_rate']:.0%}")

    # Depth detail section.
    dep = s['depth']
    print("\n -- 洞察深度明细 --")
    print(f" 深度信号命中/次 (满 10): {dep['deep_hit_avg']:.1f}")
    print(f" 跨轮覆盖信号数: {dep['deep_hit_union']} / 10")
    print(f" 空洞表达/次 (低好): {dep['hollow_avg']:.1f}")
    print(f" seq 11 top 2 结构: {dep['f_top2_rate']:.0%}")
    print(f" seq 11 有意义解读: {dep['f_top2_with_semantic_rate']:.0%}")
    print(f" seq 12 四要素齐全: {dep['seq12_4elem_rate']:.0%}")
    print(f" 多指标协同分析: {dep['collab_analysis_rate']:.0%}")

    # Stability detail section.
    stab = s['stability']
    print("\n -- 稳定性明细 --")
    print(f" 评级众数 ({stab['light_mode']}) 占比: {stab['light_mode_rate']:.0%} [{stab['light_distribution']}]")
    print(f" seq 11 原因信号 IoU: {stab['seq11_signal_iou']:.0%}")
    print(f" 跟踪指标众数 ({stab['tracking_mode']}) 占比: {stab['tracking_mode_rate']:.0%}")
    print(f" 字数 CV (低好): {stab['content_len_cv']:.2f}")
    print(f" 时长 CV (低好): {stab['duration_cv']:.2f} 均值 {stab['duration_mean']:.1f}s")
    print(f" tokens CV (低好): {stab['tokens_cv']:.2f} 均值 {stab['tokens_mean']:.0f}")
|
||||
|
||||
def print_compare(*summaries: dict) -> None:
    """Print a side-by-side comparison table for two or more round summaries."""
    labels = [s['label'].upper() for s in summaries]
    header = ' vs '.join(labels)
    col_w = 12
    print(f"\n======= {header} 店长视角综合评分 =======")
    print(f"{'指标':<34}" + ''.join(f"{lab:>{col_w}}" for lab in labels))
    print('-' * (34 + col_w * len(labels)))

    # Cell formatter per fmt tag; anything unknown falls back to 2-decimal float.
    formatters = {
        'percent': lambda v: f'{v:.0%}',
        'int': lambda v: str(v),
        'float1': lambda v: f'{v:.1f}',
        'float': lambda v: f'{v:.2f}',
    }

    def _row(name: str, values: list, fmt: str = 'float', higher_better: bool = True) -> None:
        # higher_better kept for call-site symmetry; it is not rendered.
        render = formatters.get(fmt, formatters['float'])
        cells = [render(v) for v in values]
        print(f"{name:<34}" + ''.join(f"{c:>{col_w}}" for c in cells))

    print('【综合】')
    _row(' 总分 / 100', [s['composite']['overall'] for s in summaries], 'float1')
    _row(' 准确性 (40%)', [s['composite']['accuracy'] for s in summaries], 'float1')
    _row(' 洞察深度 (35%)', [s['composite']['depth'] for s in summaries], 'float1')
    _row(' 稳定性 (25%)', [s['composite']['stability'] for s in summaries], 'float1')
    print('【准确性】')
    _row(' 对比口径显式 ★', [s['accuracy']['calib_explicit_rate'] for s in summaries], 'percent')
    _row(' C 权威字段', [s['accuracy']['authority_c_rate'] for s in summaries], 'percent')
    _row(' 数据完整性标注', [s['accuracy']['data_integrity_flagged_rate'] for s in summaries], 'percent')
    _row(' 单期推测违规/次 ↓', [s['accuracy']['speculation_avg'] for s in summaries], 'float1')
    _row(' 禁行业数字合规', [s['accuracy']['no_industry_number_rate'] for s in summaries], 'percent')
    print('【洞察深度】')
    _row(' 深度信号命中/次', [s['depth']['deep_hit_avg'] for s in summaries], 'float1')
    _row(' 跨轮信号覆盖 /10', [s['depth']['deep_hit_union'] for s in summaries], 'int')
    _row(' 空洞表达/次 ↓', [s['depth']['hollow_avg'] for s in summaries], 'float1')
    _row(' seq 11 top 2 结构', [s['depth']['f_top2_rate'] for s in summaries], 'percent')
    _row(' seq 11 有意义解读 ★', [s['depth']['f_top2_with_semantic_rate'] for s in summaries], 'percent')
    _row(' seq 12 四要素齐全', [s['depth']['seq12_4elem_rate'] for s in summaries], 'percent')
    _row(' 多指标协同分析', [s['depth']['collab_analysis_rate'] for s in summaries], 'percent')
    print('【稳定性(同数据下越一致越好)】')
    _row(' 评级众数占比 ★', [s['stability']['light_mode_rate'] for s in summaries], 'percent')
    _row(' seq 11 原因信号 IoU', [s['stability']['seq11_signal_iou'] for s in summaries], 'percent')
    _row(' 跟踪指标众数占比 ★', [s['stability']['tracking_mode_rate'] for s in summaries], 'percent')
    _row(' 字数 CV ↓', [s['stability']['content_len_cv'] for s in summaries], 'float')
    _row(' 时长 CV ↓', [s['stability']['duration_cv'] for s in summaries], 'float')
    _row(' 均时长 s', [s['stability']['duration_mean'] for s in summaries], 'float1')
    _row(' 均 tokens', [s['stability']['tokens_mean'] for s in summaries], 'float1')
    print()
    for s in summaries:
        print(f" {s['label'].upper()} 灯色分布: {s['stability']['light_distribution']}")
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry: analyze a single round directory or compare several."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', help='单目录分析')
    parser.add_argument('--compare', nargs='+', metavar='DIR', help='多目录对比(2-5 个)')
    args = parser.parse_args()

    if args.compare:
        summaries = []
        for dir_path in (Path(d) for d in args.compare):
            rounds = _iter_rounds(dir_path)
            if not rounds:
                # Empty directories are reported and excluded from the table.
                print(f'[skip] {dir_path} 无数据')
                continue
            label = dir_path.name.replace('round_', '')
            summaries.append(summarize(rounds, label, _load_perf(dir_path)))
        for summary in summaries:
            print_summary(summary)
        print_compare(*summaries)
        tag = '_'.join(s['label'] for s in summaries)
        out_path = Path(f'export/ai-ab-test/_manager_quality_{tag}.json')
        payload = json.dumps(
            {s['label']: s for s in summaries},
            ensure_ascii=False, indent=2, default=str,
        )
        out_path.write_text(payload, encoding='utf-8')
        print(f'\n[done] 店长视角评分 JSON 已存: {out_path}')
    elif args.dir:
        target = Path(args.dir)
        rounds = _iter_rounds(target)
        perf = _load_perf(target)
        print_summary(summarize(rounds, target.name.replace('round_', ''), perf))
    else:
        parser.error('需指定 --dir 或 --compare')
|
||||
|
||||
|
||||
# CLI entry point (see main() for --dir / --compare usage).
if __name__ == '__main__':
    main()
|
||||
45
scripts/dump_app2_prompt.py
Normal file
45
scripts/dump_app2_prompt.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""导出 App2 当前拼接后的完整 user prompt(不含百炼侧 system prompt)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, 'apps/backend')
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(dotenv_path=os.path.join(os.getcwd(), '.env'))
|
||||
|
||||
from app.ai.prompts.app2_finance_prompt import build_prompt
|
||||
|
||||
|
||||
async def main() -> None:
    """Build the current App2 user prompt and dump it to a markdown snapshot.

    Fixes vs. the previous version:
    - the generation date in the snapshot header is computed at run time
      (it was hard-coded, so re-generated snapshots carried a stale date);
    - the output directory is created if missing before writing;
    - the output path can be overridden via ``APP2_PROMPT_MD_OUT`` for
      non-default checkouts (default keeps the historical location).
    """
    from datetime import date
    from pathlib import Path

    p = await build_prompt({
        'site_id': 2790685415443269,
        'time_dimension': 'this_month',
        'area': 'all',
    })
    pretty = json.dumps(json.loads(p), ensure_ascii=False, indent=2)
    md = f"""# App2 当前拼接 Prompt 快照(this_month / all)

- 生成时间:{date.today().isoformat()}
- Prompt 长度:{len(p)} 字符
- 仅为 **user prompt**;百炼控制台 system prompt(行业背景 + 分析框架)不在此文件,会在调用时由百炼 SDK 前置拼接

## 完整 Prompt

```json
{pretty}
```
"""
    out = Path(os.environ.get(
        'APP2_PROMPT_MD_OUT',
        r'c:/Project/NeoZQYY/export/ai-prompt-samples/app2_current_user_prompt.md',
    ))
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(md, encoding='utf-8')
    print(f'prompt={len(p)} chars')
    print(f'md={out}')
    print()
    print(pretty)
|
||||
|
||||
|
||||
# Script entry point: the prompt builder is async, so drive it with asyncio.
if __name__ == '__main__':
    asyncio.run(main())
|
||||
31
scripts/gen_app2_v6_md.py
Normal file
31
scripts/gen_app2_v6_md.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import sys, asyncio, json, os
|
||||
sys.path.insert(0, 'apps/backend')
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(dotenv_path=os.path.join(os.getcwd(), '.env'))
|
||||
from app.ai.prompts.app2_finance_prompt import build_prompt
|
||||
|
||||
|
||||
async def main() -> None:
    """Render the v6 (weekday-annotated) App2 prompt for this_month/all to markdown.

    Fixes vs. the previous version: run-time generation date (was hard-coded
    and went stale on re-runs), parent directory creation, and an
    ``APP2_PROMPT_MD_OUT`` override for non-default checkouts.
    """
    from datetime import date
    from pathlib import Path

    p = await build_prompt({'site_id': 2790685415443269, 'time_dimension': 'this_month', 'area': 'all'})
    pretty = json.dumps(json.loads(p), ensure_ascii=False, indent=2)
    md = f"""# App2 财务洞察 — 本月/全部区域 (v6 异常日含星期)

- 生成时间:{date.today().isoformat()}
- Prompt 长度:{len(p)} 字符
- 异常日参数:阈值 40%、上限 10、最少 7 天样本
- 新增:日期带星期中文,AI 可识别周中规律

## 完整 Prompt

```json
{pretty}
```
"""
    out = Path(os.environ.get(
        'APP2_PROMPT_MD_OUT',
        r'c:/Project/NeoZQYY/export/ai-prompt-samples/app2_this_month_all_v6_anomaly_with_weekday.md',
    ))
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(md, encoding='utf-8')
    print(f'prompt={len(p)} chars, md saved to {out}')
|
||||
|
||||
|
||||
# Script entry point: the prompt builder is async, so drive it with asyncio.
if __name__ == '__main__':
    asyncio.run(main())
|
||||
180
scripts/ops/gen_rls_business_date_migration.py
Normal file
180
scripts/ops/gen_rls_business_date_migration.py
Normal file
@@ -0,0 +1,180 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从测试库 / 真实库实时读取目标视图的定义(pg_get_viewdef),
|
||||
为每个视图生成 ``CREATE OR REPLACE VIEW`` 块,在 WHERE 末尾追加业务日上界。
|
||||
|
||||
数据库实际列签名可能比 ``schemas/app.sql`` 文件中的快照新(增列),
|
||||
因此本脚本走 ``pg_get_viewdef`` 兜底,确保 ``CREATE OR REPLACE`` 不会
|
||||
因为列签名漂移而失败。
|
||||
|
||||
输出 SQL 文件:db/etl_feiqiu/migrations/20260502__rls_views_business_date_upper_bound.sql
|
||||
|
||||
环境变量:``TEST_PG_DSN`` 或 ``PG_DSN``。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
OUT = ROOT / "db" / "etl_feiqiu" / "migrations" / "20260502__rls_views_business_date_upper_bound.sql"
|
||||
|
||||
# 需要改造的视图 → 业务日上界条件(None 表示不强制裁剪,仅占位)
|
||||
VIEWS_WITH_BD: dict[str, str] = {
|
||||
# ── 财务事实 ─────────────────────────────────────────────
|
||||
"app.v_dws_finance_area_daily": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_finance_daily_summary": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_finance_discount_detail": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_finance_expense_summary": "expense_month <= date_trunc('month', app.business_date_now())::date",
|
||||
"app.v_dws_finance_income_structure": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_finance_recharge_summary": "stat_date <= app.business_date_now()",
|
||||
# ── 助教 / 任务事实 ─────────────────────────────────────
|
||||
"app.v_assistant_daily": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_assistant_daily_detail": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_assistant_finance_analysis": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_assistant_monthly_summary": "stat_month <= date_trunc('month', app.business_date_now())::date",
|
||||
"app.v_dws_assistant_salary_calc": "salary_month <= date_trunc('month', app.business_date_now())::date",
|
||||
# ── 客户事实 ────────────────────────────────────────────
|
||||
"app.v_dws_member_consumption_summary": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_member_visit_detail": "visit_date <= app.business_date_now()",
|
||||
"app.v_dws_member_winback_index": "COALESCE(last_visit_time::date, '0001-01-01'::date) <= app.business_date_now()",
|
||||
# ── DWD 事实 ────────────────────────────────────────────
|
||||
"app.v_dwd_settlement_head": "COALESCE(create_time::date, '0001-01-01'::date) <= app.business_date_now()",
|
||||
"app.v_dwd_assistant_service_log": "COALESCE(create_time::date, '0001-01-01'::date) <= app.business_date_now()",
|
||||
"app.v_dwd_recharge_order": "COALESCE(pay_time::date, '0001-01-01'::date) <= app.business_date_now()",
|
||||
"app.v_dwd_store_goods_sale": "COALESCE(create_time::date, '0001-01-01'::date) <= app.business_date_now()",
|
||||
"app.v_dwd_table_fee_log": "COALESCE(create_time::date, '0001-01-01'::date) <= app.business_date_now()",
|
||||
# ── DIM SCD2 / 配置维度 ────────────────────────────────
|
||||
"app.v_cfg_assistant_level_price": "effective_from <= app.business_date_now() AND effective_to >= app.business_date_now()",
|
||||
"app.v_cfg_performance_tier": "effective_from <= app.business_date_now() AND effective_to >= app.business_date_now()",
|
||||
"app.v_cfg_bonus_rules": "effective_from <= app.business_date_now() AND effective_to >= app.business_date_now()",
|
||||
"app.v_cfg_index_parameters": "effective_from <= app.business_date_now() AND effective_to >= app.business_date_now()",
|
||||
# ── DWS 业务事实 / 汇总 (补 18 个) ───────────────────
|
||||
"app.v_dws_assistant_customer_stats": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_assistant_order_contribution": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_assistant_project_tag": "computed_at::date <= app.business_date_now()",
|
||||
"app.v_dws_assistant_recharge_commission": "commission_month <= date_trunc('month', app.business_date_now())::date",
|
||||
"app.v_dws_coach_area_hours": "stat_month <= date_trunc('month', app.business_date_now())::date",
|
||||
"app.v_dws_finance_board_cache": "computed_at::date <= app.business_date_now()",
|
||||
"app.v_dws_member_assistant_intimacy": "calc_time::date <= app.business_date_now()",
|
||||
"app.v_dws_member_assistant_relation_index": "COALESCE(stat_date, calc_time::date) <= app.business_date_now()",
|
||||
"app.v_dws_member_newconv_index": "stat_date <= app.business_date_now()",
|
||||
"app.v_dws_member_project_tag": "computed_at::date <= app.business_date_now()",
|
||||
"app.v_dws_member_spending_power_index": "calc_time::date <= app.business_date_now()",
|
||||
"app.v_dws_order_summary": "order_date <= app.business_date_now()",
|
||||
"app.v_dws_platform_settlement": "settlement_date <= app.business_date_now()",
|
||||
"app.v_finance_daily": "stat_date <= app.business_date_now()",
|
||||
"app.v_member_consumption": "stat_date <= app.business_date_now()",
|
||||
"app.v_order_summary": "order_date <= app.business_date_now()",
|
||||
}
|
||||
|
||||
# Views deliberately left untouched, with the skip reason recorded for the
# audit documentation (values are the reason strings, kept verbatim).
VIEWS_SKIPPED: dict[str, str] = {
    "app.v_assistant": "无日期列;纯 dim 当前快照",
    "app.v_cfg_area_category": "无日期列;纯静态配置",
    "app.v_member": "无日期列;纯当前会员快照",
    "app.v_site": "无日期列;纯门店元数据",
    # SCD2 dimensions: keep the scd2_is_current=1 semantics untouched.
    # A true "dimension state as of the sandbox date" would need the filter
    # scd2_start_time <= bd AND (scd2_end_time > bd OR scd2_end_time IS NULL),
    # but that turns one "currently effective" row into several (one of them
    # being the one effective at the time), which would affect JOINs and
    # upstream callers. Keep the current behavior; revisit on demand.
    "app.v_dim_assistant": "SCD2 dim:scd2_is_current=1 当前快照(保留)",
    "app.v_dim_member": "SCD2 dim:scd2_is_current=1 当前快照(保留)",
    "app.v_dim_member_card_account": "SCD2 dim:scd2_is_current=1 当前快照(保留)",
    "app.v_dim_staff": "SCD2 dim:scd2_is_current=1 当前快照(保留)",
    "app.v_dim_staff_ex": "SCD2 dim:scd2_is_current=1 当前快照(保留)",
    "app.v_dim_table": "SCD2 dim:scd2_is_current=1 当前快照(保留)",
}
|
||||
|
||||
def fetch_view_def(cur, schema: str, view: str) -> str:
    """Return the pretty-printed view body from ``pg_get_viewdef``.

    The trailing semicolon (and surrounding whitespace) is stripped so the
    body can be re-wrapped in a ``CREATE OR REPLACE VIEW`` statement.
    """
    qualified = f"{schema}.{view}"
    cur.execute("SELECT pg_get_viewdef(%s::regclass, true)", (qualified,))
    row = cur.fetchone()
    if row and row[0]:
        return row[0].rstrip().rstrip(";").rstrip()
    raise RuntimeError(f"未取到视图定义: {qualified}")
|
||||
|
||||
|
||||
def add_business_date_clause(view_body: str, predicate: str) -> str:
    """Append ``AND <predicate>`` to the view body's WHERE clause.

    If the body contains an ``ORDER BY``, the predicate is inserted before it
    (a trailing WHERE after ORDER BY would be invalid SQL); when there is no
    WHERE at all, a new ``WHERE <predicate>`` is added instead.

    NOTE(review): detection is purely textual (upper-cased substring search),
    so a literal "WHERE"/"ORDER BY" inside a string constant or subquery could
    confuse it — acceptable for the controlled view set this script handles,
    but confirm before reusing elsewhere.
    """
    body = view_body.rstrip()
    upper = body.upper()
    order_by_idx = upper.rfind("ORDER BY")
    if order_by_idx != -1:
        head = body[:order_by_idx].rstrip()
        tail = body[order_by_idx:]
        if "WHERE" in head.upper():
            return f"{head}\n AND {predicate}\n {tail}"
        # BUG FIX: previously the new WHERE was appended AFTER the ORDER BY
        # ("... ORDER BY x WHERE p"), producing invalid SQL. Insert the
        # WHERE clause before the ORDER BY instead.
        return f"{head}\n WHERE {predicate}\n {tail}"
    if "WHERE" in upper:
        return f"{body}\n AND {predicate}"
    return f"{body}\n WHERE {predicate}"
|
||||
|
||||
|
||||
def main() -> None:
    """Generate the business-date upper-bound migration SQL file.

    Reads the live definition of every view in ``VIEWS_WITH_BD`` through
    ``pg_get_viewdef`` (so column drift relative to the schema snapshot
    cannot break ``CREATE OR REPLACE``), appends the per-view business-date
    predicate, and writes a single transactional migration to ``OUT``.

    Raises:
        SystemExit: when neither ``TEST_PG_DSN`` nor ``PG_DSN`` is configured.
    """
    load_dotenv()
    dsn = os.environ.get("TEST_PG_DSN") or os.environ.get("PG_DSN")
    if not dsn:
        raise SystemExit("请配置 TEST_PG_DSN 或 PG_DSN")

    # Migration file header + transaction opener.
    out_lines: list[str] = []
    out_lines.append(
        "-- =============================================================================\n"
        "-- ETL 库(etl_feiqiu)/ app schema —— RLS 视图业务日上界裁剪\n"
        "-- 由 scripts/ops/gen_rls_business_date_migration.py 自动生成。\n"
        "-- 沙箱模式下,业务读取层只看到 sandbox_date 及之前的数据。\n"
        "-- =============================================================================\n"
    )
    out_lines.append("BEGIN;\n")

    # Helper function that reads the business-date GUC, falling back to
    # CURRENT_DATE when the setting is absent or empty.
    out_lines.append(
        "-- helper:业务日 GUC 读取,缺省回退当前真实日期\n"
        "CREATE OR REPLACE FUNCTION app.business_date_now()\n"
        "RETURNS date\n"
        "LANGUAGE sql\n"
        "STABLE\n"
        "AS $$\n"
        " SELECT COALESCE(\n"
        " NULLIF(current_setting('app.current_business_date', true), '')::date,\n"
        " CURRENT_DATE\n"
        " );\n"
        "$$;\n"
        "COMMENT ON FUNCTION app.business_date_now() IS\n"
        "'返回当前业务日(GUC app.current_business_date),未设置时回退 CURRENT_DATE。';\n"
    )

    # One CREATE OR REPLACE VIEW block per target view, built from the live
    # definition plus the business-date predicate.
    conn = psycopg2.connect(dsn)
    try:
        with conn.cursor() as cur:
            for view_name, predicate in VIEWS_WITH_BD.items():
                schema, view = view_name.split(".", 1)
                body = fetch_view_def(cur, schema, view)
                body_with_bd = add_business_date_clause(body, predicate)
                out_lines.append(f"\n-- {view_name}:加业务日上界 → {predicate}")
                out_lines.append(f"CREATE OR REPLACE VIEW {view_name} AS")
                out_lines.append(body_with_bd + ";\n")
    finally:
        conn.close()

    # Transaction closer + rollback instructions for operators.
    out_lines.append("\nCOMMIT;\n")
    out_lines.append(
        "\n-- 回滚:DROP FUNCTION app.business_date_now() CASCADE;\n"
        "-- 然后重新执行 db/etl_feiqiu/schemas/app.sql 即可恢复 live 行为\n"
    )

    OUT.write_text("\n".join(out_lines), encoding="utf-8")
    print(f"OK: 写入 {OUT.relative_to(ROOT)}, 共 {len(VIEWS_WITH_BD)} 个视图")
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == "__main__":
    main()
|
||||
66
scripts/test_app2_new_system_prompt.py
Normal file
66
scripts/test_app2_new_system_prompt.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""用 dispatcher.run_single_app 等效 API 路径跑 this_month/all,验证百炼侧新 system prompt 的输出效果。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.insert(0, 'apps/backend')
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(dotenv_path=os.path.join(os.getcwd(), '.env'))
|
||||
|
||||
from app.ai.budget_tracker import BudgetTracker
|
||||
from app.ai.cache_service import AICacheService
|
||||
from app.ai.circuit_breaker import CircuitBreaker
|
||||
from app.ai.config import AIConfig
|
||||
from app.ai.conversation_service import ConversationService
|
||||
from app.ai.dashscope_client import DashScopeClient
|
||||
from app.ai.dispatcher import AIDispatcher
|
||||
from app.ai.rate_limiter import RateLimiter
|
||||
from app.ai.run_log_service import AIRunLogService
|
||||
from app.database import get_connection
|
||||
|
||||
|
||||
async def main() -> None:
    """Run App2 (this_month / all) through the dispatcher — the same path the
    API takes — and print every insight so the new Bailian system prompt can
    be inspected by eye."""
    config = AIConfig.from_env()
    log_service = AIRunLogService(get_conn=get_connection)
    dispatcher = AIDispatcher(
        client=DashScopeClient(api_key=config.api_key, workspace_id=config.workspace_id),
        cache_svc=AICacheService(),
        conv_svc=ConversationService(),
        circuit_breaker=CircuitBreaker(),
        rate_limiter=RateLimiter(),
        budget_tracker=BudgetTracker(usage_provider=log_service),
        run_log_svc=log_service,
        config=config,
    )

    started = time.monotonic()
    result = await dispatcher.run_single_app(
        app_type='app2_finance',
        context={
            'site_id': 2790685415443269,
            'time_dimension': 'this_month',
            'area': 'all',
        },
        triggered_by='admin_test_new_system_prompt',
    )
    elapsed = time.monotonic() - started

    if result is None:
        print(f'FAILED after {elapsed:.1f}s')
        return

    insights = result.get('insights') or []
    print(f'done in {elapsed:.1f}s, insights={len(insights)} 条\n')
    for entry in insights:
        print(f"[{entry.get('seq')}] {entry.get('title')}")
        print(f" {entry.get('content') or ''}\n")
|
||||
|
||||
|
||||
# Script entry point: the dispatcher API is async, so drive it with asyncio.
if __name__ == '__main__':
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user