feat: chat integration, tenant admin spec, backend chat service, miniprogram updates, DEMO moved to tmp, XCX-TEST removed, migrations & docs
This commit is contained in:
345
scripts/ops/test_chat_ai_quality.py
Normal file
345
scripts/ops/test_chat_ai_quality.py
Normal file
@@ -0,0 +1,345 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
RNS1.4 CHAT 模块 AI 返回质量评估脚本。
|
||||
|
||||
直接调用百炼 API(OpenAI 兼容协议),模拟 4 种入口场景的对话,
|
||||
评估 AI 回复的质量(语义相关性、中文正确性、上下文理解能力)。
|
||||
|
||||
输出:Markdown 评估报告 → docs/reports/
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load the repository-root .env so the BAILIAN_* settings below can come from it.
# parents[2] is two directories above this file's own directory
# (presumably the repository root for scripts/ops/<this file> — TODO confirm if moved).
_root = Path(__file__).resolve().parents[2]
load_dotenv(_root / ".env")

# Connection settings for the Bailian endpoint (OpenAI-compatible protocol).
BAILIAN_API_KEY = os.environ.get("BAILIAN_API_KEY")
BAILIAN_BASE_URL = os.environ.get("BAILIAN_BASE_URL")
BAILIAN_MODEL = os.environ.get("BAILIAN_MODEL")

# Fail fast: all three settings are required, and an empty string counts as missing.
if not (BAILIAN_API_KEY and BAILIAN_BASE_URL and BAILIAN_MODEL):
    # Diagnostics go to stderr so they never end up mixed into captured stdout.
    print(
        "ERROR: 缺少 BAILIAN_API_KEY / BAILIAN_BASE_URL / BAILIAN_MODEL 环境变量",
        file=sys.stderr,
    )
    sys.exit(1)
|
||||
|
||||
import openai
|
||||
|
||||
# Single async client shared by every request in this script; it talks to the
# endpoint configured through BAILIAN_BASE_URL.
client = openai.AsyncOpenAI(
    base_url=BAILIAN_BASE_URL,
    api_key=BAILIAN_API_KEY,
)

# The system prompt is sent as a JSON object string with a single "task" key.
# ensure_ascii=False keeps the Chinese text readable instead of \u-escaped.
_SYSTEM_TASK = "你是台球门店的 AI 助手,根据用户的问题和当前页面上下文提供帮助。"
SYSTEM_PROMPT = json.dumps({"task": _SYSTEM_TASK}, ensure_ascii=False)
|
||||
|
||||
# ── Conversation fixtures: four entry-point scenarios ─────────────────────────


def _system_message() -> dict:
    """Return a fresh system-role message carrying SYSTEM_PROMPT."""
    return {"role": "system", "content": SYSTEM_PROMPT}


def _page_context_message(source_page: str, page_context: dict, screen_content: str) -> dict:
    """Return a user-role message whose content is the page context as a JSON string.

    The timestamp is taken when the message is built, i.e. at module import.
    """
    payload = {
        "current_time": datetime.now().isoformat(),
        "source_page": source_page,
        "page_context": page_context,
        "screen_content": screen_content,
    }
    return {"role": "user", "content": json.dumps(payload, ensure_ascii=False)}


def _user_message(text: str) -> dict:
    """Return a plain user-role message."""
    return {"role": "user", "content": text}


# Each scenario carries a display name, the entry type ("context_type"), a short
# description, and the message list sent to the model. The last message of each
# list is the user question that run_scenario prints and records.
SCENARIOS: list[dict] = [
    {
        "name": "场景1: task 入口 — 维客任务咨询",
        "context_type": "task",
        "description": "助教从任务详情页进入,询问如何完成一个维客任务",
        "messages": [
            _system_message(),
            _page_context_message(
                "task-detail",
                {
                    "task_type": "retention",
                    "member_name": "张三",
                    "priority_score": 85,
                    "expires_at": "2026-03-25",
                },
                "维客任务:张三,优先级85分,3月25日到期",
            ),
            _user_message("这个客户最近消费频率下降了,我应该怎么跟他沟通比较好?有什么话术建议吗?"),
        ],
    },
    {
        "name": "场景2: customer 入口 — 客户详情咨询",
        "context_type": "customer",
        "description": "助教从客户详情页进入,询问客户消费情况分析",
        "messages": [
            _system_message(),
            _page_context_message(
                "customer-detail",
                {
                    "member_id": 12345,
                    "member_name": "李四",
                    "member_level": "VIP",
                    "last_visit": "2026-03-15",
                    "total_consumption": "¥8,500",
                },
                "客户:李四,VIP会员,累计消费¥8,500,最近到店3月15日",
            ),
            _user_message("帮我分析一下这个客户的消费习惯,他适合推荐什么课程?"),
        ],
    },
    {
        "name": "场景3: coach 入口 — 助教业绩咨询",
        "context_type": "coach",
        "description": "助教从自己的详情页进入,询问业绩提升建议",
        "messages": [
            _system_message(),
            _page_context_message(
                "coach-detail",
                {
                    "coach_name": "王教练",
                    "monthly_lessons": 45,
                    "monthly_revenue": "¥12,000",
                    "customer_count": 28,
                },
                "助教:王教练,本月课时45节,收入¥12,000,服务客户28人",
            ),
            _user_message("我这个月业绩一般,有什么方法可以提升客户续课率?"),
        ],
    },
    {
        # The general entry has no page context: only the system prompt and the question.
        "name": "场景4: general 入口 — 通用对话",
        "context_type": "general",
        "description": "助教从首页直接进入聊天,无特定上下文",
        "messages": [
            _system_message(),
            _user_message("台球馆周末客流量大的时候,怎么合理安排台位和助教排班?"),
        ],
    },
]

# ── Multi-turn follow-ups ─────────────────────────────────────────────────────
# Extra user turns that run_scenario appends, in order, after the first reply
# of the scenario whose context_type is "task".
FOLLOWUP_MESSAGES = [
    "如果他说最近比较忙没时间来,我该怎么回应?",
    "好的,那如果他愿意来,我应该推荐什么样的课程套餐?",
]
|
||||
|
||||
|
||||
async def call_ai(messages: list[dict]) -> tuple[str, float, int | None]:
    """Send one non-streaming chat completion request and time it.

    Args:
        messages: Chat messages forwarded unchanged to the API.

    Returns:
        A tuple ``(reply, elapsed_seconds, total_tokens)``. ``reply`` is the
        text of the first choice ("" when that choice has no content), and
        ``total_tokens`` is ``None`` when the response carries no usage data.

    Raises:
        Whatever the client raises for a failed request, and ``IndexError``
        if the response contains no choices (the first choice is read
        without a guard so that such a response surfaces as a failure).
    """
    # perf_counter is monotonic: unlike time.time() differences, the measured
    # latency cannot be distorted by system clock adjustments.
    started = time.perf_counter()
    response = await client.chat.completions.create(
        model=BAILIAN_MODEL,
        messages=messages,
        temperature=0.7,
        max_tokens=2000,
    )
    elapsed = time.perf_counter() - started

    content = response.choices[0].message.content or ""
    tokens = response.usage.total_tokens if response.usage else None
    return content, elapsed, tokens
|
||||
|
||||
|
||||
async def call_ai_stream(messages: list[dict]) -> tuple[str, float, int]:
    """Send one streaming chat completion request, collect the text and time it.

    Args:
        messages: Chat messages forwarded unchanged to the API.

    Returns:
        A tuple ``(reply, elapsed_seconds, chunk_count)``. ``reply`` is the
        concatenation of every non-empty content delta, ``elapsed_seconds``
        spans from just before the request until the stream is exhausted, and
        ``chunk_count`` is the number of content-bearing chunks received.

    Raises:
        Whatever the client raises while creating or consuming the stream.
    """
    # perf_counter is monotonic, so the duration is immune to wall-clock jumps.
    started = time.perf_counter()
    stream = await client.chat.completions.create(
        model=BAILIAN_MODEL,
        messages=messages,
        temperature=0.7,
        max_tokens=2000,
        stream=True,
    )

    pieces: list[str] = []
    async for chunk in stream:
        # Chunks without choices or with an empty delta carry no text; skip them.
        if chunk.choices and chunk.choices[0].delta.content:
            pieces.append(chunk.choices[0].delta.content)

    elapsed = time.perf_counter() - started
    # One list entry per content-bearing chunk, so the length is the chunk count.
    return "".join(pieces), elapsed, len(pieces)
|
||||
|
||||
|
||||
async def run_scenario(scenario: dict) -> dict:
    """Run one scenario against the model and collect every exchange.

    Args:
        scenario: A dict with "name", "description", "context_type" and
            "messages" keys; the last message is the opening user question.

    Returns:
        A dict with the scenario's "name" and "description" plus a "rounds"
        list. Round 1 is a non-streaming call and records "tokens_used"; for
        the "task" scenario each entry of FOLLOWUP_MESSAGES adds a streaming
        round that records "chunk_count" instead.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f" {scenario['name']}")
    print(banner)

    rounds: list[dict] = []
    outcome = {
        "name": scenario["name"],
        "description": scenario["description"],
        "rounds": rounds,
    }

    # Copy the list so appending replies does not grow the scenario's own message list.
    conversation = [*scenario["messages"]]

    # Round 1: non-streaming call answering the scenario's final message.
    opening = conversation[-1]["content"]
    print(f"\n[用户] {opening[:80]}...")
    answer, seconds, tokens = await call_ai(conversation)
    print(f"[AI] ({seconds:.1f}s, {tokens} tokens) {answer[:100]}...")
    rounds.append(
        {
            "round": 1,
            "mode": "非流式",
            "user_message": opening,
            "ai_reply": answer,
            "elapsed_seconds": round(seconds, 2),
            "tokens_used": tokens,
        }
    )
    conversation.append({"role": "assistant", "content": answer})

    # Streaming follow-up rounds only apply to the "task" scenario.
    if scenario["context_type"] != "task":
        return outcome

    # Follow-ups are numbered from 2 because round 1 is the opening question.
    for round_no, question in enumerate(FOLLOWUP_MESSAGES, start=2):
        conversation.append({"role": "user", "content": question})
        print(f"\n[用户] {question}")
        streamed, stream_seconds, chunk_total = await call_ai_stream(conversation)
        print(f"[AI-Stream] ({stream_seconds:.1f}s, {chunk_total} chunks) {streamed[:100]}...")
        rounds.append(
            {
                "round": round_no,
                "mode": "流式",
                "user_message": question,
                "ai_reply": streamed,
                "elapsed_seconds": round(stream_seconds, 2),
                "chunk_count": chunk_total,
            }
        )
        # Keep the reply in the history so the next follow-up sees the full dialogue.
        conversation.append({"role": "assistant", "content": streamed})

    return outcome
|
||||
|
||||
|
||||
async def main():
    """Run every scenario in order and write the Markdown evaluation report.

    A scenario that raises is recorded with an "error" entry instead of
    aborting the run, so the report always covers all scenarios. The report is
    written under ``docs/reports`` below ``_root``; the directory is created
    when missing.
    """
    print(
        "RNS1.4 CHAT AI 质量评估 — 开始",
        f"模型: {BAILIAN_MODEL}",
        f"端点: {BAILIAN_BASE_URL}",
        f"时间: {datetime.now().isoformat()}",
        sep="\n",
    )

    collected: list[dict] = []
    for scenario in SCENARIOS:
        try:
            entry = await run_scenario(scenario)
        except Exception as exc:
            # Top-level boundary: keep going and surface the failure in the report.
            print(f"\n ❌ 场景失败: {exc}")
            entry = {
                "name": scenario["name"],
                "description": scenario["description"],
                "error": str(exc),
            }
        collected.append(entry)

    # Render the Markdown report first, then persist it.
    markdown = generate_report(collected)
    report_file = _root.joinpath("docs", "reports", "2026-03-20__rns14_chat_ai_quality_eval.md")
    report_file.parent.mkdir(parents=True, exist_ok=True)
    report_file.write_text(markdown, encoding="utf-8")
    print(f"\n✅ 报告已输出: {report_file}")
|
||||
|
||||
|
||||
def _fence_for(text: str) -> str:
    """Return a backtick fence (three or more) that does not occur inside *text*."""
    fence = "```"
    # A fence that is absent from the content cannot be closed early by it.
    while fence in text:
        fence += "`"
    return fence


def _fenced(text: str) -> list[str]:
    """Return *text* wrapped in a Markdown code fence, followed by a blank line."""
    fence = _fence_for(text)
    return [fence, text, fence, ""]


def _render_round(rd: dict) -> list[str]:
    """Render one conversation round (heading, both messages, metrics) as Markdown lines."""
    lines = [f"### 第 {rd['round']} 轮({rd['mode']})", ""]
    lines += ["**用户发送**:", "", *_fenced(rd["user_message"])]
    lines += ["**AI 回复**:", "", *_fenced(rd["ai_reply"])]

    meta_parts = [f"耗时 {rd['elapsed_seconds']}s"]
    for label, key in (("tokens", "tokens_used"), ("chunks", "chunk_count")):
        value = rd.get(key)
        # Compare against None so a genuine 0 (e.g. an empty stream) is still reported.
        if value is not None:
            meta_parts.append(f"{label}: {value}")
    lines += [f"**性能**: {' | '.join(meta_parts)}", ""]
    return lines


def generate_report(results: list[dict]) -> str:
    """Build the Markdown evaluation report.

    Args:
        results: One dict per scenario with "name" and "description", plus
            either an "error" string or a "rounds" list whose entries hold
            "round", "mode", "user_message", "ai_reply", "elapsed_seconds"
            and optionally "tokens_used" / "chunk_count".

    Returns:
        The full report as one newline-joined string: a header, one section
        per scenario, and an empty scoring table to be filled in by the
        person reviewing the run.
    """
    lines: list[str] = [
        "# RNS1.4 CHAT 模块 AI 返回质量评估报告",
        "",
        f"- 评估时间: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        f"- 模型: {BAILIAN_MODEL}",
        f"- 端点: {BAILIAN_BASE_URL}",
        f"- 场景数: {len(results)}",
        "",
        "---",
        "",
    ]

    for r in results:
        lines += [f"## {r['name']}", "", f"**场景描述**: {r['description']}", ""]

        if "error" in r:
            lines += [f"**❌ 执行失败**: {r['error']}", ""]
            continue

        for rd in r.get("rounds", []):
            lines += _render_round(rd)

        # NOTE(review): separator assumed to be emitted once per successful scenario.
        lines += ["---", ""]

    # Scoring placeholder: left blank for whoever evaluates the run.
    lines += [
        "## 综合评价",
        "",
        "| 维度 | 评分 | 说明 |",
        "|------|------|------|",
        "| 语义相关性 | — | — |",
        "| 中文表达质量 | — | — |",
        "| 上下文理解 | — | — |",
        "| 多轮连贯性 | — | — |",
        "| 响应速度 | — | — |",
        "| 流式输出稳定性 | — | — |",
        "",
        "> 评分标准: ✅ 优秀 / ⚠️ 可接受 / ❌ 不合格",
        "",
    ]

    return "\n".join(lines)
|
||||
|
||||
|
||||
# Script entry point: drive the async evaluation only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user