feat(ai): W1-AI-CLOSURE 超级 Sprint — 9 APP 全链路收口 + chat 上下文真激活
Phase 2.3 chat 上下文捕获链路从未真正激活到完整工作: - 14 处 ai-float-button 补 sourcePage,chat.ts 三分支同步设 pageFilters.contextId - 后端 page_context 4 层 BUG 修(列名错位 + RLS site_id 未重设) - xcx_chat filters.pop 破坏 body.page_context 引用 — dict() 浅拷贝隔离 - chat 流式 markdown 实时解析(表格/标题/列表/加粗 + KPI 富卡) - reference_card KPI 富卡接入 SSE 路径,db 真写入 - 维客线索 source 显示规则:AI 来源用机器人 icon 替代长文字 数据库: - public.member_retention_clue 加 emoji + runtime_mode + sandbox_instance_id - biz.ai_run_logs 加 assistant_id + 复合索引 - chk_ai_cache_type CHECK 约束 8 类应用名 - cache_type / app_type 命名统一(app6_note / app7_customer / app8_consolidation) - 历史 emoji 抽取脚本 44/44 成功 后端 silent failure 修: - cleanup_service WHERE app_type → cache_type(90 天清理 + 20K 上限重新生效) - _build_ai_insight 字段错位修复(app4 → app7 + 字段对齐 prompt schema) - task_manager talkingPoints 改 app5_tactics + tactics 字段 - task_manager aiSuggestion 改取 one_line_summary - cache_service.CACHE_EXPIRY_DAYS 加 app2a_finance_area - WS /ws/ai-cache 加 token + JWT + site_id 校验(P0 信息泄露漏洞) - internal_ai token 改 hmac.compare_digest 工具/文档: - main.py 加 RotatingFileHandler logs/backend.log + uvicorn /health 过滤 - 新建 utils/clue_category.py(VI 6 类配色 + emoji fallback + source 显示规则) - 新建 utils/markdown.ts(轻量 md 转 rich-text 解析 + streaming 容错) - audit + 数据库变更说明 + backlog §七 #14 收口 + #15-#38 残余子任务 - backlog 追加 §十一 App1 参数/MCP/沙箱审计 + §十二 百炼/SQL MCP 主任务线 实地 MCP 走查:14 入口数据层 + 5 代表入口 sourcePage 注入 + customer-detail 全模块 + chat md 渲染 + reference_card 富卡 都已验证。9 项预先 BUG/UX 登记 §七 #29-#38 后续修复。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
222
scripts/ops/backfill_retention_clue_emoji.py
Normal file
222
scripts/ops/backfill_retention_clue_emoji.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""W1-AI-CLOSURE 组 1 — 维客线索 emoji 回填脚本。
|
||||
|
||||
背景:
|
||||
historic dispatcher._write_retention_clue 把 App8 prompt 输出的独立 emoji 字段
|
||||
拼到 summary 字符串(`f"{emoji} {raw_summary}"`),违反字段独立性哲学。
|
||||
20260506__ai_closure_schema_fixes.sql 已加 emoji 独立列。
|
||||
|
||||
本脚本回填历史数据:扫描 member_retention_clue 全表,把 summary 开头的 emoji
|
||||
提取到 emoji 列,并把 summary 去掉 emoji 前缀。
|
||||
|
||||
用法:
|
||||
cd C:\\Project\\NeoZQYY
|
||||
.venv\\Scripts\\python.exe scripts/ops/backfill_retention_clue_emoji.py --dry-run
|
||||
.venv\\Scripts\\python.exe scripts/ops/backfill_retention_clue_emoji.py
|
||||
|
||||
设计:
|
||||
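- With --dry-run: only print the diff, nothing is written to the database (dry-run is opt-in; omitting the flag performs the real backfill)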
|
||||
- 实跑模式下逐条 UPDATE,事务包裹
|
||||
- 仅处理 emoji = '' 的行,已回填的不重复处理(可重入)
|
||||
- 失败行单独打印,不影响其他行
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# ── Load the repository-root .env (BOM-tolerant) ─────────────────
# The script is invoked as scripts/ops/<file>.py, so the repository root is
# three directory levels above this file.
_ROOT = Path(__file__).resolve().parent.parent.parent

# "utf-8-sig" decodes the file while dropping a leading BOM if one is present.
load_dotenv(_ROOT / ".env", override=False, encoding="utf-8-sig")

# Connection string for the application database; abort immediately with a
# readable message when it is missing or empty.
_DSN = os.getenv("APP_DB_DSN")
if not _DSN:
    sys.exit("ERROR: APP_DB_DSN 环境变量未设置,请检查根 .env")

logger = logging.getLogger("backfill_retention_clue_emoji")
|
||||
|
||||
# ── emoji 前缀正则(覆盖常见 BMP + SMP 符号 + ZWJ 序列) ──
|
||||
# 匹配:summary 开头的 1 个或多个 emoji 字符 + 紧跟的空白(0 或多个)
|
||||
_EMOJI_PREFIX = re.compile(
|
||||
r"^("
|
||||
r"[\U0001F300-\U0001F9FF]" # Misc Symbols and Pictographs / Emoticons / Symbols and Pictographs Extended-A
|
||||
r"|[\U0001FA70-\U0001FAFF]" # Symbols and Pictographs Extended-B
|
||||
r"|[☀-➿]" # Misc Symbols + Dingbats
|
||||
r"|[⌀-⏿]" # Misc Technical
|
||||
r"|[⬀-⯿]" # Misc Symbols and Arrows
|
||||
r"|[\U0001F1E6-\U0001F1FF]" # 区域旗(国旗)
|
||||
r"|️" # Variation Selector-16
|
||||
r"|" # Zero Width Joiner
|
||||
r")+\s*"
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ClueRow:
    """Immutable snapshot of one member_retention_clue row awaiting backfill."""

    # Value of the row's ``id`` column.
    id: int
    # ``summary`` text exactly as read from the table; it may or may not
    # begin with an emoji prefix.
    summary: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BackfillResult:
    """Outcome of splitting one row's summary into emoji prefix and remainder."""

    # ``id`` of the row this result belongs to.
    id: int
    # Emoji prefix taken from the summary; empty when none was found.
    extracted_emoji: str
    # Summary text after the prefix has been removed.
    new_summary: str
    # Summary text as it was before extraction.
    original_summary: str

    @property
    def changed(self) -> bool:
        """Tell whether this row needs an UPDATE.

        True only when an emoji was actually extracted and the resulting
        summary differs from the original one.
        """
        if not self.extracted_emoji:
            return False
        return self.new_summary != self.original_summary
|
||||
|
||||
|
||||
def extract_emoji_prefix(summary: str | None) -> tuple[str, str]:
    """Split a leading emoji prefix off ``summary``.

    Args:
        summary: Raw summary text, with or without an emoji prefix. ``None``
            (for example a NULL column value) and ``""`` are tolerated and
            treated as "no prefix".

    Returns:
        ``(extracted_emoji, remaining_summary)``:
        - extracted_emoji: the emoji prefix (possibly several code points
          joined by ZWJ) without trailing whitespace; ``""`` when the
          summary has no emoji prefix.
        - remaining_summary: the summary with the prefix and the whitespace
          following it removed. When nothing is extracted the input is
          returned untouched (``""`` for ``None``).
    """
    # Nothing to inspect; also keeps a NULL summary from raising TypeError
    # inside re and aborting the whole backfill.
    if not summary:
        return "", ""

    match = _EMOJI_PREFIX.match(summary)
    if match is None:
        return "", summary

    # group(0) is the whole prefix including trailing whitespace; group(1)
    # would only hold the last repetition of the repeated group.
    emoji_part = match.group(0).rstrip()
    if not emoji_part:
        # Defensive: a zero-width or whitespace-only match extracted no
        # emoji, so the summary must not be silently stripped either.
        return "", summary

    remaining = summary[match.end():].lstrip()
    return emoji_part, remaining
|
||||
|
||||
|
||||
def fetch_pending_rows(conn: psycopg2.extensions.connection) -> list[ClueRow]:
    """Load every row whose ``emoji`` column is still the empty string.

    Rows that already carry a non-empty emoji are excluded by the WHERE
    clause, so running the script again does not reprocess them.

    Args:
        conn: Open database connection used to run the query.

    Returns:
        The matching rows as ``ClueRow`` objects, ordered by ``id``.
    """
    query = """
        SELECT id, summary
        FROM public.member_retention_clue
        WHERE emoji = ''
        ORDER BY id
        """
    with conn.cursor() as cur:
        cur.execute(query)
        records = cur.fetchall()
        return [ClueRow(id=record[0], summary=record[1]) for record in records]
|
||||
|
||||
|
||||
def apply_backfill(
    conn: psycopg2.extensions.connection,
    result: BackfillResult,
) -> None:
    """Write one extracted emoji and cleaned summary back to its row.

    Executes a single parameterized UPDATE keyed on ``result.id``. This
    function neither commits nor rolls back; that is left to the caller.

    Args:
        conn: Open database connection used to run the statement.
        result: Extraction outcome providing the new column values.
    """
    statement = """
        UPDATE public.member_retention_clue
        SET emoji = %s, summary = %s
        WHERE id = %s
        """
    params = (result.extracted_emoji, result.new_summary, result.id)
    with conn.cursor() as cur:
        cur.execute(statement, params)
|
||||
|
||||
|
||||
def run(dry_run: bool) -> int:
    """Run the backfill end to end.

    Fetches all pending rows, computes the emoji/summary split for each,
    logs a preview, and (unless ``dry_run``) updates the rows one by one,
    committing after every row.

    Args:
        dry_run: When True, only log the planned changes and return without
            issuing any UPDATE.

    Returns:
        Exit code: 0 when nothing failed (including "nothing to do" and
        dry-run), 1 when at least one row could not be updated.
    """
    conn = psycopg2.connect(_DSN)
    failed_count = 0
    try:
        rows = fetch_pending_rows(conn)
        logger.info("待处理行数: %d (emoji = '' 的所有行)", len(rows))

        if not rows:
            logger.info("无待处理行,提前退出")
            return 0

        results: list[BackfillResult] = []
        for row in rows:
            emoji, new_summary = extract_emoji_prefix(row.summary)
            results.append(BackfillResult(
                id=row.id,
                extracted_emoji=emoji,
                new_summary=new_summary,
                original_summary=row.summary,
            ))

        changed = [r for r in results if r.changed]
        unchanged = [r for r in results if not r.changed]

        logger.info("将抽取 emoji 的行: %d", len(changed))
        logger.info("无 emoji 前缀的行: %d (跳过 UPDATE)", len(unchanged))

        # Show the first five diffs so the operator can review them.
        for r in changed[:5]:
            logger.info(
                " id=%d emoji=%r summary: %r -> %r",
                r.id, r.extracted_emoji, r.original_summary, r.new_summary,
            )
        if len(changed) > 5:
            logger.info(" ... (省略剩余 %d 行)", len(changed) - 5)

        if dry_run:
            logger.info("[DRY-RUN] 不执行 UPDATE,正式回填请去掉 --dry-run")
            return 0

        # Real run: one UPDATE + commit per row so a bad row cannot take the
        # already-processed rows down with it.
        for r in changed:
            try:
                apply_backfill(conn, r)
                # The commit stays inside the try: an error surfacing at
                # commit time must be counted as a failure of this row
                # rather than aborting the remaining rows.
                conn.commit()
            except psycopg2.Error as exc:
                logger.exception("UPDATE 失败 id=%d: %s", r.id, exc)
                failed_count += 1
                # Discard the failed transaction so the next row starts clean.
                conn.rollback()

        logger.info(
            "回填完成: 成功 %d 行 / 失败 %d 行",
            len(changed) - failed_count, failed_count,
        )
        return 0 if failed_count == 0 else 1
    finally:
        conn.close()
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point.

    Parses the command line, configures INFO-level logging and terminates
    the process with the exit code returned by ``run``.
    """
    cli = argparse.ArgumentParser(
        description="W1-AI-CLOSURE 维客线索 emoji 回填(从 summary 抽取到独立列)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="试运行模式,打印 diff 不写库",
    )
    options = cli.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    exit_code = run(dry_run=options.dry_run)
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user