微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -0,0 +1,242 @@
"""导出所有 execution 的对话内容,供外部 LLM 批量生成摘要。
每个 execution 导出一个文件,包含:
- execution ID
- 每轮 user input 和 assistant output
- 文件变更列表
输出目录: export/session_summaries/
输出格式: {exec_id_short}.txt
用法:
python -B scripts/ops/export_session_conversations.py
python -B scripts/ops/export_session_conversations.py --limit 50
python -B scripts/ops/export_session_conversations.py --output-dir /path/to/dir
"""
import json
import os
import sys
from pathlib import Path
# 添加 scripts/ops 到 path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from _env_paths import ensure_repo_root
ensure_repo_root()
from extract_kiro_session import (
find_kiro_agent_dir,
find_workspace_session_dir,
find_all_executions,
find_execution_log,
load_sessions_json,
parse_messages,
parse_actions,
DEFAULT_GLOBAL_STORAGE,
)
OUTPUT_DIR = Path("export/session_summaries")
def extract_conversation_text(log: dict) -> dict:
    """Extract the conversation text from an execution log.

    Args:
        log: Parsed execution-log JSON object.

    Returns:
        {
            "exec_id": str,
            "workflow": str,
            "status": str,
            "start_time": str,
            "rounds": [{"role": "user"|"assistant", "text": str}, ...],
            "files_modified": [str],
            "files_created": [str],
        }
    """
    # Fix: this import previously sat mid-function (between the file-change
    # loop and the return); hoist it to the top of the function body.
    from extract_kiro_session import ts_fmt

    messages = log.get("context", {}).get("messages", [])
    actions = log.get("actions", [])
    conversation = parse_messages(messages)
    timeline = parse_actions(actions)

    rounds = []
    for msg in conversation:
        role = msg.get("role", "")
        if role not in ("human", "bot"):
            continue
        texts = []
        for entry in msg.get("entries", []):
            et = entry.get("type", "")
            if et == "text":
                t = entry.get("text", "").strip()
                if t:
                    # Skip auto-injected steering/system prompt text.
                    if t.startswith("## Included Rules") or t.startswith("<steering"):
                        continue
                    texts.append(t)
            elif et == "toolUse":
                # Record only the tool name, not its arguments (saves tokens).
                texts.append(f"[调用工具: {entry.get('name', '?')}]")
            elif et == "toolUseResponse":
                ok = "成功" if entry.get("success") else "失败"
                texts.append(f"[工具结果: {entry.get('name', '?')} {ok}]")
        if texts:
            combined = "\n".join(texts)
            # Truncate overly long single messages (saves tokens).
            if len(combined) > 2000:
                combined = combined[:2000] + "\n[...截断...]"
            rounds.append({
                "role": "user" if role == "human" else "assistant",
                "text": combined,
            })

    # File changes: an entry carrying an "original" snapshot is treated as a
    # modification; otherwise it is a newly created file.
    files_modified = []
    files_created = []
    for step in timeline:
        fc = step.get("_file_change")
        if fc:
            fname = fc.get("file", "?")
            if fc.get("original"):
                if fname not in files_modified:
                    files_modified.append(fname)
            else:
                if fname not in files_created:
                    files_created.append(fname)

    return {
        "exec_id": log.get("executionId", "?"),
        "workflow": log.get("workflowType", "?"),
        "status": log.get("status", "?"),
        "start_time": ts_fmt(log.get("startTime")),
        "rounds": rounds,
        "files_modified": files_modified,
        "files_created": files_created,
    }
def render_conversation_file(data: dict) -> str:
    """Render an extracted conversation as plain text for LLM consumption."""
    lines = [
        f"EXECUTION_ID: {data['exec_id']}",
        f"WORKFLOW: {data['workflow']}",
        f"STATUS: {data['status']}",
        f"START_TIME: {data['start_time']}",
    ]
    # File lists are capped at 20 entries each to keep the header compact.
    if data["files_modified"]:
        lines.append(f"FILES_MODIFIED: {', '.join(data['files_modified'][:20])}")
    if data["files_created"]:
        lines.append(f"FILES_CREATED: {', '.join(data['files_created'][:20])}")
    lines.append(f"ROUNDS: {len(data['rounds'])}")
    lines.append("---")
    for idx, turn in enumerate(data["rounds"], 1):
        label = "USER" if turn["role"] == "user" else "ASSISTANT"
        lines.append(f"\n[{label} #{idx}]")
        lines.append(turn["text"])
    lines.append("\n---END---")
    return "\n".join(lines)
def main() -> None:
    """Export conversation text files for all matching executions.

    Command-line flags:
        --limit N        export at most N executions
        --output-dir D   write files to D instead of OUTPUT_DIR
        --skip-existing  do not re-export files that already exist
    """
    import argparse
    parser = argparse.ArgumentParser(description="导出 session 对话内容供 LLM 批量总结")
    parser.add_argument("--limit", type=int, help="最多导出 N 条")
    parser.add_argument("--output-dir", type=str, help="输出目录")
    parser.add_argument("--skip-existing", action="store_true", help="跳过已导出的")
    args = parser.parse_args()
    out_dir = Path(args.output_dir) if args.output_dir else OUTPUT_DIR
    out_dir.mkdir(parents=True, exist_ok=True)
    gs = DEFAULT_GLOBAL_STORAGE
    ws = os.getcwd()
    agent_dir = find_kiro_agent_dir(gs)
    if not agent_dir:
        print("[export] kiro.kiroagent dir not found")
        return
    session_dir = find_workspace_session_dir(agent_dir, ws)
    # Restrict to chat sessions recorded for this workspace, when available.
    chat_ids = None
    if session_dir:
        sessions = load_sessions_json(session_dir)
        chat_ids = {s.get("chatSessionId") or s.get("sessionId") for s in sessions
                    if s.get("chatSessionId") or s.get("sessionId")}
    all_execs = find_all_executions(agent_dir, chat_session_ids=chat_ids)
    if not all_execs:
        print("[export] no executions found")
        return
    # Only export top-level executions (not sub-agents).
    # Sub-agent executions usually have no independent user input.
    count = 0
    skipped = 0
    errors = 0
    for execution in all_execs:
        eid = execution.get("executionId", "")
        # 8-char ID prefix as the filename; NOTE(review): assumed unique
        # enough — a collision would silently overwrite. Confirm upstream.
        short = eid[:8]
        if args.skip_existing:
            out_file = out_dir / f"{short}.txt"
            if out_file.exists():
                skipped += 1
                continue
        log_path = find_execution_log(agent_dir, execution["_hex_dir"], execution)
        if not log_path:
            continue
        try:
            with open(log_path, "r", encoding="utf-8") as f:
                log = json.load(f)
        except Exception:
            # Best-effort: count unreadable/corrupt logs and move on.
            errors += 1
            continue
        # Skip sub-agent executions (no independent user input).
        if log.get("workflowType") in ("sub-agent",):
            continue
        data = extract_conversation_text(log)
        # Skip executions with no conversational content.
        if not data["rounds"]:
            continue
        out_file = out_dir / f"{short}.txt"
        out_file.write_text(render_conversation_file(data), encoding="utf-8")
        count += 1
        if count % 50 == 0:
            print(f"[export] {count} exported...")
        if args.limit and count >= args.limit:
            break
    # Build a manifest (list of IDs, used later to match imported summaries).
    manifest = out_dir / "_manifest.json"
    existing = sorted([f.stem for f in out_dir.glob("*.txt") if f.stem != "_manifest"])
    manifest.write_text(
        json.dumps({"count": len(existing), "ids": existing}, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8"
    )
    print(f"[export] done: {count} exported, {skipped} skipped, {errors} errors")
    print(f"[export] output: {out_dir}")
    print(f"[export] manifest: {manifest}")
    print(f"\n下一步:")
    print(f" 1. 用 LLM 处理 {out_dir}/*.txt为每个文件生成 50-200 字中文摘要")
    print(f" 2. 将结果写入 {out_dir}/_summaries.json格式")
    print(f' {{"exec_id_short": "摘要文本", ...}}')
    print(f" 3. 运行导入脚本将摘要写入索引")
# Standard script entry-point guard.
if __name__ == "__main__":
    main()