"""Full parser for Kiro Agent execution logs (final version, v2).

Loads one execution log (``LOG_PATH``), the diff snapshots under
``DIFF_DIR`` and, when present, the chat session file ``SESSION_JSON``,
then writes a single Markdown record to ``OUT_MD``.

The hash-pattern analysis embedded in the report was corrected based on
actual verification results.
"""

import json
from collections import Counter
from datetime import datetime, timedelta, timezone
from pathlib import Path

# === Paths ===
BASE = Path(r"C:\temp\FX\globalStorage\kiro.kiroagent")
EXEC_ID_DIR = "4d8f681b2b78799de676ab36904d08c7"
ACTION_LOG_ID = "414d1636299d2b9e4ce7e17fb11f63e9"
LOG_FILE_HASH = "794adc0617d71b2231c20ac9d101c7a6"
DIFF_PARENT_HASH = "74a08cf8613c7dec4db7b264470db812"
LOG_PATH = BASE / EXEC_ID_DIR / ACTION_LOG_ID / LOG_FILE_HASH
DIFF_DIR = BASE / EXEC_ID_DIR / DIFF_PARENT_HASH
SESSION_DIR = BASE / "workspace-sessions" / "ZDpcVXNlcnNcTmVvXGRlc2t0b3BcVA__"
SESSION_JSON = SESSION_DIR / "dba26892-8e58-447f-ad82-bc9459a51753.json"
OUT_MD = Path(r"C:\temp\FX\kiro_full_session_record.md")

# All timestamps in the report are rendered in UTC+8.
CST = timezone(timedelta(hours=8))

_ROLE_EMOJI = {"human": "👤", "bot": "🤖", "tool": "🔧"}

# Static report content. Both blocks are emitted inside code fences, so the
# indentation is part of the output and must be kept exactly as written here.
_STORAGE_TREE = """\
globalStorage/
├── state.vscdb  # VS Code 状态 SQLite
├── state.vscdb.backup
├── storage.json  # 窗口/主题/工作区配置
└── kiro.kiroagent/
    ├── config.json  # 上下文提供者列表
    ├── profile.json  # 用户 profile(ARN)
    ├── .migrations/  # 迁移标记
    ├── <32位hex>/  # 推测: profile/account hash
    │   ├── f62de366...  # 执行索引 JSON
    │   ├── <32位hex>/  # 推测: execution 相关 hash
    │   │   └── <32位hex>  # 完整执行日志 JSON
    │   └── <32位hex>/  # diff 快照集合
    │       └── <8位hex>/  # 文件内容短 hash
    │           └── <filename>  # 文件快照
    ├── default/
    │   └── f62de366...  # 默认执行索引(空)
    ├── dev_data/tokens_generated.jsonl
    ├── index/
    │   ├── docs.sqlite
    │   └── globalContext.json
    └── workspace-sessions/
        └── <base64url>/  # == 替换为 __
            ├── sessions.json  # 会话列表
            └── <chatSessionId>.json  # 会话状态
"""

_HASH_SUMMARY_ROWS = (
    "| 类型 | 格式 | 规律 |",
    "|------|------|------|",
    "| workspace-sessions 子目录 | Base64url | workspace 绝对路径的 base64url 编码,`==` 填充替换为 `__` |",
    "| session 文件名 | UUID v4 | 即 chatSessionId |",
    "| 顶层 32 位 hex 目录 | MD5 格式 | 非 workspace 路径 MD5,推测为 profile ARN 或 account ID 的 hash |",
    "| 二级 32 位 hex 目录 | MD5 格式 | 非 executionId 的 MD5,推测为内部标识 hash |",
    "| 日志文件名 | 32 位 hex | 可能是固定 schema 标识 |",
    "| 执行索引文件名 | 32 位 hex | 固定值 `f62de366...`,出现在多处 |",
    "| diff 子目录 | 8 位 hex | 文件内容短 hash |",
    "| toolUse ID | `tooluse_` + 随机串 | 特殊的有 `tooluse_fileTree`(内置工具) |",
)

_LOG_SCHEMA = """\
{
  "executionId": "UUID",
  "chatSessionId": "UUID",
  "workflowType": "chat-agent",
  "autonomyMode": "Autopilot",
  "status": "succeed",
  "startTime": <毫秒时间戳>,
  "endTime": <毫秒时间戳>,
  "contextUsagePercentage": <float>,
  "input": {
    "data": {
      "messages": [{ "role": "user", "content": [{ "text": "..." }] }],
      "chatSessionId": "UUID"
    },
    "documents": []
  },
  "actions": [
    {
      "type": "AgentExecutionAction",
      "executionId": "UUID",
      "actionId": "UUID",
      "actionType": "intent|tool|generation|subAgent",
      "actionState": "start|succeed|failed",
      "chatSessionId": "UUID",
      "emittedAt": <毫秒时间戳>,
      "endTime?": <毫秒时间戳>,
      "subExecutionId?": "UUID",
      "intentResult?": { "classification": "do|..." },
      "input?": { "toolName": "...", ... },
      "output?": { "text": "...", ... }
    }
  ],
  "context": {
    "messages": [
      {
        "role": "human|bot|tool",
        "messageId": "UUID",
        "forcedRole": null,
        "entries": [
          { "type": "text", "text": "..." },
          { "type": "toolUse", "id": "tooluse_*", "name": "...", "args": {...}, "requestMessageId": "UUID" },
          { "type": "toolUseResponse", "id": "tooluse_*", "name": "...", "args": {...}, "message": "...", "success": bool },
          { "type": "document", "document": { "type": "...", "target": "...", ... } }
        ]
      }
    ]
  },
  "usageSummary": [
    { "usedTools?": ["toolName"], "unit": "credit", "unitPlural": "credits", "usage": <float> }
  ],
  "result": { "status": "success", "executionId": "UUID", "result": "..." }
}
"""


def ts(ms):
    """Format a millisecond epoch timestamp as ``YYYY-MM-DD HH:MM:SS`` in CST.

    Any falsy value (``None``, ``0``, ``""``) is reported as ``"N/A"``.
    """
    if not ms:
        return "N/A"
    return datetime.fromtimestamp(ms / 1000, tz=CST).strftime("%Y-%m-%d %H:%M:%S")


def trunc(s, n=3000):
    """Return *s* as text, cut to at most *n* characters plus a truncation note.

    Non-string values are converted with ``str()`` first and are then subject
    to the same limit as strings.
    """
    text = s if isinstance(s, str) else str(s)
    if len(text) <= n:
        return text
    return text[:n] + f"\n... [截断,原文共 {len(text)} 字符]"


def safe_json(obj, n=5000):
    """Pretty-print *obj* as JSON, cut to at most *n* characters plus a note."""
    text = json.dumps(obj, ensure_ascii=False, indent=2)
    if len(text) <= n:
        return text
    return text[:n] + f"\n... [截断,原文共 {len(text)} 字符]"


def _to_int(value):
    """Coerce *value* to ``int``, returning 0 when it is missing or malformed."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


def _fenced(body, lang=""):
    """Wrap *body* in a Markdown code fence that the body cannot terminate.

    The fence starts at three backticks and grows until it no longer occurs
    inside *body*, so logged content that itself contains fences stays inside
    one block.
    """
    fence = "```"
    while fence in body:
        fence += "`"
    return f"{fence}{lang}\n{body}\n{fence}"


def _parse_entry(entry):
    """Reduce one raw message entry to the fields the report uses."""
    etype = entry.get("type", "unknown")
    if etype == "text":
        return {"type": "text", "text": entry.get("text", "")}
    if etype == "toolUse":
        return {
            "type": "toolUse",
            "id": entry.get("id"),
            "name": entry.get("name"),
            "args": entry.get("args", {}),
            "requestMessageId": entry.get("requestMessageId"),
        }
    if etype == "toolUseResponse":
        return {
            "type": "toolUseResponse",
            "id": entry.get("id"),
            "name": entry.get("name"),
            "args": entry.get("args", {}),
            "message": entry.get("message", ""),
            "success": entry.get("success"),
        }
    if etype == "document":
        doc = entry.get("document") or {}
        return {
            "type": "document",
            "doc_type": doc.get("type"),
            "target": doc.get("target"),
            "doc_keys": list(doc.keys()),
        }
    # Unknown entry kinds are kept as a stub so they still show up in the report.
    return {"type": etype, "raw_keys": list(entry.keys())}


def parse_messages(messages):
    """Normalize the log's context messages.

    Returns a list of dicts with ``index`` (position in *messages*), ``role``,
    ``messageId``, ``forcedRole`` and ``entries``. Messages and entries that
    are not dicts are skipped; a missing or null role becomes ``"?"``.
    """
    conversation = []
    for index, msg in enumerate(messages):
        if not isinstance(msg, dict):
            continue
        raw_entries = msg.get("entries") or []
        conversation.append({
            "index": index,
            "role": msg.get("role") or "?",
            "messageId": msg.get("messageId", "?"),
            "forcedRole": msg.get("forcedRole"),
            "entries": [_parse_entry(e) for e in raw_entries if isinstance(e, dict)],
        })
    return conversation


def parse_actions(actions):
    """Normalize the log's actions into timeline steps.

    Timestamps are formatted with :func:`ts`; ``subExecutionId`` and
    ``endTime`` are included only when truthy, and ``intentResult``, ``input``
    and ``output`` only when the key is present. Non-dict items are skipped.
    """
    timeline = []
    for index, action in enumerate(actions):
        if not isinstance(action, dict):
            continue
        step = {
            "index": index,
            "actionId": action.get("actionId"),
            "actionType": action.get("actionType"),
            "actionState": action.get("actionState"),
            "emittedAt": ts(action.get("emittedAt")),
        }
        if action.get("subExecutionId"):
            step["subExecutionId"] = action["subExecutionId"]
        if action.get("endTime"):
            step["endTime"] = ts(action["endTime"])
        for key in ("intentResult", "input", "output"):
            if key in action:
                step[key] = action[key]
        timeline.append(step)
    return timeline


def collect_diffs():
    """Read every file one level below each sub-directory of ``DIFF_DIR``.

    Returns a dict keyed by ``"<subdir>/<filename>"`` holding the file ``size``
    and its ``content`` decoded as UTF-8 (undecodable bytes are replaced).
    Returns an empty dict when ``DIFF_DIR`` does not exist.
    """
    diffs = {}
    if not DIFF_DIR.exists():
        return diffs
    for sub in sorted(DIFF_DIR.iterdir()):
        if not sub.is_dir():
            continue
        # Sorted so the report order does not depend on directory listing order.
        for path in sorted(sub.iterdir()):
            if path.is_file():
                diffs[f"{sub.name}/{path.name}"] = {
                    "size": path.stat().st_size,
                    "content": path.read_text(encoding="utf-8", errors="replace"),
                }
    return diffs


def _append_unique(bucket, value):
    """Append *value* to *bucket* if it is truthy and not already present."""
    if value and value not in bucket:
        bucket.append(value)


def collect_ids(log, conversation, timeline):
    """Gather every identifier referenced by the log, in first-seen order.

    Returns a dict with the execution and chat session ids plus de-duplicated
    lists of action, sub-execution, tool-use, message and request-message ids,
    and the names of the sub-directories of ``DIFF_DIR``.
    """
    refs = {
        "executionId": log.get("executionId"),
        "chatSessionId": log.get("chatSessionId"),
        "actionIds": [],
        "subExecutionIds": [],
        "toolUseIds": [],
        "messageIds": [],
        "requestMessageIds": [],
        "diff_hashes": [],
    }
    for step in timeline:
        _append_unique(refs["actionIds"], step.get("actionId"))
        _append_unique(refs["subExecutionIds"], step.get("subExecutionId"))
    for msg in conversation:
        _append_unique(refs["messageIds"], msg.get("messageId"))
        for entry in msg.get("entries", []):
            _append_unique(refs["toolUseIds"], entry.get("id"))
            _append_unique(refs["requestMessageIds"], entry.get("requestMessageId"))
    if DIFF_DIR.exists():
        refs["diff_hashes"] = [s.name for s in sorted(DIFF_DIR.iterdir()) if s.is_dir()]
    return refs


def _md_metadata(meta, session_info):
    """Section 1: metadata table and, when available, session configuration."""
    out = ["## 1. 会话元数据\n", "| 字段 | 值 |", "|------|-----|"]
    out.extend(f"| {k} | `{v}` |" for k, v in meta.items())
    out.append("")
    if session_info:
        out.append("### 会话配置\n")
        out.append(f"- title: `{session_info.get('title')}`")
        # dateCreated may be absent, null or a numeric string in the session file.
        out.append(f"- dateCreated: `{ts(_to_int(session_info.get('dateCreated')))}`")
        out.append(f"- workspaceDirectory: `{session_info.get('workspaceDirectory')}`")
        out.append("")
    return out


def _md_user_input(input_msg):
    """Section 2: the user's original input."""
    return ["## 2. 用户原始输入\n", _fenced(trunc(input_msg, 5000)) + "\n"]


def _md_id_refs(ids):
    """Section 3: directory-level hashes and runtime ids."""
    out = [
        "## 3. ID/Hash 引用关系图\n",
        "### 3.1 目录层级 Hash(已验证)\n",
        "| Hash | 位置 | 验证结果 |",
        "|------|------|----------|",
        "| `ZDpcVXNlcnNcTmVvXGRlc2t0b3BcVA__` | workspace-sessions 子目录 | ✅ Base64url 编码 workspace 路径 `d:\\Users\\Neo\\desktop\\T`,`==` 填充替换为 `__` |",
        "| `dba26892-8e58-447f-ad82-bc9459a51753` | session 文件名 | ✅ UUID v4,即 chatSessionId |",
        f"| `{EXEC_ID_DIR}` | 顶层目录 | ❓ 32 位 hex(MD5 格式),非 workspace 路径的 MD5,可能是 profile/account hash |",
        f"| `{ACTION_LOG_ID}` | 二级目录 | ❓ 32 位 hex,非 executionId 的 MD5,可能是 execution 内部标识的 hash |",
        f"| `{LOG_FILE_HASH}` | 日志文件名 | ❓ 32 位 hex,可能是固定 schema 标识或内容 hash |",
        f"| `{DIFF_PARENT_HASH}` | diff 父目录 | ❓ 32 位 hex,与 execution 关联 |",
        "| `f62de366d0006e17ea00a01f6624aabf` | 执行索引文件 | ❓ 固定文件名,出现在 workspace 和 default 两处 |",
        "| diff 子目录(8位 hex) | diff 快照 | 文件内容的短 hash |",
        "| `tooluse_*` | toolUse entry id | `tooluse_` 前缀 + 随机串(如 `tooluse_fileTree`、`tooluse_DH4yvpYUCRxZsulG2G2bU1`) |",
        "",
        "### 3.2 运行时 ID\n",
        f"- executionId: `{ids['executionId']}`",
        f"- chatSessionId: `{ids['chatSessionId']}`",
        f"- subExecutionIds: `{ids['subExecutionIds']}`",
    ]
    for label in ("toolUseIds", "messageIds", "requestMessageIds"):
        out.append(f"\n{label} ({len(ids[label])} 个):\n")
        out.extend(f"- `{value}`" for value in ids[label])
    out.append(f"\ndiff_hashes: `{ids['diff_hashes']}`\n")
    return out


def _md_entry(entry, role):
    """Render one parsed message entry; *role* is the owning message's role."""
    kind = entry["type"]
    if kind == "text":
        text = entry["text"]
        if not text:
            return ["*(空)*\n"]
        if len(text) > 10000 and role == "human":
            # Very long human-role text is the system prompt; keep it collapsed.
            return [
                f"**[系统提示词]** ({len(text)} 字符)\n",
                "<details><summary>展开</summary>\n",
                _fenced(trunc(text, 20000)) + "\n</details>\n",
            ]
        return [_fenced(trunc(text, 5000)) + "\n"]
    if kind == "toolUse":
        return [
            f"**[🔧 调用]** `{entry['name']}` id=`{entry['id']}`\n",
            _fenced(safe_json(entry["args"], 5000), "json") + "\n",
        ]
    if kind == "toolUseResponse":
        ok = "✅" if entry.get("success") else "❌"
        out = [f"**[📋 结果]** `{entry['name']}` {ok} id=`{entry['id']}`\n"]
        if entry.get("message"):
            out.append(_fenced(trunc(entry["message"], 5000)) + "\n")
        if entry.get("args"):
            out.append("<details><summary>完整参数</summary>\n")
            out.append(_fenced(safe_json(entry["args"], 5000), "json") + "\n</details>\n")
        return out
    if kind == "document":
        return [f"**[📄 文档]** type=`{entry.get('doc_type')}` target=`{entry.get('target')}`\n"]
    return [f"**[{kind}]** keys={entry.get('raw_keys')}\n"]


def _md_conversation(conversation):
    """Section 4: every message with all of its entries."""
    roles = Counter(m["role"] for m in conversation)
    out = [
        "## 4. 对话全量记录\n",
        f"共 {len(conversation)} 条消息:human={roles['human']}, bot={roles['bot']}, tool={roles['tool']}\n",
    ]
    for msg in conversation:
        emoji = _ROLE_EMOJI.get(msg["role"], "❓")
        out.append(f"### Msg {msg['index']}: {emoji} {msg['role'].upper()} `{msg['messageId']}`\n")
        for entry in msg["entries"]:
            out.extend(_md_entry(entry, msg["role"]))
    return out


def _md_actions(timeline):
    """Section 5: the action timeline."""
    out = ["## 5. Actions 时间线\n", f"共 {len(timeline)} 个\n"]
    for step in timeline:
        out.append(
            f"### Step {step['index']}: `{step.get('actionType','?')}` "
            f"[{step.get('actionState','?')}] @ {step.get('emittedAt','?')}\n"
        )
        out.append(f"- actionId: `{step.get('actionId')}`")
        if step.get("subExecutionId"):
            out.append(f"- subExecutionId: `{step['subExecutionId']}`")
        if step.get("endTime"):
            out.append(f"- endTime: {step['endTime']}")
        for key in ("intentResult", "input", "output"):
            if key in step:
                out.append(f"- {key}:\n" + _fenced(safe_json(step[key], 5000), "json"))
        out.append("")
    return out


def _md_diffs(diffs):
    """Section 6: file snapshots collected from the diff directory."""
    out = ["## 6. 文件版本快照\n"]
    if not diffs:
        out.append("*(无)*\n")
        return out
    for path, info in diffs.items():
        if path.endswith(".py"):
            lang = "python"
        elif path.endswith(".md"):
            lang = "markdown"
        else:
            lang = ""
        out.append(f"### `{path}` ({info['size']} bytes)\n")
        out.append(_fenced(trunc(info["content"], 5000), lang) + "\n")
    return out


def _md_usage(usage):
    """Section 7: resource usage table with a grand total."""
    out = ["## 7. 资源消耗\n"]
    if usage:
        out.append("| 工具 | 消耗 | 单位 |")
        out.append("|------|------|------|")
        total = 0
        for item in usage:
            # Both fields are optional in the log and may be null.
            tools = ", ".join(str(t) for t in (item.get("usedTools") or ["-"]))
            amount = item.get("usage") or 0
            total += amount
            out.append(f"| {tools} | {amount} | {item.get('unit','?')} |")
        out.append(f"| **合计** | **{total:.4f}** | |")
        out.append("")
    return out


def _md_storage_layout():
    """Section 8: on-disk layout and hash naming summary (static text)."""
    out = ["## 8. Kiro 日志存储结构\n", "```"]
    out.extend(_STORAGE_TREE.splitlines())
    out.append("```\n")
    out.append("### Hash 命名规律总结\n")
    out.extend(_HASH_SUMMARY_ROWS)
    out.append("")
    return out


def _md_log_schema():
    """Section 9: annotated shape of the execution log JSON (static text)."""
    out = ["## 9. 执行日志 JSON Schema\n", "```"]
    out.extend(_LOG_SCHEMA.splitlines())
    out.append("```\n")
    return out


def gen_md(meta, input_msg, conversation, timeline, diffs, ids, usage, session_info):
    """Render the full session record as one Markdown string.

    Args:
        meta: mapping of metadata field name to display value.
        input_msg: the user's original input text.
        conversation: output of :func:`parse_messages`.
        timeline: output of :func:`parse_actions`.
        diffs: output of :func:`collect_diffs`.
        ids: output of :func:`collect_ids`.
        usage: list of usage-summary dicts from the log (may be empty).
        session_info: parsed session JSON, or ``None`` when unavailable.
    """
    out = [
        "# Kiro Agent 会话全量记录\n",
        f"> 生成时间: {datetime.now(CST).strftime('%Y-%m-%d %H:%M:%S')} CST\n",
    ]
    out.extend(_md_metadata(meta, session_info))
    out.extend(_md_user_input(input_msg))
    out.extend(_md_id_refs(ids))
    out.extend(_md_conversation(conversation))
    out.extend(_md_actions(timeline))
    out.extend(_md_diffs(diffs))
    out.extend(_md_usage(usage))
    out.extend(_md_storage_layout())
    out.extend(_md_log_schema())
    return "\n".join(out)


def _extract_input_text(log):
    """Concatenate every text fragment of the log's input messages.

    Each fragment is followed by a newline. Fragments are read from a
    message's ``content`` list, falling back to ``entries``.
    """
    data = (log.get("input") or {}).get("data") or {}
    parts = []
    for msg in data.get("messages") or []:
        if not isinstance(msg, dict):
            continue
        for entry in msg.get("content", msg.get("entries", [])) or []:
            if isinstance(entry, dict) and entry.get("text"):
                parts.append(entry["text"] + "\n")
    return "".join(parts)


def main():
    """Parse the configured log, write the Markdown report and print a summary."""
    print("=== Kiro Agent 日志全量解析 v2 ===\n")
    with open(LOG_PATH, "r", encoding="utf-8") as f:
        log = json.load(f)
    print(f"日志: {LOG_PATH.stat().st_size / 1024:.1f} KB")

    # Any of these may be missing or null when an execution did not finish.
    start_ms = log.get("startTime") or 0
    end_ms = log.get("endTime") or 0
    context_pct = log.get("contextUsagePercentage") or 0
    meta = {
        "executionId": log.get("executionId"),
        "chatSessionId": log.get("chatSessionId"),
        "workflowType": log.get("workflowType"),
        "autonomyMode": log.get("autonomyMode"),
        "status": log.get("status"),
        "startTime": ts(log.get("startTime")),
        "endTime": ts(log.get("endTime")),
        "duration": f"{(end_ms - start_ms) / 1000:.1f}s",
        "contextUsage": f"{context_pct:.2f}%",
    }

    # User input
    input_text = _extract_input_text(log)

    conversation = parse_messages((log.get("context") or {}).get("messages") or [])
    timeline = parse_actions(log.get("actions") or [])
    diffs = collect_diffs()
    ids = collect_ids(log, conversation, timeline)

    session_info = None
    if SESSION_JSON.exists():
        with open(SESSION_JSON, "r", encoding="utf-8") as f:
            session_info = json.load(f)

    usage = log.get("usageSummary") or []
    md = gen_md(meta, input_text, conversation, timeline, diffs, ids, usage, session_info)
    with open(OUT_MD, "w", encoding="utf-8") as f:
        f.write(md)

    lines = len(md.splitlines())
    size_kb = OUT_MD.stat().st_size / 1024
    roles = Counter(m["role"] for m in conversation)
    tool_calls = sum(1 for m in conversation for e in m["entries"] if e["type"] == "toolUse")
    print(f"输出: {OUT_MD} ({size_kb:.1f} KB, {lines} 行)")
    print(f"消息: human={roles['human']}, "
          f"bot={roles['bot']}, "
          f"tool={roles['tool']}")
    print(f"工具调用: {tool_calls}")
    print(f"Actions: {len(timeline)}, Diffs: {len(diffs)}")
    print(f"toolUseIds: {len(ids['toolUseIds'])}, messageIds: {len(ids['messageIds'])}")


if __name__ == "__main__":
    main()