#!/usr/bin/env python3
"""extract_kiro_session — Kiro 执行日志全量提取器 v2。

改进点(相比 v1):
1. 系统提示词去重:首次保存到 _system_prompts/sp_{hash8}.md,后续引用
2. 目录分层:YYYY-MM/DD/{chatSessionId_short}/ 下按对话组织
3. 子代理递归提取:主 execution + 子 execution 放同一目录,按调用顺序编号
4. ID 替换:kiro-diff URI → 真实文件路径,terminalId → 进程描述
5. CONTEXT TRANSFER 中的 steering 内容折叠引用
6. 无内容的 model action 压缩为一行

用法:
    python scripts/ops/extract_kiro_session.py                     # 提取最新 execution
    python scripts/ops/extract_kiro_session.py --all               # 提取所有未索引的
    python scripts/ops/extract_kiro_session.py --recent 20         # 提取最近 N 个未索引的
    python scripts/ops/extract_kiro_session.py --execution-id XX   # 提取指定 execution
"""
|
||
|
||
import base64
import hashlib
import json
import os
import re
import sys
from datetime import datetime, timezone, timedelta
from typing import Optional

from _env_paths import ensure_repo_root

ensure_repo_root()

# Timezone used for every rendered timestamp (UTC+8).
CST = timezone(timedelta(hours=8))
|
||
|
||
# Fixed filename Kiro uses for its execution manifest.
MANIFEST_FILENAME = "f62de366d0006e17ea00a01f6624aabf"

# Output locations.
SESSION_LOG_DIR = os.path.join("docs", "audit", "session_logs")
# Slim index: main conversations only.
INDEX_PATH = os.path.join(SESSION_LOG_DIR, "_session_index.json")
# Full index: main conversations plus sub-agents.
INDEX_FULL_PATH = os.path.join(SESSION_LOG_DIR, "_session_index_full.json")
SYSTEM_PROMPTS_DIR = os.path.join(SESSION_LOG_DIR, "_system_prompts")

# Default globalStorage path (Windows %APPDATA% layout).
DEFAULT_GLOBAL_STORAGE = os.path.join(
    os.environ.get("APPDATA", ""),
    "Kiro", "User", "globalStorage",
)
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 工具函数
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def ts_fmt(ms) -> str:
    """Format epoch milliseconds as 'YYYY-MM-DD HH:MM:SS' in CST.

    Returns "N/A" for falsy input and str(ms) when conversion fails.
    """
    if not ms:
        return "N/A"
    try:
        moment = datetime.fromtimestamp(ms / 1000, tz=CST)
        return moment.strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return str(ms)
|
||
|
||
def ts_iso(ms) -> str:
    """Format epoch milliseconds as an ISO-8601 string in CST ('' on failure/falsy)."""
    if not ms:
        return ""
    try:
        return datetime.fromtimestamp(ms / 1000, tz=CST).isoformat()
    except Exception:
        return ""
|
||
|
||
def ts_date_parts(ms) -> tuple[str, str, str]:
    """Return (YYYY-MM, DD, HHMMSS) in CST, for directory layering and file naming.

    Falls back to ("unknown", "00", "000000") when *ms* cannot be converted.
    """
    try:
        moment = datetime.fromtimestamp(ms / 1000, tz=CST)
        return moment.strftime("%Y-%m"), moment.strftime("%d"), moment.strftime("%H%M%S")
    except Exception:
        return "unknown", "00", "000000"
|
||
|
||
|
||
def _resolve_chat_dir(day_dir: str, chat_short: str, first_hms: str) -> str:
    """Pick (or create) the sequenced output directory for a chatSession.

    Lookup order:
    1. an existing directory for the same chatSession under *day_dir*
       (later rounds on the same day);
    2. a directory anywhere under SESSION_LOG_DIR (cross-day sessions whose
       first round happened on another date);
    3. otherwise allocate the next sequence number under *day_dir* and create
       a new directory named {seq:02d}_{chat_short}_{first_hms}/.
    """
    os.makedirs(day_dir, exist_ok=True)

    # 1. Same-day lookup.
    for name in os.listdir(day_dir):
        if chat_short in name and os.path.isdir(os.path.join(day_dir, name)):
            return os.path.join(day_dir, name)

    # 2. Cross-day lookup: walk every YYYY-MM/DD/ directory under the log root.
    if os.path.isdir(SESSION_LOG_DIR):
        for ym in os.listdir(SESSION_LOG_DIR):
            ym_path = os.path.join(SESSION_LOG_DIR, ym)
            # Skip helper directories like _system_prompts.
            if ym.startswith("_") or not os.path.isdir(ym_path):
                continue
            for dd in os.listdir(ym_path):
                dd_path = os.path.join(ym_path, dd)
                if not os.path.isdir(dd_path):
                    continue
                for name in os.listdir(dd_path):
                    if chat_short in name and os.path.isdir(os.path.join(dd_path, name)):
                        return os.path.join(dd_path, name)

    # 3. New chatSession: allocate the next two-digit sequence number.
    taken = [
        int(name[:2])
        for name in os.listdir(day_dir)
        if os.path.isdir(os.path.join(day_dir, name)) and len(name) >= 2 and name[:2].isdigit()
    ]
    new_dir = os.path.join(day_dir, f"{max(taken, default=0) + 1:02d}_{chat_short}_{first_hms}")
    os.makedirs(new_dir, exist_ok=True)
    return new_dir
|
||
|
||
|
||
def _write_cross_day_ref(exec_day_dir: str, chat_short: str, chat_dir: str):
|
||
"""在 execution 所在日期目录下生成跨天指引文件。
|
||
|
||
当一个 chatSession 跨天时,后续日期的 day_dir 下不会有该对话的目录,
|
||
生成 _ref_{chatShort}.md 告知该对话归在哪个目录。
|
||
"""
|
||
os.makedirs(exec_day_dir, exist_ok=True)
|
||
ref_path = os.path.join(exec_day_dir, f"_ref_{chat_short}.md")
|
||
if os.path.isfile(ref_path):
|
||
return # 已存在,不重复写
|
||
rel_target = os.path.relpath(chat_dir, exec_day_dir).replace("\\", "/")
|
||
with open(ref_path, "w", encoding="utf-8") as f:
|
||
f.write(f"# 跨天对话指引\n\n")
|
||
f.write(f"chatSession `{chat_short}` 的完整记录归档在:\n\n")
|
||
f.write(f"→ `{rel_target}`\n\n")
|
||
f.write(f"(绝对路径:`{chat_dir.replace(chr(92), '/')}`)\n")
|
||
|
||
|
||
def trunc(s, n=3000) -> str:
    """Coerce *s* to str and truncate to *n* chars, noting the original length."""
    if not isinstance(s, str):
        return str(s)
    if len(s) <= n:
        return s
    return s[:n] + f"\n... [截断,原文共 {len(s)} 字符]"
|
||
|
||
import re as _re
|
||
_SURROGATE_RE = _re.compile(r'[\ud800-\udfff]')
|
||
|
||
def _sanitize_surrogates(obj):
|
||
"""递归清洗 dict/list/str 中的 surrogate 字符,替换为 U+FFFD。"""
|
||
if isinstance(obj, str):
|
||
return _SURROGATE_RE.sub('\ufffd', obj)
|
||
if isinstance(obj, dict):
|
||
return {_sanitize_surrogates(k): _sanitize_surrogates(v) for k, v in obj.items()}
|
||
if isinstance(obj, list):
|
||
return [_sanitize_surrogates(i) for i in obj]
|
||
return obj
|
||
|
||
|
||
def safe_json(obj, n=5000) -> str:
    """Pretty-print *obj* as JSON (falling back to str()) and truncate to *n* chars."""
    try:
        rendered = json.dumps(obj, ensure_ascii=False, indent=2)
    except Exception:
        rendered = str(obj)
    if len(rendered) <= n:
        return rendered
    return rendered[:n] + f"\n... [截断,原文共 {len(rendered)} 字符]"
|
||
|
||
def fence(content: str, lang: str = "") -> str:
    """Wrap *content* in a Markdown code fence that the content cannot break.

    - Unclosed inner fences (caused by truncated raw data) are closed at the end.
    - The outer fence uses one more backtick than the longest backtick run
      found inside the content (minimum of three).
    - Line-leading '#' characters are escaped so they do not render as headings.
    """
    if not content:
        return f"```{lang}\n```"

    # Track inner fences line by line; anything left open gets closed below.
    open_ticks: list[int] = []
    for raw_line in content.split("\n"):
        stripped = raw_line.strip()
        m = re.match(r"^(`{3,})", stripped)
        if not m:
            continue
        ticks = len(m.group(1))
        # A bare backtick run matching the innermost open fence closes it;
        # anything else opens a new fence.
        if open_ticks and open_ticks[-1] == ticks and stripped == '`' * ticks:
            open_ticks.pop()
        else:
            open_ticks.append(ticks)
    if open_ticks:
        closers = "\n".join('`' * t for t in reversed(open_ticks))
        content = f"{content}\n{closers}"

    # Outer fence must exceed the longest backtick run in the content.
    longest = max((len(run.group(0)) for run in re.finditer(r"`+", content)), default=0)
    outer = '`' * (max(longest, 2) + 1)

    # Escape line-leading '#' with a zero-width space.
    safe = _escape_heading(content)
    return f"{outer}{lang}\n{safe}\n{outer}"
|
||
|
||
|
||
|
||
def _escape_heading(text: str) -> str:
|
||
"""转义文本中行首的 # 符号,防止被 Markdown 解析为标题。
|
||
在 # 前插入零宽空格 (\\u200b)。
|
||
"""
|
||
lines = text.split('\n')
|
||
out = []
|
||
for line in lines:
|
||
if line.lstrip().startswith('#'):
|
||
# 找到第一个 # 的位置,在前面插入零宽空格
|
||
idx = 0
|
||
while idx < len(line) and line[idx] in (' ', '\t'):
|
||
idx += 1
|
||
out.append(line[:idx] + '\u200b' + line[idx:])
|
||
else:
|
||
out.append(line)
|
||
return '\n'.join(out)
|
||
|
||
|
||
def hash8(text: str) -> str:
    """Return the first 8 hex chars of the SHA-256 digest of *text* (UTF-8)."""
    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    return digest[:8]
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 系统提示词去重
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def save_system_prompt(text: str) -> str:
    """Persist a system prompt under _system_prompts/ and return its filename.

    Content-addressed by an 8-char SHA-256 prefix, so an identical prompt is
    written to disk only once; later calls just return the reference name.
    """
    digest = hash8(text)
    filename = f"sp_{digest}.md"
    filepath = os.path.join(SYSTEM_PROMPTS_DIR, filename)
    if not os.path.isfile(filepath):
        os.makedirs(SYSTEM_PROMPTS_DIR, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as fh:
            fh.write(f"# 系统提示词 (hash: {digest})\n\n")
            fh.write(text)
    return filename
|
||
|
||
|
||
def is_system_prompt(text: str) -> bool:
    """Detect a Kiro-injected system prompt (<identity>/<capabilities> blocks).

    Heuristic: <identity> near the very start, <capabilities> within the first
    500 chars, or the canonical "You are Kiro" opener.
    """
    if not text:
        return False
    return (
        "<identity>" in text[:200]
        or "<capabilities>" in text[:500]
        or text.strip().startswith("You are Kiro")
    )
|
||
|
||
|
||
def is_steering_block(text: str) -> bool:
    """Return True when *text* is a steering-reminder injection block.

    The marker must appear within the first 100 characters, matching how
    Kiro prefixes injected steering content.
    """
    if not text:
        # Guard: the original sliced unconditionally, so a None input raised
        # TypeError; empty/None input is simply "not a steering block".
        return False
    return "<steering-reminder>" in text[:100]
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# ID 替换与路径还原
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
# kiro-diff URI shape: kiro-diff:/path?commitId=xxx&executionId=yyy
# ('=' may appear percent-encoded as %3D).
KIRO_DIFF_PATTERN = re.compile(
    r'kiro-diff:(/[^?]+)\?commitId(?:%3D|=)([^&]+)&executionId(?:%3D|=)([^"\'}\s]+)'
)


def resolve_kiro_diff_uri(uri: str) -> str:
    """Rewrite a kiro-diff: URI as a readable file-path description.

    Non-matching input is returned unchanged.
    """
    m = KIRO_DIFF_PATTERN.search(uri)
    if not m:
        return uri
    path, commit = m.group(1), m.group(2)
    return f"{path} (版本: {commit[:8]})"


def resolve_ids_in_text(text: str) -> str:
    """Replace known opaque ID patterns in *text* with readable equivalents."""
    if not text or not isinstance(text, str):
        return str(text) if text else ""
    # kiro-diff URIs → file-snapshot annotations.
    text = KIRO_DIFF_PATTERN.sub(
        lambda m: f'[文件快照] {m.group(1)} (版本 {m.group(2)[:8]})',
        text,
    )
    # file:///c%3A/ percent-encoded drive letters → plain 'C:/'.
    text = re.sub(
        r'file:///([a-zA-Z])%3A/',
        lambda m: f'{m.group(1).upper()}:/',
        text,
    )
    return text


def resolve_tool_args(name: str, args: dict) -> dict:
    """Return a copy of tool-call *args* with IDs/URIs made human-readable."""
    readable = dict(args)
    # 'target' of document-type tools may be a kiro-diff URI.
    if isinstance(readable.get("target"), str):
        readable["target"] = resolve_kiro_diff_uri(readable["target"])
    # 'path' from editCode / strReplace.
    if isinstance(readable.get("path"), str):
        readable["path"] = resolve_ids_in_text(readable["path"])
    # Document-entry bodies.
    for key in ("modified", "original", "local"):
        if isinstance(readable.get(key), str):
            readable[key] = resolve_ids_in_text(readable[key])
    return readable
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 定位逻辑
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def find_kiro_agent_dir(global_storage: str) -> Optional[str]:
    """Return the kiro.kiroagent directory under *global_storage*, or None."""
    candidate = os.path.join(global_storage, "kiro.kiroagent")
    if os.path.isdir(candidate):
        return candidate
    return None
|
||
|
||
|
||
def decode_base64url_dir(dirname: str) -> str:
    """Decode a base64url workspace-session directory name back to a path.

    Directory names store the '==' padding as '__'; restore it before
    decoding.  Returns "" on any decode failure.
    """
    try:
        padded = dirname.replace("__", "==")
        return base64.urlsafe_b64decode(padded).decode("utf-8", errors="replace")
    except Exception:
        return ""
|
||
|
||
|
||
def find_workspace_session_dir(agent_dir: str, workspace_path: str) -> Optional[str]:
    """Find the workspace-sessions entry whose decoded name equals *workspace_path*.

    Comparison is case-insensitive with separators normalized to '/'.
    """
    ws_root = os.path.join(agent_dir, "workspace-sessions")
    if not os.path.isdir(ws_root):
        return None

    def _norm(p: str) -> str:
        return p.replace("\\", "/").rstrip("/").lower()

    wanted = _norm(workspace_path)
    for entry in os.scandir(ws_root):
        if entry.is_dir() and _norm(decode_base64url_dir(entry.name)) == wanted:
            return entry.path
    return None
|
||
|
||
|
||
def load_sessions_json(session_dir: str) -> list[dict]:
    """Load sessions.json from *session_dir*.

    Supports both layouts: a bare list, or {"sessions": [...]}.  Returns []
    on a missing file, a parse error, or an unexpected shape — the original
    returned `data["sessions"]` unchecked, so a malformed value (e.g. a dict)
    leaked out and broke callers expecting a list.
    """
    sessions_file = os.path.join(session_dir, "sessions.json")
    if not os.path.isfile(sessions_file):
        return []
    try:
        with open(sessions_file, "r", encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception:
        return []
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        sessions = data.get("sessions")
        # Guard: honor the declared list[dict] return contract.
        if isinstance(sessions, list):
            return sessions
    return []
|
||
|
||
|
||
def load_session_detail(session_dir: str, session_id: str) -> Optional[dict]:
    """Load {session_id}.json from *session_dir*; None when missing or unparsable."""
    detail_path = os.path.join(session_dir, f"{session_id}.json")
    if not os.path.isfile(detail_path):
        return None
    try:
        with open(detail_path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception:
        return None
|
||
|
||
|
||
def find_all_manifests(agent_dir: str) -> list[tuple[str, str]]:
    """Scan *agent_dir* for 32-hex subdirectories that contain a manifest.

    Returns (hex_dir_name, manifest_path) pairs.
    """
    hex_name = re.compile(r"^[0-9a-f]{32}$")
    found = []
    for entry in os.scandir(agent_dir):
        if not (entry.is_dir() and hex_name.match(entry.name)):
            continue
        manifest = os.path.join(entry.path, MANIFEST_FILENAME)
        if os.path.isfile(manifest):
            found.append((entry.name, manifest))
    return found
|
||
|
||
|
||
def load_manifest(manifest_path: str) -> list[dict]:
    """Load an execution manifest file.

    Supports the legacy bare-list layout and the newer Kiro layout
    {"executions": [...], "version": ...}.  Returns [] on a missing file,
    a parse error, or an unexpected shape — the original returned
    `data["executions"]` unchecked, so a malformed value escaped the
    declared list[dict] contract.
    """
    try:
        with open(manifest_path, "r", encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception:
        return []
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        executions = data.get("executions")
        # Guard: only hand back an actual list.
        if isinstance(executions, list):
            return executions
    return []
|
||
|
||
|
||
|
||
def find_execution_log(agent_dir: str, hex_dir: str, execution: dict) -> Optional[str]:
    """Locate the full execution-log file for *execution* under agent_dir/hex_dir.

    Scans two directory levels deep for files of at least 1000 bytes whose
    JSON executionId matches.  The id must appear in the first 500 chars
    before the (potentially expensive) full JSON parse is attempted.
    """
    exec_id = execution.get("executionId", "")
    base = os.path.join(agent_dir, hex_dir)
    for sub_dir in os.scandir(base):
        if not sub_dir.is_dir():
            continue
        for candidate in os.scandir(sub_dir.path):
            # Real logs are sizeable; skip stubs to avoid needless parsing.
            if not candidate.is_file() or candidate.stat().st_size < 1000:
                continue
            try:
                with open(candidate.path, "r", encoding="utf-8") as fh:
                    if exec_id not in fh.read(500):
                        continue
                    fh.seek(0)
                    if json.load(fh).get("executionId") == exec_id:
                        return candidate.path
            except Exception:
                continue
    return None
|
||
|
||
|
||
def find_all_executions(
    agent_dir: str,
    chat_session_ids: Optional[set[str]] = None,
    execution_id: Optional[str] = None,
) -> list[dict]:
    """Collect matching executions from every manifest, newest endTime first.

    When *execution_id* is given, the first exact or prefix match is returned
    alone.  When *chat_session_ids* is given, executions tagged with a
    chatSessionId outside that set are skipped (untagged ones are kept).
    Each returned entry is annotated with its source directory in "_hex_dir".
    """
    matched = []
    for hex_dir, manifest_path in find_all_manifests(agent_dir):
        for entry in load_manifest(manifest_path):
            entry["_hex_dir"] = hex_dir
            if execution_id:
                eid = entry.get("executionId", "")
                if eid == execution_id or eid.startswith(execution_id):
                    return [entry]
            # Filter only when the execution carries a chatSessionId that is
            # not part of this workspace's session set.
            csid = entry.get("chatSessionId")
            if chat_session_ids and csid and csid not in chat_session_ids:
                continue
            matched.append(entry)
    matched.sort(key=lambda e: e.get("endTime", 0), reverse=True)
    return matched
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 解析逻辑
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def parse_messages(messages: list) -> list[dict]:
    """Parse context.messages, deduplicating system prompts and resolving IDs.

    Returns one dict per message with index/role/messageId and a list of
    normalized entry dicts.
    """
    conversation = []
    for i, msg in enumerate(messages):
        parsed = []
        for entry in msg.get("entries", []):
            if not isinstance(entry, dict):
                continue
            etype = entry.get("type", "unknown")
            if etype == "text":
                text = entry.get("text", "")
                if is_system_prompt(text):
                    # System prompts are saved once and referenced by filename.
                    parsed.append({
                        "type": "system_prompt_ref",
                        "ref_file": save_system_prompt(text),
                        "char_count": len(text),
                    })
                elif is_steering_block(text):
                    # Collapse steering content, keeping only the file names.
                    steering_files = re.findall(r'(\w[\w-]+\.md):', text)
                    parsed.append({
                        "type": "steering_ref",
                        "files": steering_files or ["(steering block)"],
                        "char_count": len(text),
                    })
                else:
                    parsed.append({"type": "text", "text": resolve_ids_in_text(text)})
            elif etype == "toolUse":
                parsed.append({
                    "type": "toolUse",
                    "id": entry.get("id"),
                    "name": entry.get("name"),
                    "args": resolve_tool_args(entry.get("name", ""), entry.get("args", {})),
                })
            elif etype == "toolUseResponse":
                parsed.append({
                    "type": "toolUseResponse",
                    "id": entry.get("id"),
                    "name": entry.get("name"),
                    "message": resolve_ids_in_text(entry.get("message", "")),
                    "success": entry.get("success"),
                })
            elif etype == "document":
                doc = entry.get("document", {})
                doc_type = doc.get("type", "")
                if doc_type == "steering":
                    # Steering documents: keep just the display name.
                    parsed.append({
                        "type": "steering_doc",
                        "name": doc.get("displayName", "") or "steering",
                    })
                else:
                    target = doc.get("target", "")
                    parsed.append({
                        "type": "document",
                        "doc_type": doc_type,
                        "target": resolve_ids_in_text(target) if target else "",
                    })
            else:
                # Unknown entry kinds: record the type and key names only.
                parsed.append({"type": etype, "raw_keys": list(entry.keys())})
        conversation.append({
            "index": i,
            "role": msg.get("role", "?"),
            "messageId": msg.get("messageId", "?"),
            "entries": parsed,
        })
    return conversation
|
||
|
||
|
||
def parse_actions(actions: list) -> list[dict]:
    """Parse the actions list into a timeline, compressing content-less model actions."""
    timeline = []
    for i, action in enumerate(actions):
        atype = action.get("actionType", "")
        astate = action.get("actionState", "")

        # A model action with neither input nor output carries no content:
        # keep only a one-line summary.
        if atype == "model" and "output" not in action and "input" not in action:
            timeline.append({
                "index": i,
                "actionType": "model",
                "actionState": astate,
                "emittedAt": ts_fmt(action.get("emittedAt")),
                "_compressed": True,
            })
            continue

        entry = {
            "index": i,
            "actionId": action.get("actionId"),
            "actionType": atype,
            "actionState": astate,
            "emittedAt": ts_fmt(action.get("emittedAt")),
        }
        if action.get("subExecutionId"):
            entry["subExecutionId"] = action["subExecutionId"]
        if action.get("endTime"):
            entry["endTime"] = ts_fmt(action["endTime"])

        for k in ("intentResult", "input", "output"):
            if k not in action:
                continue
            val = action[k]
            if isinstance(val, dict):
                val = dict(val)  # work on a copy; never mutate the raw log
                # write/create actions inline the file change as
                # originalContent/modifiedContent — lift it into _file_change ...
                if k == "input" and ("originalContent" in val or "modifiedContent" in val):
                    file_path = val.get("file", val.get("path", "?"))
                    entry["_file_change"] = {
                        "file": resolve_ids_in_text(str(file_path)),
                        "original": val.get("originalContent", ""),
                        "modified": val.get("modifiedContent", ""),
                    }
                    # ... and keep only the slim metadata under entry[k].
                    entry[k] = {
                        vk: resolve_ids_in_text(str(vv)) if isinstance(vv, str) else vv
                        for vk, vv in val.items()
                        if vk not in ("originalContent", "modifiedContent")
                    }
                    continue
                # Resolve IDs in every string value of the payload.
                for vk, vv in val.items():
                    if isinstance(vv, str):
                        val[vk] = resolve_ids_in_text(vv)
            entry[k] = val
        timeline.append(entry)
    return timeline
|
||
|
||
|
||
def extract_sub_execution_ids(actions: list) -> list[str]:
    """Return the unique subExecutionIds from *actions*, in first-appearance order."""
    ids = (action.get("subExecutionId") for action in actions)
    return list(dict.fromkeys(sid for sid in ids if sid))
|
||
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Diff 快照收集
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def collect_diffs(agent_dir: str, hex_dir: str, execution: dict) -> dict[str, dict]:
    """Placeholder for diff-snapshot collection; always returns an empty mapping.

    Kiro inlines originalContent/modifiedContent directly in write/create
    action inputs, so file diffs are extracted from actions instead of from
    the on-disk snapshot directory.
    """
    return {}
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Prompt Log 匹配
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
PROMPT_LOG_DIR = os.path.join("docs", "audit", "prompt_logs")


def find_matching_prompt_log(start_time_ms: int) -> Optional[str]:
    """Match an execution startTime to the closest prompt_log file.

    Files are named prompt_log_YYYYMMDD_HHMMSS.md; the closest one within a
    five-minute window of *start_time_ms* wins.  Returns None when the log
    directory is missing, the timestamp is invalid, or nothing is in range.
    """
    if not os.path.isdir(PROMPT_LOG_DIR):
        return None
    try:
        exec_dt = datetime.fromtimestamp(start_time_ms / 1000, tz=CST)
    except Exception:
        return None

    best_match = None
    best_delta = float("inf")
    name_re = re.compile(r"prompt_log_(\d{8}_\d{6})\.md$")

    for candidate in os.scandir(PROMPT_LOG_DIR):
        if not candidate.is_file():
            continue
        m = name_re.match(candidate.name)
        if not m:
            continue
        try:
            log_dt = datetime.strptime(m.group(1), "%Y%m%d_%H%M%S").replace(tzinfo=CST)
        except Exception:
            continue
        delta = abs((exec_dt - log_dt).total_seconds())
        # Five-minute window; keep the closest candidate.
        if delta < 300 and delta < best_delta:
            best_delta = delta
            best_match = candidate.path
    return best_match
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# Markdown 生成
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def _msg_semantic_label(msg: dict) -> str:
|
||
"""为对话消息生成语义标签,用于快速定位。"""
|
||
entries = msg.get("entries", [])
|
||
if not entries:
|
||
return ""
|
||
parts = []
|
||
for e in entries:
|
||
et = e["type"]
|
||
if et == "system_prompt_ref":
|
||
parts.append("系统提示词")
|
||
elif et == "steering_ref":
|
||
parts.append(f"Steering({len(e.get('files', []))})")
|
||
elif et == "steering_doc":
|
||
parts.append(f"Steering:`{e.get('name', '?')}`")
|
||
elif et == "toolUse":
|
||
name = e.get("name", "?")
|
||
# 提取关键参数作为上下文
|
||
args = e.get("args", {})
|
||
ctx = ""
|
||
if name in ("readFile", "readCode", "readMultipleFiles"):
|
||
ctx = args.get("path", "") or ",".join(args.get("paths", [])[:2])
|
||
elif name in ("fsWrite", "strReplace", "editCode"):
|
||
ctx = args.get("path", "")
|
||
elif name == "grepSearch":
|
||
ctx = args.get("query", "")[:30]
|
||
elif name == "invokeSubAgent":
|
||
ctx = args.get("name", "")
|
||
elif name == "executePwsh":
|
||
ctx = (args.get("command", "") or "")[:40]
|
||
elif name == "taskStatus":
|
||
ctx = args.get("status", "")
|
||
if ctx:
|
||
parts.append(f"调用 `{name}` → {ctx}")
|
||
else:
|
||
parts.append(f"调用 `{name}`")
|
||
elif et == "toolUseResponse":
|
||
name = e.get("name", "?")
|
||
ok = "✅" if e.get("success") else "❌"
|
||
parts.append(f"结果 `{name}` {ok}")
|
||
elif et == "document":
|
||
parts.append(f"文档:{e.get('doc_type', '?')}")
|
||
elif et == "text":
|
||
# 文本内容:提取前 50 字符作为预览(bot 和 human 都加)
|
||
role = msg.get("role", "")
|
||
if role in ("bot", "human"):
|
||
text = (e.get("text") or "").strip()
|
||
if text:
|
||
preview = text[:50].replace("\n", " ")
|
||
if len(text) > 50:
|
||
preview += "…"
|
||
icon = "💬" if role == "bot" else "📝"
|
||
parts.append(f"{icon} `{preview}`")
|
||
return ", ".join(parts) if parts else ""
|
||
|
||
|
||
def _step_semantic_label(step: dict) -> str:
|
||
"""为 action step 生成带图标的语义标签。"""
|
||
at = step.get("actionType", "?")
|
||
state = step.get("actionState", "?")
|
||
fc = step.get("_file_change")
|
||
sub_eid = step.get("subExecutionId")
|
||
|
||
# 状态图标
|
||
if state == "Error":
|
||
state_icon = "❌"
|
||
elif state in ("Success", "Accepted"):
|
||
state_icon = "✅"
|
||
else:
|
||
state_icon = "⏳"
|
||
|
||
# 类型图标 + 上下文
|
||
if at in ("write", "append") and fc:
|
||
fname = fc.get("file", "?")
|
||
short = fname.rsplit("/", 1)[-1] if "/" in fname else fname
|
||
orig = fc.get("original", "")
|
||
if orig:
|
||
return f"⚡ `{at}` 修改 `{short}` {state_icon}"
|
||
else:
|
||
return f"⚡ `{at}` 新建 `{short}` {state_icon}"
|
||
elif at == "invokeSubAgent":
|
||
inp = step.get("input", {})
|
||
# Kiro 原始 log 用 subAgentName,工具 schema 用 name
|
||
agent_name = (inp.get("subAgentName") or inp.get("name", "?")) if isinstance(inp, dict) else "?"
|
||
return f"🔀 `invokeSubAgent` → {agent_name} {state_icon}"
|
||
elif at == "subagentResponse":
|
||
return f"🔀 `subagentResponse` {state_icon}"
|
||
elif at in ("readFiles", "readCode"):
|
||
inp = step.get("input", {})
|
||
if isinstance(inp, dict):
|
||
files = inp.get("files", [])
|
||
if files and isinstance(files[0], dict):
|
||
paths = [f.get("path", "?") for f in files[:2]]
|
||
else:
|
||
paths = [str(f) for f in files[:2]]
|
||
ctx = ", ".join(paths)
|
||
else:
|
||
ctx = ""
|
||
return f"📖 `{at}` {ctx} {state_icon}" if ctx else f"📖 `{at}` {state_icon}"
|
||
elif at == "search":
|
||
return f"🔍 `search` {state_icon}"
|
||
elif at == "say":
|
||
return f"💬 `say` {state_icon}"
|
||
elif at == "taskStatus":
|
||
return f"📋 `taskStatus` {state_icon}"
|
||
elif at == "steering":
|
||
return f"📄 `steering` {state_icon}"
|
||
elif at == "runCommand":
|
||
return f"🖥️ `runCommand` {state_icon}"
|
||
elif at == "getDiagnostics":
|
||
return f"🩺 `getDiagnostics` {state_icon}"
|
||
elif at == "ContextualHookInvoked":
|
||
inp = step.get("input", {})
|
||
hook_name = inp.get("name", "?") if isinstance(inp, dict) else "?"
|
||
return f"🪝 Hook `{hook_name}` {state_icon}"
|
||
elif at == "intentClassification":
|
||
ir = step.get("intentResult", {})
|
||
cls = ir.get("classification", "?") if isinstance(ir, dict) else "?"
|
||
return f"🎯 意图: `{cls}` {state_icon}"
|
||
elif at == "replace":
|
||
inp = step.get("input", {})
|
||
path = inp.get("file", inp.get("path", "?")) if isinstance(inp, dict) else "?"
|
||
short = path.rsplit("/", 1)[-1] if "/" in str(path) else str(path)
|
||
return f"✏️ `replace` `{short}` {state_icon}"
|
||
else:
|
||
return f"`{at}` [{state}]"
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def _load_previous_actions_md(main_md_path: str) -> str:
|
||
"""从已有的 main.md 文件中提取 '## 4. Actions 时间线' 部分的原始 Markdown。
|
||
|
||
用于覆盖模式下,将前几轮 execution 的 actions 追加到新一轮前面。
|
||
返回空字符串表示无历史 actions。
|
||
"""
|
||
if not os.path.isfile(main_md_path):
|
||
return ""
|
||
try:
|
||
with open(main_md_path, "r", encoding="utf-8") as f:
|
||
content = f.read()
|
||
except Exception:
|
||
return ""
|
||
|
||
# 定位 "## 4. Actions 时间线" 到下一个 "## 5." 或文件末尾
|
||
start_marker = "## 4. Actions 时间线"
|
||
end_marker = "## 5."
|
||
start_idx = content.find(start_marker)
|
||
if start_idx == -1:
|
||
return ""
|
||
# 跳过标题行本身,从下一行开始
|
||
body_start = content.find("\n", start_idx)
|
||
if body_start == -1:
|
||
return ""
|
||
body_start += 1
|
||
|
||
end_idx = content.find(end_marker, body_start)
|
||
if end_idx == -1:
|
||
actions_md = content[body_start:]
|
||
else:
|
||
actions_md = content[body_start:end_idx]
|
||
|
||
return actions_md.strip()
|
||
|
||
|
||
def _merge_summaries(prev_summary: dict, cur_summary: dict) -> dict:
|
||
"""增量合并两轮 execution 的结构化摘要。
|
||
|
||
规则:
|
||
- duration_s: 累加
|
||
- action_count: 累加
|
||
- msg_count: 取最新(context.messages 是累积超集)
|
||
- files_modified / files_created: 并集(保序)
|
||
- sub_agents: 并集
|
||
- errors: 拼接
|
||
- description: 取最新非空值
|
||
- workflow / status: 取最新
|
||
"""
|
||
if not prev_summary:
|
||
return dict(cur_summary)
|
||
|
||
merged = dict(cur_summary)
|
||
merged["duration_s"] = round(
|
||
prev_summary.get("duration_s", 0) + cur_summary.get("duration_s", 0), 1
|
||
)
|
||
merged["action_count"] = (
|
||
prev_summary.get("action_count", 0) + cur_summary.get("action_count", 0)
|
||
)
|
||
# msg_count: 取较大值(最新轮次的 context.messages 是累积超集)
|
||
merged["msg_count"] = max(
|
||
prev_summary.get("msg_count", 0), cur_summary.get("msg_count", 0)
|
||
)
|
||
# 文件列表:并集,保序
|
||
def _union_lists(a: list, b: list) -> list:
|
||
seen = set()
|
||
result = []
|
||
for item in a + b:
|
||
if item not in seen:
|
||
seen.add(item)
|
||
result.append(item)
|
||
return result
|
||
|
||
merged["files_modified"] = _union_lists(
|
||
prev_summary.get("files_modified", []),
|
||
cur_summary.get("files_modified", []),
|
||
)
|
||
merged["files_created"] = _union_lists(
|
||
prev_summary.get("files_created", []),
|
||
cur_summary.get("files_created", []),
|
||
)
|
||
merged["sub_agents"] = _union_lists(
|
||
prev_summary.get("sub_agents", []),
|
||
cur_summary.get("sub_agents", []),
|
||
)
|
||
merged["errors"] = (
|
||
prev_summary.get("errors", []) + cur_summary.get("errors", [])
|
||
)
|
||
# description: 取最新非空值
|
||
if cur_summary.get("description"):
|
||
merged["description"] = cur_summary["description"]
|
||
elif prev_summary.get("description"):
|
||
merged["description"] = prev_summary["description"]
|
||
|
||
return merged
|
||
|
||
|
||
def _build_execution_summary(
|
||
log: dict,
|
||
conversation: list[dict],
|
||
timeline: list[dict],
|
||
sub_file_map: Optional[dict[str, str]] = None,
|
||
) -> dict:
|
||
"""构建结构化执行摘要(零 LLM 成本,纯规则化提取)。
|
||
返回 dict 供 md 渲染和索引存储共用。
|
||
"""
|
||
dur = (log.get("endTime", 0) - log.get("startTime", 0)) / 1000
|
||
|
||
files_modified = []
|
||
files_created = []
|
||
sub_agents = []
|
||
errors = []
|
||
|
||
for step in timeline:
|
||
if step.get("_compressed"):
|
||
continue
|
||
idx = step.get("index", "?")
|
||
at = step.get("actionType", "?")
|
||
state = step.get("actionState", "?")
|
||
|
||
fc = step.get("_file_change")
|
||
if fc:
|
||
fname = fc.get("file", "?")
|
||
if fc.get("original"):
|
||
files_modified.append(fname)
|
||
else:
|
||
files_created.append(fname)
|
||
|
||
if at == "invokeSubAgent":
|
||
inp = step.get("input", {})
|
||
agent_name = (inp.get("subAgentName") or inp.get("name", "?")) if isinstance(inp, dict) else "?"
|
||
sub_agents.append(agent_name)
|
||
|
||
if state == "Error":
|
||
errors.append(f"Step {idx}: `{at}`")
|
||
|
||
for msg in conversation:
|
||
for e in msg.get("entries", []):
|
||
if e.get("type") == "toolUseResponse" and not e.get("success"):
|
||
errors.append(f"Msg {msg['index']}: `{e.get('name', '?')}`")
|
||
|
||
# 去重文件名
|
||
files_modified = list(dict.fromkeys(files_modified))
|
||
files_created = list(dict.fromkeys(files_created))
|
||
|
||
# description 由外部 LLM 生成(百炼 API),提取阶段不生成
|
||
description = ""
|
||
|
||
return {
|
||
"workflow": log.get("workflowType", "?"),
|
||
"status": log.get("status", "?"),
|
||
"duration_s": round(dur, 1),
|
||
"msg_count": len(conversation),
|
||
"action_count": len(timeline),
|
||
"files_modified": files_modified,
|
||
"files_created": files_created,
|
||
"sub_agents": sub_agents,
|
||
"errors": errors,
|
||
"description": description,
|
||
}
|
||
|
||
|
||
def _render_summary_md(summary: dict, sub_file_map: Optional[dict[str, str]] = None) -> str:
|
||
"""将结构化摘要渲染为 Markdown 文本(放在文件最前面)。"""
|
||
L = []
|
||
# 一句话概览
|
||
status_icon = "✅" if summary["status"] == "succeed" else "❌"
|
||
L.append(f"{status_icon} `{summary['workflow']}` | {summary['duration_s']}s | "
|
||
f"{summary['msg_count']} msgs | {summary['action_count']} actions")
|
||
L.append("")
|
||
|
||
desc = summary.get("description", "")
|
||
if desc:
|
||
L.append(f"> {desc}")
|
||
L.append("")
|
||
|
||
fm = summary["files_modified"]
|
||
fc = summary["files_created"]
|
||
if fm or fc:
|
||
L.append(f"**文件变更** (修改 {len(fm)} / 新建 {len(fc)})")
|
||
for f in fm:
|
||
L.append(f"- ⚡ 修改 `{f}`")
|
||
for f in fc:
|
||
L.append(f"- ✨ 新建 `{f}`")
|
||
L.append("")
|
||
|
||
sa = summary["sub_agents"]
|
||
if sa:
|
||
L.append(f"**子代理** ({len(sa)}): {', '.join(f'`{a}`' for a in sa)}")
|
||
L.append("")
|
||
|
||
errs = summary["errors"]
|
||
if errs:
|
||
L.append(f"**错误** ({len(errs)})")
|
||
for e in errs:
|
||
L.append(f"- ❌ {e}")
|
||
L.append("")
|
||
|
||
if not fm and not fc and not sa and not errs:
|
||
L.append("*(无文件变更、子代理调用或错误)*")
|
||
|
||
return "\n".join(L)
|
||
|
||
|
||
def _build_nav_summary(
|
||
conversation: list[dict],
|
||
timeline: list[dict],
|
||
sub_file_map: Optional[dict[str, str]] = None,
|
||
) -> str:
|
||
"""生成快速导航摘要:文件变更、子代理、错误。"""
|
||
file_changes = []
|
||
sub_agents = []
|
||
errors = []
|
||
|
||
for step in timeline:
|
||
if step.get("_compressed"):
|
||
continue
|
||
idx = step.get("index", "?")
|
||
at = step.get("actionType", "?")
|
||
state = step.get("actionState", "?")
|
||
|
||
# 文件变更
|
||
fc = step.get("_file_change")
|
||
if fc:
|
||
fname = fc.get("file", "?")
|
||
orig = fc.get("original", "")
|
||
action = "修改" if orig else "新建"
|
||
file_changes.append(f"- Step {idx}: ⚡ {action} `{fname}`")
|
||
|
||
# 子代理
|
||
if at == "invokeSubAgent":
|
||
inp = step.get("input", {})
|
||
agent_name = (inp.get("subAgentName") or inp.get("name", "?")) if isinstance(inp, dict) else "?"
|
||
sub_eid = step.get("subExecutionId", "")
|
||
sub_path = ""
|
||
if sub_file_map and sub_eid and sub_eid in sub_file_map:
|
||
sub_path = f" → `{sub_file_map[sub_eid].replace(chr(92), '/')}`"
|
||
sub_agents.append(f"- Step {idx}: 🔀 `{agent_name}`{sub_path}")
|
||
|
||
# 错误
|
||
if state == "Error":
|
||
errors.append(f"- Step {idx}: ❌ `{at}`")
|
||
|
||
# 对话中的错误工具结果
|
||
for msg in conversation:
|
||
for e in msg.get("entries", []):
|
||
if e.get("type") == "toolUseResponse" and not e.get("success"):
|
||
errors.append(f"- Msg {msg['index']}: ❌ `{e.get('name', '?')}`")
|
||
|
||
lines = []
|
||
if file_changes:
|
||
lines.append(f"**文件变更** ({len(file_changes)})")
|
||
lines.extend(file_changes)
|
||
lines.append("")
|
||
if sub_agents:
|
||
lines.append(f"**子代理调用** ({len(sub_agents)})")
|
||
lines.extend(sub_agents)
|
||
lines.append("")
|
||
if errors:
|
||
lines.append(f"**错误** ({len(errors)})")
|
||
lines.extend(errors)
|
||
lines.append("")
|
||
if not lines:
|
||
lines.append("*(无文件变更、子代理调用或错误)*")
|
||
|
||
return "\n".join(lines)
|
||
|
||
|
||
def generate_full_record(
    log: dict,
    conversation: list[dict],
    timeline: list[dict],
    diffs: dict[str, dict],
    session_info: Optional[dict] = None,
    prompt_log_path: Optional[str] = None,
    is_sub: bool = False,
    sub_index: int = 0,
    sub_file_map: Optional[dict[str, str]] = None,
    accumulated_actions_md: str = "",
    merged_summary: Optional[dict] = None,
    execution_round: int = 1,
) -> tuple[str, dict, dict]:
    """Render the full Markdown record for a single execution (overwrite mode).

    In overwrite mode:
    - Conversation section: taken directly from the latest execution's
      context.messages (a cumulative superset, so no dedup needed)
    - Actions timeline: accumulated_actions_md from earlier rounds, then this
      round's timeline appended after it
    - Execution summary: merged_summary (pre-merged with earlier rounds)

    Args:
        log: raw execution-log JSON
        conversation: output of parse_messages
        timeline: output of parse_actions
        diffs: output of collect_diffs
            # NOTE(review): `diffs` is not referenced anywhere in this body —
            # presumably kept for signature stability; confirm before removing.
        session_info: session configuration (main execution only)
        prompt_log_path: path of the matched prompt_log file
        is_sub: whether this is a sub-agent execution
        sub_index: sub-agent ordinal (1-based)
        sub_file_map: subExecutionId -> extracted sub-agent file path
        accumulated_actions_md: actions Markdown from earlier execution rounds
            (prepended before this round's actions)
        merged_summary: merged structured summary (including earlier rounds);
            when None, this round's own summary is displayed
        execution_round: which execution round this is (used for labelling)

    Returns:
        3-tuple ``(markdown_text, this_round_summary, displayed_summary)``.
        (Annotation fixed: the function returns three values, not two.)
    """
    L = []
    exec_id = log.get("executionId", "?")
    chat_id = log.get("chatSessionId", "?")

    # Build the structured summary (shared by the md body and the index).
    _summary = _build_execution_summary(log, conversation, timeline, sub_file_map)
    # Overwrite mode: prefer the merged summary (accumulates earlier rounds).
    display_summary = merged_summary if merged_summary else _summary

    # Title.
    if is_sub:
        L.append(f"# 子代理 Execution #{sub_index}\n")
    else:
        L.append("# Kiro 会话全量记录\n")
    L.append(f"> 生成时间: {datetime.now(CST).strftime('%Y-%m-%d %H:%M:%S')} CST\n")

    # ── 0. Execution summary (top of file: an AI reader gets the gist from the first ~20 lines) ──
    L.append("## 📋 执行摘要\n")
    if execution_round > 1:
        L.append(f"*(合并 {execution_round} 轮 execution)*\n")
    L.append(_render_summary_md(display_summary, sub_file_map))
    L.append("")

    # ── 1. Metadata ──
    L.append("## 1. 元数据\n")
    L.append("| 字段 | 值 |")
    L.append("|------|-----|")
    L.append(f"| executionId | `{exec_id}` |")
    L.append(f"| chatSessionId | `{chat_id}` |")
    L.append(f"| workflowType | `{log.get('workflowType', '?')}` |")
    L.append(f"| autonomyMode | `{log.get('autonomyMode', '?')}` |")
    L.append(f"| status | `{log.get('status', '?')}` |")
    L.append(f"| startTime | `{ts_fmt(log.get('startTime'))}` |")
    L.append(f"| endTime | `{ts_fmt(log.get('endTime'))}` |")
    dur = (log.get("endTime", 0) - log.get("startTime", 0)) / 1000
    L.append(f"| duration | `{dur:.1f}s` |")
    L.append(f"| contextUsage | `{log.get('contextUsagePercentage', 0):.2f}%` |")
    L.append("")

    # Session info is only attached to the main execution.
    if session_info and not is_sub:
        L.append(f"- 会话标题: `{session_info.get('title', '?')}`")
        L.append(f"- 创建时间: `{ts_fmt(int(session_info.get('dateCreated', 0)))}`")
        L.append(f"- 工作区: `{session_info.get('workspaceDirectory', '?')}`")
        L.append("")

    if prompt_log_path and not is_sub:
        rel = os.path.relpath(prompt_log_path).replace("\\", "/")
        L.append(f"- 关联 prompt_log: `{rel}`")
        L.append("")

    # ── 2. User input ──
    L.append("## 2. 用户输入\n")
    input_text = ""
    for msg in log.get("input", {}).get("data", {}).get("messages", []):
        for entry in msg.get("content", msg.get("entries", [])):
            if isinstance(entry, dict) and entry.get("text"):
                input_text += entry["text"] + "\n"
    if input_text.strip():
        L.append(fence(input_text.strip()) + "\n")
    else:
        L.append("*(无用户输入)*\n")

    # ── 3. Conversation ──
    L.append("## 3. 对话记录\n")

    # Overwrite mode: use every message (the latest execution's
    # context.messages is a cumulative superset of earlier rounds).
    h = sum(1 for m in conversation if m["role"] == "human")
    b = sum(1 for m in conversation if m["role"] == "bot")
    t = sum(1 for m in conversation if m["role"] == "tool")
    L.append(f"共 {len(conversation)} 条消息: human={h}, bot={b}, tool={t}\n")

    for msg in conversation:
        emoji = {"human": "👤", "bot": "🤖", "tool": "🔧"}.get(msg["role"], "❓")
        # Semantic label for the message header.
        msg_label = _msg_semantic_label(msg)
        label_suffix = f" — {msg_label}" if msg_label else ""

        # P0: compress empty hook-output messages (signature: a HUMAN message
        # whose only entry is short "Output:\nCommand executed..." text).
        if msg["role"] == "human" and len(msg["entries"]) == 1:
            e0 = msg["entries"][0]
            if e0["type"] == "text":
                _txt = (e0.get("text") or "").strip()
                if _txt.startswith("Output:") and ("Exit Code:" in _txt) and len(_txt) < 200:
                    # Extract the exit code and emit a single header line.
                    import re as _re
                    _ec_match = _re.search(r"Exit Code:\s*(-?\d+)", _txt)
                    _ec = _ec_match.group(1) if _ec_match else "?"
                    L.append(f"### Msg {msg['index']}: 👤 HUMAN — 🪝 Hook 输出 (exit={_ec})\n")
                    continue

        L.append(f"### Msg {msg['index']}: {emoji} {msg['role'].upper()}{label_suffix}\n")

        for entry in msg["entries"]:
            et = entry["type"]

            if et == "system_prompt_ref":
                # Deduplicated system prompt: reference the shared file.
                ref = entry["ref_file"]
                chars = entry["char_count"]
                sp_path = f"docs/audit/session_logs/_system_prompts/{ref}"
                L.append(f"**[系统提示词]** → `{sp_path}` ({chars} 字符)\n")

            elif et == "steering_ref":
                files = ", ".join(entry["files"])
                chars = entry["char_count"]
                L.append(f"**[Steering]** 引用: {files} ({chars} 字符)\n")

            elif et == "text":
                text = entry.get("text", "")
                if not text:
                    L.append("*(空)*\n")
                else:
                    L.append(fence(text) + "\n")

            elif et == "toolUse":
                name = entry.get("name", "?")
                args = entry.get("args", {})
                L.append(f"**[🔧 调用]** `{name}`\n")
                # P1: render strReplace/editCode changes diff-style.
                if name in ("strReplace", "editCode") and isinstance(args, dict):
                    _path = args.get("path", "?")
                    _lang = "python" if _path.endswith(".py") else "sql" if _path.endswith(".sql") else ""
                    L.append(f"- 文件: `{_path}`\n")
                    _old = args.get("oldStr", args.get("old_str", ""))
                    _new = args.get("newStr", args.get("new_str", ""))
                    _sel = args.get("selector", "")
                    _op = args.get("operation", "")
                    _repl = args.get("replacement", "")
                    if _sel:
                        L.append(f"- selector: `{_sel}`" + (f" ({_op})" if _op else ""))
                    if _old:
                        L.append(f"- 删除:\n" + fence(trunc(_old, 2000), _lang))
                    if _new:
                        L.append(f"- 插入:\n" + fence(trunc(_new, 2000), _lang))
                    if _repl:
                        L.append(f"- 替换为:\n" + fence(trunc(_repl, 2000), _lang))
                    L.append("")
                else:
                    L.append(fence(safe_json(args, 5000), "json") + "\n")

            elif et == "toolUseResponse":
                ok = "✅" if entry.get("success") else "❌"
                L.append(f"**[📋 结果]** `{entry.get('name', '?')}` {ok}\n")
                msg_text = entry.get("message", "")
                if msg_text:
                    L.append(fence(trunc(msg_text, 5000)) + "\n")

            elif et == "document":
                target = entry.get("target", "")
                L.append(f"**[📄 文档]** type=`{entry.get('doc_type')}` target=`{target}`\n")

            elif et == "steering_doc":
                L.append(f"**[📄 Steering]** `{entry.get('name', 'steering')}`\n")

            else:
                # Unknown entry type: dump the raw keys for inspection.
                L.append(f"**[{et}]** keys={entry.get('raw_keys')}\n")

    # ── 4. Actions timeline ──
    L.append("## 4. Actions 时间线\n")

    # Overwrite mode: earlier rounds' accumulated actions first, then this round.
    if accumulated_actions_md:
        L.append(accumulated_actions_md)
        L.append("")
        L.append(f"---\n")
        L.append(f"### 🔄 第 {execution_round} 轮 Execution ({ts_fmt(log.get('startTime'))})\n")

    L.append(f"共 {len(timeline)} 个\n")

    for step in timeline:
        # Compressed no-content model actions become a single line.
        if step.get("_compressed"):
            L.append(f"- `model` [{step.get('actionState')}] @ {step.get('emittedAt')}\n")
            continue

        at = step.get('actionType', '?')
        state = step.get('actionState', '?')
        # Semantic label for the step header.
        step_label = _step_semantic_label(step)
        L.append(f"### Step {step['index']}: {step_label} @ {step.get('emittedAt','?')}\n")
        if step.get("subExecutionId"):
            sub_eid = step["subExecutionId"]
            L.append(f"- subExecutionId: `{sub_eid}`")
            # Link to the extracted sub-agent record when we have a mapping.
            if sub_file_map and sub_eid in sub_file_map:
                sub_path = sub_file_map[sub_eid].replace("\\", "/")
                L.append(f"- 子代理记录: `{sub_path}`")
        if step.get("endTime"):
            L.append(f"- endTime: {step['endTime']}")
        # File-change rendering (before/after snippets).
        if step.get("_file_change"):
            fc = step["_file_change"]
            fname = fc.get("file", "?")
            orig = fc.get("original", "")
            mod = fc.get("modified", "")
            lang = "python" if fname.endswith(".py") else "sql" if fname.endswith(".sql") else ""
            L.append(f"- 文件变更: `{fname}`")
            if orig and mod:
                L.append(f"  - 修改前 ({len(orig)} 字符):\n" + fence(trunc(orig, 3000), lang))
                L.append(f"  - 修改后 ({len(mod)} 字符):\n" + fence(trunc(mod, 3000), lang))
            elif mod:
                L.append(f"  - 新建 ({len(mod)} 字符):\n" + fence(trunc(mod, 3000), lang))
        # Per-actionType specialised rendering.
        _at = step.get("actionType", "")
        if _at == "say":
            _say_msg = (step.get("output") or {}).get("message", "")
            if _say_msg:
                L.append(f"- 💬 AI 回复:\n\n{_say_msg}\n")
            else:
                for k in ("intentResult", "input", "output"):
                    if k in step:
                        L.append(f"- {k}:\n" + fence(safe_json(step[k], 5000), "json"))
        elif _at == "invokeSubAgent":
            _sub_input = step.get("input") or {}
            _sub_prompt = _sub_input.get("prompt", "")
            _sub_name = _sub_input.get("name", "") or _sub_input.get("subAgentName", "") or "?"
            if _sub_prompt:
                L.append(f"- 📤 委托 `{_sub_name}`:\n\n{trunc(_sub_prompt, 3000)}\n")
            _sub_output = step.get("output") or {}
            _sub_resp = _sub_output.get("response", "")
            if _sub_resp:
                L.append(f"- 📥 子代理输出:\n\n{_sub_resp}\n")
            elif not _sub_prompt:
                # Neither prompt nor response available: raw dump.
                for k in ("intentResult", "input", "output"):
                    if k in step:
                        L.append(f"- {k}:\n" + fence(safe_json(step[k], 5000), "json"))
        elif _at == "subagentResponse":
            _sr_input = step.get("input") or {}
            _sr_resp = _sr_input.get("response", "")
            if _sr_resp:
                L.append(f"- 📥 子代理返回:\n\n{_sr_resp}\n")
            else:
                for k in ("intentResult", "input", "output"):
                    if k in step:
                        L.append(f"- {k}:\n" + fence(safe_json(step[k], 5000), "json"))
        elif _at == "ContextualHookInvoked":
            # P1: hook run — extract name, command, exitCode.
            _hi = step.get("input") or {}
            _ho = step.get("output") or {}
            _h_name = _hi.get("name", "?")
            _h_cmd = _ho.get("command", "")
            _h_result = _ho.get("result", {})
            _h_exit = _h_result.get("exitCode", "?") if isinstance(_h_result, dict) else "?"
            _h_out = _h_result.get("output", "") if isinstance(_h_result, dict) else ""
            if _h_cmd:
                L.append(f"- `$ {_h_cmd}`")
            L.append(f"- Exit: `{_h_exit}`")
            if _h_out and _h_out != "Command executed successfully with no output.":
                L.append(f"- 输出:\n" + fence(trunc(_h_out, 2000)))
        elif _at == "intentClassification":
            # P1: intent classification — compress to one line.
            _ir = step.get("intentResult", {})
            _cls = _ir.get("classification", "?") if isinstance(_ir, dict) else "?"
            L.append(f"- 分类结果: `{_cls}`")
        elif _at == "runCommand":
            # P0: command execution — extract command, exitCode, output.
            _rc_in = step.get("input") or {}
            _rc_out = step.get("output") or {}
            _rc_cmd = _rc_in.get("command", "") if isinstance(_rc_in, dict) else ""
            _rc_result = _rc_out.get("result", {}) if isinstance(_rc_out, dict) else {}
            _rc_exit = _rc_result.get("exitCode", "?") if isinstance(_rc_result, dict) else "?"
            _rc_output = _rc_result.get("output", "") if isinstance(_rc_result, dict) else ""
            if _rc_cmd:
                L.append(f"- `$ {_rc_cmd}`")
            L.append(f"- Exit: `{_rc_exit}`")
            if _rc_output:
                L.append(f"- 输出:\n" + fence(trunc(_rc_output, 3000)))
        elif _at == "search":
            # P2: search — extract query and why.
            _s_in = step.get("input") or {}
            _s_query = _s_in.get("query", "") if isinstance(_s_in, dict) else ""
            _s_why = _s_in.get("why", "") if isinstance(_s_in, dict) else ""
            if _s_query:
                L.append(f"- 🔍 `{trunc(_s_query, 100)}`")
            if _s_why:
                L.append(f"- 原因: {trunc(_s_why, 200)}")
            # Show search results when present.
            _s_out = step.get("output")
            if _s_out and isinstance(_s_out, dict):
                _s_files = _s_out.get("files", [])
                if _s_files:
                    L.append(f"- 结果: {len(_s_files)} 个文件")
        elif _at == "steering":
            # P2: steering — extract the list of file names.
            _st_in = step.get("input") or {}
            _st_docs = _st_in.get("documents", []) if isinstance(_st_in, dict) else []
            if _st_docs:
                import urllib.parse
                names = []
                for d in _st_docs[:10]:
                    if isinstance(d, str):
                        # URL-encoded path: keep only the file name.
                        decoded = urllib.parse.unquote(d)
                        name = decoded.rsplit("/", 1)[-1] if "/" in decoded else decoded
                        names.append(name)
                if names:
                    L.append(f"- 文件: {', '.join(names)}")
            else:
                for k in ("input", "output"):
                    if k in step:
                        L.append(f"- {k}:\n" + fence(safe_json(step[k], 3000), "json"))
        elif _at == "getDiagnostics":
            # P2: diagnostics — extract paths and problem count.
            _gd_in = step.get("input") or {}
            _gd_out = step.get("output") or {}
            _gd_paths = _gd_in.get("paths", []) if isinstance(_gd_in, dict) else []
            if _gd_paths:
                L.append(f"- 文件: {', '.join(str(p) for p in _gd_paths[:5])}")
            if isinstance(_gd_out, dict):
                _gd_diags = _gd_out.get("diagnostics", [])
                if isinstance(_gd_diags, list):
                    L.append(f"- 问题数: {len(_gd_diags)}")
                    for d in _gd_diags[:5]:
                        if isinstance(d, dict):
                            L.append(f"  - {d.get('severity', '?')}: {d.get('message', '?')[:100]}")
                elif not _gd_diags:
                    L.append("- ✅ 无问题")
        elif _at in ("readFiles", "readCode"):
            # P3: file reads — show paths only, never contents.
            _rf_in = step.get("input") or {}
            if isinstance(_rf_in, dict):
                _rf_files = _rf_in.get("files", [])
                paths = []
                for f in _rf_files[:5]:
                    if isinstance(f, dict):
                        paths.append(f.get("path", "?"))
                    else:
                        paths.append(str(f))
                if paths:
                    L.append(f"- 文件: {', '.join(paths)}")
        else:
            # Fallback: raw JSON dump of whatever keys the step carries.
            for k in ("intentResult", "input", "output"):
                if k in step:
                    L.append(f"- {k}:\n" + fence(safe_json(step[k], 5000), "json"))
        L.append("")

    # ── 5. Resource usage ──
    usage = log.get("usageSummary", [])
    if usage:
        L.append("## 5. 资源消耗\n")
        L.append("| 工具 | 消耗 | 单位 |")
        L.append("|------|------|------|")
        total = 0
        for u in usage:
            tools = ", ".join(u.get("usedTools", ["-"]))
            amt = u.get("usage", 0)
            total += amt
            L.append(f"| {tools} | {amt} | {u.get('unit', '?')} |")
        L.append(f"| **合计** | **{total:.4f}** | |")
        L.append("")

    return "\n".join(L), _summary, display_summary
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 索引管理
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def load_index() -> dict:
    """Load the slim session index (main conversations only) from INDEX_PATH.

    Returns:
        The parsed index dict, or a fresh v2 skeleton when the file is
        missing, unreadable, or contains invalid JSON. Loading is
        best-effort by design: a corrupt index must not abort extraction.
    """
    if os.path.isfile(INDEX_PATH):
        try:
            with open(INDEX_PATH, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, ValueError):
            # Narrowed from a blanket `except Exception`: only I/O failures
            # and malformed JSON (JSONDecodeError is a ValueError) fall back;
            # genuine bugs now surface instead of being swallowed.
            pass
    return {"version": 2, "entries": {}}
|
||
|
||
|
||
def load_full_index() -> dict:
    """Load the full session index (main conversations + sub-agents) from INDEX_FULL_PATH.

    Returns:
        The parsed index dict, or a fresh v2 skeleton when the file is
        missing, unreadable, or contains invalid JSON. Loading is
        best-effort by design: a corrupt index must not abort extraction.
    """
    if os.path.isfile(INDEX_FULL_PATH):
        try:
            with open(INDEX_FULL_PATH, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, ValueError):
            # Narrowed from a blanket `except Exception`: only I/O failures
            # and malformed JSON (JSONDecodeError is a ValueError) fall back;
            # genuine bugs now surface instead of being swallowed.
            pass
    return {"version": 2, "entries": {}}
|
||
|
||
|
||
def save_index(index: dict):
    """Write the slim index to INDEX_PATH and refresh its per-day index files."""
    target_dir = os.path.dirname(INDEX_PATH)
    os.makedirs(target_dir, exist_ok=True)
    with open(INDEX_PATH, "w", encoding="utf-8") as fp:
        json.dump(index, fp, ensure_ascii=False, indent=2)
    # Keep the day-level views in sync with the root index.
    _save_day_indexes(index, "_day_index.json")
|
||
|
||
|
||
def save_full_index(index: dict):
    """Write the full index to INDEX_FULL_PATH and refresh its per-day index files."""
    target_dir = os.path.dirname(INDEX_FULL_PATH)
    os.makedirs(target_dir, exist_ok=True)
    with open(INDEX_FULL_PATH, "w", encoding="utf-8") as fp:
        json.dump(index, fp, ensure_ascii=False, indent=2)
    # Keep the day-level views in sync with the root index.
    _save_day_indexes(index, "_day_index_full.json")
|
||
|
||
|
||
def _save_day_indexes(index: dict, filename: str):
    """Split the root index by the {ym}/{dd} component of each entry's output_dir
    and write one index file per day directory.

    Output path: docs/audit/session_logs/{ym}/{dd}/{filename}
    Each day-level index contains only the entries whose output_dir lives
    under that day's directory.
    """
    entries = index.get("entries", {})
    if not entries:
        return

    # Group entries by their day directory key ("YYYY-MM/DD").
    prefix = SESSION_LOG_DIR.replace("\\", "/")
    per_day: dict[str, dict[str, dict]] = {}
    for exec_id, entry in entries.items():
        out_dir = entry.get("output_dir", "").replace("\\", "/")
        if not out_dir.startswith(prefix):
            continue
        # out_dir looks like: docs/audit/session_logs/2026-03/03/01_abc12345_013337
        # -> day key is "2026-03/03".
        remainder = out_dir[len(prefix):].lstrip("/").split("/")
        if len(remainder) >= 2:
            key = f"{remainder[0]}/{remainder[1]}"
            per_day.setdefault(key, {})[exec_id] = entry

    # One JSON file per day directory.
    for key, bucket in per_day.items():
        directory = os.path.join(SESSION_LOG_DIR, key.replace("/", os.sep))
        os.makedirs(directory, exist_ok=True)
        payload = {"version": 2, "entries": bucket}
        with open(os.path.join(directory, filename), "w", encoding="utf-8") as fp:
            json.dump(payload, fp, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def update_index(index: dict, exec_id: str, output_dir: str, meta: dict,
                 summary: Optional[dict] = None, is_sub: bool = False,
                 parent_exec_id: str = "",
                 chat_session_id: str = "",
                 prev_exec_ids: Optional[list[str]] = None):
    """Add or refresh one index entry, with a structured summary for cheap AI queries.

    Overwrite mode:
    - Only the latest execution of a chatSession keeps a full entry.
    - Old entries listed in prev_exec_ids are collapsed into "superseded" stubs
      (kept in the index so they are never re-extracted).
    - `summary` is expected to be pre-merged by the caller.

    When is_sub=True the entry is tagged as a sub-agent record and
    parent_exec_id is stored alongside it.

    Args:
        index: index dict mutated in place.
        exec_id: executionId keying the new entry.
        output_dir: directory the record was written to.
        meta: basic metadata (chatSessionId/startTime/endTime/status/workflowType).
        summary: structured summary to embed, or None.
        is_sub: mark entry as a sub-agent record.
        parent_exec_id: parent executionId for sub-agent entries.
        chat_session_id: fallback chatSessionId when meta lacks one.
        prev_exec_ids: executionIds of earlier rounds of the same chatSession.
    """
    # Fix: use setdefault so an index without an "entries" key cannot raise
    # KeyError at the final assignment (matches the setdefault pattern used
    # elsewhere in this module for tombstone entries).
    entries = index.setdefault("entries", {})

    # Overwrite mode: collapse earlier entries of this chatSession into
    # "superseded" stubs so extract_latest does not re-extract them.
    if prev_exec_ids:
        for old_eid in prev_exec_ids:
            if old_eid in entries and old_eid != exec_id:
                entries[old_eid] = {
                    "superseded_by": exec_id,
                    "chatSessionId": entries[old_eid].get("chatSessionId", ""),
                }

    entry = {
        "output_dir": output_dir.replace("\\", "/"),
        "chatSessionId": meta.get("chatSessionId", "") or chat_session_id,
        "startTime": meta.get("startTime", ""),
        "endTime": meta.get("endTime", ""),
        "status": meta.get("status", ""),
        "workflowType": meta.get("workflowType", ""),
        "indexed_at": datetime.now(CST).isoformat(),
    }
    if is_sub:
        entry["is_sub"] = True
        if parent_exec_id:
            entry["parent_exec_id"] = parent_exec_id
    if summary:
        entry["summary"] = {
            "duration_s": summary.get("duration_s", 0),
            "msg_count": summary.get("msg_count", 0),
            "action_count": summary.get("action_count", 0),
            "files_modified": summary.get("files_modified", []),
            "files_created": summary.get("files_created", []),
            "sub_agents": summary.get("sub_agents", []),
            "errors": summary.get("errors", []),
        }
        # description comes from an external LLM; surface it at entry level.
        if summary.get("description"):
            entry["description"] = summary["description"]
    entries[exec_id] = entry
|
||
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 主提取逻辑
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def extract_single_execution(
    agent_dir: str,
    hex_dir: str,
    execution: dict,
    session_dir: Optional[str],
    index: dict,
    full_index: Optional[dict] = None,
    sub_mode: bool = False,
    sub_index: int = 0,
    output_base_dir: Optional[str] = None,
    parent_exec_id: str = "",
    force: bool = False,
) -> Optional[str]:
    """Extract a single execution and write it to disk (overwrite mode).

    Overwrite mode keeps exactly one main.md per chatSession:
    - Conversation: the latest execution's context.messages (cumulative superset)
    - Actions timeline: earlier rounds' actions + this round's actions (appended)
    - Execution summary: incrementally merged (durations added, file sets
      unioned, msg_count taken from the latest round)
    - Index: one entry per chatSession; older rounds become "superseded" stubs

    Args:
        agent_dir: the kiro.kiroagent directory
        hex_dir: the 32-char hex directory containing this execution
        execution: the execution entry from the manifest
        session_dir: workspace-sessions subdirectory (for session details)
        index: slim index dict (main conversations only)
        full_index: full index dict (main + sub-agents); None to skip
        sub_mode: whether this is a sub-agent extraction
        sub_index: sub-agent ordinal (1-based)
        output_base_dir: in sub mode, the output dir (shared with the parent)
        parent_exec_id: parent execution ID for sub-agent records
        force: re-extract even when already indexed

    Returns:
        The output directory path, or None (failure / already indexed).
    """
    exec_id = execution.get("executionId", "")
    chat_id = execution.get("chatSessionId", "")

    # Skip already-indexed executions whose files still exist (sub-agents are
    # not checked: they always follow their parent). `force` bypasses this
    # (used by --all re-extraction).
    if not sub_mode and not force and exec_id in index.get("entries", {}):
        existing_entry = index["entries"][exec_id]
        # superseded / no_log tombstones are permanent: never retried.
        if existing_entry.get("superseded_by") or existing_entry.get("no_log"):
            return None
        existing_dir = existing_entry.get("output_dir", "")
        if existing_dir and os.path.isdir(existing_dir):
            return None
        # The files were cleaned up: drop the stale entry and re-extract.
        del index["entries"][exec_id]

    # Load the execution log.
    log_path = find_execution_log(agent_dir, hex_dir, execution)
    if not log_path:
        # Tombstone: the log file is gone (cleaned up by Kiro). Record a
        # placeholder entry so we never retry this execution.
        if not sub_mode and exec_id:
            index.setdefault("entries", {})[exec_id] = {
                "no_log": True,
                "chatSessionId": chat_id,
                "indexed_at": datetime.now(CST).isoformat(),
            }
            if full_index is not None:
                full_index.setdefault("entries", {})[exec_id] = {
                    "no_log": True,
                    "chatSessionId": chat_id,
                    "indexed_at": datetime.now(CST).isoformat(),
                }
        return None

    try:
        with open(log_path, "rb") as f:
            raw = f.read()
        # Scrub surrogate characters (emoji in Kiro logs may be stored as
        # unpaired surrogate pairs that plain UTF-8 decoding rejects).
        text = raw.decode("utf-8", errors="surrogatepass").encode("utf-8", errors="replace").decode("utf-8")
        log = _sanitize_surrogates(json.loads(text))
    except Exception:
        return None

    # Backfill chatSessionId from the full log (newer manifests may omit it).
    if not chat_id:
        chat_id = log.get("chatSessionId", "")

    # Parse.
    messages = log.get("context", {}).get("messages", [])
    actions = log.get("actions", [])
    conversation = parse_messages(messages)
    timeline = parse_actions(actions)
    diffs = collect_diffs(agent_dir, hex_dir, execution)

    # Session info (main execution only).
    session_info = None
    if not sub_mode and session_dir and chat_id:
        session_info = load_session_detail(session_dir, chat_id)

    # prompt_log matching (main execution only).
    prompt_log = None
    if not sub_mode:
        start_time = log.get("startTime", 0)
        prompt_log = find_matching_prompt_log(start_time)

    # Execution start time drives directory/file-name time suffixes.
    _start_ms = log.get("startTime") or execution.get("startTime", 0)
    _ym, _dd, _hms = ts_date_parts(_start_ms)

    # Resolve the output directory.
    if sub_mode and output_base_dir:
        out_dir = output_base_dir
    else:
        chat_short = chat_id[:8] if chat_id else hash8(exec_id)
        day_dir = os.path.join(SESSION_LOG_DIR, _ym, _dd)
        out_dir = _resolve_chat_dir(day_dir, chat_short, _hms)

    os.makedirs(out_dir, exist_ok=True)

    # Cross-day pointer: when the chat dir lives under a different day than
    # this execution's start date, leave a reference in the execution's day dir.
    if not sub_mode:
        chat_short = chat_id[:8] if chat_id else hash8(exec_id)
        chat_day_dir = os.path.dirname(out_dir)
        exec_day_dir = os.path.join(SESSION_LOG_DIR, _ym, _dd)
        if os.path.normpath(chat_day_dir) != os.path.normpath(exec_day_dir):
            _write_cross_day_ref(exec_day_dir, chat_short, out_dir)

    # Recursively extract sub-agents first and collect the
    # subExecutionId -> file-path mapping for the main record.
    sub_file_map: dict[str, str] = {}
    if not sub_mode:
        sub_exec_ids = extract_sub_execution_ids(actions)
        for si, sub_eid in enumerate(sub_exec_ids, 1):
            sub_execs = find_all_executions(agent_dir, execution_id=sub_eid)
            if sub_execs:
                sub_exec = sub_execs[0]
                extract_single_execution(
                    agent_dir=agent_dir,
                    hex_dir=sub_exec["_hex_dir"],
                    execution=sub_exec,
                    session_dir=session_dir,
                    index=index,
                    full_index=full_index,
                    sub_mode=True,
                    sub_index=si,
                    output_base_dir=out_dir,
                    parent_exec_id=exec_id,
                    force=force,
                )
                sub_filename = f"sub_{si:02d}_{sub_eid[:8]}.md"
                sub_file_map[sub_eid] = os.path.join(out_dir, sub_filename)

    # ── Overwrite mode: load earlier rounds' actions and summary ──
    accumulated_actions_md = ""
    prev_summary: dict = {}
    execution_round = 1
    prev_exec_ids: list[str] = []  # older executionIds of the same chatSession

    if not sub_mode:
        # Find the existing main.md of this chatSession, if any.
        existing_main_path = ""
        if os.path.isdir(out_dir):
            for f in os.listdir(out_dir):
                if f.startswith("main_") and f.endswith(".md"):
                    existing_main_path = os.path.join(out_dir, f)
                    break  # overwrite mode keeps only one main file

        if existing_main_path:
            # Pull the accumulated actions of earlier rounds.
            accumulated_actions_md = _load_previous_actions_md(existing_main_path)

        # Locate older entries of this chatSession in the index to build prev_summary.
        if chat_id:
            for eid, ent in list(index.get("entries", {}).items()):
                if ent.get("chatSessionId") == chat_id and eid != exec_id and not ent.get("is_sub"):
                    # Tombstones carry no summary/output_dir: record id only.
                    if ent.get("superseded_by") or ent.get("no_log"):
                        prev_exec_ids.append(eid)
                        continue
                    prev_exec_ids.append(eid)
                    ent_summary = ent.get("summary", {})
                    # Preserve the old entry's LLM-generated description.
                    if ent.get("description"):
                        ent_summary["description"] = ent["description"]
                    prev_summary = _merge_summaries(prev_summary, ent_summary)

        # Which round this is.
        execution_round = len(prev_exec_ids) + 1

    # Build this round's summary, merge with earlier rounds, then render.
    cur_summary = _build_execution_summary(log, conversation, timeline, sub_file_map if not sub_mode else None)
    final_summary = _merge_summaries(prev_summary, cur_summary) if prev_summary else cur_summary

    md, _cur_summary_unused, _display_unused = generate_full_record(
        log=log,
        conversation=conversation,
        timeline=timeline,
        diffs=diffs,
        session_info=session_info,
        prompt_log_path=prompt_log,
        is_sub=sub_mode,
        sub_index=sub_index,
        sub_file_map=sub_file_map if not sub_mode else None,
        accumulated_actions_md=accumulated_actions_md,
        merged_summary=final_summary if prev_summary else None,
        execution_round=execution_round,
    )

    # ── Write the file (overwrite mode: drop old main, write new main) ──
    if sub_mode:
        filename = f"sub_{sub_index:02d}_{exec_id[:8]}.md"
    else:
        # Overwrite mode: remove every old main_*.md in this directory.
        if os.path.isdir(out_dir):
            for f in os.listdir(out_dir):
                if f.startswith("main_") and f.endswith(".md"):
                    try:
                        os.remove(os.path.join(out_dir, f))
                    except Exception:
                        pass
        # Fixed name: main_01_{first 8 chars of latest execId}.md
        filename = f"main_01_{exec_id[:8]}.md"

    filepath = os.path.join(out_dir, filename)
    md_safe = md.encode("utf-8", errors="surrogateescape").decode("utf-8", errors="replace")
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(md_safe)

    # Update the indexes (overwrite mode: old entries replaced by the new one).
    _meta = {
        "chatSessionId": chat_id,
        "startTime": ts_fmt(log.get("startTime")),
        "endTime": ts_fmt(log.get("endTime")),
        "status": log.get("status", ""),
        "workflowType": log.get("workflowType", ""),
    }
    if not sub_mode:
        update_index(index, exec_id, out_dir, _meta,
                     summary=final_summary,
                     chat_session_id=chat_id,
                     prev_exec_ids=prev_exec_ids)
        if full_index is not None:
            update_index(full_index, exec_id, out_dir, _meta,
                         summary=final_summary,
                         chat_session_id=chat_id,
                         prev_exec_ids=prev_exec_ids)
    else:
        if full_index is not None:
            update_index(full_index, exec_id, out_dir, _meta,
                         summary=cur_summary, is_sub=True,
                         parent_exec_id=parent_exec_id)

    return out_dir
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# 入口函数
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def extract_latest(global_storage: Optional[str] = None, workspace_path: Optional[str] = None):
|
||
"""提取所有未索引的终态 execution + partial/补全逻辑(供 agent_on_stop 调用)。
|
||
|
||
覆盖模式:同一 chatSession 的 execution 按 startTime 升序提取,
|
||
后续轮次覆盖前一轮的 main.md(对话用最新超集,actions 追加)。
|
||
|
||
Partial/补全:
|
||
1. 检测当前 session 的 execution(可能还在 running),提取为 status="partial"
|
||
2. 下次调用时,找到 status="partial" 的 entry,重新提取覆盖
|
||
"""
|
||
gs = global_storage or DEFAULT_GLOBAL_STORAGE
|
||
ws = workspace_path or os.getcwd()
|
||
|
||
agent_dir = find_kiro_agent_dir(gs)
|
||
if not agent_dir:
|
||
return
|
||
|
||
session_dir = find_workspace_session_dir(agent_dir, ws)
|
||
chat_ids = None
|
||
if session_dir:
|
||
sessions = load_sessions_json(session_dir)
|
||
chat_ids = {s.get("chatSessionId") or s.get("sessionId") for s in sessions
|
||
if s.get("chatSessionId") or s.get("sessionId")}
|
||
|
||
TERMINAL_STATUSES = ("succeed", "failed", "stopped", "aborted")
|
||
|
||
all_execs = find_all_executions(agent_dir, chat_session_ids=chat_ids)
|
||
if not all_execs:
|
||
return
|
||
|
||
index = load_index()
|
||
full_index = load_full_index()
|
||
|
||
# ── 步骤 1:补全之前标记为 partial 的 entry ──
|
||
partial_eids = [
|
||
eid for eid, ent in index.get("entries", {}).items()
|
||
if ent.get("status") == "partial"
|
||
]
|
||
for p_eid in partial_eids:
|
||
# 在 all_execs 中找到对应的 execution
|
||
matched = [e for e in all_execs if e.get("executionId") == p_eid]
|
||
if matched:
|
||
p_exec = matched[0]
|
||
# 如果已变为终态,重新提取覆盖
|
||
if p_exec.get("status", "") in TERMINAL_STATUSES:
|
||
result = extract_single_execution(
|
||
agent_dir=agent_dir,
|
||
hex_dir=p_exec["_hex_dir"],
|
||
execution=p_exec,
|
||
session_dir=session_dir,
|
||
index=index,
|
||
full_index=full_index,
|
||
force=True, # 强制覆盖
|
||
)
|
||
if result:
|
||
# 逐条持久化,避免中途超时导致下次重复处理
|
||
save_index(index)
|
||
save_full_index(full_index)
|
||
print(f"[session-extract] completed partial: {result}")
|
||
|
||
# ── 步骤 2:提取未索引的终态 execution ──
|
||
# 同时检查 index(主对话)和 full_index(含子代理),避免重复提取子代理 execution
|
||
indexed_eids = set(index.get("entries", {}).keys())
|
||
if full_index:
|
||
indexed_eids |= set(full_index.get("entries", {}).keys())
|
||
ready = [e for e in all_execs
|
||
if e.get("executionId", "") not in indexed_eids
|
||
and e.get("status", "") in TERMINAL_STATUSES]
|
||
|
||
# ── 步骤 3:检测当前 session 的 running execution,提取为 partial ──
|
||
# 当前 hook 触发时,本 session 的 execution 通常还在 running
|
||
# 通过环境变量 KIRO_CHAT_SESSION_ID 识别(如果有的话)
|
||
current_chat_id = os.environ.get("KIRO_CHAT_SESSION_ID", "")
|
||
if current_chat_id:
|
||
running_execs = [
|
||
e for e in all_execs
|
||
if e.get("chatSessionId") == current_chat_id
|
||
and e.get("status") not in TERMINAL_STATUSES
|
||
and e.get("executionId", "") not in indexed_eids
|
||
]
|
||
for r_exec in running_execs:
|
||
# 标记为 partial 提取
|
||
r_exec["_is_partial"] = True
|
||
ready.append(r_exec)
|
||
|
||
if not ready:
|
||
return
|
||
|
||
# agent_on_stop 场景下限制单次处理量,避免处理数千个历史 execution
|
||
# 优先处理最新的(按 startTime 降序排列,ready 已经是降序的来自 find_all_executions)
|
||
MAX_PER_RUN = 50
|
||
if len(ready) > MAX_PER_RUN:
|
||
# 按 startTime 降序排序,取最新的 MAX_PER_RUN 个
|
||
ready.sort(key=lambda x: x.get("startTime", 0), reverse=True)
|
||
skipped = len(ready) - MAX_PER_RUN
|
||
ready = ready[:MAX_PER_RUN]
|
||
print(f"[session-extract] capped to {MAX_PER_RUN} most recent (skipped {skipped} older)")
|
||
|
||
# 按 chatSessionId 分组,组内按 startTime 升序
|
||
from collections import defaultdict
|
||
chat_groups: dict[str, list[dict]] = defaultdict(list)
|
||
for e in ready:
|
||
cid = e.get("chatSessionId", "") or "unknown"
|
||
chat_groups[cid].append(e)
|
||
for cid in chat_groups:
|
||
chat_groups[cid].sort(key=lambda x: x.get("startTime", 0))
|
||
|
||
import threading
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
|
||
lock = threading.Lock()
|
||
extracted_count = 0
|
||
tombstone_count = 0
|
||
|
||
    def _extract_group(group_execs: list[dict]) -> tuple[list[str], dict, dict]:
        """Serially extract every execution belonging to one chatSession.

        Returns (results, idx_entries, full_entries): the extracted output
        identifiers plus the entries accumulated in thread-local indexes,
        which the caller merges into the shared indexes under a lock.
        """
        # Thread-local scratch indexes: no locking needed during extraction.
        local_index: dict = {"version": 2, "entries": {}}
        local_full: dict = {"version": 2, "entries": {}}
        results: list[str] = []
        for execution in group_execs:
            eid = execution.get("executionId", "")
            try:
                result = extract_single_execution(
                    agent_dir=agent_dir,
                    hex_dir=execution["_hex_dir"],
                    execution=execution,
                    session_dir=session_dir,
                    index=local_index,
                    full_index=local_full,
                )
                if result:
                    # Executions flagged still-running are marked "partial" in
                    # both indexes so a later pass re-extracts them (with
                    # force=True) once they reach a terminal state.
                    if execution.get("_is_partial"):
                        if eid in local_index.get("entries", {}):
                            local_index["entries"][eid]["status"] = "partial"
                        if eid in local_full.get("entries", {}):
                            local_full["entries"][eid]["status"] = "partial"
                    results.append(result)
            except Exception as exc:
                # Best-effort: one failing execution must not abort the group.
                print(f"[session-extract] ✗ {eid[:8]}: {exc}")
        return results, local_index.get("entries", {}), local_full.get("entries", {})
|
||
|
||
workers = min(4, len(chat_groups))
|
||
with ThreadPoolExecutor(max_workers=workers) as pool:
|
||
futures = {pool.submit(_extract_group, execs): cid
|
||
for cid, execs in chat_groups.items()}
|
||
for future in as_completed(futures):
|
||
results, idx_entries, full_entries = future.result()
|
||
with lock:
|
||
# 合并到主索引
|
||
index.setdefault("entries", {}).update(idx_entries)
|
||
if full_index is not None:
|
||
full_index.setdefault("entries", {}).update(full_entries)
|
||
extracted_count += len(results)
|
||
tombstone_count += sum(
|
||
1 for ent in idx_entries.values() if ent.get("no_log")
|
||
)
|
||
# 逐组持久化,避免中途超时导致下次重复处理
|
||
if idx_entries:
|
||
save_index(index)
|
||
save_full_index(full_index)
|
||
for r in results:
|
||
print(f"[session-extract] extracted: {r}")
|
||
|
||
if extracted_count > 1:
|
||
print(f"[session-extract] total: {extracted_count} executions")
|
||
if tombstone_count > 0:
|
||
print(f"[session-extract] tombstoned: {tombstone_count} (no log found)")
|
||
|
||
|
||
def extract_all_unindexed(
    global_storage: Optional[str] = None,
    workspace_path: Optional[str] = None,
    limit: Optional[int] = None,
    workers: int = 8,
    rebuild: bool = False,
):
    """Extract executions (overwrite mode, multi-threaded).

    rebuild=True clears both indexes and reprocesses every terminal execution
    (full rebuild, used when migrating to overwrite mode).
    rebuild=False extracts only executions not yet present in the main index
    (incremental mode).

    Args:
        global_storage: Kiro globalStorage path; defaults to DEFAULT_GLOBAL_STORAGE.
        workspace_path: workspace used to scope chat sessions; defaults to os.getcwd().
        limit: cap on how many executions to process this run (None/0 = no cap).
        workers: thread-pool size; each worker handles one chatSession group.
        rebuild: full rebuild vs. incremental extraction (see above).
    """
    gs = global_storage or DEFAULT_GLOBAL_STORAGE
    ws = workspace_path or os.getcwd()

    agent_dir = find_kiro_agent_dir(gs)
    if not agent_dir:
        print("[session-extract] kiro.kiroagent dir not found")
        return

    # Restrict to chat sessions belonging to this workspace, when discoverable.
    session_dir = find_workspace_session_dir(agent_dir, ws)
    chat_ids = None
    if session_dir:
        sessions = load_sessions_json(session_dir)
        chat_ids = {s.get("chatSessionId") or s.get("sessionId") for s in sessions
                    if s.get("chatSessionId") or s.get("sessionId")}

    all_execs = find_all_executions(agent_dir, chat_session_ids=chat_ids)
    if not all_execs:
        print("[session-extract] no executions found")
        return

    # Only executions in one of these terminal states are extracted here.
    TERMINAL_STATUSES = ("succeed", "failed", "stopped", "aborted")

    if rebuild:
        # Full rebuild: start from empty indexes, reprocess all terminal executions.
        print("[session-extract] REBUILD mode: clearing indexes, reprocessing all")
        index = {"version": 2, "entries": {}}
        full_index = {"version": 2, "entries": {}}
        # Old main_*.md files get overwritten inside extract_single_execution,
        # so no pre-cleanup is needed (sub_*.md and other files are kept).
        todo = [e for e in all_execs
                if e.get("status", "") in TERMINAL_STATUSES]
    else:
        index = load_index()
        full_index = load_full_index()
        todo = [e for e in all_execs
                if e.get("executionId", "") not in index.get("entries", {})
                and e.get("status", "") in TERMINAL_STATUSES]

    if limit:
        todo = todo[:limit]
    if not todo:
        print("[session-extract] all indexed, nothing to do")
        return

    print(f"[session-extract] {len(todo)} executions to extract (workers={workers})")

    import threading
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Group by chatSessionId; within a group, extract serially in startTime order.
    # Serial-per-group keeps actions appended chronologically in overwrite mode.
    from collections import defaultdict
    chat_groups: dict[str, list[dict]] = defaultdict(list)
    for e in todo:
        cid = e.get("chatSessionId", "") or "unknown"
        chat_groups[cid].append(e)
    for cid in chat_groups:
        chat_groups[cid].sort(key=lambda x: x.get("startTime", 0))

    lock = threading.Lock()
    count = 0

    def _extract_group(group_execs):
        """Serially extract every execution of one chatSession (overwrite mode).

        Returns (results, local index entries, local full-index entries);
        entries are accumulated thread-locally and merged by the caller.
        """
        local_index = {"version": 2, "entries": {}}
        local_full = {"version": 2, "entries": {}}
        results = []
        for execution in group_execs:
            try:
                result = extract_single_execution(
                    agent_dir=agent_dir,
                    hex_dir=execution["_hex_dir"],
                    execution=execution,
                    session_dir=session_dir,
                    index=local_index,
                    full_index=local_full,
                    force=rebuild,  # overwrite existing output only on full rebuild
                )
                if result:
                    results.append(result)
            except Exception as e:
                # Best-effort: one failing execution must not abort the group.
                eid = execution.get("executionId", "?")[:8]
                print(f"[session-extract] ✗ {eid}: {e}")
        return results, local_index.get("entries", {}), local_full.get("entries", {})

    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = {pool.submit(_extract_group, execs): cid
                   for cid, execs in chat_groups.items()}
        for future in as_completed(futures):
            results, idx_entries, full_entries = future.result()
            if results:
                with lock:
                    count += len(results)
                    index["entries"].update(idx_entries)
                    full_index["entries"].update(full_entries)
                    # Checkpoint every 50 extractions so an interruption loses
                    # at most one batch of work.
                    if count % 50 == 0:
                        save_index(index)
                        save_full_index(full_index)
                        print(f"[session-extract] [{count}/{len(todo)}] checkpoint saved")
                    elif count % 10 == 0:
                        print(f"[session-extract] [{count}/{len(todo)}]")

    # Post-pass dedup: parallel merging can produce multiple entries sharing a
    # chatSessionId (serial-per-group prevents in-group duplicates, but cases
    # such as double-submit can still introduce them).
    def _dedup_index_entries(idx: dict) -> int:
        """Dedup main-conversation entries by chatSessionId.

        Keeps the entry with the latest startTime, merges all summaries into
        it, and carries over the most recent non-empty description.
        Returns the number of entries removed.
        """
        entries = idx.get("entries", {})
        chat_groups_dedup: dict[str, list[tuple[str, dict]]] = defaultdict(list)
        keep: dict[str, dict] = {}
        for eid, ent in entries.items():
            # Sub-agent entries and entries lacking a chatSessionId pass through untouched.
            if ent.get("is_sub"):
                keep[eid] = ent
                continue
            cid = ent.get("chatSessionId", "")
            if not cid:
                keep[eid] = ent
                continue
            chat_groups_dedup[cid].append((eid, ent))
        removed = 0
        for cid, group in chat_groups_dedup.items():
            if len(group) == 1:
                keep[group[0][0]] = group[0][1]
                continue
            # NOTE(review): default "" here vs. the numeric default 0 used when
            # sorting raw executions above — assumes the indexed startTime is a
            # consistently comparable (string?) value; confirm against the
            # format save_index writes.
            group.sort(key=lambda x: x[1].get("startTime", ""))
            merged_summary: dict = {}
            for eid, ent in group:
                merged_summary = _merge_summaries(merged_summary, ent.get("summary", {}))
            last_eid, last_ent = group[-1]
            last_ent["summary"] = merged_summary
            # Carry over the newest non-empty description.
            for eid, ent in reversed(group):
                if ent.get("description"):
                    last_ent["description"] = ent["description"]
                    break
            keep[last_eid] = last_ent
            removed += len(group) - 1
        idx["entries"] = keep
        return removed

    removed_main = _dedup_index_entries(index)
    removed_full = _dedup_index_entries(full_index)
    if removed_main or removed_full:
        print(f"[session-extract] dedup: removed {removed_main} main / {removed_full} full duplicates")

    # Final save.
    save_index(index)
    save_full_index(full_index)
    print(f"[session-extract] done, extracted {count}/{len(todo)}, final entries: {len(index.get('entries', {}))}")
|
||
|
||
|
||
def extract_by_id(
    execution_id: str,
    global_storage: Optional[str] = None,
):
    """Extract the execution matching the given executionId (prefix match)."""
    storage_root = global_storage or DEFAULT_GLOBAL_STORAGE
    agent_dir = find_kiro_agent_dir(storage_root)
    if not agent_dir:
        print("[session-extract] kiro.kiroagent dir not found")
        return

    # Locate the target execution and verify the first hit really carries the
    # requested ID as a prefix of its executionId.
    candidates = find_all_executions(agent_dir, execution_id=execution_id)
    target = candidates[0] if candidates else None
    if target is None or not target.get("executionId", "").startswith(execution_id):
        print(f"[session-extract] execution not found: {execution_id}")
        return

    main_idx = load_index()
    full_idx = load_full_index()
    outcome = extract_single_execution(
        agent_dir=agent_dir,
        hex_dir=target["_hex_dir"],
        execution=target,
        session_dir=None,
        index=main_idx,
        full_index=full_idx,
        force=True,  # an explicitly requested ID is always (re-)extracted
    )
    if not outcome:
        return
    save_index(main_idx)
    save_full_index(full_idx)
    print(f"[session-extract] extracted: {outcome}")
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════
|
||
# CLI 入口
|
||
# ═══════════════════════════════════════════════════════════
|
||
|
||
def main():
    """CLI entry point: parse flags and dispatch to the matching extractor."""
    import argparse

    parser = argparse.ArgumentParser(description="Kiro 执行日志全量提取器 v3(覆盖模式)")
    parser.add_argument("--all", action="store_true", help="提取所有未索引的 execution")
    parser.add_argument("--rebuild", action="store_true", help="全量重建:清空索引,重新提取所有 execution(覆盖模式迁移用)")
    parser.add_argument("--recent", type=int, metavar="N", help="提取最近 N 条未索引的")
    parser.add_argument("--workers", type=int, default=8, help="并行线程数(默认 8)")
    parser.add_argument("--execution-id", type=str, help="提取指定 executionId")
    parser.add_argument("--global-storage", type=str, help="globalStorage 路径")
    parser.add_argument("--workspace", type=str, help="workspace 路径")
    args = parser.parse_args()

    storage = args.global_storage
    workspace = args.workspace

    # Dispatch with guard clauses; precedence: execution-id > rebuild > all > recent.
    if args.execution_id:
        extract_by_id(args.execution_id, global_storage=storage)
        return
    if args.rebuild:
        extract_all_unindexed(global_storage=storage, workspace_path=workspace,
                              workers=args.workers, rebuild=True)
        return
    if args.all:
        extract_all_unindexed(global_storage=storage, workspace_path=workspace,
                              workers=args.workers)
        return
    if args.recent:
        extract_all_unindexed(global_storage=storage, workspace_path=workspace,
                              limit=args.recent, workers=args.workers)
        return
    # Default with no flags: extract only the latest execution.
    extract_latest(global_storage=storage, workspace_path=workspace)
|
||
|
||
|
||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|