#!/usr/bin/env python3
"""prompt_on_submit — merged promptSubmit hook script (v2: file-baseline mode).

Combines what used to be two hooks, audit_flagger and prompt_audit_log:

1. Scan the workspace files and save a baseline snapshot
   (.kiro/state/.file_baseline.json, written by the file_baseline module).
2. Classify risk from ``git status`` and write .kiro/state/.audit_state.json.
3. Record the prompt under docs/audit/prompt_logs/.

Change detection no longer relies on ``git status`` (infrequent commits made
it report false positives).  Risk classification still does, because it needs
to know which files differ from the last commit.

Every functional block is isolated with try/except so that one failing block
does not prevent the others from running.
"""

import hashlib
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone, timedelta

# Import the file-baseline module and the cwd check from this script's directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from file_baseline import scan_workspace, save_baseline
from _ensure_root import ensure_repo_root

TZ_TAIPEI = timezone(timedelta(hours=8))

# ── Risk rules (taken from audit_flagger) ──
# Each entry maps a path prefix to the label reported as "dir:<label>".
RISK_RULES = [
    (re.compile(r"^apps/etl/connectors/feiqiu/(api|cli|config|database|loaders|models|orchestration|scd|tasks|utils|quality)/"), "etl"),
    (re.compile(r"^apps/backend/app/"), "backend"),
    (re.compile(r"^apps/admin-web/src/"), "admin-web"),
    (re.compile(r"^apps/miniprogram/(miniapp|miniprogram)/"), "miniprogram"),
    (re.compile(r"^packages/shared/"), "shared"),
    (re.compile(r"^db/"), "db"),
]

# Paths that never count as a change for risk classification.
NOISE_PATTERNS = [
    re.compile(r"^docs/audit/"),
    re.compile(r"^\.kiro/"),
    re.compile(r"^tmp/"),
    re.compile(r"^\.hypothesis/"),
]

# Paths whose modification sets "db_docs_required".
DB_PATTERNS = [
    re.compile(r"^db/"),
    re.compile(r"/migrations/"),
    re.compile(r"\.sql$"),
    re.compile(r"\.prisma$"),
]

STATE_PATH = os.path.join(".kiro", "state", ".audit_state.json")
PROMPT_ID_PATH = os.path.join(".kiro", "state", ".last_prompt_id.json")


def now_taipei() -> datetime:
    """Return the current time as an aware datetime in UTC+8."""
    return datetime.now(TZ_TAIPEI)


def sha1hex(s: str) -> str:
    """Return the hex SHA-1 digest of *s* encoded as UTF-8."""
    return hashlib.sha1(s.encode("utf-8")).hexdigest()


def _parse_porcelain_z(output: str) -> list[str]:
    """Extract the current path of every entry in ``git status --porcelain -z`` output.

    In the ``-z`` format each entry is ``XY <path>`` terminated by NUL, paths
    are never quoted or escaped, and a rename/copy entry is followed by one
    extra NUL-terminated field holding the original path.
    """
    paths = []
    fields = iter(output.split("\0"))
    for entry in fields:
        # Two status columns, one space, then at least one path character.
        if len(entry) < 4:
            continue
        status, path = entry[:2], entry[3:]
        if "R" in status or "C" in status:
            # Skip the origin path; only the destination is a changed file.
            next(fields, None)
        paths.append(path)
    return paths


def get_git_changed_files() -> list[str]:
    """Return paths reported by ``git status`` (risk classification only).

    The NUL-delimited ``-z`` format is used so that paths containing
    non-ASCII characters, spaces, quotes or backslashes arrive verbatim
    instead of C-quoted.  Returns an empty list when git is missing, times
    out, or exits with a non-zero status.
    """
    try:
        r = subprocess.run(
            ["git", "status", "--porcelain", "-z"],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        return []
    if r.returncode != 0:
        return []
    return _parse_porcelain_z(r.stdout)


def is_noise(f: str) -> bool:
    """Return True when *f* matches any of NOISE_PATTERNS."""
    return any(p.search(f) for p in NOISE_PATTERNS)


def safe_read_json(path: str) -> dict:
    """Load a JSON object from *path*, returning ``{}`` on any problem.

    A missing file, unreadable file, invalid JSON, or a top-level value that
    is not an object all yield ``{}`` so callers can always use ``.get``.
    """
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}


def write_json(path: str, data) -> None:
    """Write *data* to *path* as indented UTF-8 JSON, creating the directory first."""
    os.makedirs(os.path.dirname(path) or os.path.join(".kiro", "state"), exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


# ── Block 1: risk flagging (uses git status to decide which files need audit) ──
def do_audit_flag(git_files: list[str], now: datetime) -> None:
    """Classify *git_files* against the risk rules and write STATE_PATH.

    Noise paths are dropped first.  With nothing left, a cleared state is
    written.  Otherwise the state records whether an audit and/or DB docs are
    required, the reasons, up to 50 changed files, and a fingerprint of the
    full file list.
    """
    files = sorted({f for f in git_files if not is_noise(f)})

    if not files:
        write_json(STATE_PATH, {
            "audit_required": False,
            "db_docs_required": False,
            "reasons": [],
            "changed_files": [],
            "change_fingerprint": "",
            "marked_at": now.isoformat(),
            "last_reminded_at": None,
        })
        return

    reasons = []
    audit_required = False
    db_docs_required = False

    for f in files:
        for pattern, label in RISK_RULES:
            if pattern.search(f):
                audit_required = True
                tag = f"dir:{label}"
                if tag not in reasons:
                    reasons.append(tag)
        # A path without any separator is a file at the repository root.
        if "/" not in f:
            audit_required = True
            if "root-file" not in reasons:
                reasons.append("root-file")
        if any(p.search(f) for p in DB_PATTERNS):
            db_docs_required = True
            if "db-schema-change" not in reasons:
                reasons.append("db-schema-change")

    fp = sha1hex("\n".join(files))

    # Keep the existing last_reminded_at only while the change set is unchanged.
    last_reminded = None
    existing = safe_read_json(STATE_PATH)
    if existing.get("change_fingerprint") == fp:
        last_reminded = existing.get("last_reminded_at")

    write_json(STATE_PATH, {
        "audit_required": audit_required,
        "db_docs_required": db_docs_required,
        "reasons": reasons,
        "changed_files": files[:50],
        "change_fingerprint": fp,
        "marked_at": now.isoformat(),
        "last_reminded_at": last_reminded,
    })


# ── Block 2: prompt log ──
def do_prompt_log(now: datetime) -> None:
    """Write the prompt from the USER_PROMPT environment variable to a log file.

    Creates docs/audit/prompt_logs/prompt_log_<timestamp>.md and records the
    generated prompt id in PROMPT_ID_PATH.
    """
    prompt_id = f"P{now.strftime('%Y%m%d-%H%M%S')}"
    prompt_raw = os.environ.get("USER_PROMPT", "")
    # Prompts above 20000 characters are cut down to their first 5000.
    if len(prompt_raw) > 20000:
        prompt_raw = prompt_raw[:5000] + "\n[TRUNCATED: prompt too long]"

    # One-line summary: collapse all whitespace, cap at 120 characters.
    summary = " ".join(prompt_raw.split()).strip()
    if len(summary) > 120:
        summary = summary[:120] + "…"
    if not summary:
        summary = "(empty prompt)"

    log_dir = os.path.join("docs", "audit", "prompt_logs")
    os.makedirs(log_dir, exist_ok=True)
    filename = f"prompt_log_{now.strftime('%Y%m%d_%H%M%S')}.md"

    # The fence must be longer than any backtick run inside the prompt,
    # otherwise a prompt containing ``` would close the code block early.
    longest_run = max((len(run) for run in re.findall(r"`+", prompt_raw)), default=0)
    fence = "`" * max(3, longest_run + 1)

    # NOTE(review): the line layout of this entry was reconstructed — confirm
    # against whatever reads the prompt logs.
    entry = (
        f"- [{prompt_id}] {now.strftime('%Y-%m-%d %H:%M:%S %z')}\n"
        f"  - summary: {summary}\n"
        f"  - prompt:\n"
        f"{fence}text\n"
        f"{prompt_raw}\n"
        f"{fence}\n"
    )
    with open(os.path.join(log_dir, filename), "w", encoding="utf-8") as f:
        f.write(entry)

    write_json(PROMPT_ID_PATH, {"prompt_id": prompt_id, "at": now.isoformat()})


# ── Block 3: file baseline snapshot (replaces the git snapshot) ──
def do_file_baseline() -> None:
    """Scan the workspace and save the result as the baseline snapshot.

    The agentStop hook is expected to scan again and diff against this
    baseline to detect what changed during the conversation.
    """
    baseline = scan_workspace(".")
    save_baseline(baseline)


def main() -> None:
    """Run the three functional blocks, each isolated from the others' failures."""
    ensure_repo_root()
    now = now_taipei()

    # Failures below are swallowed on purpose: this is a best-effort hook and
    # one failing block must not stop the remaining ones.

    # Block 3 runs first so the snapshot reflects the start of the conversation.
    try:
        do_file_baseline()
    except Exception:
        pass

    # Block 1: risk flagging (still git-status based, to see uncommitted changes).
    try:
        git_files = get_git_changed_files()
        do_audit_flag(git_files, now)
    except Exception:
        pass

    # Block 2: prompt log.
    try:
        do_prompt_log(now)
    except Exception:
        pass


if __name__ == "__main__":
    # The hook must never fail the prompt submission, so nothing propagates.
    try:
        main()
    except Exception:
        pass