Neo-ZQYY/.kiro/scripts/agent_on_stop.py

#!/usr/bin/env python3
"""agent_on_stop — merged agentStop hook script.

Merges the former audit_reminder + session_log + change_compliance_prescan +
build_audit_context scripts:
1. Detect changes (compare against the promptSubmit snapshot to identify non-Kiro changes)
2. If no files changed → skip all reviews and exit silently
3. Record the session log → docs/audit/session_logs/
4. Compliance pre-scan → .kiro/.compliance_state.json
5. Build the audit context → .kiro/.audit_context.json
6. Audit reminder (rate-limited to once per 15 minutes) → stderr

Every functional block is isolated with try/except so that a single failure
does not affect the others.
"""

import hashlib
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone, timedelta

# Fixed UTC+8 offset used for every timestamp this script produces.
TZ_TAIPEI = timezone(timedelta(hours=8))
# Minimum time between two audit reminders (see do_audit_reminder).
MIN_INTERVAL = timedelta(minutes=15)

# Path constants (all relative to the current working directory)
STATE_PATH = os.path.join(".kiro", ".audit_state.json")
SNAPSHOT_PATH = os.path.join(".kiro", ".git_snapshot.json")
COMPLIANCE_PATH = os.path.join(".kiro", ".compliance_state.json")
CONTEXT_PATH = os.path.join(".kiro", ".audit_context.json")
PROMPT_ID_PATH = os.path.join(".kiro", ".last_prompt_id.json")
SESSION_LOG_DIR = os.path.join("docs", "audit", "session_logs")

# Noise paths: changes matching any of these are ignored by is_noise()
NOISE_PATTERNS = [
    re.compile(r"^docs/audit/"),
    re.compile(r"^\.kiro/"),
    re.compile(r"^\.hypothesis/"),
    re.compile(r"^tmp/"),
    re.compile(r"\.png$"),
    re.compile(r"\.jpg$"),
]

# High-risk paths: changed files matching any of these get their diff
# embedded in the audit context
HIGH_RISK_PATTERNS = [
    re.compile(r"^apps/etl/connectors/feiqiu/(api|cli|config|database|loaders|models|orchestration|scd|tasks|utils|quality)/"),
    re.compile(r"^apps/backend/app/"),
    re.compile(r"^apps/admin-web/src/"),
    re.compile(r"^apps/miniprogram/"),
    re.compile(r"^packages/shared/"),
    re.compile(r"^db/"),
]

# Documentation map (used by the compliance check): code path prefix -> the
# documents (or documentation directories, when the entry ends with "/")
# expected to change together with code under that prefix
DOC_MAP = {
    "apps/backend/app/routers/": ["apps/backend/docs/API-REFERENCE.md"],
    "apps/backend/app/services/": ["apps/backend/docs/API-REFERENCE.md", "apps/backend/README.md"],
    "apps/backend/app/auth/": ["apps/backend/docs/API-REFERENCE.md", "apps/backend/README.md"],
    "apps/etl/connectors/feiqiu/tasks/": ["apps/etl/connectors/feiqiu/docs/etl_tasks/"],
    "apps/etl/connectors/feiqiu/loaders/": ["apps/etl/connectors/feiqiu/docs/etl_tasks/"],
    "apps/etl/connectors/feiqiu/scd/": ["apps/etl/connectors/feiqiu/docs/business-rules/scd2_rules.md"],
    "apps/etl/connectors/feiqiu/orchestration/": ["apps/etl/connectors/feiqiu/docs/architecture/"],
    "apps/admin-web/src/": ["apps/admin-web/README.md"],
    "apps/miniprogram/": ["apps/miniprogram/README.md"],
    "packages/shared/": ["packages/shared/README.md"],
}

# SQL files under these locations are reported as migration SQL by the
# compliance pre-scan
MIGRATION_PATTERNS = [
    re.compile(r"^db/etl_feiqiu/migrations/.*\.sql$"),
    re.compile(r"^db/zqyy_app/migrations/.*\.sql$"),
    re.compile(r"^db/fdw/.*\.sql$"),
]

# Markers the compliance pre-scan looks for among the changed files
BD_MANUAL_PATTERN = re.compile(r"^docs/database/BD_Manual_.*\.md$")
DDL_BASELINE_DIR = "docs/database/ddl/"
AUDIT_CHANGES_DIR = "docs/audit/changes/"


def now_taipei():
    """Return the current time as a timezone-aware datetime in ``TZ_TAIPEI``."""
    return datetime.now(tz=TZ_TAIPEI)


def sha1hex(s: str) -> str:
    """Return the hexadecimal SHA-1 digest of ``s`` encoded as UTF-8."""
    digest = hashlib.sha1()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()


def is_noise(f: str) -> bool:
    """Return True when path ``f`` matches at least one of ``NOISE_PATTERNS``."""
    for pattern in NOISE_PATTERNS:
        if pattern.search(f):
            return True
    return False


def safe_read_json(path):
    """Load a JSON object from ``path`` without ever raising.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded dict, or ``{}`` when ``path`` is not a regular file, cannot
        be read or parsed, or holds a top-level value that is not a JSON
        object.
    """
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        # Deliberately best-effort: a damaged state file must not break the hook.
        return {}
    # Every caller uses ``.get()`` on the result, so a list/null/number payload
    # is treated the same as a missing file instead of raising AttributeError.
    return data if isinstance(data, dict) else {}


def write_json(path, data):
    """Serialize ``data`` to ``path`` as indented UTF-8 JSON.

    The parent directory of ``path`` is created when missing. When ``path``
    has no directory component, the ``.kiro`` directory is created instead
    (the file itself is still written to ``path``).
    """
    parent = os.path.dirname(path)
    if not parent:
        parent = ".kiro"
    os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(data, out, indent=2, ensure_ascii=False)


def get_changed_files() -> list[str]:
    """Return the sorted, de-duplicated paths reported by ``git status --porcelain``.

    For rename entries (``old -> new``) only the new path is kept. Surrounding
    double quotes are stripped and backslashes are converted to forward
    slashes. Any failure to run git, or a non-zero exit status, yields ``[]``.

    NOTE(review): paths that git emits in C-quoted form (for example names
    with non-ASCII characters under the default ``core.quotepath``) are not
    unescaped here — confirm whether such paths matter for this repository.
    """
    try:
        proc = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
        )
    except Exception:
        return []
    if proc.returncode != 0:
        return []

    unique_paths = set()
    for entry in proc.stdout.splitlines():
        # Each entry is a two-character status, a space, then the path.
        if len(entry) < 4:
            continue
        candidate = entry[3:].strip()
        if " -> " in candidate:
            candidate = candidate.split(" -> ")[-1]
        candidate = candidate.strip().strip('"').replace("\\", "/")
        if candidate:
            unique_paths.add(candidate)
    return sorted(unique_paths)


def git_diff_stat():
    """Return the stripped output of ``git diff --stat HEAD``.

    Returns an empty string when git cannot be run or exits non-zero.
    """
    try:
        proc = subprocess.run(
            ["git", "diff", "--stat", "HEAD"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=15
        )
    except Exception:
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()


def git_diff_files(files, max_total=30000):
    """Collect ``git diff HEAD`` output for each path in ``files``.

    Args:
        files: Paths to diff, processed in order.
        max_total: Once the accumulated length reaches this many characters,
            a truncation marker is appended and the remaining files are
            skipped.

    Returns:
        The per-file diffs joined with newlines. A single file's diff is cut
        to 5000 characters plus a marker. Files whose diff is empty, fails, or
        raises are skipped. Returns ``""`` when ``files`` is empty.
    """
    if not files:
        return ""

    pieces = []
    used = 0
    for path in files:
        if used >= max_total:
            pieces.append(f"\n[TRUNCATED: diff exceeds {max_total // 1000}KB]")
            break
        try:
            proc = subprocess.run(
                ["git", "diff", "HEAD", "--", path],
                capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
            )
        except Exception:
            continue
        if proc.returncode != 0:
            continue
        text = proc.stdout.strip()
        if not text:
            continue
        if len(text) > 5000:
            text = text[:5000] + f"\n[TRUNCATED: {path} diff too long]"
        pieces.append(text)
        used += len(text)
    return "\n".join(pieces)


def get_latest_prompt_log():
    """Return the content of the newest prompt log, capped at 3000 characters.

    "Newest" is the ``prompt_log_*`` file in ``docs/audit/prompt_logs`` whose
    name sorts last. Returns ``""`` when the directory is missing, holds no
    matching file, or the file cannot be read.
    """
    log_dir = os.path.join("docs", "audit", "prompt_logs")
    if not os.path.isdir(log_dir):
        return ""
    try:
        candidates = [name for name in os.listdir(log_dir) if name.startswith("prompt_log_")]
        if not candidates:
            return ""
        newest = max(candidates)
        with open(os.path.join(log_dir, newest), "r", encoding="utf-8") as fh:
            text = fh.read()
    except Exception:
        return ""
    if len(text) > 3000:
        return text[:3000] + "\n[TRUNCATED]"
    return text


# ── Step 1: detect changes and identify non-Kiro changes ──
def detect_changes(current_files):
    """Compare ``current_files`` with the promptSubmit snapshot.

    Returns:
        ``(real_files, external_files, no_change)`` where ``real_files`` is
        the sorted non-noise subset of ``current_files``, ``external_files``
        is the sorted non-noise set of paths present now but absent from the
        snapshot's ``files`` list, and ``no_change`` is True (with both lists
        empty) when there is no non-noise change at all.
    """
    baseline = set(safe_read_json(SNAPSHOT_PATH).get("files", []))

    # Real changes are whatever remains after dropping noise paths.
    meaningful = sorted(path for path in current_files if not is_noise(path))
    if not meaningful:
        return [], [], True

    # Paths that appeared after the snapshot was taken are reported as
    # external (CLI, scripts, etc.) changes made during the conversation.
    appeared = set(current_files) - baseline
    outside = sorted(path for path in appeared if not is_noise(path))

    return meaningful, outside, False


# ── Step 2: session log ──
def do_session_log(now, changed_files, external_files):
    """Write a Markdown session log into ``SESSION_LOG_DIR``.

    Args:
        now: Datetime used for the file name and the log header.
        changed_files: Non-noise changed paths; at most 80 are listed.
        external_files: Paths flagged as external; at most 30 are listed.

    The log also records the ``USER_PROMPT`` and ``AGENT_OUTPUT`` environment
    variables (truncated to 10000 / 50000 characters), the prompt id, the
    audit state, ``git diff --stat HEAD`` and ``git status --short``.
    """
    output_limit = 50000
    prompt_limit = 10000

    agent_output = os.environ.get("AGENT_OUTPUT", "")
    user_prompt = os.environ.get("USER_PROMPT", "")
    prompt_id = safe_read_json(PROMPT_ID_PATH).get("prompt_id", "unknown")
    audit_state = safe_read_json(STATE_PATH)

    if len(agent_output) > output_limit:
        agent_output = agent_output[:output_limit] + "\n\n[TRUNCATED: output exceeds 50KB]"
    if len(user_prompt) > prompt_limit:
        user_prompt = user_prompt[:prompt_limit] + "\n\n[TRUNCATED: prompt exceeds 10KB]"

    diff_stat = git_diff_stat()
    try:
        status_proc = subprocess.run(
            ["git", "status", "--short"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
        )
        git_status = status_proc.stdout.strip() if status_proc.returncode == 0 else ""
    except Exception:
        git_status = ""

    os.makedirs(SESSION_LOG_DIR, exist_ok=True)
    log_path = os.path.join(SESSION_LOG_DIR, f"session_{now.strftime('%Y%m%d_%H%M%S')}.md")

    # Optional section listing the external changes.
    if external_files:
        ext_list = "\n".join(external_files[:30])
        external_section = f"""
## External Changes (non-Kiro, {len(external_files)} files)

以下文件在本次对话期间由外部操作（CLI/脚本/手动编辑）产生：

```
{ext_list}
```
"""
    else:
        external_section = ""

    stamp = now.strftime('%Y-%m-%d %H:%M:%S %z')
    reasons_text = ', '.join(audit_state.get('reasons', [])) or 'none'
    changed_listing = chr(10).join(changed_files[:80]) if changed_files else '(none)'

    content = f"""# Session Log — {stamp}

- Prompt-ID: `{prompt_id}`
- Audit Required: `{audit_state.get('audit_required', 'N/A')}`
- Reasons: {reasons_text}
- External Changes: {len(external_files)} files

## User Input

```text
{user_prompt or '(not captured)'}
```

## Agent Output

```text
{agent_output or '(not captured)'}
```

## Changed Files ({len(changed_files)})

```
{changed_listing}
```
{external_section}
## Git Diff Stat

```
{diff_stat}
```

## Git Status

```
{git_status or '(clean)'}
```
"""
    with open(log_path, "w", encoding="utf-8") as out:
        out.write(content)


# ── Step 3: compliance pre-scan ──
def do_compliance_prescan(all_files):
    """Classify the changed files and persist the result to ``COMPLIANCE_PATH``.

    Args:
        all_files: Changed paths using forward slashes.

    Returns:
        A dict with these keys:

        - ``new_migration_sql``: paths matching ``MIGRATION_PATTERNS``
        - ``new_or_modified_sql``: every non-noise ``.sql`` path
        - ``code_without_docs``: ``{"file", "expected_docs"}`` entries for
          code files under a ``DOC_MAP`` prefix when none of the mapped
          documents is among the changed files
        - ``new_files``: always empty (nothing in this function fills it)
        - ``has_bd_manual`` / ``has_audit_record`` / ``has_ddl_baseline``:
          whether a matching path is among the changed files

        The file written to ``COMPLIANCE_PATH`` additionally holds
        ``needs_check`` and ``scanned_at``.

    Raises:
        Whatever ``write_json`` raises when the state file cannot be written.
    """
    code_suffixes = (".py", ".ts", ".tsx", ".js", ".jsx")
    result = {
        "new_migration_sql": [],
        "new_or_modified_sql": [],
        "code_without_docs": [],
        "new_files": [],
        "has_bd_manual": False,
        "has_audit_record": False,
        "has_ddl_baseline": False,
    }

    code_files = []
    doc_files = set()

    for f in all_files:
        # Audit records live under docs/audit/, which NOISE_PATTERNS filters
        # out. The check therefore has to run before the noise filter,
        # otherwise has_audit_record could never become True.
        if f.startswith(AUDIT_CHANGES_DIR):
            result["has_audit_record"] = True
        if is_noise(f):
            continue
        if any(mp.search(f) for mp in MIGRATION_PATTERNS):
            result["new_migration_sql"].append(f)
        if f.endswith(".sql"):
            result["new_or_modified_sql"].append(f)
        if BD_MANUAL_PATTERN.search(f):
            result["has_bd_manual"] = True
        if f.startswith(DDL_BASELINE_DIR):
            result["has_ddl_baseline"] = True
        if f.endswith(".md") or "/docs/" in f:
            doc_files.add(f)
        if f.endswith(code_suffixes):
            code_files.append(f)

    for cf in code_files:
        expected_docs = [
            doc
            for prefix, docs in DOC_MAP.items()
            if cf.startswith(prefix)
            for doc in docs
        ]
        if not expected_docs:
            continue
        # An expected entry is satisfied by the exact file, or - when the
        # entry is a directory (trailing "/") - by any changed doc beneath it.
        documented = any(
            ed in doc_files
            or (ed.endswith("/") and any(d.startswith(ed) for d in doc_files))
            for ed in expected_docs
        )
        if not documented:
            result["code_without_docs"].append({
                "file": cf,
                "expected_docs": expected_docs,
            })

    needs_check = bool(
        result["new_migration_sql"]
        or result["code_without_docs"]
    )

    write_json(COMPLIANCE_PATH, {
        "needs_check": needs_check,
        "scanned_at": now_taipei().isoformat(),
        **result,
    })
    return result


# ── Step 4: build the audit context ──
def do_build_audit_context(all_files, external_files, compliance):
    """Assemble the audit context and write it to ``CONTEXT_PATH``.

    Args:
        all_files: Changed paths; the first 50 are used as the changed-file
            list when the audit state has no ``changed_files`` entry.
        external_files: Paths flagged as external changes, stored as-is.
        compliance: Result dict of the compliance pre-scan (may be empty).

    The context combines the audit state, prompt metadata, a subset of the
    compliance result, ``git diff --stat``, the diffs of the high-risk files
    and the latest prompt log.
    """
    built_at = now_taipei()
    state = safe_read_json(STATE_PATH)
    prompt_meta = safe_read_json(PROMPT_ID_PATH)

    changed_files = state.get("changed_files", all_files[:50])
    high_risk_files = []
    for path in changed_files:
        for pattern in HIGH_RISK_PATTERNS:
            if pattern.search(path):
                high_risk_files.append(path)
                break

    diff_stat = git_diff_stat()
    high_risk_diff = git_diff_files(high_risk_files)
    prompt_log = get_latest_prompt_log()

    # Only these compliance fields are forwarded; list-valued ones default to
    # an empty list, flags default to False.
    compliance_summary = {
        key: compliance.get(key, [])
        for key in ("code_without_docs", "new_migration_sql")
    }
    compliance_summary.update(
        (key, compliance.get(key, False))
        for key in ("has_bd_manual", "has_audit_record", "has_ddl_baseline")
    )

    write_json(CONTEXT_PATH, {
        "built_at": built_at.isoformat(),
        "prompt_id": prompt_meta.get("prompt_id", "unknown"),
        "prompt_at": prompt_meta.get("at", ""),
        "audit_required": state.get("audit_required", False),
        "db_docs_required": state.get("db_docs_required", False),
        "reasons": state.get("reasons", []),
        "changed_files": changed_files,
        "high_risk_files": high_risk_files,
        "external_files": external_files,
        "compliance": compliance_summary,
        "diff_stat": diff_stat,
        "high_risk_diff": high_risk_diff,
        "latest_prompt_log": prompt_log,
    })


# ── Step 5: audit reminder (rate-limited to once per 15 minutes) ──
def do_audit_reminder(real_files):
    """Emit the pending-audit reminder, or reset the audit state.

    Does nothing unless the audit state has a truthy ``audit_required``.
    When ``real_files`` is empty, the audit state is reset and saved without
    any output. Otherwise, unless a reminder was recorded less than
    ``MIN_INTERVAL`` ago, the reminder time is saved, a message is written to
    stderr and the process exits with status 1.

    Raises:
        SystemExit: with code 1 after a reminder has been written.
    """
    state = safe_read_json(STATE_PATH)
    if not state.get("audit_required"):
        return

    # Clean work tree: clear the pending-audit state.
    if not real_files:
        state.update(
            audit_required=False,
            reasons=[],
            changed_files=[],
            last_reminded_at=None,
        )
        write_json(STATE_PATH, state)
        return

    now = now_taipei()
    previous = state.get("last_reminded_at")
    if previous:
        try:
            too_soon = (now - datetime.fromisoformat(previous)) < MIN_INTERVAL
        except Exception:
            # Unparseable or incomparable timestamp: treat as "not reminded yet".
            too_soon = False
        if too_soon:
            return

    state["last_reminded_at"] = now.isoformat()
    write_json(STATE_PATH, state)

    reasons = state.get("reasons", [])
    if reasons:
        reason_text = ", ".join(reasons)
    else:
        reason_text = "high-risk paths changed"

    # The number of external changes is read back from the audit context file.
    ext_count = len(safe_read_json(CONTEXT_PATH).get("external_files", []))
    ext_note = f" (includes {ext_count} external/non-Kiro changes)" if ext_count else ""

    sys.stderr.write(
        f"[AUDIT REMINDER] Pending audit ({reason_text}){ext_note}. "
        f"Run /audit to sync. (15min rate limit)\n"
    )
    sys.exit(1)


def main():
    """Run the agentStop pipeline: detect, log, pre-scan, build context, remind.

    Returns silently when the current directory is not inside a git work
    tree. Steps 2-5 are isolated from each other so that one failing step
    does not prevent the following ones.

    Raises:
        SystemExit: propagated from ``do_audit_reminder`` when a reminder was
            emitted (exit code 1).
    """
    # Not inside a git work tree: exit right away.
    try:
        r = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=5
        )
        if r.returncode != 0:
            return
    except Exception:
        return

    now = now_taipei()
    current_files = get_changed_files()

    # Step 1: detect changes
    real_files, external_files, no_change = detect_changes(current_files)

    # No file changes -> skip every review step. The reminder step is still
    # invoked with the empty list, because that is the only path on which it
    # resets a stale pending-audit state; with an empty list it neither
    # writes to stderr nor exits non-zero.
    if no_change:
        try:
            do_audit_reminder(real_files)
        except Exception:
            pass
        return

    # Step 2: session log (always recorded, including external changes)
    try:
        do_session_log(now, real_files, external_files)
    except Exception:
        pass

    # Step 3: compliance pre-scan
    compliance = {}
    try:
        compliance = do_compliance_prescan(current_files)
    except Exception:
        pass

    # Step 4: build the audit context (prepared for /audit)
    try:
        do_build_audit_context(current_files, external_files, compliance)
    except Exception:
        pass

    # Step 5: audit reminder (runs last because it may call sys.exit(1));
    # SystemExit is not an Exception subclass, so it propagates to the caller.
    try:
        do_audit_reminder(real_files)
    except Exception:
        pass


if __name__ == "__main__":
    # SystemExit is not an Exception subclass, so an exit requested by main()
    # passes through with its original code; any other error is swallowed so
    # the hook never fails on an unexpected exception.
    try:
        main()
    except Exception:
        pass