feat: P1-P3 全栈集成 — 数据库基础 + DWS 扩展 + 小程序鉴权 + 工程化体系

## P1 数据库基础
- zqyy_app: 创建 auth/biz schema、FDW 连接 etl_feiqiu
- etl_feiqiu: 创建 app schema RLS 视图、商品库存预警表
- 清理 assistant_abolish 残留数据

## P2 ETL/DWS 扩展
- 新增 DWS 助教订单贡献度表 (dws.assistant_order_contribution)
- 新增 assistant_order_contribution_task 任务及 RLS 视图
- member_consumption 增加充值字段、assistant_daily 增加处罚字段
- 更新 ODS/DWD/DWS 任务文档及业务规则文档
- 更新 consistency_checker、flow_runner、task_registry 等核心模块

## P3 小程序鉴权系统
- 新增 xcx_auth 路由/schema(微信登录 + JWT)
- 新增 wechat/role/matching/application 服务层
- zqyy_app 鉴权表迁移 + 角色权限种子数据
- auth/dependencies.py 支持小程序 JWT 鉴权

## 文档与审计
- 新增 DOCUMENTATION-MAP 文档导航
- 新增 7 份 BD_Manual 数据库变更文档
- 更新 DDL 基线快照(etl_feiqiu 6 schema + zqyy_app auth)
- 新增全栈集成审计记录、部署检查清单更新
- 新增 BACKLOG 路线图、FDW→Core 迁移计划

## Kiro 工程化
- 新增 5 个 Spec(P1/P2/P3/全栈集成/核心业务)
- 新增审计自动化脚本(agent_on_stop/build_audit_context/compliance_prescan)
- 新增 6 个 Hook(合规检查/会话日志/提交审计等)
- 新增 doc-map steering 文件

## 运维与测试
- 新增 ops 脚本:迁移验证/API 健康检查/ETL 监控/集成报告
- 新增属性测试:test_dws_contribution / test_auth_system
- 清理过期 export 报告文件
- 更新 .gitignore 排除规则
This commit is contained in:
Neo
2026-02-26 08:03:53 +08:00
parent fafc95e64c
commit b25308c3f4
224 changed files with 17660 additions and 32198 deletions

View File

@@ -0,0 +1,498 @@
#!/usr/bin/env python3
"""agent_on_stop — merged agentStop hook script.

Combines the former audit_reminder + session_log + change_compliance_prescan
+ build_audit_context hooks:
1. Detect changes (compare against the promptSubmit snapshot; spot non-Kiro changes)
2. If no files changed at all -> skip every review step and exit silently
3. Write a session log -> docs/audit/session_logs/
4. Compliance prescan -> .kiro/.compliance_state.json
5. Build the audit context -> .kiro/.audit_context.json
6. Audit reminder (rate-limited to once per 15 minutes) -> stderr

Each functional block is isolated in try/except so one failure does not
affect the others.
"""
import hashlib
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone, timedelta
TZ_TAIPEI = timezone(timedelta(hours=8))
MIN_INTERVAL = timedelta(minutes=15)
# Path constants
STATE_PATH = os.path.join(".kiro", ".audit_state.json")
SNAPSHOT_PATH = os.path.join(".kiro", ".git_snapshot.json")
COMPLIANCE_PATH = os.path.join(".kiro", ".compliance_state.json")
CONTEXT_PATH = os.path.join(".kiro", ".audit_context.json")
PROMPT_ID_PATH = os.path.join(".kiro", ".last_prompt_id.json")
SESSION_LOG_DIR = os.path.join("docs", "audit", "session_logs")
# Noise paths (excluded from change detection)
NOISE_PATTERNS = [
    re.compile(r"^docs/audit/"),
    re.compile(r"^\.kiro/"),
    re.compile(r"^\.hypothesis/"),
    re.compile(r"^tmp/"),
    re.compile(r"\.png$"),
    re.compile(r"\.jpg$"),
]
# High-risk paths (trigger audit; their diffs are captured for the audit context)
HIGH_RISK_PATTERNS = [
    re.compile(r"^apps/etl/connectors/feiqiu/(api|cli|config|database|loaders|models|orchestration|scd|tasks|utils|quality)/"),
    re.compile(r"^apps/backend/app/"),
    re.compile(r"^apps/admin-web/src/"),
    re.compile(r"^apps/miniprogram/"),
    re.compile(r"^packages/shared/"),
    re.compile(r"^db/"),
]
# Code-path -> documentation mapping (used by the compliance check)
DOC_MAP = {
    "apps/backend/app/routers/": ["apps/backend/docs/API-REFERENCE.md"],
    "apps/backend/app/services/": ["apps/backend/docs/API-REFERENCE.md", "apps/backend/README.md"],
    "apps/backend/app/auth/": ["apps/backend/docs/API-REFERENCE.md", "apps/backend/README.md"],
    "apps/etl/connectors/feiqiu/tasks/": ["apps/etl/connectors/feiqiu/docs/etl_tasks/"],
    "apps/etl/connectors/feiqiu/loaders/": ["apps/etl/connectors/feiqiu/docs/etl_tasks/"],
    "apps/etl/connectors/feiqiu/scd/": ["apps/etl/connectors/feiqiu/docs/business-rules/scd2_rules.md"],
    "apps/etl/connectors/feiqiu/orchestration/": ["apps/etl/connectors/feiqiu/docs/architecture/"],
    "apps/admin-web/src/": ["apps/admin-web/README.md"],
    "apps/miniprogram/": ["apps/miniprogram/README.md"],
    "packages/shared/": ["packages/shared/README.md"],
}
# Migration SQL locations
MIGRATION_PATTERNS = [
    re.compile(r"^db/etl_feiqiu/migrations/.*\.sql$"),
    re.compile(r"^db/zqyy_app/migrations/.*\.sql$"),
    re.compile(r"^db/fdw/.*\.sql$"),
]
BD_MANUAL_PATTERN = re.compile(r"^docs/database/BD_Manual_.*\.md$")
DDL_BASELINE_DIR = "docs/database/ddl/"
AUDIT_CHANGES_DIR = "docs/audit/changes/"
def now_taipei():
    """Return the current time in the Taipei timezone (UTC+8)."""
    return datetime.now(TZ_TAIPEI)
def sha1hex(s: str) -> str:
    """Return the hex SHA-1 digest of *s*.

    NOTE(review): appears unused within this script — presumably kept for
    parity with prompt_on_submit; confirm before removing.
    """
    return hashlib.sha1(s.encode("utf-8")).hexdigest()
def is_noise(f: str) -> bool:
    """True if path *f* matches a noise pattern and should be ignored."""
    return any(p.search(f) for p in NOISE_PATTERNS)
def safe_read_json(path):
    """Read JSON from *path*; return {} on a missing file or any error."""
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception:
        return {}
def write_json(path, data):
    """Write *data* as pretty-printed UTF-8 JSON, creating the parent dir."""
    os.makedirs(os.path.dirname(path) or ".kiro", exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
def get_changed_files() -> list[str]:
    """Return the sorted, de-duplicated paths reported by `git status --porcelain`."""
    try:
        proc = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
        )
    except Exception:
        return []
    if proc.returncode != 0:
        return []
    seen = set()
    for raw in proc.stdout.splitlines():
        if len(raw) < 4:
            continue
        candidate = raw[3:].strip()
        # A rename entry reads `old -> new`; keep only the new path.
        if " -> " in candidate:
            candidate = candidate.split(" -> ")[-1]
        candidate = candidate.strip().strip('"').replace("\\", "/")
        if candidate:
            seen.add(candidate)
    return sorted(seen)
def git_diff_stat():
    """Return the output of `git diff --stat HEAD`, or '' on any failure."""
    try:
        proc = subprocess.run(
            ["git", "diff", "--stat", "HEAD"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=15
        )
    except Exception:
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()
def git_diff_files(files, max_total=30000):
    """Concatenate per-file `git diff HEAD` output, capped at *max_total* chars."""
    if not files:
        return ""
    pieces = []
    used = 0
    for path in files:
        # Stop collecting once the overall budget is exhausted.
        if used >= max_total:
            pieces.append(f"\n[TRUNCATED: diff exceeds {max_total // 1000}KB]")
            break
        try:
            proc = subprocess.run(
                ["git", "diff", "HEAD", "--", path],
                capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
            )
        except Exception:
            continue
        if proc.returncode != 0:
            continue
        chunk = proc.stdout.strip()
        if not chunk:
            continue
        # Per-file cap of 5000 characters.
        if len(chunk) > 5000:
            chunk = chunk[:5000] + f"\n[TRUNCATED: {path} diff too long]"
        pieces.append(chunk)
        used += len(chunk)
    return "\n".join(pieces)
def get_latest_prompt_log():
    """Return the newest prompt_log_* file's content (truncated to 3000 chars)."""
    log_dir = os.path.join("docs", "audit", "prompt_logs")
    if not os.path.isdir(log_dir):
        return ""
    try:
        candidates = [n for n in os.listdir(log_dir) if n.startswith("prompt_log_")]
        if not candidates:
            return ""
        # Filenames embed a sortable timestamp, so max() picks the newest.
        with open(os.path.join(log_dir, max(candidates)), "r", encoding="utf-8") as fh:
            content = fh.read()
    except Exception:
        return ""
    if len(content) > 3000:
        return content[:3000] + "\n[TRUNCATED]"
    return content
# ── Step 1: detect changes / identify non-Kiro changes ──
def detect_changes(current_files):
    """Compare against the promptSubmit snapshot.

    Returns a tuple (real_files, external_files, no_change).
    """
    snapshot_files = set(safe_read_json(SNAPSHOT_PATH).get("files", []))
    # Real changes: everything that is not noise.
    meaningful = sorted(f for f in current_files if not is_noise(f))
    if not meaningful:
        return [], [], True
    # Non-Kiro changes: files present at agentStop but absent from the
    # promptSubmit snapshot — produced by external actions (CLI, scripts,
    # manual edits) during the conversation.
    appeared = set(current_files) - snapshot_files
    externals = sorted(f for f in appeared if not is_noise(f))
    return meaningful, externals, False
# ── Step 2: session log ──
def do_session_log(now, changed_files, external_files):
    """Write a markdown session log to docs/audit/session_logs/.

    Captures the user prompt and agent output (from env vars injected by
    Kiro), the changed/external file lists, and git diff/status summaries.
    """
    agent_output = os.environ.get("AGENT_OUTPUT", "")
    user_prompt = os.environ.get("USER_PROMPT", "")
    prompt_info = safe_read_json(PROMPT_ID_PATH)
    audit_state = safe_read_json(STATE_PATH)
    prompt_id = prompt_info.get("prompt_id", "unknown")
    # Truncate oversized content so the log file stays manageable.
    max_len = 50000
    if len(agent_output) > max_len:
        agent_output = agent_output[:max_len] + "\n\n[TRUNCATED: output exceeds 50KB]"
    if len(user_prompt) > 10000:
        user_prompt = user_prompt[:10000] + "\n\n[TRUNCATED: prompt exceeds 10KB]"
    diff_stat = git_diff_stat()
    git_status = ""
    try:
        r = subprocess.run(
            ["git", "status", "--short"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
        )
        git_status = r.stdout.strip() if r.returncode == 0 else ""
    except Exception:
        pass
    os.makedirs(SESSION_LOG_DIR, exist_ok=True)
    filename = f"session_{now.strftime('%Y%m%d_%H%M%S')}.md"
    # Section listing changes made outside Kiro during this conversation.
    external_section = ""
    if external_files:
        ext_list = "\n".join(external_files[:30])
        external_section = f"""
## External Changes (non-Kiro, {len(external_files)} files)
以下文件在本次对话期间由外部操作CLI/脚本/手动编辑)产生:
```
{ext_list}
```
"""
    content = f"""# Session Log — {now.strftime('%Y-%m-%d %H:%M:%S %z')}
- Prompt-ID: `{prompt_id}`
- Audit Required: `{audit_state.get('audit_required', 'N/A')}`
- Reasons: {', '.join(audit_state.get('reasons', [])) or 'none'}
- External Changes: {len(external_files)} files
## User Input
```text
{user_prompt or '(not captured)'}
```
## Agent Output
```text
{agent_output or '(not captured)'}
```
## Changed Files ({len(changed_files)})
```
{chr(10).join(changed_files[:80]) if changed_files else '(none)'}
```
{external_section}
## Git Diff Stat
```
{diff_stat}
```
## Git Status
```
{git_status or '(clean)'}
```
"""
    with open(os.path.join(SESSION_LOG_DIR, filename), "w", encoding="utf-8") as f:
        f.write(content)
# ── Step 3: compliance prescan ──
def do_compliance_prescan(all_files):
    """Classify changed files and persist the compliance scan result.

    Writes .kiro/.compliance_state.json and returns the raw result dict
    (migration SQL, code missing doc updates, presence of BD_Manual /
    audit-record / DDL-baseline changes).
    """
    result = {
        "new_migration_sql": [],
        "new_or_modified_sql": [],
        "code_without_docs": [],
        "new_files": [],
        "has_bd_manual": False,
        "has_audit_record": False,
        "has_ddl_baseline": False,
    }
    code_files = []
    doc_files = set()
    for f in all_files:
        if is_noise(f):
            continue
        for mp in MIGRATION_PATTERNS:
            if mp.search(f):
                result["new_migration_sql"].append(f)
                break
        if f.endswith(".sql"):
            result["new_or_modified_sql"].append(f)
        if BD_MANUAL_PATTERN.search(f):
            result["has_bd_manual"] = True
        if f.startswith(AUDIT_CHANGES_DIR):
            result["has_audit_record"] = True
        if f.startswith(DDL_BASELINE_DIR):
            result["has_ddl_baseline"] = True
        if f.endswith(".md") or "/docs/" in f:
            doc_files.add(f)
        if f.endswith((".py", ".ts", ".tsx", ".js", ".jsx")):
            code_files.append(f)
    # A code file counts as documented when any mapped doc file — or any
    # file under a mapped doc directory (trailing "/") — also changed.
    for cf in code_files:
        expected_docs = []
        for prefix, docs in DOC_MAP.items():
            if cf.startswith(prefix):
                expected_docs.extend(docs)
        if expected_docs:
            has_doc = False
            for ed in expected_docs:
                if ed in doc_files:
                    has_doc = True
                    break
                if ed.endswith("/") and any(d.startswith(ed) for d in doc_files):
                    has_doc = True
                    break
            if not has_doc:
                result["code_without_docs"].append({
                    "file": cf,
                    "expected_docs": expected_docs,
                })
    needs_check = bool(
        result["new_migration_sql"]
        or result["code_without_docs"]
    )
    now = now_taipei()
    write_json(COMPLIANCE_PATH, {
        "needs_check": needs_check,
        "scanned_at": now.isoformat(),
        **result,
    })
    return result
# ── Step 4: build the audit context ──
def do_build_audit_context(all_files, external_files, compliance):
    """Merge hook state and git summaries into .kiro/.audit_context.json.

    The resulting file is intended as the input of the /audit flow.
    """
    now = now_taipei()
    audit_state = safe_read_json(STATE_PATH)
    prompt_info = safe_read_json(PROMPT_ID_PATH)
    # Prefer the flagged file list; fall back to the first 50 current files.
    changed_files = audit_state.get("changed_files", all_files[:50])
    high_risk_files = [
        f for f in changed_files
        if any(p.search(f) for p in HIGH_RISK_PATTERNS)
    ]
    # Only high-risk files get full diffs, to bound the context size.
    diff_stat = git_diff_stat()
    high_risk_diff = git_diff_files(high_risk_files)
    prompt_log = get_latest_prompt_log()
    context = {
        "built_at": now.isoformat(),
        "prompt_id": prompt_info.get("prompt_id", "unknown"),
        "prompt_at": prompt_info.get("at", ""),
        "audit_required": audit_state.get("audit_required", False),
        "db_docs_required": audit_state.get("db_docs_required", False),
        "reasons": audit_state.get("reasons", []),
        "changed_files": changed_files,
        "high_risk_files": high_risk_files,
        "external_files": external_files,
        "compliance": {
            "code_without_docs": compliance.get("code_without_docs", []),
            "new_migration_sql": compliance.get("new_migration_sql", []),
            "has_bd_manual": compliance.get("has_bd_manual", False),
            "has_audit_record": compliance.get("has_audit_record", False),
            "has_ddl_baseline": compliance.get("has_ddl_baseline", False),
        },
        "diff_stat": diff_stat,
        "high_risk_diff": high_risk_diff,
        "latest_prompt_log": prompt_log,
    }
    write_json(CONTEXT_PATH, context)
# ── Step 5: audit reminder (rate-limited to 15 minutes) ──
def do_audit_reminder(real_files):
    """Emit a pending-audit reminder to stderr, then exit(1) to surface it.

    Clears the pending flag when the worktree is effectively clean, and
    rate-limits reminders to one per MIN_INTERVAL (15 minutes).
    """
    state = safe_read_json(STATE_PATH)
    if not state.get("audit_required"):
        return
    # Worktree clean (no non-noise changes): clear the pending-audit flag.
    if not real_files:
        state["audit_required"] = False
        state["reasons"] = []
        state["changed_files"] = []
        state["last_reminded_at"] = None
        write_json(STATE_PATH, state)
        return
    now = now_taipei()
    last_str = state.get("last_reminded_at")
    if last_str:
        try:
            last = datetime.fromisoformat(last_str)
            if (now - last) < MIN_INTERVAL:
                return
        except Exception:
            # Unparseable timestamp: treat as never reminded.
            pass
    state["last_reminded_at"] = now.isoformat()
    write_json(STATE_PATH, state)
    reasons = state.get("reasons", [])
    reason_text = ", ".join(reasons) if reasons else "high-risk paths changed"
    ext_note = ""
    # Read the external-change count back from the built audit context.
    ctx = safe_read_json(CONTEXT_PATH)
    ext_count = len(ctx.get("external_files", []))
    if ext_count:
        ext_note = f" (includes {ext_count} external/non-Kiro changes)"
    sys.stderr.write(
        f"[AUDIT REMINDER] Pending audit ({reason_text}){ext_note}. "
        f"Run /audit to sync. (15min rate limit)\n"
    )
    # Non-zero exit makes the hook runner surface the reminder.
    sys.exit(1)
def main():
    """agentStop entry point: run all hook steps, each isolated in try/except."""
    # Exit immediately when not inside a git repository.
    try:
        r = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=5
        )
        if r.returncode != 0:
            return
    except Exception:
        return
    now = now_taipei()
    current_files = get_changed_files()
    # Step 1: detect changes.
    real_files, external_files, no_change = detect_changes(current_files)
    # Nothing changed at all -> skip every review step.
    if no_change:
        return
    # Step 2: session log (always recorded, including external changes).
    try:
        do_session_log(now, real_files, external_files)
    except Exception:
        pass
    # Step 3: compliance prescan.
    compliance = {}
    try:
        compliance = do_compliance_prescan(current_files)
    except Exception:
        pass
    # Step 4: build the audit context (prepared for /audit to consume).
    try:
        do_build_audit_context(current_files, external_files, compliance)
    except Exception:
        pass
    # Step 5: audit reminder — runs last because it may sys.exit(1).
    try:
        do_audit_reminder(real_files)
    except SystemExit:
        raise
    except Exception:
        pass
if __name__ == "__main__":
    try:
        main()
    except SystemExit as e:
        # Propagate the reminder's exit code to the hook runner.
        sys.exit(e.code)
    except Exception:
        # A hook must never block the agent: swallow unexpected errors.
        pass

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""build_audit_context — merge all upstream hook outputs into one audit snapshot.

Reads:
- .kiro/.audit_state.json (from audit-flagger: risk verdict, changed-file list)
- .kiro/.compliance_state.json (from change-compliance: missing docs, migration state)
- .kiro/.last_prompt_id.json (from prompt-audit-log: prompt-ID traceability)
- git diff --stat HEAD (change summary)
- git diff HEAD (high-risk files only, truncated to a reasonable length)

Writes: .kiro/.audit_context.json (the sole input of the audit-writer sub-agent)
"""
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone, timedelta
TZ_TAIPEI = timezone(timedelta(hours=8))
CONTEXT_PATH = os.path.join(".kiro", ".audit_context.json")
# High-risk paths (only these files are diffed, to keep the diff small)
HIGH_RISK_PATTERNS = [
    re.compile(r"^apps/etl/connectors/feiqiu/(api|cli|config|database|loaders|models|orchestration|scd|tasks|utils|quality)/"),
    re.compile(r"^apps/backend/app/"),
    re.compile(r"^apps/admin-web/src/"),
    re.compile(r"^apps/miniprogram/"),
    re.compile(r"^packages/shared/"),
    re.compile(r"^db/"),
]
def safe_read_json(path):
    """Load JSON from *path*, returning {} when missing or unparseable."""
    if os.path.isfile(path):
        try:
            with open(path, "r", encoding="utf-8") as fh:
                return json.load(fh)
        except Exception:
            pass
    return {}
def git_diff_stat():
    """Return `git diff --stat HEAD` output, or '' on any failure."""
    try:
        completed = subprocess.run(
            ["git", "diff", "--stat", "HEAD"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=15
        )
        if completed.returncode == 0:
            return completed.stdout.strip()
    except Exception:
        pass
    return ""
def git_diff_files(files, max_total=30000):
    """Get the git diff of *files*, truncated to *max_total* characters total."""
    if not files:
        return ""
    # Diff one file at a time so the command line never grows too long.
    collected = []
    budget_used = 0
    for name in files:
        if budget_used >= max_total:
            collected.append(f"\n[TRUNCATED: diff exceeds {max_total // 1000}KB limit]")
            break
        try:
            completed = subprocess.run(
                ["git", "diff", "HEAD", "--", name],
                capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
            )
        except Exception:
            continue
        if completed.returncode != 0:
            continue
        piece = completed.stdout.strip()
        if not piece:
            continue
        # Per-file truncation at 5000 characters.
        if len(piece) > 5000:
            piece = piece[:5000] + f"\n[TRUNCATED: {name} diff too long]"
        collected.append(piece)
        budget_used += len(piece)
    return "\n".join(collected)
def get_latest_prompt_log():
    """Return the newest prompt log file's content (for traceability)."""
    log_dir = os.path.join("docs", "audit", "prompt_logs")
    if not os.path.isdir(log_dir):
        return ""
    try:
        logs = [name for name in os.listdir(log_dir) if name.startswith("prompt_log_")]
        if not logs:
            return ""
        # Timestamped filenames sort chronologically, so max() is the newest.
        with open(os.path.join(log_dir, max(logs)), "r", encoding="utf-8") as fh:
            text = fh.read()
        # Truncate overly long content.
        return text if len(text) <= 3000 else text[:3000] + "\n[TRUNCATED]"
    except Exception:
        return ""
def main():
    """Build .kiro/.audit_context.json from all upstream hook outputs."""
    now = datetime.now(TZ_TAIPEI)
    # Read the upstream hook outputs (all tolerate missing files).
    audit_state = safe_read_json(os.path.join(".kiro", ".audit_state.json"))
    compliance = safe_read_json(os.path.join(".kiro", ".compliance_state.json"))
    prompt_id_info = safe_read_json(os.path.join(".kiro", ".last_prompt_id.json"))
    # Extract high-risk files from the audit state.
    changed_files = audit_state.get("changed_files", [])
    high_risk_files = [
        f for f in changed_files
        if any(p.search(f) for p in HIGH_RISK_PATTERNS)
    ]
    # Collect diffs (high-risk files only).
    diff_stat = git_diff_stat()
    high_risk_diff = git_diff_files(high_risk_files)
    # Latest prompt log for traceability.
    prompt_log = get_latest_prompt_log()
    # Assemble the unified context.
    context = {
        "built_at": now.isoformat(),
        "prompt_id": prompt_id_info.get("prompt_id", "unknown"),
        "prompt_at": prompt_id_info.get("at", ""),
        # From audit-flagger
        "audit_required": audit_state.get("audit_required", False),
        "db_docs_required": audit_state.get("db_docs_required", False),
        "reasons": audit_state.get("reasons", []),
        "changed_files": changed_files,
        "high_risk_files": high_risk_files,
        # From change-compliance-prescan
        "compliance": {
            "code_without_docs": compliance.get("code_without_docs", []),
            "new_migration_sql": compliance.get("new_migration_sql", []),
            "has_bd_manual": compliance.get("has_bd_manual", False),
            "has_audit_record": compliance.get("has_audit_record", False),
            "has_ddl_baseline": compliance.get("has_ddl_baseline", False),
        },
        # git summary
        "diff_stat": diff_stat,
        "high_risk_diff": high_risk_diff,
        # prompt traceability
        "latest_prompt_log": prompt_log,
    }
    os.makedirs(".kiro", exist_ok=True)
    with open(CONTEXT_PATH, "w", encoding="utf-8") as f:
        json.dump(context, f, indent=2, ensure_ascii=False)
    # Print a short summary to stdout.
    print(f"audit_context built: {len(changed_files)} files, "
          f"{len(high_risk_files)} high-risk, "
          f"{len(compliance.get('code_without_docs', []))} docs missing")
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Report the failure and exit nonzero so the hook runner can see it.
        sys.stderr.write(f"build_audit_context failed: {e}\n")
        sys.exit(1)

View File

@@ -0,0 +1,243 @@
#!/usr/bin/env python3
"""change_compliance_prescan — prescan changed files for compliance review items.

Invoked by the askAgent hook at agentStop; gives the LLM a compact review
checklist so it does not waste tokens scanning files itself.

Output to stdout (read by askAgent):
- "NO_CHECK_NEEDED" when nothing requires review
- otherwise a structured JSON checklist
"""
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone, timedelta
TZ_TAIPEI = timezone(timedelta(hours=8))
STATE_PATH = os.path.join(".kiro", ".audit_state.json")
# Documentation mapping as defined in doc-map
DOC_MAP = {
    # code path prefix -> docs that should be updated alongside it
    "apps/backend/app/routers/": ["apps/backend/docs/API-REFERENCE.md"],
    "apps/backend/app/services/": ["apps/backend/docs/API-REFERENCE.md", "apps/backend/README.md"],
    "apps/backend/app/auth/": ["apps/backend/docs/API-REFERENCE.md", "apps/backend/README.md"],
    "apps/etl/connectors/feiqiu/tasks/": ["apps/etl/connectors/feiqiu/docs/etl_tasks/"],
    "apps/etl/connectors/feiqiu/loaders/": ["apps/etl/connectors/feiqiu/docs/etl_tasks/"],
    "apps/etl/connectors/feiqiu/scd/": ["apps/etl/connectors/feiqiu/docs/business-rules/scd2_rules.md"],
    "apps/etl/connectors/feiqiu/orchestration/": ["apps/etl/connectors/feiqiu/docs/architecture/"],
    "apps/admin-web/src/": ["apps/admin-web/README.md"],
    "apps/miniprogram/": ["apps/miniprogram/README.md"],
    "packages/shared/": ["packages/shared/README.md"],
}
# DDL baseline files (defined in doc-map)
DDL_BASELINE_DIR = "docs/database/ddl/"
# Migration script locations
MIGRATION_PATTERNS = [
    re.compile(r"^db/etl_feiqiu/migrations/.*\.sql$"),
    re.compile(r"^db/zqyy_app/migrations/.*\.sql$"),
    re.compile(r"^db/fdw/.*\.sql$"),
]
# DB documentation path
BD_MANUAL_PATTERN = re.compile(r"^docs/database/BD_Manual_.*\.md$")
# Audit record path
AUDIT_CHANGES_DIR = "docs/audit/changes/"
# Noise paths (exempt from compliance checks)
NOISE = [
    re.compile(r"^docs/audit/"),
    re.compile(r"^\.kiro/"),
    re.compile(r"^\.hypothesis/"),
    re.compile(r"^tmp/"),
    re.compile(r"\.png$"),
    re.compile(r"\.jpg$"),
]
def safe_read_json(path):
    """Read JSON from *path*; any failure (missing, unreadable, bad JSON) yields {}."""
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception:
        return {}
def get_changed_files():
    """Return changed files from the audit state, falling back to `git status`.

    Prefers the `changed_files` list recorded in .kiro/.audit_state.json by
    the promptSubmit hook; only shells out to git when that list is empty.
    """
    state = safe_read_json(STATE_PATH)
    files = state.get("changed_files", [])
    if files:
        return files
    # Fall back to `git status --porcelain`.
    try:
        r = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
        )
        if r.returncode != 0:
            return []
        result = []
        for line in r.stdout.splitlines():
            if len(line) < 4:
                continue
            path = line[3:].strip()
            # BUGFIX: split the rename arrow BEFORE stripping quotes —
            # the old order (strip quotes, then split `old -> new`) left a
            # stray leading quote on the new path of quoted rename entries.
            if " -> " in path:
                path = path.split(" -> ")[-1]
            path = path.strip().strip('"').replace("\\", "/")
            if path:
                result.append(path)
        return sorted(set(result))
    except Exception:
        return []
def is_noise(f):
    """True if *f* matches a NOISE pattern and is exempt from compliance checks."""
    return any(p.search(f) for p in NOISE)
def classify_files(files):
    """Classify changed files into a compliance review checklist dict."""
    result = {
        "new_migration_sql": [],   # newly added migration SQL
        "new_or_modified_sql": [], # all SQL changes
        "code_without_docs": [],   # code changes lacking matching doc changes
        "new_files": [],           # new files (for directory-convention checks)
        "has_bd_manual": False,    # any BD_Manual doc change present
        "has_audit_record": False, # any audit-record change present
        "has_ddl_baseline": False, # any DDL baseline change present
    }
    code_files = []
    doc_files = set()
    for f in files:
        if is_noise(f):
            continue
        # Migration SQL
        for mp in MIGRATION_PATTERNS:
            if mp.search(f):
                result["new_migration_sql"].append(f)
                break
        # SQL files
        if f.endswith(".sql"):
            result["new_or_modified_sql"].append(f)
        # BD_Manual
        if BD_MANUAL_PATTERN.search(f):
            result["has_bd_manual"] = True
        # Audit records
        if f.startswith(AUDIT_CHANGES_DIR):
            result["has_audit_record"] = True
        # DDL baseline
        if f.startswith(DDL_BASELINE_DIR):
            result["has_ddl_baseline"] = True
        # Documentation files
        if f.endswith(".md") or "/docs/" in f:
            doc_files.add(f)
        # Code files (not docs, not config)
        if f.endswith((".py", ".ts", ".tsx", ".js", ".jsx")):
            code_files.append(f)
    # Check whether each code file has a corresponding doc change.
    for cf in code_files:
        expected_docs = []
        for prefix, docs in DOC_MAP.items():
            if cf.startswith(prefix):
                expected_docs.extend(docs)
        if expected_docs:
            # Any one of the mapped docs being changed is sufficient.
            has_doc = False
            for ed in expected_docs:
                if ed in doc_files:
                    has_doc = True
                    break
                # Directory-level match (mapped entry ends with "/")
                if ed.endswith("/"):
                    if any(d.startswith(ed) for d in doc_files):
                        has_doc = True
                        break
            if not has_doc:
                result["code_without_docs"].append({
                    "file": cf,
                    "expected_docs": expected_docs,
                })
    return result
COMPLIANCE_STATE_PATH = os.path.join(".kiro", ".compliance_state.json")
def save_compliance_state(result, needs_check):
    """Persist the compliance scan result for the audit-writer sub-agent."""
    os.makedirs(".kiro", exist_ok=True)
    now = datetime.now(TZ_TAIPEI)
    state = {
        "needs_check": needs_check,
        "scanned_at": now.isoformat(),
        **result,
    }
    with open(COMPLIANCE_STATE_PATH, "w", encoding="utf-8") as f:
        json.dump(state, f, indent=2, ensure_ascii=False)
def main():
    """Entry point: classify changed files and emit a review checklist.

    Prints "NO_CHECK_NEEDED" when nothing requires review, otherwise a
    compact JSON checklist for the LLM. Always persists the scan result to
    .kiro/.compliance_state.json for the audit-writer sub-agent.
    """
    # Single empty-state literal (was duplicated in two early-return branches).
    empty_result = {
        "new_migration_sql": [], "new_or_modified_sql": [],
        "code_without_docs": [], "new_files": [],
        "has_bd_manual": False, "has_audit_record": False,
        "has_ddl_baseline": False,
    }
    files = get_changed_files()
    # Filter noise paths; no files at all is the same empty case.
    real_files = [f for f in files if not is_noise(f)]
    if not real_files:
        save_compliance_state(empty_result, False)
        print("NO_CHECK_NEEDED")
        return
    result = classify_files(files)
    # Review is needed for new migrations or code lacking doc updates.
    # FIX: coerced to bool so the persisted "needs_check" field is a stable
    # JSON boolean (agent_on_stop writes a bool; here a truthy *list* leaked
    # into the JSON). Also dropped the redundant third disjunct
    # `(new_migration_sql and not has_ddl_baseline)`, which is implied by
    # the first term.
    needs_check = bool(
        result["new_migration_sql"]
        or result["code_without_docs"]
    )
    # Always persist the scan result.
    save_compliance_state(result, needs_check)
    if not needs_check:
        print("NO_CHECK_NEEDED")
        return
    # Emit the compact JSON checklist for LLM review.
    print(json.dumps(result, indent=2, ensure_ascii=False))
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Never block the agent on errors, but leave a trace on stderr
        # instead of silently discarding the exception (`e` was previously
        # bound and never used).
        sys.stderr.write(f"change_compliance_prescan failed: {e}\n")
        print("NO_CHECK_NEEDED")

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""prompt_on_submit — merged promptSubmit hook script.

Combines the former audit_flagger + prompt_audit_log functionality:
1. git status -> risk verdict -> write .kiro/.audit_state.json
2. Log the prompt -> docs/audit/prompt_logs/
3. Snapshot the current git fingerprint -> .kiro/.git_snapshot.json
   (compared at agentStop)

Each functional block is isolated in try/except so one failure does not
affect the others.
"""
import hashlib
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone, timedelta
TZ_TAIPEI = timezone(timedelta(hours=8))
# ── Risk rules (from audit_flagger) ──
RISK_RULES = [
    (re.compile(r"^apps/etl/connectors/feiqiu/(api|cli|config|database|loaders|models|orchestration|scd|tasks|utils|quality)/"), "etl"),
    (re.compile(r"^apps/backend/app/"), "backend"),
    (re.compile(r"^apps/admin-web/src/"), "admin-web"),
    (re.compile(r"^apps/miniprogram/(miniapp|miniprogram)/"), "miniprogram"),
    (re.compile(r"^packages/shared/"), "shared"),
    (re.compile(r"^db/"), "db"),
]
# Noise paths (ignored for risk flagging)
NOISE_PATTERNS = [
    re.compile(r"^docs/audit/"),
    re.compile(r"^\.kiro/"),
    re.compile(r"^tmp/"),
    re.compile(r"^\.hypothesis/"),
]
# Paths that indicate a database/schema change (require DB docs)
DB_PATTERNS = [
    re.compile(r"^db/"),
    re.compile(r"/migrations/"),
    re.compile(r"\.sql$"),
    re.compile(r"\.prisma$"),
]
STATE_PATH = os.path.join(".kiro", ".audit_state.json")
SNAPSHOT_PATH = os.path.join(".kiro", ".git_snapshot.json")
PROMPT_ID_PATH = os.path.join(".kiro", ".last_prompt_id.json")
def now_taipei():
    """Current wall-clock time in the Taipei timezone (UTC+8)."""
    return datetime.now(tz=TZ_TAIPEI)
def sha1hex(s: str) -> str:
    """Hex SHA-1 digest of *s*, used as a change-set fingerprint."""
    digest = hashlib.sha1(s.encode("utf-8"))
    return digest.hexdigest()
def get_changed_files() -> list[str]:
    """Parse `git status --porcelain` into a list of changed paths.

    Unlike the agentStop variant this intentionally preserves the raw
    order and does not de-duplicate.
    """
    try:
        proc = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=10
        )
    except Exception:
        return []
    if proc.returncode != 0:
        return []
    paths = []
    for raw in proc.stdout.splitlines():
        if len(raw) < 4:
            continue
        entry = raw[3:].strip()
        # Rename entries read `old -> new`; keep the new path.
        if " -> " in entry:
            entry = entry.split(" -> ")[-1]
        entry = entry.strip().strip('"').replace("\\", "/")
        if entry:
            paths.append(entry)
    return paths
def is_noise(f: str) -> bool:
    """True if path *f* matches a noise pattern and is ignored for flagging."""
    return any(p.search(f) for p in NOISE_PATTERNS)
def safe_read_json(path):
    """Read JSON from *path*; return {} on a missing file or any error."""
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception:
        return {}
def write_json(path, data):
    """Write *data* as pretty-printed UTF-8 JSON, creating the parent dir."""
    os.makedirs(os.path.dirname(path) or ".kiro", exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
# ── Block 1: risk flagging (audit_flagger) ──
def do_audit_flag(all_files, now):
    """Evaluate risk rules over changed files and write .kiro/.audit_state.json."""
    files = sorted(set(f for f in all_files if not is_noise(f)))
    if not files:
        # No meaningful changes: reset the state to "no audit required".
        write_json(STATE_PATH, {
            "audit_required": False,
            "db_docs_required": False,
            "reasons": [],
            "changed_files": [],
            "change_fingerprint": "",
            "marked_at": now.isoformat(),
            "last_reminded_at": None,
        })
        return
    reasons = []
    audit_required = False
    db_docs_required = False
    for f in files:
        for pattern, label in RISK_RULES:
            if pattern.search(f):
                audit_required = True
                tag = f"dir:{label}"
                if tag not in reasons:
                    reasons.append(tag)
        # Any file at the repository root is treated as risky.
        if "/" not in f:
            audit_required = True
            if "root-file" not in reasons:
                reasons.append("root-file")
        if any(p.search(f) for p in DB_PATTERNS):
            db_docs_required = True
            if "db-schema-change" not in reasons:
                reasons.append("db-schema-change")
    fp = sha1hex("\n".join(files))
    # Preserve last_reminded_at when the change set is unchanged, so the
    # reminder rate limit carries over.
    last_reminded = None
    existing = safe_read_json(STATE_PATH)
    if existing.get("change_fingerprint") == fp:
        last_reminded = existing.get("last_reminded_at")
    write_json(STATE_PATH, {
        "audit_required": audit_required,
        "db_docs_required": db_docs_required,
        "reasons": reasons,
        "changed_files": files[:50],
        "change_fingerprint": fp,
        "marked_at": now.isoformat(),
        "last_reminded_at": last_reminded,
    })
# ── Block 2: prompt log ──
def do_prompt_log(now):
    """Write the current user prompt to docs/audit/prompt_logs/ and record its ID.

    The generated prompt ID (P<timestamp>) is persisted to
    .kiro/.last_prompt_id.json so the agentStop hooks can trace the session.
    """
    prompt_id = f"P{now.strftime('%Y%m%d-%H%M%S')}"
    prompt_raw = os.environ.get("USER_PROMPT", "")
    # Keep only the first 5KB of extremely long prompts.
    if len(prompt_raw) > 20000:
        prompt_raw = prompt_raw[:5000] + "\n[TRUNCATED: prompt too long]"
    summary = " ".join(prompt_raw.split()).strip()
    if len(summary) > 120:
        # BUGFIX: the truncation marker was an empty string (an ellipsis
        # lost to encoding); append a visible "…" so truncated summaries
        # are distinguishable from complete ones.
        summary = summary[:120] + "…"
    if not summary:
        summary = "(empty prompt)"
    log_dir = os.path.join("docs", "audit", "prompt_logs")
    os.makedirs(log_dir, exist_ok=True)
    filename = f"prompt_log_{now.strftime('%Y%m%d_%H%M%S')}.md"
    entry = f"""- [{prompt_id}] {now.strftime('%Y-%m-%d %H:%M:%S %z')}
- summary: {summary}
- prompt:
```text
{prompt_raw}
```
"""
    with open(os.path.join(log_dir, filename), "w", encoding="utf-8") as f:
        f.write(entry)
    write_json(PROMPT_ID_PATH, {"prompt_id": prompt_id, "at": now.isoformat()})
# ── Block 3: git snapshot (compared at agentStop to detect non-Kiro changes) ──
def do_git_snapshot(all_files, now):
    """Record the current changed-file set to .kiro/.git_snapshot.json."""
    fp = sha1hex("\n".join(sorted(all_files))) if all_files else ""
    write_json(SNAPSHOT_PATH, {
        "files": sorted(all_files)[:100],
        "fingerprint": fp,
        "taken_at": now.isoformat(),
    })
def main():
    """promptSubmit entry point: flag risk, log the prompt, snapshot git state."""
    # Exit immediately when not inside a git repository.
    try:
        r = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True, text=True, encoding="utf-8", errors="replace", timeout=5
        )
        if r.returncode != 0:
            return
    except Exception:
        return
    now = now_taipei()
    all_files = get_changed_files()
    # Each functional block is isolated in its own try/except.
    try:
        do_audit_flag(all_files, now)
    except Exception:
        pass
    try:
        do_prompt_log(now)
    except Exception:
        pass
    try:
        do_git_snapshot(all_files, now)
    except Exception:
        pass
if __name__ == "__main__":
    try:
        main()
    except Exception:
        # A hook must never block the agent: swallow unexpected errors.
        pass

View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""session_log — record the full conversation log at agentStop.

Sources:
- env var AGENT_OUTPUT (agent output injected by Kiro)
- env var USER_PROMPT (most recent user input)
- .kiro/.last_prompt_id.json (prompt-ID traceability)
- .kiro/.audit_state.json (changed-file list)
- git diff --stat (change summary)

Output: docs/audit/session_logs/session_<timestamp>.md
"""
import json
import os
import subprocess
import sys
from datetime import datetime, timezone, timedelta
TZ_TAIPEI = timezone(timedelta(hours=8))
LOG_DIR = os.path.join("docs", "audit", "session_logs")
STATE_PATH = os.path.join(".kiro", ".audit_state.json")
PROMPT_ID_PATH = os.path.join(".kiro", ".last_prompt_id.json")
def now_taipei():
    """Current time in the Taipei timezone (UTC+8)."""
    return datetime.now(tz=TZ_TAIPEI)
def safe_read_json(path):
    """Load JSON from *path*; any error (missing file, bad JSON) yields {}."""
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception:
        return {}
def git_diff_stat():
    """Return `git diff --stat HEAD`, or a placeholder string on failure."""
    try:
        proc = subprocess.run(
            ["git", "diff", "--stat", "HEAD"],
            capture_output=True, text=True, timeout=10
        )
    except Exception:
        return "(git not available)"
    if proc.returncode != 0:
        return "(git diff failed)"
    return proc.stdout.strip()
def git_status_short():
    """Return `git status --short` output, or '' on any failure."""
    try:
        proc = subprocess.run(
            ["git", "status", "--short"],
            capture_output=True, text=True, timeout=10
        )
        if proc.returncode == 0:
            return proc.stdout.strip()
    except Exception:
        pass
    return ""
def main():
    """Collect prompt/agent output plus git state and write a session log file."""
    now = now_taipei()
    ts = now.strftime("%Y%m%d_%H%M%S")
    timestamp_display = now.strftime("%Y-%m-%d %H:%M:%S %z")
    # Gather inputs.
    agent_output = os.environ.get("AGENT_OUTPUT", "")
    user_prompt = os.environ.get("USER_PROMPT", "")
    prompt_info = safe_read_json(PROMPT_ID_PATH)
    audit_state = safe_read_json(STATE_PATH)
    prompt_id = prompt_info.get("prompt_id", "unknown")
    # Truncate oversized content so log files stay manageable.
    max_len = 50000
    if len(agent_output) > max_len:
        agent_output = agent_output[:max_len] + "\n\n[TRUNCATED: output exceeds 50KB]"
    if len(user_prompt) > 10000:
        user_prompt = user_prompt[:10000] + "\n\n[TRUNCATED: prompt exceeds 10KB]"
    diff_stat = git_diff_stat()
    status_short = git_status_short()
    changed_files = audit_state.get("changed_files", [])
    os.makedirs(LOG_DIR, exist_ok=True)
    filename = f"session_{ts}.md"
    filepath = os.path.join(LOG_DIR, filename)
    content = f"""# Session Log — {timestamp_display}
- Prompt-ID: `{prompt_id}`
- Audit Required: `{audit_state.get('audit_required', 'N/A')}`
- Reasons: {', '.join(audit_state.get('reasons', [])) or 'none'}
## User Input
```text
{user_prompt or '(not captured)'}
```
## Agent Output
```text
{agent_output or '(not captured)'}
```
## Changed Files ({len(changed_files)})
```
{chr(10).join(changed_files[:80]) if changed_files else '(none)'}
```
## Git Diff Stat
```
{diff_stat}
```
## Git Status
```
{status_short or '(clean)'}
```
"""
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
if __name__ == "__main__":
    try:
        main()
    except Exception:
        # A hook must never block the agent: swallow unexpected errors.
        pass