Files
Neo-ZQYY/scripts/ops/etl_integration_report.py
Neo b25308c3f4 feat: P1-P3 全栈集成 — 数据库基础 + DWS 扩展 + 小程序鉴权 + 工程化体系
## P1 数据库基础
- zqyy_app: 创建 auth/biz schema、FDW 连接 etl_feiqiu
- etl_feiqiu: 创建 app schema RLS 视图、商品库存预警表
- 清理 assistant_abolish 残留数据

## P2 ETL/DWS 扩展
- 新增 DWS 助教订单贡献度表 (dws.assistant_order_contribution)
- 新增 assistant_order_contribution_task 任务及 RLS 视图
- member_consumption 增加充值字段、assistant_daily 增加处罚字段
- 更新 ODS/DWD/DWS 任务文档及业务规则文档
- 更新 consistency_checker、flow_runner、task_registry 等核心模块

## P3 小程序鉴权系统
- 新增 xcx_auth 路由/schema(微信登录 + JWT)
- 新增 wechat/role/matching/application 服务层
- zqyy_app 鉴权表迁移 + 角色权限种子数据
- auth/dependencies.py 支持小程序 JWT 鉴权

## 文档与审计
- 新增 DOCUMENTATION-MAP 文档导航
- 新增 7 份 BD_Manual 数据库变更文档
- 更新 DDL 基线快照(etl_feiqiu 6 schema + zqyy_app auth)
- 新增全栈集成审计记录、部署检查清单更新
- 新增 BACKLOG 路线图、FDW→Core 迁移计划

## Kiro 工程化
- 新增 5 个 Spec(P1/P2/P3/全栈集成/核心业务)
- 新增审计自动化脚本(agent_on_stop/build_audit_context/compliance_prescan)
- 新增 6 个 Hook(合规检查/会话日志/提交审计等)
- 新增 doc-map steering 文件

## 运维与测试
- 新增 ops 脚本:迁移验证/API 健康检查/ETL 监控/集成报告
- 新增属性测试:test_dws_contribution / test_auth_system
- 清理过期 export 报告文件
- 更新 .gitignore 排除规则
2026-02-26 08:03:53 +08:00

363 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
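ETL full-pipeline integration report generator.
Parses a saved execution log (`_tmp_error_log.txt`, expected next to this script)
for per-task timings and error information, combines it with the execution
metadata hard-coded below (copied from the backend API history), and generates a
combined integration report.
The report is written to the directory named by the SYSTEM_LOG_ROOT environment variable.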
"""
import os
import re
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
# Load the .env file that sits two directory levels above this script's folder.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# The report directory comes from the environment; abort when it is unset or empty.
SYSTEM_LOG_ROOT = os.getenv("SYSTEM_LOG_ROOT")
if not SYSTEM_LOG_ROOT:
    print("ERROR: SYSTEM_LOG_ROOT 环境变量未设置", file=sys.stderr)
    raise SystemExit(1)
# ── Execution metadata (copied from the API execution history) ──
# Identity and shape of the run.
EXEC_ID = "1e1c93ff-2ab0-42e6-b529-ec14b551c91a"
FLOW = "api_full"
PROCESSING_MODE = "full_window"
TASK_COUNT = 41

# Outcome of the run.
EXEC_STATUS = "success"
EXEC_EXIT_CODE = 0

# Start/finish timestamps (ISO-8601 strings with a +08:00 offset) and the
# duration in milliseconds.
EXEC_STARTED = "2026-02-24T02:15:26.689731+08:00"
EXEC_FINISHED = "2026-02-24T02:50:39.679479+08:00"
EXEC_DURATION_MS = 2_112_989

# Processing window and the slice size (in days) shown in the summary table.
WINDOW_START = "2025-11-01"
WINDOW_END = "2026-02-20"
WINDOW_SPLIT_DAYS = 30
# ── Log file location ──
# The execution log is expected in the same directory as this script; the
# script stops with exit status 1 when it is not there.
SCRIPT_DIR = Path(__file__).resolve().parent
ERROR_LOG_PATH = SCRIPT_DIR.joinpath("_tmp_error_log.txt")
if not ERROR_LOG_PATH.exists():
    print(f"ERROR: 日志文件不存在: {ERROR_LOG_PATH}", file=sys.stderr)
    raise SystemExit(1)

# Whole log as one string, plus a line-by-line view used by every later section.
log_text = ERROR_LOG_PATH.read_text(encoding="utf-8")
lines = log_text.splitlines()
# ── 1. Parse the start/end time of each task ──
# A log line counts as timestamped when it begins with "[YYYY-MM-DD HH:MM:SS]".
TS_RE = re.compile(r"^\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]")


def parse_ts(line: str):
    """Return the leading bracketed timestamp of *line* as a naive datetime.

    Returns None when the line does not start with such a timestamp.
    """
    found = TS_RE.match(line)
    if found is None:
        return None
    return datetime.strptime(found.group(1), "%Y-%m-%d %H:%M:%S")
# Extract the start and end time of every task.
# task_timings maps a task name to {"start", "end", "status"}; "duration_s"
# is added by the final loop of this section.
task_timings: dict[str, dict] = {}

START_RE = re.compile(r"开始执行(\w+) \(ODS\)|(\w+): ODS fetch\+load start")
COMPLETE_RE = re.compile(r"(\w+) ODS 任务完成:|(\w+): 完成,统计=|(\w+): 完成, 统计=|(\w+): 结果统计:")
FAIL_RE = re.compile(r"任务 (\w+) 失败:")
# NOTE: DWD_START_RE and DWS_START_RE are not consulted by the scan below
# (DWD starts use a substring test, DWS starts use _DWS_START_PATTERNS).
# They stay defined so the module's existing names are unchanged.
DWD_START_RE = re.compile(r"DWD_LOAD_FROM_ODS.*开始|开始运行.*DWD_LOAD_FROM_ODS")
DWD_COMPLETE_RE = re.compile(r"DWD_LOAD_FROM_ODS: 完成")
DWS_START_RE = re.compile(r"(DWS_\w+):.*开始|开始执行.*(DWS_\w+)")
DWS_COMPLETE_RE = re.compile(r"(DWS_\w+): 完成")

# DWS start markers, compiled once instead of being handed to re.search as
# plain strings for every log line.
_DWS_START_PATTERNS = (
    re.compile(r"(DWS_\w+):.*(?:开始|start)"),
    re.compile(r"开始执行.*(DWS_\w+)"),
)
_DWD_TASK = "DWD_LOAD_FROM_ODS"


def _record_start(task: str, ts: datetime) -> None:
    """Register *task* as running from *ts*; later start lines for it are ignored."""
    if task not in task_timings:
        task_timings[task] = {"start": ts, "end": None, "status": "running"}


def _record_end(task: str, ts: datetime, status: str) -> bool:
    """Close an already-registered *task* at *ts* with *status*.

    Returns False (and records nothing) when the task was never registered.
    """
    info = task_timings.get(task)
    if info is None:
        return False
    info["end"] = ts
    info["status"] = status
    return True


for line in lines:
    ts = parse_ts(line)
    if not ts:
        continue  # lines without a leading timestamp carry no timing information

    # ODS task start (only the first start line per task counts).
    m = START_RE.search(line)
    if m:
        _record_start(m.group(1) or m.group(2), ts)

    # Task completion; exactly one of the alternation groups holds the name.
    m = COMPLETE_RE.search(line)
    if m:
        _record_end(next(filter(None, m.groups())), ts, "success")

    # DWD start.
    if _DWD_TASK in line and ("开始" in line or "start" in line.lower()):
        _record_start(_DWD_TASK, ts)

    # DWD completion.
    if DWD_COMPLETE_RE.search(line):
        _record_end(_DWD_TASK, ts, "success")

    # DWS task start (first occurrence only).
    for pattern in _DWS_START_PATTERNS:
        m = pattern.search(line)
        if m:
            _record_start(m.group(1), ts)

    # DWS task completion.
    m = DWS_COMPLETE_RE.search(line)
    if m:
        _record_end(m.group(1), ts, "success")

    # Task failure; a task that fails without a recorded start gets a
    # zero-length entry so it still shows up in the statistics.
    m = FAIL_RE.search(line)
    if m and not _record_end(m.group(1), ts, "failed"):
        task_timings[m.group(1)] = {"start": ts, "end": ts, "status": "failed"}

# Compute durations; tasks that never reported an end get 0 seconds.
for info in task_timings.values():
    if info["start"] and info["end"]:
        info["duration_s"] = (info["end"] - info["start"]).total_seconds()
    else:
        info["duration_s"] = 0
# ── 2. Collect errors and warnings ──
errors: list[dict] = []
warnings: list[dict] = []
for idx, raw in enumerate(lines):
    stamp = parse_ts(raw)
    if "ERROR" in raw:
        # Keep the error line plus up to 9 following lines as context. Stop at
        # the next timestamped line unless that line mentions "Traceback".
        block = [raw]
        for follower in lines[idx + 1 : idx + 10]:
            starts_new_entry = TS_RE.match(follower) and "Traceback" not in follower
            if starts_new_entry:
                break
            block.append(follower)
        errors.append({"ts": stamp, "line": raw.strip(), "context": "\n".join(block)})
        continue
    if "WARNING" in raw:
        warnings.append({"ts": stamp, "line": raw.strip()})
# ── 3. Categorise errors ──
# Rules are tried in order; each is (record field to search, substring, category).
# The first rule whose substring occurs wins; anything unmatched goes to "其他".
_CATEGORY_RULES = (
    ("line", "未知的任务类型", "任务未注册"),
    ("context", "member_birthday_manual", "FDW 表缺失(根因)"),
    ("context", "InFailedSqlTransaction", "事务级联失败"),
)

error_categories: dict[str, list] = {}
for entry in errors:
    label = next(
        (name for field, needle, name in _CATEGORY_RULES if needle in entry[field]),
        "其他",
    )
    if label not in error_categories:
        error_categories[label] = []
    error_categories[label].append(entry)
# ── 4. Group timings by layer ──
def _tasks_with_prefix(prefix: str) -> dict:
    """Return the subset of task_timings whose task name starts with *prefix*."""
    return {name: rec for name, rec in task_timings.items() if name.startswith(prefix)}


def _duration_of(pair):
    """Sort key: the duration of a (task name, timing record) pair."""
    return pair[1]["duration_s"]


ods_tasks = _tasks_with_prefix("ODS_")
dwd_tasks = _tasks_with_prefix("DWD_")
dws_tasks = _tasks_with_prefix("DWS_")

# Five longest-running tasks; tasks with a zero duration are left out.
all_with_duration = [(name, rec) for name, rec in task_timings.items() if rec["duration_s"] > 0]
top5 = sorted(all_with_duration, key=_duration_of, reverse=True)[:5]

# Total duration per layer.
ods_total = sum(rec["duration_s"] for rec in ods_tasks.values())
dwd_total = sum(rec["duration_s"] for rec in dwd_tasks.values())
dws_total = sum(rec["duration_s"] for rec in dws_tasks.values())

# Success / failure statistics (tasks still marked "running" are in neither).
failed_tasks = [name for name, rec in task_timings.items() if rec["status"] == "failed"]
failed_count = len(failed_tasks)
success_count = len([rec for rec in task_timings.values() if rec["status"] == "success"])
# ── 5. 生成报告 ──
def fmt_duration(seconds: float) -> str:
    """Format a second count as "Mm SSs", or "Hh MMm SSs" from one hour upwards.

    Fractional seconds are truncated. A negative input yields "N/A".
    """
    if seconds < 0:
        return "N/A"
    whole = int(seconds)
    hours, remainder = whole // 3600, whole % 3600
    minutes, secs = remainder // 60, remainder % 60
    if hours > 0:
        return f"{hours}h {minutes:02d}m {secs:02d}s"
    return f"{minutes}m {secs:02d}s"
# Buffer holding the report, one entry per output line.
report_lines: list[str] = []


def w(line: str = ""):
    """Append *line* to the report buffer; called without an argument it adds an empty line."""
    report_lines.extend([line])
# Report title and generation time (local wall-clock time of this run).
w("# ETL 全流程联调报告")
w()
w(f"> 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
w()

# ── Execution summary ──
w("## 执行概要")
w()
w("| 项目 | 值 |")
w("|------|-----|")
# One (label, value) pair per table row, in display order.
summary_rows = [
    ("Execution ID", f"`{EXEC_ID}`"),
    ("Flow", f"`{FLOW}` (API → ODS → DWD → DWS → INDEX)"),
    ("处理模式", f"`{PROCESSING_MODE}`"),
    ("时间窗口", f"{WINDOW_START} ~ {WINDOW_END}"),
    # The slice count is static text, not derived from the window constants.
    ("窗口切分", f"按天,{WINDOW_SPLIT_DAYS} 天/切片(共 4 个切片)"),
    ("强制全量", "是 (`force_full`)"),
    # Fixed: the full-width parenthesis opened before "全选" was never closed.
    ("任务数", f"{TASK_COUNT} 个(全选 `is_common=True`)"),
    ("开始时间", f"{EXEC_STARTED}"),
    ("结束时间", f"{EXEC_FINISHED}"),
    ("总时长", f"{fmt_duration(EXEC_DURATION_MS / 1000)} ({EXEC_DURATION_MS}ms)"),
    ("退出码", f"{EXEC_EXIT_CODE}"),
    ("最终状态", f"`{EXEC_STATUS}`"),
    # The denominator counts only tasks that reached "success" or "failed".
    ("任务成功", f"{success_count} / {success_count + failed_count}"),
    ("任务失败", f"{failed_count}"),
]
for label, value in summary_rows:
    w(f"| {label} | {value} |")
w()
# ── Performance report ──
w("## 性能报告")
w()
w("### 各层耗时汇总")
w()
w("| 层 | 任务数 | 总耗时 | 平均耗时 |")
w("|-----|-------|--------|---------|")
ods_count = len(ods_tasks)
dwd_count = len(dwd_tasks)
dws_count = len(dws_tasks)
# One row per layer; max(count, 1) keeps the average defined for an empty layer.
for layer_name, n_tasks, layer_total in (
    ("ODS", ods_count, ods_total),
    ("DWD", dwd_count, dwd_total),
    ("DWS+INDEX", dws_count, dws_total),
):
    mean = layer_total / max(n_tasks, 1)
    w(f"| {layer_name} | {n_tasks} | {fmt_duration(layer_total)} | {fmt_duration(mean)} |")
w()
w("### Top-5 耗时任务")
w()
w("| 排名 | 任务 | 耗时 | 状态 |")
w("|------|------|------|------|")
for position, (name, rec) in enumerate(top5, start=1):
    w(f"| {position} | `{name}` | {fmt_duration(rec['duration_s'])} | {rec['status']} |")
w()
def _clock(moment) -> str:
    """Render a datetime as HH:MM:SS, or "?" when the value is missing."""
    return moment.strftime("%H:%M:%S") if moment else "?"


# Matches ODS completion lines that report a fetched-record count; compiled
# once rather than re-supplied as a string for every log line.
_FETCHED_RE = re.compile(r"(\w+) ODS 任务完成: \{'fetched': (\d+)")

w("### ODS 层各任务耗时明细")
w()
w("| 任务 | 开始 | 结束 | 耗时 | 记录数 |")
w("|------|------|------|------|--------|")
# Fetched-record count per task; a later line for the same task overwrites
# an earlier one.
fetch_counts: dict[str, int] = {}
for line in lines:
    m = _FETCHED_RE.search(line)
    if m:
        fetch_counts[m.group(1)] = int(m.group(2))
for task in sorted(ods_tasks):
    info = ods_tasks[task]
    fetched = fetch_counts.get(task, "?")
    w(
        f"| `{task}` | {_clock(info['start'])} | {_clock(info['end'])} "
        f"| {fmt_duration(info['duration_s'])} | {fetched} |"
    )
w()
w("### DWD + DWS 层各任务耗时明细")
w()
w("| 任务 | 开始 | 结束 | 耗时 | 状态 |")
w("|------|------|------|------|------|")
for task in sorted({**dwd_tasks, **dws_tasks}):
    info = task_timings[task]
    w(
        f"| `{task}` | {_clock(info['start'])} | {_clock(info['end'])} "
        f"| {fmt_duration(info['duration_s'])} | {info['status']} |"
    )
w()
# ── DEBUG report ──
# Explanation shown next to each error category; unknown categories get "未分类".
_CATEGORY_NOTES = {
    "任务未注册": "`ODS_ASSISTANT_ABOLISH` 未在 `task_registry.py` 中注册",
    "FDW 表缺失(根因)": "`fdw_app.member_birthday_manual` 关系不存在",
    "事务级联失败": "根因错误导致事务终止,后续 DWS 任务全部 `InFailedSqlTransaction`",
}

# NOTE: this narrative is static text; it is not derived from the parsed
# `errors` list. An empty string produces a blank report line.
# Fixed: headings now have a colon after the error number, and the sub-lists
# are indented two spaces so Markdown nests them under their parent bullet.
_ERROR_DETAILS = (
    "#### 错误 1:ODS_ASSISTANT_ABOLISH 任务未注册",
    "",
    "- 时间: 02:15:59",
    "- 错误: `ValueError: 未知的任务类型: ODS_ASSISTANT_ABOLISH`",
    "- 位置: `orchestration/task_registry.py:96`",
    "- 原因: `ODS_ASSISTANT_ABOLISH` 任务在后端任务注册表中标记为 `is_common=True`,但 ETL `task_registry` 中尚未注册该任务的实现类",
    "- 影响: 仅该任务失败,不影响其他任务执行",
    "- 建议: 完成 `assistant-abolish-cleanup` spec 的任务注册,或将后端注册表中该任务的 `is_common` 设为 `False`",
    "",
    "#### 错误 2:FDW 表缺失导致 DWS 级联失败(根因)",
    "",
    "- 时间: 02:50:36",
    "- 根因: `UndefinedTable: 关系 \"fdw_app.member_birthday_manual\" 不存在`",
    "- 触发任务: `DWS_MEMBER_CONSUMPTION`",
    "- 降级尝试: 代码尝试降级为 `dim_member.birthday`,但降级查询在已失败的事务中执行,仍然报错",
    "- 级联影响: 事务被终止后,以下 10 个任务全部 `InFailedSqlTransaction`:",
    "  - `DWS_MEMBER_VISIT`",
    "  - `DWS_FINANCE_DAILY`",
    "  - `DWS_FINANCE_RECHARGE`",
    "  - `DWS_FINANCE_INCOME_STRUCTURE`",
    "  - `DWS_FINANCE_DISCOUNT_DETAIL`",
    "  - `DWS_ASSISTANT_MONTHLY`",
    "  - `DWS_ASSISTANT_FINANCE`",
    "  - `DWS_WINBACK_INDEX`",
    "  - `DWS_NEWCONV_INDEX`",
    "  - `DWS_RELATION_INDEX`",
    "- 建议:",
    "  1. 在 `zqyy_app` 数据库中创建 `member_birthday_manual` 表(或对应的 FDW 映射)",
    "  2. 或修改 `DWS_MEMBER_CONSUMPTION` 的降级逻辑,在 FDW 失败时先 ROLLBACK 再重试降级查询",
    "  3. 考虑为 DWS 任务使用独立事务/连接,避免单任务失败导致级联",
    "",
)

w("## DEBUG 报告")
w()
if not errors and not warnings:
    w("无错误或警告。")
else:
    # Fixed: a separator now stands between the ERROR and WARNING counts.
    w(f"共发现 **{len(errors)}** 个 ERROR,**{len(warnings)}** 个 WARNING。")
    w()
    # The category table and the error narrative are emitted only when at
    # least one error was collected, so a warnings-only log does not get a
    # failure write-up.
    if errors:
        w("### 错误分类汇总")
        w()
        w("| 类别 | 数量 | 说明 |")
        w("|------|------|------|")
        for cat, errs in error_categories.items():
            desc = _CATEGORY_NOTES.get(cat, "未分类")
            w(f"| {cat} | {len(errs)} | {desc} |")
        w()
        w("### 错误详情")
        w()
        for text in _ERROR_DETAILS:
            w(text)
    if warnings:
        w("### 警告详情")
        w()
        for warn in warnings:
            w(f"- `{warn['line']}`")
        w()
# ── Black-box test report placeholder ──
for text in ("## 黑盒测试报告", "", "_将在一致性检查完成后追加_", ""):
    w(text)

# ── Write the report ──
# The target directory is created on demand; the file name starts with today's date.
output_dir = Path(SYSTEM_LOG_ROOT)
output_dir.mkdir(parents=True, exist_ok=True)
date_str = f"{datetime.now():%Y-%m-%d}"
output_path = output_dir / f"{date_str}__etl_integration_report.md"
report_content = "\n".join(report_lines)
output_path.write_text(report_content, encoding="utf-8")

# Short console summary of what was written.
total_count = success_count + failed_count
print(f"报告已生成: {output_path}")
print(f"任务统计: {success_count} 成功 / {failed_count} 失败 / {total_count} 总计")
print(f"错误数: {len(errors)}, 警告数: {len(warnings)}")