微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
254
scripts/ops/etl_unified_analysis.py
Normal file
254
scripts/ops/etl_unified_analysis.py
Normal file
@@ -0,0 +1,254 @@
|
||||
# -*- coding: utf-8 -*-
"""
ETL unified analysis — orchestration entry point.

Merges "dataflow structure analysis" and "ETL data consistency check" into
one unified pipeline.

Three modes are supported:
    --mode structure     structure analysis only (analyze_dataflow + gen_dataflow_report)
    --mode consistency   consistency check only (etl_consistency_check)
    --mode full          run everything (default)

Data source:
    By default the script actively calls the API to collect the last 60 days
    of data.
    --source etl-log     switch to reading the ETL on-disk JSON (consistency-check mode)

Usage:
    cd C:\\NeoZQYY
    uv run python scripts/ops/etl_unified_analysis.py
    uv run python scripts/ops/etl_unified_analysis.py --mode consistency --source etl-log
    uv run python scripts/ops/etl_unified_analysis.py --mode structure --date-from 2026-01-01
"""
from __future__ import annotations

import argparse
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

# Make sure scripts/ops is on sys.path so `import _env_paths` works when this
# file is executed as a plain script (not as a package module).
_SCRIPT_DIR = Path(__file__).resolve().parent
if str(_SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(_SCRIPT_DIR))

from _env_paths import get_output_path, ensure_repo_root

ensure_repo_root()

# All timestamps produced by this script are China Standard Time.
TZ = ZoneInfo("Asia/Shanghai")
NOW = datetime.now(TZ)  # process start time, reused in report headers
TS = NOW.strftime("%Y%m%d_%H%M%S")  # filesystem-safe timestamp for output file names
||||
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser for the unified ETL analysis script."""
    p = argparse.ArgumentParser(
        description="ETL 统一分析 — 结构分析 + 数据一致性检查",
    )
    # Stage selection and data source are closed choice sets.
    p.add_argument(
        "--mode",
        choices=["structure", "consistency", "full"],
        default="full",
        help="执行模式:structure=仅结构分析, consistency=仅一致性检查, full=全部(默认)",
    )
    p.add_argument(
        "--source",
        choices=["api", "etl-log"],
        default="api",
        help="数据源:api=主动调 API 采集(默认), etl-log=读 ETL 落盘 JSON",
    )
    # The two date bounds share the same shape (optional string, None default).
    for flag, help_text in (
        ("--date-from", "数据获取起始日期 (YYYY-MM-DD),默认 60 天前"),
        ("--date-to", "数据获取截止日期 (YYYY-MM-DD),默认今天"),
    ):
        p.add_argument(flag, type=str, default=None, help=help_text)
    p.add_argument(
        "--limit", type=int, default=200,
        help="每端点最大记录数(默认 200)",
    )
    p.add_argument(
        "--tables", type=str, default=None,
        help="要分析的表名列表(逗号分隔,缺省=全部)",
    )
    return p
|
||||
|
||||
|
||||
def run_subprocess(cmd: list[str], label: str) -> int:
    """Run *cmd* as a child process (stdout/stderr inherited) and return its exit code.

    The child runs with the repository root as its working directory so the
    relative `scripts/ops/...` paths in *cmd* resolve correctly.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"[{label}] 开始执行...")
    print(f" 命令: {' '.join(cmd)}")
    print(f"{banner}\n")

    repo_root = Path(__file__).resolve().parents[2]  # project root
    proc = subprocess.run(cmd, cwd=str(repo_root))

    code = proc.returncode
    if code == 0:
        print(f"\n✅ [{label}] 执行完成")
    else:
        print(f"\n❌ [{label}] 执行失败 (exit code: {code})")
    return code
|
||||
|
||||
|
||||
def run_structure_analysis(args: argparse.Namespace) -> int:
    """Run the dataflow structure analysis (collection + report generation).

    Stage 1 invokes scripts/ops/analyze_dataflow.py with the date-range /
    limit / tables options from *args*; stage 2 invokes
    scripts/ops/gen_dataflow_report.py. Returns the first non-zero exit
    code, or 0 when both stages succeed.
    """
    from datetime import timedelta

    # Stage 1: data collection
    cmd = [sys.executable, "scripts/ops/analyze_dataflow.py"]
    if args.date_from:
        cmd += ["--date-from", args.date_from]
    else:
        # Default window: 60 days back from "today" in Asia/Shanghai.
        # (Previously used date.today(), which follows the machine-local
        # timezone and could disagree with the rest of this script — all
        # other timestamps here are derived from the TZ-aware NOW.)
        default_from = (NOW.date() - timedelta(days=60)).isoformat()
        cmd += ["--date-from", default_from]
    if args.date_to:
        cmd += ["--date-to", args.date_to]
    # `is not None` (not truthiness) so an explicit `--limit 0` is still
    # forwarded to the child instead of being silently dropped.
    if args.limit is not None:
        cmd += ["--limit", str(args.limit)]
    if args.tables:
        cmd += ["--tables", args.tables]

    rc = run_subprocess(cmd, "数据流采集")
    if rc != 0:
        return rc

    # Stage 2: report generation
    cmd2 = [sys.executable, "scripts/ops/gen_dataflow_report.py"]
    return run_subprocess(cmd2, "数据流报告生成")
|
||||
|
||||
|
||||
def run_consistency_check(args: argparse.Namespace) -> int:
    """Run the ETL data consistency check as a subprocess; return its exit code.

    NOTE(review): *args* is accepted for signature symmetry with
    run_structure_analysis, but none of its fields (including --source)
    are forwarded to the child script — confirm whether
    etl_consistency_check.py should receive the data-source selection.
    """
    return run_subprocess(
        [sys.executable, "scripts/ops/etl_consistency_check.py"],
        "ETL 数据一致性检查",
    )
|
||||
|
||||
|
||||
def merge_reports(structure_ok: bool, consistency_ok: bool) -> Path | None:
    """Concatenate the newest structure and consistency reports into one file.

    Returns the path of the merged markdown report written under the ETL
    report root, or None when neither source report could be located.
    """
    report_root = get_output_path("ETL_REPORT_ROOT")
    analyze_root = get_output_path("SYSTEM_ANALYZE_ROOT")

    def newest(root: Path, pattern: str) -> Path | None:
        # Timestamped filenames sort lexicographically, so the first entry
        # in reverse order is the most recent report (None if no match).
        candidates = sorted(root.glob(pattern), reverse=True)
        return candidates[0] if candidates else None

    dataflow_report = newest(analyze_root, "dataflow_*.md") if structure_ok else None
    consistency_report = newest(report_root, "consistency_check_*.md") if consistency_ok else None

    if dataflow_report is None and consistency_report is None:
        print("⚠️ 没有找到任何报告文件,跳过合并")
        return None

    # Unified report header.
    lines: list[str] = [
        "# ETL 统一分析报告",
        "",
        f"生成时间: {NOW.strftime('%Y-%m-%d %H:%M:%S')} CST",
        f"模式: 结构分析{'✅' if structure_ok else '❌'} + 一致性检查{'✅' if consistency_ok else '❌'}",
        "",
        "---",
        "",
    ]

    if dataflow_report:
        lines += ["# 第一部分:数据流结构分析", ""]
        for line in dataflow_report.read_text(encoding="utf-8").splitlines():
            # Drop the source report's own top-level title to avoid duplicates.
            if line.startswith("# ") and "数据流" in line:
                continue
            lines.append(line)
        lines += ["", "---", ""]

    if consistency_report:
        lines += ["# 第二部分:ETL 数据一致性检查", ""]
        for line in consistency_report.read_text(encoding="utf-8").splitlines():
            # Same de-duplication for the consistency report's title line.
            if line.startswith("# ") and "一致性" in line:
                continue
            lines.append(line)
        lines.append("")

    out_file = report_root / f"etl_unified_analysis_{TS}.md"
    out_file.write_text("\n".join(lines), encoding="utf-8")
    return out_file
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse arguments, run the selected stages, merge reports.

    Exit behavior: in single-stage modes the process exits with that stage's
    exit code; in full mode it exits 1 when any stage failed, 0 otherwise.
    """
    args = build_parser().parse_args()

    sep = "=" * 60
    print(sep)
    print("ETL 统一分析")
    print(sep)
    print(f" 模式: {args.mode}")
    print(f" 数据源: {args.source}")
    print(f" 日期范围: {args.date_from or '默认60天前'} ~ {args.date_to or '今天'}")
    print(sep)

    structure_ok = False
    consistency_ok = False
    any_failure = False

    wants_structure = args.mode in ("structure", "full")
    wants_consistency = args.mode in ("consistency", "full")

    # Structure analysis stage.
    if wants_structure:
        rc = run_structure_analysis(args)
        structure_ok = rc == 0
        any_failure = any_failure or rc != 0
        if args.mode == "structure":
            sys.exit(rc)

    # Consistency check stage.
    if wants_consistency:
        rc = run_consistency_check(args)
        consistency_ok = rc == 0
        any_failure = any_failure or rc != 0
        if args.mode == "consistency":
            sys.exit(rc)

    # Merge reports — full mode only, and only if at least one stage succeeded.
    if args.mode == "full" and (structure_ok or consistency_ok):
        print(f"\n{sep}")
        print("[报告合并] 开始合并...")
        print(sep)
        merged = merge_reports(structure_ok, consistency_ok)
        if merged:
            print(f"\n✅ 统一报告已生成: {merged}")
        else:
            print("\n⚠️ 报告合并失败")

    # Final summary.
    print(f"\n{sep}")
    print("ETL 统一分析完成")
    print(sep)
    if wants_structure:
        print(f" 结构分析: {'✅ 成功' if structure_ok else '❌ 失败'}")
    if wants_consistency:
        print(f" 一致性检查: {'✅ 成功' if consistency_ok else '❌ 失败'}")
    print(sep)

    if any_failure:
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user