微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -0,0 +1,254 @@
# -*- coding: utf-8 -*-
"""
ETL 统一分析 — 编排入口
合并「数据流结构分析」和「ETL 数据一致性检查」为一个统一流程。
支持三种模式:
--mode structure 仅结构分析analyze_dataflow + gen_dataflow_report
--mode consistency 仅一致性检查etl_consistency_check
--mode full 全部执行(默认)
数据源:
默认主动调 API 采集最近 60 天数据。
--source etl-log 切换为读 ETL 落盘 JSON一致性检查模式
用法:
cd C:\\NeoZQYY
uv run python scripts/ops/etl_unified_analysis.py
uv run python scripts/ops/etl_unified_analysis.py --mode consistency --source etl-log
uv run python scripts/ops/etl_unified_analysis.py --mode structure --date-from 2026-01-01
"""
from __future__ import annotations
import argparse
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo
# Make sure this script's own directory (scripts/ops) is on sys.path so the
# sibling helper module _env_paths can be imported below.
_SCRIPT_DIR = Path(__file__).resolve().parent
if str(_SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(_SCRIPT_DIR))
# This import must stay after the sys.path adjustment above.
from _env_paths import get_output_path, ensure_repo_root
# NOTE(review): ensure_repo_root() is defined in _env_paths, which is not
# visible here; presumably it validates or switches to the repository root —
# confirm. It runs as an import-time side effect of this module.
ensure_repo_root()
# Clock values are captured once at import time, so every timestamp produced
# by one run of this script is identical.
TZ = ZoneInfo("Asia/Shanghai")
NOW = datetime.now(TZ)
# Compact timestamp used as the file-name suffix of the merged report.
TS = NOW.strftime("%Y%m%d_%H%M%S")
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the unified ETL analysis script.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--mode``, ``--source``,
        ``--date-from``, ``--date-to``, ``--limit`` and ``--tables``.
    """
    cli = argparse.ArgumentParser(
        description="ETL 统一分析 — 结构分析 + 数据一致性检查",
    )
    # One (flag, add_argument options) pair per CLI option, registered in
    # this order so the generated --help output keeps the same layout.
    option_specs = (
        ("--mode", {
            "choices": ["structure", "consistency", "full"],
            "default": "full",
            "help": "执行模式structure=仅结构分析, consistency=仅一致性检查, full=全部(默认)",
        }),
        ("--source", {
            "choices": ["api", "etl-log"],
            "default": "api",
            "help": "数据源api=主动调 API 采集(默认), etl-log=读 ETL 落盘 JSON",
        }),
        ("--date-from", {
            "type": str,
            "default": None,
            "help": "数据获取起始日期 (YYYY-MM-DD),默认 60 天前",
        }),
        ("--date-to", {
            "type": str,
            "default": None,
            "help": "数据获取截止日期 (YYYY-MM-DD),默认今天",
        }),
        ("--limit", {
            "type": int,
            "default": 200,
            "help": "每端点最大记录数(默认 200",
        }),
        ("--tables", {
            "type": str,
            "default": None,
            "help": "要分析的表名列表(逗号分隔,缺省=全部)",
        }),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli
def run_subprocess(cmd: list[str], label: str) -> int:
    """Run *cmd* as a child process from the project root and report the result.

    The child inherits this process's stdout/stderr, so its output shows up
    as it is produced.

    Args:
        cmd: Argument vector to execute; no shell is involved.
        label: Human-readable step name used in the banner and status lines.

    Returns:
        The child's exit code (0 means success).
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"[{label}] 开始执行...")
    print(f" 命令: {' '.join(cmd)}")
    print(f"{rule}\n")
    # The child writes straight to the inherited file descriptors, bypassing
    # Python's buffer. Flush first, otherwise the banner can appear *after*
    # the child's output whenever stdout is a pipe or a file.
    sys.stdout.flush()
    proc = subprocess.run(
        cmd,
        cwd=str(Path(__file__).resolve().parents[2]),  # project root
    )
    if proc.returncode != 0:
        print(f"\n❌ [{label}] 执行失败 (exit code: {proc.returncode})")
    else:
        print(f"\n✅ [{label}] 执行完成")
    return proc.returncode
def run_structure_analysis(args: argparse.Namespace) -> int:
    """Run the data-flow structure analysis: data collection, then report generation.

    Args:
        args: Parsed CLI options; reads ``date_from``, ``date_to``, ``limit``
            and ``tables``.

    Returns:
        0 when both stages succeed, otherwise the exit code of the stage that
        failed. Report generation is skipped when collection fails.
    """
    from datetime import timedelta

    # Stage 1: data collection.
    # The default window is the last 60 days. "Today" is derived from NOW so
    # it follows the module's Asia/Shanghai clock rather than whatever
    # timezone the host happens to be configured with.
    date_from = args.date_from or (NOW.date() - timedelta(days=60)).isoformat()
    collect_cmd = [
        sys.executable, "scripts/ops/analyze_dataflow.py",
        "--date-from", date_from,
    ]
    if args.date_to:
        collect_cmd += ["--date-to", args.date_to]
    # Explicit None check: a deliberate ``--limit 0`` is forwarded instead of
    # being silently replaced by the child script's own default.
    if args.limit is not None:
        collect_cmd += ["--limit", str(args.limit)]
    if args.tables:
        collect_cmd += ["--tables", args.tables]

    rc = run_subprocess(collect_cmd, "数据流采集")
    if rc != 0:
        return rc

    # Stage 2: report generation.
    report_cmd = [sys.executable, "scripts/ops/gen_dataflow_report.py"]
    return run_subprocess(report_cmd, "数据流报告生成")
def run_consistency_check(args: argparse.Namespace) -> int:
    """Run the ETL data consistency check script and return its exit code.

    Args:
        args: Parsed CLI options. Currently unused: no option (``--source``
            included) is forwarded to the child script.

    Returns:
        Exit code of ``scripts/ops/etl_consistency_check.py``.
    """
    script = "scripts/ops/etl_consistency_check.py"
    return run_subprocess([sys.executable, script], "ETL 数据一致性检查")
def _latest_report(folder: Path, pattern: str) -> Path | None:
    """Return the file in *folder* matching *pattern* whose path sorts last, or None."""
    # "Latest" means last in path sort order.
    # NOTE(review): assumes report names embed a sortable timestamp — confirm.
    return max(folder.glob(pattern), default=None)


def _body_lines(report: Path, keyword: str) -> list[str]:
    """Read *report* and return its lines minus level-1 headings containing *keyword*."""
    text = report.read_text(encoding="utf-8")
    # Every matching "# ..." line is dropped (not only the first one), so the
    # sub-report's own title does not duplicate the section heading.
    return [
        line
        for line in text.splitlines()
        if not (line.startswith("# ") and keyword in line)
    ]


def merge_reports(structure_ok: bool, consistency_ok: bool) -> Path | None:
    """Merge the newest sub-reports into one unified Markdown report.

    Args:
        structure_ok: Whether the structure analysis succeeded; its report is
            only looked up when this is true.
        consistency_ok: Whether the consistency check succeeded; its report
            is only looked up when this is true.

    Returns:
        Path of the written ``etl_unified_analysis_<TS>.md`` file, or None
        when no sub-report was found and nothing was written.
    """
    report_root = get_output_path("ETL_REPORT_ROOT")
    analyze_root = get_output_path("SYSTEM_ANALYZE_ROOT")

    dataflow_report = (
        _latest_report(analyze_root, "dataflow_*.md") if structure_ok else None
    )
    consistency_report = (
        _latest_report(report_root, "consistency_check_*.md")
        if consistency_ok
        else None
    )
    if dataflow_report is None and consistency_report is None:
        print("⚠️ 没有找到任何报告文件,跳过合并")
        return None

    # Header. The status markers show which part succeeded; both branches of
    # each conditional used to be empty strings, so the line carried no
    # information.
    lines: list[str] = [
        "# ETL 统一分析报告",
        "",
        f"生成时间: {NOW.strftime('%Y-%m-%d %H:%M:%S')} CST",
        f"模式: 结构分析{'✅' if structure_ok else '❌'} + 一致性检查{'✅' if consistency_ok else '❌'}",
        "",
        "---",
        "",
    ]

    if dataflow_report is not None:
        lines += ["# 第一部分:数据流结构分析", ""]
        lines += _body_lines(dataflow_report, "数据流")
        lines += ["", "---", ""]

    if consistency_report is not None:
        lines += ["# 第二部分ETL 数据一致性检查", ""]
        lines += _body_lines(consistency_report, "一致性")
        lines.append("")

    out_file = report_root / f"etl_unified_analysis_{TS}.md"
    out_file.write_text("\n".join(lines), encoding="utf-8")
    return out_file
def main() -> None:
    """Parse the CLI options, run the requested stages and print a summary.

    In ``structure`` or ``consistency`` mode the process exits right after
    that single stage with the stage's exit code. In ``full`` mode both
    stages run, their reports are merged when at least one stage succeeded,
    a summary is printed, and the process exits with status 1 if any stage
    failed.
    """
    opts = build_parser().parse_args()
    mode = opts.mode
    rule = "=" * 60

    print(rule)
    print("ETL 统一分析")
    print(rule)
    print(f" 模式: {mode}")
    print(f" 数据源: {opts.source}")
    print(f" 日期范围: {opts.date_from or '默认60天前'} ~ {opts.date_to or '今天'}")
    print(rule)

    # Summary label -> success flag, filled in the order the stages run.
    results: dict[str, bool] = {}

    # Structure analysis
    if mode in ("structure", "full"):
        rc = run_structure_analysis(opts)
        results["结构分析"] = rc == 0
        if mode == "structure":
            sys.exit(rc)

    # Consistency check
    if mode in ("consistency", "full"):
        rc = run_consistency_check(opts)
        results["一致性检查"] = rc == 0
        if mode == "consistency":
            sys.exit(rc)

    structure_ok = results.get("结构分析", False)
    consistency_ok = results.get("一致性检查", False)

    # Merge the reports (full mode only, and only if something succeeded).
    if mode == "full" and (structure_ok or consistency_ok):
        print(f"\n{rule}")
        print("[报告合并] 开始合并...")
        print(rule)
        merged = merge_reports(structure_ok, consistency_ok)
        print(f"\n✅ 统一报告已生成: {merged}" if merged else "\n⚠️ 报告合并失败")

    # Final summary
    print(f"\n{rule}")
    print("ETL 统一分析完成")
    print(rule)
    for stage, succeeded in results.items():
        print(f" {stage}: {'✅ 成功' if succeeded else '❌ 失败'}")
    print(rule)

    if not all(results.values()):
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()