# -*- coding: utf-8 -*-
"""
ETL 统一分析 — 编排入口

合并「数据流结构分析」和「ETL 数据一致性检查」为一个统一流程。
支持三种模式：
  --mode structure    仅结构分析（analyze_dataflow + gen_dataflow_report）
  --mode consistency  仅一致性检查（etl_consistency_check）
  --mode full         全部执行（默认）

数据源：
  默认主动调 API 采集最近 60 天数据。
  --source etl-log    切换为读 ETL 落盘 JSON（一致性检查模式）

用法:
    cd C:\\NeoZQYY
    uv run python scripts/ops/etl_unified_analysis.py
    uv run python scripts/ops/etl_unified_analysis.py --mode consistency --source etl-log
    uv run python scripts/ops/etl_unified_analysis.py --mode structure --date-from 2026-01-01
"""
from __future__ import annotations

import argparse
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

# Make sure scripts/ops (this file's directory) is on sys.path so that
# `import _env_paths` below resolves.
_SCRIPT_DIR = Path(__file__).resolve().parent
if str(_SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(_SCRIPT_DIR))

# Must stay below the sys.path tweak above: _env_paths is imported as a
# top-level module and is only importable once its directory is on sys.path.
from _env_paths import get_output_path, ensure_repo_root

# NOTE(review): presumably validates or switches to the repository root before
# any relative script path is used — confirm against _env_paths.
ensure_repo_root()

# All timestamps are pinned to Asia/Shanghai. NOW is captured once at import
# time, and TS (the compact form used in the merged report's file name) is
# derived from that same instant, so the report header and file name agree.
TZ = ZoneInfo("Asia/Shanghai")
NOW = datetime.now(TZ)
TS = NOW.strftime("%Y%m%d_%H%M%S")


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the unified ETL analysis entry point.

    Recognised options:
        --mode       structure / consistency / full (default: full)
        --source     api / etl-log (default: api)
        --date-from  start date string (default: None)
        --date-to    end date string (default: None)
        --limit      integer record cap per endpoint (default: 200)
        --tables     comma-separated table names (default: None)

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # One (flag, add_argument keyword arguments) entry per option, registered
    # in this order so the generated --help output keeps the same layout.
    option_specs: list[tuple[str, dict]] = [
        ("--mode", {
            "choices": ["structure", "consistency", "full"],
            "default": "full",
            "help": "执行模式：structure=仅结构分析, consistency=仅一致性检查, full=全部（默认）",
        }),
        ("--source", {
            "choices": ["api", "etl-log"],
            "default": "api",
            "help": "数据源：api=主动调 API 采集（默认）, etl-log=读 ETL 落盘 JSON",
        }),
        ("--date-from", {
            "type": str,
            "default": None,
            "help": "数据获取起始日期 (YYYY-MM-DD)，默认 60 天前",
        }),
        ("--date-to", {
            "type": str,
            "default": None,
            "help": "数据获取截止日期 (YYYY-MM-DD)，默认今天",
        }),
        ("--limit", {
            "type": int,
            "default": 200,
            "help": "每端点最大记录数（默认 200）",
        }),
        ("--tables", {
            "type": str,
            "default": None,
            "help": "要分析的表名列表（逗号分隔，缺省=全部）",
        }),
    ]

    cli = argparse.ArgumentParser(
        description="ETL 统一分析 — 结构分析 + 数据一致性检查",
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli


def run_subprocess(cmd: list[str], label: str) -> int:
    """Run *cmd* as a child process from the project root and return its exit code.

    No output redirection is configured, so the child writes straight to this
    process's stdout/stderr. A banner is printed before the run and a
    success/failure line after it.

    Args:
        cmd: Argument vector to execute.
        label: Human-readable step name used in the printed messages.

    Returns:
        The child's exit code.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(f"[{label}] 开始执行...")
    print("  命令: " + " ".join(cmd))
    print(banner + "\n")

    # This file sits in scripts/ops/, so parents[2] is the project root; the
    # relative script paths passed in by callers are resolved against it.
    project_root = Path(__file__).resolve().parents[2]
    exit_code = subprocess.run(cmd, cwd=str(project_root)).returncode

    if exit_code == 0:
        print(f"\n✅ [{label}] 执行完成")
    else:
        print(f"\n❌ [{label}] 执行失败 (exit code: {exit_code})")
    return exit_code


def run_structure_analysis(args: argparse.Namespace) -> int:
    """Run the dataflow structure analysis: data collection, then report generation.

    Stage 1 invokes ``scripts/ops/analyze_dataflow.py`` with the date window,
    record limit and table filter taken from *args*. Stage 2 invokes
    ``scripts/ops/gen_dataflow_report.py`` and only runs when stage 1 exits
    with code 0.

    Args:
        args: Parsed CLI namespace; reads ``date_from``, ``date_to``,
            ``limit`` and ``tables``.

    Returns:
        The non-zero exit code of the collection stage if it failed,
        otherwise the exit code of the report-generation stage.
    """
    from datetime import timedelta

    # Stage 1: data collection.
    # The default window start (60 days back) is derived from NOW, which is
    # pinned to Asia/Shanghai, instead of the host-local date.today(). This
    # keeps the default window consistent with the report timestamps even
    # when the script runs on a machine in another timezone.
    date_from = args.date_from or (NOW.date() - timedelta(days=60)).isoformat()
    collect_cmd = [
        sys.executable, "scripts/ops/analyze_dataflow.py",
        "--date-from", date_from,
    ]
    if args.date_to:
        collect_cmd += ["--date-to", args.date_to]
    # Compare against None rather than truthiness so that an explicit
    # `--limit 0` is forwarded instead of being silently dropped.
    if args.limit is not None:
        collect_cmd += ["--limit", str(args.limit)]
    if args.tables:
        collect_cmd += ["--tables", args.tables]

    rc = run_subprocess(collect_cmd, "数据流采集")
    if rc != 0:
        # Without freshly collected data there is nothing to report on.
        return rc

    # Stage 2: report generation.
    report_cmd = [sys.executable, "scripts/ops/gen_dataflow_report.py"]
    return run_subprocess(report_cmd, "数据流报告生成")


def run_consistency_check(args: argparse.Namespace) -> int:
    """Run the ETL data consistency check script and return its exit code.

    Args:
        args: Parsed CLI namespace. Currently unused: no option is forwarded
            to the child script.

    Returns:
        Exit code of ``scripts/ops/etl_consistency_check.py``.
    """
    # NOTE(review): `--source` is accepted by this CLI but is not passed on
    # here — confirm whether the child script is expected to receive it.
    return run_subprocess(
        [sys.executable, "scripts/ops/etl_consistency_check.py"],
        "ETL 数据一致性检查",
    )


def merge_reports(structure_ok: bool, consistency_ok: bool) -> Path | None:
    """Merge the latest dataflow and consistency reports into one Markdown file.

    For each step flagged as successful, the report whose path sorts last
    among the matching files is selected (``dataflow_*.md`` under
    SYSTEM_ANALYZE_ROOT, ``consistency_check_*.md`` under ETL_REPORT_ROOT).
    Each selected report is embedded under its own part heading. Every line of
    an embedded report that starts with ``"# "`` and contains that part's
    keyword is dropped, so the source title is not repeated.

    Args:
        structure_ok: Whether the structure analysis succeeded; when False the
            dataflow report is not looked up.
        consistency_ok: Whether the consistency check succeeded; when False
            the consistency report is not looked up.

    Returns:
        Path of the written ``etl_unified_analysis_<TS>.md`` file under
        ETL_REPORT_ROOT, or None when no source report was found.
    """

    def _last_sorted(folder: Path, pattern: str) -> Path | None:
        # The match that sorts last by path, or None when nothing matches.
        return max(folder.glob(pattern), default=None)

    def _body_without_title(report: Path, keyword: str) -> list[str]:
        # Drops every "# ..." line mentioning the keyword, not only the first.
        return [
            row
            for row in report.read_text(encoding="utf-8").splitlines()
            if not (row.startswith("# ") and keyword in row)
        ]

    report_root = get_output_path("ETL_REPORT_ROOT")
    analyze_root = get_output_path("SYSTEM_ANALYZE_ROOT")

    dataflow_report = (
        _last_sorted(analyze_root, "dataflow_*.md") if structure_ok else None
    )
    consistency_report = (
        _last_sorted(report_root, "consistency_check_*.md") if consistency_ok else None
    )

    if not (dataflow_report or consistency_report):
        print("⚠️ 没有找到任何报告文件，跳过合并")
        return None

    structure_mark = "✅" if structure_ok else "❌"
    consistency_mark = "✅" if consistency_ok else "❌"
    merged: list[str] = [
        "# ETL 统一分析报告",
        "",
        f"生成时间: {NOW.strftime('%Y-%m-%d %H:%M:%S')} CST",
        f"模式: 结构分析{structure_mark} + 一致性检查{consistency_mark}",
        "",
        "---",
        "",
    ]

    if dataflow_report:
        merged += ["# 第一部分：数据流结构分析", ""]
        merged += _body_without_title(dataflow_report, "数据流")
        merged += ["", "---", ""]

    if consistency_report:
        merged += ["# 第二部分：ETL 数据一致性检查", ""]
        merged += _body_without_title(consistency_report, "一致性")
        merged.append("")

    out_file = report_root / f"etl_unified_analysis_{TS}.md"
    out_file.write_text("\n".join(merged), encoding="utf-8")
    return out_file


def main() -> None:
    """Parse the CLI arguments, run the requested steps and print a summary.

    Exit behaviour:
        * In ``structure`` or ``consistency`` mode, a failing step terminates
          the process immediately with that step's exit code (no summary).
        * In ``full`` mode both steps always run; the reports are merged when
          at least one step succeeded, the summary is printed, and the process
          exits with code 1 if any step failed.
    """
    args = build_parser().parse_args()
    rule = "=" * 60

    print(rule)
    print("ETL 统一分析")
    print(rule)
    print(f"  模式:     {args.mode}")
    print(f"  数据源:   {args.source}")
    print(f"  日期范围: {args.date_from or '默认60天前'} ~ {args.date_to or '今天'}")
    print(rule)

    wants_structure = args.mode in ("structure", "full")
    wants_consistency = args.mode in ("consistency", "full")
    structure_ok = False
    consistency_ok = False

    # Structure analysis.
    if wants_structure:
        exit_code = run_structure_analysis(args)
        structure_ok = exit_code == 0
        if not structure_ok and args.mode == "structure":
            sys.exit(exit_code)

    # Consistency check.
    if wants_consistency:
        exit_code = run_consistency_check(args)
        consistency_ok = exit_code == 0
        if not consistency_ok and args.mode == "consistency":
            sys.exit(exit_code)

    # Merge the reports: full mode only, and only if at least one step succeeded.
    if args.mode == "full" and (structure_ok or consistency_ok):
        print("\n" + rule)
        print("[报告合并] 开始合并...")
        print(rule)
        merged = merge_reports(structure_ok, consistency_ok)
        if merged:
            print(f"\n✅ 统一报告已生成: {merged}")
        else:
            print("\n⚠️ 报告合并失败")

    # Final summary.
    print("\n" + rule)
    print("ETL 统一分析完成")
    print(rule)
    if wants_structure:
        print(f"  结构分析:   {'✅ 成功' if structure_ok else '❌ 失败'}")
    if wants_consistency:
        print(f"  一致性检查: {'✅ 成功' if consistency_ok else '❌ 失败'}")
    print(rule)

    # A requested step that did not succeed makes the whole run fail.
    any_failure = (wants_structure and not structure_ok) or (
        wants_consistency and not consistency_ok
    )
    if any_failure:
        sys.exit(1)


if __name__ == "__main__":
    main()