在前后端开发联调前的提交20260223

2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions
--- a/apps/etl/connectors/feiqiu/scripts/check/check_ods_content_hash.py
+++ b/apps/etl/connectors/feiqiu/scripts/check/check_ods_content_hash.py
@@ -105,7 +105,13 @@ def _iter_rows(
 def _build_report_path(out_arg: str | None) -> Path:
    if out_arg:
        return Path(out_arg)
-    reports_dir = PROJECT_ROOT / "reports"
+    env_root = os.environ.get("ETL_REPORT_ROOT")
+    if not env_root:
+        raise KeyError(
+            "环境变量 ETL_REPORT_ROOT 未定义。"
+            "请在根 .env 中配置，参考 docs/deployment/EXPORT-PATHS.md"
+        )
+    reports_dir = Path(env_root)
    reports_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    return reports_dir / f"ods_content_hash_check_{ts}.json"
--- a/apps/etl/connectors/feiqiu/scripts/check/check_ods_gaps.py
+++ b/apps/etl/connectors/feiqiu/scripts/check/check_ods_gaps.py
@@ -986,7 +986,7 @@ def main() -> int:
        else:
            tag = f"_{args.tag}" if args.tag else ""
            stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
-            out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
+            out_path = Path(os.environ.get("ETL_REPORT_ROOT", PROJECT_ROOT / "reports")) / f"ods_gap_check{tag}_{stamp}.json"
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
        logger.info("REPORT_WRITTEN path=%s", out_path)
--- a/apps/etl/connectors/feiqiu/scripts/check/check_ods_json_vs_table.py
+++ b/apps/etl/connectors/feiqiu/scripts/check/check_ods_json_vs_table.py
@@ -69,7 +69,9 @@ def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
 def main() -> None:
    """主流程：遍历 FILE_MAPPING 中的 ODS 表，检查 JSON 键覆盖情况并打印报告。"""
    dsn = os.environ.get("PG_DSN")
-    json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", "export/test-json-doc"))
+    json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR") or os.environ.get("ODS_JSON_DOC_DIR", ""))
+    if not str(json_dir):
+        raise KeyError("环境变量 JSON_DOC_DIR 或 ODS_JSON_DOC_DIR 未定义。请在根 .env 中配置。")

    ods_cols_map = _load_ods_columns(dsn)

--- a/apps/etl/connectors/feiqiu/scripts/check_json_vs_md.py
+++ b/apps/etl/connectors/feiqiu/scripts/check_json_vs_md.py
@@ -187,7 +187,10 @@ def main():
    print(f"总计: {len(results)} 个表, {issues} 个有 JSON→MD 缺失")

    # 输出 JSON 格式供后续处理
-    out_path = os.path.join("docs", "reports", "json_vs_md_gaps.json")
+    _report_root = os.environ.get("ETL_REPORT_ROOT")
+    if not _report_root:
+        raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
+    out_path = os.path.join(_report_root, "json_vs_md_gaps.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已写入: {out_path}")
--- a/apps/etl/connectors/feiqiu/scripts/compare_api_ods.py
+++ b/apps/etl/connectors/feiqiu/scripts/compare_api_ods.py
@@ -244,7 +244,10 @@ def main():
    conn.close()

    # ── 输出 JSON 报告 ──
-    report_json = os.path.join("docs", "reports", "api_ods_comparison.json")
+    _report_root = os.environ.get("ETL_REPORT_ROOT")
+    if not _report_root:
+        raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
+    report_json = os.path.join(_report_root, "api_ods_comparison.json")
    os.makedirs(os.path.dirname(report_json), exist_ok=True)
    # 序列化时把 tuple 转 list
    json_results = []
@@ -261,7 +264,7 @@ def main():
        json.dump(json_results, f, ensure_ascii=False, indent=2)

    # ── 输出 Markdown 报告 ──
-    report_md = os.path.join("docs", "reports", "api_ods_comparison.md")
+    report_md = os.path.join(_report_root, "api_ods_comparison.md")
    with open(report_md, "w", encoding="utf-8") as f:
        f.write("# API JSON 字段 vs ODS 表列 对比报告\n\n")
        f.write("> 自动生成于 2026-02-13 | 数据来源：数据库实际表结构 + API 参考文档\n")
--- a/apps/etl/connectors/feiqiu/scripts/compare_api_ods_v2.py
+++ b/apps/etl/connectors/feiqiu/scripts/compare_api_ods_v2.py
@@ -407,7 +407,10 @@ def main():
    alter_sqls = generate_alter_sql(results, ods_tables)

    # 输出 JSON 报告
-    json_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.json")
+    _report_root = os.environ.get("ETL_REPORT_ROOT")
+    if not _report_root:
+        raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
+    json_path = os.path.join(_report_root, "api_ods_comparison_v2.json")
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
@@ -415,7 +418,7 @@ def main():

    # 输出 Markdown 报告
    md_report = generate_markdown_report(results, alter_sqls)
-    md_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.md")
+    md_path = os.path.join(_report_root, "api_ods_comparison_v2.md")
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md_report)
    print(f"Markdown 报告: {md_path}")
--- a/apps/etl/connectors/feiqiu/scripts/compare_ods_vs_summary_v2.py
+++ b/apps/etl/connectors/feiqiu/scripts/compare_ods_vs_summary_v2.py
@@ -319,7 +319,7 @@ def main():
            print(f"\n  ⚠️  {entry['table']} — {entry['note']} (ODS字段数: {entry['ods_count']})")

    # JSON 输出
-    json_path = Path("docs/reports/ods_vs_summary_comparison_v2.json")
+    json_path = Path(os.environ.get("ETL_REPORT_ROOT", "reports")) / "ods_vs_summary_comparison_v2.json"
    json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
--- a/apps/etl/connectors/feiqiu/scripts/debug/analyze_architecture.py
+++ b/apps/etl/connectors/feiqiu/scripts/debug/analyze_architecture.py
@@ -845,7 +845,10 @@ def main():
    md_content = generate_report(report, coupling)

    # 确定输出路径
-    reports_dir = root / "docs" / "reports"
+    _report_root = os.environ.get("ETL_REPORT_ROOT")
+    if not _report_root:
+        raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
+    reports_dir = Path(_report_root)
    reports_dir.mkdir(parents=True, exist_ok=True)

    if args.output:
--- a/apps/etl/connectors/feiqiu/scripts/debug/analyze_performance.py
+++ b/apps/etl/connectors/feiqiu/scripts/debug/analyze_performance.py
@@ -31,7 +31,10 @@ from typing import Any
 _SCRIPT_DIR = Path(__file__).resolve().parent
 _FEIQIU_ROOT = _SCRIPT_DIR.parent.parent          # apps/etl/connectors/feiqiu
 _OUTPUT_DIR = _SCRIPT_DIR / "output"
-_REPORTS_DIR = _FEIQIU_ROOT / "docs" / "reports"
+_etl_report_root = os.environ.get("ETL_REPORT_ROOT")
+if not _etl_report_root:
+    raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
+_REPORTS_DIR = Path(_etl_report_root)


 # ---------------------------------------------------------------------------
--- a/apps/etl/connectors/feiqiu/scripts/debug/debug_blackbox.py
+++ b/apps/etl/connectors/feiqiu/scripts/debug/debug_blackbox.py
@@ -17,6 +17,7 @@ from __future__ import annotations
 import argparse
 import json
 import logging
+import os
 import sys
 import time
 import traceback
@@ -1148,7 +1149,10 @@ def run_blackbox_check(
    logger.info("JSON 报告: %s", json_path)

    # 输出 Markdown
-    reports_dir = _FEIQIU_ROOT / "docs" / "reports"
+    _report_root = os.environ.get("ETL_REPORT_ROOT")
+    if not _report_root:
+        raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
+    reports_dir = Path(_report_root)
    reports_dir.mkdir(parents=True, exist_ok=True)
    md_path = reports_dir / f"blackbox_report_{ts}.md"
    md_content = _generate_markdown_report(report)
--- a/apps/etl/connectors/feiqiu/scripts/debug/debug_orchestration.py
+++ b/apps/etl/connectors/feiqiu/scripts/debug/debug_orchestration.py
@@ -170,7 +170,7 @@ def check_invalid_flow_rejection() -> DebugResult:
                config=_stub_config, task_executor=None, task_registry=None,
                db_conn=None, api_client=None, logger=logging.getLogger("test"),
            )
-            runner.run(pipeline=name)
+            runner.run(flow=name)
            errors_missed.append(name)
        except ValueError as exc:
            errors_raised.append({"name": name, "error": str(exc)})
@@ -738,8 +738,8 @@ def check_cli_mode_detection() -> DebugResult:
    """验证 CLI 的 Flow 模式 vs 传统模式判断逻辑。

    通过检查 main() 源码确认：
-    - 有 --pipeline 参数 → Flow 模式（使用 FlowRunner）
-    - 无 --pipeline 参数 → 传统模式（使用 TaskExecutor.run_tasks）
+    - 有 --flow 参数 → Flow 模式（使用 FlowRunner）
+    - 无 --flow 参数 → 传统模式（使用 TaskExecutor.run_tasks）
    """
    import inspect
    from cli.main import main as cli_main
@@ -753,14 +753,14 @@ def check_cli_mode_detection() -> DebugResult:
    source = inspect.getsource(cli_main)

    # 检查 Flow 模式分支
-    if "args.pipeline" in source and "FlowRunner" in source:
-        checks.append("✓ 有 --pipeline 参数时使用 FlowRunner（Flow 模式）")
+    if "args.flow" in source and "FlowRunner" in source:
+        checks.append("✓ 有 --flow 参数时使用 FlowRunner（Flow 模式）")
    else:
-        issues.append("未找到 Flow 模式分支（args.pipeline + FlowRunner）")
+        issues.append("未找到 Flow 模式分支（args.flow + FlowRunner）")

    # 检查传统模式分支
    if "run_tasks" in source:
-        checks.append("✓ 无 --pipeline 参数时使用 run_tasks（传统模式）")
+        checks.append("✓ 无 --flow 参数时使用 run_tasks（传统模式）")
    else:
        issues.append("未找到传统模式分支（run_tasks）")

@@ -814,13 +814,13 @@ def check_cli_flow_choices() -> DebugResult:
        cli_choices = set(FlowRunner.FLOW_LAYERS.keys())
        issues.append("无法导入 FLOW_CHOICES，使用 FLOW_LAYERS 键集合")

-    pipeline_keys = set(FlowRunner.FLOW_LAYERS.keys())
+    flow_keys = set(FlowRunner.FLOW_LAYERS.keys())

-    missing_in_cli = pipeline_keys - cli_choices
-    extra_in_cli = cli_choices - pipeline_keys
+    missing_in_cli = flow_keys - cli_choices
+    extra_in_cli = cli_choices - flow_keys

    result.details = {
-        "flow_layers_keys": sorted(pipeline_keys),
+        "flow_layers_keys": sorted(flow_keys),
        "cli_choices": sorted(cli_choices),
        "missing_in_cli": sorted(missing_in_cli),
        "extra_in_cli": sorted(extra_in_cli),
@@ -836,7 +836,7 @@ def check_cli_flow_choices() -> DebugResult:
        result.message = "; ".join(issues)
    else:
        result.status = "PASS"
-        result.message = f"CLI --flow 可选值与 FLOW_LAYERS 完全一致 ({len(pipeline_keys)} 种)"
+        result.message = f"CLI --flow 可选值与 FLOW_LAYERS 完全一致 ({len(flow_keys)} 种)"

    result.duration_sec = round(time.monotonic() - t0, 4)
    return result
@@ -847,7 +847,7 @@ def check_cli_flow_choices() -> DebugResult:
 # ══════════════════════════════════════════════════════════════

 def check_processing_modes() -> DebugResult:
-    """验证 FlowRunner.run() 对三种处理模式的分支逻辑。
+    """验证 FlowRunner.run() 对四种处理模式的分支逻辑。

    - increment_only: 仅执行增量 ETL
    - verify_only: 跳过增量 ETL，直接执行校验
@@ -899,7 +899,7 @@ def check_processing_modes() -> DebugResult:
        result.message = f"处理模式验证有 {len(issues)} 个问题"
    else:
        result.status = "PASS"
-        result.message = "三种处理模式（increment_only/verify_only/increment_verify）逻辑正确"
+        result.message = "四种处理模式（increment_only/verify_only/increment_verify/full_window）逻辑正确"

    result.duration_sec = round(time.monotonic() - t0, 4)
    return result
--- a/apps/etl/connectors/feiqiu/scripts/debug/generate_report.py
+++ b/apps/etl/connectors/feiqiu/scripts/debug/generate_report.py
@@ -6,11 +6,11 @@ Debug 报告生成脚本 —— 汇总所有阶段的调试结果，生成结构
  - 阶段1: 属性测试结果（pytest 执行）
  - 阶段2: 全量刷新 JSON（scripts/debug/output/full_refresh_*.json）
  - 阶段3: 黑盒校验 JSON（scripts/debug/output/blackbox_*.json）
-  - 阶段4: 架构分析报告（docs/reports/architecture_report_*.md）
-  - 阶段5: 性能分析报告（docs/reports/performance_report_*.md）
+  - 阶段4: 架构分析报告（$ETL_REPORT_ROOT/architecture_report_*.md）
+  - 阶段5: 性能分析报告（$ETL_REPORT_ROOT/performance_report_*.md）

 输出：
-  docs/reports/debug_report_YYYYMMDD.md
+  $ETL_REPORT_ROOT/debug_report_YYYYMMDD.md
 """

 from __future__ import annotations
@@ -18,6 +18,7 @@ from __future__ import annotations
 import argparse
 import json
 import logging
+import os
 import re
 import sys
 from dataclasses import dataclass, field
@@ -25,13 +26,26 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any

+from dotenv import load_dotenv
+
+# 加载根 .env
+load_dotenv(Path(__file__).resolve().parents[5] / ".env", override=False)
+
 # ---------------------------------------------------------------------------
 # 路径常量
 # ---------------------------------------------------------------------------
 SCRIPT_DIR = Path(__file__).resolve().parent
 ETL_ROOT = SCRIPT_DIR.parent.parent          # apps/etl/connectors/feiqiu
 OUTPUT_DIR = SCRIPT_DIR / "output"
-REPORTS_DIR = ETL_ROOT / "docs" / "reports"
+
+_report_root = os.environ.get("ETL_REPORT_ROOT")
+if not _report_root:
+    raise KeyError(
+        "环境变量 ETL_REPORT_ROOT 未定义。"
+        "请在根 .env 中配置，参考 .env.template 和 docs/deployment/EXPORT-PATHS.md"
+    )
+REPORTS_DIR = Path(_report_root)
+
 TESTS_DIR = ETL_ROOT / "tests" / "unit"

 # 属性测试文件
--- a/apps/etl/connectors/feiqiu/scripts/full_api_refresh_v2.py
+++ b/apps/etl/connectors/feiqiu/scripts/full_api_refresh_v2.py
@@ -40,7 +40,9 @@ LIMIT = 100

 SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
 DOCS_DIR = os.path.join("docs", "api-reference")
-REPORT_DIR = os.path.join("docs", "reports")
+REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
+if not REPORT_DIR:
+    raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
 REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")

 HEADERS = {
--- a/apps/etl/connectors/feiqiu/scripts/rebuild/rebuild_db_and_run_ods_to_dwd.py
+++ b/apps/etl/connectors/feiqiu/scripts/rebuild/rebuild_db_and_run_ods_to_dwd.py
@@ -13,7 +13,7 @@
  python -m scripts.rebuild.rebuild_db_and_run_ods_to_dwd ^
    --dsn "postgresql://user:pwd@host:5432/db" ^
    --store-id 1 ^
-    --json-dir "export/test-json-doc" ^
+    --json-dir "%ODS_JSON_DOC_DIR%" ^
    --drop-schemas

 环境变量（可选）：
@@ -44,7 +44,7 @@ from tasks.utility.init_schema_task import InitOdsSchemaTask
 from tasks.utility.manual_ingest_task import ManualIngestTask


-DEFAULT_JSON_DIR = "export/test-json-doc"
+DEFAULT_JSON_DIR = os.environ.get("ODS_JSON_DOC_DIR") or os.environ.get("INGEST_SOURCE_DIR") or ""


@dataclass(frozen=True)
--- a/apps/etl/connectors/feiqiu/scripts/refresh_json_and_audit.py
+++ b/apps/etl/connectors/feiqiu/scripts/refresh_json_and_audit.py
@@ -35,7 +35,9 @@ LIMIT = 100

 SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
 DOCS_DIR = os.path.join("docs", "api-reference")
-REPORT_DIR = os.path.join("docs", "reports")
+REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
+if not REPORT_DIR:
+    raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")

 HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}",
--- a/apps/etl/connectors/feiqiu/scripts/repair/dedupe_ods_snapshots.py
+++ b/apps/etl/connectors/feiqiu/scripts/repair/dedupe_ods_snapshots.py
@@ -84,7 +84,13 @@ def _fetch_pk_columns(conn, schema: str, table: str) -> list[str]:
 def _build_report_path(out_arg: str | None) -> Path:
    if out_arg:
        return Path(out_arg)
-    reports_dir = PROJECT_ROOT / "reports"
+    env_root = os.environ.get("ETL_REPORT_ROOT")
+    if not env_root:
+        raise KeyError(
+            "环境变量 ETL_REPORT_ROOT 未定义。"
+            "请在根 .env 中配置，参考 docs/deployment/EXPORT-PATHS.md"
+        )
+    reports_dir = Path(env_root)
    reports_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    return reports_dir / f"ods_snapshot_dedupe_{ts}.json"
--- a/apps/etl/connectors/feiqiu/scripts/repair/repair_ods_content_hash.py
+++ b/apps/etl/connectors/feiqiu/scripts/repair/repair_ods_content_hash.py
@@ -106,7 +106,13 @@ def _iter_rows(
 def _build_report_path(out_arg: str | None) -> Path:
    if out_arg:
        return Path(out_arg)
-    reports_dir = PROJECT_ROOT / "reports"
+    env_root = os.environ.get("ETL_REPORT_ROOT")
+    if not env_root:
+        raise KeyError(
+            "环境变量 ETL_REPORT_ROOT 未定义。"
+            "请在根 .env 中配置，参考 docs/deployment/EXPORT-PATHS.md"
+        )
+    reports_dir = Path(env_root)
    reports_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    return reports_dir / f"ods_content_hash_repair_{ts}.json"
--- a/apps/etl/connectors/feiqiu/scripts/run_compare_v3.py
+++ b/apps/etl/connectors/feiqiu/scripts/run_compare_v3.py
@@ -7,7 +7,9 @@ import json
 import os

 SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
-REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
+REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
+if not REPORT_DIR:
+    raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
 ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
 NESTED_OBJECTS = {"siteprofile", "tableprofile"}

--- a/apps/etl/connectors/feiqiu/scripts/run_compare_v3_fixed.py
+++ b/apps/etl/connectors/feiqiu/scripts/run_compare_v3_fixed.py
@@ -24,7 +24,9 @@ from datetime import datetime

 DOCS_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference")
 SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")
-REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
+REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
+if not REPORT_DIR:
+    raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
 ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}

 TABLES = [