在前后端开发联调前 的提交20260223
This commit is contained in:
@@ -105,7 +105,13 @@ def _iter_rows(
|
||||
def _build_report_path(out_arg: str | None) -> Path:
|
||||
if out_arg:
|
||||
return Path(out_arg)
|
||||
reports_dir = PROJECT_ROOT / "reports"
|
||||
env_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not env_root:
|
||||
raise KeyError(
|
||||
"环境变量 ETL_REPORT_ROOT 未定义。"
|
||||
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
|
||||
)
|
||||
reports_dir = Path(env_root)
|
||||
reports_dir.mkdir(parents=True, exist_ok=True)
|
||||
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
return reports_dir / f"ods_content_hash_check_{ts}.json"
|
||||
|
||||
@@ -986,7 +986,7 @@ def main() -> int:
|
||||
else:
|
||||
tag = f"_{args.tag}" if args.tag else ""
|
||||
stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
|
||||
out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
|
||||
out_path = Path(os.environ.get("ETL_REPORT_ROOT", PROJECT_ROOT / "reports")) / f"ods_gap_check{tag}_{stamp}.json"
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
|
||||
logger.info("REPORT_WRITTEN path=%s", out_path)
|
||||
|
||||
@@ -69,7 +69,9 @@ def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
|
||||
def main() -> None:
|
||||
"""主流程:遍历 FILE_MAPPING 中的 ODS 表,检查 JSON 键覆盖情况并打印报告。"""
|
||||
dsn = os.environ.get("PG_DSN")
|
||||
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", "export/test-json-doc"))
|
||||
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR") or os.environ.get("ODS_JSON_DOC_DIR", ""))
|
||||
if not str(json_dir):
|
||||
raise KeyError("环境变量 JSON_DOC_DIR 或 ODS_JSON_DOC_DIR 未定义。请在根 .env 中配置。")
|
||||
|
||||
ods_cols_map = _load_ods_columns(dsn)
|
||||
|
||||
|
||||
@@ -187,7 +187,10 @@ def main():
|
||||
print(f"总计: {len(results)} 个表, {issues} 个有 JSON→MD 缺失")
|
||||
|
||||
# 输出 JSON 格式供后续处理
|
||||
out_path = os.path.join("docs", "reports", "json_vs_md_gaps.json")
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _report_root:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
out_path = os.path.join(_report_root, "json_vs_md_gaps.json")
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
print(f"\n详细结果已写入: {out_path}")
|
||||
|
||||
@@ -244,7 +244,10 @@ def main():
|
||||
conn.close()
|
||||
|
||||
# ── 输出 JSON 报告 ──
|
||||
report_json = os.path.join("docs", "reports", "api_ods_comparison.json")
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _report_root:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
report_json = os.path.join(_report_root, "api_ods_comparison.json")
|
||||
os.makedirs(os.path.dirname(report_json), exist_ok=True)
|
||||
# 序列化时把 tuple 转 list
|
||||
json_results = []
|
||||
@@ -261,7 +264,7 @@ def main():
|
||||
json.dump(json_results, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# ── 输出 Markdown 报告 ──
|
||||
report_md = os.path.join("docs", "reports", "api_ods_comparison.md")
|
||||
report_md = os.path.join(_report_root, "api_ods_comparison.md")
|
||||
with open(report_md, "w", encoding="utf-8") as f:
|
||||
f.write("# API JSON 字段 vs ODS 表列 对比报告\n\n")
|
||||
f.write("> 自动生成于 2026-02-13 | 数据来源:数据库实际表结构 + API 参考文档\n")
|
||||
|
||||
@@ -407,7 +407,10 @@ def main():
|
||||
alter_sqls = generate_alter_sql(results, ods_tables)
|
||||
|
||||
# 输出 JSON 报告
|
||||
json_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.json")
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _report_root:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
json_path = os.path.join(_report_root, "api_ods_comparison_v2.json")
|
||||
os.makedirs(os.path.dirname(json_path), exist_ok=True)
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
@@ -415,7 +418,7 @@ def main():
|
||||
|
||||
# 输出 Markdown 报告
|
||||
md_report = generate_markdown_report(results, alter_sqls)
|
||||
md_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.md")
|
||||
md_path = os.path.join(_report_root, "api_ods_comparison_v2.md")
|
||||
with open(md_path, "w", encoding="utf-8") as f:
|
||||
f.write(md_report)
|
||||
print(f"Markdown 报告: {md_path}")
|
||||
|
||||
@@ -319,7 +319,7 @@ def main():
|
||||
print(f"\n ⚠️ {entry['table']} — {entry['note']} (ODS字段数: {entry['ods_count']})")
|
||||
|
||||
# JSON 输出
|
||||
json_path = Path("docs/reports/ods_vs_summary_comparison_v2.json")
|
||||
json_path = Path(os.environ.get("ETL_REPORT_ROOT", "reports")) / "ods_vs_summary_comparison_v2.json"
|
||||
json_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(report, f, ensure_ascii=False, indent=2)
|
||||
|
||||
@@ -845,7 +845,10 @@ def main():
|
||||
md_content = generate_report(report, coupling)
|
||||
|
||||
# 确定输出路径
|
||||
reports_dir = root / "docs" / "reports"
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _report_root:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
reports_dir = Path(_report_root)
|
||||
reports_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if args.output:
|
||||
|
||||
@@ -31,7 +31,10 @@ from typing import Any
|
||||
_SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
_FEIQIU_ROOT = _SCRIPT_DIR.parent.parent # apps/etl/connectors/feiqiu
|
||||
_OUTPUT_DIR = _SCRIPT_DIR / "output"
|
||||
_REPORTS_DIR = _FEIQIU_ROOT / "docs" / "reports"
|
||||
_etl_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _etl_report_root:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
_REPORTS_DIR = Path(_etl_report_root)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -17,6 +17,7 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
@@ -1148,7 +1149,10 @@ def run_blackbox_check(
|
||||
logger.info("JSON 报告: %s", json_path)
|
||||
|
||||
# 输出 Markdown
|
||||
reports_dir = _FEIQIU_ROOT / "docs" / "reports"
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _report_root:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
reports_dir = Path(_report_root)
|
||||
reports_dir.mkdir(parents=True, exist_ok=True)
|
||||
md_path = reports_dir / f"blackbox_report_{ts}.md"
|
||||
md_content = _generate_markdown_report(report)
|
||||
|
||||
@@ -170,7 +170,7 @@ def check_invalid_flow_rejection() -> DebugResult:
|
||||
config=_stub_config, task_executor=None, task_registry=None,
|
||||
db_conn=None, api_client=None, logger=logging.getLogger("test"),
|
||||
)
|
||||
runner.run(pipeline=name)
|
||||
runner.run(flow=name)
|
||||
errors_missed.append(name)
|
||||
except ValueError as exc:
|
||||
errors_raised.append({"name": name, "error": str(exc)})
|
||||
@@ -738,8 +738,8 @@ def check_cli_mode_detection() -> DebugResult:
|
||||
"""验证 CLI 的 Flow 模式 vs 传统模式判断逻辑。
|
||||
|
||||
通过检查 main() 源码确认:
|
||||
- 有 --pipeline 参数 → Flow 模式(使用 FlowRunner)
|
||||
- 无 --pipeline 参数 → 传统模式(使用 TaskExecutor.run_tasks)
|
||||
- 有 --flow 参数 → Flow 模式(使用 FlowRunner)
|
||||
- 无 --flow 参数 → 传统模式(使用 TaskExecutor.run_tasks)
|
||||
"""
|
||||
import inspect
|
||||
from cli.main import main as cli_main
|
||||
@@ -753,14 +753,14 @@ def check_cli_mode_detection() -> DebugResult:
|
||||
source = inspect.getsource(cli_main)
|
||||
|
||||
# 检查 Flow 模式分支
|
||||
if "args.pipeline" in source and "FlowRunner" in source:
|
||||
checks.append("✓ 有 --pipeline 参数时使用 FlowRunner(Flow 模式)")
|
||||
if "args.flow" in source and "FlowRunner" in source:
|
||||
checks.append("✓ 有 --flow 参数时使用 FlowRunner(Flow 模式)")
|
||||
else:
|
||||
issues.append("未找到 Flow 模式分支(args.pipeline + FlowRunner)")
|
||||
issues.append("未找到 Flow 模式分支(args.flow + FlowRunner)")
|
||||
|
||||
# 检查传统模式分支
|
||||
if "run_tasks" in source:
|
||||
checks.append("✓ 无 --pipeline 参数时使用 run_tasks(传统模式)")
|
||||
checks.append("✓ 无 --flow 参数时使用 run_tasks(传统模式)")
|
||||
else:
|
||||
issues.append("未找到传统模式分支(run_tasks)")
|
||||
|
||||
@@ -814,13 +814,13 @@ def check_cli_flow_choices() -> DebugResult:
|
||||
cli_choices = set(FlowRunner.FLOW_LAYERS.keys())
|
||||
issues.append("无法导入 FLOW_CHOICES,使用 FLOW_LAYERS 键集合")
|
||||
|
||||
pipeline_keys = set(FlowRunner.FLOW_LAYERS.keys())
|
||||
flow_keys = set(FlowRunner.FLOW_LAYERS.keys())
|
||||
|
||||
missing_in_cli = pipeline_keys - cli_choices
|
||||
extra_in_cli = cli_choices - pipeline_keys
|
||||
missing_in_cli = flow_keys - cli_choices
|
||||
extra_in_cli = cli_choices - flow_keys
|
||||
|
||||
result.details = {
|
||||
"flow_layers_keys": sorted(pipeline_keys),
|
||||
"flow_layers_keys": sorted(flow_keys),
|
||||
"cli_choices": sorted(cli_choices),
|
||||
"missing_in_cli": sorted(missing_in_cli),
|
||||
"extra_in_cli": sorted(extra_in_cli),
|
||||
@@ -836,7 +836,7 @@ def check_cli_flow_choices() -> DebugResult:
|
||||
result.message = "; ".join(issues)
|
||||
else:
|
||||
result.status = "PASS"
|
||||
result.message = f"CLI --flow 可选值与 FLOW_LAYERS 完全一致 ({len(pipeline_keys)} 种)"
|
||||
result.message = f"CLI --flow 可选值与 FLOW_LAYERS 完全一致 ({len(flow_keys)} 种)"
|
||||
|
||||
result.duration_sec = round(time.monotonic() - t0, 4)
|
||||
return result
|
||||
@@ -847,7 +847,7 @@ def check_cli_flow_choices() -> DebugResult:
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
|
||||
def check_processing_modes() -> DebugResult:
|
||||
"""验证 FlowRunner.run() 对三种处理模式的分支逻辑。
|
||||
"""验证 FlowRunner.run() 对四种处理模式的分支逻辑。
|
||||
|
||||
- increment_only: 仅执行增量 ETL
|
||||
- verify_only: 跳过增量 ETL,直接执行校验
|
||||
@@ -899,7 +899,7 @@ def check_processing_modes() -> DebugResult:
|
||||
result.message = f"处理模式验证有 {len(issues)} 个问题"
|
||||
else:
|
||||
result.status = "PASS"
|
||||
result.message = "三种处理模式(increment_only/verify_only/increment_verify)逻辑正确"
|
||||
result.message = "四种处理模式(increment_only/verify_only/increment_verify/full_window)逻辑正确"
|
||||
|
||||
result.duration_sec = round(time.monotonic() - t0, 4)
|
||||
return result
|
||||
|
||||
@@ -6,11 +6,11 @@ Debug 报告生成脚本 —— 汇总所有阶段的调试结果,生成结构
|
||||
- 阶段1: 属性测试结果(pytest 执行)
|
||||
- 阶段2: 全量刷新 JSON(scripts/debug/output/full_refresh_*.json)
|
||||
- 阶段3: 黑盒校验 JSON(scripts/debug/output/blackbox_*.json)
|
||||
- 阶段4: 架构分析报告(docs/reports/architecture_report_*.md)
|
||||
- 阶段5: 性能分析报告(docs/reports/performance_report_*.md)
|
||||
- 阶段4: 架构分析报告($ETL_REPORT_ROOT/architecture_report_*.md)
|
||||
- 阶段5: 性能分析报告($ETL_REPORT_ROOT/performance_report_*.md)
|
||||
|
||||
输出:
|
||||
docs/reports/debug_report_YYYYMMDD.md
|
||||
$ETL_REPORT_ROOT/debug_report_YYYYMMDD.md
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -18,6 +18,7 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
@@ -25,13 +26,26 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# 加载根 .env
|
||||
load_dotenv(Path(__file__).resolve().parents[5] / ".env", override=False)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 路径常量
|
||||
# ---------------------------------------------------------------------------
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
ETL_ROOT = SCRIPT_DIR.parent.parent # apps/etl/connectors/feiqiu
|
||||
OUTPUT_DIR = SCRIPT_DIR / "output"
|
||||
REPORTS_DIR = ETL_ROOT / "docs" / "reports"
|
||||
|
||||
_report_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not _report_root:
|
||||
raise KeyError(
|
||||
"环境变量 ETL_REPORT_ROOT 未定义。"
|
||||
"请在根 .env 中配置,参考 .env.template 和 docs/deployment/EXPORT-PATHS.md"
|
||||
)
|
||||
REPORTS_DIR = Path(_report_root)
|
||||
|
||||
TESTS_DIR = ETL_ROOT / "tests" / "unit"
|
||||
|
||||
# 属性测试文件
|
||||
|
||||
@@ -40,7 +40,9 @@ LIMIT = 100
|
||||
|
||||
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
|
||||
DOCS_DIR = os.path.join("docs", "api-reference")
|
||||
REPORT_DIR = os.path.join("docs", "reports")
|
||||
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not REPORT_DIR:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")
|
||||
|
||||
HEADERS = {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
python -m scripts.rebuild.rebuild_db_and_run_ods_to_dwd ^
|
||||
--dsn "postgresql://user:pwd@host:5432/db" ^
|
||||
--store-id 1 ^
|
||||
--json-dir "export/test-json-doc" ^
|
||||
--json-dir "$ODS_JSON_DOC_DIR" ^
|
||||
--drop-schemas
|
||||
|
||||
环境变量(可选):
|
||||
@@ -44,7 +44,7 @@ from tasks.utility.init_schema_task import InitOdsSchemaTask
|
||||
from tasks.utility.manual_ingest_task import ManualIngestTask
|
||||
|
||||
|
||||
DEFAULT_JSON_DIR = "export/test-json-doc"
|
||||
DEFAULT_JSON_DIR = os.environ.get("ODS_JSON_DOC_DIR") or os.environ.get("INGEST_SOURCE_DIR") or ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
||||
@@ -35,7 +35,9 @@ LIMIT = 100
|
||||
|
||||
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
|
||||
DOCS_DIR = os.path.join("docs", "api-reference")
|
||||
REPORT_DIR = os.path.join("docs", "reports")
|
||||
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not REPORT_DIR:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
|
||||
HEADERS = {
|
||||
"Authorization": f"Bearer {API_TOKEN}",
|
||||
|
||||
@@ -84,7 +84,13 @@ def _fetch_pk_columns(conn, schema: str, table: str) -> list[str]:
|
||||
def _build_report_path(out_arg: str | None) -> Path:
|
||||
if out_arg:
|
||||
return Path(out_arg)
|
||||
reports_dir = PROJECT_ROOT / "reports"
|
||||
env_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not env_root:
|
||||
raise KeyError(
|
||||
"环境变量 ETL_REPORT_ROOT 未定义。"
|
||||
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
|
||||
)
|
||||
reports_dir = Path(env_root)
|
||||
reports_dir.mkdir(parents=True, exist_ok=True)
|
||||
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
return reports_dir / f"ods_snapshot_dedupe_{ts}.json"
|
||||
|
||||
@@ -106,7 +106,13 @@ def _iter_rows(
|
||||
def _build_report_path(out_arg: str | None) -> Path:
|
||||
if out_arg:
|
||||
return Path(out_arg)
|
||||
reports_dir = PROJECT_ROOT / "reports"
|
||||
env_root = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not env_root:
|
||||
raise KeyError(
|
||||
"环境变量 ETL_REPORT_ROOT 未定义。"
|
||||
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
|
||||
)
|
||||
reports_dir = Path(env_root)
|
||||
reports_dir.mkdir(parents=True, exist_ok=True)
|
||||
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
return reports_dir / f"ods_content_hash_repair_{ts}.json"
|
||||
|
||||
@@ -7,7 +7,9 @@ import json
|
||||
import os
|
||||
|
||||
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
|
||||
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
|
||||
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not REPORT_DIR:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
|
||||
NESTED_OBJECTS = {"siteprofile", "tableprofile"}
|
||||
|
||||
|
||||
@@ -24,7 +24,9 @@ from datetime import datetime
|
||||
|
||||
DOCS_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference")
|
||||
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")
|
||||
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
|
||||
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not REPORT_DIR:
|
||||
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
|
||||
ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
|
||||
|
||||
TABLES = [
|
||||
|
||||
Reference in New Issue
Block a user