在前后端开发联调前的提交 20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -105,7 +105,13 @@ def _iter_rows(
def _build_report_path(out_arg: str | None) -> Path:
if out_arg:
return Path(out_arg)
reports_dir = PROJECT_ROOT / "reports"
env_root = os.environ.get("ETL_REPORT_ROOT")
if not env_root:
raise KeyError(
"环境变量 ETL_REPORT_ROOT 未定义。"
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
)
reports_dir = Path(env_root)
reports_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
return reports_dir / f"ods_content_hash_check_{ts}.json"

View File

@@ -986,7 +986,7 @@ def main() -> int:
else:
tag = f"_{args.tag}" if args.tag else ""
stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
out_path = Path(os.environ.get("ETL_REPORT_ROOT", PROJECT_ROOT / "reports")) / f"ods_gap_check{tag}_{stamp}.json"
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
logger.info("REPORT_WRITTEN path=%s", out_path)

View File

@@ -69,7 +69,9 @@ def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
def main() -> None:
"""主流程:遍历 FILE_MAPPING 中的 ODS 表,检查 JSON 键覆盖情况并打印报告。"""
dsn = os.environ.get("PG_DSN")
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", "export/test-json-doc"))
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR") or os.environ.get("ODS_JSON_DOC_DIR", ""))
if not str(json_dir):
raise KeyError("环境变量 JSON_DOC_DIR 或 ODS_JSON_DOC_DIR 未定义。请在根 .env 中配置。")
ods_cols_map = _load_ods_columns(dsn)

View File

@@ -187,7 +187,10 @@ def main():
print(f"总计: {len(results)} 个表, {issues} 个有 JSON→MD 缺失")
# 输出 JSON 格式供后续处理
out_path = os.path.join("docs", "reports", "json_vs_md_gaps.json")
_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _report_root:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
out_path = os.path.join(_report_root, "json_vs_md_gaps.json")
with open(out_path, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\n详细结果已写入: {out_path}")

View File

@@ -244,7 +244,10 @@ def main():
conn.close()
# ── 输出 JSON 报告 ──
report_json = os.path.join("docs", "reports", "api_ods_comparison.json")
_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _report_root:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
report_json = os.path.join(_report_root, "api_ods_comparison.json")
os.makedirs(os.path.dirname(report_json), exist_ok=True)
# 序列化时把 tuple 转 list
json_results = []
@@ -261,7 +264,7 @@ def main():
json.dump(json_results, f, ensure_ascii=False, indent=2)
# ── 输出 Markdown 报告 ──
report_md = os.path.join("docs", "reports", "api_ods_comparison.md")
report_md = os.path.join(_report_root, "api_ods_comparison.md")
with open(report_md, "w", encoding="utf-8") as f:
f.write("# API JSON 字段 vs ODS 表列 对比报告\n\n")
f.write("> 自动生成于 2026-02-13 | 数据来源:数据库实际表结构 + API 参考文档\n")

View File

@@ -407,7 +407,10 @@ def main():
alter_sqls = generate_alter_sql(results, ods_tables)
# 输出 JSON 报告
json_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.json")
_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _report_root:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
json_path = os.path.join(_report_root, "api_ods_comparison_v2.json")
os.makedirs(os.path.dirname(json_path), exist_ok=True)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
@@ -415,7 +418,7 @@ def main():
# 输出 Markdown 报告
md_report = generate_markdown_report(results, alter_sqls)
md_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.md")
md_path = os.path.join(_report_root, "api_ods_comparison_v2.md")
with open(md_path, "w", encoding="utf-8") as f:
f.write(md_report)
print(f"Markdown 报告: {md_path}")

View File

@@ -319,7 +319,7 @@ def main():
print(f"\n ⚠️ {entry['table']}{entry['note']} (ODS字段数: {entry['ods_count']})")
# JSON 输出
json_path = Path("docs/reports/ods_vs_summary_comparison_v2.json")
json_path = Path(os.environ.get("ETL_REPORT_ROOT", "reports")) / "ods_vs_summary_comparison_v2.json"
json_path.parent.mkdir(parents=True, exist_ok=True)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(report, f, ensure_ascii=False, indent=2)

View File

@@ -845,7 +845,10 @@ def main():
md_content = generate_report(report, coupling)
# 确定输出路径
reports_dir = root / "docs" / "reports"
_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _report_root:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
reports_dir = Path(_report_root)
reports_dir.mkdir(parents=True, exist_ok=True)
if args.output:

View File

@@ -31,7 +31,10 @@ from typing import Any
_SCRIPT_DIR = Path(__file__).resolve().parent
_FEIQIU_ROOT = _SCRIPT_DIR.parent.parent # apps/etl/connectors/feiqiu
_OUTPUT_DIR = _SCRIPT_DIR / "output"
_REPORTS_DIR = _FEIQIU_ROOT / "docs" / "reports"
_etl_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _etl_report_root:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
_REPORTS_DIR = Path(_etl_report_root)
# ---------------------------------------------------------------------------

View File

@@ -17,6 +17,7 @@ from __future__ import annotations
import argparse
import json
import logging
import os
import sys
import time
import traceback
@@ -1148,7 +1149,10 @@ def run_blackbox_check(
logger.info("JSON 报告: %s", json_path)
# 输出 Markdown
reports_dir = _FEIQIU_ROOT / "docs" / "reports"
_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _report_root:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
reports_dir = Path(_report_root)
reports_dir.mkdir(parents=True, exist_ok=True)
md_path = reports_dir / f"blackbox_report_{ts}.md"
md_content = _generate_markdown_report(report)

View File

@@ -170,7 +170,7 @@ def check_invalid_flow_rejection() -> DebugResult:
config=_stub_config, task_executor=None, task_registry=None,
db_conn=None, api_client=None, logger=logging.getLogger("test"),
)
runner.run(pipeline=name)
runner.run(flow=name)
errors_missed.append(name)
except ValueError as exc:
errors_raised.append({"name": name, "error": str(exc)})
@@ -738,8 +738,8 @@ def check_cli_mode_detection() -> DebugResult:
"""验证 CLI 的 Flow 模式 vs 传统模式判断逻辑。
通过检查 main() 源码确认:
- 有 --pipeline 参数 → Flow 模式(使用 FlowRunner)
- 无 --pipeline 参数 → 传统模式(使用 TaskExecutor.run_tasks)
- 有 --flow 参数 → Flow 模式(使用 FlowRunner)
- 无 --flow 参数 → 传统模式(使用 TaskExecutor.run_tasks)
"""
import inspect
from cli.main import main as cli_main
@@ -753,14 +753,14 @@ def check_cli_mode_detection() -> DebugResult:
source = inspect.getsource(cli_main)
# 检查 Flow 模式分支
if "args.pipeline" in source and "FlowRunner" in source:
checks.append("✓ 有 --pipeline 参数时使用 FlowRunner(Flow 模式)")
if "args.flow" in source and "FlowRunner" in source:
checks.append("✓ 有 --flow 参数时使用 FlowRunner(Flow 模式)")
else:
issues.append("未找到 Flow 模式分支(args.pipeline + FlowRunner)")
issues.append("未找到 Flow 模式分支(args.flow + FlowRunner)")
# 检查传统模式分支
if "run_tasks" in source:
checks.append("✓ 无 --pipeline 参数时使用 run_tasks(传统模式)")
checks.append("✓ 无 --flow 参数时使用 run_tasks(传统模式)")
else:
issues.append("未找到传统模式分支(run_tasks)")
@@ -814,13 +814,13 @@ def check_cli_flow_choices() -> DebugResult:
cli_choices = set(FlowRunner.FLOW_LAYERS.keys())
issues.append("无法导入 FLOW_CHOICES使用 FLOW_LAYERS 键集合")
pipeline_keys = set(FlowRunner.FLOW_LAYERS.keys())
flow_keys = set(FlowRunner.FLOW_LAYERS.keys())
missing_in_cli = pipeline_keys - cli_choices
extra_in_cli = cli_choices - pipeline_keys
missing_in_cli = flow_keys - cli_choices
extra_in_cli = cli_choices - flow_keys
result.details = {
"flow_layers_keys": sorted(pipeline_keys),
"flow_layers_keys": sorted(flow_keys),
"cli_choices": sorted(cli_choices),
"missing_in_cli": sorted(missing_in_cli),
"extra_in_cli": sorted(extra_in_cli),
@@ -836,7 +836,7 @@ def check_cli_flow_choices() -> DebugResult:
result.message = "; ".join(issues)
else:
result.status = "PASS"
result.message = f"CLI --flow 可选值与 FLOW_LAYERS 完全一致 ({len(pipeline_keys)} 种)"
result.message = f"CLI --flow 可选值与 FLOW_LAYERS 完全一致 ({len(flow_keys)} 种)"
result.duration_sec = round(time.monotonic() - t0, 4)
return result
@@ -847,7 +847,7 @@ def check_cli_flow_choices() -> DebugResult:
# ══════════════════════════════════════════════════════════════
def check_processing_modes() -> DebugResult:
"""验证 FlowRunner.run() 对 3 种处理模式的分支逻辑。
"""验证 FlowRunner.run() 对 4 种处理模式的分支逻辑。
- increment_only: 仅执行增量 ETL
- verify_only: 跳过增量 ETL直接执行校验
@@ -899,7 +899,7 @@ def check_processing_modes() -> DebugResult:
result.message = f"处理模式验证有 {len(issues)} 个问题"
else:
result.status = "PASS"
result.message = "3 种处理模式(increment_only/verify_only/increment_verify)逻辑正确"
result.message = "4 种处理模式(increment_only/verify_only/increment_verify/full_window)逻辑正确"
result.duration_sec = round(time.monotonic() - t0, 4)
return result

View File

@@ -6,11 +6,11 @@ Debug 报告生成脚本 —— 汇总所有阶段的调试结果,生成结构
- 阶段1: 属性测试结果(pytest 执行)
- 阶段2: 全量刷新 JSON(scripts/debug/output/full_refresh_*.json)
- 阶段3: 黑盒校验 JSON(scripts/debug/output/blackbox_*.json)
- 阶段4: 架构分析报告(docs/reports/architecture_report_*.md)
- 阶段5: 性能分析报告(docs/reports/performance_report_*.md)
- 阶段4: 架构分析报告($ETL_REPORT_ROOT/architecture_report_*.md)
- 阶段5: 性能分析报告($ETL_REPORT_ROOT/performance_report_*.md)
输出:
docs/reports/debug_report_YYYYMMDD.md
$ETL_REPORT_ROOT/debug_report_YYYYMMDD.md
"""
from __future__ import annotations
@@ -18,6 +18,7 @@ from __future__ import annotations
import argparse
import json
import logging
import os
import re
import sys
from dataclasses import dataclass, field
@@ -25,13 +26,26 @@ from datetime import datetime
from pathlib import Path
from typing import Any
from dotenv import load_dotenv
# 加载根 .env
load_dotenv(Path(__file__).resolve().parents[5] / ".env", override=False)
# ---------------------------------------------------------------------------
# 路径常量
# ---------------------------------------------------------------------------
SCRIPT_DIR = Path(__file__).resolve().parent
ETL_ROOT = SCRIPT_DIR.parent.parent # apps/etl/connectors/feiqiu
OUTPUT_DIR = SCRIPT_DIR / "output"
REPORTS_DIR = ETL_ROOT / "docs" / "reports"
_report_root = os.environ.get("ETL_REPORT_ROOT")
if not _report_root:
raise KeyError(
"环境变量 ETL_REPORT_ROOT 未定义。"
"请在根 .env 中配置,参考 .env.template 和 docs/deployment/EXPORT-PATHS.md"
)
REPORTS_DIR = Path(_report_root)
TESTS_DIR = ETL_ROOT / "tests" / "unit"
# 属性测试文件

View File

@@ -40,7 +40,9 @@ LIMIT = 100
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
REPORT_DIR = os.path.join("docs", "reports")
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
if not REPORT_DIR:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")
HEADERS = {

View File

@@ -13,7 +13,7 @@
python -m scripts.rebuild.rebuild_db_and_run_ods_to_dwd ^
--dsn "postgresql://user:pwd@host:5432/db" ^
--store-id 1 ^
--json-dir "export/test-json-doc" ^
--json-dir "$ODS_JSON_DOC_DIR" ^
--drop-schemas
环境变量(可选):
@@ -44,7 +44,7 @@ from tasks.utility.init_schema_task import InitOdsSchemaTask
from tasks.utility.manual_ingest_task import ManualIngestTask
DEFAULT_JSON_DIR = "export/test-json-doc"
DEFAULT_JSON_DIR = os.environ.get("ODS_JSON_DOC_DIR") or os.environ.get("INGEST_SOURCE_DIR") or ""
@dataclass(frozen=True)

View File

@@ -35,7 +35,9 @@ LIMIT = 100
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
REPORT_DIR = os.path.join("docs", "reports")
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
if not REPORT_DIR:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
HEADERS = {
"Authorization": f"Bearer {API_TOKEN}",

View File

@@ -84,7 +84,13 @@ def _fetch_pk_columns(conn, schema: str, table: str) -> list[str]:
def _build_report_path(out_arg: str | None) -> Path:
if out_arg:
return Path(out_arg)
reports_dir = PROJECT_ROOT / "reports"
env_root = os.environ.get("ETL_REPORT_ROOT")
if not env_root:
raise KeyError(
"环境变量 ETL_REPORT_ROOT 未定义。"
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
)
reports_dir = Path(env_root)
reports_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
return reports_dir / f"ods_snapshot_dedupe_{ts}.json"

View File

@@ -106,7 +106,13 @@ def _iter_rows(
def _build_report_path(out_arg: str | None) -> Path:
if out_arg:
return Path(out_arg)
reports_dir = PROJECT_ROOT / "reports"
env_root = os.environ.get("ETL_REPORT_ROOT")
if not env_root:
raise KeyError(
"环境变量 ETL_REPORT_ROOT 未定义。"
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
)
reports_dir = Path(env_root)
reports_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
return reports_dir / f"ods_content_hash_repair_{ts}.json"

View File

@@ -7,7 +7,9 @@ import json
import os
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
if not REPORT_DIR:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
NESTED_OBJECTS = {"siteprofile", "tableprofile"}

View File

@@ -24,7 +24,9 @@ from datetime import datetime
DOCS_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference")
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
if not REPORT_DIR:
raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")
ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
TABLES = [