feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本

包含多个会话的累积代码变更：
- backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔
- admin-web: ETL 状态页、任务管理、调度配置、登录优化
- miniprogram: 看板页面、聊天集成、UI 组件、导航更新
- etl: DWS 新任务（finance_area_daily/board_cache）、连接器增强
- tenant-admin: 项目初始化
- db: 19 个迁移脚本（etl_feiqiu 11 + zqyy_app 8）
- packages/shared: 枚举和工具函数更新
- tools: 数据库工具、报表生成、健康检查
- docs: PRD/架构/部署/合约文档更新

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-06 00:03:48 +08:00
parent 70324d8542
commit 6f8f12314f
515 changed files with 76604 additions and 7456 deletions
--- a/tools/reporting/dataflow_analyzer.py
+++ b/tools/reporting/dataflow_analyzer.py
--- a/tools/reporting/gen_dataflow_doc.py
+++ b/tools/reporting/gen_dataflow_doc.py
@@ -0,0 +1,340 @@
+# -*- coding: utf-8 -*-
+"""
+从源代码和 DDL 中提取 API → ODS → DWD 数据流映射，生成 Markdown 文档。
+用法: python tools/reporting/gen_dataflow_doc.py
+输出: $FULL_DATAFLOW_DOC_ROOT/dataflow_api_ods_dwd.md（由 .env 配置）
+"""
+import re
+import ast
+import sys
+import os
+from pathlib import Path
+from collections import OrderedDict
+
+# Directory two levels above the folder that contains this script.
+ROOT = Path(__file__).resolve().parents[2]
+# Feiqiu ETL pipeline sources; the task modules parsed by generate_doc live under here.
+ETL = ROOT / "apps" / "etl" / "pipelines" / "feiqiu"
+# Directory holding the ods.sql / dwd.sql DDL files.
+DB = ROOT / "db" / "etl_feiqiu" / "schemas"
+# NOTE(review): _env_paths is a project-local module not visible here; presumably
+# get_output_path resolves the named .env setting to a directory Path — confirm.
+from _env_paths import get_output_path as _get_path
+# Destination of the generated Markdown document.
+OUT = _get_path("FULL_DATAFLOW_DOC_ROOT") / "dataflow_api_ods_dwd.md"
+
+
+# ── 1. 从 DDL 解析表结构 ──────────────────────────────────────────
+def parse_ddl_tables(sql_path: Path, schema: str) -> dict[str, list[dict]]:
+    """Parse ``CREATE TABLE IF NOT EXISTS`` statements from a DDL file.
+
+    Args:
+        sql_path: UTF-8 encoded SQL file to read.
+        schema: Schema name applied to tables declared without a
+            ``schema.`` prefix.
+
+    Returns:
+        ``{"schema.table": [{"col": name, "type": type}, ...]}`` with columns
+        in declaration order. Constraint lines, comment lines and blank lines
+        are skipped.
+    """
+    text = sql_path.read_text(encoding="utf-8")
+    table_re = re.compile(
+        r"CREATE\s+TABLE\s+IF\s+NOT\s+EXISTS\s+"
+        r"(?:(\w+)\.)?(\w+)\s*\((.*?)\)\s*;",
+        re.DOTALL | re.IGNORECASE,
+    )
+    # Table-level constraint keywords. The word boundary keeps real columns
+    # such as "primary_contact" from being mistaken for a PRIMARY KEY clause.
+    constraint_re = re.compile(
+        r"^(PRIMARY|CONSTRAINT|UNIQUE|CHECK|FOREIGN|EXCLUDE)\b", re.IGNORECASE
+    )
+
+    tables: dict[str, list[dict]] = {}
+    for m in table_re.finditer(text):
+        table_schema = m.group(1) or schema
+        table_name = m.group(2)
+        cols = []
+        for raw_line in m.group(3).split("\n"):
+            # Drop a trailing "-- comment" first; otherwise the comma it hides
+            # survives rstrip(",") and ends up glued to the column type.
+            line = raw_line.split("--", 1)[0].strip().rstrip(",").strip()
+            if not line or constraint_re.match(line):
+                continue
+            parts = line.split()
+            if len(parts) < 2:
+                continue
+            col_type = parts[1]
+            # Re-attach a type modifier that was separated by whitespace,
+            # e.g. "NUMERIC (18,2)".
+            if len(parts) > 2 and parts[2].startswith("("):
+                col_type += parts[2]
+            cols.append({"col": parts[0].strip('"'), "type": col_type})
+        tables[f"{table_schema}.{table_name}"] = cols
+    return tables
+
+
+# ── 2. 从 Python 源码解析 TABLE_MAP ──────────────────────────────
+def parse_table_map(py_path: Path) -> dict[str, str]:
+    """Extract the ``TABLE_MAP`` dict literal from a Python source file.
+
+    The file is scanned as text (it is not imported). Only ``"key": "value"``
+    pairs written with double-quoted strings are picked up.
+
+    Returns:
+        The key/value pairs found, or an empty dict when no ``TABLE_MAP``
+        assignment is present.
+    """
+    source = py_path.read_text(encoding="utf-8")
+    found = re.search(
+        r"TABLE_MAP\s*(?::\s*dict\[.*?\])?\s*=\s*\{(.*?)\}",
+        source,
+        re.DOTALL,
+    )
+    if found is None:
+        return {}
+    # findall yields (key, value) tuples; dict() keeps the last duplicate,
+    # the same outcome as assigning pair by pair.
+    return dict(re.findall(r'"([^"]+)"\s*:\s*"([^"]+)"', found.group(1)))
+
+
+# ── 3. 从 Python 源码解析 FACT_MAPPINGS ──────────────────────────
+def parse_fact_mappings(py_path: Path) -> dict[str, list[tuple]]:
+    """Extract the ``FACT_MAPPINGS`` dict from a Python source file as text.
+
+    Returns:
+        ``{dwd_table: [(dwd_col, ods_expr, cast), ...]}`` where ``cast`` is
+        ``None`` when the source tuple spells ``None``. An empty dict is
+        returned when the name or its opening brace cannot be found.
+    """
+
+    def closing_index(s: str, begin: int, opener: str, closer: str, depth: int) -> int:
+        """Index of the closer that brings nesting back to zero, or -1."""
+        for pos in range(begin, len(s)):
+            ch = s[pos]
+            if ch == opener:
+                depth += 1
+            elif ch == closer:
+                depth -= 1
+                if depth == 0:
+                    return pos
+        return -1
+
+    source = py_path.read_text(encoding="utf-8")
+    marker = source.find("FACT_MAPPINGS")
+    if marker < 0:
+        return {}
+    open_brace = source.find("{", marker)
+    if open_brace < 0:
+        return {}
+
+    # Slice out the balanced {...} literal; unbalanced braces leave it empty.
+    close_brace = closing_index(source, open_brace, "{", "}", 0)
+    block = source[open_brace:close_brace + 1] if close_brace >= 0 else ""
+
+    entry_re = re.compile(r'"([^"]+)"\s*:\s*\[', re.DOTALL)
+    triple_re = re.compile(
+        r'\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*(?:"([^"]+)"|None)\s*\)'
+    )
+
+    mappings = {}
+    for entry in entry_re.finditer(block):
+        body_start = entry.end()
+        # The opening "[" was already consumed by the regex, hence depth 1.
+        body_end = closing_index(block, body_start, "[", "]", 1)
+        if body_end < 0:
+            body_end = body_start
+        mappings[entry.group(1)] = [
+            hit.groups() for hit in triple_re.finditer(block[body_start:body_end])
+        ]
+    return mappings
+
+
+# ── 4. 从 Python 源码解析 ODS_TASK_SPECS ─────────────────────────
+def parse_ods_specs(py_path: Path) -> list[dict]:
+    """Extract ``OdsTaskSpec(...)`` calls from a Python source file as text.
+
+    For every call the double-quoted string values of the keyword arguments
+    ``code``, ``table_name``, ``endpoint``, ``list_key`` and ``description``
+    are collected. Calls without a ``code`` are ignored.
+
+    The argument list is delimited by balanced-parenthesis scanning that
+    skips string literals and ``#`` comments, so nested calls or tuples
+    inside a spec do not truncate it.
+
+    Returns:
+        One dict per spec, in source order, holding the keys that were found.
+    """
+    text = py_path.read_text(encoding="utf-8")
+
+    def call_arguments(start: int) -> str | None:
+        """Text between the "(" ending at ``start`` and its matching ")"."""
+        depth = 1
+        quote = None
+        i = start
+        while i < len(text):
+            ch = text[i]
+            if quote is not None:
+                if ch == "\\":
+                    i += 1  # skip the escaped character
+                elif ch == quote:
+                    quote = None
+            elif ch in "\"'":
+                quote = ch
+            elif ch == "#":
+                # Comment: jump to end of line so its quotes/parens are ignored.
+                newline = text.find("\n", i)
+                if newline < 0:
+                    return None
+                i = newline
+            elif ch == "(":
+                depth += 1
+            elif ch == ")":
+                depth -= 1
+                if depth == 0:
+                    return text[start:i]
+            i += 1
+        return None
+
+    # (?<!\w) stops "code" from matching inside e.g. "task_code=".
+    key_patterns = {
+        key: re.compile(rf'(?<!\w){key}\s*=\s*"([^"]*)"')
+        for key in ("code", "table_name", "endpoint", "list_key", "description")
+    }
+
+    specs = []
+    for call in re.finditer(r"OdsTaskSpec\s*\(", text):
+        body = call_arguments(call.end())
+        if body is None:
+            continue
+        spec = {}
+        for key, pattern in key_patterns.items():
+            hit = pattern.search(body)
+            if hit:
+                spec[key] = hit.group(1)
+        if "code" in spec:
+            specs.append(spec)
+    return specs
+
+
+# ── 5. 生成文档 ──────────────────────────────────────────────────
+def generate_doc():
+    """Generate the API → ODS → DWD data-flow Markdown document at ``OUT``.
+
+    Parses ``ods.sql`` / ``dwd.sql`` under ``DB`` and ``dwd_load_task.py`` /
+    ``ods_tasks.py`` under ``ETL``, then renders an overview, a table of
+    contents, one section per ODS table (API endpoint, ODS columns and every
+    mapped DWD table with the source of each column) and three static
+    appendices. The result is written to ``OUT`` and summary counts are
+    printed.
+    """
+    ods_ddl = parse_ddl_tables(DB / "ods.sql", "ods")
+    dwd_ddl = parse_ddl_tables(DB / "dwd.sql", "dwd")
+
+    dwd_task_py = ETL / "tasks" / "dwd" / "dwd_load_task.py"
+    table_map = parse_table_map(dwd_task_py)
+    fact_mappings = parse_fact_mappings(dwd_task_py)
+
+    ods_specs = parse_ods_specs(ETL / "tasks" / "ods" / "ods_tasks.py")
+    # ODS table -> task spec. A spec without table_name cannot be tied to a
+    # table and would otherwise yield a section with an empty heading.
+    ods_to_api: dict[str, dict] = {
+        spec["table_name"]: spec for spec in ods_specs if spec.get("table_name")
+    }
+
+    def short_name(table: str) -> str:
+        """Table name without its schema prefix."""
+        return table.split(".")[-1]
+
+    def anchor_id(table: str) -> str:
+        """Anchor id shared by the TOC link and the section's <a id> tag."""
+        return table.replace(".", "").replace("_", "-")
+
+    lines = [
+        "# API → ODS → DWD 数据流对比文档",
+        "",
+        "> 自动生成于 `scripts/ops/gen_dataflow_doc.py`，基于 DDL 和 ETL 源码解析。",
+        "",
+        "## 概览",
+        "",
+        f"- ODS 表数量: {len(ods_ddl)}",
+        f"- DWD 表数量: {len(dwd_ddl)}",
+        f"- TABLE_MAP 映射条目: {len(table_map)}",
+        f"- ODS 任务数量: {len(ods_specs)}",
+        "",
+    ]
+
+    # TABLE_MAP is DWD table -> ODS table; invert it to ODS table -> [DWD tables].
+    ods_to_dwd: dict[str, list[str]] = {}
+    for dwd_t, ods_t in table_map.items():
+        ods_to_dwd.setdefault(ods_t, []).append(dwd_t)
+
+    # Every ODS table that has either a DWD mapping or an ODS task, sorted.
+    all_ods = sorted(set(ods_to_dwd) | set(ods_to_api))
+
+    lines += ["## 目录", ""]
+    for i, ods_t in enumerate(all_ods, 1):
+        lines.append(f"{i}. [{short_name(ods_t)}](#{anchor_id(ods_t)})")
+    lines += ["", "---", ""]
+
+    scd2_cols = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
+
+    # ── Per-table details ──
+    for ods_t in all_ods:
+        # Explicit anchor: the heading shows only the short name, so the TOC
+        # link id would not match any auto-generated heading anchor.
+        lines += [f'<a id="{anchor_id(ods_t)}"></a>', "", f"## {short_name(ods_t)}", ""]
+
+        # API endpoint info
+        api_info = ods_to_api.get(ods_t, {})
+        if api_info:
+            lines.append("### API 端点")
+            lines.append("")
+            lines.append(f"- 任务编码: `{api_info.get('code', 'N/A')}`")
+            lines.append(f"- 端点: `{api_info.get('endpoint', 'N/A')}`")
+            lk = api_info.get("list_key")
+            if lk:
+                lines.append(f"- 数据路径: `data.{lk}`")
+            desc = api_info.get("description", "")
+            if desc:
+                lines.append(f"- 说明: {desc}")
+            lines.append("")
+
+        # ODS table columns
+        ods_cols = ods_ddl.get(ods_t, [])
+        ods_col_names = {oc["col"].lower() for oc in ods_cols}
+        if ods_cols:
+            lines.append(f"### ODS 表: `{ods_t}` ({len(ods_cols)} 列)")
+            lines.append("")
+            lines.append("| # | 列名 | 类型 |")
+            lines.append("|---|------|------|")
+            for idx, c in enumerate(ods_cols, 1):
+                lines.append(f"| {idx} | `{c['col']}` | {c['type']} |")
+            lines.append("")
+
+        # DWD tables fed by this ODS table
+        dwd_tables = ods_to_dwd.get(ods_t, [])
+        if dwd_tables:
+            for dwd_t in sorted(dwd_tables):
+                dwd_cols = dwd_ddl.get(dwd_t, [])
+                table_type = "维度" if "dim_" in dwd_t else "事实"
+                if dwd_t.endswith("_ex"):
+                    table_type += "（扩展）"
+
+                lines.append(f"### DWD 表: `{dwd_t}` — {table_type} ({len(dwd_cols)} 列)")
+                lines.append("")
+
+                # Column comparison table
+                lines.append("| # | DWD 列名 | DWD 类型 | ODS 来源表达式 | 转换 | 备注 |")
+                lines.append("|---|----------|----------|----------------|------|------|")
+
+                # dwd_col -> (ods_expr, cast) lookup from FACT_MAPPINGS
+                mapping_dict = {m[0]: (m[1], m[2]) for m in fact_mappings.get(dwd_t, [])}
+
+                for idx, c in enumerate(dwd_cols, 1):
+                    col_name = c["col"]
+                    col_type = c["type"]
+
+                    # SCD2 bookkeeping columns have no ODS source
+                    if col_name.lower() in scd2_cols:
+                        lines.append(f"| {idx} | `{col_name}` | {col_type} | — | — | DWD 慢变元数据 |")
+                        continue
+
+                    if col_name in mapping_dict:
+                        ods_expr, cast = mapping_dict[col_name]
+                        cast_str = f"CAST → {cast}" if cast else "直接映射"
+                        # Classify the mapping by the shape of its expression
+                        note = ""
+                        if "->>" in ods_expr:
+                            note = "JSONB 提取"
+                        elif "CASE" in ods_expr.upper():
+                            note = "派生计算"
+                        elif ods_expr != col_name:
+                            note = "字段重命名"
+                        lines.append(f"| {idx} | `{col_name}` | {col_type} | `{ods_expr}` | {cast_str} | {note} |")
+                    elif col_name.lower() in ods_col_names:
+                        # Not listed explicitly but an ODS column has the same name
+                        lines.append(f"| {idx} | `{col_name}` | {col_type} | `{col_name}` | 直接映射 | 同名直传 |")
+                    else:
+                        lines.append(f"| {idx} | `{col_name}` | {col_type} | — | — | 未在 FACT_MAPPINGS 中显式映射 |")
+
+                lines.append("")
+        else:
+            lines.append("*该 ODS 表暂无 DWD 映射（仅用于 DWS 或其他下游）*")
+            lines.append("")
+
+        lines.append("---")
+        lines.append("")
+
+    # ── Appendix: ETL metadata columns ──
+    lines += [
+        "## 附录：ETL 元数据列",
+        "",
+        "所有 ODS 表均包含以下 ETL 元数据列，不映射到 DWD：",
+        "",
+        "| 列名 | 类型 | 说明 |",
+        "|------|------|------|",
+        "| `content_hash` | TEXT | 记录内容哈希，用于去重和变更检测 |",
+        "| `source_file` | TEXT | 原始导出文件名，用于数据追溯 |",
+        "| `source_endpoint` | TEXT | 采集来源接口/文件路径 |",
+        "| `fetched_at` | TIMESTAMPTZ | 采集/入库时间戳 |",
+        "| `payload` | JSONB | 完整原始 JSON 记录快照 |",
+        "",
+    ]
+
+    # ── Appendix: SCD2 columns of DWD dimension tables ──
+    lines += [
+        "## 附录：DWD 维度表 SCD2 列",
+        "",
+        "所有 DWD 维度表（`dim_*`）均包含以下 SCD2 慢变维度列：",
+        "",
+        "| 列名 | 类型 | 说明 |",
+        "|------|------|------|",
+        "| `scd2_start_time` | TIMESTAMPTZ | 版本生效起点 |",
+        "| `scd2_end_time` | TIMESTAMPTZ | 版本失效时间（9999-12-31 = 当前） |",
+        "| `scd2_is_current` | INT | 当前版本标记（1=当前，0=历史） |",
+        "| `scd2_version` | INT | 版本号（自增） |",
+        "",
+    ]
+
+    # ── Appendix: incremental strategy of DWD fact tables ──
+    lines += [
+        "## 附录：DWD 事实表增量策略",
+        "",
+        "事实表按时间窗口增量写入，优先使用以下业务时间列进行过滤（按优先级排序）：",
+        "",
+        "1. `pay_time` — 支付时间",
+        "2. `create_time` — 创建时间",
+        "3. `update_time` — 更新时间",
+        "4. `occur_time` — 发生时间",
+        "5. `settle_time` — 结算时间",
+        "6. `start_use_time` — 开始使用时间",
+        "7. `fetched_at` — 入库时间（兜底）",
+        "",
+    ]
+
+    # Write the document, creating the output directory if needed
+    OUT.parent.mkdir(parents=True, exist_ok=True)
+    OUT.write_text("\n".join(lines), encoding="utf-8")
+    print(f"文档已生成: {OUT}")
+    print(f"  ODS 表: {len(ods_ddl)}, DWD 表: {len(dwd_ddl)}")
+    print(f"  TABLE_MAP: {len(table_map)} 条, FACT_MAPPINGS: {len(fact_mappings)} 条")
+    print(f"  ODS 任务: {len(ods_specs)} 个")
+
+
+# Script entry point: build the document only when executed directly, not on import.
+if __name__ == "__main__":
+    generate_doc()
--- a/tools/reporting/gen_dataflow_report.py
+++ b/tools/reporting/gen_dataflow_report.py
@@ -0,0 +1,956 @@
+"""
+数据流结构分析报告生成器（v3）
+
+读取 analyze_dataflow.py 采集的数据，生成带锚点链接、上下游映射列、
+业务描述、多示例值、字段差异报告的 Markdown 报告。
+
+增强内容（v3）：
+- 总览表增加 API JSON 字段数列
+- 覆盖率表增加业务描述列
+- 逐表详情增加业务描述列（来自 BD_manual 文档）
+- 说明+示例值合并，多示例展示，枚举值解释
+- 总览章节增加 API↔ODS↔DWD 字段对比差异报告
+
+用法:
+    python tools/reporting/gen_dataflow_report.py
+    python tools/reporting/gen_dataflow_report.py --output-dir /path/to/output
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from datetime import datetime
+from pathlib import Path
+
+from dotenv import load_dotenv  # noqa: F401 — _env_paths 负责加载，此处保留以防其他模块间接引用
+
+# ── Whitelist definitions ───────────────────────────────────────────────
+# Whitelisted fields still take part in checks and statistics, but in the
+# report's "1.1" diff-detail tables and "3." per-table detail tables they are
+# collapsed (no detailed rows) and annotated with the whitelist reason.
+# CHANGE 2026-02-21 | Refactored whitelist logic: unified the term "whitelist"; fields are still checked normally, only the report display is collapsed
+
+# ODS-layer ETL metadata columns (non-business fields generated by the ETL flow)
+WHITELIST_ETL_META_COLS = {
+    "source_file", "source_endpoint", "fetched_at", "payload", "content_hash",
+}
+
+# DWD dimension-table SCD2 management columns (maintained by the ETL framework, not business mappings)
+# CHANGE 2026-03-26 | Added the newer scd2_* column names to stay consistent with etl_consistency_check.py
+WHITELIST_DWD_SCD2_COLS = {
+    "valid_from", "valid_to", "is_current", "etl_loaded_at", "etl_batch_id",
+    "scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version",
+}
+
+# API nested-object prefixes (store-info structure nested in the upstream API; already linked via site_id, so not mapped field by field)
+WHITELIST_API_NESTED_PREFIXES = ("siteProfile.",)
+
+
+def is_whitelist_etl_meta(col_name: str) -> bool:
+    """Return True when ``col_name`` is listed in ``WHITELIST_ETL_META_COLS``."""
+    whitelisted = col_name in WHITELIST_ETL_META_COLS
+    return whitelisted
+
+
+def is_whitelist_scd2(col_name: str) -> bool:
+    """Return True when ``col_name`` is listed in ``WHITELIST_DWD_SCD2_COLS``."""
+    whitelisted = col_name in WHITELIST_DWD_SCD2_COLS
+    return whitelisted
+
+
+def is_whitelist_api_nested(json_path: str) -> bool:
+    """Return True when ``json_path`` starts with a whitelisted nested-object prefix."""
+    for prefix in WHITELIST_API_NESTED_PREFIXES:
+        if json_path.startswith(prefix):
+            return True
+    return False
+
+
+def whitelist_reason(col_name: str, json_path: str = "", layer: str = "") -> str:
+    """Describe why a field is whitelisted; empty string when it is not.
+
+    The column name is checked against the ETL-metadata list first, then the
+    SCD2 list; ``json_path`` (when given) is checked last against the nested
+    API prefixes. ``layer`` is accepted but not consulted.
+    """
+    reason = ""
+    if is_whitelist_etl_meta(col_name):
+        reason = "ETL 元数据列"
+    elif is_whitelist_scd2(col_name):
+        reason = "SCD2 管理列"
+    elif json_path and is_whitelist_api_nested(json_path):
+        reason = "API 嵌套对象（siteProfile）"
+    return reason
+
+
+def load_json(path: Path) -> dict | list | None:
+    """Return the parsed JSON content of ``path``, or None when it does not exist."""
+    if path.exists():
+        with open(path, "r", encoding="utf-8") as handle:
+            parsed = json.load(handle)
+        return parsed
+    return None
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Create the command-line parser; its only option is ``--output-dir``."""
+    cli = argparse.ArgumentParser(description="生成数据流结构分析 Markdown 报告")
+    cli.add_argument(
+        "--output-dir",
+        default=None,
+        type=str,
+        help="输出目录（默认读取 .env 中的 SYSTEM_ANALYZE_ROOT）",
+    )
+    return cli
+
+
+def resolve_data_dir(override: str | None = None) -> Path:
+    """Return ``Path(override)`` when given, else the ``SYSTEM_ANALYZE_ROOT`` output path.
+
+    The project-local ``_env_paths`` module is imported only on the fallback
+    path, so an explicit override never needs it.
+    """
+    if not override:
+        from _env_paths import get_output_path
+        return get_output_path("SYSTEM_ANALYZE_ROOT")
+    return Path(override)
+
+
+def _esc(s: str) -> str:
+    """Make a value safe for a Markdown table cell.
+
+    Pipes are backslash-escaped and newlines become spaces. Any falsy input
+    yields an empty string; other values are converted with ``str()`` first.
+    """
+    if not s:
+        return ""
+    text = str(s)
+    text = text.replace("|", "\\|")
+    return text.replace("\n", " ")
+
+
+
+
+def _format_samples(samples: list[str], max_show: int = 5) -> str:
+    """Render up to ``max_show`` sample values as comma-separated inline code.
+
+    Each value is escaped with ``_esc``; escaped values longer than 30
+    characters are cut to 27 characters plus ``...``. When more samples exist
+    than are shown, the total count is appended.
+    """
+    if not samples:
+        return ""
+
+    def clip(text: str) -> str:
+        return text[:27] + "..." if len(text) > 30 else text
+
+    rendered = ", ".join(f"`{clip(_esc(value))}`" for value in samples[:max_show])
+    if len(samples) > max_show:
+        rendered += f" …共{len(samples)}种"
+    return rendered
+
+
+def _is_enum_like(samples: list[str], total_records: int) -> bool:
+    """Return True when a field looks like an enum.
+
+    That is: at least 5 records were seen and the field has between 2 and 8
+    distinct sample values.
+    """
+    enough_records = total_records >= 5
+    return enough_records and 1 < len(samples) <= 8
+
+
+def _write_source_file_manifest(w, data_dir: Path, tables: list[dict], fm_dir: Path | None = None):
+    """Emit the "data source files" section listing the JSON inputs of the report.
+
+    Args:
+        w: Callable that appends one line to the report; called without
+            arguments it emits a blank line.
+        data_dir: Root directory of the collected analysis data.
+        tables: Accepted for signature compatibility; not used here.
+        fm_dir: Directory to list for the ``field_mappings/`` category.
+            Defaults to ``data_dir / "field_mappings"``.
+    """
+    if fm_dir is None:
+        fm_dir = data_dir / "field_mappings"
+    w("## 数据源文件清单")
+    w()
+    w("本报告基于以下 JSON 数据文件生成：")
+    w()
+
+    categories = [
+        ("collection_manifest.json", "采集元数据（表清单、日期范围、记录数）"),
+        ("json_trees/", "API JSON 字段结构（递归展开后的字段路径、类型、示例值）"),
+        ("field_mappings/", "三层字段映射（API→ODS→DWD 映射关系）"),
+        ("db_schemas/", "数据库表结构（ODS/DWD 列定义，来自 PostgreSQL）"),
+        ("bd_descriptions/", "业务描述（来自 BD_manual 文档）"),
+    ]
+
+    for cat_path, cat_desc in categories:
+        if not cat_path.endswith("/"):
+            # Single file: report only whether it is present
+            fp = data_dir / cat_path
+            status = "✓" if fp.exists() else "✗ 缺失"
+            w(f"- `{cat_path}` — {cat_desc}（{status}）")
+            w()
+            continue
+
+        # Sub-directory: list the .json files that actually exist.
+        # CHANGE 2026-02-21 | field_mappings uses the fm_dir passed in (may be field_mappings_new)
+        dir_name = cat_path.rstrip("/")
+        sub_dir = fm_dir if dir_name == "field_mappings" else data_dir / dir_name
+        if not sub_dir.is_dir():
+            w(f"**{cat_path}** — {cat_desc}（目录不存在）")
+            w()
+            continue
+
+        # List the directory exactly once, inside the guard, so an unreadable
+        # directory is reported instead of aborting the whole report.
+        try:
+            files = sorted(f.name for f in sub_dir.iterdir() if f.suffix == ".json")
+        except PermissionError:
+            w(f"**{cat_path}** — {cat_desc}（目录权限拒绝）")
+            w()
+            continue
+
+        w(f"**{cat_path}** — {cat_desc}（{len(files)} 个文件）")
+        w()
+        for fn in files:
+            w(f"- `{cat_path}{fn}`")
+        w()
+
+    w("---")
+    w()
+
+
+def generate_report(data_dir: Path) -> str:
+    """Build the complete Markdown report from the collected analysis data.
+
+    Sections are emitted in this order: header, data-source file manifest,
+    "1." overview table, "1.1" field diff report, "2." global statistics and
+    "3." per-table details.
+
+    Args:
+        data_dir: Directory containing ``collection_manifest.json`` and the
+            ``json_trees`` / ``field_mappings`` / ``db_schemas`` /
+            ``bd_descriptions`` sub-directories.
+
+    Returns:
+        The report as a single newline-joined string.
+
+    Raises:
+        FileNotFoundError: If ``collection_manifest.json`` is missing or
+            loads as an empty value.
+    """
+    manifest = load_json(data_dir / "collection_manifest.json")
+    if not manifest:
+        raise FileNotFoundError(f"找不到 collection_manifest.json: {data_dir}")
+
+    # CHANGE 2026-02-21 | Windows file-lock fallback: field_mappings_new takes precedence over a locked field_mappings
+    _fm_dir = data_dir / "field_mappings"
+    _fm_new = data_dir / "field_mappings_new"
+    # Only switch when the fallback directory exists and is not empty
+    if _fm_new.exists() and any(_fm_new.iterdir()):
+        _fm_dir = _fm_new
+
+    tables = manifest["tables"]
+    now = datetime.now()
+    lines: list[str] = []
+
+    def w(s: str = ""):
+        # Line sink shared with the section writers; w() emits a blank line
+        lines.append(s)
+
+    # ── Read the API request date range from the manifest ──
+    api_date_from = manifest.get("date_from")
+    api_date_to = manifest.get("date_to")
+    total_records_all = sum(t.get("record_count", 0) for t in tables)
+
+    # ── Report header ──
+    w("# 飞球连接器 — 数据流结构分析报告")
+    w()
+    # NOTE(review): the timestamp comes from naive datetime.now() but is labelled "CST" — confirm the host timezone
+    w(f"> 生成时间：{now.strftime('%Y-%m-%d %H:%M:%S')} CST")
+    w(f"> 分析范围：飞球（feiqiu）连接器，共 {len(tables)} 张 ODS 表")
+    w("> 数据来源：API JSON 采样 + PostgreSQL ODS/DWD 表结构 + 三层字段映射 + BD_manual 业务文档")
+    if api_date_from or api_date_to:
+        w(f"> API 请求日期范围：{api_date_from or '—'} ~ {api_date_to or '—'}")
+    w(f"> JSON 数据总量：{total_records_all} 条记录")
+    w()
+
+    # ── Data-source file manifest ──
+    _write_source_file_manifest(w, data_dir, tables, fm_dir=_fm_dir)
+
+    # ── 1. Overview table (includes the API JSON field count column) ──
+    w("## 1. 总览")
+    w()
+    w("| # | ODS 表名 | 业务描述 | 采样记录数 | API JSON 字段数 | ODS 列数 | DWD 目标表 | DWD 总列数 |")
+    w("|---|---------|---------|-----------|---------------|---------|-----------|-----------|")
+    total_records = 0
+    total_ods_cols = 0
+    total_dwd_cols = 0
+    total_json_fields = 0
+    for i, t in enumerate(tables, 1):
+        dwd_names = ", ".join(t["dwd_tables"]) if t["dwd_tables"] else "—"
+        json_fc = t.get("json_field_count", 0)
+        w(f"| {i} | `{t['table']}` | {t['description']} | {t['record_count']} | {json_fc} | {t['ods_column_count']} | {dwd_names} | {t['dwd_column_count']} |")
+        total_records += t["record_count"]
+        total_ods_cols += t["ods_column_count"]
+        total_dwd_cols += t["dwd_column_count"]
+        total_json_fields += json_fc
+    w(f"| | **合计** | | **{total_records}** | **{total_json_fields}** | **{total_ods_cols}** | | **{total_dwd_cols}** |")
+    w()
+
+    # ── 1.1 Field diff report ──
+    _write_field_diff_report(w, data_dir, tables, fm_dir=_fm_dir)
+
+    # ── 2. Global statistics ──
+    w("## 2. 全局统计")
+    w()
+
+    # 2.1 JSON→ODS mapping coverage
+    total_json = 0
+    total_mapped = 0
+    per_table_stats: list[dict] = []
+    for t in tables:
+        fm = load_json(_fm_dir / f"{t['table']}.json")
+        if not fm or "json_to_ods" not in fm:
+            # No mapping data: keep the table in the list with a "—" coverage
+            per_table_stats.append({
+                "table": t["table"], "description": t["description"],
+                "json_count": 0, "mapped": 0, "unmapped": 0, "pct": "—",
+            })
+            continue
+        j2o = fm["json_to_ods"]
+        json_count = len(j2o)
+        # A JSON field counts as mapped when its entry has a non-None ods_col
+        mapped = sum(1 for m in j2o if m.get("ods_col") is not None)
+        unmapped = json_count - mapped
+        pct = f"{mapped / json_count * 100:.1f}%" if json_count > 0 else "—"
+        per_table_stats.append({
+            "table": t["table"], "description": t["description"],
+            "json_count": json_count, "mapped": mapped, "unmapped": unmapped, "pct": pct,
+        })
+        total_json += json_count
+        total_mapped += mapped
+
+    total_unmapped = total_json - total_mapped
+    w("### 2.1 JSON→ODS 映射覆盖")
+    w()
+    w(f"- JSON 字段总数：{total_json}")
+    if total_json > 0:
+        w(f"- 已映射到 ODS 列：{total_mapped}（{total_mapped / total_json * 100:.1f}%）")
+        w(f"- 仅存于 payload：{total_unmapped}（{total_unmapped / total_json * 100:.1f}%）")
+    else:
+        w("- 已映射到 ODS 列：0")
+        w("- 仅存于 payload：0")
+    w()
+
+    # 2.2 ODS→DWD mapping coverage
+    w("### 2.2 ODS→DWD 映射覆盖")
+    w()
+    w(f"- DWD 列总数：{total_dwd_cols}")
+    w()
+
+    # 2.3 Per-table coverage (includes the business description column)
+    w("### 2.3 各表 JSON→ODS 映射覆盖率")
+    w()
+    w("| ODS 表名 | 业务描述 | JSON 字段数 | 已映射 | 仅 payload | 覆盖率 |")
+    w("|---------|---------|-----------|-------|-----------|-------|")
+    # Highest coverage first; rows with "—" get key 0, the same key as 0.0%
+    sorted_stats = sorted(per_table_stats, key=lambda x: (0 if x["pct"] == "—" else -float(x["pct"].rstrip("%"))))
+    for s in sorted_stats:
+        w(f"| `{s['table']}` | {s['description']} | {s['json_count']} | {s['mapped']} | {s['unmapped']} | {s['pct']} |")
+    w()
+
+    # ── 3. Per-table details ──
+    w("## 3. 逐表详情")
+    w()
+
+    for idx, t in enumerate(tables, 1):
+        table_name = t["table"]
+        fm = load_json(_fm_dir / f"{table_name}.json")
+        jt = load_json(data_dir / "json_trees" / f"{table_name}.json")
+        ods_schema = load_json(data_dir / "db_schemas" / f"ods_{table_name}.json")
+        bd = load_json(data_dir / "bd_descriptions" / f"{table_name}.json")
+
+        # Anchor ids: taken from the mapping file, with name-based fallbacks
+        anchors = fm.get("anchors", {}) if fm else {}
+        api_anchor = anchors.get("api", f"api-{table_name}")
+        ods_anchor = anchors.get("ods", f"ods-{table_name}")
+        dwd_anchors = anchors.get("dwd", {})
+
+        dwd_tables_list = t.get("dwd_tables", [])
+        json_fc = t.get("json_field_count", 0)
+
+        w(f"### 3.{idx} {table_name}（{t['description']}）")
+        w()
+        w(f"- 任务代码：`{t['task_code']}`")
+        w(f"- 采样记录数：{t['record_count']}")
+        w(f"- API JSON 字段数：{json_fc}")
+        w(f"- ODS 列数：{t['ods_column_count']}")
+        if dwd_tables_list:
+            w(f"- DWD 目标表：{', '.join(dwd_tables_list)}")
+        else:
+            w("- DWD 目标表：—（仅 ODS 落地）")
+        w()
+
+        # ── API source fields block (writer defined outside this view) ──
+        _write_api_section(w, fm, jt, bd, table_name, api_anchor, ods_anchor)
+
+        # ── ODS table structure block (writer defined outside this view) ──
+        _write_ods_section(w, fm, ods_schema, bd, table_name, ods_anchor, api_anchor, dwd_anchors)
+
+        # ── DWD table structure blocks, one per target table ──
+        for dwd_name in dwd_tables_list:
+            dwd_anchor = dwd_anchors.get(dwd_name, f"dwd-{dwd_name}")
+            dwd_schema = load_json(data_dir / "db_schemas" / f"dwd_{dwd_name}.json")
+            _write_dwd_section(w, fm, dwd_schema, bd, dwd_name, dwd_anchor, ods_anchor, table_name)
+
+    return "\n".join(lines)
+
+
+
+def _write_field_diff_report(w, data_dir: Path, tables: list[dict], fm_dir: Path | None = None):
+    """生成 API↔ODS↔DWD 字段对比差异报告（汇总表 + 逐表分表）"""
+    if fm_dir is None:
+        fm_dir = data_dir / "field_mappings"
+    w("### 1.1 API↔ODS↔DWD 字段对比差异")
+    w()
+    w("以下汇总各表在三层之间的字段差异（点击数字跳转至分表详情）：")
+    w()
+    w("| ODS 表名 | API→ODS 未映射 | ODS 无 JSON 源 | ODS→DWD 未映射 | DWD 无 ODS 源 | 主要差异原因 |")
+    w("|---------|--------------|--------------|--------------|-------------|------------|")
+
+    # CHANGE 2026-02-21 | 重构白名单逻辑：字段仍正常检查计数，白名单字段在分表详情中折叠
+    # 收集每表差异数据，用于汇总表和分表
+    diff_rows: list[dict] = []
+
+    for t in tables:
+        table_name = t["table"]
+        fm = load_json(fm_dir / f"{table_name}.json")
+        if not fm:
+            w(f"| `{table_name}` | — | — | — | — | 无映射数据 |")
+            diff_rows.append(None)
+            continue
+
+        anchors = fm.get("anchors", {})
+        api_anchor = anchors.get("api", f"api-{table_name.replace('_', '-')}")
+        ods_anchor = anchors.get("ods", f"ods-{table_name.replace('_', '-')}")
+        dwd_anchors = anchors.get("dwd", {})
+        diff_anchor = f"diff-{table_name.replace('_', '-')}"
+
+        j2o = fm.get("json_to_ods", [])
+        o2d = fm.get("ods_to_dwd", {})
+        d2o = fm.get("dwd_to_ods", {})
+
+        # ── API→ODS 未映射字段（全部检查，含白名单） ──
+        api_unmapped_flat: list[str] = []
+        api_unmapped_nested: list[str] = []
+        api_unmapped_whitelist: list[tuple[str, str]] = []  # (json_path, reason)
+        for m in j2o:
+            if m.get("ods_col") is None:
+                jp = m.get("json_path", "")
+                wl_reason = whitelist_reason("", json_path=jp)
+                if wl_reason:
+                    api_unmapped_whitelist.append((jp, wl_reason))
+                elif "." in jp:
+                    api_unmapped_nested.append(jp)
+                else:
+                    api_unmapped_flat.append(jp)
+        api_unmapped_total = len(api_unmapped_flat) + len(api_unmapped_nested) + len(api_unmapped_whitelist)
+
+        # ── ODS 无 JSON 源（全部检查，含白名单） ──
+        ods_schema = load_json(data_dir / "db_schemas" / f"ods_{table_name}.json")
+        ods_mapped_cols = {m["ods_col"] for m in j2o if m.get("ods_col")}
+        ods_no_json_fields: list[str] = []
+        ods_no_json_whitelist: list[tuple[str, str]] = []  # (col_name, reason)
+        if ods_schema and "columns" in ods_schema:
+            for col in ods_schema["columns"]:
+                if col["name"] not in ods_mapped_cols:
+                    wl_reason = whitelist_reason(col["name"])
+                    if wl_reason:
+                        ods_no_json_whitelist.append((col["name"], wl_reason))
+                    else:
+                        ods_no_json_fields.append(col["name"])
+
+        # ── ODS→DWD 未映射（全部检查，含白名单） ──
+        ods_cols_with_dwd = set(o2d.keys())
+        ods_no_dwd_fields: list[str] = []
+        ods_no_dwd_whitelist: list[tuple[str, str]] = []
+        if ods_schema and "columns" in ods_schema:
+            for col in ods_schema["columns"]:
+                if col["name"] not in ods_cols_with_dwd:
+                    wl_reason = whitelist_reason(col["name"])
+                    if wl_reason:
+                        ods_no_dwd_whitelist.append((col["name"], wl_reason))
+                    else:
+                        ods_no_dwd_fields.append(col["name"])
+
+        # ── DWD 无 ODS 源（全部检查，含白名单） ──
+        dwd_no_ods_fields: list[tuple[str, str]] = []  # (dwd_table, dwd_col)
+        dwd_no_ods_whitelist: list[tuple[str, str, str]] = []  # (dwd_table, dwd_col, reason)
+        for dwd_name, entries in d2o.items():
+            for entry in entries:
+                if entry.get("ods_source") == "—":
+                    wl_reason = whitelist_reason(entry["dwd_col"])
+                    if wl_reason:
+                        dwd_no_ods_whitelist.append((dwd_name, entry["dwd_col"], wl_reason))
+                    else:
+                        dwd_no_ods_fields.append((dwd_name, entry["dwd_col"]))
+
+        # 差异原因（含白名单统计）
+        reasons: list[str] = []
+        if api_unmapped_nested:
+            reasons.append(f"嵌套对象 {len(api_unmapped_nested)} 个")
+        if api_unmapped_flat:
+            reasons.append(f"平层未映射 {len(api_unmapped_flat)} 个")
+        if dwd_no_ods_fields:
+            reasons.append(f"SCD2/派生列 {len(dwd_no_ods_fields)} 个")
+        wl_total = len(api_unmapped_whitelist) + len(ods_no_json_whitelist) + len(ods_no_dwd_whitelist) + len(dwd_no_ods_whitelist)
+        if wl_total:
+            reasons.append(f"白名单 {wl_total} 个")
+        reason_str = "；".join(reasons) if reasons else "—"
+
+        # 汇总表单元格：数量 + 跳转链接（白名单字段也计入总数）
+        def _cell(count: int) -> str:
+            if count == 0:
+                return "0"
+            return f"[{count}](#{diff_anchor})"
+
+        w(f"| `{table_name}` | {_cell(api_unmapped_total)} | {_cell(len(ods_no_json_fields) + len(ods_no_json_whitelist))} | {_cell(len(ods_no_dwd_fields) + len(ods_no_dwd_whitelist))} | {_cell(len(dwd_no_ods_fields) + len(dwd_no_ods_whitelist))} | {reason_str} |")
+
+        diff_rows.append({
+            "table_name": table_name,
+            "diff_anchor": diff_anchor,
+            "api_anchor": api_anchor,
+            "ods_anchor": ods_anchor,
+            "dwd_anchors": dwd_anchors,
+            "api_unmapped_flat": api_unmapped_flat,
+            "api_unmapped_nested": api_unmapped_nested,
+            "api_unmapped_whitelist": api_unmapped_whitelist,
+            "ods_no_json_fields": ods_no_json_fields,
+            "ods_no_json_whitelist": ods_no_json_whitelist,
+            "ods_no_dwd_fields": ods_no_dwd_fields,
+            "ods_no_dwd_whitelist": ods_no_dwd_whitelist,
+            "dwd_no_ods_fields": dwd_no_ods_fields,
+            "dwd_no_ods_whitelist": dwd_no_ods_whitelist,
+        })
+
+    w()
+
+    # ── 逐表差异分表 ──
+    # CHANGE 2026-02-21 | 白名单字段折叠显示，不展开详细表格行，注明白名单原因
+    sub_idx = 0
+    for row in diff_rows:
+        if row is None:
+            continue
+        has_any = (row["api_unmapped_flat"] or row["api_unmapped_nested"]
+                   or row["api_unmapped_whitelist"]
+                   or row["ods_no_json_fields"] or row["ods_no_json_whitelist"]
+                   or row["ods_no_dwd_fields"] or row["ods_no_dwd_whitelist"]
+                   or row["dwd_no_ods_fields"] or row["dwd_no_ods_whitelist"])
+        if not has_any:
+            continue
+
+        sub_idx += 1
+        table_name = row["table_name"]
+        w(f'<a id="{row["diff_anchor"]}"></a>')
+        w()
+        w(f"#### 1.1.{sub_idx} {table_name} 字段差异明细")
+        w()
+
+        api_anchor = row["api_anchor"]
+        ods_anchor = row["ods_anchor"]
+        dwd_anchors = row["dwd_anchors"]
+
+        # 加载辅助数据：json_trees（示例值）、bd_descriptions（业务说明）
+        jt = load_json(data_dir / "json_trees" / f"{table_name}.json")
+        bd = load_json(data_dir / "bd_descriptions" / f"{table_name}.json")
+        jt_lookup: dict[str, dict] = {}
+        if jt and "fields" in jt:
+            for fld in jt["fields"]:
+                jt_lookup[fld["path"]] = fld
+        ods_descs = bd.get("ods_fields", {}) if bd else {}
+        dwd_descs_all = bd.get("dwd_fields", {}) if bd else {}
+
+        def _sample_str(field_name: str, layer: str, dwd_tbl: str = "") -> str:
+            """从 json_trees 或 bd_descriptions 获取示例值字符串"""
+            if layer == "API":
+                entry = jt_lookup.get(field_name, {})
+                samples = entry.get("samples", [])
+                total_recs = entry.get("total_records", 0)
+                if not samples:
+                    single = entry.get("sample", "")
+                    if single:
+                        samples = [str(single)]
+                if _is_enum_like(samples, total_recs):
+                    return ", ".join(f"`{_esc(s)}`" for s in samples[:5])
+                if samples:
+                    return _format_samples(samples, max_show=3)
+            return ""
+
+        def _desc_str(field_name: str, layer: str, dwd_tbl: str = "") -> str:
+            """从 bd_descriptions 获取业务说明"""
+            key = field_name.split(".")[-1].replace("[]", "").lower()
+            if layer in ("ODS", "API"):
+                desc = ods_descs.get(key, "")
+            elif layer == "DWD" and dwd_tbl:
+                desc = dwd_descs_all.get(dwd_tbl, {}).get(key, "")
+            else:
+                desc = ""
+            if desc and len(desc) > 40:
+                desc = desc[:37] + "..."
+            return _esc(desc)
+
+        def _write_whitelist_summary(w, items: list, category: str):
+            """白名单字段折叠汇总（不展开详细表格行）"""
+            if not items:
+                return
+            # 按原因分组
+            by_reason: dict[str, list[str]] = {}
+            for item in items:
+                if isinstance(item, tuple) and len(item) == 3:
+                    name, _, reason = item  # (dwd_table, dwd_col, reason)
+                elif isinstance(item, tuple) and len(item) == 2:
+                    name, reason = item
+                else:
+                    name, reason = str(item), "白名单"
+                by_reason.setdefault(reason, []).append(name)
+            parts = []
+            for reason, names in by_reason.items():
+                parts.append(f"{reason}: `{'`, `'.join(names[:5])}`{'...' if len(names) > 5 else ''} ({len(names)} 个)")
+            w(f"> ℹ️ {category}白名单字段（已检查，不展开详情）：{'；'.join(parts)}")
+            w()
+
+        # ── API→ODS 未映射（平层） ──
+        if row["api_unmapped_flat"]:
+            w(f"**API→ODS 未映射（平层）** — {len(row['api_unmapped_flat'])} 个")
+            w()
+            w("| # | JSON 字段 | 示例值 | 说明 | 状态 |")
+            w("|---|----------|-------|------|------|")
+            for i, f in enumerate(row["api_unmapped_flat"], 1):
+                sample = _sample_str(f, "API")
+                desc = _desc_str(f, "API")
+                w(f"| {i} | **[`{_esc(f)}`](#{api_anchor})** | {sample} | {desc} | **⚠️ 未映射** |")
+            w()
+
+        # ── API→ODS 未映射（嵌套对象，非白名单） ──
+        if row["api_unmapped_nested"]:
+            w(f"<details><summary>API→ODS 未映射（嵌套对象）— {len(row['api_unmapped_nested'])} 个</summary>")
+            w()
+            w("| # | JSON 字段 | 示例值 | 说明 | 状态 |")
+            w("|---|----------|-------|------|------|")
+            for i, f in enumerate(row["api_unmapped_nested"], 1):
+                sample = _sample_str(f, "API")
+                desc = _desc_str(f, "API")
+                w(f"| {i} | [`{_esc(f)}`](#{api_anchor}) | {sample} | {desc} | 📦 嵌套 |")
+            w()
+            w("</details>")
+            w()
+
+        # ── API 白名单字段汇总 ──
+        _write_whitelist_summary(w, row["api_unmapped_whitelist"], "API→ODS ")
+
+        # ── ODS 无 JSON 源 ──
+        if row["ods_no_json_fields"]:
+            w(f"**ODS 无 JSON 源** — {len(row['ods_no_json_fields'])} 个")
+            w()
+            w("| # | ODS 列 | 说明 | 状态 |")
+            w("|---|-------|------|------|")
+            for i, f in enumerate(row["ods_no_json_fields"], 1):
+                desc = _desc_str(f, "ODS")
+                w(f"| {i} | **[`{_esc(f)}`](#{ods_anchor})** | {desc} | **⚠️ 无 JSON 源** |")
+            w()
+
+        # ── ODS 无 JSON 源 白名单汇总 ──
+        _write_whitelist_summary(w, row["ods_no_json_whitelist"], "ODS 无 JSON 源 ")
+
+        # ── ODS→DWD 未映射 ──
+        if row["ods_no_dwd_fields"]:
+            w(f"**ODS→DWD 未映射** — {len(row['ods_no_dwd_fields'])} 个")
+            w()
+            w("| # | ODS 列 | 说明 | 状态 |")
+            w("|---|-------|------|------|")
+            for i, f in enumerate(row["ods_no_dwd_fields"], 1):
+                desc = _desc_str(f, "ODS")
+                w(f"| {i} | **[`{_esc(f)}`](#{ods_anchor})** | {desc} | **⚠️ 无 DWD 目标** |")
+            w()
+
+        # ── ODS→DWD 白名单汇总 ──
+        _write_whitelist_summary(w, row["ods_no_dwd_whitelist"], "ODS→DWD ")
+
+        # ── DWD 无 ODS 源 ──
+        if row["dwd_no_ods_fields"]:
+            w(f"**DWD 无 ODS 源** — {len(row['dwd_no_ods_fields'])} 个")
+            w()
+            w("| # | DWD 表 | DWD 列 | 说明 | 状态 |")
+            w("|---|-------|-------|------|------|")
+            for i, (dwd_name, dwd_col) in enumerate(row["dwd_no_ods_fields"], 1):
+                dwd_a = dwd_anchors.get(dwd_name, f"dwd-{dwd_name.replace('_', '-')}")
+                desc = _desc_str(dwd_col, "DWD", dwd_tbl=dwd_name)
+                w(f"| {i} | {dwd_name} | **[`{_esc(dwd_col)}`](#{dwd_a})** | {desc} | **⚠️ 无 ODS 源** |")
+            w()
+
+        # ── DWD 无 ODS 源 白名单汇总 ──
+        _write_whitelist_summary(w, row["dwd_no_ods_whitelist"], "DWD 无 ODS 源 ")
+
+    w()
+
+
+
+
+def _write_api_section(w, fm, jt, bd, table_name, api_anchor, ods_anchor):
+    """生成 API 源字段区块（增加业务描述列，合并说明+示例值，白名单字段折叠）"""
+    w(f'<a id="{api_anchor}"></a>')
+    w()
+    w(f"#### API 源字段 — {table_name} [🔗 ODS](#{ods_anchor})")
+    w()
+
+    if not fm or "json_to_ods" not in fm:
+        w("_无 field_mappings 数据_")
+        w()
+        return
+
+    j2o = fm["json_to_ods"]
+    # 构建 json_tree 查找表（含 samples）
+    jt_lookup: dict[str, dict] = {}
+    if jt and "fields" in jt:
+        for f in jt["fields"]:
+            jt_lookup[f["path"]] = f
+
+    # BD_manual ODS 描述（用于交叉引用 JSON 字段的业务含义）
+    ods_descs = bd.get("ods_fields", {}) if bd else {}
+
+    # CHANGE 2026-02-21 | 白名单字段从表格中排除，折叠汇总
+    normal_items: list[dict] = []
+    whitelist_items: list[tuple[str, str]] = []  # (json_path, reason)
+    for m in j2o:
+        jp = m.get("json_path", "")
+        wl_reason = whitelist_reason("", json_path=jp)
+        if wl_reason:
+            whitelist_items.append((jp, wl_reason))
+        else:
+            normal_items.append(m)
+
+    mapped_count = sum(1 for m in j2o if m.get("ods_col") is not None)
+    total_count = len(j2o)
+    if total_count > 0:
+        w(f"已映射 {mapped_count}/{total_count}，覆盖率 {mapped_count / total_count * 100:.1f}%")
+        if whitelist_items:
+            w(f"（其中 {len(whitelist_items)} 个白名单字段已折叠）")
+    else:
+        w("无字段")
+    w()
+    w("| # | JSON 字段 | 类型 | → ODS 列 | 业务描述 | 示例值与说明 |")
+    w("|---|----------|------|---------|---------|------------|")
+
+    for i, m in enumerate(normal_items, 1):
+        json_path = m["json_path"]
+        json_type = m.get("json_type", "")
+        ods_col = m.get("ods_col")
+        match_type = m.get("match_type", "")
+        occurrence_pct = m.get("occurrence_pct", 0)
+
+        # 从 json_tree 获取示例值（优先用 samples 多示例）
+        jt_entry = jt_lookup.get(json_path, {})
+        samples = jt_entry.get("samples", [])
+        total_recs = jt_entry.get("total_records", 0)
+        if not samples:
+            single = jt_entry.get("sample", "")
+            if single:
+                samples = [str(single)]
+
+        # 构建 ODS 列链接
+        if ods_col:
+            ods_link = f"[`{ods_col}`](#{ods_anchor})"
+        else:
+            ods_link = "⚠️ 未映射"
+
+        # 业务描述（从 BD_manual 查找，用 ODS 列名或 JSON 叶子名）
+        leaf = json_path.split(".")[-1].replace("[]", "").lower()
+        biz_desc = ods_descs.get(leaf, "")
+        if biz_desc and len(biz_desc) > 60:
+            biz_desc = biz_desc[:57] + "..."
+        biz_desc = _esc(biz_desc)
+
+        # 合并说明+示例值
+        notes_parts: list[str] = []
+        if "." in json_path and match_type == "unmapped":
+            notes_parts.append("📦 嵌套对象")
+        if match_type == "case_insensitive":
+            notes_parts.append("大小写匹配")
+        if occurrence_pct < 100:
+            notes_parts.append(f"出现率 {occurrence_pct:.0f}%")
+
+        # 示例值展示
+        if _is_enum_like(samples, total_recs):
+            notes_parts.append(f"枚举值: {', '.join(f'`{_esc(s)}`' for s in samples[:8])}")
+        elif samples:
+            notes_parts.append(f"示例: {_format_samples(samples)}")
+
+        note_str = "；".join(notes_parts) if notes_parts else ""
+
+        w(f"| {i} | `{_esc(json_path)}` | {json_type} | {ods_link} | {biz_desc} | {note_str} |")
+
+    w()
+
+    # 白名单字段折叠汇总
+    if whitelist_items:
+        by_reason: dict[str, list[str]] = {}
+        for jp, reason in whitelist_items:
+            by_reason.setdefault(reason, []).append(jp)
+        parts = []
+        for reason, names in by_reason.items():
+            parts.append(f"{reason}: `{'`, `'.join(names[:5])}`{'...' if len(names) > 5 else ''} ({len(names)} 个)")
+        w(f"> ℹ️ 白名单字段（已检查，不展开详情）：{'；'.join(parts)}")
+        w()
+
+
+def _write_ods_section(w, fm, ods_schema, bd, table_name, ods_anchor, api_anchor, dwd_anchors):
+    """生成 ODS 表结构区块（含上下游双向映射列 + 业务描述，白名单字段折叠）"""
+    w(f'<a id="{ods_anchor}"></a>')
+    w()
+    w(f"#### ODS 表结构 — ods.{table_name} [🔗 API](#{api_anchor})")
+    w()
+
+    if not ods_schema or "columns" not in ods_schema:
+        w("_无 DB schema 数据_")
+        w()
+        return
+
+    # 构建 json_to_ods 反向查找：ods_col → json_path
+    ods_to_json: dict[str, str] = {}
+    if fm and "json_to_ods" in fm:
+        for m in fm["json_to_ods"]:
+            if m.get("ods_col"):
+                ods_to_json.setdefault(m["ods_col"], m["json_path"])
+
+    # 构建 ods_to_dwd 查找
+    ods_to_dwd: dict[str, list[dict]] = {}
+    if fm and "ods_to_dwd" in fm:
+        ods_to_dwd = fm["ods_to_dwd"]
+
+    # BD_manual ODS 描述
+    ods_descs = bd.get("ods_fields", {}) if bd else {}
+
+    cols = ods_schema["columns"]
+
+    # CHANGE 2026-02-21 | 白名单字段从表格中排除，折叠汇总
+    normal_cols: list[dict] = []
+    whitelist_cols: list[tuple[str, str]] = []  # (col_name, reason)
+    for col in cols:
+        wl_reason = whitelist_reason(col["name"])
+        if wl_reason:
+            whitelist_cols.append((col["name"], wl_reason))
+        else:
+            normal_cols.append(col)
+
+    w(f"共 {len(cols)} 列")
+    if whitelist_cols:
+        w(f"（其中 {len(whitelist_cols)} 个白名单列已折叠）")
+    w()
+    w("| # | ODS 列名 | 类型 | ← JSON 源 | → DWD 目标 | 业务描述 |")
+    w("|---|---------|------|----------|-----------|---------|")
+
+    for i, col in enumerate(normal_cols, 1):
+        col_name = col["name"]
+        col_type = col["data_type"]
+
+        # ← JSON 源
+        json_src = ods_to_json.get(col_name)
+        if json_src:
+            json_link = f"[`{_esc(json_src)}`](#{api_anchor})"
+        else:
+            json_link = "—"
+
+        # → DWD 目标
+        dwd_targets = ods_to_dwd.get(col_name, [])
+        if dwd_targets:
+            dwd_links = []
+            for dt in dwd_targets:
+                dwd_tbl = dt["dwd_table"]
+                dwd_col = dt["dwd_col"]
+                dwd_anc = dwd_anchors.get(dwd_tbl, f"dwd-{dwd_tbl}")
+                dwd_links.append(f"[`{dwd_tbl}.{dwd_col}`](#{dwd_anc})")
+            dwd_link = ", ".join(dwd_links)
+        else:
+            dwd_link = "—"
+
+        # 业务描述
+        biz_desc = ods_descs.get(col_name.lower(), "")
+        if biz_desc and len(biz_desc) > 60:
+            biz_desc = biz_desc[:57] + "..."
+        biz_desc = _esc(biz_desc)
+
+        w(f"| {i} | `{col_name}` | {col_type} | {json_link} | {dwd_link} | {biz_desc} |")
+
+    w()
+
+    # 白名单列折叠汇总
+    if whitelist_cols:
+        by_reason: dict[str, list[str]] = {}
+        for cn, reason in whitelist_cols:
+            by_reason.setdefault(reason, []).append(cn)
+        parts = []
+        for reason, names in by_reason.items():
+            parts.append(f"{reason}: `{'`, `'.join(names)}` ({len(names)} 个)")
+        w(f"> ℹ️ 白名单列（已检查，不展开详情）：{'；'.join(parts)}")
+        w()
+
+
+def _write_dwd_section(w, fm, dwd_schema, bd, dwd_name, dwd_anchor, ods_anchor, table_name):
+    """生成 DWD 表结构区块（增加业务描述列，白名单字段折叠）"""
+    w(f'<a id="{dwd_anchor}"></a>')
+    w()
+    w(f"#### DWD 表结构 — dwd.{dwd_name} [🔗 ODS](#{ods_anchor})")
+    w()
+
+    if not dwd_schema or "columns" not in dwd_schema:
+        w("_无 DB schema 数据_")
+        w()
+        return
+
+    # 构建 dwd_to_ods 查找
+    dwd_to_ods_map: dict[str, dict] = {}
+    if fm and "dwd_to_ods" in fm and dwd_name in fm["dwd_to_ods"]:
+        for entry in fm["dwd_to_ods"][dwd_name]:
+            dwd_to_ods_map[entry["dwd_col"]] = entry
+
+    # BD_manual DWD 描述
+    dwd_descs = {}
+    if bd and "dwd_fields" in bd:
+        dwd_descs = bd["dwd_fields"].get(dwd_name, {})
+
+    cols = dwd_schema["columns"]
+
+    # CHANGE 2026-02-21 | 白名单字段从表格中排除，折叠汇总
+    normal_cols: list[dict] = []
+    whitelist_cols: list[tuple[str, str]] = []  # (col_name, reason)
+    for col in cols:
+        wl_reason = whitelist_reason(col["name"])
+        if wl_reason:
+            whitelist_cols.append((col["name"], wl_reason))
+        else:
+            normal_cols.append(col)
+
+    w(f"共 {len(cols)} 列")
+    if whitelist_cols:
+        w(f"（其中 {len(whitelist_cols)} 个白名单列已折叠）")
+    w()
+    w("| # | DWD 列名 | 类型 | ← ODS 来源 | 转换 | 业务描述 |")
+    w("|---|---------|------|----------|------|---------|")
+
+    for i, col in enumerate(normal_cols, 1):
+        col_name = col["name"]
+        col_type = col["data_type"]
+
+        mapping = dwd_to_ods_map.get(col_name)
+        if mapping:
+            ods_src = mapping.get("ods_source", "")
+            ods_link = f"[`{ods_src}`](#{ods_anchor})" if ods_src and ods_src != "—" else "—"
+            transform = mapping.get("mapping_type", "")
+            note = mapping.get("note", "")
+        else:
+            ods_link = "—"
+            transform = ""
+            note = ""
+
+        # 业务描述（优先 BD_manual，其次 mapping note，最后 DB comment）
+        biz_desc = dwd_descs.get(col_name.lower(), "")
+        if not biz_desc and note:
+            biz_desc = note
+        if not biz_desc:
+            db_comment = col.get("comment", "")
+            if db_comment:
+                if "【说明】" in db_comment:
+                    desc_part = db_comment.split("【说明】")[1]
+                    if "【" in desc_part:
+                        desc_part = desc_part.split("【")[0]
+                    biz_desc = desc_part.strip().rstrip("。").strip()
+                else:
+                    biz_desc = db_comment
+        if biz_desc and len(biz_desc) > 60:
+            biz_desc = biz_desc[:57] + "..."
+        biz_desc = _esc(biz_desc)
+
+        w(f"| {i} | `{col_name}` | {col_type} | {ods_link} | {_esc(transform)} | {biz_desc} |")
+
+    w()
+
+    # 白名单列折叠汇总
+    if whitelist_cols:
+        by_reason: dict[str, list[str]] = {}
+        for cn, reason in whitelist_cols:
+            by_reason.setdefault(reason, []).append(cn)
+        parts = []
+        for reason, names in by_reason.items():
+            parts.append(f"{reason}: `{'`, `'.join(names)}` ({len(names)} 个)")
+        w(f"> ℹ️ 白名单列（已检查，不展开详情）：{'；'.join(parts)}")
+        w()
+
+
+def main() -> None:
+    # _env_paths 在 import 时已通过绝对路径加载根 .env，无需相对路径 load_dotenv
+    # CHANGE 2026-02-21 | 移除 load_dotenv(Path(".env"))，避免 cwd 不在项目根时失效
+    from _env_paths import get_output_path  # noqa: F401 — 触发 .env 加载
+
+    parser = build_parser()
+    args = parser.parse_args()
+
+    data_dir = resolve_data_dir(args.output_dir)
+    if not data_dir.exists():
+        print(f"错误：数据目录不存在: {data_dir}")
+        return
+
+    print(f"读取数据目录: {data_dir}")
+    report = generate_report(data_dir)
+
+    now = datetime.now()
+    filename = f"dataflow_{now.strftime('%Y-%m-%d_%H%M%S')}.md"
+    output_path = data_dir / filename
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(report)
+
+    print(f"\n{'='*60}")
+    print(f"报告生成完成")
+    print(f"{'='*60}")
+    print(f"  输出路径: {output_path}")
+    print(f"  文件大小: {output_path.stat().st_size / 1024:.1f} KB")
+    print(f"{'='*60}")
+
+
+if __name__ == "__main__":
+    main()
--- a/tools/reporting/gen_full_dataflow_doc.py
+++ b/tools/reporting/gen_full_dataflow_doc.py