init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
# scripts/ — Operations and Tooling Scripts
## Subdirectories
| Directory | Purpose | Typical usage |
|------|------|----------|
| `audit/` | Repository audit (file inventory, call flow, documentation-alignment analysis) | `python -m scripts.audit.run_audit` |
| `check/` | Data checks (ODS gaps, content hashes, integrity validation) | `python -m scripts.check.check_data_integrity` |
| `db_admin/` | Database administration (Excel import into DWS expense/repayment/commission) | `python scripts/db_admin/import_dws_excel.py --type expense` |
| `export/` | Data export (index, group-buy, intimacy, member detail tables, etc.) | `python scripts/export/export_index_tables.py` |
| `rebuild/` | Data rebuild (full ODS→DWD rebuild) | `python scripts/rebuild/rebuild_db_and_run_ods_to_dwd.py` |
| `repair/` | Data repair (backfill, deduplication, hash repair, dimension repair) | `python scripts/repair/dedupe_ods_snapshots.py` |
## Root-level scripts
- `run_update.py` — one-shot incremental update (ODS → DWD → DWS), suitable for cron / scheduled-task invocation
- `run_ods.bat` — Windows batch script: ODS table creation + sample JSON load
- `compare_ddl_db.py` — compares DDL files against the actual database table structures (supports `--all` to compare all four schemas)
- `validate_bd_manual.py` — validates the BD_Manual documentation set (coverage, format, naming conventions)
## How to run
All scripts are executed from the project root (`C:\ZQYY\FQ-ETL`):
```bash
# Generate the audit reports
python -m scripts.audit.run_audit
# One-shot incremental update
python scripts/run_update.py
# Data integrity check (requires a database connection)
python -m scripts.check.check_data_integrity --window-start "2025-01-01" --window-end "2025-02-01"
```
## Notes
- All scripts depend on the `PG_DSN` setting in `.env` (or the equivalent environment variable)
- Scripts under `rebuild/` recreate schemas; use with caution in production
- Scripts under `repair/` modify data; run with `--dry-run` first where supported
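A minimal connection sketch (an assumption, not documented here: the scripts resolve `PG_DSN` through `python-dotenv` and connect with `psycopg2`; the snippet is illustrative and not part of the repo):
```python
# Illustrative only: rough sketch of how PG_DSN might be resolved and used.
import os

import psycopg2
from dotenv import load_dotenv

load_dotenv()                      # picks up .env from the project root
dsn = os.environ["PG_DSN"]         # fail fast if the DSN is not configured

with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
    cur.execute("SELECT 1")
    print(cur.fetchone())
```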

View File

@@ -0,0 +1 @@
# Package marker for the scripts tooling package.

View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""
Repository-governance read-only audit — shared data models.
Defines the dataclasses and enum types shared by the audit script modules.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
# ---------------------------------------------------------------------------
# File metadata
# ---------------------------------------------------------------------------
@dataclass
class FileEntry:
    """Metadata for a single file or directory."""
    rel_path: str        # path relative to the repository root
    is_dir: bool         # whether this entry is a directory
    size_bytes: int      # file size in bytes (0 for directories)
    extension: str       # lowercase file extension, including the dot
    is_empty_dir: bool   # whether this is an empty directory
# ---------------------------------------------------------------------------
# Purpose categories and disposition labels
# ---------------------------------------------------------------------------
class Category(str, Enum):
    """File purpose category."""
CORE_CODE = "核心代码"
CONFIG = "配置"
DATABASE_DEF = "数据库定义"
TEST = "测试"
DOCS = "文档"
SCRIPTS = "脚本工具"
GUI = "GUI"
BUILD_DEPLOY = "构建与部署"
LOG_OUTPUT = "日志与输出"
TEMP_DEBUG = "临时与调试"
OTHER = "其他"
class Disposition(str, Enum):
    """Disposition label."""
KEEP = "保留"
CANDIDATE_DELETE = "候选删除"
CANDIDATE_ARCHIVE = "候选归档"
NEEDS_REVIEW = "待确认"
# ---------------------------------------------------------------------------
# Inventory entry
# ---------------------------------------------------------------------------
@dataclass
class InventoryItem:
    """Inventory entry: path + category + disposition + description."""
rel_path: str
category: Category
disposition: Disposition
description: str
# ---------------------------------------------------------------------------
# Flow tree node
# ---------------------------------------------------------------------------
@dataclass
class FlowNode:
    """Node in the flow tree."""
    name: str            # node name (module/class/function name)
    source_file: str     # path of the source file the node belongs to
    node_type: str       # node type: entry / module / class / function
children: list[FlowNode] = field(default_factory=list)
# ---------------------------------------------------------------------------
# Documentation alignment
# ---------------------------------------------------------------------------
@dataclass
class DocMapping:
    """Mapping between a document and code."""
    doc_path: str            # documentation file path
    doc_topic: str           # document topic
    related_code: list[str]  # related code files/modules
    status: str              # status: aligned / stale / conflict / orphan
@dataclass
class AlignmentIssue:
    """Alignment issue."""
    doc_path: str        # documentation path
    issue_type: str      # stale / conflict / missing
    description: str     # issue description
    related_code: str    # related code path
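A minimal usage sketch of these shared models (assuming they are re-exported from the `scripts.audit` package, as the analyzer modules' imports suggest; the paths are illustrative):
```python
# Illustrative sketch; the file paths below are hypothetical.
from scripts.audit import Category, Disposition, FileEntry, FlowNode, InventoryItem

entry = FileEntry(rel_path="tasks/dwd/fact_order.py", is_dir=False,
                  size_bytes=2048, extension=".py", is_empty_dir=False)
item = InventoryItem(rel_path=entry.rel_path, category=Category.CORE_CODE,
                     disposition=Disposition.KEEP, description="core ETL task")
root = FlowNode(name="cli.main", source_file="cli/main.py", node_type="entry",
                children=[FlowNode("config.settings", "config/settings.py", "module")])
print(item.category.value, item.disposition.value, len(root.children))
```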

View File

@@ -0,0 +1,608 @@
# -*- coding: utf-8 -*-
"""
Documentation alignment analyzer — checks the mapping between documentation and code,
and reports stale, conflicting, and missing points.
Documentation sources:
- the docs/ directory (.md, .txt, .csv, .json)
- the root-level README.md
- README.md files inside individual modules
- .kiro/steering/ steering files
- docs/test-json-doc/ API response samples
"""
from __future__ import annotations
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from scripts.audit import AlignmentIssue, DocMapping
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Documentation file extensions
_DOC_EXTENSIONS = {".md", ".txt", ".csv"}
# Core code directories — report them when documentation is missing
_CORE_CODE_DIRS = {
"tasks",
"loaders",
"orchestration",
"quality",
"models",
"utils",
"api",
"scd",
"config",
"database",
}
# Common metadata columns in ODS tables, ignored during comparison
_ODS_META_COLUMNS = {"content_hash", "payload", "created_at", "updated_at", "id"}
# SQL keywords excluded when parsing column names from DDL
_SQL_KEYWORDS = {
"primary", "key", "not", "null", "default", "unique", "check",
"references", "foreign", "constraint", "index", "create", "table",
"if", "exists", "serial", "bigserial", "true", "false",
}
# ---------------------------------------------------------------------------
# Safe file reading (encoding fallback)
# ---------------------------------------------------------------------------
def _safe_read(path: Path) -> str:
    """Read a file's content, falling back through utf-8 → gbk → latin-1."""
for enc in ("utf-8", "gbk", "latin-1"):
try:
return path.read_text(encoding=enc)
except (UnicodeDecodeError, UnicodeError):
continue
return ""
# ---------------------------------------------------------------------------
# scan_docs — scan all documentation sources
# ---------------------------------------------------------------------------
def scan_docs(repo_root: Path) -> list[str]:
    """Scan all documentation files and return a sorted list of relative paths.
    Documentation sources:
    1. .md, .txt, .csv and .json files under docs/
    2. the root-level README.md
    3. README.md inside each module (e.g. gui/README.md)
    4. .kiro/steering/ steering files
    """
results: list[str] = []
    def _rel(p: Path) -> str:
        """Return the normalized forward-slash relative path."""
return str(p.relative_to(repo_root)).replace("\\", "/")
# 1. docs/ 目录(递归,含 test-json-doc 下的 .json
docs_dir = repo_root / "docs"
if docs_dir.is_dir():
for p in docs_dir.rglob("*"):
if p.is_file():
ext = p.suffix.lower()
if ext in _DOC_EXTENSIONS or ext == ".json":
results.append(_rel(p))
# 2. 根目录 README.md
root_readme = repo_root / "README.md"
if root_readme.is_file():
results.append("README.md")
# 3. 各模块内的 README.md
for child in sorted(repo_root.iterdir()):
if child.is_dir() and child.name not in ("docs", ".kiro"):
readme = child / "README.md"
if readme.is_file():
results.append(_rel(readme))
# 4. .kiro/steering/
steering_dir = repo_root / ".kiro" / "steering"
if steering_dir.is_dir():
for p in sorted(steering_dir.iterdir()):
if p.is_file():
results.append(_rel(p))
return sorted(set(results))
# ---------------------------------------------------------------------------
# extract_code_references — extract code references from documents
# ---------------------------------------------------------------------------
def extract_code_references(doc_path: Path) -> list[str]:
    """Extract code references from a document (file paths, class names, function names inside backticks).
    Rules:
    - extract the content of backticked spans
    - skip single-character references
    - skip pure numbers / version strings
    - normalize backslashes to forward slashes
    - deduplicate
    """
if not doc_path.is_file():
return []
text = _safe_read(doc_path)
if not text:
return []
# 提取反引号内容
backtick_refs = re.findall(r"`([^`]+)`", text)
seen: set[str] = set()
results: list[str] = []
for raw in backtick_refs:
ref = raw.strip()
        # normalize backslashes to forward slashes
        ref = ref.replace("\\", "/")
        # skip single-character references
        if len(ref) <= 1:
            continue
        # skip pure numbers and version strings
        if re.fullmatch(r"[\d.]+", ref):
            continue
        # deduplicate
if ref in seen:
continue
seen.add(ref)
results.append(ref)
return results
# ---------------------------------------------------------------------------
# check_reference_validity — check whether a reference is still valid
# ---------------------------------------------------------------------------
def check_reference_validity(ref: str, repo_root: Path) -> bool:
    """Check whether a code reference found in the docs is still valid.
    Strategy:
    1. check it directly as a file/directory path
    2. check again after stripping the FQ-ETL/ prefix (legacy doc references)
    3. convert a dotted module path to a file path and check (e.g. config.settings → config/settings.py)
    """
# 1. 直接路径
if (repo_root / ref).exists():
return True
# 2. 去掉旧包名前缀(兼容历史文档)
for prefix in ("FQ-ETL/", "etl_billiards/"):
if ref.startswith(prefix):
stripped = ref[len(prefix):]
if (repo_root / stripped).exists():
return True
# 3. 点号模块路径 → 文件路径
if "." in ref and "/" not in ref:
as_path = ref.replace(".", "/") + ".py"
if (repo_root / as_path).exists():
return True
# 也可能是目录(包)
as_dir = ref.replace(".", "/")
if (repo_root / as_dir).is_dir():
return True
return False
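# Illustrative examples (hypothetical references, assuming the targets exist under repo_root):
#   check_reference_validity("config/settings.py", repo_root)  -> True (direct path)
#   check_reference_validity("config.settings", repo_root)     -> True (dotted module -> config/settings.py)
#   check_reference_validity("FQ-ETL/cli/main.py", repo_root)  -> True (legacy prefix stripped first)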
# ---------------------------------------------------------------------------
# find_undocumented_modules — find core code modules lacking documentation
# ---------------------------------------------------------------------------
def find_undocumented_modules(
repo_root: Path,
documented: set[str],
) -> list[str]:
    """Find core code modules that lack documentation.
    Only .py files under _CORE_CODE_DIRS are checked (excluding __init__.py).
    Returns a sorted list of relative paths.
    """
undocumented: list[str] = []
for core_dir in sorted(_CORE_CODE_DIRS):
dir_path = repo_root / core_dir
if not dir_path.is_dir():
continue
for py_file in dir_path.rglob("*.py"):
if py_file.name == "__init__.py":
continue
rel = str(py_file.relative_to(repo_root))
# 归一化路径分隔符
rel = rel.replace("\\", "/")
if rel not in documented:
undocumented.append(rel)
return sorted(undocumented)
# ---------------------------------------------------------------------------
# DDL / data dictionary parsing helpers
# ---------------------------------------------------------------------------
def _parse_ddl_tables(sql: str) -> dict[str, set[str]]:
    """Extract table and column names from DDL SQL.
    Returns a {table_name: {column names}} dict.
    Schema-qualified table names are supported (e.g. billiards_dwd.dim_member → dim_member).
    """
tables: dict[str, set[str]] = {}
# 匹配 CREATE TABLE [IF NOT EXISTS] [schema.]table_name (
create_re = re.compile(
r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?"
r"(?:\w+\.)?(\w+)\s*\(",
re.IGNORECASE,
)
for match in create_re.finditer(sql):
table_name = match.group(1)
# 找到对应的括号内容
start = match.end()
depth = 1
pos = start
while pos < len(sql) and depth > 0:
if sql[pos] == "(":
depth += 1
elif sql[pos] == ")":
depth -= 1
pos += 1
body = sql[start:pos - 1]
columns: set[str] = set()
# 逐行提取列名——取每行第一个标识符
for line in body.split("\n"):
line = line.strip().rstrip(",")
if not line:
continue
# 提取第一个单词
col_match = re.match(r"(\w+)", line)
if col_match:
col_name = col_match.group(1).lower()
# 排除 SQL 关键字
if col_name not in _SQL_KEYWORDS:
columns.add(col_name)
tables[table_name] = columns
return tables
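# Worked example (illustrative input, not executed at import time):
#   _parse_ddl_tables("CREATE TABLE IF NOT EXISTS billiards_dwd.dim_member (\n"
#                     "    member_id BIGINT PRIMARY KEY,\n    member_name TEXT NOT NULL\n);")
#   -> {"dim_member": {"member_id", "member_name"}}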
def _parse_dictionary_tables(md: str) -> dict[str, set[str]]:
    """Extract table and field names from a data dictionary Markdown file.
    Conventions:
    - table names appear in ## headings (possibly backticked)
    - field names appear in the first column of Markdown tables
    - header rows (containing the literal "字段") and separator rows (---) are skipped
    """
tables: dict[str, set[str]] = {}
current_table: str | None = None
for line in md.split("\n"):
# 匹配 ## 标题中的表名
heading = re.match(r"^##\s+`?(\w+)`?", line)
if heading:
current_table = heading.group(1)
tables[current_table] = set()
continue
if current_table is None:
continue
# 跳过分隔行
if re.match(r"^\s*\|[-\s|]+\|\s*$", line):
continue
# 解析表格行
row_match = re.match(r"^\s*\|\s*(\S+)", line)
if row_match:
field = row_match.group(1)
# 跳过表头(含"字段"字样)
if field in ("字段",):
continue
tables[current_table].add(field)
return tables
# ---------------------------------------------------------------------------
# check_ddl_vs_dictionary — compare DDL against the data dictionary
# ---------------------------------------------------------------------------
def check_ddl_vs_dictionary(repo_root: Path) -> list[AlignmentIssue]:
    """Compare coverage between the DDL files and the data dictionary docs.
    Checks:
    1. tables present in the DDL but missing from the dictionary → missing
    2. columns of a shared table present in the DDL but missing from the dictionary → conflict
    """
issues: list[AlignmentIssue] = []
# 收集所有 DDL 表定义
ddl_tables: dict[str, set[str]] = {}
db_dir = repo_root / "database"
if db_dir.is_dir():
for sql_file in sorted(db_dir.glob("schema_*.sql")):
content = _safe_read(sql_file)
for tbl, cols in _parse_ddl_tables(content).items():
if tbl in ddl_tables:
ddl_tables[tbl] |= cols
else:
ddl_tables[tbl] = set(cols)
# 收集所有数据字典表定义
dict_tables: dict[str, set[str]] = {}
docs_dir = repo_root / "docs"
if docs_dir.is_dir():
for dict_file in sorted(docs_dir.glob("*dictionary*.md")):
content = _safe_read(dict_file)
for tbl, fields in _parse_dictionary_tables(content).items():
if tbl in dict_tables:
dict_tables[tbl] |= fields
else:
dict_tables[tbl] = set(fields)
# 比对
for tbl, ddl_cols in sorted(ddl_tables.items()):
if tbl not in dict_tables:
issues.append(AlignmentIssue(
doc_path="docs/*dictionary*.md",
issue_type="missing",
description=f"DDL 定义了表 `{tbl}`,但数据字典中未收录",
related_code=f"database/schema_*.sql ({tbl})",
))
else:
# 检查列差异
dict_cols = dict_tables[tbl]
missing_cols = ddl_cols - dict_cols
for col in sorted(missing_cols):
issues.append(AlignmentIssue(
doc_path="docs/*dictionary*.md",
issue_type="conflict",
description=f"表 `{tbl}` 的列 `{col}` 在 DDL 中存在但数据字典中缺失",
related_code=f"database/schema_*.sql ({tbl}.{col})",
))
return issues
# ---------------------------------------------------------------------------
# check_api_samples_vs_parsers — compare API samples against the ODS structures
# ---------------------------------------------------------------------------
def check_api_samples_vs_parsers(repo_root: Path) -> list[AlignmentIssue]:
    """Check consistency between API response samples and the ODS table structures.
    Strategy:
    1. scan the .json files under docs/test-json-doc/
    2. extract the top-level field names from each JSON sample
    3. look up a table with a matching name in the ODS DDL
    4. compare the field differences (ignoring ODS metadata columns)
    """
issues: list[AlignmentIssue] = []
sample_dir = repo_root / "docs" / "test-json-doc"
if not sample_dir.is_dir():
return issues
# 收集 ODS 表定义(保留全部列,比对时忽略元数据列)
ods_tables: dict[str, set[str]] = {}
db_dir = repo_root / "database"
if db_dir.is_dir():
for sql_file in sorted(db_dir.glob("schema_*ODS*.sql")):
content = _safe_read(sql_file)
for tbl, cols in _parse_ddl_tables(content).items():
ods_tables[tbl] = cols
    # compare each sample file in turn
    for json_file in sorted(sample_dir.glob("*.json")):
        entity_name = json_file.stem  # file name (without extension) used as the entity name
        # parse the JSON sample
try:
content = _safe_read(json_file)
data = json.loads(content)
except (json.JSONDecodeError, ValueError):
continue
# 提取顶层字段名
sample_fields: set[str] = set()
if isinstance(data, list) and data:
# 数组格式——取第一个元素的键
first = data[0]
if isinstance(first, dict):
sample_fields = set(first.keys())
elif isinstance(data, dict):
sample_fields = set(data.keys())
if not sample_fields:
continue
# 查找匹配的 ODS 表
matched_table: str | None = None
matched_cols: set[str] = set()
for tbl, cols in ods_tables.items():
# 表名包含实体名(如 test_entity 匹配 billiards_ods.test_entity
tbl_lower = tbl.lower()
entity_lower = entity_name.lower()
if entity_lower in tbl_lower or tbl_lower == entity_lower:
matched_table = tbl
matched_cols = cols
break
if matched_table is None:
continue
# 比对:样本中有但 ODS 表中没有的字段
extra_fields = sample_fields - matched_cols
for field in sorted(extra_fields):
issues.append(AlignmentIssue(
doc_path=f"docs/test-json-doc/{json_file.name}",
issue_type="conflict",
description=(
f"API 样本字段 `{field}` 在 ODS 表 `{matched_table}` 中未定义"
),
related_code=f"database/schema_*ODS*.sql ({matched_table})",
))
return issues
# ---------------------------------------------------------------------------
# build_mappings — build the mapping between documents and code
# ---------------------------------------------------------------------------
def build_mappings(
doc_paths: list[str],
repo_root: Path,
) -> list[DocMapping]:
    """Build the mapping between each document and its related code modules."""
mappings: list[DocMapping] = []
for doc_rel in doc_paths:
doc_path = repo_root / doc_rel
refs = extract_code_references(doc_path)
# 确定关联代码和状态
valid_refs: list[str] = []
has_stale = False
for ref in refs:
if check_reference_validity(ref, repo_root):
valid_refs.append(ref)
else:
has_stale = True
# 推断文档主题(取文件名或第一行标题)
topic = _infer_topic(doc_path, doc_rel)
if not refs:
status = "orphan"
elif has_stale:
status = "stale"
else:
status = "aligned"
mappings.append(DocMapping(
doc_path=doc_rel,
doc_topic=topic,
related_code=valid_refs,
status=status,
))
return mappings
def _infer_topic(doc_path: Path, doc_rel: str) -> str:
    """Infer the document topic — prefer the first Markdown level-1 heading, else use the file name."""
if doc_path.is_file() and doc_path.suffix.lower() in (".md", ".txt"):
try:
text = _safe_read(doc_path)
for line in text.split("\n"):
line = line.strip()
if line.startswith("# "):
return line[2:].strip()
except Exception:
pass
return doc_rel
# ---------------------------------------------------------------------------
# render_alignment_report — render the documentation alignment report as Markdown
# ---------------------------------------------------------------------------
def render_alignment_report(
mappings: list[DocMapping],
issues: list[AlignmentIssue],
repo_root: str,
) -> str:
    """Render the documentation alignment report in Markdown.
    Sections: mapping table, stale points, conflict points, missing points, summary statistics.
    """
lines: list[str] = []
# --- 头部 ---
now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
lines.append("# 文档对齐报告")
lines.append("")
lines.append(f"- 生成时间:{now}")
lines.append(f"- 仓库路径:`{repo_root}`")
lines.append("")
# --- 映射关系 ---
lines.append("## 映射关系")
lines.append("")
if mappings:
lines.append("| 文档路径 | 主题 | 关联代码 | 状态 |")
lines.append("|---|---|---|---|")
for m in mappings:
code_str = ", ".join(f"`{c}`" for c in m.related_code) if m.related_code else ""
lines.append(f"| `{m.doc_path}` | {m.doc_topic} | {code_str} | {m.status} |")
else:
lines.append("未发现文档映射关系。")
lines.append("")
# --- 按 issue_type 分组 ---
stale = [i for i in issues if i.issue_type == "stale"]
conflict = [i for i in issues if i.issue_type == "conflict"]
missing = [i for i in issues if i.issue_type == "missing"]
# --- 过期点 ---
lines.append("## 过期点")
lines.append("")
if stale:
lines.append("| 文档路径 | 描述 | 关联代码 |")
lines.append("|---|---|---|")
for i in stale:
lines.append(f"| `{i.doc_path}` | {i.description} | `{i.related_code}` |")
else:
lines.append("未发现过期点。")
lines.append("")
# --- 冲突点 ---
lines.append("## 冲突点")
lines.append("")
if conflict:
lines.append("| 文档路径 | 描述 | 关联代码 |")
lines.append("|---|---|---|")
for i in conflict:
lines.append(f"| `{i.doc_path}` | {i.description} | `{i.related_code}` |")
else:
lines.append("未发现冲突点。")
lines.append("")
# --- 缺失点 ---
lines.append("## 缺失点")
lines.append("")
if missing:
lines.append("| 文档路径 | 描述 | 关联代码 |")
lines.append("|---|---|---|")
for i in missing:
lines.append(f"| `{i.doc_path}` | {i.description} | `{i.related_code}` |")
else:
lines.append("未发现缺失点。")
lines.append("")
# --- 统计摘要 ---
lines.append("## 统计摘要")
lines.append("")
lines.append(f"- 文档总数:{len(mappings)}")
lines.append(f"- 过期点数量:{len(stale)}")
lines.append(f"- 冲突点数量:{len(conflict)}")
lines.append(f"- 缺失点数量:{len(missing)}")
lines.append("")
return "\n".join(lines)
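A minimal sketch of driving the alignment analyzer on its own, using the functions defined above (assuming execution from the repository root, as the scripts README prescribes):
```python
from pathlib import Path

from scripts.audit.doc_alignment_analyzer import (
    build_mappings, check_ddl_vs_dictionary, render_alignment_report, scan_docs,
)

repo_root = Path(".").resolve()          # assumes the current directory is the repo root
docs = scan_docs(repo_root)
mappings = build_mappings(docs, repo_root)
issues = check_ddl_vs_dictionary(repo_root)
print(render_alignment_report(mappings, issues, str(repo_root))[:400])
```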

View File

@@ -0,0 +1,618 @@
# -*- coding: utf-8 -*-
"""
Flow tree analyzer — builds the call tree from entry points down to leaf modules
by statically analyzing the import statements and class inheritance in the Python sources.
Read-only: it reads and parses Python source files and modifies nothing.
"""
from __future__ import annotations
import ast
import logging
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from scripts.audit import FileEntry, FlowNode
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Project-internal package names (top-level directories that contain project code)
# ---------------------------------------------------------------------------
_PROJECT_PACKAGES: set[str] = {
"cli", "config", "api", "database", "tasks", "loaders",
"scd", "orchestration", "quality", "models", "utils",
"gui", "scripts",
}
# ---------------------------------------------------------------------------
# Known third-party packages and standard-library top-level modules (used to filter out non-project imports)
# ---------------------------------------------------------------------------
_KNOWN_THIRD_PARTY: set[str] = {
"psycopg2", "requests", "dateutil", "python_dateutil",
"dotenv", "openpyxl", "PySide6", "flask", "pyinstaller",
"PyInstaller", "hypothesis", "pytest", "_pytest", "py",
"pluggy", "pkg_resources", "setuptools", "pip", "wheel",
"tzdata", "six", "certifi", "urllib3", "charset_normalizer",
"idna", "shiboken6",
}
def _is_project_module(module_name: str) -> bool:
    """Return True if the module name belongs to the project's own packages."""
top = module_name.split(".")[0]
if top in _PROJECT_PACKAGES:
return True
return False
def _is_stdlib_or_third_party(module_name: str) -> bool:
    """Return True if the module name is a stdlib module or a known third-party package."""
top = module_name.split(".")[0]
if top in _KNOWN_THIRD_PARTY:
return True
# 检查标准库
if top in sys.stdlib_module_names:
return True
return False
# ---------------------------------------------------------------------------
# File reading (multi-encoding fallback)
# ---------------------------------------------------------------------------
def _read_source(filepath: Path) -> str | None:
    """Read a Python source file, trying utf-8 → gbk → latin-1 in turn.
    Returns the file content, or None if it cannot be read.
    """
for encoding in ("utf-8", "gbk", "latin-1"):
try:
return filepath.read_text(encoding=encoding)
except (UnicodeDecodeError, UnicodeError):
continue
except (OSError, PermissionError) as exc:
logger.warning("无法读取文件 %s: %s", filepath, exc)
return None
logger.warning("无法以任何编码读取文件 %s", filepath)
return None
# ---------------------------------------------------------------------------
# Path ↔ module-name conversion
# ---------------------------------------------------------------------------
def _path_to_module_name(rel_path: str) -> str:
    """Convert a relative path to a Python module name.
    For example:
    - "cli/main.py" → "cli.main"
    - "cli/__init__.py" → "cli"
    - "tasks/dws/assistant.py" → "tasks.dws.assistant"
"""
p = rel_path.replace("\\", "/")
if p.endswith("/__init__.py"):
p = p[: -len("/__init__.py")]
elif p.endswith(".py"):
p = p[:-3]
return p.replace("/", ".")
def _module_to_path(module_name: str) -> str:
    """Convert a module name to a relative file path (preferring a .py file).
    For example:
    - "cli.main" → "cli/main.py"
    - "cli" → "cli/__init__.py"
"""
return module_name.replace(".", "/") + ".py"
# ---------------------------------------------------------------------------
# parse_imports — parse the import statements of a Python file
# ---------------------------------------------------------------------------
def parse_imports(filepath: Path) -> list[str]:
    """Parse a Python file's import statements with ast and return the imported project-local modules.
    - only project-internal modules are returned (stdlib and third-party packages are excluded)
    - the result is deduplicated
    - returns an empty list on syntax errors or if the file does not exist
    """
if not filepath.exists():
return []
source = _read_source(filepath)
if source is None:
return []
try:
tree = ast.parse(source, filename=str(filepath))
except SyntaxError:
logger.warning("语法错误,无法解析 %s", filepath)
return []
modules: list[str] = []
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
name = alias.name
if _is_project_module(name) and not _is_stdlib_or_third_party(name):
modules.append(name)
elif isinstance(node, ast.ImportFrom):
if node.module and node.level == 0:
name = node.module
if _is_project_module(name) and not _is_stdlib_or_third_party(name):
modules.append(name)
# 去重并保持顺序
seen: set[str] = set()
result: list[str] = []
for m in modules:
if m not in seen:
seen.add(m)
result.append(m)
return result
# ---------------------------------------------------------------------------
# build_flow_tree — recursively follow the import chain from an entry point to build the flow tree
# ---------------------------------------------------------------------------
def build_flow_tree(
repo_root: Path,
entry_file: str,
_visited: set[str] | None = None,
) -> FlowNode:
    """Build the flow tree by recursively following the import chain from an entry file.
    Parameters
    ----------
    repo_root : Path
        Repository root directory.
    entry_file : str
        Relative path of the entry file (e.g. "cli/main.py").
    _visited : set[str] | None
        Internal use only; prevents infinite recursion on circular imports.
    Returns
    -------
    FlowNode
        The flow tree rooted at the entry file.
    """
is_root = _visited is None
if _visited is None:
_visited = set()
module_name = _path_to_module_name(entry_file)
node_type = "entry" if is_root else "module"
_visited.add(entry_file)
filepath = repo_root / entry_file
children: list[FlowNode] = []
if filepath.exists():
imported_modules = parse_imports(filepath)
for mod in imported_modules:
child_path = _module_to_path(mod)
# 如果 .py 文件不存在,尝试 __init__.py
if not (repo_root / child_path).exists():
alt_path = mod.replace(".", "/") + "/__init__.py"
if (repo_root / alt_path).exists():
child_path = alt_path
if child_path not in _visited:
child_node = build_flow_tree(repo_root, child_path, _visited)
children.append(child_node)
return FlowNode(
name=module_name,
source_file=entry_file,
node_type=node_type,
children=children,
)
# ---------------------------------------------------------------------------
# Batch (.bat) file parsing
# ---------------------------------------------------------------------------
def _parse_bat_python_target(bat_path: Path) -> str | None:
    """Parse a batch file for the target module of its `python -m` command.
    Returns the module name (e.g. "cli.main"), or None if none is found.
    """
if not bat_path.exists():
return None
content = _read_source(bat_path)
if content is None:
return None
# 匹配 python -m module.name 或 python3 -m module.name
pattern = re.compile(r"python[3]?\s+-m\s+([\w.]+)", re.IGNORECASE)
for line in content.splitlines():
m = pattern.search(line)
if m:
return m.group(1)
return None
# ---------------------------------------------------------------------------
# Entry point discovery
# ---------------------------------------------------------------------------
def discover_entry_points(repo_root: Path) -> list[dict[str, str]]:
    """Discover all entry points of the project.
    Returns a list of dicts, each with:
    - type: entry type (CLI / GUI / batch / ops script)
    - file: relative path
    - description: short description
    Discovery rules:
    - cli/main.py → CLI entry point
    - gui/main.py → GUI entry point
    - *.bat files → parse the `python -m` command inside
    - scripts/*.py (containing `if __name__ == "__main__"`, excluding __init__.py and the audit/ subdirectory)
    """
entries: list[dict[str, str]] = []
# CLI 入口
cli_main = repo_root / "cli" / "main.py"
if cli_main.exists():
entries.append({
"type": "CLI",
"file": "cli/main.py",
"description": "CLI 主入口 (`python -m cli.main`)",
})
# GUI 入口
gui_main = repo_root / "gui" / "main.py"
if gui_main.exists():
entries.append({
"type": "GUI",
"file": "gui/main.py",
"description": "GUI 主入口 (`python -m gui.main`)",
})
# 批处理文件
for bat in sorted(repo_root.glob("*.bat")):
target = _parse_bat_python_target(bat)
desc = f"批处理脚本"
if target:
desc += f",调用 `{target}`"
entries.append({
"type": "批处理",
"file": bat.name,
"description": desc,
})
    # ops scripts: .py files under scripts/ (excluding __init__.py and the audit/ subdirectory)
scripts_dir = repo_root / "scripts"
if scripts_dir.is_dir():
for py_file in sorted(scripts_dir.glob("*.py")):
if py_file.name == "__init__.py":
continue
# 检查是否包含 if __name__ == "__main__"
source = _read_source(py_file)
if source and '__name__' in source and '__main__' in source:
rel = py_file.relative_to(repo_root).as_posix()
entries.append({
"type": "运维脚本",
"file": rel,
"description": f"运维脚本 `{py_file.name}`",
})
return entries
# ---------------------------------------------------------------------------
# Task type and loader type classification
# ---------------------------------------------------------------------------
def classify_task_type(rel_path: str) -> str:
    """Classify the task type from the file path.
    Possible return values:
    - "ODS 抓取任务"
    - "DWD 加载任务"
    - "DWS 汇总任务"
    - "校验任务"
    - "Schema 初始化任务"
    - "任务" (the default when no finer classification applies)
    """
p = rel_path.replace("\\", "/").lower()
if "verification/" in p or "verification\\" in p:
return "校验任务"
if "dws/" in p or "dws\\" in p:
return "DWS 汇总任务"
# 文件名级别判断
basename = p.rsplit("/", 1)[-1] if "/" in p else p
if basename.startswith("ods_") or basename.startswith("ods."):
return "ODS 抓取任务"
if basename.startswith("dwd_") or basename.startswith("dwd."):
return "DWD 加载任务"
if basename.startswith("dws_"):
return "DWS 汇总任务"
if "init" in basename and "schema" in basename:
return "Schema 初始化任务"
return "任务"
def classify_loader_type(rel_path: str) -> str:
    """Classify the loader type from the file path.
    Possible return values:
    - "维度加载器 (SCD2)"
    - "事实表加载器"
    - "ODS 通用加载器"
    - "加载器" (the default when no finer classification applies)
    """
p = rel_path.replace("\\", "/").lower()
if "dimensions/" in p or "dimensions\\" in p:
return "维度加载器 (SCD2)"
if "facts/" in p or "facts\\" in p:
return "事实表加载器"
if "ods/" in p or "ods\\" in p:
return "ODS 通用加载器"
return "加载器"
# ---------------------------------------------------------------------------
# find_orphan_modules — find Python modules not reachable from any entry point
# ---------------------------------------------------------------------------
def find_orphan_modules(
repo_root: Path,
all_entries: list[FileEntry],
reachable: set[str],
) -> list[str]:
    """Find Python modules that are not referenced, directly or indirectly, by any entry point.
    Exclusions (not treated as orphaned):
    - __init__.py files
    - files under tests/
    - files under scripts/audit/ (the audit scripts themselves)
    - directory entries
    - non-.py files
    - files that do not belong to a project package
    Returns the orphaned modules sorted by path.
    """
    orphans: list[str] = []
    for entry in all_entries:
        # skip directories
        if entry.is_dir:
            continue
        # only consider .py files
        if entry.extension != ".py":
            continue
        rel = entry.rel_path.replace("\\", "/")
        # exclude __init__.py
        if rel.endswith("/__init__.py") or rel == "__init__.py":
            continue
        # exclude test files
        if rel.startswith("tests/") or rel.startswith("tests\\"):
            continue
        # exclude the audit scripts themselves
        if rel.startswith("scripts/audit/") or rel.startswith("scripts\\audit\\"):
            continue
        # only check files that belong to project packages
        top_dir = rel.split("/")[0] if "/" in rel else ""
        if top_dir not in _PROJECT_PACKAGES:
            continue
        # not in the reachable set → orphaned
        if rel not in reachable:
            orphans.append(rel)
orphans.sort()
return orphans
# ---------------------------------------------------------------------------
# Statistics helpers
# ---------------------------------------------------------------------------
def _count_nodes_by_type(trees: list[FlowNode]) -> dict[str, int]:
    """Recursively count the nodes of each type in the flow trees."""
counts: dict[str, int] = {"entry": 0, "module": 0, "class": 0, "function": 0}
def _walk(node: FlowNode) -> None:
t = node.node_type
counts[t] = counts.get(t, 0) + 1
for child in node.children:
_walk(child)
for tree in trees:
_walk(tree)
return counts
def _count_tasks_and_loaders(trees: list[FlowNode]) -> tuple[int, int]:
    """Count the task modules and loader modules in the flow trees."""
tasks = 0
loaders = 0
seen: set[str] = set()
def _walk(node: FlowNode) -> None:
nonlocal tasks, loaders
if node.source_file in seen:
return
seen.add(node.source_file)
sf = node.source_file.replace("\\", "/")
if sf.startswith("tasks/") and not sf.endswith("__init__.py"):
base = sf.rsplit("/", 1)[-1]
if not base.startswith("base_"):
tasks += 1
if sf.startswith("loaders/") and not sf.endswith("__init__.py"):
base = sf.rsplit("/", 1)[-1]
if not base.startswith("base_"):
loaders += 1
for child in node.children:
_walk(child)
for tree in trees:
_walk(tree)
return tasks, loaders
# ---------------------------------------------------------------------------
# Type-annotation helpers
# ---------------------------------------------------------------------------
def _get_type_annotation(source_file: str) -> str:
    """Return a type-annotation string for the source file path (used to label nodes in the reports)."""
sf = source_file.replace("\\", "/")
if sf.startswith("tasks/"):
return f" [{classify_task_type(sf)}]"
if sf.startswith("loaders/"):
return f" [{classify_loader_type(sf)}]"
return ""
# ---------------------------------------------------------------------------
# Mermaid diagram rendering
# ---------------------------------------------------------------------------
def _render_mermaid(trees: list[FlowNode]) -> str:
    """Render the Mermaid flow-chart code."""
lines: list[str] = ["```mermaid", "graph TD"]
seen_edges: set[tuple[str, str]] = set()
node_ids: dict[str, str] = {}
counter = [0]
def _node_id(name: str) -> str:
if name not in node_ids:
node_ids[name] = f"N{counter[0]}"
counter[0] += 1
return node_ids[name]
def _walk(node: FlowNode) -> None:
nid = _node_id(node.name)
annotation = _get_type_annotation(node.source_file)
label = f"{node.name}{annotation}"
# 声明节点
lines.append(f" {nid}[\"`{label}`\"]")
for child in node.children:
cid = _node_id(child.name)
edge = (nid, cid)
if edge not in seen_edges:
seen_edges.add(edge)
lines.append(f" {nid} --> {cid}")
_walk(child)
for tree in trees:
_walk(tree)
lines.append("```")
return "\n".join(lines)
# ---------------------------------------------------------------------------
# Indented text-tree rendering
# ---------------------------------------------------------------------------
def _render_text_tree(trees: list[FlowNode]) -> str:
    """Render the flow tree as indented text."""
lines: list[str] = []
seen: set[str] = set()
def _walk(node: FlowNode, depth: int) -> None:
indent = " " * depth
annotation = _get_type_annotation(node.source_file)
line = f"{indent}- `{node.name}` (`{node.source_file}`){annotation}"
lines.append(line)
key = node.source_file
if key in seen:
# 已展开过,不再递归(避免循环)
if node.children:
lines.append(f"{indent} - *(已展开)*")
return
seen.add(key)
for child in node.children:
_walk(child, depth + 1)
for tree in trees:
_walk(tree, 0)
return "\n".join(lines)
# ---------------------------------------------------------------------------
# render_flow_report — render the flow tree report as Markdown
# ---------------------------------------------------------------------------
def render_flow_report(
trees: list[FlowNode],
orphans: list[str],
repo_root: str,
) -> str:
    """Render the flow tree report in Markdown (Mermaid diagram plus indented text tree).
    Report structure:
    1. header (timestamp, repository path)
    2. Mermaid flow chart
    3. indented text tree
    4. orphaned-module list
    5. summary statistics
    """
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
sections: list[str] = []
# --- 头部 ---
sections.append("# 项目流程树报告\n")
sections.append(f"- 生成时间: {timestamp}")
sections.append(f"- 仓库路径: `{repo_root}`\n")
# --- Mermaid 图 ---
sections.append("## 流程图Mermaid\n")
sections.append(_render_mermaid(trees))
sections.append("")
# --- 缩进文本树 ---
sections.append("## 流程树(缩进文本)\n")
sections.append(_render_text_tree(trees))
sections.append("")
# --- 孤立模块 ---
sections.append("## 孤立模块\n")
if orphans:
for o in orphans:
sections.append(f"- `{o}`")
else:
sections.append("未发现孤立模块。")
sections.append("")
# --- 统计摘要 ---
entry_count = sum(1 for t in trees if t.node_type == "entry")
task_count, loader_count = _count_tasks_and_loaders(trees)
orphan_count = len(orphans)
sections.append("## 统计摘要\n")
sections.append(f"| 指标 | 数量 |")
sections.append(f"|------|------|")
sections.append(f"| 入口点 | {entry_count} |")
sections.append(f"| 任务 | {task_count} |")
sections.append(f"| 加载器 | {loader_count} |")
sections.append(f"| 孤立模块 | {orphan_count} |")
sections.append("")
return "\n".join(sections)
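A minimal sketch of running the flow analysis standalone with the functions above (assuming execution from the repository root):
```python
from pathlib import Path

from scripts.audit.flow_analyzer import (
    build_flow_tree, discover_entry_points, render_flow_report,
)

repo_root = Path(".").resolve()
trees = []
for ep in discover_entry_points(repo_root):
    if ep["file"].endswith(".py"):       # .bat entries have no import chain to trace
        trees.append(build_flow_tree(repo_root, ep["file"]))
print(render_flow_report(trees, orphans=[], repo_root=str(repo_root))[:400])
```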

View File

@@ -0,0 +1,449 @@
# -*- coding: utf-8 -*-
"""
File inventory analyzer — assigns a purpose category and a disposition label to scan results.
Classification rules, highest priority first:
1. everything under tmp/ → temp & debug / candidate delete or candidate archive
2. runtime output under logs/ and export/ → logs & output / candidate archive
3. *.lnk and *.rar files → other / candidate delete
4. empty directories → other / candidate delete
5. core code directories (tasks/ etc.) → core code / keep
6. config/ → config / keep
7. database/*.sql and database/migrations/ → database definitions / keep
8. database/*.py → core code / keep
9. tests/ → tests / keep
10. docs/ → docs / keep
11. .py files under scripts/ → script tooling / keep
12. gui/ → GUI / keep
13. build & deployment files → build & deploy / keep
14. everything else → other / needs review
"""
from __future__ import annotations
import os
from collections import Counter
from datetime import datetime, timezone
from itertools import groupby
from scripts.audit import Category, Disposition, FileEntry, InventoryItem
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Core code top-level directories
_CORE_CODE_DIRS = (
"tasks/", "loaders/", "scd/", "orchestration/",
"quality/", "models/", "utils/", "api/",
)
# Build & deployment file names (root level)
_BUILD_DEPLOY_BASENAMES = {"setup.py", "build_exe.py"}
# Build & deployment extensions
_BUILD_DEPLOY_EXTENSIONS = {".bat", ".sh", ".ps1"}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _top_dir(rel_path: str) -> str:
    """Return the first-level directory of a relative path (with trailing slash), e.g. 'tmp/foo.py' → 'tmp/'."""
idx = rel_path.find("/")
if idx == -1:
return ""
return rel_path[: idx + 1]
def _basename(rel_path: str) -> str:
    """Return the last path segment (the base name)."""
return rel_path.rsplit("/", 1)[-1]
def _is_init_py(rel_path: str) -> bool:
    """Return True if the path is an __init__.py file."""
return _basename(rel_path) == "__init__.py"
# ---------------------------------------------------------------------------
# classify — the core classification function
# ---------------------------------------------------------------------------
def classify(entry: FileEntry) -> InventoryItem:
    """Classify a single file/directory and assign a disposition label based on path, extension, etc.
    Rules are matched from highest to lowest priority; the first match decides category and disposition.
    """
path = entry.rel_path
top = _top_dir(path)
ext = entry.extension.lower()
base = _basename(path)
# --- 优先级 1: tmp/ 下所有文件 ---
if top == "tmp/" or path == "tmp":
return _classify_tmp(entry)
# --- 优先级 2: logs/、export/ 下的运行时产出 ---
if top in ("logs/", "export/") or path in ("logs", "export"):
return _classify_runtime_output(entry)
# --- 优先级 3: .lnk / .rar 文件 ---
if ext in (".lnk", ".rar"):
return InventoryItem(
rel_path=path,
category=Category.OTHER,
disposition=Disposition.CANDIDATE_DELETE,
description=f"快捷方式/压缩包文件(`{ext}`),建议删除",
)
# --- 优先级 4: 空目录 ---
if entry.is_empty_dir:
return InventoryItem(
rel_path=path,
category=Category.OTHER,
disposition=Disposition.CANDIDATE_DELETE,
description="空目录,建议删除",
)
# --- 优先级 5: 核心代码目录 ---
if any(path.startswith(d) or path + "/" == d for d in _CORE_CODE_DIRS):
return InventoryItem(
rel_path=path,
category=Category.CORE_CODE,
disposition=Disposition.KEEP,
description=f"核心代码(`{top.rstrip('/')}`",
)
# --- 优先级 6: config/ ---
if top == "config/" or path == "config":
return InventoryItem(
rel_path=path,
category=Category.CONFIG,
disposition=Disposition.KEEP,
description="配置文件",
)
# --- 优先级 7: database/*.sql 和 database/migrations/ ---
if top == "database/" or path == "database":
return _classify_database(entry)
# --- 优先级 8: tests/ ---
if top == "tests/" or path == "tests":
return InventoryItem(
rel_path=path,
category=Category.TEST,
disposition=Disposition.KEEP,
description="测试文件",
)
# --- 优先级 9: docs/ ---
if top == "docs/" or path == "docs":
return InventoryItem(
rel_path=path,
category=Category.DOCS,
disposition=Disposition.KEEP,
description="文档",
)
# --- 优先级 10: scripts/ 下的 .py 文件 ---
if top == "scripts/" or path == "scripts":
cat = Category.SCRIPTS
if ext == ".py" or entry.is_dir:
return InventoryItem(
rel_path=path,
category=cat,
disposition=Disposition.KEEP,
description="脚本工具",
)
return InventoryItem(
rel_path=path,
category=cat,
disposition=Disposition.NEEDS_REVIEW,
description="脚本目录下的非 Python 文件,需确认用途",
)
# --- 优先级 11: gui/ ---
if top == "gui/" or path == "gui":
return InventoryItem(
rel_path=path,
category=Category.GUI,
disposition=Disposition.KEEP,
description="GUI 模块",
)
# --- 优先级 12: 构建与部署 ---
if base in _BUILD_DEPLOY_BASENAMES or ext in _BUILD_DEPLOY_EXTENSIONS:
return InventoryItem(
rel_path=path,
category=Category.BUILD_DEPLOY,
disposition=Disposition.KEEP,
description="构建与部署文件",
)
# --- 优先级 13: cli/ ---
if top == "cli/" or path == "cli":
return InventoryItem(
rel_path=path,
category=Category.CORE_CODE,
disposition=Disposition.KEEP,
description="CLI 入口模块",
)
# --- 优先级 14: 已知根目录文件 ---
if "/" not in path:
return _classify_root_file(entry)
# --- 兜底 ---
return InventoryItem(
rel_path=path,
category=Category.OTHER,
disposition=Disposition.NEEDS_REVIEW,
description="未匹配已知规则,需人工确认用途",
)
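# Illustrative examples (hypothetical paths):
#   classify(FileEntry("tmp/debug_fix.py", False, 1200, ".py", False))
#       -> Category.TEMP_DEBUG / Disposition.CANDIDATE_ARCHIVE  (priority 1; .py kept for reference)
#   classify(FileEntry("tasks/dwd/fact_order.py", False, 3400, ".py", False))
#       -> Category.CORE_CODE / Disposition.KEEP                (priority 5; core code directory)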
# ---------------------------------------------------------------------------
# Sub-classification helpers
# ---------------------------------------------------------------------------
def _classify_tmp(entry: FileEntry) -> InventoryItem:
    """Classify files under tmp/.
    Default is candidate-delete; meaningful .py files are marked candidate-archive.
    """
ext = entry.extension.lower()
base = _basename(entry.rel_path)
# 空目录直接候选删除
if entry.is_empty_dir:
return InventoryItem(
rel_path=entry.rel_path,
category=Category.TEMP_DEBUG,
disposition=Disposition.CANDIDATE_DELETE,
description="临时目录下的空目录",
)
# .py 文件可能有参考价值 → 候选归档
if ext == ".py" and len(base) > 4:
return InventoryItem(
rel_path=entry.rel_path,
category=Category.TEMP_DEBUG,
disposition=Disposition.CANDIDATE_ARCHIVE,
description="临时 Python 脚本,可能有参考价值",
)
return InventoryItem(
rel_path=entry.rel_path,
category=Category.TEMP_DEBUG,
disposition=Disposition.CANDIDATE_DELETE,
description="临时/调试文件,建议删除",
)
def _classify_runtime_output(entry: FileEntry) -> InventoryItem:
    """Classify runtime output under logs/ and export/.
    __init__.py is kept (package marker); everything else is candidate-archive.
    """
if _is_init_py(entry.rel_path):
return InventoryItem(
rel_path=entry.rel_path,
category=Category.LOG_OUTPUT,
disposition=Disposition.KEEP,
description="包初始化文件",
)
return InventoryItem(
rel_path=entry.rel_path,
category=Category.LOG_OUTPUT,
disposition=Disposition.CANDIDATE_ARCHIVE,
description="运行时产出,建议归档",
)
def _classify_database(entry: FileEntry) -> InventoryItem:
    """Classify files under database/."""
path = entry.rel_path
ext = entry.extension.lower()
# migrations/ 子目录
if "migrations/" in path or path.endswith("migrations"):
return InventoryItem(
rel_path=path,
category=Category.DATABASE_DEF,
disposition=Disposition.KEEP,
description="数据库迁移脚本",
)
# .sql 文件
if ext == ".sql":
return InventoryItem(
rel_path=path,
category=Category.DATABASE_DEF,
disposition=Disposition.KEEP,
description="数据库 DDL/DML 脚本",
)
# .py 文件 → 核心代码
if ext == ".py":
return InventoryItem(
rel_path=path,
category=Category.CORE_CODE,
disposition=Disposition.KEEP,
description="数据库操作模块",
)
# 目录本身
if entry.is_dir:
if entry.is_empty_dir:
return InventoryItem(
rel_path=path,
category=Category.OTHER,
disposition=Disposition.CANDIDATE_DELETE,
description="数据库目录下的空目录",
)
return InventoryItem(
rel_path=path,
category=Category.DATABASE_DEF,
disposition=Disposition.KEEP,
description="数据库子目录",
)
# 其他文件
return InventoryItem(
rel_path=path,
category=Category.DATABASE_DEF,
disposition=Disposition.NEEDS_REVIEW,
description="数据库目录下的非标准文件,需确认",
)
def _classify_root_file(entry: FileEntry) -> InventoryItem:
    """Classify loose files in the repository root."""
ext = entry.extension.lower()
base = _basename(entry.rel_path)
# 已知构建文件
if base in _BUILD_DEPLOY_BASENAMES or ext in _BUILD_DEPLOY_EXTENSIONS:
return InventoryItem(
rel_path=entry.rel_path,
category=Category.BUILD_DEPLOY,
disposition=Disposition.KEEP,
description="构建与部署文件",
)
# 已知配置文件
if base in (
"requirements.txt", "pytest.ini", ".env", ".env.example",
".gitignore", ".flake8", "pyproject.toml",
):
return InventoryItem(
rel_path=entry.rel_path,
category=Category.CONFIG,
disposition=Disposition.KEEP,
description="项目配置文件",
)
# README
if base.lower().startswith("readme"):
return InventoryItem(
rel_path=entry.rel_path,
category=Category.DOCS,
disposition=Disposition.KEEP,
description="项目说明文档",
)
# 其他根目录文件 → 待确认
return InventoryItem(
rel_path=entry.rel_path,
category=Category.OTHER,
disposition=Disposition.NEEDS_REVIEW,
description=f"根目录散落文件(`{base}`),需确认用途",
)
# ---------------------------------------------------------------------------
# build_inventory — classify in bulk
# ---------------------------------------------------------------------------
def build_inventory(entries: list[FileEntry]) -> list[InventoryItem]:
    """Classify every file entry and return the inventory list."""
return [classify(e) for e in entries]
# ---------------------------------------------------------------------------
# render_inventory_report — Markdown rendering
# ---------------------------------------------------------------------------
def render_inventory_report(items: list[InventoryItem], repo_root: str) -> str:
    """Render the file inventory report in Markdown.
    Report structure:
    - header: title, generation time, repository path
    - body: tables grouped by Category
    - footer: summary statistics
    """
lines: list[str] = []
# --- 头部 ---
now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
lines.append("# 文件清单报告")
lines.append("")
lines.append(f"- 生成时间:{now}")
lines.append(f"- 仓库路径:`{repo_root}`")
lines.append("")
# --- 按分类分组 ---
# 保持 Category 枚举定义顺序
cat_order = {c: i for i, c in enumerate(Category)}
sorted_items = sorted(items, key=lambda it: cat_order[it.category])
for cat, group in groupby(sorted_items, key=lambda it: it.category):
group_list = list(group)
lines.append(f"## {cat.value}")
lines.append("")
lines.append("| 相对路径 | 处置标签 | 简要说明 |")
lines.append("|---|---|---|")
for item in group_list:
lines.append(
f"| `{item.rel_path}` | {item.disposition.value} | {item.description} |"
)
lines.append("")
# --- 统计摘要 ---
lines.append("## 统计摘要")
lines.append("")
# 各分类计数
cat_counter: Counter[Category] = Counter()
disp_counter: Counter[Disposition] = Counter()
for item in items:
cat_counter[item.category] += 1
disp_counter[item.disposition] += 1
lines.append("### 按用途分类")
lines.append("")
lines.append("| 分类 | 数量 |")
lines.append("|---|---|")
for cat in Category:
count = cat_counter.get(cat, 0)
if count > 0:
lines.append(f"| {cat.value} | {count} |")
lines.append("")
lines.append("### 按处置标签")
lines.append("")
lines.append("| 标签 | 数量 |")
lines.append("|---|---|")
for disp in Disposition:
count = disp_counter.get(disp, 0)
if count > 0:
lines.append(f"| {disp.value} | {count} |")
lines.append("")
lines.append(f"**总计:{len(items)} 个条目**")
lines.append("")
return "\n".join(lines)
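A minimal sketch of classifying a few hand-built entries with the functions above (the paths are illustrative):
```python
from scripts.audit import FileEntry
from scripts.audit.inventory_analyzer import build_inventory, render_inventory_report

entries = [
    FileEntry("tmp/old_probe.py", False, 900, ".py", False),
    FileEntry("database/schema_dwd.sql", False, 4096, ".sql", False),
    FileEntry("logs/run_20250101.log", False, 120, ".log", False),
]
items = build_inventory(entries)
print(render_inventory_report(items, repo_root="C:/ZQYY/FQ-ETL"))
```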

View File

@@ -0,0 +1,255 @@
# -*- coding: utf-8 -*-
"""
Audit entry point — invokes the scanner and the three analyzers in turn and writes three reports to docs/audit/repo/.
Only creates files under docs/audit/repo/; no existing file in the repository is modified.
"""
from __future__ import annotations
import logging
import re
from datetime import datetime, timezone
from pathlib import Path
from scripts.audit.scanner import scan_repo
from scripts.audit.inventory_analyzer import (
build_inventory,
render_inventory_report,
)
from scripts.audit.flow_analyzer import (
build_flow_tree,
discover_entry_points,
find_orphan_modules,
render_flow_report,
)
from scripts.audit.doc_alignment_analyzer import (
build_mappings,
check_api_samples_vs_parsers,
check_ddl_vs_dictionary,
find_undocumented_modules,
render_alignment_report,
scan_docs,
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Repository root auto-detection
# ---------------------------------------------------------------------------
def _detect_repo_root() -> Path:
    """Walk upward from this file to locate the repository root.
    Criterion: an ancestor directory containing a cli/ or .git/ directory.
    """
current = Path(__file__).resolve().parent
for parent in (current, *current.parents):
if (parent / "cli").is_dir() or (parent / ".git").is_dir():
return parent
# 回退:假设 scripts/audit/ 在仓库根目录下
return current.parent.parent
# ---------------------------------------------------------------------------
# Report output directory
# ---------------------------------------------------------------------------
def _ensure_report_dir(repo_root: Path) -> Path:
    """Check for, and create, the docs/audit/repo/ directory.
    Returns immediately if the directory already exists; creates it otherwise.
    Raises RuntimeError if creation fails, since no report could be written.
    """
audit_dir = repo_root / "docs" / "audit" / "repo"
if audit_dir.is_dir():
return audit_dir
try:
audit_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
raise RuntimeError(f"无法创建报告输出目录 {audit_dir}: {exc}") from exc
logger.info("已创建报告输出目录: %s", audit_dir)
return audit_dir
# ---------------------------------------------------------------------------
# Report header metadata injection
# ---------------------------------------------------------------------------
_HEADER_PATTERN = re.compile(r"生成时间[:]")
_ISO_TS_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z")
# matches non-ISO timestamp lines, used for replacement
_NON_ISO_TS_LINE = re.compile(
r"([-*]\s*生成时间[:]\s*)\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}"
)
def _inject_header(report: str, timestamp: str, repo_path: str) -> str:
    """Ensure the report header contains an ISO timestamp and the repository path.
    - ISO timestamp already present → leave unchanged
    - non-ISO timestamp present → replace it with the ISO form
    - no header → inject one after the title
    """
if _HEADER_PATTERN.search(report):
# 已有头部——检查时间戳格式是否为 ISO
if _ISO_TS_PATTERN.search(report):
return report
# 非 ISO 格式 → 替换时间戳
report = _NON_ISO_TS_LINE.sub(
lambda m: m.group(1) + timestamp, report,
)
# 同时确保仓库路径使用统一值(用 lambda 避免反斜杠转义问题)
safe_path = repo_path
report = re.sub(
r"([-*]\s*仓库路径[:]\s*)`[^`]*`",
lambda m: m.group(1) + "`" + safe_path + "`",
report,
)
return report
# 无头部 → 在第一个标题行之后插入
lines = report.split("\n")
insert_idx = 1
for i, line in enumerate(lines):
if line.startswith("# "):
insert_idx = i + 1
break
header_lines = [
"",
f"- 生成时间: {timestamp}",
f"- 仓库路径: `{repo_path}`",
"",
]
lines[insert_idx:insert_idx] = header_lines
return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main function
# ---------------------------------------------------------------------------
def run_audit(repo_root: Path | None = None) -> None:
    """Run the full audit flow and write the three reports to docs/audit/repo/.
    Parameters
    ----------
    repo_root : Path | None
        Repository root directory; auto-detected when None.
    """
    # 1. determine the repository root
if repo_root is None:
repo_root = _detect_repo_root()
repo_root = repo_root.resolve()
repo_path_str = str(repo_root)
logger.info("审计开始 — 仓库路径: %s", repo_path_str)
    # 2. check/create the output directory
audit_dir = _ensure_report_dir(repo_root)
    # 3. generate the UTC timestamp (shared by all reports)
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    # 4. scan the repository
logger.info("正在扫描仓库文件...")
entries = scan_repo(repo_root)
logger.info("扫描完成,共 %d 个条目", len(entries))
    # 5. file inventory report
logger.info("正在生成文件清单报告...")
try:
inventory_items = build_inventory(entries)
inventory_report = render_inventory_report(inventory_items, repo_path_str)
inventory_report = _inject_header(inventory_report, timestamp, repo_path_str)
(audit_dir / "file_inventory.md").write_text(
inventory_report, encoding="utf-8",
)
logger.info("文件清单报告已写入: file_inventory.md")
except Exception:
logger.exception("生成文件清单报告时出错")
    # 6. flow tree report
logger.info("正在生成流程树报告...")
try:
entry_points = discover_entry_points(repo_root)
trees = []
reachable: set[str] = set()
for ep in entry_points:
ep_file = ep["file"]
# 批处理文件不构建流程树
if not ep_file.endswith(".py"):
continue
tree = build_flow_tree(repo_root, ep_file)
trees.append(tree)
# 收集可达模块
_collect_reachable(tree, reachable)
orphans = find_orphan_modules(repo_root, entries, reachable)
flow_report = render_flow_report(trees, orphans, repo_path_str)
flow_report = _inject_header(flow_report, timestamp, repo_path_str)
(audit_dir / "flow_tree.md").write_text(
flow_report, encoding="utf-8",
)
logger.info("流程树报告已写入: flow_tree.md")
except Exception:
logger.exception("生成流程树报告时出错")
    # 7. documentation alignment report
logger.info("正在生成文档对齐报告...")
try:
doc_paths = scan_docs(repo_root)
mappings = build_mappings(doc_paths, repo_root)
issues = []
issues.extend(check_ddl_vs_dictionary(repo_root))
issues.extend(check_api_samples_vs_parsers(repo_root))
# 缺失文档检测
documented: set[str] = set()
for m in mappings:
documented.update(m.related_code)
undoc_modules = find_undocumented_modules(repo_root, documented)
from scripts.audit import AlignmentIssue
for mod in undoc_modules:
issues.append(AlignmentIssue(
doc_path="",
issue_type="missing",
description=f"核心代码模块 `{mod}` 缺少对应文档",
related_code=mod,
))
alignment_report = render_alignment_report(mappings, issues, repo_path_str)
alignment_report = _inject_header(alignment_report, timestamp, repo_path_str)
(audit_dir / "doc_alignment.md").write_text(
alignment_report, encoding="utf-8",
)
logger.info("文档对齐报告已写入: doc_alignment.md")
except Exception:
logger.exception("生成文档对齐报告时出错")
logger.info("审计完成 — 报告输出目录: %s", audit_dir)
# ---------------------------------------------------------------------------
# Helper: collect reachable modules
# ---------------------------------------------------------------------------
def _collect_reachable(node, reachable: set[str]) -> None:
    """Recursively collect the source_file of every node in the flow tree."""
reachable.add(node.source_file)
for child in node.children:
_collect_reachable(child, reachable)
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
run_audit()
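A minimal sketch of invoking the audit programmatically instead of via `python -m scripts.audit.run_audit` (the path is the project root named in the scripts README):
```python
import logging
from pathlib import Path

from scripts.audit.run_audit import run_audit

logging.basicConfig(level=logging.INFO)
run_audit(Path(r"C:\ZQYY\FQ-ETL"))   # writes the three reports under docs/audit/repo/
```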

View File

@@ -0,0 +1,150 @@
# -*- coding: utf-8 -*-
"""
Repository scanner — recursively walks the repository file system and returns structured file metadata.
Read-only: it reads file metadata (size, type) and modifies nothing.
Permission errors are skipped and logged without aborting the scan.
"""
from __future__ import annotations
import fnmatch
import logging
from pathlib import Path
from scripts.audit import FileEntry
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Exclusion patterns
# ---------------------------------------------------------------------------
EXCLUDED_PATTERNS: list[str] = [
".git",
"__pycache__",
".pytest_cache",
"*.pyc",
".kiro",
]
# ---------------------------------------------------------------------------
# Exclusion matching
# ---------------------------------------------------------------------------
def _is_excluded(name: str, patterns: list[str]) -> bool:
    """Return True if the file/directory name matches any exclusion pattern.
    Two kinds of pattern are supported:
    - exact match (e.g. ".git", "__pycache__")
    - wildcard match (e.g. "*.pyc"), with fnmatch semantics
    """
for pat in patterns:
if fnmatch.fnmatch(name, pat):
return True
return False
# ---------------------------------------------------------------------------
# Recursive walk
# ---------------------------------------------------------------------------
def _walk(
root: Path,
base: Path,
exclude: list[str],
results: list[FileEntry],
) -> None:
    """Recursively walk the files and directories under *root*, appending results to *results*.
    Parameters
    ----------
    root : Path
        Directory currently being walked.
    base : Path
        Repository root, used to compute relative paths.
    exclude : list[str]
        Exclusion patterns.
    results : list[FileEntry]
        List collecting the results (modified in place).
    """
try:
children = sorted(root.iterdir(), key=lambda p: p.name)
except (PermissionError, OSError) as exc:
logger.warning("无法读取目录 %s: %s", root, exc)
return
    # used to decide whether this directory is "empty" (no visible children after exclusions)
visible_count = 0
for child in children:
if _is_excluded(child.name, exclude):
continue
visible_count += 1
rel = child.relative_to(base).as_posix()
if child.is_dir():
            # recurse into the subdirectory first, then decide whether it is empty
            sub_start = len(results)
            _walk(child, base, exclude, results)
            sub_end = len(results)
            # zero entries produced by the recursion → empty directory
is_empty = (sub_end == sub_start)
results.append(FileEntry(
rel_path=rel,
is_dir=True,
size_bytes=0,
extension="",
is_empty_dir=is_empty,
))
else:
# 文件
try:
size = child.stat().st_size
except (PermissionError, OSError) as exc:
logger.warning("无法获取文件信息 %s: %s", child, exc)
continue
results.append(FileEntry(
rel_path=rel,
is_dir=False,
size_bytes=size,
extension=child.suffix.lower(),
is_empty_dir=False,
))
    # If root is the repository root itself, nothing extra is needed
    # (the root directory does not appear as an entry in the results).
def scan_repo(
root: Path,
exclude: list[str] | None = None,
) -> list[FileEntry]:
    """Recursively scan the repository and return metadata for all files and directories.
    Parameters
    ----------
    root : Path
        Repository root path.
    exclude : list[str] | None
        Exclusion patterns; defaults to EXCLUDED_PATTERNS.
    Returns
    -------
    list[FileEntry]
        File/directory metadata sorted by rel_path.
    """
if exclude is None:
exclude = EXCLUDED_PATTERNS
results: list[FileEntry] = []
_walk(root, root, exclude, results)
    # sort by relative path so the output is stable
results.sort(key=lambda e: e.rel_path)
return results
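A minimal usage sketch of the scanner (the extra `.venv` exclusion is an assumed local convention, not something the repository mandates):
```python
from pathlib import Path

from scripts.audit.scanner import EXCLUDED_PATTERNS, scan_repo

repo_root = Path(".").resolve()
entries = scan_repo(repo_root, exclude=EXCLUDED_PATTERNS + [".venv"])
empty_dirs = [e.rel_path for e in entries if e.is_empty_dir]
print(f"{len(entries)} entries scanned, {len(empty_dirs)} empty directories")
```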

View File

@@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
"""Run data integrity checks across API -> ODS -> DWD."""
from __future__ import annotations
import argparse
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo
from dateutil import parser as dtparser
from config.settings import AppConfig
from quality.integrity_service import run_history_flow, run_window_flow, write_report
from utils.logging_utils import build_log_path, configure_logging
from utils.windowing import split_window
def _parse_dt(value: str, tz: ZoneInfo) -> datetime:
dt = dtparser.parse(value)
if dt.tzinfo is None:
return dt.replace(tzinfo=tz)
return dt.astimezone(tz)
def main() -> int:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
ap = argparse.ArgumentParser(description="Data integrity checks (API -> ODS -> DWD)")
ap.add_argument("--mode", choices=["history", "window"], default="history")
ap.add_argument(
"--flow",
choices=["verify", "update_and_verify"],
default="verify",
help="verify only or update+verify (auto backfill then optional recheck)",
)
ap.add_argument("--start", default="2025-07-01", help="history start date (default: 2025-07-01)")
ap.add_argument("--end", default="", help="history end datetime (default: last ETL end)")
ap.add_argument("--window-start", default="", help="window start datetime (mode=window)")
ap.add_argument("--window-end", default="", help="window end datetime (mode=window)")
ap.add_argument("--window-split-unit", default="", help="split unit (month/none), default from config")
ap.add_argument("--window-compensation-hours", type=int, default=None, help="window compensation hours, default from config")
ap.add_argument(
"--include-dimensions",
action="store_true",
default=None,
help="include dimension tables in ODS->DWD checks",
)
ap.add_argument(
"--no-include-dimensions",
action="store_true",
help="exclude dimension tables in ODS->DWD checks",
)
ap.add_argument("--ods-task-codes", default="", help="comma-separated ODS task codes for API checks")
ap.add_argument("--compare-content", action="store_true", help="compare API vs ODS content hash")
ap.add_argument("--no-compare-content", action="store_true", help="disable content comparison even if enabled in config")
ap.add_argument("--include-mismatch", action="store_true", help="backfill mismatch records as well")
ap.add_argument("--no-include-mismatch", action="store_true", help="disable mismatch backfill")
ap.add_argument("--recheck", action="store_true", help="re-run checks after backfill")
ap.add_argument("--no-recheck", action="store_true", help="skip recheck after backfill")
ap.add_argument("--content-sample-limit", type=int, default=None, help="max mismatch samples per table")
ap.add_argument("--out", default="", help="output JSON path")
ap.add_argument("--log-file", default="", help="log file path")
ap.add_argument("--log-dir", default="", help="log directory")
ap.add_argument("--log-level", default="INFO", help="log level")
ap.add_argument("--no-log-console", action="store_true", help="disable console logging")
args = ap.parse_args()
log_dir = Path(args.log_dir) if args.log_dir else (Path(__file__).resolve().parent / "logs")
log_file = Path(args.log_file) if args.log_file else build_log_path(log_dir, "data_integrity")
log_console = not args.no_log_console
with configure_logging(
"data_integrity",
log_file,
level=args.log_level,
console=log_console,
tee_std=True,
) as logger:
cfg = AppConfig.load({})
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Shanghai"))
report_path = Path(args.out) if args.out else None
if args.recheck and args.no_recheck:
raise SystemExit("cannot set both --recheck and --no-recheck")
if args.include_mismatch and args.no_include_mismatch:
raise SystemExit("cannot set both --include-mismatch and --no-include-mismatch")
if args.include_dimensions and args.no_include_dimensions:
raise SystemExit("cannot set both --include-dimensions and --no-include-dimensions")
compare_content = None
if args.compare_content and args.no_compare_content:
raise SystemExit("cannot set both --compare-content and --no-compare-content")
if args.compare_content:
compare_content = True
elif args.no_compare_content:
compare_content = False
include_mismatch = cfg.get("integrity.backfill_mismatch", True)
if args.include_mismatch:
include_mismatch = True
elif args.no_include_mismatch:
include_mismatch = False
recheck_after_backfill = cfg.get("integrity.recheck_after_backfill", True)
if args.recheck:
recheck_after_backfill = True
elif args.no_recheck:
recheck_after_backfill = False
include_dimensions = cfg.get("integrity.include_dimensions", True)
if args.include_dimensions:
include_dimensions = True
elif args.no_include_dimensions:
include_dimensions = False
if args.mode == "window":
if not args.window_start or not args.window_end:
raise SystemExit("window-start and window-end are required for mode=window")
start_dt = _parse_dt(args.window_start, tz)
end_dt = _parse_dt(args.window_end, tz)
split_unit = (args.window_split_unit or cfg.get("run.window_split.unit", "month") or "month").strip()
comp_hours = args.window_compensation_hours
if comp_hours is None:
comp_hours = cfg.get("run.window_split.compensation_hours", 0)
windows = split_window(
start_dt,
end_dt,
tz=tz,
split_unit=split_unit,
compensation_hours=comp_hours,
)
if not windows:
windows = [(start_dt, end_dt)]
report, counts = run_window_flow(
cfg=cfg,
windows=windows,
include_dimensions=bool(include_dimensions),
task_codes=args.ods_task_codes,
logger=logger,
compare_content=compare_content,
content_sample_limit=args.content_sample_limit,
do_backfill=args.flow == "update_and_verify",
include_mismatch=bool(include_mismatch),
recheck_after_backfill=bool(recheck_after_backfill),
page_size=int(cfg.get("api.page_size") or 200),
chunk_size=500,
)
report_path = write_report(report, prefix="data_integrity_window", tz=tz, report_path=report_path)
report["report_path"] = report_path
logger.info("REPORT_WRITTEN path=%s", report.get("report_path"))
else:
start_dt = _parse_dt(args.start, tz)
if args.end:
end_dt = _parse_dt(args.end, tz)
else:
end_dt = None
report, counts = run_history_flow(
cfg=cfg,
start_dt=start_dt,
end_dt=end_dt,
include_dimensions=bool(include_dimensions),
task_codes=args.ods_task_codes,
logger=logger,
compare_content=compare_content,
content_sample_limit=args.content_sample_limit,
do_backfill=args.flow == "update_and_verify",
include_mismatch=bool(include_mismatch),
recheck_after_backfill=bool(recheck_after_backfill),
page_size=int(cfg.get("api.page_size") or 200),
chunk_size=500,
)
report_path = write_report(report, prefix="data_integrity_history", tz=tz, report_path=report_path)
report["report_path"] = report_path
logger.info("REPORT_WRITTEN path=%s", report.get("report_path"))
logger.info(
"SUMMARY missing=%s mismatch=%s errors=%s",
counts.get("missing"),
counts.get("mismatch"),
counts.get("errors"),
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
import sys
sys.path.insert(0, '.')
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])
db = DatabaseOperations(db_conn)
# 检查DWD层服务记录分布
print("=== DWD层服务记录分析 ===")
print()
# 1. 总体统计
sql1 = """
SELECT
COUNT(*) as total_records,
COUNT(DISTINCT tenant_member_id) as unique_members,
COUNT(DISTINCT site_assistant_id) as unique_assistants,
COUNT(DISTINCT (tenant_member_id, site_assistant_id)) as unique_pairs
FROM billiards_dwd.dwd_assistant_service_log
WHERE tenant_member_id > 0 AND is_delete = 0
"""
r = dict(db.query(sql1)[0])
print("总体统计:")
print(f" 总服务记录数: {r['total_records']}")
print(f" 唯一会员数: {r['unique_members']}")
print(f" 唯一助教数: {r['unique_assistants']}")
print(f" 唯一客户-助教对: {r['unique_pairs']}")
# 2. 助教服务会员数分布
print()
print("助教服务会员数分布 (Top 10):")
sql2 = """
SELECT site_assistant_id, COUNT(DISTINCT tenant_member_id) as member_count
FROM billiards_dwd.dwd_assistant_service_log
WHERE tenant_member_id > 0 AND is_delete = 0
GROUP BY site_assistant_id
ORDER BY member_count DESC
LIMIT 10
"""
for row in db.query(sql2):
r = dict(row)
print(f" 助教 {r['site_assistant_id']}: 服务 {r['member_count']} 个会员")
# 3. 每个客户-助教对的服务次数分布
print()
print("客户-助教对 服务次数分布 (Top 10):")
sql3 = """
SELECT tenant_member_id, site_assistant_id, COUNT(*) as service_count
FROM billiards_dwd.dwd_assistant_service_log
WHERE tenant_member_id > 0 AND is_delete = 0
GROUP BY tenant_member_id, site_assistant_id
ORDER BY service_count DESC
LIMIT 10
"""
for row in db.query(sql3):
r = dict(row)
print(f" 会员 {r['tenant_member_id']} - 助教 {r['site_assistant_id']}: {r['service_count']} 次服务")
# 4. 近60天的数据
print()
print("=== 近60天数据 ===")
sql4 = """
SELECT
COUNT(*) as total_records,
COUNT(DISTINCT tenant_member_id) as unique_members,
COUNT(DISTINCT site_assistant_id) as unique_assistants,
COUNT(DISTINCT (tenant_member_id, site_assistant_id)) as unique_pairs
FROM billiards_dwd.dwd_assistant_service_log
WHERE tenant_member_id > 0 AND is_delete = 0
AND last_use_time >= NOW() - INTERVAL '60 days'
"""
r4 = dict(db.query(sql4)[0])
print(f" 总服务记录数: {r4['total_records']}")
print(f" 唯一会员数: {r4['unique_members']}")
print(f" 唯一助教数: {r4['unique_assistants']}")
print(f" 唯一客户-助教对: {r4['unique_pairs']}")
db_conn.close()

View File

@@ -0,0 +1,248 @@
# -*- coding: utf-8 -*-
"""
Validate that ODS payload content matches stored content_hash.
Usage:
PYTHONPATH=. python -m scripts.check.check_ods_content_hash
PYTHONPATH=. python -m scripts.check.check_ods_content_hash --schema billiards_ods
PYTHONPATH=. python -m scripts.check.check_ods_content_hash --tables member_profiles,orders
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Sequence
from psycopg2.extras import RealDictCursor
PROJECT_ROOT = Path(__file__).resolve().parents[2]  # 本文件位于 scripts/check/,需上溯两级才是仓库根目录
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from config.settings import AppConfig
from database.connection import DatabaseConnection
from tasks.ods.ods_tasks import BaseOdsTask
def _reconfigure_stdout_utf8() -> None:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
def _fetch_tables(conn, schema: str) -> list[str]:
sql = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = %s AND table_type = 'BASE TABLE'
ORDER BY table_name
"""
with conn.cursor() as cur:
cur.execute(sql, (schema,))
return [r[0] for r in cur.fetchall()]
def _fetch_columns(conn, schema: str, table: str) -> list[str]:
sql = """
SELECT column_name
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
ORDER BY ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, table))
cols = [r[0] for r in cur.fetchall()]
return [c for c in cols if c]
def _fetch_pk_columns(conn, schema: str, table: str) -> list[str]:
sql = """
SELECT kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
WHERE tc.constraint_type = 'PRIMARY KEY'
AND tc.table_schema = %s
AND tc.table_name = %s
ORDER BY kcu.ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, table))
cols = [r[0] for r in cur.fetchall()]
return [c for c in cols if c.lower() != "content_hash"]
def _fetch_row_count(conn, schema: str, table: str) -> int:
sql = f'SELECT COUNT(*) FROM "{schema}"."{table}"'
with conn.cursor() as cur:
cur.execute(sql)
row = cur.fetchone()
return int(row[0] if row else 0)
def _iter_rows(
conn,
schema: str,
table: str,
select_cols: Sequence[str],
batch_size: int,
) -> Iterable[dict]:
cols_sql = ", ".join(f'"{c}"' for c in select_cols)
sql = f'SELECT {cols_sql} FROM "{schema}"."{table}"'
with conn.cursor(name=f"ods_hash_{table}", cursor_factory=RealDictCursor) as cur:
cur.itersize = max(1, int(batch_size or 500))
cur.execute(sql)
for row in cur:
yield row
def _build_report_path(out_arg: str | None) -> Path:
if out_arg:
return Path(out_arg)
reports_dir = PROJECT_ROOT / "reports"
reports_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
return reports_dir / f"ods_content_hash_check_{ts}.json"
def _print_progress(
table_label: str,
processed: int,
total: int,
mismatched: int,
missing_hash: int,
invalid_payload: int,
) -> None:
if total:
msg = (
f"[{table_label}] checked {processed}/{total} "
f"mismatch={mismatched} missing_hash={missing_hash} invalid_payload={invalid_payload}"
)
else:
msg = (
f"[{table_label}] checked {processed} "
f"mismatch={mismatched} missing_hash={missing_hash} invalid_payload={invalid_payload}"
)
print(msg, flush=True)
def main() -> int:
_reconfigure_stdout_utf8()
ap = argparse.ArgumentParser(description="Validate ODS payload vs content_hash consistency")
ap.add_argument("--schema", default="billiards_ods", help="ODS schema name")
ap.add_argument("--tables", default="", help="comma-separated table names (optional)")
ap.add_argument("--batch-size", type=int, default=500, help="DB fetch batch size")
ap.add_argument("--progress-every", type=int, default=100, help="print progress every N rows")
ap.add_argument("--sample-limit", type=int, default=5, help="sample mismatch rows per table")
ap.add_argument("--out", default="", help="output report JSON path")
args = ap.parse_args()
cfg = AppConfig.load({})
db = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
conn = db.conn
tables = _fetch_tables(conn, args.schema)
if args.tables.strip():
whitelist = {t.strip() for t in args.tables.split(",") if t.strip()}
tables = [t for t in tables if t in whitelist]
report = {
"schema": args.schema,
"tables": [],
"summary": {
"total_tables": 0,
"checked_tables": 0,
"total_rows": 0,
"checked_rows": 0,
"mismatch_rows": 0,
"missing_hash_rows": 0,
"invalid_payload_rows": 0,
},
}
for table in tables:
table_label = f"{args.schema}.{table}"
cols = _fetch_columns(conn, args.schema, table)
cols_lower = {c.lower() for c in cols}
if "payload" not in cols_lower or "content_hash" not in cols_lower:
print(f"[{table_label}] skip: missing payload/content_hash", flush=True)
continue
total = _fetch_row_count(conn, args.schema, table)
pk_cols = _fetch_pk_columns(conn, args.schema, table)
select_cols = ["content_hash", "payload", *pk_cols]
processed = 0
mismatched = 0
missing_hash = 0
invalid_payload = 0
samples: list[dict[str, Any]] = []
print(f"[{table_label}] start: total_rows={total}", flush=True)
for row in _iter_rows(conn, args.schema, table, select_cols, args.batch_size):
processed += 1
content_hash = row.get("content_hash")
payload = row.get("payload")
recomputed = BaseOdsTask._compute_compare_hash_from_payload(payload)
row_mismatch = False
if not content_hash:
missing_hash += 1
mismatched += 1
row_mismatch = True
elif not recomputed:
invalid_payload += 1
mismatched += 1
row_mismatch = True
elif content_hash != recomputed:
mismatched += 1
row_mismatch = True
if row_mismatch and len(samples) < max(0, int(args.sample_limit or 0)):
sample = {k: row.get(k) for k in pk_cols}
sample["content_hash"] = content_hash
sample["recomputed_hash"] = recomputed
samples.append(sample)
if args.progress_every and processed % int(args.progress_every) == 0:
_print_progress(table_label, processed, total, mismatched, missing_hash, invalid_payload)
if processed and (not args.progress_every or processed % int(args.progress_every) != 0):
_print_progress(table_label, processed, total, mismatched, missing_hash, invalid_payload)
report["tables"].append(
{
"table": table_label,
"total_rows": total,
"checked_rows": processed,
"mismatch_rows": mismatched,
"missing_hash_rows": missing_hash,
"invalid_payload_rows": invalid_payload,
"sample_mismatches": samples,
}
)
report["summary"]["checked_tables"] += 1
report["summary"]["total_rows"] += total
report["summary"]["checked_rows"] += processed
report["summary"]["mismatch_rows"] += mismatched
report["summary"]["missing_hash_rows"] += missing_hash
report["summary"]["invalid_payload_rows"] += invalid_payload
report["summary"]["total_tables"] = len(tables)
out_path = _build_report_path(args.out)
out_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"[REPORT] {out_path}", flush=True)
return 0
if __name__ == "__main__":
raise SystemExit(main())

File diff suppressed because it is too large

View File

@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
"""
ODS JSON 字段核对脚本:对照当前数据库中的 ODS 表字段,检查示例 JSON(默认目录 export/test-json-doc
是否包含同名键,并输出每表未命中的字段,便于补充映射或确认确实无源字段。
使用方法:
set PG_DSN=postgresql://... # 如 .env 中配置
python -m scripts.check.check_ods_json_vs_table
"""
from __future__ import annotations
import json
import os
import pathlib
from typing import Dict, Set, Tuple
import psycopg2
from tasks.manual_ingest_task import ManualIngestTask
def _flatten_keys(obj, prefix: str = "") -> Set[str]:
"""递归展开 JSON 所有键路径,返回形如 data.assistantInfos.id 的集合。列表不保留索引,仅继续向下展开。"""
keys: Set[str] = set()
if isinstance(obj, dict):
for k, v in obj.items():
new_prefix = f"{prefix}.{k}" if prefix else k
keys.add(new_prefix)
keys |= _flatten_keys(v, new_prefix)
elif isinstance(obj, list):
for item in obj:
keys |= _flatten_keys(item, prefix)
return keys
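# 用法示例(仅作说明,输入为假设的样例 JSON
#   _flatten_keys({"data": {"assistantInfos": [{"id": 1}]}})
#   -> {"data", "data.assistantInfos", "data.assistantInfos.id"}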
def _load_json_keys(path: pathlib.Path) -> Tuple[Set[str], dict[str, Set[str]]]:
"""读取单个 JSON 文件并返回展开后的键集合以及末段->路径列表映射,若文件不存在或无法解析则返回空集合。"""
if not path.exists():
return set(), {}
data = json.loads(path.read_text(encoding="utf-8"))
paths = _flatten_keys(data)
last_map: dict[str, Set[str]] = {}
for p in paths:
last = p.split(".")[-1].lower()
last_map.setdefault(last, set()).add(p)
return paths, last_map
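# 用法示例(仅作说明,文件内容为假设样例):若 JSON 为 {"data": {"memberId": 1}},则返回
#   paths = {"data", "data.memberId"}last_map = {"data": {"data"}, "memberid": {"data.memberId"}}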
def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
"""从数据库读取 billiards_ods.* 的列名集合,按表返回。"""
conn = psycopg2.connect(dsn)
cur = conn.cursor()
cur.execute(
"""
SELECT table_name, column_name
FROM information_schema.columns
WHERE table_schema='billiards_ods'
ORDER BY table_name, ordinal_position
"""
)
result: Dict[str, Set[str]] = {}
for table, col in cur.fetchall():
result.setdefault(table, set()).add(col.lower())
cur.close()
conn.close()
return result
def main() -> None:
"""主流程:遍历 FILE_MAPPING 中的 ODS 表,检查 JSON 键覆盖情况并打印报告。"""
dsn = os.environ.get("PG_DSN")
if not dsn:
    raise SystemExit("未设置 PG_DSN 环境变量:请先在 .env 或当前 shell 中配置后再运行")
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", "export/test-json-doc"))
ods_cols_map = _load_ods_columns(dsn)
print(f"使用 JSON 目录: {json_dir}")
print(f"连接 DSN: {dsn}")
print("=" * 80)
for keywords, ods_table in ManualIngestTask.FILE_MAPPING:
table = ods_table.split(".")[-1]
cols = ods_cols_map.get(table, set())
file_name = f"{keywords[0]}.json"
file_path = json_dir / file_name
keys_full, path_map = _load_json_keys(file_path)
key_last_parts = set(path_map.keys())
missing: Set[str] = set()
extra_keys: Set[str] = set()
present: Set[str] = set()
for col in sorted(cols):
if col in key_last_parts:
present.add(col)
else:
missing.add(col)
for k in key_last_parts:
if k not in cols:
extra_keys.add(k)
print(f"[{table}] 文件={file_name} 列数={len(cols)} JSON键(末段)覆盖={len(present)}/{len(cols)}")
if missing:
print(" 未命中列:", ", ".join(sorted(missing)))
else:
print(" 未命中列: 无")
if extra_keys:
extras = []
for k in sorted(extra_keys):
paths = ", ".join(sorted(path_map.get(k, [])))
extras.append(f"{k} ({paths})")
print(" JSON 仅有(表无此列):", "; ".join(extras))
else:
print(" JSON 仅有(表无此列): 无")
print("-" * 80)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""验证DWS配置数据"""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
def main():
load_dotenv(Path(__file__).parent.parent / ".env")
dsn = os.getenv("PG_DSN")
conn = psycopg2.connect(dsn)
tables = [
"cfg_performance_tier",
"cfg_assistant_level_price",
"cfg_bonus_rules",
"cfg_area_category",
"cfg_skill_type"
]
print("DWS 配置表数据统计:")
print("-" * 40)
with conn.cursor() as cur:
for t in tables:
cur.execute(f"SELECT COUNT(*) FROM billiards_dws.{t}")
cnt = cur.fetchone()[0]
print(f"{t}: {cnt}")
conn.close()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
"""
比对 JSON 样本字段 vs API 参考文档(.md)字段。
找出 JSON 中存在但 .md 文档"四、响应字段详解"中缺失的字段。
特殊处理:
- settlement_records / recharge_settlements: 从 settleList 内层提取字段
siteProfile 子字段不提取ODS 中存为 siteprofile jsonb 列)
- stock_goods_category_tree: 从 goodsCategoryList 内层提取字段
- 嵌套对象siteProfile, tableProfile作为整体字段名
"""
import json
import os
import re
import sys
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
# 结构包装器字段(不应出现在比对中)
WRAPPER_FIELDS = {"settleList", "siteProfile", "tableProfile",
"goodsCategoryList", "data", "code", "msg",
"settlelist", "siteprofile", "tableprofile",
"goodscategorylist"}
# 表头关键字(跳过)— 注意 "type" 不能放这里,因为有些表有 type 业务字段
CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "example", "description"}
def extract_json_fields(table_name: str) -> set:
"""从 JSON 样本提取所有字段名(小写)"""
path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
if not os.path.exists(path):
return set()
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
# settlement_records / recharge_settlements: settleList 内层
if table_name in ("settlement_records", "recharge_settlements"):
settle = data.get("settleList", {})
if isinstance(settle, list):
settle = settle[0] if settle else {}
fields = set()
for k in settle.keys():
kl = k.lower()
if kl in {"siteprofile"}:
fields.add(kl) # 作为整体 jsonb 列
continue
fields.add(kl)
return fields
# stock_goods_category_tree: goodsCategoryList 内层
if table_name == "stock_goods_category_tree":
cat_list = data.get("goodsCategoryList", [])
if cat_list:
return {k.lower() for k in cat_list[0].keys()
if k.lower() not in WRAPPER_FIELDS}
return set()
# role_area_association: roleAreaRelations 内层
if table_name == "role_area_association":
rel_list = data.get("roleAreaRelations", [])
if rel_list:
return {k.lower() for k in rel_list[0].keys()
if k.lower() not in WRAPPER_FIELDS}
return set()
# 通用:顶层字段
fields = set()
for k in data.keys():
kl = k.lower()
if kl in WRAPPER_FIELDS:
# 嵌套对象作为整体
if kl in ("siteprofile", "tableprofile"):
fields.add(kl)
continue
fields.add(kl)
return fields
def extract_md_fields(table_name: str) -> set:
"""从 .md 文档的"四、响应字段详解"章节提取字段名(小写)"""
md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
if not os.path.exists(md_path):
return set()
with open(md_path, "r", encoding="utf-8") as f:
lines = f.readlines()
fields = set()
in_section = False
in_siteprofile = False
field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|')
siteprofile_header = re.compile(r'^###.*siteProfile', re.IGNORECASE)
for line in lines:
s = line.strip()
if s.startswith("## 四、") and "响应字段" in s:
in_section = True
in_siteprofile = False
continue
if in_section and s.startswith("## ") and not s.startswith("## 四"):
break
if not in_section:
continue
# siteProfile 子章节处理
if table_name in ("settlement_records", "recharge_settlements"):
if siteprofile_header.search(s):
in_siteprofile = True
continue
if s.startswith("### ") and in_siteprofile:
if not siteprofile_header.search(s):
in_siteprofile = False
m = field_pattern.match(s)
if m:
raw = m.group(1).strip()
if raw.lower() in {h.lower() for h in CROSS_REF_HEADERS}:
continue
if table_name in ("settlement_records", "recharge_settlements"):
if in_siteprofile:
continue
if raw.startswith("siteProfile."):
continue
if raw.lower() in WRAPPER_FIELDS and raw.lower() not in ("siteprofile", "tableprofile"):
continue
fields.add(raw.lower())
return fields
def main():
samples = sorted([
f.replace(".json", "")
for f in os.listdir(SAMPLES_DIR)
if f.endswith(".json")
])
results = []
for table in samples:
json_fields = extract_json_fields(table)
md_fields = extract_md_fields(table)
# JSON 中有但 .md 中没有的
json_only = json_fields - md_fields
# .md 中有但 JSON 中没有的(可能是条件性字段,仅供参考)
md_only = md_fields - json_fields
results.append({
"table": table,
"json_count": len(json_fields),
"md_count": len(md_fields),
"json_only": sorted(json_only),
"md_only": sorted(md_only),
})
# 输出
print("=" * 80)
print("JSON 样本 vs .md 文档 字段比对报告")
print("=" * 80)
issues = 0
for r in results:
if r["json_only"]:
issues += 1
print(f"\n{r['table']} — JSON={r['json_count']}, MD={r['md_count']}")
print(f" JSON 中有但 .md 缺失 ({len(r['json_only'])} 个):")
for f in r["json_only"]:
print(f" - {f}")
if r["md_only"]:
print(f" .md 中有但 JSON 无 ({len(r['md_only'])} 个,可能是条件性字段):")
for f in r["md_only"]:
print(f" - {f}")
else:
status = "" if not r["md_only"] else "⚠️"
extra = ""
if r["md_only"]:
extra = f" (.md 多 {len(r['md_only'])} 个条件性字段)"
print(f"\n{status} {r['table']} — JSON={r['json_count']}, MD={r['md_count']}{extra}")
print(f"\n{'=' * 80}")
print(f"总计: {len(results)} 个表, {issues} 个有 JSON→MD 缺失")
# 输出 JSON 格式供后续处理
out_path = os.path.join("docs", "reports", "json_vs_md_gaps.json")
with open(out_path, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\n详细结果已写入: {out_path}")
if __name__ == "__main__":
main()
# AI_CHANGELOG:
# - 日期: 2026-02-14
# - Prompt: P20260214-044500 — "md文档和json数据不对应全面排查"
# - 直接原因: 用户要求全面排查 JSON 样本与 .md 文档的字段一致性
# - 变更摘要: 新建脚本,从 JSON 样本提取字段与 .md 文档"响应字段详解"章节比对;
# 修复 3 个 bugtype 过滤、siteProfile/tableProfile 例外、roleAreaRelations 包装器)
# - 风险与验证: 纯分析脚本,无运行时影响;运行 `python scripts/check_json_vs_md.py` 验证输出

View File

@@ -0,0 +1,381 @@
# -*- coding: utf-8 -*-
"""
比对 API 参考文档的 JSON 字段与 ODS 数据库表列,生成对比报告和 ALTER SQL。
支持 camelCase → snake_case 归一化匹配。
用法: python scripts/compare_api_ods.py
需要: psycopg2, python-dotenv
"""
import os, re, json, sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
import psycopg2
load_dotenv()
PG_DSN = os.getenv("PG_DSN")
ENDPOINTS_DIR = os.path.join("docs", "api-reference", "endpoints")
REGISTRY_FILE = os.path.join("docs", "api-reference", "api_registry.json")
# ODS 元数据列ETL 框架自动添加,不属于 API 字段)
ODS_META_COLUMNS = {
"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"
}
# JSON 类型 → 推荐 PG 类型映射
TYPE_MAP = {
"int": "bigint",
"float": "numeric(18,2)",
"string": "text",
"bool": "boolean",
"list": "jsonb",
"dict": "jsonb",
"object": "jsonb",
"array": "jsonb",
}
def camel_to_snake(name):
"""将 camelCase/PascalCase 转为 snake_case 小写"""
# 处理连续大写如 ABCDef → abc_def
s1 = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', name)
s2 = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', s1)
return s2.lower()
def normalize_field_name(name):
"""统一字段名camelCase → snake_case → 全小写"""
return camel_to_snake(name).replace(".", "_").strip("_")
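# 归一化示例(仅作说明,字段名为假设样例):
#   camel_to_snake("siteProfile")          -> "site_profile"
#   camel_to_snake("ABCDef")               -> "abc_def"
#   normalize_field_name("tableFeeAmount") -> "table_fee_amount"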
def parse_api_fields(md_path):
"""从 API 文档 md 中解析响应字段表,返回 {原始字段名: json_type}
跳过嵌套对象的子字段(如 siteProfile.xxx"""
fields = {}
with open(md_path, "r", encoding="utf-8") as f:
content = f.read()
# 格式: | # | 字段名 | 类型 | 示例值 |
pattern = r"\|\s*\d+\s*\|\s*`([^`]+)`\s*\|\s*(\w+)\s*\|"
for m in re.finditer(pattern, content):
field_name = m.group(1).strip()
field_type = m.group(2).strip().lower()
# 跳过嵌套子字段(如 siteProfile.address
if "." in field_name:
continue
fields[field_name] = field_type
return fields
def get_ods_columns(cursor, table_name):
"""查询 ODS 表的列信息,返回 {column_name: data_type}"""
cursor.execute("""
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = 'billiards_ods' AND table_name = %s
ORDER BY ordinal_position
""", (table_name,))
cols = {}
for row in cursor.fetchall():
cols[row[0]] = row[1]
return cols
def suggest_pg_type(json_type):
"""根据 JSON 类型推荐 PG 类型"""
return TYPE_MAP.get(json_type, "text")
def compare_table(api_fields, ods_columns, table_name):
"""比对单张表,使用归一化名称匹配。
返回 (truly_missing, extra_in_ods, matched_pairs, case_matched)
- truly_missing: API 有但 ODS 确实没有的字段 {api_name: json_type}
- extra_in_ods: ODS 有但 API 没有的列 {col_name: pg_type}
- matched_pairs: 精确匹配的字段 [(api_name, ods_name)]
- case_matched: 通过归一化匹配的字段 [(api_name, ods_name)]
"""
# 排除 ODS 元数据列
ods_biz = {k: v for k, v in ods_columns.items() if k not in ODS_META_COLUMNS}
# 建立归一化索引
# api: normalized → (original_name, type)
api_norm = {}
for name, typ in api_fields.items():
norm = normalize_field_name(name)
api_norm[norm] = (name, typ)
# ods: normalized → (original_name, type)
ods_norm = {}
for name, typ in ods_biz.items():
norm = name.lower() # ODS 列名已经是小写
ods_norm[norm] = (name, typ)
matched_pairs = []
case_matched = []
api_matched_norms = set()
ods_matched_norms = set()
# 第一轮精确匹配API 字段名 == ODS 列名)
for api_name, api_type in api_fields.items():
if api_name in ods_biz:
matched_pairs.append((api_name, api_name))
api_matched_norms.add(normalize_field_name(api_name))
ods_matched_norms.add(api_name)
# 第二轮归一化匹配camelCase → snake_case
for norm_name, (api_name, api_type) in api_norm.items():
if norm_name in api_matched_norms:
continue
if norm_name in ods_norm:
ods_name = ods_norm[norm_name][0]
if ods_name not in ods_matched_norms:
case_matched.append((api_name, ods_name))
api_matched_norms.add(norm_name)
ods_matched_norms.add(ods_name)
# 第三轮:尝试去掉下划线的纯小写匹配
for norm_name, (api_name, api_type) in api_norm.items():
if norm_name in api_matched_norms:
continue
flat = norm_name.replace("_", "")
for ods_col, (ods_name, ods_type) in ods_norm.items():
if ods_name in ods_matched_norms:
continue
if ods_col.replace("_", "") == flat:
case_matched.append((api_name, ods_name))
api_matched_norms.add(norm_name)
ods_matched_norms.add(ods_name)
break
# 计算真正缺失和多余
truly_missing = {}
for norm_name, (api_name, api_type) in api_norm.items():
if norm_name not in api_matched_norms:
truly_missing[api_name] = api_type
extra_in_ods = {}
for ods_name, ods_type in ods_biz.items():
if ods_name not in ods_matched_norms:
extra_in_ods[ods_name] = ods_type
return truly_missing, extra_in_ods, matched_pairs, case_matched
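# 匹配示例(仅作说明,字段为假设样例)API {"siteId": "int"} vs ODS {"site_id": "bigint"}
# -> truly_missing={}, extra_in_ods={}, matched_pairs=[], case_matched=[("siteId", "site_id")]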
def generate_alter_sql(table_name, missing_fields):
"""生成 ALTER TABLE ADD COLUMN SQL列名用 snake_case"""
sqls = []
for field_name, json_type in sorted(missing_fields.items()):
pg_type = suggest_pg_type(json_type)
col_name = normalize_field_name(field_name)
sqls.append(
f"ALTER TABLE billiards_ods.{table_name} ADD COLUMN IF NOT EXISTS "
f"{col_name} {pg_type}; -- API 字段: {field_name}"
)
return sqls
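# 生成示例(仅作说明,表名/字段为假设样例)generate_alter_sql("orders", {"payAmount": "float"})
# -> ['ALTER TABLE billiards_ods.orders ADD COLUMN IF NOT EXISTS pay_amount numeric(18,2); -- API 字段: payAmount']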
def main():
# 加载 API 注册表
with open(REGISTRY_FILE, "r", encoding="utf-8") as f:
registry = json.load(f)
# 建立 id → ods_table 映射
api_to_ods = {}
api_names = {}
for entry in registry:
if entry.get("ods_table") and not entry.get("skip"):
api_to_ods[entry["id"]] = entry["ods_table"]
api_names[entry["id"]] = entry.get("name_zh", entry["id"])
conn = psycopg2.connect(PG_DSN)
cursor = conn.cursor()
results = []
all_alter_sqls = []
for api_id, ods_table in sorted(api_to_ods.items()):
md_path = os.path.join(ENDPOINTS_DIR, f"{api_id}.md")
if not os.path.exists(md_path):
results.append({
"api_id": api_id, "name_zh": api_names.get(api_id, ""),
"ods_table": ods_table, "status": "NO_DOC",
"api_fields": 0, "ods_cols": 0,
})
continue
api_fields = parse_api_fields(md_path)
ods_columns = get_ods_columns(cursor, ods_table)
if not ods_columns:
results.append({
"api_id": api_id, "name_zh": api_names.get(api_id, ""),
"ods_table": ods_table, "status": "NO_TABLE",
"api_fields": len(api_fields), "ods_cols": 0,
})
continue
missing, extra, matched, case_matched = compare_table(
api_fields, ods_columns, ods_table
)
alter_sqls = generate_alter_sql(ods_table, missing)
all_alter_sqls.extend(alter_sqls)
ods_biz_count = len({k: v for k, v in ods_columns.items()
if k not in ODS_META_COLUMNS})
status = "OK" if not missing else "DRIFT"
results.append({
"api_id": api_id,
"name_zh": api_names.get(api_id, ""),
"ods_table": ods_table,
"status": status,
"api_fields": len(api_fields),
"ods_cols": ods_biz_count,
"exact_match": len(matched),
"case_match": len(case_matched),
"total_match": len(matched) + len(case_matched),
"missing_in_ods": missing,
"extra_in_ods": extra,
"case_matched_pairs": case_matched,
})
cursor.close()
conn.close()
# ── 输出 JSON 报告 ──
report_json = os.path.join("docs", "reports", "api_ods_comparison.json")
os.makedirs(os.path.dirname(report_json), exist_ok=True)
# 序列化时把 tuple 转 list
json_results = []
for r in results:
jr = dict(r)
if "case_matched_pairs" in jr:
jr["case_matched_pairs"] = [list(p) for p in jr["case_matched_pairs"]]
if "missing_in_ods" in jr:
jr["missing_in_ods"] = dict(jr["missing_in_ods"])
if "extra_in_ods" in jr:
jr["extra_in_ods"] = dict(jr["extra_in_ods"])
json_results.append(jr)
with open(report_json, "w", encoding="utf-8") as f:
json.dump(json_results, f, ensure_ascii=False, indent=2)
# ── 输出 Markdown 报告 ──
report_md = os.path.join("docs", "reports", "api_ods_comparison.md")
with open(report_md, "w", encoding="utf-8") as f:
f.write("# API JSON 字段 vs ODS 表列 对比报告\n\n")
f.write("> 自动生成于 2026-02-13 | 数据来源:数据库实际表结构 + API 参考文档\n")
f.write("> 比对逻辑camelCase → snake_case 归一化匹配 + 去下划线纯小写兜底\n\n")
# 汇总
ok_count = sum(1 for r in results if r["status"] == "OK")
drift_count = sum(1 for r in results if r["status"] == "DRIFT")
total_missing = sum(len(r.get("missing_in_ods", {})) for r in results)
total_extra = sum(len(r.get("extra_in_ods", {})) for r in results)
f.write("## 汇总\n\n")
f.write("| 指标 | 值 |\n|------|----|")
f.write(f"\n| 比对表数 | {len(results)} |")
f.write(f"\n| 完全一致(含大小写归一化) | {ok_count} |")
f.write(f"\n| 存在差异 | {drift_count} |")
f.write(f"\n| ODS 缺失字段总数 | {total_missing} |")
f.write(f"\n| ODS 多余列总数 | {total_extra} |")
f.write(f"\n| 生成 ALTER SQL 数 | {len(all_alter_sqls)} |\n\n")
# 总览表
f.write("## 逐表对比总览\n\n")
f.write("| # | API ID | 中文名 | ODS 表 | 状态 | API字段 | ODS列 | 精确匹配 | 大小写匹配 | ODS缺失 | ODS多余 |\n")
f.write("|---|--------|--------|--------|------|---------|-------|----------|-----------|---------|--------|\n")
for i, r in enumerate(results, 1):
missing_count = len(r.get("missing_in_ods", {}))
extra_count = len(r.get("extra_in_ods", {}))
exact = r.get("exact_match", 0)
case = r.get("case_match", 0)
icon = "" if r["status"] == "OK" else "⚠️" if r["status"] == "DRIFT" else ""
f.write(f"| {i} | {r['api_id']} | {r.get('name_zh','')} | {r['ods_table']} | "
f"{icon} | {r['api_fields']} | {r['ods_cols']} | {exact} | {case} | "
f"{missing_count} | {extra_count} |\n")
# 差异详情
has_drift = any(r["status"] == "DRIFT" for r in results)
if has_drift:
f.write("\n## 差异详情\n\n")
for r in results:
if r["status"] != "DRIFT":
continue
f.write(f"### {r.get('name_zh','')}`{r['ods_table']}`\n\n")
missing = r.get("missing_in_ods", {})
extra = r.get("extra_in_ods", {})
case_pairs = r.get("case_matched_pairs", [])
if case_pairs:
f.write("**大小写归一化匹配(已自动对齐,无需操作):**\n\n")
f.write("| API 字段名 (camelCase) | ODS 列名 (lowercase) |\n")
f.write("|----------------------|---------------------|\n")
for api_n, ods_n in sorted(case_pairs):
f.write(f"| `{api_n}` | `{ods_n}` |\n")
f.write("\n")
if missing:
f.write("**ODS 真正缺失的字段(需要 ADD COLUMN**\n\n")
f.write("| 字段名 | JSON 类型 | 建议 PG 列名 | 建议 PG 类型 |\n")
f.write("|--------|-----------|-------------|-------------|\n")
for fname, ftype in sorted(missing.items()):
f.write(f"| `{fname}` | {ftype} | `{normalize_field_name(fname)}` | {suggest_pg_type(ftype)} |\n")
f.write("\n")
if extra:
f.write("**ODS 多余的列API 中不存在):**\n\n")
f.write("| 列名 | PG 类型 | 可能原因 |\n")
f.write("|------|---------|--------|\n")
for cname, ctype in sorted(extra.items()):
f.write(f"| `{cname}` | {ctype} | ETL 自行添加 / 历史遗留 / API 新版已移除 |\n")
f.write("\n")
# ── 输出 ALTER SQL ──
sql_path = os.path.join("database", "migrations", "20260213_align_ods_with_api.sql")
os.makedirs(os.path.dirname(sql_path), exist_ok=True)
with open(sql_path, "w", encoding="utf-8") as f:
f.write("-- ============================================================\n")
f.write("-- ODS 表与 API JSON 字段对齐迁移\n")
f.write("-- 自动生成于 2026-02-13\n")
f.write("-- 基于: docs/api-reference/ 文档 vs billiards_ods 实际表结构\n")
f.write("-- 比对逻辑: camelCase → snake_case 归一化后再比较\n")
f.write("-- ============================================================\n\n")
if all_alter_sqls:
f.write("BEGIN;\n\n")
current_table = ""
for sql in all_alter_sqls:
# 提取表名做分组注释
tbl = sql.split("billiards_ods.")[1].split(" ")[0]
if tbl != current_table:
if current_table:
f.write("\n")
f.write(f"-- ── {tbl} ──\n")
current_table = tbl
f.write(sql + "\n")
f.write("\nCOMMIT;\n")
else:
f.write("-- 无需变更,所有 ODS 表已与 API JSON 字段对齐。\n")
print(f"[完成] 比对 {len(results)} 张表")
print(f" - 完全一致: {ok_count}")
print(f" - 存在差异: {drift_count}")
print(f" - ODS 缺失字段: {total_missing}")
print(f" - ODS 多余列: {total_extra}")
print(f" - ALTER SQL: {len(all_alter_sqls)}")
print(f" - 报告: {report_md}")
print(f" - JSON: {report_json}")
print(f" - SQL: {sql_path}")
if __name__ == "__main__":
main()
# AI_CHANGELOG:
# - 日期: 2026-02-13
# - Prompt: P20260213-210000 — "用新梳理的API返回的JSON文档比对数据库ODS层"
# - 直接原因: 用户要求比对 API 参考文档与 ODS 实际表结构,生成对比报告和 ALTER SQL
# - 变更摘要: 新建比对脚本,支持 camelCase→snake_case 归一化匹配,输出 MD/JSON 报告和迁移 SQL
# - 风险与验证: 纯分析脚本不修改数据库验证python scripts/compare_api_ods.py 检查输出

View File

@@ -0,0 +1,461 @@
# -*- coding: utf-8 -*-
"""
API 参考文档 vs ODS 实际表结构 对比脚本 (v2)
从 docs/api-reference/*.md 的 JSON 样例中提取字段,
查询 PostgreSQL billiards_ods 的实际列,
输出差异报告 JSON 和 Markdown + ALTER SQL。
用法: python scripts/compare_api_ods_v2.py
"""
import json
import os
import re
import sys
from datetime import datetime
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOT)
from dotenv import load_dotenv
load_dotenv(os.path.join(ROOT, ".env"))
import psycopg2
# ODS 元列ETL 管理列,不来自 API
ODS_META_COLS = {
"source_file", "source_endpoint", "fetched_at",
"payload", "content_hash",
}
def load_registry():
"""加载 API 注册表"""
path = os.path.join(ROOT, "docs", "api-reference", "api_registry.json")
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def extract_fields_from_md(md_path, api_id):
"""
从 md 文件的 JSON 样例(五、响应样例)中提取所有字段名(小写)。
对 settlement_records / recharge_settlements 等嵌套结构,
提取 settleList 内层字段 + siteProfile 字段。
"""
with open(md_path, "r", encoding="utf-8") as f:
content = f.read()
# 提取所有 ```json ... ``` 代码块
json_blocks = re.findall(r'```json\s*\n(.*?)\n```', content, re.DOTALL)
if not json_blocks:
return None, None, "无 JSON 样例"
# 找到最大的 JSON 对象(响应样例通常是最大的)
sample_json = None
for block in json_blocks:
try:
parsed = json.loads(block)
if isinstance(parsed, dict):
if sample_json is None or len(str(parsed)) > len(str(sample_json)):
sample_json = parsed
except json.JSONDecodeError:
continue
if sample_json is None:
return None, None, "无法解析 JSON 样例"
fields = set()
has_nested = False
# settlement_records / recharge_settlements 嵌套结构:
# { "siteProfile": {...}, "settleList": {...} }
if "siteProfile" in sample_json and "settleList" in sample_json:
has_nested = True
sl = sample_json.get("settleList", {})
if isinstance(sl, dict):
for k in sl:
fields.add(k.lower())
return fields, has_nested, None
# CHANGE: stock_goods_category_tree 特殊结构处理
# intent: goodsCategoryList 是数组包装ODS 存储的是展平后的分类节点字段
# assumptions: 外层 total/goodsCategoryList 不是 ODS 列
if "goodsCategoryList" in sample_json and isinstance(sample_json["goodsCategoryList"], list):
has_nested = True
arr = sample_json["goodsCategoryList"]
if arr and isinstance(arr[0], dict):
_extract_flat(arr[0], fields)
return fields, has_nested, None
for k in sample_json:
fields.add(k.lower())
return fields, has_nested, None
def _extract_flat(obj, fields):
"""递归提取字典的标量字段名(跳过数组/嵌套对象值,但保留键名)"""
if not isinstance(obj, dict):
return
for k, v in obj.items():
fields.add(k.lower())
def get_all_ods_columns(conn):
"""查询所有 ODS 表的列信息"""
cur = conn.cursor()
cur.execute("""
SELECT table_name, column_name, data_type, ordinal_position
FROM information_schema.columns
WHERE table_schema = 'billiards_ods'
ORDER BY table_name, ordinal_position
""")
rows = cur.fetchall()
cur.close()
tables = {}
for table_name, col_name, data_type, pos in rows:
if table_name not in tables:
tables[table_name] = {}
tables[table_name][col_name] = {
"data_type": data_type,
"ordinal_position": pos,
}
return tables
def guess_pg_type(name):
"""根据字段名猜测 PostgreSQL 类型(用于 ALTER TABLE ADD COLUMN"""
n = name.lower()
if n == "id" or n.endswith("_id") or n.endswith("id"):
return "bigint"
money_kw = ["amount", "money", "price", "cost", "fee", "discount",
"deduct", "balance", "charge", "sale", "refund",
"promotion", "adjust", "rounding", "prepay", "income",
"royalty", "grade", "point", "stock", "num"]
for kw in money_kw:
if kw in n:
return "numeric(18,2)"
if "time" in n or "date" in n:
return "timestamp without time zone"
if n.startswith("is_") or (n.startswith("is") and len(n) > 2 and n[2].isupper()):
return "boolean"
if n.startswith("able_") or n.startswith("can"):
return "boolean"
int_kw = ["status", "type", "sort", "count", "seconds", "level",
"channel", "method", "way", "enabled", "switch", "delete",
"first", "single", "trash", "confirm", "clock", "cycle",
"delay", "free", "virtual", "online", "show", "audit",
"freeze", "send", "required", "scene", "range", "tag",
"on", "minutes", "number", "duration"]
for kw in int_kw:
if kw in n:
return "integer"
return "text"
def compare_one(api_entry, md_path, ods_tables):
"""比较单个 API 与其 ODS 表"""
api_id = api_entry["id"]
ods_table = api_entry.get("ods_table")
name_zh = api_entry.get("name_zh", "")
result = {
"api_id": api_id,
"name_zh": name_zh,
"ods_table": ods_table,
}
if not ods_table:
result["status"] = "skip"
result["reason"] = "无对应 ODS 表ods_table=null"
return result
if api_entry.get("skip"):
result["status"] = "skip"
result["reason"] = "接口标记为 skip暂不可用"
return result
# 提取 API JSON 样例字段
api_fields, has_nested, err = extract_fields_from_md(md_path, api_id)
if err:
result["status"] = "error"
result["reason"] = err
return result
# 获取 ODS 表列
if ods_table not in ods_tables:
result["status"] = "error"
result["reason"] = f"ODS 表 {ods_table} 不存在"
return result
ods_cols = ods_tables[ods_table]
ods_biz_cols = {c for c in ods_cols if c not in ODS_META_COLS}
# 比较
api_lower = {f.lower() for f in api_fields}
ods_lower = {c.lower() for c in ods_biz_cols}
# API 有但 ODS 没有的字段
api_only = sorted(api_lower - ods_lower)
# ODS 有但 API 没有的字段(非元列)
ods_only = sorted(ods_lower - api_lower)
# 两边都有的字段
matched = sorted(api_lower & ods_lower)
result["status"] = "ok" if not api_only else "drift"
result["has_nested_structure"] = has_nested
result["api_field_count"] = len(api_lower)
result["ods_biz_col_count"] = len(ods_biz_cols)
result["ods_total_col_count"] = len(ods_cols)
result["matched_count"] = len(matched)
result["api_only"] = api_only
result["api_only_count"] = len(api_only)
result["ods_only"] = ods_only
result["ods_only_count"] = len(ods_only)
result["matched"] = matched
return result
def generate_alter_sql(results, ods_tables):
"""生成 ALTER TABLE SQL 语句"""
sqls = []
for r in results:
if r.get("status") != "drift" or not r.get("api_only"):
continue
table = r["ods_table"]
for field in r["api_only"]:
pg_type = guess_pg_type(field)
sqls.append(
f"ALTER TABLE billiards_ods.{table} "
f"ADD COLUMN IF NOT EXISTS {field} {pg_type};"
)
return sqls
def generate_markdown_report(results, alter_sqls):
"""生成 Markdown 报告"""
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
lines = [
"# API 参考文档 vs ODS 实际表结构 对比报告 (v2)",
"",
f"> 生成时间:{now}",
"> 数据来源:`docs/api-reference/*.md` JSON 样例 vs `billiards_ods` 实际列",
"",
"---",
"",
"## 一、汇总",
"",
"| API 接口 | 中文名 | ODS 表 | 状态 | API 字段数 | ODS 业务列数 | 匹配 | API 独有 | ODS 独有 |",
"|----------|--------|--------|------|-----------|-------------|------|---------|---------|",
]
total_api_only = 0
total_ods_only = 0
ok_count = 0
drift_count = 0
skip_count = 0
error_count = 0
for r in results:
status = r.get("status", "?")
if status == "skip":
skip_count += 1
lines.append(
f"| {r['api_id']} | {r['name_zh']} | {r.get('ods_table', '-')} "
f"| ⏭️ 跳过 | - | - | - | - | - |"
)
continue
if status == "error":
error_count += 1
lines.append(
f"| {r['api_id']} | {r['name_zh']} | {r.get('ods_table', '-')} "
f"| ❌ 错误 | - | - | - | - | - |"
)
continue
api_only_n = r.get("api_only_count", 0)
ods_only_n = r.get("ods_only_count", 0)
total_api_only += api_only_n
total_ods_only += ods_only_n
if status == "ok":
ok_count += 1
badge = "✅ 对齐"
else:
drift_count += 1
badge = "⚠️ 漂移"
lines.append(
f"| {r['api_id']} | {r['name_zh']} | {r['ods_table']} "
f"| {badge} | {r['api_field_count']} | {r['ods_biz_col_count']} "
f"| {r['matched_count']} | {api_only_n} | {ods_only_n} |"
)
lines.extend([
"",
f"**统计**:对齐 {ok_count} / 漂移 {drift_count} / 跳过 {skip_count} / 错误 {error_count}",
f"**API 独有字段总计**{total_api_only}(需要 ALTER TABLE ADD COLUMN",
f"**ODS 独有列总计**{total_ods_only}API 中不存在,可能是历史遗留或 ETL 派生列)",
"",
])
# 详情:每个漂移表的字段差异
drift_results = [r for r in results if r.get("status") == "drift"]
if drift_results:
lines.extend(["---", "", "## 二、漂移详情", ""])
for r in drift_results:
lines.extend([
f"### {r['api_id']}{r['name_zh']})→ `{r['ods_table']}`",
"",
])
if r["api_only"]:
lines.append("**API 有 / ODS 缺**")
for f in r["api_only"]:
pg_type = guess_pg_type(f)
lines.append(f"- `{f}` → 建议类型 `{pg_type}`")
lines.append("")
if r["ods_only"]:
lines.append("**ODS 有 / API 无**(非元列):")
for f in r["ods_only"]:
lines.append(f"- `{f}`")
lines.append("")
# ODS 独有列详情(所有表)
ods_only_results = [r for r in results if r.get("ods_only") and r.get("status") in ("ok", "drift")]
if ods_only_results:
lines.extend(["---", "", "## 三、ODS 独有列详情API 中不存在)", ""])
for r in ods_only_results:
if not r["ods_only"]:
continue
lines.extend([
f"### `{r['ods_table']}`{r['name_zh']}",
"",
"| 列名 | 说明 |",
"|------|------|",
])
for f in r["ods_only"]:
lines.append(f"| `{f}` | ODS 独有API JSON 样例中不存在 |")
lines.append("")
# ALTER SQL
if alter_sqls:
lines.extend([
"---", "",
"## 四、ALTER SQL对齐 ODS 表结构)", "",
"```sql",
"-- 自动生成的 ALTER TABLE 语句",
f"-- 生成时间:{now}",
"-- 注意:类型为根据字段名猜测,请人工复核后执行",
"",
])
lines.extend(alter_sqls)
lines.extend(["", "```", ""])
return "\n".join(lines)
def main():
dsn = os.environ.get("PG_DSN")
if not dsn:
print("错误:未设置 PG_DSN 环境变量", file=sys.stderr)
sys.exit(1)
print("连接数据库...")
conn = psycopg2.connect(dsn)
print("查询 ODS 表结构...")
ods_tables = get_all_ods_columns(conn)
print(f"{len(ods_tables)} 张 ODS 表")
print("加载 API 注册表...")
registry = load_registry()
print(f"{len(registry)} 个 API 端点")
results = []
for entry in registry:
api_id = entry["id"]
ods_table = entry.get("ods_table")
md_path = os.path.join(ROOT, "docs", "api-reference", f"{api_id}.md")
if not os.path.exists(md_path):
results.append({
"api_id": api_id,
"name_zh": entry.get("name_zh", ""),
"ods_table": ods_table,
"status": "error",
"reason": f"文档不存在: {md_path}",
})
continue
r = compare_one(entry, md_path, ods_tables)
results.append(r)
status_icon = {"ok": "", "drift": "⚠️", "skip": "⏭️", "error": ""}.get(r["status"], "?")
extra = ""
if r.get("api_only_count"):
extra = f" (API独有: {r['api_only_count']})"
if r.get("ods_only_count"):
extra += f" (ODS独有: {r['ods_only_count']})"
print(f" {status_icon} {api_id}{ods_table or '-'}{extra}")
conn.close()
# 生成 ALTER SQL
alter_sqls = generate_alter_sql(results, ods_tables)
# 输出 JSON 报告
json_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.json")
os.makedirs(os.path.dirname(json_path), exist_ok=True)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\nJSON 报告: {json_path}")
# 输出 Markdown 报告
md_report = generate_markdown_report(results, alter_sqls)
md_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.md")
with open(md_path, "w", encoding="utf-8") as f:
f.write(md_report)
print(f"Markdown 报告: {md_path}")
# 输出 ALTER SQL 文件
if alter_sqls:
sql_path = os.path.join(ROOT, "database", "migrations",
"20260213_align_ods_with_api_v2.sql")
os.makedirs(os.path.dirname(sql_path), exist_ok=True)
with open(sql_path, "w", encoding="utf-8") as f:
f.write("-- API vs ODS 对齐迁移脚本 (v2)\n")
f.write(f"-- 生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write("-- 注意:类型为根据字段名猜测,请人工复核后执行\n\n")
f.write("BEGIN;\n\n")
for sql in alter_sqls:
f.write(sql + "\n")
f.write("\nCOMMIT;\n")
print(f"ALTER SQL: {sql_path}")
else:
print("无需 ALTER SQL所有表已对齐")
# 统计
ok_n = sum(1 for r in results if r.get("status") == "ok")
drift_n = sum(1 for r in results if r.get("status") == "drift")
skip_n = sum(1 for r in results if r.get("status") == "skip")
err_n = sum(1 for r in results if r.get("status") == "error")
print(f"\n汇总:对齐 {ok_n} / 漂移 {drift_n} / 跳过 {skip_n} / 错误 {err_n}")
print(f"ALTER SQL 语句数:{len(alter_sqls)}")
if __name__ == "__main__":
main()
# ──────────────────────────────────────────────
# AI_CHANGELOG:
# - 日期: 2026-02-13
# Prompt: P20260213-223000 — 用 API 参考文档比对数据库 ODS 实际表结构(重做,不依赖 DDL
# 直接原因: 前次比对脚本 stock_goods_category_tree 嵌套结构解析 bug需重写脚本
# 变更摘要: 完整重写脚本,从 api-reference/*.md JSON 样例提取字段,查询 PG billiards_ods 实际列,
# 处理三种特殊结构(标准/settleList 嵌套/goodsCategoryList 数组包装),输出 JSON+MD 报告
# 风险与验证: 纯分析脚本,不修改数据库;验证方式:运行脚本确认 "对齐 22 / 漂移 0"
# ──────────────────────────────────────────────

View File

@@ -0,0 +1,822 @@
#!/usr/bin/env python3
"""DDL 与数据库实际表结构对比脚本。
# AI_CHANGELOG [2026-02-13] 修复列名以 UNIQUE/CHECK 开头被误判为约束行的 bug新增 CREATE VIEW 解析支持(视图仅检查存在性)
解析 database/schema_*.sql 中的 CREATE TABLE 语句,
查询 information_schema.columns 获取数据库实际结构,
逐表逐字段对比并输出差异报告。
用法:
python scripts/compare_ddl_db.py --pg-dsn "postgresql://..." --schema billiards_ods --ddl-path database/schema_ODS_doc.sql
python scripts/compare_ddl_db.py --schema billiards_dwd --ddl-path database/schema_dwd_doc.sql # 从 .env 读取 PG_DSN
"""
from __future__ import annotations
import argparse
import os
import re
import sys
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Optional
class DiffKind(str, Enum):
"""差异分类枚举。"""
MISSING_TABLE = "MISSING_TABLE" # DDL 缺表数据库有DDL 没有)
EXTRA_TABLE = "EXTRA_TABLE" # DDL 多表DDL 有,数据库没有)
MISSING_COLUMN = "MISSING_COLUMN" # DDL 缺字段
EXTRA_COLUMN = "EXTRA_COLUMN" # DDL 多字段
TYPE_MISMATCH = "TYPE_MISMATCH" # 字段类型不一致
NULLABLE_MISMATCH = "NULLABLE_MISMATCH" # 可空约束不一致
@dataclass
class SchemaDiff:
"""单条差异记录。"""
kind: DiffKind
table: str
column: Optional[str] = None
ddl_value: Optional[str] = None
db_value: Optional[str] = None
def __str__(self) -> str:
parts = [f"[{self.kind.value}] {self.table}"]
if self.column:
parts.append(f".{self.column}")
if self.ddl_value is not None or self.db_value is not None:
parts.append(f" DDL={self.ddl_value} DB={self.db_value}")
return "".join(parts)
# ---------------------------------------------------------------------------
# DDL 列定义
# ---------------------------------------------------------------------------
@dataclass
class ColumnDef:
"""从 DDL 解析出的单个字段定义。"""
name: str
data_type: str # 标准化后的类型字符串
nullable: bool = True
is_pk: bool = False
default: Optional[str] = None
@dataclass
class TableDef:
"""从 DDL 解析出的单张表定义。"""
name: str # 不含 schema 前缀的表名(小写)
columns: dict[str, ColumnDef] = field(default_factory=dict)
pk_columns: list[str] = field(default_factory=list)
is_view: bool = False # 视图标记,跳过列级对比
# ---------------------------------------------------------------------------
# 类型标准化:将 DDL 类型和 information_schema 类型映射到统一表示
# ---------------------------------------------------------------------------
# PostgreSQL information_schema.data_type → 简写映射
_PG_TYPE_MAP: dict[str, str] = {
"bigint": "bigint",
"integer": "integer",
"smallint": "smallint",
"boolean": "boolean",
"text": "text",
"jsonb": "jsonb",
"json": "json",
"date": "date",
"bytea": "bytea",
"double precision": "double precision",
"real": "real",
"uuid": "uuid",
"timestamp without time zone": "timestamp",
"timestamp with time zone": "timestamptz",
"time without time zone": "time",
"time with time zone": "timetz",
"character varying": "varchar",
"character": "char",
"ARRAY": "array",
"USER-DEFINED": "user-defined",
}
def normalize_type(raw: str) -> str:
"""将 DDL 或 information_schema 中的类型字符串标准化为可比较的形式。
规则:
- 全部小写
- BIGINT / INT8 → bigint
- INTEGER / INT / INT4 → integer
- SMALLINT / INT2 → smallint
- BOOLEAN / BOOL → boolean
- VARCHAR(n) / CHARACTER VARYING(n) → varchar(n)
- CHAR(n) / CHARACTER(n) → char(n)
- NUMERIC(p,s) / DECIMAL(p,s) → numeric(p,s)
- SERIAL → integerserial 本质是 integer + sequence
- BIGSERIAL → bigint
- TIMESTAMP → timestamp
- TIMESTAMPTZ / TIMESTAMP WITH TIME ZONE → timestamptz
- TEXT → text
- JSONB → jsonb
"""
t = raw.strip().lower()
# 去掉多余空格
t = re.sub(r"\s+", " ", t)
# serial 家族 → 底层整数类型
if t == "bigserial":
return "bigint"
if t in ("serial", "serial4"):
return "integer"
if t == "smallserial":
return "smallint"
# 带精度的 numeric / decimal
m = re.match(r"(?:numeric|decimal)\s*\((\d+)\s*,\s*(\d+)\)", t)
if m:
return f"numeric({m.group(1)},{m.group(2)})"
m = re.match(r"(?:numeric|decimal)\s*\((\d+)\)", t)
if m:
return f"numeric({m.group(1)})"
if t in ("numeric", "decimal"):
return "numeric"
# varchar / character varying
m = re.match(r"(?:varchar|character varying)\s*\((\d+)\)", t)
if m:
return f"varchar({m.group(1)})"
if t in ("varchar", "character varying"):
return "varchar"
# char / character
m = re.match(r"(?:char|character)\s*\((\d+)\)", t)
if m:
return f"char({m.group(1)})"
if t in ("char", "character"):
return "char(1)"
# timestamp 家族
if t in ("timestamptz", "timestamp with time zone"):
return "timestamptz"
if t in ("timestamp", "timestamp without time zone"):
return "timestamp"
# 整数别名
if t in ("int8", "bigint"):
return "bigint"
if t in ("int", "int4", "integer"):
return "integer"
if t in ("int2", "smallint"):
return "smallint"
# 布尔
if t in ("bool", "boolean"):
return "boolean"
# information_schema 映射
if t in _PG_TYPE_MAP:
return _PG_TYPE_MAP[t]
return t
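# 标准化示例(仅作说明,按上述规则推演):
#   normalize_type("BIGSERIAL")                 -> "bigint"
#   normalize_type("CHARACTER VARYING(50)")     -> "varchar(50)"
#   normalize_type("timestamp with time zone")  -> "timestamptz"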
# ---------------------------------------------------------------------------
# DDL 解析器
# ---------------------------------------------------------------------------
# 匹配 CREATE TABLE [IF NOT EXISTS] [schema.]table_name (
_CREATE_TABLE_RE = re.compile(
r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?"
r"(?:(\w+)\.)?(\w+)\s*\(",
re.IGNORECASE,
)
# 匹配 DROP TABLE [IF EXISTS] [schema.]table_name [CASCADE];
_DROP_TABLE_RE = re.compile(
r"DROP\s+TABLE\s+(?:IF\s+EXISTS\s+)?(?:\w+\.)?(\w+)",
re.IGNORECASE,
)
# 匹配 CREATE [OR REPLACE] VIEW [schema.]view_name AS SELECT ...
_CREATE_VIEW_RE = re.compile(
r"CREATE\s+(?:OR\s+REPLACE\s+)?VIEW\s+"
r"(?:(\w+)\.)?(\w+)\s+AS\s+",
re.IGNORECASE,
)
def _strip_sql_comments(sql: str) -> str:
"""移除 SQL 单行注释(-- ...)和块注释(/* ... */)。"""
# 块注释
sql = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)
# 单行注释
sql = re.sub(r"--[^\n]*", "", sql)
return sql
def _find_matching_paren(text: str, start: int) -> int:
"""从 start 位置(应为 '(')开始,找到匹配的 ')' 位置。
处理嵌套括号和字符串字面量中的括号。
"""
depth = 0
in_string = False
string_char = ""
i = start
while i < len(text):
ch = text[i]
if in_string:
if ch == string_char:
# 检查转义
if i + 1 < len(text) and text[i + 1] == string_char:
i += 2
continue
in_string = False
else:
if ch in ("'", '"'):
in_string = True
string_char = ch
elif ch == "(":
depth += 1
elif ch == ")":
depth -= 1
if depth == 0:
return i
i += 1
return -1
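# 用法示例(仅作说明,输入为假设样例)_find_matching_paren("t (a, f(x))", 2) -> 10即与下标 2 处 '(' 配对的 ')' 位置)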
def _parse_column_line(line: str) -> Optional[ColumnDef]:
"""解析单行字段定义,返回 ColumnDef 或 None如果是约束行"""
line = line.strip().rstrip(",")
if not line:
return None
upper = line.upper()
# 跳过表级约束行
# 注意:需要区分约束行(如 "UNIQUE (...)")和以约束关键字开头的列名
# (如 "unique_customers INTEGER"、"check_status INT"
# 约束行的关键字后面紧跟空格+左括号或直接左括号,而列名后面跟下划线或字母
if re.match(
r"(?:PRIMARY\s+KEY|UNIQUE|CHECK|FOREIGN\s+KEY|EXCLUDE)"
r"(?:\s*\(|\s+(?![\w]))",
upper,
) or upper.startswith("CONSTRAINT"):
return None
# 字段名 类型 [约束...]
# 字段名可能被双引号包裹
m = re.match(r'(?:"([^"]+)"|(\w+))\s+(.+)', line)
if not m:
return None
col_name = (m.group(1) or m.group(2)).lower()
rest = m.group(3).strip()
# 提取类型:取到第一个(位置最靠前的)已知约束关键字或行尾
# 类型可能包含括号,如 NUMERIC(18,2)、VARCHAR(50)
type_end_keywords = [
"NOT NULL", "NULL", "DEFAULT", "PRIMARY KEY", "UNIQUE",
"REFERENCES", "CHECK", "CONSTRAINT", "GENERATED",
]
type_str = rest
constraint_part = ""
# 找所有关键字中位置最靠前的
best_idx = len(rest)
for kw in type_end_keywords:
idx = rest.upper().find(kw)
if idx > 0 and idx < best_idx:
candidate = rest[:idx].strip()
if candidate:
best_idx = idx
if best_idx < len(rest):
type_str = rest[:best_idx].strip()
constraint_part = rest[best_idx:]
# 去掉类型末尾的逗号
type_str = type_str.rstrip(",").strip()
nullable = True
if "NOT NULL" in constraint_part.upper():
nullable = False
is_pk = "PRIMARY KEY" in constraint_part.upper()
# 提取 DEFAULT 值
default_val = None
dm = re.search(r"DEFAULT\s+(.+?)(?:\s+(?:NOT\s+NULL|NULL|PRIMARY|UNIQUE|REFERENCES|CHECK|CONSTRAINT|,|$))",
constraint_part, re.IGNORECASE)
if dm:
default_val = dm.group(1).strip().rstrip(",")
return ColumnDef(
name=col_name,
data_type=normalize_type(type_str),
nullable=nullable,
is_pk=is_pk,
default=default_val,
)
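# 判定示例(仅作说明,行内容为假设样例):
#   _parse_column_line('unique_customers INTEGER NOT NULL,')
#     -> ColumnDef(name="unique_customers", data_type="integer", nullable=False)
#   _parse_column_line('UNIQUE (tenant_id, site_id),')
#     -> None识别为表级约束行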
def _extract_pk_from_body(body: str) -> list[str]:
"""从 CREATE TABLE 体中提取表级 PRIMARY KEY 约束的列名列表。"""
# PRIMARY KEY (col1, col2, ...)
# 也可能是 CONSTRAINT xxx PRIMARY KEY (col1, col2)
m = re.search(r"PRIMARY\s+KEY\s*\(([^)]+)\)", body, re.IGNORECASE)
if not m:
return []
cols_str = m.group(1)
return [c.strip().strip('"').lower() for c in cols_str.split(",")]
def parse_ddl(sql_text: str, target_schema: Optional[str] = None) -> dict[str, TableDef]:
"""解析 DDL 文本,提取所有 CREATE TABLE 定义。
Args:
sql_text: 完整的 SQL DDL 文本
target_schema: 如果指定,只保留该 schema 下的表(或无 schema 前缀的表)
Returns:
{表名(小写): TableDef} 字典
"""
# 去掉 SQL 注释后逐条解析 CREATE TABLEDROP TABLE 语句在此忽略,以 CREATE 定义为准)
cleaned = _strip_sql_comments(sql_text)
tables: dict[str, TableDef] = {}
# 逐个匹配 CREATE TABLE
for m in _CREATE_TABLE_RE.finditer(cleaned):
schema_part = m.group(1)
table_name = m.group(2).lower()
# schema 过滤
if target_schema:
ts = target_schema.lower()
if schema_part and schema_part.lower() != ts:
continue
# 无 schema 前缀的表也接受DWD DDL 中 SET search_path 后不带前缀)
# 找到 CREATE TABLE ... ( 的左括号位置
paren_start = m.end() - 1 # m.end() 指向 '(' 后一位
paren_end = _find_matching_paren(cleaned, paren_start)
if paren_end < 0:
continue
body = cleaned[paren_start + 1: paren_end]
# 按行解析字段
table_def = TableDef(name=table_name)
# 提取表级 PRIMARY KEY
pk_cols = _extract_pk_from_body(body)
# 逐行解析
for raw_line in body.split("\n"):
col = _parse_column_line(raw_line)
if col:
table_def.columns[col.name] = col
# 合并表级 PK 信息
if pk_cols:
table_def.pk_columns = pk_cols
for pk_col in pk_cols:
if pk_col in table_def.columns:
table_def.columns[pk_col].is_pk = True
# PK 隐含 NOT NULL
table_def.columns[pk_col].nullable = False
# 合并内联 PK
inline_pk = [c.name for c in table_def.columns.values() if c.is_pk]
if inline_pk and not table_def.pk_columns:
table_def.pk_columns = inline_pk
for pk_col in inline_pk:
table_def.columns[pk_col].nullable = False
tables[table_name] = table_def
# 解析 CREATE VIEW仅标记视图存在列信息由数据库侧提供
for m in _CREATE_VIEW_RE.finditer(cleaned):
schema_part = m.group(1)
view_name = m.group(2).lower()
if target_schema:
ts = target_schema.lower()
if schema_part and schema_part.lower() != ts:
continue
if view_name not in tables:
# 视图仅标记存在,不解析列(列由底层表决定)
tables[view_name] = TableDef(name=view_name)
# 标记为视图,跳过列级对比
tables[view_name].is_view = True
return tables
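# 解析示例(仅作说明,表名/列名为假设样例;注意列定义需逐行出现才能被逐行解析):
#   ddl = "CREATE TABLE billiards_ods.demo (\n  id BIGINT PRIMARY KEY,\n  name TEXT NOT NULL\n);"
#   parse_ddl(ddl, target_schema="billiards_ods")
#   -> {"demo": TableDef(columns={"id": bigint/主键/NOT NULL, "name": text/NOT NULL}, pk_columns=["id"])}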
# ---------------------------------------------------------------------------
# 数据库 schema 读取
# ---------------------------------------------------------------------------
@dataclass
class DbColumnInfo:
"""从 information_schema 查询到的字段信息。"""
name: str
data_type: str # 标准化后
nullable: bool
is_pk: bool = False
def fetch_db_schema(pg_dsn: str, schema_name: str) -> dict[str, TableDef]:
"""从数据库 information_schema 查询指定 schema 的所有表和字段。
Returns:
{表名(小写): TableDef} 字典
"""
import psycopg2
conn = psycopg2.connect(pg_dsn)
try:
with conn.cursor() as cur:
# 检查 schema 是否存在
cur.execute(
"SELECT 1 FROM information_schema.schemata WHERE schema_name = %s",
(schema_name,),
)
if not cur.fetchone():
print(f"⚠ schema '{schema_name}' 在数据库中不存在,跳过", file=sys.stderr)
return {}
# 查询所有列信息
cur.execute("""
SELECT
c.table_name,
c.column_name,
c.data_type,
c.is_nullable,
c.character_maximum_length,
c.numeric_precision,
c.numeric_scale,
c.udt_name
FROM information_schema.columns c
WHERE c.table_schema = %s
ORDER BY c.table_name, c.ordinal_position
""", (schema_name,))
rows = cur.fetchall()
# 查询主键信息
cur.execute("""
SELECT
tc.table_name,
kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
WHERE tc.table_schema = %s
AND tc.constraint_type = 'PRIMARY KEY'
ORDER BY tc.table_name, kcu.ordinal_position
""", (schema_name,))
pk_rows = cur.fetchall()
finally:
conn.close()
# 构建 PK 映射: {table_name: [col1, col2, ...]}
pk_map: dict[str, list[str]] = {}
for tbl, col in pk_rows:
pk_map.setdefault(tbl.lower(), []).append(col.lower())
# 构建 TableDef
tables: dict[str, TableDef] = {}
for tbl, col_name, data_type, is_nullable, char_max_len, num_prec, num_scale, udt_name in rows:
tbl_lower = tbl.lower()
col_lower = col_name.lower()
if tbl_lower not in tables:
tables[tbl_lower] = TableDef(
name=tbl_lower,
pk_columns=pk_map.get(tbl_lower, []),
)
# 构建精确类型字符串
type_str = _build_db_type_string(data_type, char_max_len, num_prec, num_scale, udt_name)
is_pk = col_lower in pk_map.get(tbl_lower, [])
nullable = is_nullable == "YES"
tables[tbl_lower].columns[col_lower] = ColumnDef(
name=col_lower,
data_type=normalize_type(type_str),
nullable=nullable,
is_pk=is_pk,
)
return tables
def _build_db_type_string(
data_type: str,
char_max_len: Optional[int],
num_prec: Optional[int],
num_scale: Optional[int],
udt_name: str,
) -> str:
"""根据 information_schema 字段构建可比较的类型字符串。"""
dt = data_type.lower()
# character varying → varchar(n)
if dt == "character varying":
if char_max_len:
return f"varchar({char_max_len})"
return "varchar"
# character → char(n)
if dt == "character":
if char_max_len:
return f"char({char_max_len})"
return "char(1)"
# numeric → numeric(p,s)
if dt == "numeric":
if num_prec is not None and num_scale is not None:
return f"numeric({num_prec},{num_scale})"
if num_prec is not None:
return f"numeric({num_prec})"
return "numeric"
# USER-DEFINED → 使用 udt_name如 jsonb, geometry 等)
if dt == "user-defined":
return udt_name.lower()
# ARRAY → 使用 udt_name 去掉前缀 _
if dt == "array":
base = udt_name.lstrip("_").lower()
return f"{base}[]"
return dt
# ---------------------------------------------------------------------------
# 对比逻辑
# ---------------------------------------------------------------------------
def compare_tables(
ddl_tables: dict[str, TableDef],
db_tables: dict[str, TableDef],
) -> list[SchemaDiff]:
"""对比 DDL 定义与数据库实际结构,返回差异列表。
差异分类:
- MISSING_TABLE: 数据库有但 DDL 没有
- EXTRA_TABLE: DDL 有但数据库没有
- MISSING_COLUMN: 数据库有但 DDL 没有的字段
- EXTRA_COLUMN: DDL 有但数据库没有的字段
- TYPE_MISMATCH: 字段类型不一致
- NULLABLE_MISMATCH: 可空约束不一致
"""
diffs: list[SchemaDiff] = []
all_tables = sorted(set(ddl_tables.keys()) | set(db_tables.keys()))
for tbl in all_tables:
in_ddl = tbl in ddl_tables
in_db = tbl in db_tables
if in_db and not in_ddl:
diffs.append(SchemaDiff(kind=DiffKind.MISSING_TABLE, table=tbl))
continue
if in_ddl and not in_db:
diffs.append(SchemaDiff(kind=DiffKind.EXTRA_TABLE, table=tbl))
continue
# 两边都有,逐字段对比
# 视图仅检查存在性,跳过列级对比
ddl_def = ddl_tables[tbl]
if getattr(ddl_def, 'is_view', False):
continue
ddl_cols = ddl_def.columns
db_cols = db_tables[tbl].columns
all_cols = sorted(set(ddl_cols.keys()) | set(db_cols.keys()))
for col in all_cols:
col_in_ddl = col in ddl_cols
col_in_db = col in db_cols
if col_in_db and not col_in_ddl:
diffs.append(SchemaDiff(
kind=DiffKind.MISSING_COLUMN,
table=tbl,
column=col,
db_value=db_cols[col].data_type,
))
continue
if col_in_ddl and not col_in_db:
diffs.append(SchemaDiff(
kind=DiffKind.EXTRA_COLUMN,
table=tbl,
column=col,
ddl_value=ddl_cols[col].data_type,
))
continue
# 两边都有,比较类型
ddl_type = ddl_cols[col].data_type
db_type = db_cols[col].data_type
# 视图列从 DDL 解析时类型为 unknown跳过类型比较
if ddl_type != db_type and ddl_type != "unknown":
diffs.append(SchemaDiff(
kind=DiffKind.TYPE_MISMATCH,
table=tbl,
column=col,
ddl_value=ddl_type,
db_value=db_type,
))
# 比较可空性(视图列跳过)
ddl_nullable = ddl_cols[col].nullable
db_nullable = db_cols[col].nullable
if ddl_nullable != db_nullable and ddl_type != "unknown":
diffs.append(SchemaDiff(
kind=DiffKind.NULLABLE_MISMATCH,
table=tbl,
column=col,
ddl_value="NULL" if ddl_nullable else "NOT NULL",
db_value="NULL" if db_nullable else "NOT NULL",
))
return diffs
def compare_schema(ddl_path: str, schema_name: str, pg_dsn: str) -> list[SchemaDiff]:
"""对比 DDL 文件与数据库 schema 的完整流程。
Args:
ddl_path: DDL 文件路径
schema_name: 数据库 schema 名称
pg_dsn: PostgreSQL 连接字符串
Returns:
差异列表
"""
path = Path(ddl_path)
if not path.exists():
print(f"✗ DDL 文件不存在: {ddl_path}", file=sys.stderr)
return []
sql_text = path.read_text(encoding="utf-8")
ddl_tables = parse_ddl(sql_text, target_schema=schema_name)
if not ddl_tables:
print(f"⚠ DDL 文件中未解析到任何表: {ddl_path}", file=sys.stderr)
db_tables = fetch_db_schema(pg_dsn, schema_name)
return compare_tables(ddl_tables, db_tables)
# ---------------------------------------------------------------------------
# 报告输出
# ---------------------------------------------------------------------------
def print_report(diffs: list[SchemaDiff], schema_name: str, ddl_path: str) -> None:
"""按表分组输出差异报告到控制台。"""
if not diffs:
print(f"\n{schema_name} ({ddl_path}): 无差异")
return
print(f"\n{'='*60}")
print(f" 差异报告: {schema_name}{ddl_path}")
print(f"{len(diffs)} 项差异")
print(f"{'='*60}")
# 按表分组
by_table: dict[str, list[SchemaDiff]] = {}
for d in diffs:
by_table.setdefault(d.table, []).append(d)
for tbl in sorted(by_table.keys()):
items = by_table[tbl]
print(f"\n{tbl}")
for d in items:
icon = {
DiffKind.MISSING_TABLE: "🔴 DDL 缺表",
DiffKind.EXTRA_TABLE: "🟡 DDL 多表",
DiffKind.MISSING_COLUMN: "🔴 DDL 缺字段",
DiffKind.EXTRA_COLUMN: "🟡 DDL 多字段",
DiffKind.TYPE_MISMATCH: "🟠 类型不一致",
DiffKind.NULLABLE_MISMATCH: "🔵 可空不一致",
}.get(d.kind, d.kind.value)
if d.column:
detail = f" {icon}: {d.column}"
else:
detail = f" {icon}"
if d.ddl_value is not None or d.db_value is not None:
detail += f" (DDL={d.ddl_value}, DB={d.db_value})"
print(detail)
print()
# ---------------------------------------------------------------------------
# CLI 入口
# ---------------------------------------------------------------------------
# 预定义的 schema → DDL 文件映射
DEFAULT_SCHEMA_MAP: dict[str, str] = {
"billiards_ods": "database/schema_ODS_doc.sql",
"billiards_dwd": "database/schema_dwd_doc.sql",
"billiards_dws": "database/schema_dws.sql",
"etl_admin": "database/schema_etl_admin.sql",
}
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
description="对比 DDL 文件与数据库实际表结构",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
示例:
# 对比单个 schema
python scripts/compare_ddl_db.py --schema billiards_ods --ddl-path database/schema_ODS_doc.sql
# 对比所有预定义 schema从 .env 读取 PG_DSN
python scripts/compare_ddl_db.py --all
# 指定连接字符串
python scripts/compare_ddl_db.py --all --pg-dsn "postgresql://user:pass@host/db"
""",
)
parser.add_argument("--pg-dsn", help="PostgreSQL 连接字符串(默认从 PG_DSN 环境变量或 .env 读取)")
parser.add_argument("--schema", help="要对比的 schema 名称")
parser.add_argument("--ddl-path", help="DDL 文件路径")
parser.add_argument("--all", action="store_true", help="对比所有预定义 schema")
args = parser.parse_args(argv)
# 加载 .env
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
pg_dsn = args.pg_dsn or os.environ.get("PG_DSN")
if not pg_dsn:
print("✗ 未提供 PG_DSN请通过 --pg-dsn 参数或 PG_DSN 环境变量指定", file=sys.stderr)
return 1
# 确定要对比的 schema 列表
pairs: list[tuple[str, str]] = []
if args.all:
for schema, ddl in DEFAULT_SCHEMA_MAP.items():
pairs.append((schema, ddl))
elif args.schema and args.ddl_path:
pairs.append((args.schema, args.ddl_path))
elif args.schema:
# 尝试从预定义映射中查找
ddl = DEFAULT_SCHEMA_MAP.get(args.schema)
if ddl:
pairs.append((args.schema, ddl))
else:
print(f"✗ 未知 schema '{args.schema}',请通过 --ddl-path 指定 DDL 文件", file=sys.stderr)
return 1
else:
parser.print_help()
return 1
total_diffs = 0
for schema_name, ddl_path in pairs:
if not Path(ddl_path).exists():
print(f"⚠ DDL 文件不存在,跳过: {ddl_path}", file=sys.stderr)
continue
try:
diffs = compare_schema(ddl_path, schema_name, pg_dsn)
except Exception as e:
print(f"✗ 对比 {schema_name} 时出错: {e}", file=sys.stderr)
continue
print_report(diffs, schema_name, ddl_path)
total_diffs += len(diffs)
if total_diffs > 0:
print(f"共发现 {total_diffs} 项差异")
return 1
print("所有 schema 对比通过,无差异 ✓")
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,373 @@
# -*- coding: utf-8 -*-
"""
比对 ODS 数据库实际列 vs docs/api-reference/summary/*.md 文档中的响应字段。
改进版:
1. 只提取"响应字段详解"章节的字段(排除请求参数)
2. 同时用 camelCase 原名和 snake_case 转换名做双向匹配
3. 对 ODS 连写小写列名(如 siteid也尝试匹配 camelCase如 siteId
用法: python scripts/compare_ods_vs_summary_v2.py
"""
import os, re, sys, json
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
load_dotenv()
SUMMARY_DIR = Path("docs/api-reference/summary")
ODS_SCHEMA = "billiards_ods"
META_COLS = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
# CHANGE P20260214-170000: 从全局黑名单移除 start_time/end_time/starttime/endtime
# intent: 这些字段在部分 API 中是请求参数,但在 assistant_accounts_master、
# group_buy_packages、member_stored_value_cards 中是真正的响应业务字段。
# 全局过滤会导致误报"ODS有/MD无"。
# assumptions: 请求参数的 startTime/endTime 不会出现在"响应字段详解"章节中
# extract_response_fields 已限定只提取该章节),因此无需在此处过滤。
# 请求参数(不应出现在 ODS 列比对中)
# 注意start_time/end_time 不在此列表中——它们在多张表中是响应业务字段,
# 而作为请求参数时已被 extract_response_fields 的章节限定逻辑排除。
REQUEST_PARAMS = {
"page", "limit",
"rangestarttime", "rangeendtime", "range_start_time", "range_end_time",
"startpaytime", "endpaytime", "start_pay_time", "end_pay_time",
"siteid_param", "settletype_param", "paymentmethod_param",
"isfirst_param", "goodssalestype", "goods_sales_type",
"issalesbind", "is_sales_bind", "existsgoodsstock", "exists_goods_stock",
"goodssecondcategoryid_param", "goodsstate_param",
"querytype", "query_type", "issalemanuser", "is_sale_man_user",
"couponusestatus", "coupon_use_status",
"total", # 分页 total 不是业务字段
}
# CHANGE P20260214-210000: 添加包装器/容器字段忽略列表
# intent: 某些 API 响应中的顶层字段是数组/对象容器(如 goodsCategoryList),
# ODS 穿透存储其子元素而非容器本身;MD 文档中记录了容器字段,但 ODS 无对应列
# assumptions: 这些字段在 ODS 中不建列,其子元素已被展开存储
WRAPPER_FIELDS = {
"goodscategorylist", # stock_goods_category_tree: 分类树的上级数组节点
}
DSN = os.getenv("PG_DSN") or os.getenv("DATABASE_URL")
if not DSN:
print("ERROR: 需要设置 PG_DSN 或 DATABASE_URL 环境变量", file=sys.stderr)
sys.exit(1)
def get_ods_columns(conn):
cur = conn.cursor()
cur.execute("""
SELECT table_name, column_name
FROM information_schema.columns
WHERE table_schema = %s
ORDER BY table_name, ordinal_position
""", (ODS_SCHEMA,))
result = {}
for table_name, col_name in cur.fetchall():
result.setdefault(table_name, set()).add(col_name)
cur.close()
return result
def camel_to_snake(name):
"""camelCase / PascalCase → snake_case"""
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
def extract_response_fields(md_path: Path) -> set:
"""
只提取"四、响应字段详解"章节中的字段名。
排除请求参数和 siteProfile 子字段。
"""
text = md_path.read_text(encoding="utf-8")
fields = set()
# 找到"响应字段详解"章节的起始位置
response_start = None
for pattern in [
r'##\s*四、响应字段详解',
r'##\s*四、.*响应字段',
r'##\s*响应字段详解',
r'###\s*4\.',
]:
m = re.search(pattern, text)
if m:
response_start = m.start()
break
if response_start is None:
# 回退:提取所有表格字段
response_text = text
else:
# 找到下一个同级章节(## 五、或 ## 五 或文件结尾)
next_section = re.search(r'\n##\s*(五|六|七|八|九|十|5|6|7|8|9)', text[response_start + 10:])
if next_section:
response_text = text[response_start:response_start + 10 + next_section.start()]
else:
response_text = text[response_start:]
# 从响应字段章节提取表格中的字段名
# 匹配 | `fieldName` | 或 | fieldName | 格式
table_pattern = re.compile(
r'^\|\s*`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\|',
re.MULTILINE
)
# CHANGE P20260214-200000: 用分隔行检测替代 skip_words 硬编码
# intent: skip_words 方式会误杀与表头词同名的业务字段(如 remark、type、note),
# 改为利用 Markdown 表格固定结构(表头行 → 分隔行 → 数据行)来跳过表头
# assumptions: 所有 summary MD 文档的表格均遵循标准 Markdown 格式,
# 分隔行匹配 |---...| 模式,分隔行的前一行即为表头行
separator_pattern = re.compile(r'^\|[\s\-:|]+\|', re.MULTILINE)
lines = response_text.split('\n')
# 标记哪些行是表头行(分隔行的前一行)
header_lines = set()
for i, line in enumerate(lines):
if separator_pattern.match(line) and i > 0:
header_lines.add(i - 1)
# 跟踪是否在 siteProfile/tableProfile 子字段展开区域中
# CHANGE P20260214-210000: 修复 siteProfile 子节跳过逻辑
# intent: 之前的逻辑会跳过整个 siteProfile 子节(包括 siteProfile 字段本身),
# 但 siteProfile 作为 object/jsonb 字段应该被提取,只需跳过其展开的子字段
# assumptions: siteProfile/tableProfile 子节标题后紧跟的表格中,第一行是 siteProfile 字段本身
# (应保留),后续行是展开的子字段(应跳过)。
# 如果子节只有一行siteProfile 本身),则不跳过任何内容。
in_site_profile = False
site_profile_field_seen = False
for i, line in enumerate(lines):
# 检测 siteProfile/tableProfile 子节标题
if re.search(r'siteProfile|门店信息快照|tableProfile|台桌信息快照', line, re.IGNORECASE):
if '###' in line or '####' in line:
in_site_profile = True
site_profile_field_seen = False
continue
# 检测离开 siteProfile 子节(遇到下一个同级或更高级标题)
if in_site_profile and re.match(r'\s*#{2,4}\s+', line):
if not re.search(r'siteProfile|tableProfile|门店信息快照|台桌信息快照', line, re.IGNORECASE):
in_site_profile = False
site_profile_field_seen = False
# 在 siteProfile 子节中:保留 siteProfile/tableProfile 字段本身,跳过展开的子字段
if in_site_profile:
m_check = table_pattern.match(line)
if m_check:
field_name = m_check.group(1).strip().lower()
if field_name in ('siteprofile', 'tableprofile') and not site_profile_field_seen:
# 这是 siteProfile/tableProfile 字段本身,保留(不跳过)
site_profile_field_seen = True
# 不 continue让下面的提取逻辑处理
else:
# 这是展开的子字段,跳过
continue
elif i not in header_lines and not separator_pattern.match(line):
# 非表格行(空行、标题等),不跳过
pass
# 跳过表头行(分隔行的前一行)和分隔行本身
if i in header_lines or separator_pattern.match(line):
continue
m = table_pattern.match(line)
if m:
field = m.group(1).strip()
if not field.startswith('---'):
fields.add(field)
return fields
def match_fields(md_fields: set, ods_cols: set):
"""
智能匹配 MD 字段和 ODS 列。
返回 (matched, md_only, ods_only)
"""
matched = set()
md_remaining = set()
ods_remaining = set(ods_cols)
# 构建 ODS 列的查找索引
ods_lower = {c.lower(): c for c in ods_cols}
# 也构建去下划线版本 → 原名映射
ods_no_underscore = {}
for c in ods_cols:
key = c.lower().replace("_", "")
ods_no_underscore.setdefault(key, c)
for field in md_fields:
field_lower = field.lower()
field_snake = camel_to_snake(field).lower()
field_no_sep = field_lower.replace("_", "")
found = False
# 1. 精确匹配(小写)
if field_lower in ods_lower:
matched.add((field, ods_lower[field_lower]))
ods_remaining.discard(ods_lower[field_lower])
found = True
# 2. snake_case 匹配
elif field_snake in ods_lower:
matched.add((field, ods_lower[field_snake]))
ods_remaining.discard(ods_lower[field_snake])
found = True
# 3. 去下划线匹配(处理 camelCase vs 连写小写)
elif field_no_sep in ods_no_underscore:
matched.add((field, ods_no_underscore[field_no_sep]))
ods_remaining.discard(ods_no_underscore[field_no_sep])
found = True
if not found:
md_remaining.add(field)
return matched, md_remaining, ods_remaining
def is_request_param(field: str) -> bool:
"""判断字段是否为请求参数"""
f = field.lower().replace("_", "")
return f in {p.replace("_", "") for p in REQUEST_PARAMS}
def main():
conn = psycopg2.connect(DSN)
ods_tables = get_ods_columns(conn)
conn.close()
md_files = sorted(SUMMARY_DIR.glob("*.md"))
report = []
for md_path in md_files:
table_name = md_path.stem
md_fields_raw = extract_response_fields(md_path)
# 过滤请求参数和包装器字段
md_fields = {f for f in md_fields_raw
if not is_request_param(f)
and f.lower() not in WRAPPER_FIELDS}
if table_name not in ods_tables:
report.append({
"table": table_name,
"status": "NO_ODS_TABLE",
"md_fields_count": len(md_fields),
"note": "summary 文档存在但 ODS 中无对应表"
})
continue
ods_cols = ods_tables[table_name] - META_COLS
matched, md_only, ods_only = match_fields(md_fields, ods_cols)
if md_only or ods_only:
report.append({
"table": table_name,
"status": "DIFF",
"ods_count": len(ods_cols),
"md_count": len(md_fields),
"matched": len(matched),
"md_only": sorted(md_only),
"ods_only": sorted(ods_only),
})
else:
report.append({
"table": table_name,
"status": "MATCH",
"ods_count": len(ods_cols),
"md_count": len(md_fields),
"matched": len(matched),
})
# 检查 ODS 中有但 summary 中没有的表
md_table_names = {p.stem for p in md_files}
for t in sorted(ods_tables.keys()):
if t not in md_table_names:
report.append({
"table": t,
"status": "NO_MD_FILE",
"ods_count": len(ods_tables[t] - META_COLS),
"note": "ODS 表存在但无对应 summary 文档"
})
# 输出
print(f"\n{'='*70}")
print(f"ODS vs Summary 字段比对报告 (v2 — 仅响应字段,智能匹配)")
print(f"ODS 表数: {len(ods_tables)} | Summary 文档数: {len(md_files)}")
print(f"{'='*70}\n")
match_count = sum(1 for r in report if r["status"] == "MATCH")
diff_count = sum(1 for r in report if r["status"] == "DIFF")
no_ods = sum(1 for r in report if r["status"] == "NO_ODS_TABLE")
print(f"完全匹配: {match_count} | 有差异: {diff_count} | 无ODS表: {no_ods}\n")
for entry in report:
if entry["status"] == "MATCH":
print(f"{entry['table']} — 完全匹配 (匹配:{entry['matched']} ODS:{entry['ods_count']} MD:{entry['md_count']})")
elif entry["status"] == "DIFF":
print(f"\n{entry['table']} — 有差异 (匹配:{entry['matched']} ODS:{entry['ods_count']} MD:{entry['md_count']})")
if entry["md_only"]:
print(f" 📄 MD有/ODS无 ({len(entry['md_only'])}): {', '.join(entry['md_only'])}")
if entry["ods_only"]:
print(f" 🗄️ ODS有/MD无 ({len(entry['ods_only'])}): {', '.join(entry['ods_only'])}")
elif entry["status"] == "NO_ODS_TABLE":
print(f"\n ⚠️ {entry['table']}{entry['note']} (MD字段数: {entry['md_fields_count']})")
elif entry["status"] == "NO_MD_FILE":
print(f"\n ⚠️ {entry['table']}{entry['note']} (ODS字段数: {entry['ods_count']})")
# JSON 输出
json_path = Path("docs/reports/ods_vs_summary_comparison_v2.json")
json_path.parent.mkdir(parents=True, exist_ok=True)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(report, f, ensure_ascii=False, indent=2)
print(f"\n📁 JSON 报告: {json_path}")
if __name__ == "__main__":
main()
# AI_CHANGELOG:
# - 日期: 2026-02-14
# Prompt: P20260214-150000 — ODS 数据库结构 vs summary MD 文档字段比对
# 直接原因: 用户要求通过查询 billiards_ods schema 与 25 个 summary MD 文档进行字段比对
# 变更摘要: 新建 v2 比对脚本,改进点:(1) 仅提取"响应字段详解"章节,排除请求参数;
# (2) 三重匹配(精确/camelCase→snake_case/去下划线);(3) 跳过 siteProfile 子字段
# 风险与验证: 纯分析脚本,无运行时影响;验证:python scripts/compare_ods_vs_summary_v2.py
#
# - 日期: 2026-02-14
# Prompt: P20260214-170000 — assistant_accounts_master 的 start_time/end_time 误报修复
# 直接原因: REQUEST_PARAMS 全局黑名单包含 start_time/end_time但这些字段在 3 张表中是响应业务字段,
# 且仅对 MD 侧过滤,未对 ODS 侧过滤,导致假差异
# 变更摘要: 从 REQUEST_PARAMS 移除 start_time/end_time/starttime/endtime 4 个值,
# 添加 CHANGE 标记注释说明原因
# 风险与验证: 验证:python scripts/compare_ods_vs_summary_v2.py确认 assistant_accounts_master、
# member_stored_value_cards 变为完全匹配;group_buy_packages 不再误报 start_time/end_time
#
# - 日期: 2026-02-14
# Prompt: P20260214-190000 — goods_stock_movements 的 remark 字段误报修复
# 直接原因: skip_words 集合包含 'remark'(本意过滤表头词),但 remark 在 goods_stock_movements、
# member_balance_changes、store_goods_master 中是真实业务字段名,导致被误过滤为表头词
# 变更摘要: 从 skip_words 移除 'remark' 和 'note',添加 CHANGE 标记注释
# 风险与验证: 验证:python scripts/compare_ods_vs_summary_v2.py完全匹配从 12→14
# goods_stock_movements(19/19)、member_balance_changes(28/28) 变为完全匹配
#
# - 日期: 2026-02-14
# Prompt: P20260214-200000 — group_buy_packages 的 type 字段误报修复
# 直接原因: skip_words 硬编码方式无法区分表头词和同名业务字段type/remark/note 等),
# 根本原因是过滤策略错误——应该用 Markdown 表格结构(分隔行检测)来跳过表头行
# 变更摘要: 用分隔行检测separator_pattern + header_lines替代 skip_words 硬编码,
# 彻底消除"表头词 vs 业务字段同名"的误过滤问题
# 风险与验证: 验证:python scripts/compare_ods_vs_summary_v2.py
# group_buy_packages 的 type 正确匹配(匹配 39ODS有/MD无 不再包含 type)
#
# - 日期: 2026-02-14
# Prompt: P20260214-210000 — siteProfile 误跳过 + goodsCategoryList 包装器字段忽略
# 直接原因: (1) siteProfile 子节跳过逻辑会跳过 siteProfile 字段本身,但它在 table_fee_transactions、
# platform_coupon_redemption_records 等表中是 object/jsonb 字段,应被提取;
# (2) goodsCategoryList 是 stock_goods_category_tree 的上级数组容器节点ODS 穿透存储子元素
# 变更摘要: (1) 重写 siteProfile 子节跳过逻辑,保留 siteProfile/tableProfile 字段本身,只跳过展开的子字段
# (2) 新增 WRAPPER_FIELDS 忽略列表,过滤 goodsCategoryList
# 风险与验证: 验证:python scripts/compare_ods_vs_summary_v2.py完全匹配从 14→17

View File

@@ -0,0 +1,605 @@
# -*- coding: utf-8 -*-
"""
DWS Excel导入脚本
功能说明:
支持三类Excel数据的导入
1. 支出结构dws_finance_expense_summary
2. 平台结算dws_platform_settlement
3. 充值提成dws_assistant_recharge_commission
导入规范:
- 字段定义:按照目标表字段要求
- 时间粒度:支出按月,平台结算按日,充值提成按月
- 门店维度使用配置的site_id
- 去重规则按import_batch_no去重
- 校验规则:金额字段非负,日期格式校验
使用方式:
python import_dws_excel.py --type expense --file expenses.xlsx
python import_dws_excel.py --type platform --file platform_settlement.xlsx
python import_dws_excel.py --type commission --file recharge_commission.xlsx
作者ETL团队
创建日期2026-02-01
"""
import argparse
import os
import sys
import uuid
from datetime import date, datetime
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# 添加项目根目录到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
try:
import pandas as pd
except ImportError:
print("请安装 pandas: pip install pandas openpyxl")
sys.exit(1)
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
# =============================================================================
# 常量定义
# =============================================================================
# 支出类型枚举
EXPENSE_TYPES = {
'房租': 'RENT',
'水电费': 'UTILITY',
'物业费': 'PROPERTY',
'工资': 'SALARY',
'报销': 'REIMBURSE',
'平台服务费': 'PLATFORM_FEE',
'其他': 'OTHER',
}
# 支出大类映射
EXPENSE_CATEGORIES = {
'RENT': 'FIXED_COST',
'UTILITY': 'VARIABLE_COST',
'PROPERTY': 'FIXED_COST',
'SALARY': 'FIXED_COST',
'REIMBURSE': 'VARIABLE_COST',
'PLATFORM_FEE': 'VARIABLE_COST',
'OTHER': 'OTHER',
}
# 平台类型枚举
PLATFORM_TYPES = {
'美团': 'MEITUAN',
'抖音': 'DOUYIN',
'大众点评': 'DIANPING',
'其他': 'OTHER',
}
# =============================================================================
# 导入基类
# =============================================================================
class BaseImporter:
"""导入基类"""
    def __init__(self, config: AppConfig, db: DatabaseConnection):
self.config = config
self.db = db
self.site_id = config.get("app.store_id")
self.tenant_id = config.get("app.tenant_id", self.site_id)
self.batch_no = self._generate_batch_no()
def _generate_batch_no(self) -> str:
"""生成导入批次号"""
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
unique_id = str(uuid.uuid4())[:8]
return f"{timestamp}_{unique_id}"
def _safe_decimal(self, value: Any, default: Decimal = Decimal('0')) -> Decimal:
"""安全转换为Decimal"""
if value is None or pd.isna(value):
return default
try:
return Decimal(str(value))
except (ValueError, InvalidOperation):
return default
def _safe_date(self, value: Any) -> Optional[date]:
"""安全转换为日期"""
if value is None or pd.isna(value):
return None
if isinstance(value, datetime):
return value.date()
if isinstance(value, date):
return value
try:
return pd.to_datetime(value).date()
        except Exception:  # 日期格式无法解析时返回 None
return None
def _safe_month(self, value: Any) -> Optional[date]:
"""安全转换为月份(月第一天)"""
dt = self._safe_date(value)
if dt:
return dt.replace(day=1)
return None
def import_file(self, file_path: str) -> Dict[str, Any]:
"""导入文件"""
raise NotImplementedError
def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
"""校验行数据,返回错误列表"""
return []
def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
"""转换行数据"""
raise NotImplementedError
def insert_records(self, records: List[Dict[str, Any]]) -> int:
"""插入记录"""
raise NotImplementedError
# =============================================================================
# 支出导入
# =============================================================================
class ExpenseImporter(BaseImporter):
"""
支出导入
Excel格式要求
- 月份: 2026-01 或 2026/01/01 格式
- 支出类型: 房租/水电费/物业费/工资/报销/平台服务费/其他
- 金额: 数字
- 备注: 可选
"""
TARGET_TABLE = "billiards_dws.dws_finance_expense_summary"
REQUIRED_COLUMNS = ['月份', '支出类型', '金额']
OPTIONAL_COLUMNS = ['明细', '备注']
def import_file(self, file_path: str) -> Dict[str, Any]:
"""导入支出Excel"""
print(f"开始导入支出文件: {file_path}")
# 读取Excel
df = pd.read_excel(file_path)
# 校验必要列
missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
if missing_cols:
return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}
# 处理数据
records = []
errors = []
for idx, row in df.iterrows():
row_dict = row.to_dict()
row_errors = self.validate_row(row_dict, idx + 2) # Excel行号从2开始
if row_errors:
errors.extend(row_errors)
continue
record = self.transform_row(row_dict)
records.append(record)
if errors:
print(f"校验错误: {len(errors)}")
for err in errors[:10]:
print(f" - {err}")
# 插入数据
inserted = 0
if records:
inserted = self.insert_records(records)
return {
"status": "SUCCESS" if not errors else "PARTIAL",
"batch_no": self.batch_no,
"total_rows": len(df),
"inserted": inserted,
"errors": len(errors),
"error_messages": errors[:10]
}
def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
errors = []
# 校验月份
month = self._safe_month(row.get('月份'))
if not month:
errors.append(f"{row_idx}: 月份格式错误")
        # 校验支出类型(空单元格在 pandas 中为 NaN需先安全转为字符串再比对
        raw_type = row.get('支出类型')
        expense_type = '' if raw_type is None or pd.isna(raw_type) else str(raw_type).strip()
        if expense_type not in EXPENSE_TYPES:
            errors.append(f"第{row_idx}行: 支出类型无效 '{expense_type}'")
# 校验金额
amount = self._safe_decimal(row.get('金额'))
if amount < 0:
errors.append(f"{row_idx}: 金额不能为负数")
return errors
def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
expense_type_name = row.get('支出类型', '').strip()
expense_type_code = EXPENSE_TYPES.get(expense_type_name, 'OTHER')
expense_category = EXPENSE_CATEGORIES.get(expense_type_code, 'OTHER')
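        # 例如:'房租' → ('RENT', 'FIXED_COST'),'平台服务费' → ('PLATFORM_FEE', 'VARIABLE_COST')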
return {
'site_id': self.site_id,
'tenant_id': self.tenant_id,
'expense_month': self._safe_month(row.get('月份')),
'expense_type_code': expense_type_code,
'expense_type_name': expense_type_name,
'expense_category': expense_category,
'expense_amount': self._safe_decimal(row.get('金额')),
'expense_detail': row.get('明细'),
'import_batch_no': self.batch_no,
'import_file_name': os.path.basename(str(row.get('_file_path', ''))),
'import_time': datetime.now(),
'import_user': os.getenv('USERNAME', 'system'),
'remark': row.get('备注'),
}
def insert_records(self, records: List[Dict[str, Any]]) -> int:
columns = [
'site_id', 'tenant_id', 'expense_month', 'expense_type_code',
'expense_type_name', 'expense_category', 'expense_amount',
'expense_detail', 'import_batch_no', 'import_file_name',
'import_time', 'import_user', 'remark'
]
cols_str = ", ".join(columns)
placeholders = ", ".join(["%s"] * len(columns))
sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"
inserted = 0
with self.db.conn.cursor() as cur:
for record in records:
values = [record.get(col) for col in columns]
cur.execute(sql, values)
inserted += cur.rowcount
self.db.commit()
return inserted
# =============================================================================
# 平台结算导入
# =============================================================================
class PlatformSettlementImporter(BaseImporter):
"""
平台结算导入
Excel格式要求
- 回款日期: 日期格式
- 平台类型: 美团/抖音/大众点评/其他
- 平台订单号: 字符串
- 订单原始金额: 数字
- 佣金: 数字
- 服务费: 数字
- 回款金额: 数字
- 备注: 可选
"""
TARGET_TABLE = "billiards_dws.dws_platform_settlement"
REQUIRED_COLUMNS = ['回款日期', '平台类型', '回款金额']
OPTIONAL_COLUMNS = ['平台订单号', '订单原始金额', '佣金', '服务费', '关联订单ID', '备注']
def import_file(self, file_path: str) -> Dict[str, Any]:
print(f"开始导入平台结算文件: {file_path}")
df = pd.read_excel(file_path)
missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
if missing_cols:
return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}
records = []
errors = []
for idx, row in df.iterrows():
row_dict = row.to_dict()
row_errors = self.validate_row(row_dict, idx + 2)
if row_errors:
errors.extend(row_errors)
continue
record = self.transform_row(row_dict)
records.append(record)
if errors:
print(f"校验错误: {len(errors)}")
for err in errors[:10]:
print(f" - {err}")
inserted = 0
if records:
inserted = self.insert_records(records)
return {
"status": "SUCCESS" if not errors else "PARTIAL",
"batch_no": self.batch_no,
"total_rows": len(df),
"inserted": inserted,
"errors": len(errors),
}
def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
errors = []
settlement_date = self._safe_date(row.get('回款日期'))
if not settlement_date:
errors.append(f"{row_idx}: 回款日期格式错误")
        raw_type = row.get('平台类型')
        platform_type = '' if raw_type is None or pd.isna(raw_type) else str(raw_type).strip()
        if platform_type not in PLATFORM_TYPES:
            errors.append(f"第{row_idx}行: 平台类型无效 '{platform_type}'")
amount = self._safe_decimal(row.get('回款金额'))
if amount < 0:
errors.append(f"{row_idx}: 回款金额不能为负数")
return errors
def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
platform_name = row.get('平台类型', '').strip()
platform_type = PLATFORM_TYPES.get(platform_name, 'OTHER')
return {
'site_id': self.site_id,
'tenant_id': self.tenant_id,
'settlement_date': self._safe_date(row.get('回款日期')),
'platform_type': platform_type,
'platform_name': platform_name,
'platform_order_no': row.get('平台订单号'),
'order_settle_id': row.get('关联订单ID'),
'settlement_amount': self._safe_decimal(row.get('回款金额')),
'commission_amount': self._safe_decimal(row.get('佣金')),
'service_fee': self._safe_decimal(row.get('服务费')),
'gross_amount': self._safe_decimal(row.get('订单原始金额')),
'import_batch_no': self.batch_no,
'import_file_name': os.path.basename(str(row.get('_file_path', ''))),
'import_time': datetime.now(),
'import_user': os.getenv('USERNAME', 'system'),
'remark': row.get('备注'),
}
def insert_records(self, records: List[Dict[str, Any]]) -> int:
columns = [
'site_id', 'tenant_id', 'settlement_date', 'platform_type',
'platform_name', 'platform_order_no', 'order_settle_id',
'settlement_amount', 'commission_amount', 'service_fee',
'gross_amount', 'import_batch_no', 'import_file_name',
'import_time', 'import_user', 'remark'
]
cols_str = ", ".join(columns)
placeholders = ", ".join(["%s"] * len(columns))
sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"
inserted = 0
with self.db.conn.cursor() as cur:
for record in records:
values = [record.get(col) for col in columns]
cur.execute(sql, values)
inserted += cur.rowcount
self.db.commit()
return inserted
# =============================================================================
# 充值提成导入
# =============================================================================
class RechargeCommissionImporter(BaseImporter):
"""
充值提成导入
Excel格式要求
- 月份: 2026-01 格式
- 助教ID: 数字
- 助教花名: 字符串
- 充值订单金额: 数字
- 提成金额: 数字
- 充值订单号: 可选
- 备注: 可选
"""
TARGET_TABLE = "billiards_dws.dws_assistant_recharge_commission"
REQUIRED_COLUMNS = ['月份', '助教ID', '提成金额']
OPTIONAL_COLUMNS = ['助教花名', '充值订单金额', '充值订单ID', '充值订单号', '备注']
def import_file(self, file_path: str) -> Dict[str, Any]:
print(f"开始导入充值提成文件: {file_path}")
df = pd.read_excel(file_path)
missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
if missing_cols:
return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}
records = []
errors = []
for idx, row in df.iterrows():
row_dict = row.to_dict()
row_errors = self.validate_row(row_dict, idx + 2)
if row_errors:
errors.extend(row_errors)
continue
record = self.transform_row(row_dict)
records.append(record)
if errors:
print(f"校验错误: {len(errors)}")
for err in errors[:10]:
print(f" - {err}")
inserted = 0
if records:
inserted = self.insert_records(records)
return {
"status": "SUCCESS" if not errors else "PARTIAL",
"batch_no": self.batch_no,
"total_rows": len(df),
"inserted": inserted,
"errors": len(errors),
}
def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
errors = []
month = self._safe_month(row.get('月份'))
if not month:
errors.append(f"{row_idx}: 月份格式错误")
assistant_id = row.get('助教ID')
if assistant_id is None or pd.isna(assistant_id):
errors.append(f"{row_idx}: 助教ID不能为空")
amount = self._safe_decimal(row.get('提成金额'))
if amount < 0:
errors.append(f"{row_idx}: 提成金额不能为负数")
return errors
def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
recharge_amount = self._safe_decimal(row.get('充值订单金额'))
commission_amount = self._safe_decimal(row.get('提成金额'))
commission_ratio = commission_amount / recharge_amount if recharge_amount > 0 else None
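        # 例如(示例数值为虚构):充值订单金额 1000、提成金额 80 → commission_ratio = 0.08;充值金额为 0 或缺失时记为 None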
return {
'site_id': self.site_id,
'tenant_id': self.tenant_id,
'assistant_id': int(row.get('助教ID')),
'assistant_nickname': row.get('助教花名'),
'commission_month': self._safe_month(row.get('月份')),
'recharge_order_id': row.get('充值订单ID'),
'recharge_order_no': row.get('充值订单号'),
'recharge_amount': recharge_amount,
'commission_amount': commission_amount,
'commission_ratio': commission_ratio,
'import_batch_no': self.batch_no,
'import_file_name': os.path.basename(str(row.get('_file_path', ''))),
'import_time': datetime.now(),
'import_user': os.getenv('USERNAME', 'system'),
'remark': row.get('备注'),
}
def insert_records(self, records: List[Dict[str, Any]]) -> int:
columns = [
'site_id', 'tenant_id', 'assistant_id', 'assistant_nickname',
'commission_month', 'recharge_order_id', 'recharge_order_no',
'recharge_amount', 'commission_amount', 'commission_ratio',
'import_batch_no', 'import_file_name', 'import_time',
'import_user', 'remark'
]
cols_str = ", ".join(columns)
placeholders = ", ".join(["%s"] * len(columns))
sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"
inserted = 0
with self.db.conn.cursor() as cur:
for record in records:
values = [record.get(col) for col in columns]
cur.execute(sql, values)
inserted += cur.rowcount
self.db.commit()
return inserted
# =============================================================================
# 主函数
# =============================================================================
def main():
parser = argparse.ArgumentParser(description='DWS Excel导入工具')
parser.add_argument(
'--type', '-t',
choices=['expense', 'platform', 'commission'],
required=True,
help='导入类型: expense(支出), platform(平台结算), commission(充值提成)'
)
parser.add_argument(
'--file', '-f',
required=True,
help='Excel文件路径'
)
args = parser.parse_args()
# 检查文件
if not os.path.exists(args.file):
print(f"文件不存在: {args.file}")
sys.exit(1)
# 加载配置
config = AppConfig.load()
    dsn = config.config["db"]["dsn"]
db_conn = DatabaseConnection(dsn=dsn)
db = DatabaseOperations(db_conn)
try:
# 选择导入器
if args.type == 'expense':
importer = ExpenseImporter(config, db)
elif args.type == 'platform':
importer = PlatformSettlementImporter(config, db)
elif args.type == 'commission':
importer = RechargeCommissionImporter(config, db)
else:
print(f"未知的导入类型: {args.type}")
sys.exit(1)
# 执行导入
result = importer.import_file(args.file)
# 输出结果
print("\n" + "=" * 50)
print("导入结果:")
print(f" 状态: {result.get('status')}")
print(f" 批次号: {result.get('batch_no')}")
print(f" 总行数: {result.get('total_rows')}")
print(f" 插入行数: {result.get('inserted')}")
print(f" 错误行数: {result.get('errors')}")
if result.get('status') == 'ERROR':
print(f" 错误信息: {result.get('message')}")
sys.exit(1)
except Exception as e:
print(f"导入失败: {e}")
db_conn.rollback()
sys.exit(1)
finally:
db_conn.close()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
"""Export cfg_index_parameters table to CSV."""
from __future__ import annotations
import argparse
import csv
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
sys.path.insert(0, ROOT)
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
FIELDS = [
"param_id",
"index_type",
"param_name",
"param_value",
"description",
"effective_from",
"effective_to",
"created_at",
"updated_at",
]
def _fetch_rows(db: DatabaseOperations, index_type: Optional[str]) -> List[Dict[str, Any]]:
base_sql = """
SELECT
param_id,
index_type,
param_name,
param_value,
description,
effective_from,
effective_to,
created_at,
updated_at
FROM billiards_dws.cfg_index_parameters
"""
args: List[Any] = []
if index_type:
base_sql += " WHERE index_type = %s"
args.append(index_type)
base_sql += " ORDER BY index_type, param_name, effective_from, param_id"
rows = db.query(base_sql, args if args else None)
return [dict(r) for r in (rows or [])]
def _write_csv(rows: List[Dict[str, Any]], out_csv: Path) -> None:
out_csv.parent.mkdir(parents=True, exist_ok=True)
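    # utf-8-sig prepends a BOM so that Excel detects UTF-8 and renders non-ASCII headers correctly.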
with out_csv.open("w", newline="", encoding="utf-8-sig") as f:
writer = csv.DictWriter(f, fieldnames=FIELDS)
writer.writeheader()
for row in rows:
writer.writerow({k: row.get(k) for k in FIELDS})
def main() -> None:
parser = argparse.ArgumentParser(description="Export cfg_index_parameters to CSV.")
parser.add_argument(
"--index-type",
default=None,
help="Optional index type filter (e.g. RECALL, INTIMACY, NCI, WBI).",
)
parser.add_argument(
"--output-csv",
default=os.path.join(ROOT, "docs", "cfg_index_parameters.csv"),
help="Output CSV path.",
)
args = parser.parse_args()
config = AppConfig.load()
db_conn = DatabaseConnection(config.config["db"]["dsn"])
db = DatabaseOperations(db_conn)
try:
rows = _fetch_rows(db, args.index_type)
out_csv = Path(args.output_csv)
_write_csv(rows, out_csv)
print(f"rows={len(rows)}")
print(f"csv={out_csv}")
finally:
db_conn.close()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,423 @@
# -*- coding: utf-8 -*-
"""Export groupbuy orders that used assistant services."""
from __future__ import annotations
import argparse
import csv
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
sys.path.insert(0, ROOT)
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
def _as_int(v: Any) -> Optional[int]:
if v is None or str(v).strip() == "":
return None
return int(v)
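# Site id resolution below falls back in order: --site-id CLI argument,
# then app.store_id from config, then the most frequent site_id in dwd_settlement_head.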
def _resolve_site_id(config: AppConfig, db: DatabaseOperations, cli_site_id: Optional[int]) -> int:
if cli_site_id is not None:
return int(cli_site_id)
from_cfg = _as_int(config.get("app.store_id"))
if from_cfg is not None:
return from_cfg
rows = db.query(
"""
SELECT site_id
FROM billiards_dwd.dwd_settlement_head
WHERE site_id IS NOT NULL
GROUP BY site_id
ORDER BY COUNT(*) DESC
LIMIT 1
"""
)
if rows:
return int(dict(rows[0])["site_id"])
raise RuntimeError("Unable to resolve site_id; pass --site-id explicitly.")
FIELD_ORDER: List[str] = [
"site_id",
"order_settle_id",
"order_trade_no",
"pay_time",
"settle_type",
"member_id",
"member_name",
"member_phone",
"table_id",
"table_name",
"table_area_name",
"settle_consume_money",
"settle_pay_amount",
"settle_coupon_amount",
"pl_coupon_sale_amount",
"groupbuy_item_count",
"groupbuy_pay_amount",
"groupbuy_ledger_amount",
"groupbuy_coupon_money",
"coupon_codes",
"groupbuy_items",
"assistant_service_count",
"assistant_count",
"assistant_nicknames",
"assistant_skills",
"assistant_real_use_seconds",
"assistant_projected_income",
"assistant_real_service_money",
]
ZH_HEADER_MAP: Dict[str, str] = {
"site_id": "门店ID",
"order_settle_id": "结账单ID",
"order_trade_no": "订单交易号",
"pay_time": "结账时间",
"settle_type": "结账类型",
"member_id": "会员ID",
"member_name": "会员姓名",
"member_phone": "会员手机号",
"table_id": "台桌ID",
"table_name": "台桌名称",
"table_area_name": "台区名称",
"settle_consume_money": "结算消费金额",
"settle_pay_amount": "结算实付金额",
"settle_coupon_amount": "结算团购抵扣金额",
"pl_coupon_sale_amount": "平台团购实付金额",
"groupbuy_item_count": "团购核销条目数",
"groupbuy_pay_amount": "团购实付合计",
"groupbuy_ledger_amount": "团购标价合计",
"groupbuy_coupon_money": "团购券面额合计",
"coupon_codes": "团购券码列表",
"groupbuy_items": "团购项目列表",
"assistant_service_count": "助教服务条目数",
"assistant_count": "助教人数",
"assistant_nicknames": "助教昵称列表",
"assistant_skills": "助教技能列表",
"assistant_real_use_seconds": "助教实际服务秒数",
"assistant_projected_income": "助教预计收入合计",
"assistant_real_service_money": "助教实收服务费合计",
}
def _fetch_rows_current(
db: DatabaseOperations,
site_id: int,
start_date: Optional[str],
end_date: Optional[str],
) -> List[Dict[str, Any]]:
sql = """
WITH gb AS (
SELECT
site_id,
order_settle_id,
COUNT(*) AS groupbuy_item_count,
ROUND(SUM(COALESCE(ledger_unit_price, 0))::numeric, 2) AS groupbuy_pay_amount,
ROUND(SUM(COALESCE(ledger_amount, 0))::numeric, 2) AS groupbuy_ledger_amount,
ROUND(SUM(COALESCE(coupon_money, 0))::numeric, 2) AS groupbuy_coupon_money,
STRING_AGG(DISTINCT NULLIF(coupon_code, ''), '?' ORDER BY NULLIF(coupon_code, '')) AS coupon_codes,
STRING_AGG(DISTINCT NULLIF(ledger_name, ''), '?' ORDER BY NULLIF(ledger_name, '')) AS groupbuy_items
FROM billiards_dwd.dwd_groupbuy_redemption
WHERE site_id = %s
AND is_delete = 0
GROUP BY site_id, order_settle_id
),
asv AS (
SELECT
site_id,
order_settle_id,
COUNT(*) AS assistant_service_count,
COUNT(DISTINCT NULLIF(assistant_no, '')) AS assistant_count,
STRING_AGG(DISTINCT NULLIF(nickname, ''), '?' ORDER BY NULLIF(nickname, '')) AS assistant_nicknames,
STRING_AGG(DISTINCT NULLIF(skill_name, ''), '?' ORDER BY NULLIF(skill_name, '')) AS assistant_skills,
ROUND(SUM(COALESCE(real_use_seconds, 0))::numeric, 0) AS assistant_real_use_seconds,
ROUND(SUM(COALESCE(projected_income, 0))::numeric, 2) AS assistant_projected_income,
ROUND(SUM(COALESCE(real_service_money, 0))::numeric, 2) AS assistant_real_service_money
FROM billiards_dwd.dwd_assistant_service_log
WHERE site_id = %s
AND is_delete = 0
GROUP BY site_id, order_settle_id
)
SELECT
sh.site_id,
sh.order_settle_id,
sh.order_trade_no,
sh.pay_time,
sh.settle_type,
sh.member_id,
COALESCE(dm.nickname, sh.member_name) AS member_name,
COALESCE(dm.mobile, sh.member_phone) AS member_phone,
sh.table_id,
dt.table_name,
dt.site_table_area_name AS table_area_name,
ROUND(COALESCE(sh.consume_money, 0)::numeric, 2) AS settle_consume_money,
ROUND(COALESCE(sh.pay_amount, 0)::numeric, 2) AS settle_pay_amount,
ROUND(COALESCE(sh.coupon_amount, 0)::numeric, 2) AS settle_coupon_amount,
ROUND(COALESCE(sh.pl_coupon_sale_amount, 0)::numeric, 2) AS pl_coupon_sale_amount,
gb.groupbuy_item_count,
gb.groupbuy_pay_amount,
gb.groupbuy_ledger_amount,
gb.groupbuy_coupon_money,
gb.coupon_codes,
gb.groupbuy_items,
asv.assistant_service_count,
asv.assistant_count,
asv.assistant_nicknames,
asv.assistant_skills,
asv.assistant_real_use_seconds,
asv.assistant_projected_income,
asv.assistant_real_service_money
FROM gb
JOIN asv
ON asv.site_id = gb.site_id
AND asv.order_settle_id = gb.order_settle_id
LEFT JOIN billiards_dwd.dwd_settlement_head sh
ON sh.site_id = gb.site_id
AND sh.order_settle_id = gb.order_settle_id
LEFT JOIN billiards_dwd.dim_member dm
ON dm.register_site_id = sh.site_id
AND dm.member_id = sh.member_id
AND dm.scd2_is_current = 1
LEFT JOIN billiards_dwd.dim_table dt
ON dt.site_id = sh.site_id
AND dt.table_id = sh.table_id
AND dt.scd2_is_current = 1
WHERE (%s::date IS NULL OR sh.pay_time::date >= %s::date)
AND (%s::date IS NULL OR sh.pay_time::date <= %s::date)
ORDER BY sh.pay_time DESC, sh.order_settle_id DESC
"""
rows = db.query(
sql,
(
site_id,
site_id,
start_date,
start_date,
end_date,
end_date,
),
)
return [dict(r) for r in (rows or [])]
def _fetch_rows_optimized(
db: DatabaseOperations,
site_id: int,
start_date: Optional[str],
end_date: Optional[str],
) -> List[Dict[str, Any]]:
"""
Optimized export strategy:
- Deduplicate groupbuy rows by (order_settle_id, coupon_key) to handle retry noise.
- Deduplicate assistant rows by assistant_service_id.
- Keep output schema identical to current export for direct comparison.
"""
sql = """
WITH gb_raw AS (
SELECT
redemption_id,
site_id,
order_settle_id,
order_coupon_id,
coupon_code,
ledger_name,
COALESCE(ledger_unit_price, 0) AS ledger_unit_price,
COALESCE(ledger_amount, 0) AS ledger_amount,
COALESCE(coupon_money, 0) AS coupon_money,
create_time,
COALESCE(NULLIF(coupon_code, ''), CAST(order_coupon_id AS varchar), CAST(redemption_id AS varchar)) AS coupon_key,
ROW_NUMBER() OVER (
PARTITION BY site_id, order_settle_id,
COALESCE(NULLIF(coupon_code, ''), CAST(order_coupon_id AS varchar), CAST(redemption_id AS varchar))
ORDER BY create_time DESC NULLS LAST, redemption_id DESC
) AS rn
FROM billiards_dwd.dwd_groupbuy_redemption
WHERE site_id = %s
AND is_delete = 0
),
gb AS (
SELECT
site_id,
order_settle_id,
COUNT(*) AS groupbuy_item_count,
ROUND(SUM(ledger_unit_price)::numeric, 2) AS groupbuy_pay_amount,
ROUND(SUM(ledger_amount)::numeric, 2) AS groupbuy_ledger_amount,
ROUND(SUM(coupon_money)::numeric, 2) AS groupbuy_coupon_money,
STRING_AGG(DISTINCT NULLIF(coupon_code, ''), '?' ORDER BY NULLIF(coupon_code, '')) AS coupon_codes,
STRING_AGG(DISTINCT NULLIF(ledger_name, ''), '?' ORDER BY NULLIF(ledger_name, '')) AS groupbuy_items
FROM gb_raw
WHERE rn = 1
GROUP BY site_id, order_settle_id
),
asv_raw AS (
SELECT DISTINCT ON (assistant_service_id)
assistant_service_id,
site_id,
order_settle_id,
assistant_no,
nickname,
skill_name,
COALESCE(real_use_seconds, 0) AS real_use_seconds,
COALESCE(projected_income, 0) AS projected_income,
COALESCE(real_service_money, 0) AS real_service_money
FROM billiards_dwd.dwd_assistant_service_log
WHERE site_id = %s
AND is_delete = 0
ORDER BY assistant_service_id
),
asv AS (
SELECT
site_id,
order_settle_id,
COUNT(*) AS assistant_service_count,
COUNT(DISTINCT NULLIF(assistant_no, '')) AS assistant_count,
STRING_AGG(DISTINCT NULLIF(nickname, ''), '?' ORDER BY NULLIF(nickname, '')) AS assistant_nicknames,
STRING_AGG(DISTINCT NULLIF(skill_name, ''), '?' ORDER BY NULLIF(skill_name, '')) AS assistant_skills,
ROUND(SUM(real_use_seconds)::numeric, 0) AS assistant_real_use_seconds,
ROUND(SUM(projected_income)::numeric, 2) AS assistant_projected_income,
ROUND(SUM(real_service_money)::numeric, 2) AS assistant_real_service_money
FROM asv_raw
GROUP BY site_id, order_settle_id
)
SELECT
sh.site_id,
sh.order_settle_id,
sh.order_trade_no,
sh.pay_time,
sh.settle_type,
sh.member_id,
COALESCE(dm.nickname, sh.member_name) AS member_name,
COALESCE(dm.mobile, sh.member_phone) AS member_phone,
sh.table_id,
dt.table_name,
dt.site_table_area_name AS table_area_name,
ROUND(COALESCE(sh.consume_money, 0)::numeric, 2) AS settle_consume_money,
ROUND(COALESCE(sh.pay_amount, 0)::numeric, 2) AS settle_pay_amount,
ROUND(COALESCE(sh.coupon_amount, 0)::numeric, 2) AS settle_coupon_amount,
ROUND(COALESCE(sh.pl_coupon_sale_amount, 0)::numeric, 2) AS pl_coupon_sale_amount,
gb.groupbuy_item_count,
gb.groupbuy_pay_amount,
gb.groupbuy_ledger_amount,
gb.groupbuy_coupon_money,
gb.coupon_codes,
gb.groupbuy_items,
asv.assistant_service_count,
asv.assistant_count,
asv.assistant_nicknames,
asv.assistant_skills,
asv.assistant_real_use_seconds,
asv.assistant_projected_income,
asv.assistant_real_service_money
FROM gb
JOIN asv
ON asv.site_id = gb.site_id
AND asv.order_settle_id = gb.order_settle_id
LEFT JOIN billiards_dwd.dwd_settlement_head sh
ON sh.site_id = gb.site_id
AND sh.order_settle_id = gb.order_settle_id
LEFT JOIN billiards_dwd.dim_member dm
ON dm.register_site_id = sh.site_id
AND dm.member_id = sh.member_id
AND dm.scd2_is_current = 1
LEFT JOIN billiards_dwd.dim_table dt
ON dt.site_id = sh.site_id
AND dt.table_id = sh.table_id
AND dt.scd2_is_current = 1
WHERE (%s::date IS NULL OR sh.pay_time::date >= %s::date)
AND (%s::date IS NULL OR sh.pay_time::date <= %s::date)
ORDER BY sh.pay_time DESC, sh.order_settle_id DESC
"""
rows = db.query(
sql,
(
site_id,
site_id,
start_date,
start_date,
end_date,
end_date,
),
)
return [dict(r) for r in (rows or [])]
def _write_csv(
rows: List[Dict[str, Any]],
out_csv: Path,
fields: Sequence[str],
header_map: Optional[Dict[str, str]] = None,
) -> None:
out_csv.parent.mkdir(parents=True, exist_ok=True)
if header_map:
file_headers = [header_map.get(f, f) for f in fields]
else:
file_headers = list(fields)
with out_csv.open("w", newline="", encoding="utf-8-sig") as f:
writer = csv.writer(f)
writer.writerow(file_headers)
for row in rows:
writer.writerow([row.get(k) for k in fields])
def main() -> None:
parser = argparse.ArgumentParser(
description="Export groupbuy orders that used assistant services."
)
parser.add_argument("--site-id", type=int, default=None, help="Site id to export")
parser.add_argument("--start-date", default=None, help="Filter start date: YYYY-MM-DD")
parser.add_argument("--end-date", default=None, help="Filter end date: YYYY-MM-DD")
parser.add_argument(
"--scheme",
choices=["current", "optimized"],
default="current",
help="Export scheme",
)
parser.add_argument(
"--header-lang",
choices=["zh", "en"],
default="zh",
help="CSV header language",
)
parser.add_argument(
"--output-csv",
default=os.path.join(ROOT, "docs", "groupbuy_orders_with_assistant_service.csv"),
help="Output CSV path",
)
args = parser.parse_args()
config = AppConfig.load()
db_conn = DatabaseConnection(config.config["db"]["dsn"])
db = DatabaseOperations(db_conn)
try:
site_id = _resolve_site_id(config, db, args.site_id)
if args.scheme == "optimized":
rows = _fetch_rows_optimized(db, site_id, args.start_date, args.end_date)
else:
rows = _fetch_rows_current(db, site_id, args.start_date, args.end_date)
finally:
db_conn.close()
out_csv = Path(args.output_csv)
header_map = ZH_HEADER_MAP if args.header_lang == "zh" else None
_write_csv(rows, out_csv, fields=FIELD_ORDER, header_map=header_map)
print(f"site_id={site_id}")
print(f"scheme={args.scheme}")
print(f"rows={len(rows)}")
print(f"csv={out_csv}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*-
"""Export index tables to markdown for quick review."""
import os
import sys
from datetime import datetime
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
sys.path.insert(0, ROOT)
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
def _fmt(value, digits=2):
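    # e.g. _fmt(None) -> "-", _fmt(3.14159) -> "3.14", _fmt(7, 0) -> "7"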
if value is None:
return "-"
if isinstance(value, (int, float)):
return f"{value:.{digits}f}"
return str(value)
def _fetch(db: DatabaseOperations, sql: str):
return [dict(r) for r in (db.query(sql) or [])]
def build_markdown(db: DatabaseOperations) -> str:
lines = []
lines.append("# Index Tables")
lines.append("")
lines.append(f"Generated at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
lines.append("")
# 老客挽回指数WBI
wbi_sql = """
SELECT
COALESCE(m.nickname, CONCAT('member_', r.member_id)) AS member_name,
r.display_score,
r.raw_score,
r.t_v,
r.visits_14d,
r.sv_balance
FROM billiards_dws.dws_member_winback_index r
LEFT JOIN billiards_dwd.dim_member m
ON r.member_id = m.member_id AND m.scd2_is_current = 1
ORDER BY r.display_score DESC NULLS LAST
"""
wbi_rows = _fetch(db, wbi_sql)
lines.append("## 1) WBI")
lines.append("")
lines.append("| member_name | wbi | raw_score | t_v | visits_14d | sv_balance |")
lines.append("|---|---:|---:|---:|---:|---:|")
for r in wbi_rows:
lines.append(
f"| {r.get('member_name') or '-'} | {_fmt(r.get('display_score'))} | {_fmt(r.get('raw_score'), 4)} | "
f"{_fmt(r.get('t_v'))} | {_fmt(r.get('visits_14d'), 0)} | {_fmt(r.get('sv_balance'))} |"
)
lines.append("")
lines.append(f"Total rows: {len(wbi_rows)}")
lines.append("")
# 新客转化指数NCI
nci_sql = """
SELECT
COALESCE(m.nickname, CONCAT('member_', r.member_id)) AS member_name,
r.display_score,
r.display_score_welcome,
r.display_score_convert,
r.raw_score,
r.raw_score_welcome,
r.raw_score_convert,
r.t_v,
r.visits_14d
FROM billiards_dws.dws_member_newconv_index r
LEFT JOIN billiards_dwd.dim_member m
ON r.member_id = m.member_id AND m.scd2_is_current = 1
ORDER BY r.display_score DESC NULLS LAST
"""
nci_rows = _fetch(db, nci_sql)
lines.append("## 2) NCI")
lines.append("")
lines.append("| member_name | nci | welcome | convert | raw_total | raw_welcome | raw_convert | t_v | visits_14d |")
lines.append("|---|---:|---:|---:|---:|---:|---:|---:|---:|")
for r in nci_rows:
lines.append(
f"| {r.get('member_name') or '-'} | {_fmt(r.get('display_score'))} | {_fmt(r.get('display_score_welcome'))} | "
f"{_fmt(r.get('display_score_convert'))} | {_fmt(r.get('raw_score'), 4)} | {_fmt(r.get('raw_score_welcome'), 4)} | "
f"{_fmt(r.get('raw_score_convert'), 4)} | {_fmt(r.get('t_v'))} | {_fmt(r.get('visits_14d'), 0)} |"
)
lines.append("")
lines.append(f"Total rows: {len(nci_rows)}")
lines.append("")
# 亲密指数
intimacy_sql = """
SELECT
COALESCE(a.nickname, CONCAT('assistant_', i.assistant_id)) AS assistant_name,
COALESCE(m.nickname, CONCAT('member_', i.member_id)) AS member_name,
i.display_score,
i.session_count,
i.attributed_recharge_amount
FROM billiards_dws.dws_member_assistant_intimacy i
LEFT JOIN billiards_dwd.dim_member m
ON i.member_id = m.member_id AND m.scd2_is_current = 1
LEFT JOIN billiards_dwd.dim_assistant a
ON i.assistant_id = a.assistant_id AND a.scd2_is_current = 1
ORDER BY i.display_score DESC NULLS LAST, i.session_count DESC
"""
intimacy_rows = _fetch(db, intimacy_sql)
lines.append("## 3) Intimacy")
lines.append("")
lines.append("| assistant | member | intimacy | sessions | recharge_amount |")
lines.append("|---|---|---:|---:|---:|")
for r in intimacy_rows:
lines.append(
f"| {r.get('assistant_name') or '-'} | {r.get('member_name') or '-'} | {_fmt(r.get('display_score'))} | "
f"{_fmt(r.get('session_count'), 0)} | {_fmt(r.get('attributed_recharge_amount'))} |"
)
lines.append("")
lines.append(f"Total rows: {len(intimacy_rows)}")
return "\n".join(lines)
def main() -> None:
config = AppConfig.load()
db_conn = DatabaseConnection(config.config["db"]["dsn"])
db = DatabaseOperations(db_conn)
try:
markdown = build_markdown(db)
finally:
db_conn.close()
output_path = os.path.join(ROOT, "docs", "index_tables.md")
with open(output_path, "w", encoding="utf-8-sig") as f:
f.write(markdown)
print(f"Exported to {output_path}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,475 @@
# -*- coding: utf-8 -*-
"""Export full intimacy JSON with member visits and card balances."""
from __future__ import annotations
import argparse
import json
import os
import sys
from datetime import date, datetime
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
sys.path.insert(0, ROOT)
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
def _as_int(v: Any) -> Optional[int]:
if v is None:
return None
s = str(v).strip()
if not s:
return None
return int(s)
def _to_float(v: Any, default: float = 0.0) -> float:
if v is None:
return default
if isinstance(v, Decimal):
return float(v)
if isinstance(v, (int, float)):
return float(v)
s = str(v).strip()
if not s:
return default
return float(s)
def _fmt_dt(v: Any) -> Optional[str]:
if v is None:
return None
if isinstance(v, datetime):
return v.isoformat()
if isinstance(v, date):
return v.isoformat()
return str(v)
def _resolve_site_id(config: AppConfig, db: DatabaseOperations, cli_site_id: Optional[int]) -> int:
if cli_site_id is not None:
return int(cli_site_id)
from_cfg = _as_int(config.get("app.store_id")) or _as_int(config.get("app.default_site_id"))
if from_cfg is not None:
return from_cfg
rows = db.query(
"""
SELECT site_id
FROM billiards_dws.dws_member_assistant_intimacy
WHERE site_id IS NOT NULL
GROUP BY site_id
ORDER BY COUNT(*) DESC
LIMIT 1
"""
)
if rows:
return int(dict(rows[0])["site_id"])
raise RuntimeError("Unable to resolve site_id; pass --site-id explicitly.")
def _fetch_pairs(db: DatabaseOperations, site_id: int) -> List[Dict[str, Any]]:
sql = """
SELECT
i.site_id,
i.tenant_id,
i.member_id,
i.assistant_id,
i.session_count,
i.total_duration_minutes,
i.basic_session_count,
i.incentive_session_count,
i.days_since_last_session,
i.attributed_recharge_count,
i.attributed_recharge_amount,
i.score_frequency,
i.score_recency,
i.score_recharge,
i.score_duration,
i.burst_multiplier,
i.raw_score,
i.display_score,
i.calc_time,
COALESCE(m.nickname, CONCAT('member_', i.member_id::text)) AS member_nickname,
COALESCE(a.nickname, CONCAT('assistant_', i.assistant_id::text)) AS assistant_nickname
FROM billiards_dws.dws_member_assistant_intimacy i
LEFT JOIN billiards_dwd.dim_member m
ON i.member_id = m.member_id
AND m.scd2_is_current = 1
LEFT JOIN billiards_dwd.dim_assistant a
ON i.assistant_id = a.assistant_id
AND a.scd2_is_current = 1
WHERE i.site_id = %s
ORDER BY i.display_score DESC NULLS LAST, i.session_count DESC, i.member_id, i.assistant_id
"""
rows = db.query(sql, (site_id,))
return [dict(r) for r in (rows or [])]
def _fetch_member_cards(
db: DatabaseOperations,
site_id: int,
member_ids: List[int],
) -> Dict[int, Dict[str, Any]]:
if not member_ids:
return {}
member_ids_str = ",".join(str(int(x)) for x in sorted(set(member_ids)))
sql = f"""
SELECT
tenant_member_id AS member_id,
member_card_id,
card_type_id,
member_card_grade_code,
member_card_grade_code_name,
member_card_type_name,
member_name,
member_mobile,
balance,
principal_balance,
status,
start_time,
end_time,
last_consume_time
FROM billiards_dwd.dim_member_card_account
WHERE register_site_id = %s
AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
AND tenant_member_id IN ({member_ids_str})
ORDER BY tenant_member_id, balance DESC NULLS LAST, member_card_id
"""
rows = db.query(sql, (site_id,)) or []
result: Dict[int, Dict[str, Any]] = {}
for r in rows:
d = dict(r)
mid = int(d["member_id"])
balance = _to_float(d.get("balance"), 0.0)
card = {
"member_card_id": _as_int(d.get("member_card_id")),
"card_type_id": _as_int(d.get("card_type_id")),
"member_card_grade_code": _as_int(d.get("member_card_grade_code")),
"member_card_grade_code_name": d.get("member_card_grade_code_name"),
"member_card_type_name": d.get("member_card_type_name"),
"member_name": d.get("member_name"),
"member_mobile": d.get("member_mobile"),
"balance": round(balance, 2),
"principal_balance": round(_to_float(d.get("principal_balance"), 0.0), 2),
"status": _as_int(d.get("status")),
"start_time": _fmt_dt(d.get("start_time")),
"end_time": _fmt_dt(d.get("end_time")),
"last_consume_time": _fmt_dt(d.get("last_consume_time")),
}
bucket = result.setdefault(
mid,
{
"member_id": mid,
"cards_all": [],
"cards_balance_ge_10": [],
"total_card_balance_all": 0.0,
},
)
bucket["cards_all"].append(card)
bucket["total_card_balance_all"] = round(bucket["total_card_balance_all"] + balance, 2)
if balance >= 10.0:
bucket["cards_balance_ge_10"].append(card)
return result
def _fetch_visit_rows(
db: DatabaseOperations,
site_id: int,
member_ids: List[int],
) -> Dict[Tuple[int, int], Dict[str, Any]]:
if not member_ids:
return {}
member_ids_str = ",".join(str(int(x)) for x in sorted(set(member_ids)))
sql = f"""
SELECT
member_id,
order_settle_id,
visit_date,
visit_time,
table_name,
area_name,
area_category,
table_duration_min,
assistant_duration_min,
table_fee,
goods_amount,
assistant_amount,
total_consume,
total_discount,
actual_pay,
cash_pay,
cash_card_pay,
gift_card_pay,
groupbuy_pay
FROM billiards_dws.dws_member_visit_detail
WHERE site_id = %s
AND member_id IN ({member_ids_str})
ORDER BY member_id, visit_time DESC, order_settle_id DESC
"""
rows = db.query(sql, (site_id,)) or []
result: Dict[Tuple[int, int], Dict[str, Any]] = {}
for r in rows:
d = dict(r)
key = (int(d["member_id"]), int(d["order_settle_id"]))
result[key] = {
"member_id": int(d["member_id"]),
"order_settle_id": int(d["order_settle_id"]),
"visit_date": _fmt_dt(d.get("visit_date")),
"visit_time": _fmt_dt(d.get("visit_time")),
"table_name": d.get("table_name"),
"area_name": d.get("area_name"),
"area_category": d.get("area_category"),
"table_duration_min": _as_int(d.get("table_duration_min")) or 0,
"assistant_duration_min_total": _as_int(d.get("assistant_duration_min")) or 0,
"table_fee": round(_to_float(d.get("table_fee"), 0.0), 2),
"goods_amount": round(_to_float(d.get("goods_amount"), 0.0), 2),
"assistant_amount": round(_to_float(d.get("assistant_amount"), 0.0), 2),
"total_consume": round(_to_float(d.get("total_consume"), 0.0), 2),
"total_discount": round(_to_float(d.get("total_discount"), 0.0), 2),
"actual_pay": round(_to_float(d.get("actual_pay"), 0.0), 2),
"cash_pay": round(_to_float(d.get("cash_pay"), 0.0), 2),
"cash_card_pay": round(_to_float(d.get("cash_card_pay"), 0.0), 2),
"gift_card_pay": round(_to_float(d.get("gift_card_pay"), 0.0), 2),
"groupbuy_pay": round(_to_float(d.get("groupbuy_pay"), 0.0), 2),
}
return result
def _fetch_assistant_service_rows(
db: DatabaseOperations,
site_id: int,
member_ids: List[int],
) -> Dict[Tuple[int, int], List[Dict[str, Any]]]:
if not member_ids:
return {}
member_ids_str = ",".join(str(int(x)) for x in sorted(set(member_ids)))
sql = f"""
SELECT
s.tenant_member_id AS member_id,
s.order_settle_id,
d.assistant_id,
COALESCE(d.nickname, s.nickname) AS assistant_nickname,
SUM(COALESCE(s.income_seconds, 0)) / 60.0 AS duration_min,
SUM(COALESCE(s.ledger_amount, 0)) AS amount
FROM billiards_dwd.dwd_assistant_service_log s
JOIN billiards_dwd.dim_assistant d
ON s.user_id = d.user_id
AND d.scd2_is_current = 1
WHERE s.site_id = %s
AND s.is_delete = 0
AND s.tenant_member_id IN ({member_ids_str})
AND s.order_settle_id IS NOT NULL
GROUP BY
s.tenant_member_id,
s.order_settle_id,
d.assistant_id,
COALESCE(d.nickname, s.nickname)
ORDER BY s.tenant_member_id, s.order_settle_id
"""
rows = db.query(sql, (site_id,)) or []
result: Dict[Tuple[int, int], List[Dict[str, Any]]] = {}
for r in rows:
d = dict(r)
key = (int(d["member_id"]), int(d["order_settle_id"]))
rec = {
"assistant_id": int(d["assistant_id"]),
"assistant_nickname": d.get("assistant_nickname"),
"duration_min": round(_to_float(d.get("duration_min"), 0.0), 2),
"amount": round(_to_float(d.get("amount"), 0.0), 2),
}
result.setdefault(key, []).append(rec)
return result
def _pk_key(assistant_nickname: Optional[str], member_nickname: Optional[str]) -> str:
a = (assistant_nickname or "").strip() or "assistant_unknown"
m = (member_nickname or "").strip() or "member_unknown"
return f"{a}__{m}"
def build_export_payload(db: DatabaseOperations, site_id: int) -> Dict[str, Any]:
pairs = _fetch_pairs(db, site_id)
member_ids = sorted({int(p["member_id"]) for p in pairs})
cards_by_member = _fetch_member_cards(db, site_id, member_ids)
visits_by_key = _fetch_visit_rows(db, site_id, member_ids)
service_by_key = _fetch_assistant_service_rows(db, site_id, member_ids)
visits_by_member: Dict[int, List[Tuple[Tuple[int, int], Dict[str, Any]]]] = {}
for k, v in visits_by_key.items():
visits_by_member.setdefault(k[0], []).append((k, v))
data_by_pk: Dict[str, Dict[str, Any]] = {}
collisions: List[str] = []
for p in pairs:
member_id = int(p["member_id"])
assistant_id = int(p["assistant_id"])
assistant_nickname = p.get("assistant_nickname")
member_nickname = p.get("member_nickname")
visit_items: List[Dict[str, Any]] = []
for key, visit in visits_by_member.get(member_id, []):
service_list = service_by_key.get(key, [])
if not service_list:
continue
matched = [x for x in service_list if x["assistant_id"] == assistant_id]
if not matched:
continue
matched_duration = round(sum(x["duration_min"] for x in matched), 2)
matched_amount = round(sum(x["amount"] for x in matched), 2)
matched_nicknames = sorted({x.get("assistant_nickname") for x in matched if x.get("assistant_nickname")})
visit_items.append(
{
"order_settle_id": visit.get("order_settle_id"),
"visit_date": visit.get("visit_date"),
"visit_time": visit.get("visit_time"),
"table_name": visit.get("table_name"),
"area_name": visit.get("area_name"),
"area_category": visit.get("area_category"),
"table_duration_min": visit.get("table_duration_min"),
"assistant_duration_min_total": visit.get("assistant_duration_min_total"),
"table_fee": visit.get("table_fee"),
"goods_amount": visit.get("goods_amount"),
"assistant_amount": visit.get("assistant_amount"),
"total_consume": visit.get("total_consume"),
"total_discount": visit.get("total_discount"),
"actual_pay": visit.get("actual_pay"),
"cash_pay": visit.get("cash_pay"),
"cash_card_pay": visit.get("cash_card_pay"),
"gift_card_pay": visit.get("gift_card_pay"),
"groupbuy_pay": visit.get("groupbuy_pay"),
"target_assistant_nickname": ", ".join(matched_nicknames) if matched_nicknames else p.get("assistant_nickname"),
"target_assistant_duration_min": matched_duration,
"target_assistant_amount": matched_amount,
}
)
visit_items.sort(
key=lambda x: (x.get("visit_time") or "", x.get("order_settle_id") or 0),
reverse=True,
)
member_cards = cards_by_member.get(
member_id,
{
"member_id": member_id,
"cards_all": [],
"cards_balance_ge_10": [],
"total_card_balance_all": 0.0,
},
)
pk = _pk_key(assistant_nickname, member_nickname)
item = {
"primary_key": {
"assistant_nickname": assistant_nickname,
"member_nickname": member_nickname,
},
"intimacy": {
"display_score": round(_to_float(p.get("display_score"), 0.0), 2),
"raw_score": round(_to_float(p.get("raw_score"), 0.0), 6),
"session_count": _as_int(p.get("session_count")) or 0,
"total_duration_minutes": _as_int(p.get("total_duration_minutes")) or 0,
"basic_session_count": _as_int(p.get("basic_session_count")) or 0,
"incentive_session_count": _as_int(p.get("incentive_session_count")) or 0,
"days_since_last_session": _as_int(p.get("days_since_last_session")),
"attributed_recharge_count": _as_int(p.get("attributed_recharge_count")) or 0,
"attributed_recharge_amount": round(_to_float(p.get("attributed_recharge_amount"), 0.0), 2),
"score_frequency": round(_to_float(p.get("score_frequency"), 0.0), 4),
"score_recency": round(_to_float(p.get("score_recency"), 0.0), 4),
"score_recharge": round(_to_float(p.get("score_recharge"), 0.0), 4),
"score_duration": round(_to_float(p.get("score_duration"), 0.0), 4),
"burst_multiplier": round(_to_float(p.get("burst_multiplier"), 1.0), 4),
"calc_time": _fmt_dt(p.get("calc_time")),
},
"member_cards": {
"cards_balance_ge_10": member_cards.get("cards_balance_ge_10", []),
"total_card_balance_all": round(_to_float(member_cards.get("total_card_balance_all"), 0.0), 2),
},
"visit_consumptions": visit_items,
}
if pk in data_by_pk:
collisions.append(pk)
existing = data_by_pk[pk]
existing["collision_items"] = existing.get("collision_items", [])
existing["collision_items"].append(item)
else:
data_by_pk[pk] = item
payload = {
"meta": {
"site_id": site_id,
"generated_at": datetime.now().isoformat(),
"pair_count": len(pairs),
"primary_key_count": len(data_by_pk),
"member_count": len(member_ids),
"primary_key_rule": "assistant_nickname + member_nickname",
"collision_count": len(collisions),
},
"data": data_by_pk,
}
return payload
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Export full intimacy JSON")
parser.add_argument("--site-id", type=int, default=None, help="site_id, defaults to app.store_id")
parser.add_argument(
"--output",
default="tmp/intimacy_full_export.json",
help="output JSON file path",
)
return parser.parse_args()
def main() -> None:
args = parse_args()
config = AppConfig.load()
db_conn = DatabaseConnection(config.config["db"]["dsn"])
db = DatabaseOperations(db_conn)
try:
site_id = _resolve_site_id(config, db, args.site_id)
payload = build_export_payload(db, site_id)
finally:
db_conn.close()
output_path = Path(args.output)
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(
json.dumps(payload, ensure_ascii=False, indent=2),
encoding="utf-8",
)
print(f"Exported intimacy JSON: {output_path}")
print(f"pair_count={payload['meta']['pair_count']}, member_count={payload['meta']['member_count']}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,720 @@
# -*- coding: utf-8 -*-
"""Export 60-day member visit detail with WBI/NCI scores."""
from __future__ import annotations
import argparse
import csv
import math
import os
import sys
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
sys.path.insert(0, ROOT)
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
FIELDS = [
"site_id",
"member_id",
"member_nickname",
"visit_time",
"consume_amount",
"sv_balance",
"assistant_nicknames",
"wbi_score",
"nci_score",
]
def _as_int(v: Any) -> Optional[int]:
if v is None or str(v).strip() == "":
return None
return int(v)
def _as_float(v: Any, default: float = 0.0) -> float:
if v is None or str(v).strip() == "":
return default
return float(v)
def _resolve_site_id(config: AppConfig, db: DatabaseOperations, cli_site_id: Optional[int]) -> int:
if cli_site_id is not None:
return int(cli_site_id)
from_cfg = _as_int(config.get("app.store_id")) or _as_int(config.get("app.default_site_id"))
if from_cfg is not None:
return from_cfg
rows = db.query(
"""
SELECT site_id
FROM billiards_dwd.dwd_settlement_head
WHERE site_id IS NOT NULL
GROUP BY site_id
ORDER BY COUNT(*) DESC
LIMIT 1
"""
)
if rows:
return int(dict(rows[0])["site_id"])
raise RuntimeError("Unable to resolve site_id; pass --site-id explicitly.")
def _visit_condition_sql() -> str:
return """
(
s.settle_type = 1
OR (
s.settle_type = 3
AND EXISTS (
SELECT 1
FROM billiards_dwd.dwd_assistant_service_log asl
JOIN billiards_dws.cfg_skill_type st
ON asl.skill_id = st.skill_id
AND st.course_type_code = 'BONUS'
AND st.is_active = TRUE
WHERE asl.order_settle_id = s.order_settle_id
AND asl.site_id = s.site_id
AND asl.tenant_member_id = s.member_id
AND asl.is_delete = 0
)
)
)
"""
def _fetch_visit_rows_base(
db: DatabaseOperations,
site_id: int,
start_time: datetime,
end_time: datetime,
) -> List[Dict[str, Any]]:
sql = f"""
WITH visit_raw AS (
SELECT
s.site_id,
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS member_id,
s.order_settle_id,
s.pay_time AS visit_time,
COALESCE(s.consume_money, 0) AS consume_amount
FROM billiards_dwd.dwd_settlement_head s
LEFT JOIN billiards_dwd.dim_member_card_account mca
ON s.member_card_account_id = mca.member_card_id
AND mca.scd2_is_current = 1
AND mca.register_site_id = s.site_id
WHERE s.site_id = %s
AND s.pay_time >= %s
AND s.pay_time < %s
AND {_visit_condition_sql()}
AND COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) > 0
),
assistant_agg AS (
SELECT
asl.order_settle_id,
STRING_AGG(DISTINCT NULLIF(asl.nickname, ''), '?' ORDER BY NULLIF(asl.nickname, '')) AS assistant_nicknames
FROM billiards_dwd.dwd_assistant_service_log asl
WHERE asl.site_id = %s
AND asl.is_delete = 0
GROUP BY asl.order_settle_id
),
member_balance AS (
SELECT
mca.register_site_id AS site_id,
mca.tenant_member_id AS member_id,
SUM(
CASE
WHEN mca.card_type_id = 2793249295533893 THEN COALESCE(mca.balance, 0)
ELSE 0
END
) AS sv_balance
FROM billiards_dwd.dim_member_card_account mca
WHERE mca.register_site_id = %s
AND mca.scd2_is_current = 1
GROUP BY mca.register_site_id, mca.tenant_member_id
),
member_name AS (
SELECT member_id, nickname
FROM billiards_dwd.dim_member
WHERE register_site_id = %s
AND scd2_is_current = 1
)
SELECT
vr.site_id,
vr.member_id,
COALESCE(mn.nickname, CONCAT('member_', vr.member_id::text)) AS member_nickname,
vr.visit_time,
ROUND(vr.consume_amount::numeric, 2) AS consume_amount,
ROUND(COALESCE(mb.sv_balance, 0)::numeric, 2) AS sv_balance,
aa.assistant_nicknames
FROM visit_raw vr
LEFT JOIN assistant_agg aa
ON aa.order_settle_id = vr.order_settle_id
LEFT JOIN member_balance mb
ON mb.site_id = vr.site_id
AND mb.member_id = vr.member_id
LEFT JOIN member_name mn
ON mn.member_id = vr.member_id
ORDER BY vr.visit_time DESC, vr.order_settle_id DESC
"""
rows = db.query(sql, (site_id, start_time, end_time, site_id, site_id, site_id))
return [dict(r) for r in (rows or [])]
def _fetch_current_score_maps(
db: DatabaseOperations,
site_id: int,
) -> Tuple[Dict[int, float], Dict[int, float]]:
wbi_rows = db.query(
"""
SELECT member_id, display_score AS wbi_score
FROM billiards_dws.dws_member_winback_index
WHERE site_id = %s
""",
(site_id,),
)
nci_rows = db.query(
"""
SELECT member_id, display_score AS nci_score
FROM billiards_dws.dws_member_newconv_index
WHERE site_id = %s
""",
(site_id,),
)
wbi_map = {
int(dict(r)["member_id"]): round(float(dict(r)["wbi_score"]), 2)
for r in (wbi_rows or [])
if dict(r).get("wbi_score") is not None
}
nci_map = {
int(dict(r)["member_id"]): round(float(dict(r)["nci_score"]), 2)
for r in (nci_rows or [])
if dict(r).get("nci_score") is not None
}
return wbi_map, nci_map
def _load_wbi_params(db: DatabaseOperations) -> Dict[str, float]:
sql = """
SELECT param_name, param_value
FROM (
SELECT
param_name,
param_value,
ROW_NUMBER() OVER (
PARTITION BY param_name
ORDER BY effective_from DESC, updated_at DESC, created_at DESC
) AS rn
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'WBI'
AND effective_from <= CURRENT_DATE
) t
WHERE rn = 1
"""
rows = db.query(sql)
params: Dict[str, float] = {}
for row in (rows or []):
d = dict(row)
params[str(d["param_name"])] = float(d["param_value"])
return params
def _fetch_wbi_member_rows(db: DatabaseOperations, site_id: int) -> Dict[int, Dict[str, Any]]:
rows = db.query(
"""
SELECT
member_id,
status,
segment,
t_v,
interval_count,
overdue_old,
drop_old,
recharge_old,
value_old,
raw_score,
display_score
FROM billiards_dws.dws_member_winback_index
WHERE site_id = %s
""",
(site_id,),
)
result: Dict[int, Dict[str, Any]] = {}
for row in (rows or []):
d = dict(row)
mid = int(d["member_id"])
result[mid] = d
return result
def _fetch_member_interval_samples(
db: DatabaseOperations,
site_id: int,
member_ids: List[int],
base_date: date,
visit_lookback_days: int,
recency_days: int,
) -> Dict[int, List[Tuple[float, int]]]:
if not member_ids:
return {}
member_ids_str = ",".join(str(m) for m in member_ids)
start_date = base_date - timedelta(days=visit_lookback_days)
sql = f"""
WITH visit_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS member_id,
DATE(s.pay_time) AS visit_date
FROM billiards_dwd.dwd_settlement_head s
LEFT JOIN billiards_dwd.dim_member_card_account mca
ON s.member_card_account_id = mca.member_card_id
AND mca.scd2_is_current = 1
AND mca.register_site_id = s.site_id
WHERE s.site_id = %s
AND s.pay_time >= %s
AND s.pay_time < %s + INTERVAL '1 day'
AND {_visit_condition_sql()}
AND COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) IN ({member_ids_str})
),
visit_dedup AS (
SELECT member_id, visit_date
FROM visit_source
GROUP BY member_id, visit_date
)
SELECT member_id, visit_date
FROM visit_dedup
ORDER BY member_id, visit_date
"""
rows = db.query(sql, (site_id, start_date, base_date))
member_dates: Dict[int, List[date]] = {}
for row in (rows or []):
d = dict(row)
mid = int(d["member_id"])
vdt = d["visit_date"]
if vdt is None:
continue
member_dates.setdefault(mid, []).append(vdt)
result: Dict[int, List[Tuple[float, int]]] = {}
for mid, dates in member_dates.items():
samples: List[Tuple[float, int]] = []
for i in range(1, len(dates)):
interval = (dates[i] - dates[i - 1]).days
interval_capped = float(min(recency_days, interval))
age_days = max(0, (base_date - dates[i]).days)
samples.append((interval_capped, age_days))
result[mid] = samples
return result
def _weighted_cdf(
samples: List[Tuple[float, int]],
t_v: float,
halflife_days: float,
blend_min_samples: int = 8,
) -> float:
if not samples:
return 0.5
if halflife_days <= 0:
p_eq = sum(1.0 for x, _ in samples if x <= t_v) / len(samples)
return p_eq
ln2 = math.log(2.0)
weights: List[float] = []
indicators: List[float] = []
for interval, age_days in samples:
w = math.exp(-ln2 * float(age_days) / halflife_days)
weights.append(w)
indicators.append(1.0 if interval <= t_v else 0.0)
w_sum = sum(weights)
if w_sum <= 0:
p_w = 0.5
else:
p_w = sum(w * ind for w, ind in zip(weights, indicators)) / w_sum
p_eq = sum(indicators) / len(indicators)
m = len(samples)
lam = min(1.0, float(m) / float(max(1, blend_min_samples)))
p = lam * p_w + (1.0 - lam) * p_eq
return max(0.0, min(1.0, p))
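# _weighted_cdf 手算示例(样本为虚构,半衰期取 30 天,t_v = 20):
#   samples = [(5.0, 0), (40.0, 30)]
#   权重 w = exp(-ln2 * age / 30)   -> [1.0, 0.5]
#   指示 1(interval <= t_v)        -> [1, 0]
#   p_w = 1.0 / 1.5 ≈ 0.667,p_eq = 0.5
#   m = 2,lam = min(1, 2 / 8) = 0.25
#   p = 0.25 * 0.667 + 0.75 * 0.5 ≈ 0.54(近期样本权重更高,小样本时向等权估计收缩)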
def _calculate_percentiles(scores: List[float], lower: int, upper: int) -> Tuple[float, float]:
if not scores:
return 0.0, 0.0
sorted_scores = sorted(scores)
n = len(sorted_scores)
lower_idx = max(0, int(n * lower / 100) - 1)
upper_idx = min(n - 1, int(n * upper / 100))
return sorted_scores[lower_idx], sorted_scores[upper_idx]
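# 示例:n = 100、lower = 5、upper = 95 时,lower_idx = 4(第 5 小值)、upper_idx = 95(第 96 小值),
# 即取 P5 / P95 作为后续 winsorize 的上下界。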
def _winsorize(value: float, lower: float, upper: float) -> float:
return min(max(value, lower), upper)
def _normalize_to_display(value: float, min_val: float, max_val: float, compression_mode: str) -> float:
if compression_mode == "log1p":
value = math.log1p(value)
min_val = math.log1p(min_val)
max_val = math.log1p(max_val)
elif compression_mode == "asinh":
value = math.asinh(value)
min_val = math.asinh(min_val)
max_val = math.asinh(max_val)
eps = 1e-6
rng = max_val - min_val
if rng < eps:
return 5.0
score = 10.0 * (value - min_val) / rng
return max(0.0, min(10.0, score))
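# 归一化示例(数值为虚构):value = 30、min_val = 0、max_val = 100 时,
#   compression_mode = "none"  -> 10 * 30 / 100 = 3.0
#   compression_mode = "log1p" -> 10 * log1p(30) / log1p(100) ≈ 7.4(压缩长尾,抬升中低区间)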
def _compression_mode_from_param(params: Dict[str, float]) -> str:
mode = int(params.get("compression_mode", 0))
if mode == 1:
return "log1p"
if mode == 2:
return "asinh"
return "none"
def _build_wbi_optimized_map(
db: DatabaseOperations,
site_id: int,
base_date: date,
half_life_days: float,
) -> Dict[int, Optional[float]]:
params = _load_wbi_params(db)
w_over = float(params.get("w_over", 2.0))
w_drop = float(params.get("w_drop", 1.0))
w_re = float(params.get("w_re", 0.4))
w_value = float(params.get("w_value", 1.2))
overdue_alpha = float(params.get("overdue_alpha", 2.0))
percentile_lower = int(params.get("percentile_lower", 5))
percentile_upper = int(params.get("percentile_upper", 95))
recency_days = int(params.get("lookback_days_recency", 60))
visit_lookback_days = int(params.get("visit_lookback_days", 180))
member_rows = _fetch_wbi_member_rows(db, site_id)
member_ids_for_calc = [
mid
for mid, row in member_rows.items()
if row.get("segment") == "OLD" and row.get("raw_score") is not None
]
interval_samples = _fetch_member_interval_samples(
db=db,
site_id=site_id,
member_ids=member_ids_for_calc,
base_date=base_date,
visit_lookback_days=visit_lookback_days,
recency_days=recency_days,
)
raw_new_map: Dict[int, float] = {}
for mid in member_ids_for_calc:
row = member_rows[mid]
t_v = _as_float(row.get("t_v"), recency_days)
overdue_old = _as_float(row.get("overdue_old"))
drop_old = _as_float(row.get("drop_old"))
recharge_old = _as_float(row.get("recharge_old"))
value_old = _as_float(row.get("value_old"))
raw_old = _as_float(row.get("raw_score"))
pre_old = (
w_over * overdue_old
+ w_drop * drop_old
+ w_re * recharge_old
+ w_value * value_old
)
if pre_old <= 1e-9:
suppression = 1.0
else:
suppression = max(0.0, min(1.0, raw_old / pre_old))
p_weighted = _weighted_cdf(
samples=interval_samples.get(mid, []),
t_v=t_v,
halflife_days=half_life_days,
)
overdue_new = math.pow(p_weighted, overdue_alpha)
pre_new = (
w_over * overdue_new
+ w_drop * drop_old
+ w_re * recharge_old
+ w_value * value_old
)
raw_new = max(0.0, pre_new * suppression)
raw_new_map[mid] = raw_new
if not raw_new_map:
return {mid: _as_float(row.get("display_score")) for mid, row in member_rows.items()}
scores = list(raw_new_map.values())
q_l, q_u = _calculate_percentiles(scores, percentile_lower, percentile_upper)
compression_mode = _compression_mode_from_param(params)
display_new_map: Dict[int, Optional[float]] = {}
for mid, raw_score in raw_new_map.items():
clipped = _winsorize(raw_score, q_l, q_u)
display = _normalize_to_display(clipped, q_l, q_u, compression_mode=compression_mode)
display_new_map[mid] = round(display, 2)
    # 保留未重新计算的会员(如 STOP_HIGH_BALANCE)的当前展示分数。
result: Dict[int, Optional[float]] = {}
for mid, row in member_rows.items():
if mid in display_new_map:
result[mid] = display_new_map[mid]
else:
current = row.get("display_score")
result[mid] = None if current is None else round(float(current), 2)
return result
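# 优化版 WBI 重算思路(对上方实现的归纳):只把 Overdue 分量替换为时间加权版本
# (overdue_new = p_weighted ** overdue_alpha),drop / recharge / value 沿用库中旧值;
# suppression = raw_old / pre_old 保留原先对 raw 分数的抑制比例;
# 最后按 percentile_lower / percentile_upper(默认 5 / 95)winsorize 并归一化到 0~10 展示分。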
def _attach_scores(
base_rows: List[Dict[str, Any]],
wbi_map: Dict[int, Optional[float]],
nci_map: Dict[int, float],
) -> List[Dict[str, Any]]:
result: List[Dict[str, Any]] = []
for row in base_rows:
mid = int(row["member_id"])
new_row = {
"site_id": row.get("site_id"),
"member_id": row.get("member_id"),
"member_nickname": row.get("member_nickname"),
"visit_time": row.get("visit_time"),
"consume_amount": row.get("consume_amount"),
"sv_balance": row.get("sv_balance"),
"assistant_nicknames": row.get("assistant_nicknames"),
"wbi_score": wbi_map.get(mid),
"nci_score": nci_map.get(mid),
}
result.append(new_row)
return result
def _write_csv(rows: List[Dict[str, Any]], out_csv: Path) -> None:
out_csv.parent.mkdir(parents=True, exist_ok=True)
with out_csv.open("w", newline="", encoding="utf-8-sig") as f:
writer = csv.DictWriter(f, fieldnames=FIELDS)
writer.writeheader()
for row in rows:
writer.writerow({k: row.get(k) for k in FIELDS})
def _write_preview_md(rows: List[Dict[str, Any]], out_md: Path, limit: int = 200) -> None:
out_md.parent.mkdir(parents=True, exist_ok=True)
lines = [
"|" + "|".join(FIELDS) + "|",
"|" + "|".join(["---"] * len(FIELDS)) + "|",
]
for row in rows[:limit]:
cells = ["" if row.get(c) is None else str(row.get(c)) for c in FIELDS]
lines.append("|" + "|".join(cells) + "|")
out_md.write_text("\n".join(lines), encoding="utf-8-sig")
def _diff_and_write_report(
current_rows: List[Dict[str, Any]],
optimized_rows: List[Dict[str, Any]],
out_md: Path,
) -> None:
def _to_map(rows: List[Dict[str, Any]]) -> Dict[Tuple[Any, Any, Any], Dict[str, Any]]:
result: Dict[Tuple[Any, Any, Any], Dict[str, Any]] = {}
for r in rows:
key = (r.get("site_id"), r.get("member_id"), r.get("visit_time"))
result[key] = r
return result
cur_map = _to_map(current_rows)
opt_map = _to_map(optimized_rows)
cur_keys = set(cur_map.keys())
opt_keys = set(opt_map.keys())
common_keys = sorted(cur_keys & opt_keys)
changed_rows = 0
changed_wbi_rows = 0
changed_nci_rows = 0
changed_member_ids = set()
member_wbi_deltas: Dict[int, List[float]] = {}
for k in common_keys:
c = cur_map[k]
o = opt_map[k]
wbi_c = c.get("wbi_score")
wbi_o = o.get("wbi_score")
nci_c = c.get("nci_score")
nci_o = o.get("nci_score")
row_changed = (wbi_c != wbi_o) or (nci_c != nci_o)
if row_changed:
changed_rows += 1
mid = int(c["member_id"])
changed_member_ids.add(mid)
if wbi_c != wbi_o:
changed_wbi_rows += 1
if wbi_c is not None and wbi_o is not None:
member_wbi_deltas.setdefault(mid, []).append(float(wbi_o) - float(wbi_c))
if nci_c != nci_o:
changed_nci_rows += 1
member_delta_summary: List[Tuple[int, float, int]] = []
for mid, ds in member_wbi_deltas.items():
if not ds:
continue
avg_delta = sum(ds) / len(ds)
member_delta_summary.append((mid, avg_delta, len(ds)))
member_delta_summary.sort(key=lambda x: abs(x[1]), reverse=True)
lines = [
"# visit_60d_member_detail_with_indices当前版 vs 优化版",
"",
"## 对比概览",
f"- 当前行数: `{len(current_rows)}`",
f"- 优化行数: `{len(optimized_rows)}`",
f"- 共同主键行数(site_id,member_id,visit_time): `{len(common_keys)}`",
f"- 仅当前有: `{len(cur_keys - opt_keys)}`",
f"- 仅优化有: `{len(opt_keys - cur_keys)}`",
f"- 分数发生变化的行: `{changed_rows}`",
f"- WBI变化行: `{changed_wbi_rows}`",
f"- NCI变化行: `{changed_nci_rows}`",
f"- 涉及会员数: `{len(changed_member_ids)}`",
"",
"## 经营解读",
"- 本次优化只改 WBI把 Overdue 从等权历史替换为时间加权CDF近期样本权重更高",
"- NCI保持不变用于避免把两类策略老客挽回/新客转化)混在一次改动里。",
"- 若变化主要出现在近期行为变化快的会员,通常更符合一线“近期状态优先”的经营直觉。",
"",
"## WBI变化最大会员(按平均分差绝对值)",
"|member_id|avg_delta(optimized-current)|visit_rows|",
"|---|---:|---:|",
]
for mid, avg_delta, cnt in member_delta_summary[:20]:
lines.append(f"|{mid}|{avg_delta:.2f}|{cnt}|")
if len(member_delta_summary) == 0:
lines.append("|(none)|0.00|0|")
out_md.parent.mkdir(parents=True, exist_ok=True)
out_md.write_text("\n".join(lines), encoding="utf-8-sig")
def main() -> None:
parser = argparse.ArgumentParser(description="Export 60-day member visit detail with WBI/NCI scores.")
parser.add_argument("--site-id", type=int, default=None, help="Site id to export")
parser.add_argument("--days", type=int, default=60, help="Lookback days (default: 60)")
parser.add_argument(
"--scheme",
choices=["current", "optimized", "both"],
default="current",
help="Export scheme",
)
parser.add_argument(
"--wbi-interval-halflife-days",
type=float,
default=30.0,
help="Half-life days for weighted CDF in optimized WBI",
)
parser.add_argument(
"--output-csv",
default=os.path.join(ROOT, "docs", "visit_60d_member_detail_with_indices.csv"),
help="Output CSV path (used by current/optimized single scheme)",
)
parser.add_argument(
"--output-preview-md",
default=os.path.join(ROOT, "docs", "visit_60d_member_detail_with_indices_preview.md"),
help="Output preview markdown path (used by current/optimized single scheme)",
)
parser.add_argument(
"--output-csv-current",
default=os.path.join(ROOT, "docs", "visit_60d_member_detail_with_indices_current.csv"),
help="Output CSV path for current scheme when --scheme both",
)
parser.add_argument(
"--output-csv-optimized",
default=os.path.join(ROOT, "docs", "visit_60d_member_detail_with_indices_optimized.csv"),
help="Output CSV path for optimized scheme when --scheme both",
)
parser.add_argument(
"--output-compare-md",
default=os.path.join(ROOT, "docs", "visit_60d_member_detail_with_indices_compare.md"),
help="Output compare markdown path when --scheme both",
)
parser.add_argument("--preview-limit", type=int, default=200, help="Preview markdown row limit")
args = parser.parse_args()
config = AppConfig.load()
db_conn = DatabaseConnection(config.config["db"]["dsn"])
db = DatabaseOperations(db_conn)
try:
site_id = _resolve_site_id(config, db, args.site_id)
now = datetime.now()
start_time = now - timedelta(days=max(1, int(args.days)))
end_time = now
base_rows = _fetch_visit_rows_base(db, site_id, start_time, end_time)
wbi_current_map, nci_current_map = _fetch_current_score_maps(db, site_id)
if args.scheme == "current":
rows = _attach_scores(base_rows, wbi_current_map, nci_current_map)
out_csv = Path(args.output_csv)
out_md = Path(args.output_preview_md)
_write_csv(rows, out_csv)
_write_preview_md(rows, out_md, limit=max(1, int(args.preview_limit)))
print(f"site_id={site_id}")
print("scheme=current")
print(f"rows={len(rows)}")
print(f"csv={out_csv}")
print(f"preview={out_md}")
return
wbi_optimized_map = _build_wbi_optimized_map(
db=db,
site_id=site_id,
base_date=end_time.date(),
half_life_days=max(1.0, float(args.wbi_interval_halflife_days)),
)
if args.scheme == "optimized":
rows = _attach_scores(base_rows, wbi_optimized_map, nci_current_map)
out_csv = Path(args.output_csv)
out_md = Path(args.output_preview_md)
_write_csv(rows, out_csv)
_write_preview_md(rows, out_md, limit=max(1, int(args.preview_limit)))
print(f"site_id={site_id}")
print("scheme=optimized")
print(f"rows={len(rows)}")
print(f"csv={out_csv}")
print(f"preview={out_md}")
return
current_rows = _attach_scores(base_rows, wbi_current_map, nci_current_map)
optimized_rows = _attach_scores(base_rows, wbi_optimized_map, nci_current_map)
out_cur = Path(args.output_csv_current)
out_opt = Path(args.output_csv_optimized)
out_cmp = Path(args.output_compare_md)
_write_csv(current_rows, out_cur)
_write_csv(optimized_rows, out_opt)
_diff_and_write_report(current_rows, optimized_rows, out_cmp)
print(f"site_id={site_id}")
print("scheme=both")
print(f"rows={len(current_rows)}")
print(f"csv_current={out_cur}")
print(f"csv_optimized={out_opt}")
print(f"compare={out_cmp}")
finally:
db_conn.close()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,634 @@
# -*- coding: utf-8 -*-
"""
全量 API JSON 刷新 + 字段分析 + MD 文档完善 + 对比报告(v2)
时间范围:2026-01-01 00:00:00 ~ 2026-02-13 00:00:00,每接口 100 条
改进点(相比 v1):
- siteProfile/tableProfile 等嵌套对象:MD 中已记录为 object 则不展开子字段
- 请求参数与响应字段分开对比
- 只对比顶层业务字段
- 真正缺失的新字段才补充到 MD
用法:python scripts/full_api_refresh_v2.py
"""
import json
import os
import re
import sys
import time
from datetime import datetime
import requests
# ── 配置 ──────────────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")
if not API_TOKEN:
env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
if os.path.exists(env_path):
with open(env_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line.startswith("API_TOKEN="):
API_TOKEN = line.split("=", 1)[1].strip()
break
SITE_ID = 2790685415443269
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
REPORT_DIR = os.path.join("docs", "reports")
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")
HEADERS = {
"Authorization": f"Bearer {API_TOKEN}",
"Content-Type": "application/json",
}
# 已知的嵌套对象字段名(MD 中记录为 object,不展开子字段)
KNOWN_NESTED_OBJECTS = {
"siteProfile", "tableProfile", "settleList",
"goodsStockWarningInfo", "goodsCategoryList",
}
def load_registry():
with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
return json.load(f)
def call_api(module, action, body):
url = f"{API_BASE}{module}/{action}"
try:
resp = requests.post(url, json=body, headers=HEADERS, timeout=30)
resp.raise_for_status()
return resp.json()
except Exception as e:
print(f" ❌ 请求失败: {e}")
return None
def build_body(entry):
body = dict(entry.get("body") or {})
if entry.get("time_range") and entry.get("time_keys"):
keys = entry["time_keys"]
if len(keys) >= 2:
body[keys[0]] = START_TIME
body[keys[1]] = END_TIME
if entry.get("pagination"):
body[entry["pagination"].get("page_key", "page")] = 1
body[entry["pagination"].get("limit_key", "limit")] = LIMIT
return body
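# build_body 示例(注册表条目为虚构,仅示意字段含义):
#   entry = {"body": {"siteId": SITE_ID}, "time_range": True,
#            "time_keys": ["startTime", "endTime"],
#            "pagination": {"page_key": "page", "limit_key": "limit"}}
#   结果:在原 body 上追加 startTime=START_TIME、endTime=END_TIME、page=1、limit=LIMIT。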
def unwrap_records(raw_json, entry):
"""从原始 API 响应中提取业务记录列表"""
if raw_json is None:
return []
data = raw_json.get("data")
if data is None:
return []
table_name = entry["id"]
data_path = entry.get("data_path", "")
# tenant_member_balance_overview: data 本身就是汇总对象
if table_name == "tenant_member_balance_overview":
if isinstance(data, dict):
return [data]
return []
# 按 data_path 解析
if data_path and data_path.startswith("data."):
path_parts = data_path.split(".")[1:]
current = data
for part in path_parts:
if isinstance(current, dict):
current = current.get(part)
else:
current = None
break
if isinstance(current, list):
return current
# fallback
if isinstance(data, dict):
for k, v in data.items():
if isinstance(v, list) and k.lower() not in ("total",):
return v
if isinstance(data, list):
return data
return []
def get_top_level_fields(record):
"""只提取顶层字段名和类型(不递归展开嵌套对象)"""
fields = {}
if not isinstance(record, dict):
return fields
for k, v in record.items():
if isinstance(v, dict):
fields[k] = "object"
elif isinstance(v, list):
fields[k] = "array"
elif isinstance(v, bool):
fields[k] = "boolean"
elif isinstance(v, int):
fields[k] = "integer"
elif isinstance(v, float):
fields[k] = "number"
elif v is None:
fields[k] = "null"
else:
fields[k] = "string"
return fields
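# 示例(记录内容为虚构):
#   get_top_level_fields({"id": 1, "name": "A", "price": 9.9, "ok": True,
#                         "tags": [], "siteProfile": {}, "remark": None})
#   -> {"id": "integer", "name": "string", "price": "number", "ok": "boolean",
#       "tags": "array", "siteProfile": "object", "remark": "null"}
# 注意 bool 判断在 int 之前,避免 True/False 被归为 integer。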
def get_nested_fields(record, parent_key):
"""提取指定嵌套对象的子字段"""
obj = record.get(parent_key)
if not isinstance(obj, dict):
return {}
fields = {}
for k, v in obj.items():
path = f"{parent_key}.{k}"
if isinstance(v, dict):
fields[path] = "object"
elif isinstance(v, list):
fields[path] = "array"
elif isinstance(v, bool):
fields[path] = "boolean"
elif isinstance(v, int):
fields[path] = "integer"
elif isinstance(v, float):
fields[path] = "number"
elif v is None:
fields[path] = "null"
else:
fields[path] = "string"
return fields
def select_top5_richest(records):
"""从所有记录中选出字段数最多的前 5 条"""
if not records:
return []
scored = []
for i, rec in enumerate(records):
if not isinstance(rec, dict):
continue
field_count = len(rec)
json_len = len(json.dumps(rec, ensure_ascii=False))
scored.append((field_count, json_len, i, rec))
scored.sort(key=lambda x: (x[0], x[1]), reverse=True)
return [item[3] for item in scored[:5]]
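# 排序键为 (顶层字段数, JSON 序列化长度),两者均降序,因此字段最全、内容最丰富的记录排在前面,
# 仅保留前 5 条作为文档样本。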
def collect_all_top_fields(records):
"""遍历所有记录,收集所有顶层字段(含类型、出现次数、示例值)"""
all_fields = {}
for rec in records:
if not isinstance(rec, dict):
continue
fields = get_top_level_fields(rec)
for name, typ in fields.items():
if name not in all_fields:
all_fields[name] = {"type": typ, "count": 0, "example": None}
all_fields[name]["count"] += 1
if all_fields[name]["example"] is None:
val = rec.get(name)
if val is not None and val != "" and val != 0 and not isinstance(val, (dict, list)):
ex = str(val)
if len(ex) > 80:
ex = ex[:77] + "..."
all_fields[name]["example"] = ex
return all_fields
def collect_nested_fields(records, parent_key):
"""遍历所有记录,收集指定嵌套对象的子字段"""
all_fields = {}
for rec in records:
if not isinstance(rec, dict):
continue
fields = get_nested_fields(rec, parent_key)
for path, typ in fields.items():
if path not in all_fields:
all_fields[path] = {"type": typ, "count": 0, "example": None}
all_fields[path]["count"] += 1
if all_fields[path]["example"] is None:
obj = rec.get(parent_key, {})
k = path.split(".")[-1]
val = obj.get(k) if isinstance(obj, dict) else None
if val is not None and val != "" and val != 0 and not isinstance(val, (dict, list)):
ex = str(val)
if len(ex) > 80:
ex = ex[:77] + "..."
all_fields[path]["example"] = ex
return all_fields
def extract_md_response_fields(table_name):
"""从 MD 文档的响应字段章节提取字段名(排除请求参数)"""
md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
if not os.path.exists(md_path):
return set(), set(), ""
with open(md_path, "r", encoding="utf-8") as f:
content = f.read()
response_fields = set()
nested_fields = set() # siteProfile.xxx 等嵌套字段
field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|', re.MULTILINE)
header_fields = {"字段名", "类型", "示例值", "说明", "field", "example",
"description", "type", "路径", "参数", "必填", "属性", ""}
# 找到"四、响应字段"章节的范围
in_response = False
lines = content.split("\n")
response_start = None
response_end = len(lines)
for i, line in enumerate(lines):
s = line.strip()
if ("## 四" in s or "## 4" in s) and "响应字段" in s:
in_response = True
response_start = i
continue
if in_response and s.startswith("## ") and "响应字段" not in s:
response_end = i
break
if response_start is None:
# 没有明确的响应字段章节,尝试从整个文档提取
for m in field_pattern.finditer(content):
raw = m.group(1).strip()
if raw.lower() in {h.lower() for h in header_fields}:
continue
if "." in raw:
nested_fields.add(raw)
else:
response_fields.add(raw)
return response_fields, nested_fields, content
# 只从响应字段章节提取
response_section = "\n".join(lines[response_start:response_end])
for m in field_pattern.finditer(response_section):
raw = m.group(1).strip()
if raw.lower() in {h.lower() for h in header_fields}:
continue
if "." in raw:
nested_fields.add(raw)
else:
response_fields.add(raw)
return response_fields, nested_fields, content
def compare_fields(json_fields, md_fields, md_nested_fields, table_name):
"""对比 JSON 字段与 MD 字段,返回缺失和多余"""
json_names = set(json_fields.keys())
md_names = set(md_fields) if isinstance(md_fields, set) else set(md_fields)
# JSON 有但 MD 没有的顶层字段
missing_in_md = []
for name in sorted(json_names - md_names):
        # 跳过已知嵌套对象(如果 MD 中已记录为 object)
if name in KNOWN_NESTED_OBJECTS and name in md_names:
continue
info = json_fields[name]
missing_in_md.append((name, info))
# MD 有但 JSON 没有的字段
extra_in_md = sorted(md_names - json_names)
return missing_in_md, extra_in_md
def save_top5_sample(table_name, top5):
"""保存前 5 条最全记录作为 JSON 样本"""
sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
with open(sample_path, "w", encoding="utf-8") as f:
json.dump(top5, f, ensure_ascii=False, indent=2)
return sample_path
def update_md_with_missing_fields(table_name, missing_fields, md_content):
"""将真正缺失的字段补充到 MD 文档的响应字段章节末尾"""
if not missing_fields:
return False
md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
if not os.path.exists(md_path):
return False
lines = md_content.split("\n")
# 找到响应字段章节的最后一个表格行
insert_idx = None
in_response = False
last_table_row = None
for i, line in enumerate(lines):
s = line.strip()
if ("## 四" in s or "## 4" in s) and "响应字段" in s:
in_response = True
continue
if in_response and s.startswith("## ") and "响应字段" not in s:
insert_idx = last_table_row
break
if in_response and s.startswith("|") and "---" not in s:
# 检查是否是表头行
if not any(h in s for h in ["字段名", "字段", "类型", "说明"]):
last_table_row = i
elif last_table_row is None:
last_table_row = i
if insert_idx is None and last_table_row is not None:
insert_idx = last_table_row
if insert_idx is None:
return False
new_rows = []
for name, info in missing_fields:
typ = info["type"]
example = info["example"] or ""
count = info["count"]
new_rows.append(
f"| `{name}` | {typ} | {example} | "
f"(新发现字段,{count}/{LIMIT} 条记录中出现) |"
)
for row in reversed(new_rows):
lines.insert(insert_idx + 1, row)
with open(md_path, "w", encoding="utf-8") as f:
f.write("\n".join(lines))
return True
def generate_report(results):
"""生成最终的 JSON vs MD 对比报告"""
lines = []
lines.append("# API JSON 字段 vs MD 文档对比报告")
lines.append("")
lines.append(f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (Asia/Shanghai)")
lines.append(f"数据范围:{START_TIME} ~ {END_TIME}")
lines.append(f"每接口获取:{LIMIT}")
lines.append("")
# 汇总
ok = sum(1 for r in results if r["status"] == "ok")
gap = sum(1 for r in results if r["status"] == "gap")
skip = sum(1 for r in results if r["status"] == "skipped")
err = sum(1 for r in results if r["status"] == "error")
lines.append("## 汇总")
lines.append("")
lines.append("| 状态 | 数量 |")
lines.append("|------|------|")
lines.append(f"| ✅ 完全一致 | {ok} |")
lines.append(f"| ⚠️ 有新字段(已补充) | {gap} |")
lines.append(f"| ⏭️ 跳过 | {skip} |")
lines.append(f"| 💥 错误 | {err} |")
lines.append(f"| 合计 | {len(results)} |")
lines.append("")
# 各接口详情
lines.append("## 各接口详情")
lines.append("")
for r in results:
icon = {"ok": "", "gap": "⚠️", "skipped": "⏭️", "error": "💥"}.get(r["status"], "")
lines.append(f"### {r['table']} ({r.get('name_zh', '')})")
lines.append("")
lines.append(f"| 项目 | 值 |")
lines.append(f"|------|-----|")
lines.append(f"| 状态 | {icon} {r['status']} |")
lines.append(f"| 获取记录数 | {r['record_count']} |")
lines.append(f"| JSON 顶层字段数 | {r['json_field_count']} |")
lines.append(f"| MD 响应字段数 | {r['md_field_count']} |")
lines.append(f"| 数据路径 | `{r.get('data_path', 'N/A')}` |")
if r.get("top5_field_counts"):
lines.append(f"| 前5条最全记录字段数 | {r['top5_field_counts']} |")
lines.append("")
if r.get("missing_in_md"):
lines.append("新发现字段(已补充到 MD")
lines.append("")
lines.append("| 字段名 | 类型 | 示例 | 出现次数 |")
lines.append("|--------|------|------|----------|")
for name, info in r["missing_in_md"]:
lines.append(f"| `{name}` | {info['type']} | {info.get('example', '')} | {info['count']} |")
lines.append("")
if r.get("extra_in_md"):
lines.append(f"MD 中有但本次 JSON 未出现的字段(可能为条件性字段):`{'`, `'.join(r['extra_in_md'])}`")
lines.append("")
# 嵌套对象子字段汇总
if r.get("nested_summary"):
for parent, count in r["nested_summary"].items():
lines.append(f"嵌套对象 `{parent}` 含 {count} 个子字段MD 中已记录为 object不逐字段展开")
lines.append("")
# 附录siteProfile 通用字段参考
lines.append("## 附录siteProfile 通用字段参考")
lines.append("")
lines.append("以下字段在大多数接口的 `siteProfile` 嵌套对象中出现,为门店信息快照(冗余),各接口结构一致:")
lines.append("")
lines.append("| 字段 | 类型 | 说明 |")
lines.append("|------|------|------|")
lines.append("| `id` | integer | 门店 ID |")
lines.append("| `org_id` | integer | 组织 ID |")
lines.append("| `shop_name` | string | 门店名称 |")
lines.append("| `avatar` | string | 门店头像 URL |")
lines.append("| `business_tel` | string | 门店电话 |")
lines.append("| `full_address` | string | 完整地址 |")
lines.append("| `address` | string | 简短地址 |")
lines.append("| `longitude` | number | 经度 |")
lines.append("| `latitude` | number | 纬度 |")
lines.append("| `tenant_site_region_id` | integer | 区域 ID |")
lines.append("| `tenant_id` | integer | 租户 ID |")
lines.append("| `auto_light` | integer | 自动开灯 |")
lines.append("| `attendance_distance` | integer | 考勤距离 |")
lines.append("| `attendance_enabled` | integer | 考勤启用 |")
lines.append("| `wifi_name` | string | WiFi 名称 |")
lines.append("| `wifi_password` | string | WiFi 密码 |")
lines.append("| `customer_service_qrcode` | string | 客服二维码 |")
lines.append("| `customer_service_wechat` | string | 客服微信 |")
lines.append("| `fixed_pay_qrCode` | string | 固定支付二维码 |")
lines.append("| `prod_env` | integer | 生产环境标识 |")
lines.append("| `light_status` | integer | 灯光状态 |")
lines.append("| `light_type` | integer | 灯光类型 |")
lines.append("| `light_token` | string | 灯光控制 token |")
lines.append("| `site_type` | integer | 门店类型 |")
lines.append("| `site_label` | string | 门店标签 |")
lines.append("| `shop_status` | integer | 门店状态 |")
lines.append("")
return "\n".join(lines)
def main():
registry = load_registry()
print(f"加载 API 注册表: {len(registry)} 个端点")
print(f"时间范围: {START_TIME} ~ {END_TIME}")
print(f"每接口获取: {LIMIT}")
print("=" * 80)
results = []
for entry in registry:
table_name = entry["id"]
name_zh = entry.get("name_zh", "")
module = entry["module"]
action = entry["action"]
skip = entry.get("skip", False)
print(f"\n{'' * 60}")
print(f"[{table_name}] {name_zh}{module}/{action}")
if skip:
print(" ⏭️ 跳过")
results.append({
"table": table_name, "name_zh": name_zh,
"status": "skipped", "record_count": 0,
"json_field_count": 0, "md_field_count": 0,
"data_path": entry.get("data_path"),
})
continue
        # 使用已有的 raw JSON(上一步已获取)
raw_path = os.path.join(SAMPLES_DIR, f"{table_name}_raw.json")
if os.path.exists(raw_path):
with open(raw_path, "r", encoding="utf-8") as f:
raw = json.load(f)
print(f" 使用已缓存的原始响应")
else:
body = build_body(entry)
print(f" 请求: POST {module}/{action}")
raw = call_api(module, action, body)
if raw:
with open(raw_path, "w", encoding="utf-8") as f:
json.dump(raw, f, ensure_ascii=False, indent=2)
if raw is None:
results.append({
"table": table_name, "name_zh": name_zh,
"status": "error", "record_count": 0,
"json_field_count": 0, "md_field_count": 0,
"data_path": entry.get("data_path"),
})
continue
records = unwrap_records(raw, entry)
print(f" 记录数: {len(records)}")
if not records:
results.append({
"table": table_name, "name_zh": name_zh,
"status": "ok", "record_count": 0,
"json_field_count": 0, "md_field_count": 0,
"data_path": entry.get("data_path"),
})
continue
# 选出字段最全的前 5 条
top5 = select_top5_richest(records)
top5_counts = [len(r) for r in top5]
print(f" 前 5 条最全记录顶层字段数: {top5_counts}")
# 保存前 5 条样本
save_top5_sample(table_name, top5)
# 收集所有顶层字段
json_fields = collect_all_top_fields(records)
print(f" JSON 顶层字段数: {len(json_fields)}")
# 收集嵌套对象子字段(仅用于报告,不用于对比)
nested_summary = {}
for name, info in json_fields.items():
if info["type"] == "object" and name in KNOWN_NESTED_OBJECTS:
nested = collect_nested_fields(records, name)
nested_summary[name] = len(nested)
# 提取 MD 响应字段
md_fields, md_nested, md_content = extract_md_response_fields(table_name)
print(f" MD 响应字段数: {len(md_fields)}")
# 对比
missing_in_md, extra_in_md = compare_fields(json_fields, md_fields, md_nested, table_name)
        # 过滤掉已知嵌套对象(MD 中已记录为 object)
real_missing = [(n, i) for n, i in missing_in_md
if n not in KNOWN_NESTED_OBJECTS or n not in md_fields]
status = "ok" if not real_missing else "gap"
if real_missing:
print(f" ⚠️ 发现 {len(real_missing)} 个新字段:")
for name, info in real_missing:
print(f" + {name} ({info['type']}, {info['count']}次)")
# 补充到 MD
updated = update_md_with_missing_fields(table_name, real_missing, md_content)
if updated:
print(f" 📝 已补充到 MD 文档")
else:
print(f" ✅ 字段完全覆盖")
if extra_in_md:
print(f" MD 多 {len(extra_in_md)} 个条件性字段")
results.append({
"table": table_name, "name_zh": name_zh,
"status": status,
"record_count": len(records),
"json_field_count": len(json_fields),
"md_field_count": len(md_fields),
"data_path": entry.get("data_path"),
"missing_in_md": real_missing,
"extra_in_md": extra_in_md,
"top5_field_counts": top5_counts,
"nested_summary": nested_summary,
})
# ── 生成报告 ──
print(f"\n{'=' * 80}")
print("生成对比报告...")
report = generate_report(results)
os.makedirs(REPORT_DIR, exist_ok=True)
report_path = os.path.join(REPORT_DIR, "api_json_vs_md_report_20260214.md")
with open(report_path, "w", encoding="utf-8") as f:
f.write(report)
print(f"报告: {report_path}")
# JSON 详细结果
json_path = os.path.join(REPORT_DIR, "api_refresh_detail_20260214.json")
serializable = []
for r in results:
sr = dict(r)
if "missing_in_md" in sr and sr["missing_in_md"]:
sr["missing_in_md"] = [(n, {"type": i["type"], "count": i["count"]})
for n, i in sr["missing_in_md"]]
serializable.append(sr)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(serializable, f, ensure_ascii=False, indent=2)
# 汇总
ok = sum(1 for r in results if r["status"] == "ok")
gap = sum(1 for r in results if r["status"] == "gap")
skip = sum(1 for r in results if r["status"] == "skipped")
err = sum(1 for r in results if r["status"] == "error")
print(f"\n汇总: ✅ {ok} | ⚠️ {gap} | ⏭️ {skip} | 💥 {err}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,488 @@
#!/usr/bin/env python3
"""审计一览表生成脚本 — 解析模块
从 docs/audit/changes/ 目录扫描审计源记录 Markdown 文件,
提取结构化信息(日期、标题、修改文件、风险等级、变更类型、影响模块)。
"""
from __future__ import annotations
import os
import re
from dataclasses import dataclass, field
from pathlib import Path
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------
# 文件名格式:YYYY-MM-DD__slug.md
_FILENAME_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})__(.+)\.md$")
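# 文件名示例(slug 为虚构):"2026-02-14__dedupe-ods-snapshots.md"
#   -> date = "2026-02-14",slug = "dedupe-ods-snapshots"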
# 文件路径 → 功能模块映射(按最长前缀优先匹配)
MODULE_MAP: dict[str, str] = {
"api/": "API 层",
"tasks/ods": "ODS 层",
"tasks/dwd": "DWD 层",
"tasks/dws": "DWS 层",
"tasks/index": "指数算法",
"loaders/": "数据装载",
"database/": "数据库",
"orchestration/": "调度",
"config/": "配置",
"cli/": "CLI",
"models/": "模型",
"scd/": "SCD2",
"docs/": "文档",
"scripts/": "脚本工具",
"tests/": "测试",
"quality/": "质量校验",
"gui/": "GUI",
"utils/": "工具库",
}
# 按前缀长度降序排列,确保最长前缀优先匹配
_SORTED_PREFIXES: list[tuple[str, str]] = sorted(
MODULE_MAP.items(), key=lambda kv: len(kv[0]), reverse=True
)
# 所有合法模块名称(含兜底"其他")
VALID_MODULES: frozenset[str] = frozenset(MODULE_MAP.values()) | {"其他"}
# ---------------------------------------------------------------------------
# 数据类
# ---------------------------------------------------------------------------
@dataclass
class AuditEntry:
"""从单个审计源记录文件解析出的结构化数据"""
    date: str  # YYYY-MM-DD,从文件名提取
slug: str # 文件名中 __ 后的标识符
title: str # Markdown 一级标题
filename: str # 源文件名(不含路径)
changed_files: list[str] = field(default_factory=list) # 修改的文件路径列表
modules: set[str] = field(default_factory=set) # 影响的功能模块集合
risk_level: str = "未知" # 风险等级:高/中/低/极低
change_type: str = "功能" # 变更类型bugfix/功能/文档/重构/清理
# ---------------------------------------------------------------------------
# 模块分类
# ---------------------------------------------------------------------------
def classify_module(filepath: str) -> str:
"""根据 MODULE_MAP 将文件路径映射到功能模块。
匹配规则:按前缀长度降序逐一比较,首个命中即返回。
无任何前缀命中时返回 "其他"
"""
# 统一为正斜杠,去除前导 ./ 或 /
normalized = filepath.replace("\\", "/").lstrip("./")
for prefix, module_name in _SORTED_PREFIXES:
if normalized.startswith(prefix):
return module_name
return "其他"
# ---------------------------------------------------------------------------
# 解析辅助函数
# ---------------------------------------------------------------------------
def _extract_title(content: str) -> str | None:
"""从 Markdown 内容中提取第一个一级标题(# ...)。"""
for line in content.splitlines():
stripped = line.strip()
if stripped.startswith("# "):
return stripped[2:].strip()
return None
# 匹配"修改文件清单"/"文件清单"/"Changed"/"变更范围"/"变更摘要" 等章节标题
_FILE_SECTION_RE = re.compile(
r"^##\s+.*(修改文件|文件清单|Changed|变更范围|变更摘要).*$",
re.IGNORECASE,
)
# 从表格行提取文件路径:| `path` | ... 或 | path | ...
_TABLE_FILE_RE = re.compile(
r"^\|\s*`?([^`|]+?)`?\s*\|"
)
# 从列表行提取文件路径:- path 或 - `path`(忽略纯描述行)
_LIST_FILE_RE = re.compile(
r"^[-*]\s+`?([^\s`(]+\.[a-zA-Z0-9_]+)`?"
)
# 从含 → 的行提取源路径和目标路径
_ARROW_PATH_RE = re.compile(
r"`([^`]+?)`\s*→\s*`([^`]+?)`"
)
# 子章节标题(### ...),用于在文件清单章节内继续扫描
_SUB_HEADING_RE = re.compile(r"^###\s+")
def _extract_changed_files(content: str) -> list[str]:
"""从审计文件内容中提取修改文件路径列表。
扫描策略:
1. 找到"修改文件清单"/"文件清单"/"Changed"/"变更范围"等二级章节
2. 在该章节内解析表格行和列表行中的文件路径
3. 遇到下一个同级(##)章节时停止
"""
lines = content.splitlines()
results: list[str] = []
in_section = False
for line in lines:
stripped = line.strip()
if _FILE_SECTION_RE.match(stripped):
in_section = True
continue
# 遇到下一个二级章节,退出扫描
if in_section and stripped.startswith("## ") and not _FILE_SECTION_RE.match(stripped):
break
if not in_section:
continue
# 跳过表头分隔行
if re.match(r"^\|[-\s|:]+\|$", stripped):
continue
# 跳过子章节标题(### 新增文件 等),但继续扫描
if _SUB_HEADING_RE.match(stripped):
continue
# 尝试表格行
m = _TABLE_FILE_RE.match(stripped)
if m:
path = m.group(1).strip()
# 排除表头行("文件"、"文件/对象" 等)
if path and not re.match(r"^(文件|File|路径|对象)", path, re.IGNORECASE):
results.append(path)
continue
# 尝试含 → 的移动/重命名行(提取源和目标路径)
m_arrow = _ARROW_PATH_RE.search(stripped)
if m_arrow:
src, dst = m_arrow.group(1).strip(), m_arrow.group(2).strip()
if "/" in src:
results.append(src)
if "/" in dst:
results.append(dst)
continue
# 尝试列表行
m = _LIST_FILE_RE.match(stripped)
if m:
path = m.group(1).strip()
if path and "/" in path:
results.append(path)
continue
return results
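# 示例(审计记录片段为虚构):
#   ## 修改文件清单
#   | 文件 | 说明 |
#   |------|------|
#   | `api/routes.py` | 新增导出端点 |
#   - tasks/dws/build_index.py
# 解析结果为 ["api/routes.py", "tasks/dws/build_index.py"];
# 表头行("文件")与分隔行被排除,含 → 的移动/重命名行会同时记录源路径与目标路径。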
# 风险等级关键词(按优先级排列)
_RISK_KEYWORDS: list[tuple[str, str]] = [
("极低", "极低"),
("", ""),
("", ""),
("", ""),
]
# 匹配风险相关章节标题
_RISK_SECTION_RE = re.compile(
r"^##\s+.*(风险|Risk).*$", re.IGNORECASE
)
def _extract_risk_level(content: str) -> str:
"""从审计文件内容中提取风险等级。
扫描策略(按优先级):
1. 头部元数据行:`- 风险等级:低` 或 `- 风险:极低`
2. 风险相关二级章节内的关键词
3. 兜底:全文搜索含"风险"的行
"""
lines = content.splitlines()
    # 策略 1:头部元数据(通常在前 15 行内)
    _meta_risk_re = re.compile(r"^-\s*风险[等级]*[::]\s*(.+)$")
for line in lines[:15]:
m = _meta_risk_re.match(line.strip())
if m:
val = m.group(1)
if "极低" in val:
return "极低"
if "" in val:
return ""
if "" in val:
return ""
if "" in val:
return ""
# 策略 2风险相关二级章节
in_section = False
section_text = ""
for line in lines:
stripped = line.strip()
if _RISK_SECTION_RE.match(stripped):
in_section = True
continue
if in_section and stripped.startswith("## "):
break
if in_section:
section_text += stripped + " "
    # 策略 3:兜底,全文搜索含"风险"的行
if not section_text:
for line in lines:
if "风险" in line:
section_text += line.strip() + " "
if not section_text:
return "未知"
# 按优先级匹配:先检查"极低",再检查独立的"高/中/低"
if "极低" in section_text:
return "极低"
if re.search(r"风险[:]\s*高|高风险", section_text):
return ""
if re.search(r"风险[:]\s*中|中等风险", section_text):
return ""
# "纯文档" 等描述中含"低"但不含"极低"时匹配为"低"
if re.search(r"风险[:]\s*低|低风险|风险.*低", section_text):
return ""
# 推断:描述中含"纯文档/无运行时影响/纯分析"等表述视为极低
if re.search(r"纯文档|无运行时影响|纯分析|无逻辑改动|无代码", section_text):
return "极低"
return "未知"
# 变更类型推断关键词
_CHANGE_TYPE_PATTERNS: list[tuple[str, str]] = [
("bugfix", "bugfix"),
("bug", "bugfix"),
("修复", "bugfix"),
("重构", "重构"),
("清理", "清理"),
("纯文档", "文档"),
("无逻辑改动", "文档"),
("文档", "文档"),
]
def _infer_change_type(content: str) -> str:
"""从审计文件内容推断变更类型。
按优先级扫描关键词,首个命中即返回。
默认返回 "功能"
"""
lower = content.lower()
for keyword, ctype in _CHANGE_TYPE_PATTERNS:
if keyword in lower:
return ctype
return "功能"
# ---------------------------------------------------------------------------
# 核心解析函数
# ---------------------------------------------------------------------------
def parse_audit_file(filepath: str | Path) -> AuditEntry | None:
"""解析单个审计源记录文件,返回 AuditEntry。
文件名必须符合 YYYY-MM-DD__slug.md 格式,否则返回 None 并打印警告。
"""
filepath = Path(filepath)
filename = filepath.name
# 校验文件名格式
m = _FILENAME_RE.match(filename)
if not m:
print(f"[警告] 文件名格式不符,已跳过:{filename}")
return None
date_str = m.group(1)
slug = m.group(2)
# 读取文件内容
try:
content = filepath.read_text(encoding="utf-8")
except (UnicodeDecodeError, OSError) as exc:
print(f"[警告] 无法读取文件,已跳过:{filename}{exc}")
return None
# 提取标题(缺失时用 slug 兜底)
title = _extract_title(content) or slug
# 提取修改文件列表
changed_files = _extract_changed_files(content)
# 推导影响模块
if changed_files:
modules = {classify_module(f) for f in changed_files}
else:
modules = {"其他"}
# 提取风险等级
risk_level = _extract_risk_level(content)
# 推断变更类型
change_type = _infer_change_type(content)
return AuditEntry(
date=date_str,
slug=slug,
title=title,
filename=filename,
changed_files=changed_files,
modules=modules,
risk_level=risk_level,
change_type=change_type,
)
def scan_audit_dir(dirpath: str | Path) -> list[AuditEntry]:
"""扫描审计目录,返回按日期倒序排列的 AuditEntry 列表。
跳过非 .md 文件和格式不合规的文件。
目录为空或不存在时返回空列表。
"""
dirpath = Path(dirpath)
if not dirpath.is_dir():
return []
entries: list[AuditEntry] = []
for child in sorted(dirpath.iterdir()):
if not child.is_file() or child.suffix != ".md":
continue
entry = parse_audit_file(child)
if entry is not None:
entries.append(entry)
# 按日期倒序
entries.sort(key=lambda e: e.date, reverse=True)
return entries
# ---------------------------------------------------------------------------
# 渲染函数
# ---------------------------------------------------------------------------
def render_timeline_table(entries: list[AuditEntry]) -> str:
"""按时间倒序生成 Markdown 表格。
输入的 entries 应已按日期倒序排列(由 scan_audit_dir 保证)。
空列表时返回"暂无审计记录"提示。
"""
if not entries:
return "> 暂无审计记录\n"
lines: list[str] = [
"| 日期 | 需求摘要 | 变更类型 | 影响模块 | 风险 | 详情 |",
"|------|----------|----------|----------|------|------|",
]
for e in entries:
modules_str = ", ".join(sorted(e.modules))
link = f"[链接](changes/{e.filename})"
lines.append(
f"| {e.date} | {e.title} | {e.change_type} | {modules_str} | {e.risk_level} | {link} |"
)
return "\n".join(lines) + "\n"
def render_module_index(entries: list[AuditEntry]) -> str:
"""按模块分组生成 Markdown 章节。
每个模块一个三级标题 + 表格,模块按字母序排列。
空列表时返回"暂无审计记录"提示。
"""
if not entries:
return "> 暂无审计记录\n"
# 按模块分组
module_entries: dict[str, list[AuditEntry]] = {}
for e in entries:
for mod in e.modules:
module_entries.setdefault(mod, []).append(e)
sections: list[str] = []
for mod in sorted(module_entries.keys()):
mod_list = module_entries[mod]
section_lines: list[str] = [
f"### {mod}",
"",
"| 日期 | 需求摘要 | 变更类型 | 风险 | 详情 |",
"|------|----------|----------|------|------|",
]
for e in mod_list:
link = f"[链接](changes/{e.filename})"
section_lines.append(
f"| {e.date} | {e.title} | {e.change_type} | {e.risk_level} | {link} |"
)
sections.append("\n".join(section_lines) + "\n")
return "\n".join(sections)
def render_dashboard(entries: list[AuditEntry]) -> str:
"""组合时间线和模块索引生成完整 dashboard Markdown 文档。
包含:标题、生成时间戳、时间线视图、模块索引视图。
"""
from datetime import datetime
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
parts: list[str] = [
"# 审计一览表",
"",
f"> 自动生成于 {timestamp},请勿手动编辑。",
"",
"## 时间线视图",
"",
render_timeline_table(entries),
"## 模块索引",
"",
render_module_index(entries),
]
return "\n".join(parts)
# ---------------------------------------------------------------------------
# 主入口
# ---------------------------------------------------------------------------
def main() -> None:
"""扫描审计源记录 → 解析 → 渲染 → 写入 audit_dashboard.md。"""
audit_dir = Path("docs/audit/changes")
output_path = Path("docs/audit/audit_dashboard.md")
# 扫描并解析
entries = scan_audit_dir(audit_dir)
# 渲染完整 dashboard
content = render_dashboard(entries)
# 确保输出目录存在
output_path.parent.mkdir(parents=True, exist_ok=True)
# 写入文件
output_path.write_text(content, encoding="utf-8")
# 输出摘要
print(f"已解析 {len(entries)} 条审计记录")
print(f"输出文件:{output_path}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,983 @@
{
"assistant_accounts_master": [
"id",
"tenant_id",
"site_id",
"assistant_no",
"nickname",
"real_name",
"mobile",
"team_id",
"team_name",
"user_id",
"level",
"assistant_status",
"work_status",
"leave_status",
"entry_time",
"resign_time",
"start_time",
"end_time",
"create_time",
"update_time",
"order_trade_no",
"staff_id",
"staff_profile_id",
"system_role_id",
"avatar",
"birth_date",
"gender",
"height",
"weight",
"job_num",
"show_status",
"show_sort",
"sum_grade",
"assistant_grade",
"get_grade_times",
"introduce",
"video_introduction_url",
"group_id",
"group_name",
"shop_name",
"charge_way",
"entry_type",
"allow_cx",
"is_guaranteed",
"salary_grant_enabled",
"light_status",
"online_status",
"is_delete",
"cx_unit_price",
"pd_unit_price",
"last_table_id",
"last_table_name",
"person_org_id",
"serial_number",
"is_team_leader",
"criticism_status",
"last_update_name",
"ding_talk_synced",
"site_light_cfg_id",
"light_equipment_id",
"entry_sign_status",
"resign_sign_status",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash"
],
"assistant_cancellation_records": [
"id",
"siteid",
"siteprofile",
"assistantname",
"assistantabolishamount",
"assistanton",
"pdchargeminutes",
"tableareaid",
"tablearea",
"tableid",
"tablename",
"trashreason",
"createtime",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"tenant_id"
],
"assistant_service_records": [
"id",
"tenant_id",
"site_id",
"siteprofile",
"site_table_id",
"order_settle_id",
"order_trade_no",
"order_pay_id",
"order_assistant_id",
"order_assistant_type",
"assistantname",
"assistantno",
"assistant_level",
"levelname",
"site_assistant_id",
"skill_id",
"skillname",
"system_member_id",
"tablename",
"tenant_member_id",
"user_id",
"assistant_team_id",
"nickname",
"ledger_name",
"ledger_group_name",
"ledger_amount",
"ledger_count",
"ledger_unit_price",
"ledger_status",
"ledger_start_time",
"ledger_end_time",
"manual_discount_amount",
"member_discount_amount",
"coupon_deduct_money",
"service_money",
"projected_income",
"real_use_seconds",
"income_seconds",
"start_use_time",
"last_use_time",
"create_time",
"is_single_order",
"is_delete",
"is_trash",
"trash_reason",
"trash_applicant_id",
"trash_applicant_name",
"operator_id",
"operator_name",
"salesman_name",
"salesman_org_id",
"salesman_user_id",
"person_org_id",
"add_clock",
"returns_clock",
"composite_grade",
"composite_grade_time",
"skill_grade",
"service_grade",
"sum_grade",
"grade_status",
"get_grade_times",
"is_not_responding",
"is_confirm",
"payload",
"source_file",
"source_endpoint",
"fetched_at",
"content_hash",
"assistantteamname",
"real_service_money"
],
"goods_stock_movements": [
"sitegoodsstockid",
"tenantid",
"siteid",
"sitegoodsid",
"goodsname",
"goodscategoryid",
"goodssecondcategoryid",
"unit",
"price",
"stocktype",
"changenum",
"startnum",
"endnum",
"changenuma",
"startnuma",
"endnuma",
"remark",
"operatorname",
"createtime",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash"
],
"goods_stock_summary": [
"sitegoodsid",
"goodsname",
"goodsunit",
"goodscategoryid",
"goodscategorysecondid",
"categoryname",
"rangestartstock",
"rangeendstock",
"rangein",
"rangeout",
"rangesale",
"rangesalemoney",
"rangeinventory",
"currentstock",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash"
],
"group_buy_packages": [
"id",
"package_id",
"package_name",
"selling_price",
"coupon_money",
"date_type",
"date_info",
"start_time",
"end_time",
"start_clock",
"end_clock",
"add_start_clock",
"add_end_clock",
"duration",
"usable_count",
"usable_range",
"table_area_id",
"table_area_name",
"table_area_id_list",
"tenant_table_area_id",
"tenant_table_area_id_list",
"site_id",
"site_name",
"tenant_id",
"card_type_ids",
"group_type",
"system_group_type",
"type",
"effective_status",
"is_enabled",
"is_delete",
"max_selectable_categories",
"area_tag_type",
"creator_name",
"create_time",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"is_first_limit",
"sort",
"tenantcouponsaleorderitemid"
],
"group_buy_redemption_records": [
"id",
"tenant_id",
"site_id",
"sitename",
"table_id",
"tablename",
"tableareaname",
"tenant_table_area_id",
"order_trade_no",
"order_settle_id",
"order_pay_id",
"order_coupon_id",
"order_coupon_channel",
"coupon_code",
"coupon_money",
"coupon_origin_id",
"ledger_name",
"ledger_group_name",
"ledger_amount",
"ledger_count",
"ledger_unit_price",
"ledger_status",
"table_charge_seconds",
"promotion_activity_id",
"promotion_coupon_id",
"promotion_seconds",
"offer_type",
"assistant_promotion_money",
"assistant_service_promotion_money",
"table_service_promotion_money",
"goods_promotion_money",
"recharge_promotion_money",
"reward_promotion_money",
"goodsoptionprice",
"salesman_name",
"sales_man_org_id",
"salesman_role_id",
"salesman_user_id",
"operator_id",
"operator_name",
"is_single_order",
"is_delete",
"create_time",
"payload",
"source_file",
"source_endpoint",
"fetched_at",
"content_hash",
"assistant_service_share_money",
"assistant_share_money",
"coupon_sale_id",
"good_service_share_money",
"goods_share_money",
"member_discount_money",
"recharge_share_money",
"table_service_share_money",
"table_share_money"
],
"member_balance_changes": [
"tenant_id",
"site_id",
"register_site_id",
"registersitename",
"paysitename",
"id",
"tenant_member_id",
"tenant_member_card_id",
"system_member_id",
"membername",
"membermobile",
"card_type_id",
"membercardtypename",
"account_data",
"before",
"after",
"refund_amount",
"from_type",
"payment_method",
"relate_id",
"remark",
"operator_id",
"operator_name",
"is_delete",
"create_time",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"principal_after",
"principal_before",
"principal_data"
],
"member_profiles": [
"tenant_id",
"register_site_id",
"site_name",
"id",
"system_member_id",
"member_card_grade_code",
"member_card_grade_name",
"mobile",
"nickname",
"point",
"growth_value",
"referrer_member_id",
"status",
"user_status",
"create_time",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"pay_money_sum",
"person_tenant_org_id",
"person_tenant_org_name",
"recharge_money_sum",
"register_source"
],
"member_stored_value_cards": [
"tenant_id",
"tenant_member_id",
"system_member_id",
"register_site_id",
"site_name",
"id",
"member_card_grade_code",
"member_card_grade_code_name",
"member_card_type_name",
"member_name",
"member_mobile",
"card_type_id",
"card_no",
"card_physics_type",
"balance",
"denomination",
"table_discount",
"goods_discount",
"assistant_discount",
"assistant_reward_discount",
"table_service_discount",
"assistant_service_discount",
"coupon_discount",
"goods_service_discount",
"assistant_discount_sub_switch",
"table_discount_sub_switch",
"goods_discount_sub_switch",
"assistant_reward_discount_sub_switch",
"table_service_deduct_radio",
"assistant_service_deduct_radio",
"goods_service_deduct_radio",
"assistant_deduct_radio",
"table_deduct_radio",
"goods_deduct_radio",
"coupon_deduct_radio",
"assistant_reward_deduct_radio",
"tablecarddeduct",
"tableservicecarddeduct",
"goodscardeduct",
"goodsservicecarddeduct",
"assistantcarddeduct",
"assistantservicecarddeduct",
"assistantrewardcarddeduct",
"cardsettlededuct",
"couponcarddeduct",
"deliveryfeededuct",
"use_scene",
"able_cross_site",
"is_allow_give",
"is_allow_order_deduct",
"is_delete",
"bind_password",
"goods_discount_range_type",
"goodscategoryid",
"tableareaid",
"effect_site_id",
"start_time",
"end_time",
"disable_start_time",
"disable_end_time",
"last_consume_time",
"create_time",
"status",
"sort",
"tenantavatar",
"tenantname",
"pdassisnatlevel",
"cxassisnatlevel",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"able_share_member_discount",
"electricity_deduct_radio",
"electricity_discount",
"electricitycarddeduct",
"member_grade",
"principal_balance",
"rechargefreezebalance"
],
"payment_transactions": [
"id",
"site_id",
"siteprofile",
"relate_type",
"relate_id",
"pay_amount",
"pay_status",
"pay_time",
"create_time",
"payment_method",
"online_pay_channel",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"tenant_id"
],
"platform_coupon_redemption_records": [
"id",
"verify_id",
"certificate_id",
"coupon_code",
"coupon_name",
"coupon_channel",
"groupon_type",
"group_package_id",
"sale_price",
"coupon_money",
"coupon_free_time",
"coupon_cover",
"coupon_remark",
"use_status",
"consume_time",
"create_time",
"deal_id",
"channel_deal_id",
"site_id",
"site_order_id",
"table_id",
"tenant_id",
"operator_id",
"operator_name",
"is_delete",
"siteprofile",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash"
],
"recharge_settlements": [
"id",
"tenantid",
"siteid",
"sitename",
"balanceamount",
"cardamount",
"cashamount",
"couponamount",
"createtime",
"memberid",
"membername",
"tenantmembercardid",
"membercardtypename",
"memberphone",
"tableid",
"consumemoney",
"onlineamount",
"operatorid",
"operatorname",
"revokeorderid",
"revokeordername",
"revoketime",
"payamount",
"pointamount",
"refundamount",
"settlename",
"settlerelateid",
"settlestatus",
"settletype",
"paytime",
"roundingamount",
"paymentmethod",
"adjustamount",
"assistantcxmoney",
"assistantpdmoney",
"couponsaleamount",
"memberdiscountamount",
"tablechargemoney",
"goodsmoney",
"realgoodsmoney",
"servicemoney",
"prepaymoney",
"salesmanname",
"orderremark",
"salesmanuserid",
"canberevoked",
"pointdiscountprice",
"pointdiscountcost",
"activitydiscount",
"serialnumber",
"assistantmanualdiscount",
"allcoupondiscount",
"goodspromotionmoney",
"assistantpromotionmoney",
"isusecoupon",
"isusediscount",
"isactivity",
"isbindmember",
"isfirst",
"rechargecardamount",
"giftcardamount",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"electricityadjustmoney",
"electricitymoney",
"mervousalesamount",
"plcouponsaleamount",
"realelectricitymoney"
],
"refund_transactions": [
"id",
"tenant_id",
"tenantname",
"site_id",
"siteprofile",
"relate_type",
"relate_id",
"pay_sn",
"pay_amount",
"refund_amount",
"round_amount",
"pay_status",
"pay_time",
"create_time",
"payment_method",
"pay_terminal",
"pay_config_id",
"online_pay_channel",
"online_pay_type",
"channel_fee",
"channel_payer_id",
"channel_pay_no",
"member_id",
"member_card_id",
"cashier_point_id",
"operator_id",
"action_type",
"check_status",
"is_revoke",
"is_delete",
"balance_frozen_amount",
"card_frozen_amount",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash"
],
"settlement_records": [
"id",
"tenantid",
"siteid",
"sitename",
"balanceamount",
"cardamount",
"cashamount",
"couponamount",
"createtime",
"memberid",
"membername",
"tenantmembercardid",
"membercardtypename",
"memberphone",
"tableid",
"consumemoney",
"onlineamount",
"operatorid",
"operatorname",
"revokeorderid",
"revokeordername",
"revoketime",
"payamount",
"pointamount",
"refundamount",
"settlename",
"settlerelateid",
"settlestatus",
"settletype",
"paytime",
"roundingamount",
"paymentmethod",
"adjustamount",
"assistantcxmoney",
"assistantpdmoney",
"couponsaleamount",
"memberdiscountamount",
"tablechargemoney",
"goodsmoney",
"realgoodsmoney",
"servicemoney",
"prepaymoney",
"salesmanname",
"orderremark",
"salesmanuserid",
"canberevoked",
"pointdiscountprice",
"pointdiscountcost",
"activitydiscount",
"serialnumber",
"assistantmanualdiscount",
"allcoupondiscount",
"goodspromotionmoney",
"assistantpromotionmoney",
"isusecoupon",
"isusediscount",
"isactivity",
"isbindmember",
"isfirst",
"rechargecardamount",
"giftcardamount",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"electricityadjustmoney",
"electricitymoney",
"mervousalesamount",
"plcouponsaleamount",
"realelectricitymoney"
],
"site_tables_master": [
"id",
"site_id",
"sitename",
"appletQrCodeUrl",
"areaname",
"audit_status",
"charge_free",
"create_time",
"delay_lights_time",
"is_online_reservation",
"is_rest_area",
"light_status",
"only_allow_groupon",
"order_delay_time",
"self_table",
"show_status",
"site_table_area_id",
"tablestatusname",
"table_cloth_use_cycle",
"table_cloth_use_time",
"table_name",
"table_price",
"table_status",
"temporary_light_second",
"virtual_table",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"order_id"
],
"stock_goods_category_tree": [
"id",
"tenant_id",
"category_name",
"alias_name",
"pid",
"business_name",
"tenant_goods_business_id",
"open_salesman",
"categoryboxes",
"sort",
"is_warehousing",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash"
],
"store_goods_master": [
"id",
"tenant_id",
"site_id",
"sitename",
"tenant_goods_id",
"goods_name",
"goods_bar_code",
"goods_category_id",
"goods_second_category_id",
"onecategoryname",
"twocategoryname",
"unit",
"sale_price",
"cost_price",
"cost_price_type",
"min_discount_price",
"safe_stock",
"stock",
"stock_a",
"sale_num",
"total_purchase_cost",
"total_sales",
"average_monthly_sales",
"batch_stock_quantity",
"days_available",
"provisional_total_cost",
"enable_status",
"audit_status",
"goods_state",
"is_delete",
"is_warehousing",
"able_discount",
"able_site_transfer",
"forbid_sell_status",
"freeze",
"send_state",
"custom_label_type",
"option_required",
"sale_channel",
"sort",
"remark",
"pinyin_initial",
"goods_cover",
"create_time",
"update_time",
"payload",
"source_file",
"source_endpoint",
"fetched_at",
"content_hash",
"commodity_code",
"not_sale"
],
"store_goods_sales_records": [
"id",
"tenant_id",
"site_id",
"siteid",
"sitename",
"site_goods_id",
"tenant_goods_id",
"order_settle_id",
"order_trade_no",
"order_goods_id",
"ordergoodsid",
"order_pay_id",
"order_coupon_id",
"ledger_name",
"ledger_group_name",
"ledger_amount",
"ledger_count",
"ledger_unit_price",
"ledger_status",
"discount_money",
"discount_price",
"coupon_deduct_money",
"member_discount_amount",
"option_coupon_deduct_money",
"option_member_discount_money",
"point_discount_money",
"point_discount_money_cost",
"real_goods_money",
"cost_money",
"push_money",
"sales_type",
"is_single_order",
"is_delete",
"goods_remark",
"option_price",
"option_value_name",
"member_coupon_id",
"package_coupon_id",
"sales_man_org_id",
"salesman_name",
"salesman_role_id",
"salesman_user_id",
"operator_id",
"operator_name",
"opensalesman",
"returns_number",
"site_table_id",
"tenant_goods_business_id",
"tenant_goods_category_id",
"create_time",
"payload",
"source_file",
"source_endpoint",
"fetched_at",
"content_hash",
"coupon_share_money"
],
"table_fee_discount_records": [
"id",
"tenant_id",
"site_id",
"siteprofile",
"site_table_id",
"tableprofile",
"tenant_table_area_id",
"adjust_type",
"ledger_amount",
"ledger_count",
"ledger_name",
"ledger_status",
"applicant_id",
"applicant_name",
"operator_id",
"operator_name",
"order_settle_id",
"order_trade_no",
"is_delete",
"create_time",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"area_type_id",
"charge_free",
"site_table_area_id",
"site_table_area_name",
"sitename",
"table_name",
"table_price",
"tenant_name"
],
"table_fee_transactions": [
"id",
"tenant_id",
"site_id",
"siteprofile",
"site_table_id",
"site_table_area_id",
"site_table_area_name",
"tenant_table_area_id",
"order_trade_no",
"order_pay_id",
"order_settle_id",
"ledger_name",
"ledger_amount",
"ledger_count",
"ledger_unit_price",
"ledger_status",
"ledger_start_time",
"ledger_end_time",
"start_use_time",
"last_use_time",
"real_table_use_seconds",
"real_table_charge_money",
"add_clock_seconds",
"adjust_amount",
"coupon_promotion_amount",
"member_discount_amount",
"used_card_amount",
"mgmt_fee",
"service_money",
"fee_total",
"is_single_order",
"is_delete",
"member_id",
"operator_id",
"operator_name",
"salesman_name",
"salesman_org_id",
"salesman_user_id",
"create_time",
"payload",
"source_file",
"source_endpoint",
"fetched_at",
"content_hash",
"activity_discount_amount",
"order_consumption_type",
"real_service_money"
],
"tenant_goods_master": [
"id",
"tenant_id",
"goods_name",
"goods_bar_code",
"goods_category_id",
"goods_second_category_id",
"categoryname",
"unit",
"goods_number",
"out_goods_id",
"goods_state",
"sale_channel",
"able_discount",
"able_site_transfer",
"is_delete",
"is_warehousing",
"isinsite",
"cost_price",
"cost_price_type",
"market_price",
"min_discount_price",
"common_sale_royalty",
"point_sale_royalty",
"pinyin_initial",
"commoditycode",
"commodity_code",
"goods_cover",
"supplier_id",
"remark_name",
"create_time",
"update_time",
"payload",
"source_file",
"source_endpoint",
"fetched_at",
"content_hash",
"not_sale"
]
}

View File

@@ -0,0 +1,404 @@
# -*- coding: utf-8 -*-
"""
一键重建 ETL 相关 Schema,并执行 ODS → DWD。
本脚本面向“离线示例 JSON 回放”的开发/运维场景,使用当前项目内的任务实现:
1) (可选)DROP 并重建 schema:`etl_admin` / `billiards_ods` / `billiards_dwd`
2) 执行 `INIT_ODS_SCHEMA`:创建 `etl_admin` 元数据表 + 执行 `schema_ODS_doc.sql`(内部会做轻量清洗)
3) 执行 `INIT_DWD_SCHEMA`:执行 `schema_dwd_doc.sql`
4) 执行 `MANUAL_INGEST`:从本地 JSON 目录灌入 ODS
5) 执行 `DWD_LOAD_FROM_ODS`:从 ODS 装载到 DWD
用法(推荐):
python -m scripts.rebuild.rebuild_db_and_run_ods_to_dwd ^
--dsn "postgresql://user:pwd@host:5432/db" ^
--store-id 1 ^
--json-dir "export/test-json-doc" ^
--drop-schemas
环境变量(可选):
PG_DSN、STORE_ID、INGEST_SOURCE_DIR
日志:
默认同时输出到控制台与文件;文件路径为 `io.log_root/rebuild_db_<时间戳>.log`。
"""
from __future__ import annotations
import argparse
import logging
import os
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import psycopg2
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
from tasks.dwd.dwd_load_task import DwdLoadTask
from tasks.utility.init_dwd_schema_task import InitDwdSchemaTask
from tasks.utility.init_schema_task import InitOdsSchemaTask
from tasks.utility.manual_ingest_task import ManualIngestTask
DEFAULT_JSON_DIR = "export/test-json-doc"
@dataclass(frozen=True)
class RunArgs:
"""脚本参数对象(用于减少散落的参数传递)。"""
dsn: str
store_id: int
json_dir: str
drop_schemas: bool
terminate_own_sessions: bool
demo: bool
only_files: list[str]
only_dwd_tables: list[str]
stop_after: str | None
def _attach_file_logger(log_root: str | Path, filename: str, logger: logging.Logger) -> logging.Handler | None:
"""
给 root logger 附加文件日志处理器(UTF-8)。
说明:
- 使用 root logger 是为了覆盖项目中不同命名的 logger(包含第三方/子模块)。
- 若创建失败仅记录 warning,不中断主流程。
返回值:
创建成功返回 handler(调用方负责 removeHandler/close);失败返回 None。
"""
log_dir = Path(log_root)
try:
log_dir.mkdir(parents=True, exist_ok=True)
except Exception as exc: # noqa: BLE001
logger.warning("创建日志目录失败:%s%s", log_dir, exc)
return None
log_path = log_dir / filename
try:
handler: logging.Handler = logging.FileHandler(log_path, encoding="utf-8")
except Exception as exc: # noqa: BLE001
logger.warning("创建文件日志失败:%s%s", log_path, exc)
return None
handler.setLevel(logging.INFO)
handler.setFormatter(
logging.Formatter(
fmt="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
)
logging.getLogger().addHandler(handler)
logger.info("文件日志已启用:%s", log_path)
return handler
def _parse_args() -> RunArgs:
"""解析命令行/环境变量参数。"""
parser = argparse.ArgumentParser(description="重建 Schema 并执行 ODS→DWD离线 JSON 回放)")
parser.add_argument("--dsn", default=os.environ.get("PG_DSN"), help="PostgreSQL DSN默认读取 PG_DSN")
parser.add_argument(
"--store-id",
type=int,
default=int(os.environ.get("STORE_ID") or 1),
help="门店/租户 store_id默认读取 STORE_ID否则为 1",
)
parser.add_argument(
"--json-dir",
default=os.environ.get("INGEST_SOURCE_DIR") or DEFAULT_JSON_DIR,
help=f"示例 JSON 目录(默认 {DEFAULT_JSON_DIR},也可读 INGEST_SOURCE_DIR",
)
parser.add_argument(
"--drop-schemas",
action=argparse.BooleanOptionalAction,
default=True,
help="是否先 DROP 并重建 etl_admin/billiards_ods/billiards_dwd默认",
)
parser.add_argument(
"--terminate-own-sessions",
action=argparse.BooleanOptionalAction,
default=True,
help="执行 DROP 前是否终止当前用户的 idle-in-transaction 会话(默认:是)",
)
parser.add_argument(
"--demo",
action=argparse.BooleanOptionalAction,
default=False,
help="运行最小 Demo仅导入 member_profiles 并生成 dim_member/dim_member_ex",
)
parser.add_argument(
"--only-files",
default="",
help="仅处理指定 JSON 文件(逗号分隔,不含 .json例如member_profiles,settlement_records",
)
parser.add_argument(
"--only-dwd-tables",
default="",
help="仅处理指定 DWD 表逗号分隔支持完整名或表名例如billiards_dwd.dim_member,dim_member_ex",
)
parser.add_argument(
"--stop-after",
default="",
help="在指定阶段后停止可选DROP_SCHEMAS/INIT_ODS_SCHEMA/INIT_DWD_SCHEMA/MANUAL_INGEST/DWD_LOAD_FROM_ODS/BASIC_VALIDATE",
)
args = parser.parse_args()
if not args.dsn:
raise SystemExit("缺少 DSN请传入 --dsn 或设置环境变量 PG_DSN")
only_files = [x.strip().lower() for x in str(args.only_files or "").split(",") if x.strip()]
only_dwd_tables = [x.strip().lower() for x in str(args.only_dwd_tables or "").split(",") if x.strip()]
stop_after = str(args.stop_after or "").strip().upper() or None
return RunArgs(
dsn=args.dsn,
store_id=args.store_id,
json_dir=str(args.json_dir),
drop_schemas=bool(args.drop_schemas),
terminate_own_sessions=bool(args.terminate_own_sessions),
demo=bool(args.demo),
only_files=only_files,
only_dwd_tables=only_dwd_tables,
stop_after=stop_after,
)
def _build_config(args: RunArgs) -> AppConfig:
"""构建本次执行所需的最小配置覆盖。"""
manual_cfg: dict[str, Any] = {}
dwd_cfg: dict[str, Any] = {}
if args.demo:
manual_cfg["include_files"] = ["member_profiles"]
dwd_cfg["only_tables"] = ["billiards_dwd.dim_member", "billiards_dwd.dim_member_ex"]
if args.only_files:
manual_cfg["include_files"] = args.only_files
if args.only_dwd_tables:
dwd_cfg["only_tables"] = args.only_dwd_tables
overrides: dict[str, Any] = {
"app": {"store_id": args.store_id},
"pipeline": {"flow": "INGEST_ONLY", "ingest_source_dir": args.json_dir},
"manual": manual_cfg,
"dwd": dwd_cfg,
# 离线回放/建仓可能耗时较长,关闭 statement_timeout,避免被默认 30s 中断;
# 同时关闭 lock_timeout,避免 DROP/DDL 因锁等待稍久就直接失败。
"db": {"dsn": args.dsn, "session": {"statement_timeout_ms": 0, "lock_timeout_ms": 0}},
}
return AppConfig.load(overrides)
def _drop_schemas(db: DatabaseOperations, logger: logging.Logger) -> None:
"""删除并重建 ETL 相关 schema具备破坏性请谨慎"""
with db.conn.cursor() as cur:
# 避免因为其他会话持锁而无限等待;若确实被占用,提示用户先释放/终止阻塞会话。
cur.execute("SET lock_timeout TO '5s'")
for schema in ("billiards_dwd", "billiards_ods", "etl_admin"):
logger.info("DROP SCHEMA IF EXISTS %s CASCADE ...", schema)
cur.execute(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE;')
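# 等价 SQL 示例(仅供人工排查时对照;与上面循环逐条执行的语句一致,具备破坏性,
# 生产环境执行前请确认没有业务会话持有相关对象的锁):
#
#   SET lock_timeout TO '5s';
#   DROP SCHEMA IF EXISTS "billiards_dwd" CASCADE;
#   DROP SCHEMA IF EXISTS "billiards_ods" CASCADE;
#   DROP SCHEMA IF EXISTS "etl_admin" CASCADE;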
def _terminate_own_idle_in_tx(db: DatabaseOperations, logger: logging.Logger) -> int:
"""终止当前用户在本库中处于 idle-in-transaction 的会话,避免阻塞 DROP/DDL。"""
with db.conn.cursor() as cur:
cur.execute(
"""
SELECT pid
FROM pg_stat_activity
WHERE datname = current_database()
AND usename = current_user
AND pid <> pg_backend_pid()
AND state = 'idle in transaction'
"""
)
pids = [r[0] for r in cur.fetchall()]
killed = 0
for pid in pids:
cur.execute("SELECT pg_terminate_backend(%s)", (pid,))
ok = bool(cur.fetchone()[0])
logger.info("终止会话 pid=%s ok=%s", pid, ok)
killed += 1 if ok else 0
return killed
def _run_task(task, logger: logging.Logger) -> dict:
"""统一运行任务并打印关键结果。"""
result = task.execute(None)
logger.info("%s: status=%s counts=%s", task.get_task_code(), result.get("status"), result.get("counts"))
return result
def _basic_validate(db: DatabaseOperations, logger: logging.Logger) -> None:
"""做最基础的可用性校验schema 存在、关键表行数可查询。"""
checks = [
("billiards_ods", "member_profiles"),
("billiards_ods", "settlement_records"),
("billiards_dwd", "dim_member"),
("billiards_dwd", "dwd_settlement_head"),
]
for schema, table in checks:
try:
rows = db.query(f'SELECT COUNT(1) AS cnt FROM "{schema}"."{table}"')
logger.info("校验行数:%s.%s = %s", schema, table, (rows[0] or {}).get("cnt") if rows else None)
except Exception as exc: # noqa: BLE001
logger.warning("校验失败:%s.%s%s", schema, table, exc)
def _connect_db_with_retry(cfg: AppConfig, logger: logging.Logger) -> DatabaseConnection:
"""创建数据库连接(带重试),避免短暂网络抖动导致脚本直接失败。"""
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
connect_timeout = cfg["db"].get("connect_timeout_sec")
backoffs = [1, 2, 4, 8, 16]
last_exc: Exception | None = None
for attempt, wait_sec in enumerate([0] + backoffs, start=1):
if wait_sec:
time.sleep(wait_sec)
try:
return DatabaseConnection(dsn=dsn, session=session, connect_timeout=connect_timeout)
except Exception as exc: # noqa: BLE001
last_exc = exc
logger.warning("数据库连接失败(第 %s 次):%s", attempt, exc)
raise last_exc or RuntimeError("数据库连接失败")
def _is_connection_error(exc: Exception) -> bool:
"""判断是否为连接断开/服务端异常导致的可重试错误。"""
return isinstance(exc, (psycopg2.OperationalError, psycopg2.InterfaceError))
def _run_stage_with_reconnect(
cfg: AppConfig,
logger: logging.Logger,
stage_name: str,
fn,
max_attempts: int = 3,
) -> dict | None:
"""
运行单个阶段:失败(尤其是连接断开)时自动重连并重试。
fn: (db_ops) -> dict | None
"""
last_exc: Exception | None = None
for attempt in range(1, max_attempts + 1):
db_conn = _connect_db_with_retry(cfg, logger)
db_ops = DatabaseOperations(db_conn)
try:
logger.info("阶段开始:%s(第 %s/%s 次)", stage_name, attempt, max_attempts)
result = fn(db_ops)
logger.info("阶段完成:%s", stage_name)
return result
except Exception as exc: # noqa: BLE001
last_exc = exc
logger.exception("阶段失败:%s(第 %s/%s 次):%s", stage_name, attempt, max_attempts, exc)
# 连接类错误允许重试;非连接错误直接抛出,避免掩盖逻辑问题。
if not _is_connection_error(exc):
raise
time.sleep(min(2**attempt, 10))
finally:
try:
db_ops.close() # type: ignore[attr-defined]
except Exception:
pass
try:
db_conn.close()
except Exception:
pass
raise last_exc or RuntimeError(f"阶段失败:{stage_name}")
def main() -> int:
"""脚本主入口:按顺序重建并跑通 ODS→DWD。"""
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("fq_etl.rebuild_db")
args = _parse_args()
cfg = _build_config(args)
# 默认启用文件日志,便于事后追溯(即便运行失败也应尽早落盘)。
file_handler = _attach_file_logger(
log_root=cfg["io"]["log_root"],
filename=time.strftime("rebuild_db_%Y%m%d-%H%M%S.log"),
logger=logger,
)
try:
json_dir = Path(args.json_dir)
if not json_dir.exists():
logger.error("示例 JSON 目录不存在:%s", json_dir)
return 2
def stage_drop(db_ops: DatabaseOperations):
if not args.drop_schemas:
return None
if args.terminate_own_sessions:
killed = _terminate_own_idle_in_tx(db_ops, logger)
if killed:
db_ops.commit()
_drop_schemas(db_ops, logger)
db_ops.commit()
return None
def stage_init_ods(db_ops: DatabaseOperations):
return _run_task(InitOdsSchemaTask(cfg, db_ops, None, logger), logger)
def stage_init_dwd(db_ops: DatabaseOperations):
return _run_task(InitDwdSchemaTask(cfg, db_ops, None, logger), logger)
def stage_manual_ingest(db_ops: DatabaseOperations):
logger.info("开始执行MANUAL_INGESTjson_dir=%s", json_dir)
return _run_task(ManualIngestTask(cfg, db_ops, None, logger), logger)
def stage_dwd_load(db_ops: DatabaseOperations):
logger.info("开始执行DWD_LOAD_FROM_ODS")
return _run_task(DwdLoadTask(cfg, db_ops, None, logger), logger)
_run_stage_with_reconnect(cfg, logger, "DROP_SCHEMAS", stage_drop, max_attempts=3)
if args.stop_after == "DROP_SCHEMAS":
return 0
_run_stage_with_reconnect(cfg, logger, "INIT_ODS_SCHEMA", stage_init_ods, max_attempts=3)
if args.stop_after == "INIT_ODS_SCHEMA":
return 0
_run_stage_with_reconnect(cfg, logger, "INIT_DWD_SCHEMA", stage_init_dwd, max_attempts=3)
if args.stop_after == "INIT_DWD_SCHEMA":
return 0
_run_stage_with_reconnect(cfg, logger, "MANUAL_INGEST", stage_manual_ingest, max_attempts=5)
if args.stop_after == "MANUAL_INGEST":
return 0
_run_stage_with_reconnect(cfg, logger, "DWD_LOAD_FROM_ODS", stage_dwd_load, max_attempts=5)
if args.stop_after == "DWD_LOAD_FROM_ODS":
return 0
# 校验阶段复用一条新连接即可
_run_stage_with_reconnect(
cfg,
logger,
"BASIC_VALIDATE",
lambda db_ops: _basic_validate(db_ops, logger),
max_attempts=3,
)
if args.stop_after == "BASIC_VALIDATE":
return 0
return 0
finally:
if file_handler is not None:
try:
logging.getLogger().removeHandler(file_handler)
except Exception:
pass
try:
file_handler.close()
except Exception:
pass
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,523 @@
# -*- coding: utf-8 -*-
"""
重新获取全部 API 接口的 JSON 数据(最多 100 条),
遍历所有记录提取最全字段集合,
与 .md 文档比对并输出差异报告。
时间范围:2026-01-01 00:00:00 ~ 2026-02-13 00:00:00
用法:python scripts/refresh_json_and_audit.py
"""
import json
import os
import re
import sys
import time
import requests
# ── 配置 ──────────────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")
if not API_TOKEN:
env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
if os.path.exists(env_path):
with open(env_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line.startswith("API_TOKEN="):
API_TOKEN = line.split("=", 1)[1].strip()
break
SITE_ID = 2790685415443269
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
REPORT_DIR = os.path.join("docs", "reports")
HEADERS = {
"Authorization": f"Bearer {API_TOKEN}",
"Content-Type": "application/json",
}
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")
WRAPPER_FIELDS = {"settleList", "siteProfile", "tableProfile",
"goodsCategoryList", "data", "code", "msg",
"settlelist", "siteprofile", "tableprofile",
"goodscategorylist"}
CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "example",
"description"}
# 每个接口实际返回的列表字段名(从调试中获得)
ACTUAL_LIST_KEY = {
"assistant_accounts_master": "assistantInfos",
"assistant_service_records": "orderAssistantDetails",
"assistant_cancellation_records": "abolitionAssistants",
"table_fee_transactions": "siteTableUseDetailsList",
"table_fee_discount_records": "taiFeeAdjustInfos",
"tenant_goods_master": "tenantGoodsList",
"store_goods_sales_records": "orderGoodsLedgers",
"store_goods_master": "orderGoodsList",
"goods_stock_movements": "queryDeliveryRecordsList",
"member_profiles": "tenantMemberInfos",
"member_stored_value_cards": "tenantMemberCards",
"member_balance_changes": "tenantMemberCardLogs",
"group_buy_packages": "packageCouponList",
"group_buy_redemption_records": "siteTableUseDetailsList",
"site_tables_master": "siteTables",
# 以下使用 "list" 或特殊路径
"payment_transactions": "list",
"refund_transactions": "list",
"platform_coupon_redemption_records": "list",
"goods_stock_summary": "list",
"settlement_records": "settleList",
"recharge_settlements": "settleList",
}
def load_registry():
with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
return json.load(f)
def call_api(module, action, body):
url = f"{API_BASE}{module}/{action}"
try:
resp = requests.post(url, json=body, headers=HEADERS, timeout=30)
resp.raise_for_status()
return resp.json()
except Exception as e:
print(f" ❌ 请求失败: {e}")
return None
def unwrap_records(raw_json, table_name):
"""从原始 API 响应中提取业务记录列表"""
if raw_json is None:
return []
data = raw_json.get("data")
if data is None:
return []
# ── 特殊表:stock_goods_category_tree ──
if table_name == "stock_goods_category_tree":
if isinstance(data, dict):
cats = data.get("goodsCategoryList", [])
return cats if isinstance(cats, list) else []
return []
# ── 特殊表:role_area_association ──
if table_name == "role_area_association":
if isinstance(data, dict):
rels = data.get("roleAreaRelations", [])
return rels if isinstance(rels, list) else []
return []
# ── 特殊表:tenant_member_balance_overview ──
# 返回的是汇总对象 + rechargeCardList/giveCardList
if table_name == "tenant_member_balance_overview":
if isinstance(data, dict):
# 合并顶层标量字段 + 列表中的字段
records = [data] # 顶层作为一条记录
for list_key in ("rechargeCardList", "giveCardList"):
items = data.get(list_key, [])
if isinstance(items, list):
records.extend(items)
return records
return []
# ── settlement_records / recharge_settlements ──
# data.settleList 是列表,每个元素内部有 settleList 子对象
if table_name in ("settlement_records", "recharge_settlements"):
if isinstance(data, dict):
settle_list = data.get("settleList", [])
if isinstance(settle_list, list):
return settle_list
return []
# ── 通用:data 是 dict,从中找列表字段 ──
if isinstance(data, dict):
list_key = ACTUAL_LIST_KEY.get(table_name, "list")
items = data.get(list_key, [])
if isinstance(items, list):
return items
# fallback: 找第一个列表字段
for k, v in data.items():
if isinstance(v, list) and k != "total":
return v
return []
if isinstance(data, list):
return data
return []
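# 提取路径示例(响应结构为假设构造,仅演示 unwrap_records 的行为,并非真实接口返回):
#
#   raw = {"code": 0, "msg": "ok",
#          "data": {"tenantMemberInfos": [{"id": 1}, {"id": 2}], "total": 2}}
#   unwrap_records(raw, "member_profiles")  # -> [{"id": 1}, {"id": 2}]
#
# member_profiles 的列表键来自 ACTUAL_LIST_KEY;未登记的表回退到 "list" 或第一个列表字段。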
def extract_all_fields(records, table_name):
"""从多条记录中提取所有唯一字段名(小写)"""
all_fields = set()
for record in records:
if not isinstance(record, dict):
continue
# settlement_records / recharge_settlements: 内层 settleList 展开
if table_name in ("settlement_records", "recharge_settlements"):
settle = record.get("settleList", record)
if isinstance(settle, list):
settle = settle[0] if settle else {}
if isinstance(settle, dict):
for k in settle.keys():
kl = k.lower()
if kl == "siteprofile":
all_fields.add("siteprofile")
elif kl in WRAPPER_FIELDS:
continue
else:
all_fields.add(kl)
continue
# tenant_member_balance_overview: 特殊处理
if table_name == "tenant_member_balance_overview":
for k in record.keys():
kl = k.lower()
# 跳过嵌套列表键名本身
if kl in ("rechargecardlist", "givecardlist"):
continue
all_fields.add(kl)
continue
# 通用
for k in record.keys():
kl = k.lower()
if kl in WRAPPER_FIELDS:
if kl in ("siteprofile", "tableprofile"):
all_fields.add(kl)
continue
all_fields.add(kl)
return all_fields
def extract_md_fields(table_name):
"""从 .md 文档的"四、响应字段详解"章节提取字段名(小写)"""
md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
if not os.path.exists(md_path):
return set()
with open(md_path, "r", encoding="utf-8") as f:
lines = f.readlines()
fields = set()
in_section = False
in_siteprofile = False
field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|')
siteprofile_header = re.compile(r'^###.*siteProfile', re.IGNORECASE)
for line in lines:
s = line.strip()
if s.startswith("## 四、") and "响应字段" in s:
in_section = True
in_siteprofile = False
continue
if in_section and s.startswith("## ") and not s.startswith("## 四"):
break
if not in_section:
continue
if table_name in ("settlement_records", "recharge_settlements"):
if siteprofile_header.search(s):
in_siteprofile = True
continue
if s.startswith("### ") and in_siteprofile:
if not siteprofile_header.search(s):
in_siteprofile = False
m = field_pattern.match(s)
if m:
raw = m.group(1).strip()
if raw.lower() in {h.lower() for h in CROSS_REF_HEADERS}:
continue
if table_name in ("settlement_records", "recharge_settlements"):
if in_siteprofile:
continue
if raw.startswith("siteProfile."):
continue
if raw.lower() in WRAPPER_FIELDS and raw.lower() not in (
"siteprofile", "tableprofile"):
continue
fields.add(raw.lower())
return fields
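# 匹配格式示例(表格行为假设构造,仅演示 field_pattern 能识别的 .md 写法):
#
#   | `memberName` | string | 张三 | 会员姓名 |
#
# 上述行会被提取为小写字段名 "membername";表头词(字段名/类型/示例值/说明等)与
# WRAPPER_FIELDS 中的包装键不会计入结果。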
def build_body(entry):
body = dict(entry.get("body") or {})
if entry.get("time_range") and entry.get("time_keys"):
keys = entry["time_keys"]
if len(keys) >= 2:
body[keys[0]] = START_TIME
body[keys[1]] = END_TIME
if entry.get("pagination"):
body[entry["pagination"].get("page_key", "page")] = 1
body[entry["pagination"].get("limit_key", "limit")] = LIMIT
return body
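# 组装结果示例(registry 条目为假设构造,仅演示 build_body 的行为):
#
#   entry = {"body": {"siteId": 123}, "time_range": True,
#            "time_keys": ["startTime", "endTime"],
#            "pagination": {"page_key": "page", "limit_key": "limit"}}
#   build_body(entry)
#   # -> {"siteId": 123, "startTime": START_TIME, "endTime": END_TIME, "page": 1, "limit": 100}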
def save_sample(table_name, records):
"""保存第一条记录作为 JSON 样本"""
sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
if records and isinstance(records[0], dict):
with open(sample_path, "w", encoding="utf-8") as f:
json.dump(records[0], f, ensure_ascii=False, indent=2)
return sample_path
def discover_actual_data_path(raw_json, table_name):
"""发现 API 实际返回的数据路径"""
data = raw_json.get("data") if raw_json else None
if data is None:
return None
# 特殊表
if table_name == "stock_goods_category_tree":
return "data.goodsCategoryList"
if table_name == "role_area_association":
return "data.roleAreaRelations"
if table_name == "tenant_member_balance_overview":
return "data" # 顶层汇总对象
if table_name in ("settlement_records", "recharge_settlements"):
return "data.settleList"
if isinstance(data, dict):
list_key = ACTUAL_LIST_KEY.get(table_name)
if list_key and list_key in data:
return f"data.{list_key}"
# fallback
for k, v in data.items():
if isinstance(v, list) and k.lower() != "total":
return f"data.{k}"
return None
def update_md_data_path(table_name, actual_path):
"""在 .md 文档的接口概述表格中更新/添加实际数据路径"""
md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
if not os.path.exists(md_path):
return False
with open(md_path, "r", encoding="utf-8") as f:
content = f.read()
# 检查是否已有"数据路径"或"响应数据路径"行
if "数据路径" in content or "data_path" in content.lower():
# 尝试更新已有行
pattern = re.compile(
r'(\|\s*(?:数据路径|响应数据路径|data_path)\s*\|\s*)`[^`]*`(\s*\|)',
re.IGNORECASE
)
if pattern.search(content):
new_content = pattern.sub(
rf'\g<1>`{actual_path}`\g<2>', content
)
if new_content != content:
with open(md_path, "w", encoding="utf-8") as f:
f.write(new_content)
return True
return False # 已经是最新值
# 没有数据路径行,在接口概述表格末尾添加
# 找到"## 一、接口概述"后的表格最后一行(以 | 开头)
lines = content.split("\n")
insert_idx = None
in_overview = False
last_table_row = None
for i, line in enumerate(lines):
s = line.strip()
if "## 一、" in s and "接口概述" in s:
in_overview = True
continue
if in_overview and s.startswith("## "):
break
if in_overview and s.startswith("|") and "---" not in s:
last_table_row = i
if last_table_row is not None:
new_line = f"| 响应数据路径 | `{actual_path}` |"
lines.insert(last_table_row + 1, new_line)
with open(md_path, "w", encoding="utf-8") as f:
f.write("\n".join(lines))
return True
return False
def main():
registry = load_registry()
print(f"加载 API 注册表: {len(registry)} 个端点")
print(f"时间范围: {START_TIME} ~ {END_TIME}")
print(f"每接口获取: {LIMIT}")
print("=" * 80)
results = []
all_gaps = []
registry_updates = {} # table_name -> actual_data_path
for entry in registry:
table_name = entry["id"]
name_zh = entry.get("name_zh", "")
module = entry["module"]
action = entry["action"]
skip = entry.get("skip", False)
print(f"\n{'' * 60}")
print(f"[{table_name}] {name_zh}{module}/{action}")
if skip:
print(" ⏭️ 跳过(标记为 skip")
results.append({
"table": table_name,
"status": "skipped",
"record_count": 0,
"json_field_count": 0,
"md_field_count": 0,
"json_fields": [],
"md_fields": [],
"json_only": [],
"md_only": [],
"actual_data_path": None,
})
continue
body = build_body(entry)
print(f" 请求: POST {module}/{action}")
raw = call_api(module, action, body)
if raw is None:
results.append({
"table": table_name,
"status": "error",
"record_count": 0,
"json_field_count": 0,
"md_field_count": 0,
"json_fields": [],
"md_fields": [],
"json_only": [],
"md_only": [],
"actual_data_path": None,
})
continue
# 发现实际数据路径
actual_path = discover_actual_data_path(raw, table_name)
old_path = entry.get("data_path", "")
if actual_path and actual_path != old_path:
print(f" 📍 数据路径: {old_path}{actual_path}")
registry_updates[table_name] = actual_path
else:
print(f" 📍 数据路径: {actual_path or old_path}")
records = unwrap_records(raw, table_name)
print(f" 获取记录数: {len(records)}")
# 保存样本(第一条)
save_sample(table_name, records)
# 遍历所有记录提取全字段
json_fields = extract_all_fields(records, table_name)
md_fields = extract_md_fields(table_name)
json_only = json_fields - md_fields
md_only = md_fields - json_fields
status = "ok"
if json_only:
status = "gap"
print(f" ❌ JSON 有但 .md 缺失 ({len(json_only)} 个): {sorted(json_only)}")
all_gaps.append((table_name, name_zh, sorted(json_only)))
else:
if md_only:
print(f" ⚠️ .md 多 {len(md_only)} 个条件性字段")
else:
print(f" ✅ 完全一致 ({len(json_fields)} 个字段)")
# 更新 .md 文档中的数据路径
if actual_path:
updated = update_md_data_path(table_name, actual_path)
if updated:
print(f" 📝 已更新 .md 文档数据路径")
results.append({
"table": table_name,
"status": status,
"record_count": len(records),
"json_field_count": len(json_fields),
"md_field_count": len(md_fields),
"json_fields": sorted(json_fields),
"md_fields": sorted(md_fields),
"json_only": sorted(json_only),
"md_only": sorted(md_only),
"actual_data_path": actual_path,
})
time.sleep(0.3)
# ── 更新 api_registry.json 中的 data_path ──
if registry_updates:
print(f"\n{'' * 60}")
print(f"更新 api_registry.json 中 {len(registry_updates)} 个 data_path...")
for entry in registry:
tid = entry["id"]
if tid in registry_updates:
entry["data_path"] = registry_updates[tid]
with open(REGISTRY_PATH, "w", encoding="utf-8") as f:
json.dump(registry, f, ensure_ascii=False, indent=2)
print(" ✅ api_registry.json 已更新")
# ── 汇总 ──
print(f"\n{'=' * 80}")
print("汇总报告")
print(f"{'=' * 80}")
gap_count = sum(1 for r in results if r["status"] == "gap")
ok_count = sum(1 for r in results if r["status"] == "ok")
skip_count = sum(1 for r in results if r["status"] == "skipped")
err_count = sum(1 for r in results if r["status"] == "error")
print(f" 完全一致: {ok_count}")
print(f" 有缺失: {gap_count}")
print(f" 跳过: {skip_count}")
print(f" 错误: {err_count}")
if all_gaps:
print(f"\n需要补充到 .md 文档的字段:")
for table, name_zh, fields in all_gaps:
print(f" {table} ({name_zh}): {fields}")
# 保存详细结果
out_path = os.path.join(REPORT_DIR, "json_refresh_audit.json")
os.makedirs(REPORT_DIR, exist_ok=True)
with open(out_path, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\n详细结果已写入: {out_path}")
if __name__ == "__main__":
main()
# AI_CHANGELOG:
# - 日期: 2026-02-14
# - Prompt: P20260214-060000 — 全量 JSON 刷新 + MD 文档补全 + 数据路径修正
# - 直接原因: 旧 JSON 样本仅含单条记录,缺少条件性字段;需重新获取 100 条数据并遍历提取最全字段
# - 变更摘要: 新建脚本,实现:(1) 调用全部 24 个 API 端点获取 100 条数据 (2) 遍历所有记录提取字段并集
# (3) 与 .md 文档比对找出缺失字段 (4) 更新 JSON 样本和 api_registry.json data_path (5) 更新 .md 文档响应数据路径行
# - 风险与验证: 脚本需要有效的 API_TOKEN 和网络连接;验证:运行后检查 json_refresh_audit.json 中 24/24 通过

View File

@@ -0,0 +1,717 @@
# -*- coding: utf-8 -*-
"""
补全丢失的 ODS 数据
通过运行数据完整性检查,找出 API 与 ODS 之间的差异,
然后重新从 API 获取丢失的数据并插入 ODS。
用法:
python -m scripts.backfill_missing_data --start 2025-07-01 --end 2026-01-19
python -m scripts.backfill_missing_data --from-report reports/ods_gap_check_xxx.json
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
import time as time_mod
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from zoneinfo import ZoneInfo
from dateutil import parser as dtparser
from psycopg2.extras import Json, execute_values
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from api.recording_client import build_recording_client
from config.settings import AppConfig
from database.connection import DatabaseConnection
from models.parsers import TypeParser
from tasks.ods.ods_tasks import BaseOdsTask, ENABLED_ODS_CODES, ODS_TASK_SPECS, OdsTaskSpec
from scripts.check.check_ods_gaps import run_gap_check
from utils.logging_utils import build_log_path, configure_logging
from utils.ods_record_utils import (
get_value_case_insensitive,
merge_record_layers,
normalize_pk_value,
pk_tuple_from_record,
)
def _reconfigure_stdout_utf8() -> None:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
def _parse_dt(value: str, tz: ZoneInfo, *, is_end: bool = False) -> datetime:
raw = (value or "").strip()
if not raw:
raise ValueError("empty datetime")
has_time = any(ch in raw for ch in (":", "T"))
dt = dtparser.parse(raw)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=tz)
else:
dt = dt.astimezone(tz)
if not has_time:
dt = dt.replace(
hour=23 if is_end else 0,
minute=59 if is_end else 0,
second=59 if is_end else 0,
microsecond=0
)
return dt
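# 行为示例(基于上面的实现,时区以 Asia/Shanghai 为例):
#   _parse_dt("2025-07-01", tz)               # -> 2025-07-01 00:00:00+08:00
#   _parse_dt("2025-07-01", tz, is_end=True)  # -> 2025-07-01 23:59:59+08:00
#   _parse_dt("2025-07-01 12:30", tz)         # 含时间串则不做起止补齐,仅统一时区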
def _get_spec(code: str) -> Optional[OdsTaskSpec]:
"""根据任务代码获取 ODS 任务规格"""
for spec in ODS_TASK_SPECS:
if spec.code == code:
return spec
return None
def _merge_record_layers(record: dict) -> dict:
"""Flatten nested data layers into a single dict."""
return merge_record_layers(record)
def _get_value_case_insensitive(record: dict | None, col: str | None):
"""Fetch value without case sensitivity."""
return get_value_case_insensitive(record, col)
def _normalize_pk_value(value):
"""Normalize PK value."""
return normalize_pk_value(value)
def _pk_tuple_from_record(record: dict, pk_cols: List[str]) -> Optional[Tuple]:
"""Extract PK tuple from record."""
return pk_tuple_from_record(record, pk_cols)
def _get_table_pk_columns(conn, table: str, *, include_content_hash: bool = False) -> List[str]:
"""获取表的主键列"""
if "." in table:
schema, name = table.split(".", 1)
else:
schema, name = "public", table
sql = """
SELECT kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
WHERE tc.constraint_type = 'PRIMARY KEY'
AND tc.table_schema = %s
AND tc.table_name = %s
ORDER BY kcu.ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, name))
cols = [r[0] for r in cur.fetchall()]
if include_content_hash:
return cols
return [c for c in cols if c.lower() != "content_hash"]
def _get_table_columns(conn, table: str) -> List[Tuple[str, str, str]]:
"""获取表的所有列信息"""
if "." in table:
schema, name = table.split(".", 1)
else:
schema, name = "public", table
sql = """
SELECT column_name, data_type, udt_name
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
ORDER BY ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, name))
return [(r[0], (r[1] or "").lower(), (r[2] or "").lower()) for r in cur.fetchall()]
def _fetch_existing_pk_set(
conn, table: str, pk_cols: List[str], pk_values: List[Tuple], chunk_size: int
) -> Set[Tuple]:
"""获取已存在的 PK 集合"""
if not pk_values:
return set()
select_cols = ", ".join(f't."{c}"' for c in pk_cols)
value_cols = ", ".join(f'"{c}"' for c in pk_cols)
join_cond = " AND ".join(f't."{c}" = v."{c}"' for c in pk_cols)
sql = (
f"SELECT {select_cols} FROM {table} t "
f"JOIN (VALUES %s) AS v({value_cols}) ON {join_cond}"
)
existing: Set[Tuple] = set()
with conn.cursor() as cur:
for i in range(0, len(pk_values), chunk_size):
chunk = pk_values[i:i + chunk_size]
execute_values(cur, sql, chunk, page_size=len(chunk))
for row in cur.fetchall():
existing.add(tuple(row))
return existing
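# 生成 SQL 示例(以 pk_cols=["id", "tenant_id"]、表名 billiards_ods.some_table 为例,
# 均为假设取值;VALUES 占位符由 psycopg2 的 execute_values 按分块展开):
#
#   SELECT t."id", t."tenant_id" FROM billiards_ods.some_table t
#   JOIN (VALUES %s) AS v("id", "tenant_id")
#     ON t."id" = v."id" AND t."tenant_id" = v."tenant_id"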
def _cast_value(value, data_type: str):
"""类型转换"""
if value is None:
return None
dt = (data_type or "").lower()
if dt in ("integer", "bigint", "smallint"):
if isinstance(value, bool):
return int(value)
try:
return int(value)
except Exception:
return None
if dt in ("numeric", "double precision", "real", "decimal"):
if isinstance(value, bool):
return int(value)
try:
return float(value)
except Exception:
return None
if dt.startswith("timestamp") or dt in ("date", "time", "interval"):
return value if isinstance(value, (str, datetime)) else None
return value
def _normalize_scalar(value):
"""规范化标量值"""
if value == "" or value == "{}" or value == "[]":
return None
return value
class MissingDataBackfiller:
"""丢失数据补全器"""
def __init__(
self,
cfg: AppConfig,
logger: logging.Logger,
dry_run: bool = False,
):
self.cfg = cfg
self.logger = logger
self.dry_run = dry_run
self.tz = ZoneInfo(cfg.get("app.timezone", "Asia/Shanghai"))
self.store_id = int(cfg.get("app.store_id") or 0)
# API 客户端
self.api = build_recording_client(cfg, task_code="BACKFILL_MISSING_DATA")
# 数据库连接:DatabaseConnection 构造时已设置 autocommit=False
self.db = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
def close(self):
"""关闭连接"""
if self.db:
self.db.close()
def _ensure_db(self):
"""确保数据库连接可用"""
if self.db and getattr(self.db, "conn", None) is not None:
if getattr(self.db.conn, "closed", 0) == 0:
return
self.db = DatabaseConnection(dsn=self.cfg["db"]["dsn"], session=self.cfg["db"].get("session"))
def backfill_from_gap_check(
self,
*,
start: datetime,
end: datetime,
task_codes: Optional[str] = None,
include_mismatch: bool = False,
page_size: int = 200,
chunk_size: int = 500,
content_sample_limit: int | None = None,
) -> Dict[str, Any]:
"""
运行 gap check 并补全丢失数据
Returns:
补全结果统计
"""
self.logger.info("数据补全开始 起始=%s 结束=%s", start.isoformat(), end.isoformat())
# 计算窗口大小
total_seconds = max(0, int((end - start).total_seconds()))
if total_seconds >= 86400:
window_days = max(1, total_seconds // 86400)
window_hours = 0
else:
window_days = 0
window_hours = max(1, total_seconds // 3600 or 1)
# 运行 gap check
self.logger.info("正在执行缺失检查...")
gap_result = run_gap_check(
cfg=self.cfg,
start=start,
end=end,
window_days=window_days,
window_hours=window_hours,
page_size=page_size,
chunk_size=chunk_size,
sample_limit=10000,  # 样本上限取较大值,尽量覆盖全部丢失记录
sleep_per_window=0,
sleep_per_page=0,
task_codes=task_codes or "",
from_cutoff=False,
cutoff_overlap_hours=24,
allow_small_window=True,
logger=self.logger,
compare_content=include_mismatch,
content_sample_limit=content_sample_limit or 10000,
)
total_missing = gap_result.get("total_missing", 0)
total_mismatch = gap_result.get("total_mismatch", 0)
if total_missing == 0 and (not include_mismatch or total_mismatch == 0):
self.logger.info("Data complete: no missing/mismatch records")
return {"backfilled": 0, "errors": 0, "details": []}
if include_mismatch:
self.logger.info("Missing/mismatch check done missing=%s mismatch=%s", total_missing, total_mismatch)
else:
self.logger.info("Missing check done missing=%s", total_missing)
results = []
total_backfilled = 0
total_errors = 0
for task_result in gap_result.get("results", []):
task_code = task_result.get("task_code")
missing = task_result.get("missing", 0)
missing_samples = task_result.get("missing_samples", [])
mismatch = task_result.get("mismatch", 0) if include_mismatch else 0
mismatch_samples = task_result.get("mismatch_samples", []) if include_mismatch else []
target_samples = list(missing_samples) + list(mismatch_samples)
if missing == 0 and mismatch == 0:
continue
self.logger.info(
"Start backfill task task=%s missing=%s mismatch=%s samples=%s",
task_code, missing, mismatch, len(target_samples)
)
try:
backfilled = self._backfill_task(
task_code=task_code,
table=task_result.get("table"),
pk_columns=task_result.get("pk_columns", []),
pk_samples=target_samples,
start=start,
end=end,
page_size=page_size,
chunk_size=chunk_size,
)
results.append({
"task_code": task_code,
"missing": missing,
"mismatch": mismatch,
"backfilled": backfilled,
"error": None,
})
total_backfilled += backfilled
except Exception as exc:
self.logger.exception("补全失败 任务=%s", task_code)
results.append({
"task_code": task_code,
"missing": missing,
"mismatch": mismatch,
"backfilled": 0,
"error": str(exc),
})
total_errors += 1
self.logger.info(
"数据补全完成 总缺失=%s 已补全=%s 错误数=%s",
total_missing, total_backfilled, total_errors
)
return {
"total_missing": total_missing,
"total_mismatch": total_mismatch,
"backfilled": total_backfilled,
"errors": total_errors,
"details": results,
}
def _backfill_task(
self,
*,
task_code: str,
table: str,
pk_columns: List[str],
pk_samples: List[Dict],
start: datetime,
end: datetime,
page_size: int,
chunk_size: int,
) -> int:
"""补全单个任务的丢失数据"""
self._ensure_db()
spec = _get_spec(task_code)
if not spec:
self.logger.warning("未找到任务规格 任务=%s", task_code)
return 0
if not pk_columns:
pk_columns = _get_table_pk_columns(self.db.conn, table, include_content_hash=False)
conflict_columns = _get_table_pk_columns(self.db.conn, table, include_content_hash=True)
if not conflict_columns:
conflict_columns = pk_columns
if not pk_columns:
self.logger.warning("未找到主键列 任务=%s 表=%s", task_code, table)
return 0
# 提取丢失的 PK 值
missing_pks: Set[Tuple] = set()
for sample in pk_samples:
pk_tuple = tuple(sample.get(col) for col in pk_columns)
if all(v is not None for v in pk_tuple):
missing_pks.add(pk_tuple)
if not missing_pks:
self.logger.info("无缺失主键 任务=%s", task_code)
return 0
self.logger.info(
"开始获取数据 任务=%s 缺失主键数=%s",
task_code, len(missing_pks)
)
# 从 API 获取数据并过滤出丢失的记录
params = self._build_params(spec, start, end)
backfilled = 0
cols_info = _get_table_columns(self.db.conn, table)
db_json_cols_lower = {
c[0].lower() for c in cols_info
if c[1] in ("json", "jsonb") or c[2] in ("json", "jsonb")
}
col_names = [c[0] for c in cols_info]
# 结束只读事务,避免长时间 API 拉取导致 idle_in_tx 超时
try:
self.db.conn.commit()
except Exception:
self.db.conn.rollback()
try:
for page_no, records, _, response_payload in self.api.iter_paginated(
endpoint=spec.endpoint,
params=params,
page_size=page_size,
data_path=spec.data_path,
list_key=spec.list_key,
):
# 过滤出丢失的记录
records_to_insert = []
for rec in records:
if not isinstance(rec, dict):
continue
pk_tuple = _pk_tuple_from_record(rec, pk_columns)
if pk_tuple and pk_tuple in missing_pks:
records_to_insert.append(rec)
if not records_to_insert:
continue
# 插入丢失的记录
if self.dry_run:
backfilled += len(records_to_insert)
self.logger.info(
"模拟运行 任务=%s 页=%s 将插入=%s",
task_code, page_no, len(records_to_insert)
)
else:
inserted = self._insert_records(
table=table,
records=records_to_insert,
cols_info=cols_info,
pk_columns=pk_columns,
conflict_columns=conflict_columns,
db_json_cols_lower=db_json_cols_lower,
)
backfilled += inserted
# 避免长事务阻塞与 idle_in_tx 超时
self.db.conn.commit()
self.logger.info(
"已插入 任务=%s 页=%s 数量=%s",
task_code, page_no, inserted
)
if not self.dry_run:
self.db.conn.commit()
self.logger.info("任务补全完成 任务=%s 已补全=%s", task_code, backfilled)
return backfilled
except Exception:
self.db.conn.rollback()
raise
def _build_params(
self,
spec: OdsTaskSpec,
start: datetime,
end: datetime,
) -> Dict:
"""构建 API 请求参数"""
base: Dict[str, Any] = {}
if spec.include_site_id:
if spec.endpoint == "/TenantGoods/GetGoodsInventoryList":
base["siteId"] = [self.store_id]
else:
base["siteId"] = self.store_id
if spec.requires_window and spec.time_fields:
start_key, end_key = spec.time_fields
base[start_key] = TypeParser.format_timestamp(start, self.tz)
base[end_key] = TypeParser.format_timestamp(end, self.tz)
# 合并公共参数
common = self.cfg.get("api.params", {}) or {}
if isinstance(common, dict):
merged = {**common, **base}
else:
merged = base
merged.update(spec.extra_params or {})
return merged
def _insert_records(
self,
*,
table: str,
records: List[Dict],
cols_info: List[Tuple[str, str, str]],
pk_columns: List[str],
conflict_columns: List[str],
db_json_cols_lower: Set[str],
) -> int:
"""插入记录到数据库"""
if not records:
return 0
col_names = [c[0] for c in cols_info]
needs_content_hash = any(c[0].lower() == "content_hash" for c in cols_info)
quoted_cols = ", ".join(f'"{c}"' for c in col_names)
sql = f"INSERT INTO {table} ({quoted_cols}) VALUES %s"
conflict_cols = conflict_columns or pk_columns
if conflict_cols:
pk_clause = ", ".join(f'"{c}"' for c in conflict_cols)
sql += f" ON CONFLICT ({pk_clause}) DO NOTHING"
now = datetime.now(self.tz)
json_dump = lambda v: json.dumps(v, ensure_ascii=False)
params: List[Tuple] = []
for rec in records:
merged_rec = _merge_record_layers(rec)
# 检查 PK
if pk_columns:
missing_pk = False
for pk in pk_columns:
if str(pk).lower() == "content_hash":
continue
pk_val = _get_value_case_insensitive(merged_rec, pk)
if pk_val is None or pk_val == "":
missing_pk = True
break
if missing_pk:
continue
content_hash = None
if needs_content_hash:
content_hash = BaseOdsTask._compute_content_hash(
merged_rec, include_fetched_at=False
)
row_vals: List[Any] = []
for (col_name, data_type, _udt) in cols_info:
col_lower = col_name.lower()
if col_lower == "payload":
row_vals.append(Json(rec, dumps=json_dump))
continue
if col_lower == "source_file":
row_vals.append("backfill")
continue
if col_lower == "source_endpoint":
row_vals.append("backfill")
continue
if col_lower == "fetched_at":
row_vals.append(now)
continue
if col_lower == "content_hash":
row_vals.append(content_hash)
continue
value = _normalize_scalar(_get_value_case_insensitive(merged_rec, col_name))
if col_lower in db_json_cols_lower:
row_vals.append(Json(value, dumps=json_dump) if value is not None else None)
continue
row_vals.append(_cast_value(value, data_type))
params.append(tuple(row_vals))
if not params:
return 0
inserted = 0
with self.db.conn.cursor() as cur:
for i in range(0, len(params), 200):
chunk = params[i:i + 200]
execute_values(cur, sql, chunk, page_size=len(chunk))
if cur.rowcount is not None and cur.rowcount > 0:
inserted += int(cur.rowcount)
return inserted
def run_backfill(
*,
cfg: AppConfig,
start: datetime,
end: datetime,
task_codes: Optional[str] = None,
include_mismatch: bool = False,
dry_run: bool = False,
page_size: int = 200,
chunk_size: int = 500,
content_sample_limit: int | None = None,
logger: logging.Logger,
) -> Dict[str, Any]:
"""
运行数据补全
Args:
cfg: 应用配置
start: 开始时间
end: 结束时间
task_codes: 指定任务代码(逗号分隔)
dry_run: 是否仅预览
page_size: API 分页大小
chunk_size: 数据库批量大小
logger: 日志记录器
Returns:
补全结果
"""
backfiller = MissingDataBackfiller(cfg, logger, dry_run)
try:
return backfiller.backfill_from_gap_check(
start=start,
end=end,
task_codes=task_codes,
include_mismatch=include_mismatch,
page_size=page_size,
chunk_size=chunk_size,
content_sample_limit=content_sample_limit,
)
finally:
backfiller.close()
def main() -> int:
_reconfigure_stdout_utf8()
ap = argparse.ArgumentParser(description="补全丢失的 ODS 数据")
ap.add_argument("--start", default="2025-07-01", help="开始日期 (默认: 2025-07-01)")
ap.add_argument("--end", default="", help="结束日期 (默认: 当前时间)")
ap.add_argument("--task-codes", default="", help="指定任务代码(逗号分隔,留空=全部)")
ap.add_argument("--include-mismatch", action="store_true", help="同时补全内容不一致的记录")
ap.add_argument("--content-sample-limit", type=int, default=None, help="不一致样本上限 (默认: 10000)")
ap.add_argument("--dry-run", action="store_true", help="仅预览,不实际写入")
ap.add_argument("--page-size", type=int, default=200, help="API 分页大小 (默认: 200)")
ap.add_argument("--chunk-size", type=int, default=500, help="数据库批量大小 (默认: 500)")
ap.add_argument("--log-file", default="", help="日志文件路径")
ap.add_argument("--log-dir", default="", help="日志目录")
ap.add_argument("--log-level", default="INFO", help="日志级别 (默认: INFO)")
ap.add_argument("--no-log-console", action="store_true", help="禁用控制台日志")
args = ap.parse_args()
log_dir = Path(args.log_dir) if args.log_dir else (PROJECT_ROOT / "logs")
log_file = Path(args.log_file) if args.log_file else build_log_path(log_dir, "backfill_missing")
log_console = not args.no_log_console
with configure_logging(
"backfill_missing",
log_file,
level=args.log_level,
console=log_console,
tee_std=True,
) as logger:
cfg = AppConfig.load({})
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Shanghai"))
start = _parse_dt(args.start, tz)
end = _parse_dt(args.end, tz, is_end=True) if args.end else datetime.now(tz)
result = run_backfill(
cfg=cfg,
start=start,
end=end,
task_codes=args.task_codes or None,
include_mismatch=args.include_mismatch,
dry_run=args.dry_run,
page_size=args.page_size,
chunk_size=args.chunk_size,
content_sample_limit=args.content_sample_limit,
logger=logger,
)
logger.info("=" * 60)
logger.info("补全完成!")
logger.info(" 总丢失: %s", result.get("total_missing", 0))
if args.include_mismatch:
logger.info(" 总不一致: %s", result.get("total_mismatch", 0))
logger.info(" 已补全: %s", result.get("backfilled", 0))
logger.info(" 错误数: %s", result.get("errors", 0))
logger.info("=" * 60)
# 输出详细结果
for detail in result.get("details", []):
if detail.get("error"):
logger.error(
" %s: 丢失=%s 不一致=%s 补全=%s 错误=%s",
detail.get("task_code"),
detail.get("missing"),
detail.get("mismatch", 0),
detail.get("backfilled"),
detail.get("error"),
)
elif detail.get("backfilled", 0) > 0:
logger.info(
" %s: 丢失=%s 不一致=%s 补全=%s",
detail.get("task_code"),
detail.get("missing"),
detail.get("mismatch", 0),
detail.get("backfilled"),
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-
"""
Deduplicate ODS snapshots by (business PK, content_hash).
Keep the latest row by fetched_at (tie-breaker: ctid desc).
Usage:
PYTHONPATH=. python -m scripts.repair.dedupe_ods_snapshots
PYTHONPATH=. python -m scripts.repair.dedupe_ods_snapshots --schema billiards_ods
PYTHONPATH=. python -m scripts.repair.dedupe_ods_snapshots --tables member_profiles,orders
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Iterable, Sequence
import psycopg2
PROJECT_ROOT = Path(__file__).resolve().parents[2]  # scripts/repair/ 上两级为仓库根目录
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from config.settings import AppConfig
from database.connection import DatabaseConnection
def _reconfigure_stdout_utf8() -> None:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
def _quote_ident(name: str) -> str:
return '"' + str(name).replace('"', '""') + '"'
def _fetch_tables(conn, schema: str) -> list[str]:
sql = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = %s AND table_type = 'BASE TABLE'
ORDER BY table_name
"""
with conn.cursor() as cur:
cur.execute(sql, (schema,))
return [r[0] for r in cur.fetchall()]
def _fetch_columns(conn, schema: str, table: str) -> list[str]:
sql = """
SELECT column_name
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
ORDER BY ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, table))
return [r[0] for r in cur.fetchall()]
def _fetch_pk_columns(conn, schema: str, table: str) -> list[str]:
sql = """
SELECT kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
WHERE tc.constraint_type = 'PRIMARY KEY'
AND tc.table_schema = %s
AND tc.table_name = %s
ORDER BY kcu.ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, table))
cols = [r[0] for r in cur.fetchall()]
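# ODS 表主键通常包含 content_hash(用于保存同一业务键的多版本快照);
# 此处剔除 content_hash 得到业务主键,后续再与 content_hash 组合作为去重分组键。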
return [c for c in cols if c.lower() != "content_hash"]
def _build_report_path(out_arg: str | None) -> Path:
if out_arg:
return Path(out_arg)
reports_dir = PROJECT_ROOT / "reports"
reports_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
return reports_dir / f"ods_snapshot_dedupe_{ts}.json"
def _print_progress(
table_label: str,
deleted: int,
total: int,
errors: int,
) -> None:
if total:
msg = f"[{table_label}] deleted {deleted}/{total} errors={errors}"
else:
msg = f"[{table_label}] deleted {deleted} errors={errors}"
print(msg, flush=True)
def _count_duplicates(conn, schema: str, table: str, key_cols: Sequence[str]) -> int:
keys_sql = ", ".join(_quote_ident(c) for c in [*key_cols, "content_hash"])
table_sql = f"{_quote_ident(schema)}.{_quote_ident(table)}"
sql = f"""
SELECT COUNT(*) FROM (
SELECT 1
FROM (
SELECT ROW_NUMBER() OVER (
PARTITION BY {keys_sql}
ORDER BY fetched_at DESC NULLS LAST, ctid DESC
) AS rn
FROM {table_sql}
) t
WHERE rn > 1
) s
"""
with conn.cursor() as cur:
cur.execute(sql)
row = cur.fetchone()
return int(row[0] if row else 0)
def _delete_duplicate_batch(
conn,
schema: str,
table: str,
key_cols: Sequence[str],
batch_size: int,
) -> int:
keys_sql = ", ".join(_quote_ident(c) for c in [*key_cols, "content_hash"])
table_sql = f"{_quote_ident(schema)}.{_quote_ident(table)}"
sql = f"""
WITH dupes AS (
SELECT ctid
FROM (
SELECT ctid,
ROW_NUMBER() OVER (
PARTITION BY {keys_sql}
ORDER BY fetched_at DESC NULLS LAST, ctid DESC
) AS rn
FROM {table_sql}
) s
WHERE rn > 1
LIMIT %s
)
DELETE FROM {table_sql} t
USING dupes d
WHERE t.ctid = d.ctid
RETURNING 1
"""
with conn.cursor() as cur:
cur.execute(sql, (int(batch_size),))
rows = cur.fetchall()
return len(rows or [])
def main() -> int:
_reconfigure_stdout_utf8()
ap = argparse.ArgumentParser(description="Deduplicate ODS snapshot rows by PK+content_hash")
ap.add_argument("--schema", default="billiards_ods", help="ODS schema name")
ap.add_argument("--tables", default="", help="comma-separated table names (optional)")
ap.add_argument("--batch-size", type=int, default=1000, help="delete batch size")
ap.add_argument("--progress-every", type=int, default=100, help="print progress every N deletions")
ap.add_argument("--out", default="", help="output report JSON path")
ap.add_argument("--dry-run", action="store_true", help="only compute duplicate counts")
args = ap.parse_args()
cfg = AppConfig.load({})
db = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
try:
db.conn.rollback()
except Exception:
pass
db.conn.autocommit = True
tables = _fetch_tables(db.conn, args.schema)
if args.tables.strip():
whitelist = {t.strip() for t in args.tables.split(",") if t.strip()}
tables = [t for t in tables if t in whitelist]
report = {
"schema": args.schema,
"tables": [],
"summary": {
"total_tables": len(tables),
"checked_tables": 0,
"total_duplicates": 0,
"deleted_rows": 0,
"error_rows": 0,
"skipped_tables": 0,
},
}
for table in tables:
table_label = f"{args.schema}.{table}"
cols = _fetch_columns(db.conn, args.schema, table)
cols_lower = {c.lower() for c in cols}
if "content_hash" not in cols_lower or "fetched_at" not in cols_lower:
print(f"[{table_label}] skip: missing content_hash/fetched_at", flush=True)
report["summary"]["skipped_tables"] += 1
continue
key_cols = _fetch_pk_columns(db.conn, args.schema, table)
if not key_cols:
print(f"[{table_label}] skip: missing primary key", flush=True)
report["summary"]["skipped_tables"] += 1
continue
total_dupes = _count_duplicates(db.conn, args.schema, table, key_cols)
print(f"[{table_label}] duplicates={total_dupes}", flush=True)
deleted = 0
errors = 0
if not args.dry_run and total_dupes:
while True:
try:
batch_deleted = _delete_duplicate_batch(
db.conn,
args.schema,
table,
key_cols,
args.batch_size,
)
except psycopg2.Error:
errors += 1
break
if batch_deleted <= 0:
break
deleted += batch_deleted
if args.progress_every and deleted % int(args.progress_every) == 0:
_print_progress(table_label, deleted, total_dupes, errors)
if deleted and (not args.progress_every or deleted % int(args.progress_every) != 0):
_print_progress(table_label, deleted, total_dupes, errors)
report["tables"].append(
{
"table": table_label,
"duplicate_rows": total_dupes,
"deleted_rows": deleted,
"error_rows": errors,
}
)
report["summary"]["checked_tables"] += 1
report["summary"]["total_duplicates"] += total_dupes
report["summary"]["deleted_rows"] += deleted
report["summary"]["error_rows"] += errors
out_path = _build_report_path(args.out)
out_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"[REPORT] {out_path}", flush=True)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
"""修复 dim_assistant 表中的 user_id 字段"""
import sys
sys.path.insert(0, '.')
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])
db = DatabaseOperations(db_conn)
print("=== 修复 dim_assistant.user_id ===")
# 方案:从 ODS 表更新 DWD 表的 user_id
# 通过 id (ODS) = assistant_id (DWD) 关联
# 1. 先检查当前状态
print("\n修复前:")
sql_before = """
SELECT
COUNT(*) as total,
COUNT(CASE WHEN user_id > 0 THEN 1 END) as has_user_id
FROM billiards_dwd.dim_assistant
WHERE scd2_is_current = 1
"""
r = dict(db.query(sql_before)[0])
print(f" 总记录: {r['total']}, 有user_id: {r['has_user_id']}")
# 2. 执行更新
print("\n执行更新...")
update_sql = """
UPDATE billiards_dwd.dim_assistant d
SET user_id = o.user_id
FROM (
SELECT DISTINCT ON (id) id, user_id
FROM billiards_ods.assistant_accounts_master
WHERE user_id > 0
ORDER BY id, fetched_at DESC
) o
WHERE d.assistant_id = o.id
AND (d.user_id IS NULL OR d.user_id = 0)
"""
with db_conn.conn.cursor() as cur:
cur.execute(update_sql)
updated = cur.rowcount
print(f" 更新了 {updated} 条记录")
db_conn.conn.commit()
# 3. 检查修复后状态
print("\n修复后:")
r2 = dict(db.query(sql_before)[0])
print(f" 总记录: {r2['total']}, 有user_id: {r2['has_user_id']}")
# 4. 显示样本数据
print("\n样本数据:")
sql_sample = """
SELECT assistant_id, user_id, assistant_no, nickname
FROM billiards_dwd.dim_assistant
WHERE scd2_is_current = 1
ORDER BY assistant_no::int
LIMIT 10
"""
for row in db.query(sql_sample):
r = dict(row)
print(f" assistant_id={r['assistant_id']}, user_id={r['user_id']}, no={r['assistant_no']}, nickname={r['nickname']}")
# 5. 验证与服务日志的关联
print("\n验证与服务日志的关联:")
sql_verify = """
SELECT
COUNT(DISTINCT s.user_id) as service_unique_users,
COUNT(DISTINCT CASE WHEN d.assistant_id IS NOT NULL THEN s.user_id END) as matched_users
FROM billiards_dwd.dwd_assistant_service_log s
LEFT JOIN billiards_dwd.dim_assistant d
ON s.user_id = d.user_id AND d.scd2_is_current = 1
WHERE s.is_delete = 0 AND s.user_id > 0
"""
r3 = dict(db.query(sql_verify)[0])
print(f" 服务日志唯一user_id: {r3['service_unique_users']}")
print(f" 能匹配到dim_assistant: {r3['matched_users']}")
match_rate = r3['matched_users'] / r3['service_unique_users'] * 100 if r3['service_unique_users'] > 0 else 0
print(f" 匹配率: {match_rate:.1f}%")
db_conn.close()
print("\n完成!")

View File

@@ -0,0 +1,302 @@
# -*- coding: utf-8 -*-
"""
Repair ODS content_hash values by recomputing from payload.
Usage:
PYTHONPATH=. python -m scripts.repair.repair_ods_content_hash
PYTHONPATH=. python -m scripts.repair.repair_ods_content_hash --schema billiards_ods
PYTHONPATH=. python -m scripts.repair.repair_ods_content_hash --tables member_profiles,orders
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Sequence
import psycopg2
from psycopg2.extras import RealDictCursor
PROJECT_ROOT = Path(__file__).resolve().parents[2]  # scripts/repair/ 上两级为仓库根目录
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from config.settings import AppConfig
from database.connection import DatabaseConnection
from tasks.ods.ods_tasks import BaseOdsTask
def _reconfigure_stdout_utf8() -> None:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
def _fetch_tables(conn, schema: str) -> list[str]:
sql = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = %s AND table_type = 'BASE TABLE'
ORDER BY table_name
"""
with conn.cursor() as cur:
cur.execute(sql, (schema,))
return [r[0] for r in cur.fetchall()]
def _fetch_columns(conn, schema: str, table: str) -> list[str]:
sql = """
SELECT column_name
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
ORDER BY ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, table))
cols = [r[0] for r in cur.fetchall()]
return [c for c in cols if c]
def _fetch_pk_columns(conn, schema: str, table: str) -> list[str]:
sql = """
SELECT kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
WHERE tc.constraint_type = 'PRIMARY KEY'
AND tc.table_schema = %s
AND tc.table_name = %s
ORDER BY kcu.ordinal_position
"""
with conn.cursor() as cur:
cur.execute(sql, (schema, table))
cols = [r[0] for r in cur.fetchall()]
return [c for c in cols if c.lower() != "content_hash"]
def _fetch_row_count(conn, schema: str, table: str) -> int:
sql = f'SELECT COUNT(*) FROM "{schema}"."{table}"'
with conn.cursor() as cur:
cur.execute(sql)
row = cur.fetchone()
return int(row[0] if row else 0)
def _iter_rows(
conn,
schema: str,
table: str,
select_cols: Sequence[str],
batch_size: int,
) -> Iterable[dict]:
cols_sql = ", ".join("ctid" if c == "ctid" else f'"{c}"' for c in select_cols)
sql = f'SELECT {cols_sql} FROM "{schema}"."{table}"'
with conn.cursor(name=f"ods_hash_fix_{table}", cursor_factory=RealDictCursor) as cur:
cur.itersize = max(1, int(batch_size or 500))
cur.execute(sql)
for row in cur:
yield row
def _build_report_path(out_arg: str | None) -> Path:
if out_arg:
return Path(out_arg)
reports_dir = PROJECT_ROOT / "reports"
reports_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
return reports_dir / f"ods_content_hash_repair_{ts}.json"
def _print_progress(
table_label: str,
processed: int,
total: int,
updated: int,
skipped: int,
conflicts: int,
errors: int,
missing_hash: int,
invalid_payload: int,
) -> None:
if total:
msg = (
f"[{table_label}] checked {processed}/{total} "
f"updated={updated} skipped={skipped} conflicts={conflicts} errors={errors} "
f"missing_hash={missing_hash} invalid_payload={invalid_payload}"
)
else:
msg = (
f"[{table_label}] checked {processed} "
f"updated={updated} skipped={skipped} conflicts={conflicts} errors={errors} "
f"missing_hash={missing_hash} invalid_payload={invalid_payload}"
)
print(msg, flush=True)
def main() -> int:
_reconfigure_stdout_utf8()
ap = argparse.ArgumentParser(description="Repair ODS content_hash using payload")
ap.add_argument("--schema", default="billiards_ods", help="ODS schema name")
ap.add_argument("--tables", default="", help="comma-separated table names (optional)")
ap.add_argument("--batch-size", type=int, default=500, help="DB fetch batch size")
ap.add_argument("--progress-every", type=int, default=100, help="print progress every N rows")
ap.add_argument("--sample-limit", type=int, default=10, help="sample conflicts per table")
ap.add_argument("--out", default="", help="output report JSON path")
ap.add_argument("--dry-run", action="store_true", help="only compute stats, do not update")
args = ap.parse_args()
cfg = AppConfig.load({})
db_read = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
db_write = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
try:
db_write.conn.rollback()
except Exception:
pass
db_write.conn.autocommit = True
tables = _fetch_tables(db_read.conn, args.schema)
if args.tables.strip():
whitelist = {t.strip() for t in args.tables.split(",") if t.strip()}
tables = [t for t in tables if t in whitelist]
report = {
"schema": args.schema,
"tables": [],
"summary": {
"total_tables": len(tables),
"checked_tables": 0,
"total_rows": 0,
"checked_rows": 0,
"updated_rows": 0,
"skipped_rows": 0,
"conflict_rows": 0,
"error_rows": 0,
"missing_hash_rows": 0,
"invalid_payload_rows": 0,
},
}
for table in tables:
table_label = f"{args.schema}.{table}"
cols = _fetch_columns(db_read.conn, args.schema, table)
cols_lower = {c.lower() for c in cols}
if "payload" not in cols_lower or "content_hash" not in cols_lower:
print(f"[{table_label}] skip: missing payload/content_hash", flush=True)
continue
total = _fetch_row_count(db_read.conn, args.schema, table)
pk_cols = _fetch_pk_columns(db_read.conn, args.schema, table)
select_cols = ["ctid", "content_hash", "payload", *pk_cols]
processed = 0
updated = 0
skipped = 0
conflicts = 0
errors = 0
missing_hash = 0
invalid_payload = 0
samples: list[dict[str, Any]] = []
print(f"[{table_label}] start: total_rows={total}", flush=True)
for row in _iter_rows(db_read.conn, args.schema, table, select_cols, args.batch_size):
processed += 1
content_hash = row.get("content_hash")
payload = row.get("payload")
recomputed = BaseOdsTask._compute_compare_hash_from_payload(payload)
row_ctid = row.get("ctid")
if not content_hash:
missing_hash += 1
if not recomputed:
invalid_payload += 1
if not recomputed:
skipped += 1
elif content_hash == recomputed:
skipped += 1
else:
if args.dry_run:
updated += 1
else:
try:
with db_write.conn.cursor() as cur:
cur.execute(
f'UPDATE "{args.schema}"."{table}" SET content_hash = %s WHERE ctid = %s',
(recomputed, row_ctid),
)
updated += 1
except psycopg2.errors.UniqueViolation:
conflicts += 1
if len(samples) < max(0, int(args.sample_limit or 0)):
sample = {k: row.get(k) for k in pk_cols}
sample["content_hash"] = content_hash
sample["recomputed_hash"] = recomputed
samples.append(sample)
except psycopg2.Error:
errors += 1
if args.progress_every and processed % int(args.progress_every) == 0:
_print_progress(
table_label,
processed,
total,
updated,
skipped,
conflicts,
errors,
missing_hash,
invalid_payload,
)
if processed and (not args.progress_every or processed % int(args.progress_every) != 0):
_print_progress(
table_label,
processed,
total,
updated,
skipped,
conflicts,
errors,
missing_hash,
invalid_payload,
)
report["tables"].append(
{
"table": table_label,
"total_rows": total,
"checked_rows": processed,
"updated_rows": updated,
"skipped_rows": skipped,
"conflict_rows": conflicts,
"error_rows": errors,
"missing_hash_rows": missing_hash,
"invalid_payload_rows": invalid_payload,
"conflict_samples": samples,
}
)
report["summary"]["checked_tables"] += 1
report["summary"]["total_rows"] += total
report["summary"]["checked_rows"] += processed
report["summary"]["updated_rows"] += updated
report["summary"]["skipped_rows"] += skipped
report["summary"]["conflict_rows"] += conflicts
report["summary"]["error_rows"] += errors
report["summary"]["missing_hash_rows"] += missing_hash
report["summary"]["invalid_payload_rows"] += invalid_payload
out_path = _build_report_path(args.out)
out_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"[REPORT] {out_path}", flush=True)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,231 @@
# -*- coding: utf-8 -*-
"""Create performance indexes for integrity verification and run ANALYZE.
Usage:
python -m scripts.tune_integrity_indexes
python -m scripts.tune_integrity_indexes --dry-run
"""
from __future__ import annotations
import argparse
import hashlib
from dataclasses import dataclass
from typing import Dict, List, Sequence, Set, Tuple
import psycopg2
from psycopg2 import sql
from config.settings import AppConfig
TIME_CANDIDATES = (
"pay_time",
"create_time",
"start_use_time",
"scd2_start_time",
"calc_time",
"order_date",
"fetched_at",
)
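# 说明:billiards_dwd 层会为每个实际存在的候选时间列各建一个单列索引;
# 若表主键列数不超过 3,还会生成 (时间列, 主键列...) 组合索引,详见 _plan_indexes。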
@dataclass(frozen=True)
class IndexPlan:
schema: str
table: str
index_name: str
columns: Tuple[str, ...]
def _short_index_name(table: str, tag: str, columns: Sequence[str]) -> str:
raw = f"idx_{table}_{tag}_{'_'.join(columns)}"
if len(raw) <= 63:
return raw
digest = hashlib.md5(raw.encode("utf-8")).hexdigest()[:8]
shortened = f"idx_{table}_{tag}_{digest}"
return shortened[:63]
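# 截断示例(表名与 digest 均为假设值,仅作说明):
#   _short_index_name("dwd_member_balance_change_detail", "time_pk",
#                     ("pay_time", "order_id", "site_id", "tenant_id"))
#   原始拼接名超过 63 字符时,返回 "idx_dwd_member_balance_change_detail_time_pk_" + md5 前 8 位,
#   并整体截断到 PostgreSQL 标识符上限 63 字符。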
def _load_table_columns(cur, schema: str, table: str) -> Set[str]:
cur.execute(
"""
SELECT column_name
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
""",
(schema, table),
)
return {r[0] for r in cur.fetchall()}
def _load_pk_columns(cur, schema: str, table: str) -> List[str]:
cur.execute(
"""
SELECT kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
AND tc.table_name = kcu.table_name
WHERE tc.table_schema = %s
AND tc.table_name = %s
AND tc.constraint_type = 'PRIMARY KEY'
ORDER BY kcu.ordinal_position
""",
(schema, table),
)
return [r[0] for r in cur.fetchall()]
def _load_tables(cur, schema: str) -> List[str]:
cur.execute(
"""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = %s
AND table_type = 'BASE TABLE'
ORDER BY table_name
""",
(schema,),
)
return [r[0] for r in cur.fetchall()]
def _plan_indexes(cur, schema: str, table: str) -> List[IndexPlan]:
plans: List[IndexPlan] = []
cols = _load_table_columns(cur, schema, table)
pk_cols = _load_pk_columns(cur, schema, table)
if schema == "billiards_ods":
if "fetched_at" in cols:
plans.append(
IndexPlan(
schema=schema,
table=table,
index_name=_short_index_name(table, "fetched_at", ("fetched_at",)),
columns=("fetched_at",),
)
)
if pk_cols and len(pk_cols) <= 3 and all(c in cols for c in pk_cols):
comp_cols = ("fetched_at", *pk_cols)
plans.append(
IndexPlan(
schema=schema,
table=table,
index_name=_short_index_name(table, "fetched_pk", comp_cols),
columns=comp_cols,
)
)
if schema == "billiards_dwd":
if pk_cols and "scd2_is_current" in cols and len(pk_cols) <= 4:
comp_cols = (*pk_cols, "scd2_is_current")
plans.append(
IndexPlan(
schema=schema,
table=table,
index_name=_short_index_name(table, "pk_current", comp_cols),
columns=comp_cols,
)
)
for tcol in TIME_CANDIDATES:
if tcol in cols:
plans.append(
IndexPlan(
schema=schema,
table=table,
index_name=_short_index_name(table, "time", (tcol,)),
columns=(tcol,),
)
)
if pk_cols and len(pk_cols) <= 3 and all(c in cols for c in pk_cols):
comp_cols = (tcol, *pk_cols)
plans.append(
IndexPlan(
schema=schema,
table=table,
index_name=_short_index_name(table, "time_pk", comp_cols),
columns=comp_cols,
)
)
# 按索引名去重
dedup: Dict[str, IndexPlan] = {}
for p in plans:
dedup[p.index_name] = p
return list(dedup.values())
def _create_index(cur, plan: IndexPlan) -> None:
stmt = sql.SQL("CREATE INDEX IF NOT EXISTS {idx} ON {sch}.{tbl} ({cols})").format(
idx=sql.Identifier(plan.index_name),
sch=sql.Identifier(plan.schema),
tbl=sql.Identifier(plan.table),
cols=sql.SQL(", ").join(sql.Identifier(c) for c in plan.columns),
)
cur.execute(stmt)
def _analyze_table(cur, schema: str, table: str) -> None:
stmt = sql.SQL("ANALYZE {sch}.{tbl}").format(
sch=sql.Identifier(schema),
tbl=sql.Identifier(table),
)
cur.execute(stmt)
def main() -> int:
ap = argparse.ArgumentParser(description="Tune indexes for integrity verification.")
ap.add_argument("--dry-run", action="store_true", help="Print planned SQL only.")
ap.add_argument(
"--skip-analyze",
action="store_true",
help="Create indexes but skip ANALYZE.",
)
args = ap.parse_args()
cfg = AppConfig.load({})
dsn = cfg.get("db.dsn")
timeout_sec = int(cfg.get("db.connect_timeout_sec", 10) or 10)
with psycopg2.connect(dsn, connect_timeout=timeout_sec) as conn:
conn.autocommit = False
with conn.cursor() as cur:
all_plans: List[IndexPlan] = []
for schema in ("billiards_ods", "billiards_dwd"):
for table in _load_tables(cur, schema):
all_plans.extend(_plan_indexes(cur, schema, table))
touched_tables: Set[Tuple[str, str]] = set()
print(f"planned indexes: {len(all_plans)}")
for plan in all_plans:
cols = ", ".join(plan.columns)
print(f"[INDEX] {plan.schema}.{plan.table} ({cols}) -> {plan.index_name}")
if not args.dry_run:
_create_index(cur, plan)
touched_tables.add((plan.schema, plan.table))
if not args.skip_analyze:
if args.dry_run:
for schema, table in sorted({(p.schema, p.table) for p in all_plans}):
print(f"[ANALYZE] {schema}.{table}")
else:
for schema, table in sorted(touched_tables):
_analyze_table(cur, schema, table)
print(f"[ANALYZE] {schema}.{table}")
if args.dry_run:
conn.rollback()
print("dry-run complete; transaction rolled back")
else:
conn.commit()
print("index tuning complete")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
"""
v3 比对脚本 — 直接从 JSON 样本提取字段,与硬编码的 ODS 列比对。
ODS 列数据来自 information_schema.columns WHERE table_schema = 'billiards_ods'
"""
import json
import os
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
NESTED_OBJECTS = {"siteprofile", "tableprofile"}
# 22 张需要比对的表
TABLES = [
"assistant_accounts_master", "settlement_records", "assistant_service_records",
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
"member_stored_value_cards", "recharge_settlements", "member_balance_changes",
"group_buy_packages", "group_buy_redemption_records", "goods_stock_summary",
"site_tables_master",
]
def load_json(table):
path = os.path.join(SAMPLES_DIR, f"{table}.json")
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def extract_fields(table):
data = load_json(table)
# settlement_records / recharge_settlements: 取 settleList 内层
if table in ("settlement_records", "recharge_settlements"):
record = data.get("settleList", {})
if isinstance(record, list):
record = record[0] if record else {}
fields = {k.lower() for k in record.keys()}
# 加上 siteProfile(顶层嵌套对象)
if "siteProfile" in data:
fields.add("siteprofile")
return fields
# stock_goods_category_tree: 取 goodsCategoryList 数组元素
if table == "stock_goods_category_tree":
cat_list = data.get("goodsCategoryList", [])
if cat_list:
return {k.lower() for k in cat_list[0].keys()}
return set()
# 通用:顶层 keys
fields = set()
for k, v in data.items():
kl = k.lower()
if kl in NESTED_OBJECTS:
fields.add(kl) # 嵌套对象作为单列
else:
fields.add(kl)
return fields
def main():
# ODS 列清单由 ods_columns.json 提供(预先从 information_schema.columns 导出),
# 这样脚本无需连接数据库即可独立运行;API 字段则直接取自 JSON 样本。
ods_cols_path = os.path.join(os.path.dirname(__file__), "ods_columns.json")
with open(ods_cols_path, "r", encoding="utf-8") as f:
ods_all = json.load(f)
results = []
for table in TABLES:
api_fields = extract_fields(table)
ods_cols = set(ods_all.get(table, [])) - ODS_META
matched = sorted(api_fields & ods_cols)
api_only = sorted(api_fields - ods_cols)
ods_only = sorted(ods_cols - api_fields)
results.append({
"table": table,
"api_count": len(api_fields),
"ods_count": len(ods_cols),
"matched": len(matched),
"api_only": api_only,
"ods_only": ods_only,
})
status = "✓ 完全对齐" if not api_only and not ods_only else ""
print(f"{table}: API={len(api_fields)} ODS={len(ods_cols)} 匹配={len(matched)} API独有={len(api_only)} ODS独有={len(ods_only)} {status}")
if api_only:
print(f" API独有: {api_only}")
if ods_only:
print(f" ODS独有: {ods_only}")
# 写 JSON 报告
os.makedirs(REPORT_DIR, exist_ok=True)
out = os.path.join(REPORT_DIR, "api_ods_comparison_v3.json")
with open(out, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\nJSON 报告: {out}")
if __name__ == "__main__":
main()
# ──────────────────────────────────────────────────────────────────
# AI_CHANGELOG:
# - 日期: 2026-02-14
# Prompt: P20260214-000000 — "还是不准。现在拆解任务,所有表,每个表当作一个任务进行比对。"
# 直接原因: v2 比对脚本结果不准确,需从 JSON 样本直接提取字段与数据库实际列精确比对
# 变更摘要: 新建脚本,读取 samples/*.json 提取 API 字段,读取 ods_columns.json 获取 ODS 列,
# 处理 settleList 嵌套/goodsCategoryList 数组/siteProfile 嵌套对象等特殊结构,逐表输出比对结果
# 风险与验证: 纯分析脚本,不修改数据库;验证方式:运行脚本确认输出与 v3 报告一致
# ──────────────────────────────────────────────────────────────────

View File

@@ -0,0 +1,465 @@
# -*- coding: utf-8 -*-
"""
v3-fixed: API 参考文档 (.md) 响应字段详解 vs ODS 实际列 — 精确比对
核心改进(相对 v3):
1. 仅从"四、响应字段详解"章节提取字段(排除请求参数、跨表关联等章节)
2. 对 settlement_records / recharge_settlements 特殊处理:
- settleList 内层字段 → 直接比对 ODS 列
- siteProfile → ODS 中存为 siteprofile jsonb 单列(不展开子字段)
3. 对 table_fee_discount_records / payment_transactions 等含 siteProfile/tableProfile 的表:
- siteProfile/tableProfile 作为嵌套对象 → ODS 中存为 jsonb 单列
4. 对 stock_goods_category_tree,goodsCategoryList/categoryBoxes 是结构包装器,不是业务字段
5. JSON 样本作为补充来源(union)
CHANGE P20260214-003000: 完全重写字段提取逻辑
intent: 精确限定提取范围到"响应字段详解"章节,避免误提取请求参数和跨表关联字段
assumptions: 所有 .md 文档均以"## 四、响应字段详解"开始响应字段章节,以"## 五、"结束
edge cases: settlement_records/recharge_settlements 的 siteProfile 子字段不应与 ODS 列比对
"""
import json
import os
import re
from datetime import datetime
DOCS_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference")
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")
ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
TABLES = [
"assistant_accounts_master", "settlement_records", "assistant_service_records",
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
"member_stored_value_cards", "recharge_settlements", "member_balance_changes",
"group_buy_packages", "group_buy_redemption_records", "goods_stock_summary",
"site_tables_master",
]
# 这些字段在 API JSON 中是嵌套对象,ODS 中存为 jsonb 单列
NESTED_OBJECTS = {"siteprofile", "tableprofile"}
# 这些字段是结构包装器,不是业务字段
# 注意:categoryboxes 虽然是嵌套数组,但 ODS 中确实有 categoryboxes 列(jsonb),所以不排除
WRAPPER_FIELDS = {"goodscategorylist", "total"}
# 跨表关联章节中常见的"本表字段"列标题
CROSS_REF_HEADERS = {"本表字段", "关联表字段", "关联表", "参数", "字段"}
def extract_response_fields_from_md(table_name: str) -> tuple[set[str], list[str]]:
"""
从 API 参考文档中精确提取"响应字段详解"章节的字段名。
返回: (fields_set_lowercase, debug_messages)
提取策略:
- 找到"## 四、响应字段详解"章节
- 在该章节内提取所有 Markdown 表格第一列的反引号字段名
- 遇到"## 五、"或更高级别标题时停止
- 对 settlement_records / recharge_settlements
- siteProfile 子字段(带 siteProfile. 前缀的)→ 不提取(ODS 中存为 siteprofile jsonb)
- settleList 内层字段 → 正常提取
- 对含 siteProfile/tableProfile 的表,这些作为顶层字段名提取(ODS 中是 jsonb 列)
"""
md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
debug = []
if not os.path.exists(md_path):
debug.append(f"[WARN] 文档不存在: {md_path}")
return set(), debug
with open(md_path, "r", encoding="utf-8") as f:
lines = f.readlines()
fields = set()
in_response_section = False
in_siteprofile_subsection = False
field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|')
# 用于检测 siteProfile 子章节(如 "### A. siteProfile" 或 "### 4.1 门店信息快照(siteProfile)")
siteprofile_header = re.compile(r'^###.*siteProfile', re.IGNORECASE)
for line in lines:
stripped = line.strip()
# 检测进入"响应字段详解"章节
if stripped.startswith("## 四、") and "响应字段" in stripped:
in_response_section = True
in_siteprofile_subsection = False
continue
# 检测离开(遇到下一个 ## 级别标题)
if in_response_section and stripped.startswith("## ") and not stripped.startswith("## 四"):
break
if not in_response_section:
continue
# 检测 siteProfile 子章节(仅对 settlement_records / recharge_settlements)
if table_name in ("settlement_records", "recharge_settlements"):
if siteprofile_header.search(stripped):
in_siteprofile_subsection = True
continue
# 遇到下一个 ### 标题,退出 siteProfile 子章节
if stripped.startswith("### ") and in_siteprofile_subsection:
if not siteprofile_header.search(stripped):
in_siteprofile_subsection = False
# 提取字段名
m = field_pattern.match(stripped)
if m:
raw_field = m.group(1).strip()
# 跳过表头行
if raw_field in CROSS_REF_HEADERS:
continue
# 对 settlement_records / recharge_settlements:跳过 siteProfile 子字段
if table_name in ("settlement_records", "recharge_settlements"):
if in_siteprofile_subsection:
# siteProfile 子字段不提取(ODS 中存为 siteprofile jsonb)
continue
# 带 siteProfile. 前缀的也跳过
if raw_field.startswith("siteProfile."):
continue
# 跳过结构包装器字段
if raw_field.lower() in WRAPPER_FIELDS:
continue
fields.add(raw_field.lower())
debug.append(f"从 .md 提取 {len(fields)} 个响应字段")
return fields, debug
def extract_fields_from_json(table_name: str) -> tuple[set[str], list[str]]:
"""从 JSON 样本提取字段(作为补充)"""
path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
debug = []
if not os.path.exists(path):
debug.append("[INFO] 无 JSON 样本")
return set(), debug
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
# settlement_records / recharge_settlements: 提取 settleList 内层字段
if table_name in ("settlement_records", "recharge_settlements"):
settle = data.get("settleList", {})
if isinstance(settle, list):
settle = settle[0] if settle else {}
fields = {k.lower() for k in settle.keys()}
# siteProfile 作为整体(ODS 中不存 siteProfile 的子字段,但可能有 siteprofile jsonb 列)
# 不添加 siteProfile 的子字段
debug.append(f"从 JSON settleList 提取 {len(fields)} 个字段")
return fields, debug
# stock_goods_category_tree: 提取 goodsCategoryList 内层字段
if table_name == "stock_goods_category_tree":
cat_list = data.get("goodsCategoryList", [])
if cat_list:
fields = set()
for k in cat_list[0].keys():
kl = k.lower()
if kl not in WRAPPER_FIELDS:
fields.add(kl)
debug.append(f"从 JSON goodsCategoryList 提取 {len(fields)} 个字段")
return fields, debug
return set(), debug
# 通用:提取顶层字段
fields = set()
for k in data.keys():
kl = k.lower()
# siteProfile/tableProfile 作为整体保留(ODS 中是 jsonb 列)
if kl in NESTED_OBJECTS:
fields.add(kl)
elif kl not in WRAPPER_FIELDS:
fields.add(kl)
debug.append(f"从 JSON 提取 {len(fields)} 个字段")
return fields, debug
def classify_ods_only(table_name: str, field: str) -> str:
"""对 ODS 独有字段进行分类说明"""
# table_fee_discount_records 的展开字段
if table_name == "table_fee_discount_records" and field in (
"area_type_id", "charge_free", "site_table_area_id", "site_table_area_name",
"sitename", "table_name", "table_price", "tenant_name"
):
return "从 tableProfile/siteProfile 嵌套对象展开的字段"
# site_tables_master 的 order_id
if table_name == "site_tables_master" and field == "order_id":
return "ODS 后续版本新增字段(当前使用中的台桌关联订单 ID"
# tenant_id 在某些表中是 ODS 额外添加的
if field == "tenant_id" and table_name in (
"assistant_cancellation_records", "payment_transactions"
):
return "ODS 额外添加的租户 ID 字段API 响应中不含ETL 入库时补充)"
# API 后续版本新增字段(文档快照未覆盖)
api_version_fields = {
"assistant_service_records": {
"assistantteamname": "API 后续版本新增(助教团队名称)",
"real_service_money": "API 后续版本新增(实际服务金额)",
},
"table_fee_transactions": {
"activity_discount_amount": "API 后续版本新增(活动折扣金额)",
"order_consumption_type": "API 后续版本新增(订单消费类型)",
"real_service_money": "API 后续版本新增(实际服务金额)",
},
"tenant_goods_master": {
"not_sale": "API 后续版本新增(是否禁售标记)",
},
"store_goods_sales_records": {
"coupon_share_money": "API 后续版本新增(优惠券分摊金额)",
},
"store_goods_master": {
"commodity_code": "API 后续版本新增(商品编码)",
"not_sale": "API 后续版本新增(是否禁售标记)",
},
"member_profiles": {
"pay_money_sum": "API 后续版本新增(累计消费金额)",
"person_tenant_org_id": "API 后续版本新增(人事组织 ID",
"person_tenant_org_name": "API 后续版本新增(人事组织名称)",
"recharge_money_sum": "API 后续版本新增(累计充值金额)",
"register_source": "API 后续版本新增(注册来源)",
},
"member_stored_value_cards": {
"able_share_member_discount": "API 后续版本新增(是否共享会员折扣)",
"electricity_deduct_radio": "API 后续版本新增(电费抵扣比例)",
"electricity_discount": "API 后续版本新增(电费折扣)",
"electricitycarddeduct": "API 后续版本新增(电费卡扣金额)",
"member_grade": "API 后续版本新增(会员等级)",
"principal_balance": "API 后续版本新增(本金余额)",
"rechargefreezebalance": "API 后续版本新增(充值冻结余额)",
},
"member_balance_changes": {
"principal_after": "API 后续版本新增(变动后本金)",
"principal_before": "API 后续版本新增(变动前本金)",
"principal_data": "API 后续版本新增(本金明细数据)",
},
"group_buy_packages": {
"is_first_limit": "API 后续版本新增(是否限首单)",
"sort": "API 后续版本新增(排序序号)",
"tenantcouponsaleorderitemid": "API 后续版本新增(租户券销售订单项 ID",
},
"group_buy_redemption_records": {
"assistant_service_share_money": "API 后续版本新增(助教服务分摊金额)",
"assistant_share_money": "API 后续版本新增(助教分摊金额)",
"coupon_sale_id": "API 后续版本新增(券销售 ID",
"good_service_share_money": "API 后续版本新增(商品服务分摊金额)",
"goods_share_money": "API 后续版本新增(商品分摊金额)",
"member_discount_money": "API 后续版本新增(会员折扣金额)",
"recharge_share_money": "API 后续版本新增(充值分摊金额)",
"table_service_share_money": "API 后续版本新增(台费服务分摊金额)",
"table_share_money": "API 后续版本新增(台费分摊金额)",
},
}
table_fields = api_version_fields.get(table_name, {})
if field in table_fields:
return table_fields[field]
return "ODS 独有(待确认来源)"
def main():
ods_cols_path = os.path.join(os.path.dirname(__file__), "ods_columns.json")
with open(ods_cols_path, "r", encoding="utf-8") as f:
ods_all = json.load(f)
results = []
total_api_only = 0
total_ods_only = 0
all_debug = {}
for table in TABLES:
debug_lines = [f"\n{'='*60}", f"表: {table}", f"{'='*60}"]
# 从文档提取字段(主要来源)
md_fields, md_debug = extract_response_fields_from_md(table)
debug_lines.extend(md_debug)
# 从 JSON 样本提取字段(补充)
json_fields, json_debug = extract_fields_from_json(table)
debug_lines.extend(json_debug)
# 合并:文档字段 JSON 样本字段
api_fields = md_fields | json_fields
# 特殊处理:settlement_records / recharge_settlements
# ODS 中有 siteprofile 列但不展开子字段;也有 settlelist jsonb 列
# API 文档中 siteProfile 子字段已被排除,但需要确保 siteprofile 作为整体列被考虑
if table in ("settlement_records", "recharge_settlements"):
# 不把 siteprofile 加入 api_fields(因为 ODS 中 siteprofile 不是从 API 直接映射的列名)
# settlelist 也是 ODS 的 jsonb 列,不在 API 字段中
pass
# 特殊处理:含 siteProfile/tableProfile 的表
# 这些在 API 中是嵌套对象,ODS 中存为 jsonb 列
# 确保 api_fields 中包含 siteprofile/tableprofile如果 ODS 有这些列)
ods_cols = set(ods_all.get(table, [])) - ODS_META
ods_cols_lower = set()
ods_case_map = {}
for c in ods_cols:
cl = c.lower()
ods_cols_lower.add(cl)
ods_case_map[cl] = c
# 如果 ODS 有 siteprofile/tableprofile 列,且 API 文档中有 siteProfile/tableProfile 字段
for nested in NESTED_OBJECTS:
if nested in ods_cols_lower and nested not in api_fields:
# 检查 API 文档/JSON 中是否有这个嵌套对象
# 对于 settlement_records/recharge_settlements,siteProfile 确实存在于 API 响应中
# 对于 payment_transactions 等,siteProfile 也存在
api_fields.add(nested)
debug_lines.append(f" 补充嵌套对象字段: {nested}")
matched = sorted(api_fields & ods_cols_lower)
api_only = sorted(api_fields - ods_cols_lower)
ods_only = sorted(ods_cols_lower - api_fields)
# 对 ODS 独有字段分类
ods_only_classified = []
for f in ods_only:
reason = classify_ods_only(table, f)
ods_only_classified.append({"field": f, "ods_original": ods_case_map.get(f, f), "reason": reason})
total_api_only += len(api_only)
total_ods_only += len(ods_only)
result = {
"table": table,
"api_count": len(api_fields),
"ods_count": len(ods_cols_lower),
"matched": len(matched),
"matched_fields": matched,
"api_only": api_only,
"ods_only": ods_only_classified,
"api_only_count": len(api_only),
"ods_only_count": len(ods_only),
"md_fields_count": len(md_fields),
"json_fields_count": len(json_fields),
}
results.append(result)
status = "✓ 完全对齐" if not api_only and not ods_only else ""
print(f"{table}: API={len(api_fields)}(md={len(md_fields)},json={len(json_fields)}) "
f"ODS={len(ods_cols_lower)} 匹配={len(matched)} "
f"API独有={len(api_only)} ODS独有={len(ods_only)} {status}")
if api_only:
print(f" API独有: {api_only}")
if ods_only:
for item in ods_only_classified:
print(f" ODS独有: {item['ods_original']}{item['reason']}")
all_debug[table] = debug_lines
print(f"\n{'='*60}")
print(f"总计: API独有={total_api_only}, ODS独有={total_ods_only}")
print(f"{'='*60}")
# 写 JSON 报告
os.makedirs(REPORT_DIR, exist_ok=True)
json_out = os.path.join(REPORT_DIR, "api_ods_comparison_v3_fixed.json")
with open(json_out, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\nJSON 报告: {json_out}")
# 写 Markdown 报告
md_out = os.path.join(REPORT_DIR, "api_ods_comparison_v3_fixed.md")
write_md_report(results, md_out, total_api_only, total_ods_only)
print(f"MD 报告: {md_out}")
def write_md_report(results, path, total_api_only, total_ods_only):
now = datetime.now().strftime("%Y-%m-%d %H:%M")
lines = [
f"# API 响应字段 vs ODS 表结构比对报告v3-fixed",
f"",
f"> 生成时间:{now}Asia/Shanghai",
f"> 数据来源API 参考文档docs/api-reference/*.md+ JSON 样本 + PostgreSQL information_schema",
f'> 比对方法:从文档"响应字段详解"章节精确提取字段,与 ODS 实际列比对(排除 meta 列)',
f"",
f"## 汇总",
f"",
f"| 指标 | 值 |",
f"|------|-----|",
f"| 比对表数 | {len(results)} |",
f"| API 独有字段总数 | {total_api_only} |",
f"| ODS 独有字段总数 | {total_ods_only} |",
f"| 完全对齐表数 | {sum(1 for r in results if r['api_only_count'] == 0 and r['ods_only_count'] == 0)} |",
f"",
f"## 逐表比对",
f"",
]
for r in results:
status = "✅ 完全对齐" if r["api_only_count"] == 0 and r["ods_only_count"] == 0 else "⚠️ 有差异"
lines.append(f"### {r['table']}{status}")
lines.append(f"")
lines.append(f"| 指标 | 值 |")
lines.append(f"|------|-----|")
lines.append(f"| API 字段数 | {r['api_count']}(文档={r['md_fields_count']}JSON={r['json_fields_count']} |")
lines.append(f"| ODS 列数(排除 meta | {r['ods_count']} |")
lines.append(f"| 匹配 | {r['matched']} |")
lines.append(f"| API 独有 | {r['api_only_count']} |")
lines.append(f"| ODS 独有 | {r['ods_only_count']} |")
lines.append(f"")
if r["api_only"]:
lines.append(f"**API 独有字段ODS 中缺失):**")
lines.append(f"")
for f in r["api_only"]:
lines.append(f"- `{f}`")
lines.append(f"")
if r["ods_only"]:
lines.append(f"**ODS 独有字段API 文档中未出现):**")
lines.append(f"")
lines.append(f"| ODS 列名 | 分类说明 |")
lines.append(f"|----------|----------|")
for item in r["ods_only"]:
lines.append(f"| `{item['ods_original']}` | {item['reason']} |")
lines.append(f"")
lines.append(f"---")
lines.append(f"")
# AI_CHANGELOG
lines.extend([
f"<!--",
f"AI_CHANGELOG:",
f"- 日期: 2026-02-14",
f"- Prompt: P20260214-003000 — v3 比对不准确,重写为 v3-fixed",
f"- 直接原因: v3 仅从 JSON 样本提取字段导致遗漏v3-fixed 从 .md 文档响应字段详解章节精确提取",
f"- 变更摘要: 新建 v3-fixed 报告,精确限定提取范围,排除请求参数和跨表关联字段",
f"- 风险与验证: 纯分析报告,无运行时影响;验证方式:抽查 assistant_accounts_master 的 last_update_name 是否正确识别为匹配",
f"-->",
])
with open(path, "w", encoding="utf-8") as f:
f.write("\n".join(lines))
if __name__ == "__main__":
main()
# AI_CHANGELOG:
# - 日期: 2026-02-14
# - Prompt: P20260214-003000 — "还是不准,比如assistant_accounts_master的last_update_name命名,Json里就有,再仔细比对下"
# - 直接原因: v3 仅从 JSON 样本提取字段导致遗漏条件性字段;需改用 .md 文档响应字段详解章节作为主要来源
# - 变更摘要: 完全重写脚本,精确限定提取范围到"四、响应字段详解"章节,排除请求参数和跨表关联;
# 对 settlement_records/recharge_settlements 的 siteProfile 子字段不提取;对所有 ODS 独有字段分类说明
# - 风险与验证: 纯分析脚本,无运行时影响;验证:确认 assistant_accounts_master 62:62 完全对齐,last_update_name 正确匹配
#
# - 日期: 2026-02-14
# - Prompt: P20260214-030000 — 上下文传递续接,执行 settlelist 删除后的收尾工作
# - 直接原因: settlelist 列已从 ODS 删除,classify_ods_only 中的 settlelist 特殊分类不再需要
# - 变更摘要: 移除 classify_ods_only 函数中 settlelist 的特殊分类逻辑
# - 风险与验证: 纯分析脚本;验证:重新运行脚本确认 ODS 独有=47,settlement_records 和 recharge_settlements 完全对齐
#
# - 日期: 2026-02-14
# - Prompt: P20260214-070000 — ODS 清理与文档标注(5 项任务)
# - 直接原因: option_name(store_goods_sales_records)和 able_site_transfer(member_stored_value_cards)已从 ODS 删除
# - 变更摘要: 从 classify_ods_only 的 api_version_fields 字典中移除 option_name 和 able_site_transfer 条目
# - 风险与验证: 纯分析脚本;验证:重新运行脚本确认两表 ODS 独有数减少

View File

@@ -0,0 +1,26 @@
@echo off
REM -*- coding: utf-8 -*-
REM 说明:一键重建 ODS(执行 INIT_ODS_SCHEMA)并灌入示例 JSON(执行 MANUAL_INGEST)
setlocal
cd /d "%~dp0\.."
REM 如果需要覆盖示例目录,可修改下面的 INGEST_DIR
set "INGEST_DIR=export\\test-json-doc"
echo [INIT_ODS_SCHEMA] 准备执行,源目录=%INGEST_DIR%
python -m cli.main --tasks INIT_ODS_SCHEMA --pipeline-flow INGEST_ONLY --ingest-source "%INGEST_DIR%"
if errorlevel 1 (
echo INIT_ODS_SCHEMA 失败,退出
exit /b 1
)
echo [MANUAL_INGEST] 准备执行,源目录=%INGEST_DIR%
python -m cli.main --tasks MANUAL_INGEST --pipeline-flow INGEST_ONLY --ingest-source "%INGEST_DIR%"
if errorlevel 1 (
echo MANUAL_INGEST 失败,退出
exit /b 1
)
echo 全部完成。
endlocal

View File

@@ -0,0 +1,516 @@
# -*- coding: utf-8 -*-
"""
一键增量更新脚本(ODS -> DWD -> DWS)
用法:
python scripts/run_update.py
"""
from __future__ import annotations
import argparse
import logging
import multiprocessing as mp
import subprocess
import sys
import time as time_mod
from datetime import date, datetime, time, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo
from api.client import APIClient
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
from orchestration.scheduler import ETLScheduler
from tasks.utility.check_cutoff_task import CheckCutoffTask
from tasks.dwd.dwd_load_task import DwdLoadTask
from tasks.ods.ods_tasks import ENABLED_ODS_CODES
from utils.logging_utils import build_log_path, configure_logging
STEP_TIMEOUT_SEC = 120
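# 每个子步骤的默认超时秒数;个别步骤(如 ODS 缺口检查)会通过 step["timeout_sec"] 单独覆盖。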
def _coerce_date(s: str) -> date:
s = (s or "").strip()
if not s:
raise ValueError("empty date")
if len(s) >= 10:
s = s[:10]
return date.fromisoformat(s)
def _compute_dws_window(
*,
cfg: AppConfig,
tz: ZoneInfo,
rebuild_days: int,
bootstrap_days: int,
dws_start: date | None,
dws_end: date | None,
) -> tuple[datetime, datetime]:
if dws_start and dws_end and dws_end < dws_start:
raise ValueError("dws_end must be >= dws_start")
store_id = int(cfg.get("app.store_id"))
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
conn = DatabaseConnection(dsn=dsn, session=session)
try:
if dws_start is None:
row = conn.query(
"SELECT MAX(order_date) AS mx FROM billiards_dws.dws_order_summary WHERE site_id=%s",
(store_id,),
)
mx = (row[0] or {}).get("mx") if row else None
if isinstance(mx, date):
dws_start = mx - timedelta(days=max(0, int(rebuild_days)))
else:
dws_start = (datetime.now(tz).date()) - timedelta(days=max(1, int(bootstrap_days)))
if dws_end is None:
dws_end = datetime.now(tz).date()
finally:
conn.close()
start_dt = datetime.combine(dws_start, time.min).replace(tzinfo=tz)
# end_dt 取到当天 23:59:59避免只跑到“当前时刻”的 date() 导致少一天
end_dt = datetime.combine(dws_end, time.max).replace(tzinfo=tz)
return start_dt, end_dt
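# 窗口计算示例(日期为假设值):若 dws_order_summary 中 MAX(order_date)=2025-02-10 且 rebuild_days=1,
# 则窗口为 2025-02-09 00:00:00 至 运行当日 23:59:59.999999;若汇总表为空,则回退 bootstrap_days 天起算。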
def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
db_conn = DatabaseConnection(dsn=dsn, session=session)
db_ops = DatabaseOperations(db_conn)
api = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=cfg["api"]["timeout_sec"],
retry_max=cfg["api"]["retries"]["max_attempts"],
headers_extra=cfg["api"].get("headers_extra"),
)
try:
CheckCutoffTask(cfg, db_ops, api, logger).execute(None)
finally:
db_conn.close()
def _iter_daily_windows(window_start: datetime, window_end: datetime) -> list[tuple[datetime, datetime]]:
if window_start > window_end:
return []
tz = window_start.tzinfo
windows: list[tuple[datetime, datetime]] = []
cur = window_start
while cur <= window_end:
day_start = datetime.combine(cur.date(), time.min).replace(tzinfo=tz)
day_end = datetime.combine(cur.date(), time.max).replace(tzinfo=tz)
if day_start < window_start:
day_start = window_start
if day_end > window_end:
day_end = window_end
windows.append((day_start, day_end))
next_day = cur.date() + timedelta(days=1)
cur = datetime.combine(next_day, time.min).replace(tzinfo=tz)
return windows
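# 切分示例(时间为假设值):window_start=2025-01-01 08:00、window_end=2025-01-03 12:00 会得到三个窗口:
# 01-01 08:00~23:59:59.999999、01-02 全天、01-03 00:00~12:00。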
def _run_step_worker(result_queue: "mp.Queue[dict[str, str]]", step: dict[str, str]) -> None:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
log_file = step.get("log_file") or ""
log_level = step.get("log_level") or "INFO"
log_console = bool(step.get("log_console", True))
log_path = Path(log_file) if log_file else None
with configure_logging(
"etl_update",
log_path,
level=log_level,
console=log_console,
tee_std=True,
) as logger:
cfg_base = AppConfig.load({})
step_type = step.get("type", "")
try:
if step_type == "check_cutoff":
_run_check_cutoff(cfg_base, logger)
elif step_type == "ods_task":
task_code = step["task_code"]
overlap_seconds = int(step.get("overlap_seconds", 0))
cfg_ods = AppConfig.load(
{
"pipeline": {"flow": "FULL"},
"run": {"tasks": [task_code], "overlap_seconds": overlap_seconds},
}
)
scheduler = ETLScheduler(cfg_ods, logger)
try:
scheduler.run_tasks([task_code])
finally:
scheduler.close()
elif step_type == "init_dws_schema":
overlap_seconds = int(step.get("overlap_seconds", 0))
cfg_dwd = AppConfig.load(
{
"pipeline": {"flow": "INGEST_ONLY"},
"run": {"tasks": ["INIT_DWS_SCHEMA"], "overlap_seconds": overlap_seconds},
}
)
scheduler = ETLScheduler(cfg_dwd, logger)
try:
scheduler.run_tasks(["INIT_DWS_SCHEMA"])
finally:
scheduler.close()
elif step_type == "dwd_table":
dwd_table = step["dwd_table"]
overlap_seconds = int(step.get("overlap_seconds", 0))
cfg_dwd = AppConfig.load(
{
"pipeline": {"flow": "INGEST_ONLY"},
"run": {"tasks": ["DWD_LOAD_FROM_ODS"], "overlap_seconds": overlap_seconds},
"dwd": {"only_tables": [dwd_table]},
}
)
scheduler = ETLScheduler(cfg_dwd, logger)
try:
scheduler.run_tasks(["DWD_LOAD_FROM_ODS"])
finally:
scheduler.close()
elif step_type == "dws_window":
overlap_seconds = int(step.get("overlap_seconds", 0))
window_start = step["window_start"]
window_end = step["window_end"]
cfg_dws = AppConfig.load(
{
"pipeline": {"flow": "INGEST_ONLY"},
"run": {
"tasks": ["DWS_BUILD_ORDER_SUMMARY"],
"overlap_seconds": overlap_seconds,
"window_override": {"start": window_start, "end": window_end},
},
}
)
scheduler = ETLScheduler(cfg_dws, logger)
try:
scheduler.run_tasks(["DWS_BUILD_ORDER_SUMMARY"])
finally:
scheduler.close()
elif step_type == "ods_gap_check":
overlap_hours = int(step.get("overlap_hours", 24))
window_days = int(step.get("window_days", 1))
window_hours = int(step.get("window_hours", 0))
page_size = int(step.get("page_size", 0) or 0)
sleep_per_window = float(step.get("sleep_per_window", 0) or 0)
sleep_per_page = float(step.get("sleep_per_page", 0) or 0)
tag = step.get("tag", "run_update")
task_codes = (step.get("task_codes") or "").strip()
script_dir = Path(__file__).resolve().parent.parent
script_path = script_dir / "scripts" / "check" / "check_ods_gaps.py"
cmd = [
sys.executable,
str(script_path),
"--from-cutoff",
"--cutoff-overlap-hours",
str(overlap_hours),
"--window-days",
str(window_days),
"--tag",
str(tag),
]
if window_hours > 0:
cmd += ["--window-hours", str(window_hours)]
if page_size > 0:
cmd += ["--page-size", str(page_size)]
if sleep_per_window > 0:
cmd += ["--sleep-per-window-seconds", str(sleep_per_window)]
if sleep_per_page > 0:
cmd += ["--sleep-per-page-seconds", str(sleep_per_page)]
if task_codes:
cmd += ["--task-codes", task_codes]
subprocess.run(cmd, check=True, cwd=str(script_dir))
else:
raise ValueError(f"Unknown step type: {step_type}")
result_queue.put({"status": "ok"})
except Exception as exc:
result_queue.put({"status": "error", "error": str(exc)})
def _run_step_with_timeout(
step: dict[str, str], logger: logging.Logger, timeout_sec: int
) -> dict[str, object]:
start = time_mod.monotonic()
step_timeout = timeout_sec
if step.get("timeout_sec"):
try:
step_timeout = int(step.get("timeout_sec"))
except Exception:
step_timeout = timeout_sec
ctx = mp.get_context("spawn")
result_queue: mp.Queue = ctx.Queue()
proc = ctx.Process(target=_run_step_worker, args=(result_queue, step))
proc.start()
proc.join(timeout=step_timeout)
elapsed = time_mod.monotonic() - start
if proc.is_alive():
logger.error(
"STEP_TIMEOUT name=%s elapsed=%.2fs limit=%ss", step["name"], elapsed, step_timeout
)
proc.terminate()
proc.join(10)
return {"name": step["name"], "status": "timeout", "elapsed": elapsed}
result: dict[str, object] = {"name": step["name"], "status": "error", "elapsed": elapsed}
try:
payload = result_queue.get_nowait()
except Exception:
payload = {}
if payload:
result.update(payload)
if result.get("status") == "ok":
logger.info("STEP_OK name=%s elapsed=%.2fs", step["name"], elapsed)
else:
logger.error(
"STEP_FAIL name=%s elapsed=%.2fs error=%s",
step["name"],
elapsed,
result.get("error"),
)
return result
def main() -> int:
if hasattr(sys.stdout, "reconfigure"):
try:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
parser = argparse.ArgumentParser(description="One-click ETL update (ODS -> DWD -> DWS)")
parser.add_argument("--overlap-seconds", type=int, default=3600, help="overlap seconds (default: 3600)")
parser.add_argument(
"--dws-rebuild-days",
type=int,
default=1,
help="DWS 回算冗余天数default: 1",
)
parser.add_argument(
"--dws-bootstrap-days",
type=int,
default=30,
help="DWS 首次/空表时回算天数default: 30",
)
parser.add_argument("--dws-start", type=str, default="", help="DWS 回算开始日期 YYYY-MM-DD可选")
parser.add_argument("--dws-end", type=str, default="", help="DWS 回算结束日期 YYYY-MM-DD可选")
parser.add_argument(
"--skip-cutoff",
action="store_true",
help="跳过 CHECK_CUTOFF默认会在开始/结束各跑一次)",
)
parser.add_argument(
"--skip-ods",
action="store_true",
help="跳过 ODS 在线抓取(仅跑 DWD/DWS",
)
parser.add_argument(
"--ods-tasks",
type=str,
default="",
help="指定要跑的 ODS 任务(逗号分隔),默认跑全部 ENABLED_ODS_CODES",
)
parser.add_argument(
"--check-ods-gaps",
action="store_true",
help="run ODS gap check after ODS load (default: off)",
)
parser.add_argument(
"--check-ods-overlap-hours",
type=int,
default=24,
help="gap check overlap hours from cutoff (default: 24)",
)
parser.add_argument(
"--check-ods-window-days",
type=int,
default=1,
help="gap check window days (default: 1)",
)
parser.add_argument(
"--check-ods-window-hours",
type=int,
default=0,
help="gap check window hours (default: 0)",
)
parser.add_argument(
"--check-ods-page-size",
type=int,
default=200,
help="gap check API page size (default: 200)",
)
parser.add_argument(
"--check-ods-timeout-sec",
type=int,
default=1800,
help="gap check timeout seconds (default: 1800)",
)
parser.add_argument(
"--check-ods-task-codes",
type=str,
default="",
help="gap check task codes (comma-separated, optional)",
)
parser.add_argument(
"--check-ods-sleep-per-window-seconds",
type=float,
default=0,
help="gap check sleep seconds after each window (default: 0)",
)
parser.add_argument(
"--check-ods-sleep-per-page-seconds",
type=float,
default=0,
help="gap check sleep seconds after each page (default: 0)",
)
parser.add_argument("--log-file", type=str, default="", help="log file path (default: logs/run_update_YYYYMMDD_HHMMSS.log)")
parser.add_argument("--log-dir", type=str, default="", help="log directory (default: logs)")
parser.add_argument("--log-level", type=str, default="INFO", help="log level (default: INFO)")
parser.add_argument("--no-log-console", action="store_true", help="disable console logging")
args = parser.parse_args()
log_dir = Path(args.log_dir) if args.log_dir else (Path(__file__).resolve().parent.parent / "logs")
log_file = Path(args.log_file) if args.log_file else build_log_path(log_dir, "run_update")
log_console = not args.no_log_console
with configure_logging(
"etl_update",
log_file,
level=args.log_level,
console=log_console,
tee_std=True,
) as logger:
cfg_base = AppConfig.load({})
tz = ZoneInfo(cfg_base.get("app.timezone", "Asia/Shanghai"))
dws_start = _coerce_date(args.dws_start) if args.dws_start else None
dws_end = _coerce_date(args.dws_end) if args.dws_end else None
steps: list[dict[str, str]] = []
if not args.skip_cutoff:
steps.append({"name": "CHECK_CUTOFF:before", "type": "check_cutoff"})
# ------------------------------------------------------------------ ODS(在线抓取 + 写入)
if not args.skip_ods:
if args.ods_tasks:
ods_tasks = [t.strip().upper() for t in args.ods_tasks.split(",") if t.strip()]
else:
ods_tasks = sorted(ENABLED_ODS_CODES)
for task_code in ods_tasks:
steps.append(
{
"name": f"ODS:{task_code}",
"type": "ods_task",
"task_code": task_code,
"overlap_seconds": str(args.overlap_seconds),
}
)
if args.check_ods_gaps:
steps.append(
{
"name": "ODS_GAP_CHECK",
"type": "ods_gap_check",
"overlap_hours": str(args.check_ods_overlap_hours),
"window_days": str(args.check_ods_window_days),
"window_hours": str(args.check_ods_window_hours),
"page_size": str(args.check_ods_page_size),
"sleep_per_window": str(args.check_ods_sleep_per_window_seconds),
"sleep_per_page": str(args.check_ods_sleep_per_page_seconds),
"timeout_sec": str(args.check_ods_timeout_sec),
"task_codes": str(args.check_ods_task_codes or ""),
"tag": "run_update",
}
)
# ------------------------------------------------------------------ DWD(从 ODS 表装载)
steps.append(
{
"name": "INIT_DWS_SCHEMA",
"type": "init_dws_schema",
"overlap_seconds": str(args.overlap_seconds),
}
)
for dwd_table in DwdLoadTask.TABLE_MAP.keys():
steps.append(
{
"name": f"DWD:{dwd_table}",
"type": "dwd_table",
"dwd_table": dwd_table,
"overlap_seconds": str(args.overlap_seconds),
}
)
# ------------------------------------------------------------------ DWS(按日期窗口重建)
window_start, window_end = _compute_dws_window(
cfg=cfg_base,
tz=tz,
rebuild_days=int(args.dws_rebuild_days),
bootstrap_days=int(args.dws_bootstrap_days),
dws_start=dws_start,
dws_end=dws_end,
)
for start_dt, end_dt in _iter_daily_windows(window_start, window_end):
steps.append(
{
"name": f"DWS:{start_dt.date().isoformat()}",
"type": "dws_window",
"window_start": start_dt.strftime("%Y-%m-%d %H:%M:%S"),
"window_end": end_dt.strftime("%Y-%m-%d %H:%M:%S"),
"overlap_seconds": str(args.overlap_seconds),
}
)
if not args.skip_cutoff:
steps.append({"name": "CHECK_CUTOFF:after", "type": "check_cutoff"})
for step in steps:
step["log_file"] = str(log_file)
step["log_level"] = args.log_level
step["log_console"] = log_console
step_results: list[dict[str, object]] = []
for step in steps:
logger.info("STEP_START name=%s timeout=%ss", step["name"], STEP_TIMEOUT_SEC)
result = _run_step_with_timeout(step, logger, STEP_TIMEOUT_SEC)
step_results.append(result)
total = len(step_results)
ok_count = sum(1 for r in step_results if r.get("status") == "ok")
timeout_count = sum(1 for r in step_results if r.get("status") == "timeout")
fail_count = total - ok_count - timeout_count
logger.info(
"STEP_SUMMARY total=%s ok=%s failed=%s timeout=%s",
total,
ok_count,
fail_count,
timeout_count,
)
for item in sorted(step_results, key=lambda r: float(r.get("elapsed", 0.0)), reverse=True):
logger.info(
"STEP_RESULT name=%s status=%s elapsed=%.2fs",
item.get("name"),
item.get("status"),
item.get("elapsed", 0.0),
)
logger.info("Update done.")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,488 @@
#!/usr/bin/env python3
"""
BD_Manual 文档体系验证脚本。
# AI_CHANGELOG [2026-02-13] 新增:验证 Property 1/4/5/6/7/8/9/10,支持 --pg-dsn 参数
验证 docs/database/ 下的目录结构、文档覆盖率、格式完整性和命名规范。
需要连接 PostgreSQL 获取 billiards_ods schema 的表清单作为基准。
用法:
python scripts/validate_bd_manual.py --pg-dsn "postgresql://user:pass@host/db"
python scripts/validate_bd_manual.py # 从 PG_DSN 环境变量或 .env 读取
"""
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from dataclasses import dataclass, field
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------
BD_MANUAL_ROOT = Path("docs/database")
ODS_MAIN_DIR = BD_MANUAL_ROOT / "ODS" / "main"
ODS_MAPPINGS_DIR = BD_MANUAL_ROOT / "ODS" / "mappings"
ODS_DICT_PATH = Path("docs/database/overview/ods_tables_dictionary.md")
# 四个数据层,每层都应有 main/ 和 changes/
DATA_LAYERS = ["ODS", "DWD", "DWS", "ETL_Admin"]
# ODS 文档必须包含的章节标题(Property 5)
ODS_DOC_REQUIRED_SECTIONS = [
"表信息",
"字段说明",
"使用说明",
"可回溯性",
]
# ODS 文档"表信息"表格中必须出现的属性关键词
ODS_DOC_TABLE_INFO_KEYS = ["Schema", "表名", "主键", "数据来源", "说明"]
# ODS 文档必须提及的 ETL 元数据字段
ODS_DOC_ETL_META_FIELDS = [
"content_hash",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
]
# 映射文档必须包含的章节/关键内容Property 8
MAPPING_DOC_REQUIRED_SECTIONS = [
"端点信息",
"字段映射",
"ETL 补充字段",
]
# 映射文档"端点信息"表格中必须出现的属性关键词
MAPPING_DOC_ENDPOINT_KEYS = ["接口路径", "ODS 对应表", "JSON 数据路径"]
# ---------------------------------------------------------------------------
# 数据结构
# ---------------------------------------------------------------------------
@dataclass
class CheckResult:
"""单条验证结果。"""
property_id: str # 如 "Property 1"
description: str
passed: bool
details: list[str] = field(default_factory=list) # 失败时的具体说明
# ---------------------------------------------------------------------------
# 数据库查询:获取 ODS 表清单
# ---------------------------------------------------------------------------
def fetch_ods_tables(pg_dsn: str) -> list[str]:
"""从 billiards_ods schema 获取所有用户表名(排除系统表)。"""
import psycopg2
sql = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_ods'
AND table_type = 'BASE TABLE'
ORDER BY table_name;
"""
with psycopg2.connect(pg_dsn) as conn:
with conn.cursor() as cur:
cur.execute(sql)
return [row[0] for row in cur.fetchall()]
# ---------------------------------------------------------------------------
# Property 1: 数据层目录结构一致性
# ---------------------------------------------------------------------------
def check_directory_structure() -> CheckResult:
"""ODS/DWD/DWS/ETL_Admin 各层都应有 main/ 和 changes/ 子目录。"""
missing: list[str] = []
for layer in DATA_LAYERS:
for sub in ("main", "changes"):
p = BD_MANUAL_ROOT / layer / sub
if not p.is_dir():
missing.append(str(p))
return CheckResult(
property_id="Property 1",
description="数据层目录结构一致性main/ + changes/",
passed=len(missing) == 0,
details=[f"缺失目录: {d}" for d in missing],
)
# ---------------------------------------------------------------------------
# Property 4: ODS 表级文档覆盖率
# ---------------------------------------------------------------------------
def check_ods_doc_coverage(ods_tables: list[str]) -> CheckResult:
"""billiards_ods 中每张表都应有 BD_manual_{表名}.md。"""
missing: list[str] = []
for tbl in ods_tables:
expected = ODS_MAIN_DIR / f"BD_manual_{tbl}.md"
if not expected.is_file():
missing.append(tbl)
return CheckResult(
property_id="Property 4",
description="ODS 表级文档覆盖率",
passed=len(missing) == 0,
details=[f"缺失文档: BD_manual_{t}.md" for t in missing],
)
# ---------------------------------------------------------------------------
# Property 5: ODS 表级文档格式完整性
# ---------------------------------------------------------------------------
def _check_single_ods_doc(filepath: Path) -> list[str]:
"""检查单份 ODS 文档是否包含必要章节和内容,返回问题列表。"""
issues: list[str] = []
name = filepath.name
try:
content = filepath.read_text(encoding="utf-8")
except Exception as e:
return [f"{name}: 无法读取 ({e})"]
# 检查必要章节
for section in ODS_DOC_REQUIRED_SECTIONS:
# 匹配 ## 章节标题(允许前后有空格)
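# 例如可匹配「## 表信息」或「## 一、表信息」等二级标题(示例标题,非真实文档内容)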
pattern = rf"^##\s+.*{re.escape(section)}"
if not re.search(pattern, content, re.MULTILINE):
issues.append(f"{name}: 缺少「{section}」章节")
# 检查"表信息"表格中的关键属性
for key in ODS_DOC_TABLE_INFO_KEYS:
if key not in content:
issues.append(f"{name}: 表信息缺少「{key}」属性")
# 检查 ETL 元数据字段是否被提及
meta_missing = [f for f in ODS_DOC_ETL_META_FIELDS if f not in content]
if meta_missing:
issues.append(f"{name}: 未提及 ETL 元数据字段: {', '.join(meta_missing)}")
return issues
def check_ods_doc_format() -> CheckResult:
"""每份 ODS 文档应包含表信息、字段说明、使用说明、可回溯性、ETL 元数据字段。"""
all_issues: list[str] = []
if not ODS_MAIN_DIR.is_dir():
return CheckResult(
property_id="Property 5",
description="ODS 表级文档格式完整性",
passed=False,
details=["ODS/main/ 目录不存在"],
)
for f in sorted(ODS_MAIN_DIR.glob("BD_manual_*.md")):
all_issues.extend(_check_single_ods_doc(f))
return CheckResult(
property_id="Property 5",
description="ODS 表级文档格式完整性",
passed=len(all_issues) == 0,
details=all_issues,
)
# ---------------------------------------------------------------------------
# Property 6: ODS 表级文档命名规范
# ---------------------------------------------------------------------------
def check_ods_doc_naming() -> CheckResult:
"""ODS/main/ 下的文件名应匹配 BD_manual_{表名}.md。"""
bad: list[str] = []
if not ODS_MAIN_DIR.is_dir():
return CheckResult(
property_id="Property 6",
description="ODS 表级文档命名规范",
passed=False,
details=["ODS/main/ 目录不存在"],
)
pattern = re.compile(r"^BD_manual_[a-z][a-z0-9_]*\.md$")
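# 例如 BD_manual_member_profile.md 符合规范BD_Manual_MemberProfile.md 则不符合
# (文件名仅为假设示例,用于说明命名模式)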
for f in sorted(ODS_MAIN_DIR.iterdir()):
if f.suffix == ".md" and not pattern.match(f.name):
bad.append(f.name)
return CheckResult(
property_id="Property 6",
description="ODS 表级文档命名规范BD_manual_{表名}.md",
passed=len(bad) == 0,
details=[f"命名不规范: {n}" for n in bad],
)
# ---------------------------------------------------------------------------
# Property 7: 映射文档覆盖率
# ---------------------------------------------------------------------------
def check_mapping_doc_coverage(ods_tables: list[str]) -> CheckResult:
"""每个有 ODS 表的 API 端点都应有映射文档。
策略:遍历 ODS 表,检查 mappings/ 下是否存在至少一个
mapping_*_{表名}.md 文件。
"""
missing: list[str] = []
if not ODS_MAPPINGS_DIR.is_dir():
return CheckResult(
property_id="Property 7",
description="映射文档覆盖率",
passed=False,
details=["ODS/mappings/ 目录不存在"],
)
existing_mappings = {f.name for f in ODS_MAPPINGS_DIR.glob("mapping_*.md")}
for tbl in ods_tables:
# 查找 mapping_*_{表名}.md
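# 示例:表 member_profile 可由 mapping_MemberList_member_profile.md 覆盖(文件名为假设示例);
# 注意 endswith 匹配较宽松,若某表名恰为另一表名的后缀,可能被误判为已覆盖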
found = any(
name.endswith(f"_{tbl}.md") and name.startswith("mapping_")
for name in existing_mappings
)
if not found:
missing.append(tbl)
return CheckResult(
property_id="Property 7",
description="映射文档覆盖率(每张 ODS 表至少一份映射文档)",
passed=len(missing) == 0,
details=[f"缺失映射文档: mapping_*_{t}.md" for t in missing],
)
# ---------------------------------------------------------------------------
# Property 8: 映射文档内容完整性
# ---------------------------------------------------------------------------
def _check_single_mapping_doc(filepath: Path) -> list[str]:
"""检查单份映射文档是否包含必要章节和内容。"""
issues: list[str] = []
name = filepath.name
try:
content = filepath.read_text(encoding="utf-8")
except Exception as e:
return [f"{name}: 无法读取 ({e})"]
# 检查必要章节
for section in MAPPING_DOC_REQUIRED_SECTIONS:
pattern = rf"^##\s+.*{re.escape(section)}"
if not re.search(pattern, content, re.MULTILINE):
issues.append(f"{name}: 缺少「{section}」章节")
# 检查端点信息表格中的关键属性
for key in MAPPING_DOC_ENDPOINT_KEYS:
if key not in content:
issues.append(f"{name}: 端点信息缺少「{key}」属性")
# 检查 ETL 补充字段是否被提及
etl_missing = [f for f in ODS_DOC_ETL_META_FIELDS if f not in content]
if etl_missing:
issues.append(f"{name}: 未提及 ETL 补充字段: {', '.join(etl_missing)}")
return issues
def check_mapping_doc_content() -> CheckResult:
"""每份映射文档应包含端点路径、ODS 表名、JSON 数据路径、字段映射表、ETL 补充字段。"""
all_issues: list[str] = []
if not ODS_MAPPINGS_DIR.is_dir():
return CheckResult(
property_id="Property 8",
description="映射文档内容完整性",
passed=False,
details=["ODS/mappings/ 目录不存在"],
)
for f in sorted(ODS_MAPPINGS_DIR.glob("mapping_*.md")):
all_issues.extend(_check_single_mapping_doc(f))
return CheckResult(
property_id="Property 8",
description="映射文档内容完整性",
passed=len(all_issues) == 0,
details=all_issues,
)
# ---------------------------------------------------------------------------
# Property 9: 映射文档命名规范
# ---------------------------------------------------------------------------
def check_mapping_doc_naming() -> CheckResult:
"""映射文档文件名应匹配 mapping_{API端点名}_{ODS表名}.md。"""
bad: list[str] = []
if not ODS_MAPPINGS_DIR.is_dir():
return CheckResult(
property_id="Property 9",
description="映射文档命名规范",
passed=False,
details=["ODS/mappings/ 目录不存在"],
)
# mapping_{EndpointName}_{table_name}.md
# 端点名PascalCase字母数字表名snake_case
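# 例如 mapping_MemberList_member_profile.md 符合mapping_member_list_member_profile.md 不符合
# (文件名仅为假设示例)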
pattern = re.compile(r"^mapping_[A-Z][A-Za-z0-9]+_[a-z][a-z0-9_]*\.md$")
for f in sorted(ODS_MAPPINGS_DIR.iterdir()):
if f.suffix == ".md" and f.name.startswith("mapping_"):
if not pattern.match(f.name):
bad.append(f.name)
return CheckResult(
property_id="Property 9",
description="映射文档命名规范mapping_{API端点名}_{ODS表名}.md",
passed=len(bad) == 0,
details=[f"命名不规范: {n}" for n in bad],
)
# ---------------------------------------------------------------------------
# Property 10: ODS 数据字典覆盖率
# ---------------------------------------------------------------------------
def check_ods_dictionary_coverage(ods_tables: list[str]) -> CheckResult:
"""数据字典中应包含所有 ODS 表条目。"""
if not ODS_DICT_PATH.is_file():
return CheckResult(
property_id="Property 10",
description="ODS 数据字典覆盖率",
passed=False,
details=[f"数据字典文件不存在: {ODS_DICT_PATH}"],
)
try:
content = ODS_DICT_PATH.read_text(encoding="utf-8")
except Exception as e:
return CheckResult(
property_id="Property 10",
description="ODS 数据字典覆盖率",
passed=False,
details=[f"无法读取数据字典: {e}"],
)
missing: list[str] = []
for tbl in ods_tables:
# 在字典内容中查找表名(反引号包裹或直接出现)
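# 注意:此处为简单子串匹配,若某表名是另一表名的子串,可能被误判为已收录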
if tbl not in content:
missing.append(tbl)
return CheckResult(
property_id="Property 10",
description="ODS 数据字典覆盖率",
passed=len(missing) == 0,
details=[f"数据字典缺失条目: {t}" for t in missing],
)
# ---------------------------------------------------------------------------
# 报告输出
# ---------------------------------------------------------------------------
def print_report(results: list[CheckResult]) -> None:
"""打印验证报告。"""
print("=" * 60)
print("BD_Manual 文档体系验证报告")
print("=" * 60)
passed_count = sum(1 for r in results if r.passed)
total = len(results)
for r in results:
status = "✓ PASS" if r.passed else "✗ FAIL"
print(f"\n[{status}] {r.property_id}: {r.description}")
if not r.passed:
for d in r.details[:20]: # 最多显示 20 条
print(f" - {d}")
if len(r.details) > 20:
print(f" ... 还有 {len(r.details) - 20} 条问题")
print("\n" + "-" * 60)
print(f"结果: {passed_count}/{total} 项通过")
if passed_count < total:
print("存在未通过的验证项,请检查上述详情。")
else:
print("所有验证项均通过 ✓")
print("=" * 60)
# ---------------------------------------------------------------------------
# 主入口
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
description="验证 BD_Manual 文档体系的覆盖率、格式和命名规范",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
示例:
# 从 .env 或 PG_DSN 环境变量读取连接字符串
python scripts/validate_bd_manual.py
# 指定连接字符串
python scripts/validate_bd_manual.py --pg-dsn "postgresql://user:pass@host/db"
""",
)
parser.add_argument(
"--pg-dsn",
help="PostgreSQL 连接字符串(默认从 PG_DSN 环境变量或 .env 读取)",
)
args = parser.parse_args(argv)
# 加载 .env
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
pg_dsn = args.pg_dsn or os.environ.get("PG_DSN")
if not pg_dsn:
print(
"✗ 未提供 PG_DSN请通过 --pg-dsn 参数或 PG_DSN 环境变量指定",
file=sys.stderr,
)
return 1
# 获取 ODS 表清单
try:
ods_tables = fetch_ods_tables(pg_dsn)
except Exception as e:
print(f"✗ 连接数据库失败: {e}", file=sys.stderr)
return 1
if not ods_tables:
print("⚠ billiards_ods schema 中未找到任何表", file=sys.stderr)
return 1
print(f"从数据库获取到 {len(ods_tables)} 张 ODS 表\n")
# 运行所有验证
results: list[CheckResult] = [
check_directory_structure(), # Property 1
check_ods_doc_coverage(ods_tables), # Property 4
check_ods_doc_format(), # Property 5
check_ods_doc_naming(), # Property 6
check_mapping_doc_coverage(ods_tables), # Property 7
check_mapping_doc_content(), # Property 8
check_mapping_doc_naming(), # Property 9
check_ods_dictionary_coverage(ods_tables), # Property 10
]
print_report(results)
# 任一验证失败则返回非零退出码
if any(not r.passed for r in results):
return 1
return 0
if __name__ == "__main__":
sys.exit(main())