# -*- coding: utf-8 -*-
"""
ODS JSON field check script.

Compares the columns of the ODS tables in the current database against the
sample JSON files (default directory C:\\dev\\LLTQ\\export\\test-json-doc),
checks whether each column has a same-named JSON key, and prints the columns
that were not matched for each table, so that mappings can be added or the
absence of a source field can be confirmed.

Usage:
    set PG_DSN=postgresql://...  # e.g. as configured in .env
    python -m etl_billiards.scripts.check_ods_json_vs_table
"""
from __future__ import annotations

import json
import os
import pathlib
import sys
from typing import Dict, Iterable, Set, Tuple

import psycopg2

from etl_billiards.tasks.manual_ingest_task import ManualIngestTask


def _flatten_keys(obj, prefix: str = "") -> Set[str]:
    """Recursively collect every key path of a decoded JSON value.

    Paths are dot-joined, e.g. ``data.assistantInfos.id``. List indices are
    not recorded: list items are expanded under the list's own prefix.

    Args:
        obj: A decoded JSON value (dict, list or scalar).
        prefix: Path of ``obj`` within the document; empty for the root.

    Returns:
        The set of all key paths found at or below ``obj``.
    """
    keys: Set[str] = set()
    if isinstance(obj, dict):
        for k, v in obj.items():
            new_prefix = f"{prefix}.{k}" if prefix else k
            keys.add(new_prefix)
            keys |= _flatten_keys(v, new_prefix)
    elif isinstance(obj, list):
        for item in obj:
            keys |= _flatten_keys(item, prefix)
    return keys


def _load_json_keys(path: pathlib.Path) -> Tuple[Set[str], dict[str, Set[str]]]:
    """Read one JSON file and return its flattened key paths.

    Args:
        path: Location of the JSON file.

    Returns:
        A pair ``(paths, last_map)``: ``paths`` is the set of full key paths
        and ``last_map`` maps the lower-cased last path segment to the set of
        full paths ending in it. Both are empty when the file does not exist
        or cannot be decoded/parsed.
    """
    if not path.exists():
        return set(), {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except ValueError as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError. One
        # broken sample file must not abort the report for the other tables.
        print(f"警告: 无法解析 JSON 文件 {path}: {exc}", file=sys.stderr)
        return set(), {}

    paths = _flatten_keys(data)
    last_map: dict[str, Set[str]] = {}
    for p in paths:
        # Lower-cased so the comparison against column names ignores case.
        last = p.split(".")[-1].lower()
        last_map.setdefault(last, set()).add(p)
    return paths, last_map


def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
    """Read the column names of every table in schema ``billiards_ods``.

    Args:
        dsn: Connection string passed to ``psycopg2.connect``.

    Returns:
        A mapping from table name to the set of its lower-cased column names.
    """
    result: Dict[str, Set[str]] = {}
    conn = psycopg2.connect(dsn)
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT table_name, column_name
                FROM information_schema.columns
                WHERE table_schema='billiards_ods'
                ORDER BY table_name, ordinal_position
                """
            )
            for table, col in cur.fetchall():
                result.setdefault(table, set()).add(col.lower())
        finally:
            cur.close()
    finally:
        # Close the connection even when the query fails.
        conn.close()
    return result


def main() -> None:
    """Print a JSON key coverage report for each ODS table in FILE_MAPPING.

    Reads the connection string from ``PG_DSN`` and the sample directory from
    ``JSON_DOC_DIR``. For every ``(keywords, ods_table)`` entry of
    ``ManualIngestTask.FILE_MAPPING`` the file ``<keywords[0]>.json`` is
    compared with the table's columns by the last segment of each key path.

    Raises:
        SystemExit: If ``PG_DSN`` is not set.
    """
    dsn = os.environ.get("PG_DSN")
    if not dsn:
        raise SystemExit("未设置环境变量 PG_DSN,无法连接数据库")
    json_dir = pathlib.Path(
        os.environ.get("JSON_DOC_DIR", r"C:\dev\LLTQ\export\test-json-doc")
    )

    ods_cols_map = _load_ods_columns(dsn)

    print(f"使用 JSON 目录: {json_dir}")
    # NOTE(review): this prints the raw DSN, which may include a password.
    print(f"连接 DSN: {dsn}")
    print("=" * 80)

    for keywords, ods_table in ManualIngestTask.FILE_MAPPING:
        # Drop a schema qualifier, if any, to match information_schema names.
        table = ods_table.split(".")[-1]
        cols = ods_cols_map.get(table, set())
        file_name = f"{keywords[0]}.json"
        file_path = json_dir / file_name
        keys_full, path_map = _load_json_keys(file_path)
        key_last_parts = set(path_map)

        present = cols & key_last_parts
        missing = cols - key_last_parts
        extra_keys = key_last_parts - cols

        print(f"[{table}] 文件={file_name} 列数={len(cols)} JSON键(末段)覆盖={len(present)}/{len(cols)}")
        if missing:
            print(" 未命中列:", ", ".join(sorted(missing)))
        else:
            print(" 未命中列: 无")
        if extra_keys:
            extras = []
            for k in sorted(extra_keys):
                paths = ", ".join(sorted(path_map[k]))
                extras.append(f"{k} ({paths})")
            print(" JSON 仅有(表无此列):", "; ".join(extras))
        else:
            print(" JSON 仅有(表无此列): 无")
        print("-" * 80)


if __name__ == "__main__":
    main()