DWD完成

This commit is contained in:
Neo
2025-12-09 04:57:05 +08:00
parent f301cc1fd5
commit 561c640700
46 changed files with 26181 additions and 3540 deletions

View File

@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
"""
ODS JSON 字段核对脚本:对照当前数据库中的 ODS 表字段,检查示例 JSON默认目录 C:\\dev\\LLTQ\\export\\test-json-doc
是否包含同名键,并输出每表未命中的字段,便于补充映射或确认确实无源字段。
使用方法:
set PG_DSN=postgresql://... # 如 .env 中配置
python -m etl_billiards.scripts.check_ods_json_vs_table
"""
from __future__ import annotations
import json
import os
import pathlib
from typing import Dict, Iterable, Set, Tuple
import psycopg2
from etl_billiards.tasks.manual_ingest_task import ManualIngestTask
def _flatten_keys(obj, prefix: str = "") -> Set[str]:
"""递归展开 JSON 所有键路径,返回形如 data.assistantInfos.id 的集合。列表不保留索引,仅继续向下展开。"""
keys: Set[str] = set()
if isinstance(obj, dict):
for k, v in obj.items():
new_prefix = f"{prefix}.{k}" if prefix else k
keys.add(new_prefix)
keys |= _flatten_keys(v, new_prefix)
elif isinstance(obj, list):
for item in obj:
keys |= _flatten_keys(item, prefix)
return keys
def _load_json_keys(path: pathlib.Path) -> Tuple[Set[str], dict[str, Set[str]]]:
"""读取单个 JSON 文件并返回展开后的键集合以及末段->路径列表映射,若文件不存在或无法解析则返回空集合。"""
if not path.exists():
return set(), {}
data = json.loads(path.read_text(encoding="utf-8"))
paths = _flatten_keys(data)
last_map: dict[str, Set[str]] = {}
for p in paths:
last = p.split(".")[-1].lower()
last_map.setdefault(last, set()).add(p)
return paths, last_map
def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
"""从数据库读取 billiards_ods.* 的列名集合,按表返回。"""
conn = psycopg2.connect(dsn)
cur = conn.cursor()
cur.execute(
"""
SELECT table_name, column_name
FROM information_schema.columns
WHERE table_schema='billiards_ods'
ORDER BY table_name, ordinal_position
"""
)
result: Dict[str, Set[str]] = {}
for table, col in cur.fetchall():
result.setdefault(table, set()).add(col.lower())
cur.close()
conn.close()
return result
def main() -> None:
"""主流程:遍历 FILE_MAPPING 中的 ODS 表,检查 JSON 键覆盖情况并打印报告。"""
dsn = os.environ.get("PG_DSN")
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", r"C:\dev\LLTQ\export\test-json-doc"))
ods_cols_map = _load_ods_columns(dsn)
print(f"使用 JSON 目录: {json_dir}")
print(f"连接 DSN: {dsn}")
print("=" * 80)
for keywords, ods_table in ManualIngestTask.FILE_MAPPING:
table = ods_table.split(".")[-1]
cols = ods_cols_map.get(table, set())
file_name = f"{keywords[0]}.json"
file_path = json_dir / file_name
keys_full, path_map = _load_json_keys(file_path)
key_last_parts = set(path_map.keys())
missing: Set[str] = set()
extra_keys: Set[str] = set()
present: Set[str] = set()
for col in sorted(cols):
if col in key_last_parts:
present.add(col)
else:
missing.add(col)
for k in key_last_parts:
if k not in cols:
extra_keys.add(k)
print(f"[{table}] 文件={file_name} 列数={len(cols)} JSON键(末段)覆盖={len(present)}/{len(cols)}")
if missing:
print(" 未命中列:", ", ".join(sorted(missing)))
else:
print(" 未命中列: 无")
if extra_keys:
extras = []
for k in sorted(extra_keys):
paths = ", ".join(sorted(path_map.get(k, [])))
extras.append(f"{k} ({paths})")
print(" JSON 仅有(表无此列):", "; ".join(extras))
else:
print(" JSON 仅有(表无此列): 无")
print("-" * 80)
if __name__ == "__main__":
main()