# -*- coding: utf-8 -*-
"""
|
||
v3 比对脚本 — 直接从 JSON 样本提取字段,与硬编码的 ODS 列比对。
|
||
ODS 列数据来自 information_schema.columns WHERE table_schema = 'ods'。
|
||
"""
|
||
import json
|
||
import os
|
||
|
||
# Directory holding the API JSON samples, resolved relative to this script.
SAMPLES_DIR = os.path.join(
    os.path.dirname(__file__),
    "..",
    "docs",
    "api-reference",
    "samples",
)

# Report output directory. It must be supplied through the environment, so
# fail fast when the variable is unset or empty.
REPORT_DIR = os.getenv("ETL_REPORT_ROOT")
if REPORT_DIR is None or REPORT_DIR == "":
    raise KeyError("环境变量 ETL_REPORT_ROOT 未定义。请在根 .env 中配置。")

# ODS column names that are excluded from the comparison.
ODS_META = {
    "source_file",
    "source_endpoint",
    "fetched_at",
    "payload",
    "content_hash",
}

# Lower-cased names of nested API objects that count as a single column.
NESTED_OBJECTS = {"siteprofile", "tableprofile"}
|
||
|
||
# The 22 tables to compare, in reporting order.
TABLES = [
    "assistant_accounts_master",
    "settlement_records",
    "assistant_service_records",
    "assistant_cancellation_records",
    "table_fee_transactions",
    "table_fee_discount_records",
    "payment_transactions",
    "refund_transactions",
    "platform_coupon_redemption_records",
    "tenant_goods_master",
    "store_goods_sales_records",
    "store_goods_master",
    "stock_goods_category_tree",
    "goods_stock_movements",
    "member_profiles",
    "member_stored_value_cards",
    "recharge_settlements",
    "member_balance_changes",
    "group_buy_packages",
    "group_buy_redemption_records",
    "goods_stock_summary",
    "site_tables_master",
]
|
||
|
||
def load_json(table):
    """Load and parse the JSON sample file for *table*.

    The file is ``<SAMPLES_DIR>/<table>.json`` and is read as UTF-8.

    Args:
        table: Table name; used as the stem of the sample file name.

    Returns:
        The decoded JSON document.
    """
    sample_name = f"{table}.json"
    sample_path = os.path.join(SAMPLES_DIR, sample_name)
    with open(sample_path, encoding="utf-8") as handle:
        return json.loads(handle.read())
|
||
|
||
def extract_fields(table):
    """Return the lower-cased API field names found in a table's JSON sample.

    The sample is loaded with ``load_json(table)``. Where the field names are
    read from depends on the table:

    * ``settlement_records`` / ``recharge_settlements``: the keys of the
      ``settleList`` value (its first element when it is a list), plus
      ``"siteprofile"`` when the payload has a top-level ``siteProfile`` key.
      A missing, null or empty ``settleList`` contributes no keys.
    * ``stock_goods_category_tree``: the keys of the first element of
      ``goodsCategoryList``; an empty set when that list is missing or empty.
    * every other table: the payload's top-level keys.

    Nested objects (see ``NESTED_OBJECTS``) are never flattened; each one
    contributes exactly one field name, like any other key.

    Args:
        table: Table name; also the stem of the sample file to load.

    Returns:
        set[str]: Lower-cased field names.
    """
    data = load_json(table)

    # settlement_records / recharge_settlements: fields live inside settleList.
    if table in ("settlement_records", "recharge_settlements"):
        record = data.get("settleList")
        if isinstance(record, list):
            record = record[0] if record else None
        # ``or {}`` also covers an explicit JSON null, which a .get() default
        # does not; .keys() is kept so a malformed (non-object) value still
        # fails loudly instead of being iterated.
        fields = {key.lower() for key in (record or {}).keys()}
        # siteProfile is a top-level nested object next to settleList.
        if "siteProfile" in data:
            fields.add("siteprofile")
        return fields

    # stock_goods_category_tree: fields come from the goodsCategoryList items.
    if table == "stock_goods_category_tree":
        cat_list = data.get("goodsCategoryList", [])
        if cat_list:
            return {key.lower() for key in cat_list[0].keys()}
        return set()

    # Generic case: every top-level key is one field, nested objects included.
    return {key.lower() for key in data.keys()}
|
||
|
||
def main():
    """Compare API sample fields with ODS columns for every table in TABLES.

    ODS columns are read from ``ods_columns.json`` located next to this
    script. For each table the API fields come from ``extract_fields`` and
    the ODS columns have the ``ODS_META`` names removed. A one-line summary
    is printed per table, followed by the names found on only one side, and
    the collected results are written to
    ``<REPORT_DIR>/api_ods_comparison_v3.json``.
    """
    # The ODS column listing is a JSON file that sits beside this script.
    ods_cols_path = os.path.join(os.path.dirname(__file__), "ods_columns.json")
    with open(ods_cols_path, encoding="utf-8") as f:
        ods_all = json.load(f)

    results = []
    for table in TABLES:
        api_fields = extract_fields(table)
        # ODS_META columns are excluded from the comparison.
        ods_cols = set(ods_all.get(table, [])).difference(ODS_META)

        matched = sorted(api_fields.intersection(ods_cols))
        api_only = sorted(api_fields.difference(ods_cols))
        ods_only = sorted(ods_cols.difference(api_fields))

        results.append(
            dict(
                table=table,
                api_count=len(api_fields),
                ods_count=len(ods_cols),
                matched=len(matched),
                api_only=api_only,
                ods_only=ods_only,
            )
        )

        # The "fully aligned" marker is shown only when neither side has extras.
        if api_only or ods_only:
            status = ""
        else:
            status = "✓ 完全对齐"
        print(f"{table}: API={len(api_fields)} ODS={len(ods_cols)} 匹配={len(matched)} API独有={len(api_only)} ODS独有={len(ods_only)} {status}")
        if api_only:
            print(f" API独有: {api_only}")
        if ods_only:
            print(f" ODS独有: {ods_only}")

    # Write the JSON report, creating the report directory when needed.
    os.makedirs(REPORT_DIR, exist_ok=True)
    out = os.path.join(REPORT_DIR, "api_ods_comparison_v3.json")
    with open(out, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\nJSON 报告: {out}")
|
||
|
||
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||
|
||
# ──────────────────────────────────────────────────────────────────
|
||
# AI_CHANGELOG:
|
||
# - 日期: 2026-02-14
|
||
# Prompt: P20260214-000000 — "还是不准。现在拆解任务,所有表,每个表当作一个任务进行比对。"
|
||
# 直接原因: v2 比对脚本结果不准确,需从 JSON 样本直接提取字段与数据库实际列精确比对
|
||
# 变更摘要: 新建脚本,读取 samples/*.json 提取 API 字段,读取 ods_columns.json 获取 ODS 列,
|
||
# 处理 settleList 嵌套/goodsCategoryList 数组/siteProfile 嵌套对象等特殊结构,逐表输出比对结果
|
||
# 风险与验证: 纯分析脚本,不修改数据库;验证方式:运行脚本确认输出与 v3 报告一致
|
||
# ──────────────────────────────────────────────────────────────────
|