Files
Neo-ZQYY/apps/etl/connectors/feiqiu/scripts/run_compare_v3.py

114 lines
5.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
v3 比对脚本 — 直接从 JSON 样本提取字段,与硬编码的 ODS 列比对。
ODS 列数据来自 information_schema.columns WHERE table_schema = 'ods'
"""
import json
import os
# Directory containing this script; the sample and report folders are
# resolved relative to it.
_SCRIPT_DIR = os.path.dirname(__file__)

# Folder with one "<table>.json" API sample per table.
SAMPLES_DIR = os.path.join(_SCRIPT_DIR, "..", "docs", "api-reference", "samples")
# Folder the JSON comparison report is written to.
REPORT_DIR = os.path.join(_SCRIPT_DIR, "..", "docs", "reports")

# ODS column names that are subtracted from the ODS side before comparing.
ODS_META = {
    "source_file",
    "source_endpoint",
    "fetched_at",
    "payload",
    "content_hash",
}

# Lower-cased names of nested objects in the API samples.
NESTED_OBJECTS = {"siteprofile", "tableprofile"}

# The 22 tables to compare, in report order.
TABLES = [
    "assistant_accounts_master",
    "settlement_records",
    "assistant_service_records",
    "assistant_cancellation_records",
    "table_fee_transactions",
    "table_fee_discount_records",
    "payment_transactions",
    "refund_transactions",
    "platform_coupon_redemption_records",
    "tenant_goods_master",
    "store_goods_sales_records",
    "store_goods_master",
    "stock_goods_category_tree",
    "goods_stock_movements",
    "member_profiles",
    "member_stored_value_cards",
    "recharge_settlements",
    "member_balance_changes",
    "group_buy_packages",
    "group_buy_redemption_records",
    "goods_stock_summary",
    "site_tables_master",
]
def load_json(table):
    """Read and parse the JSON sample belonging to ``table``.

    The sample is expected at ``<SAMPLES_DIR>/<table>.json`` and is decoded
    as UTF-8. Errors from opening the file or parsing the JSON are not
    caught here and propagate to the caller.

    Args:
        table: Table name, used as the sample file's base name.

    Returns:
        The parsed JSON document.
    """
    sample_path = os.path.join(SAMPLES_DIR, table + ".json")
    with open(sample_path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def _lower_keys(obj):
    """Return the lower-cased keys of ``obj`` if it is a dict, else an empty set."""
    if isinstance(obj, dict):
        return {key.lower() for key in obj}
    return set()


def extract_fields(table):
    """Return the lower-cased API field names found in the JSON sample of ``table``.

    Three sample layouts are handled:

    * ``settlement_records`` / ``recharge_settlements``: fields are the keys
      of the record under ``settleList`` (the first element when it is a
      list). If the sample has a top-level ``siteProfile`` key, the single
      field ``siteprofile`` is added.
    * ``stock_goods_category_tree``: fields are the keys of the first element
      of ``goodsCategoryList``.
    * every other table: fields are the top-level keys of the sample. Nested
      objects such as ``siteProfile`` count as one field each, exactly like
      any other key.

    A missing, ``null``, empty or non-object inner record yields no fields
    instead of raising.

    Args:
        table: Table name; selects both the sample file and the layout.

    Returns:
        A set of lower-cased field names.
    """
    # The top-level sample is expected to be a JSON object.
    data = load_json(table)

    if table in ("settlement_records", "recharge_settlements"):
        record = data.get("settleList")
        if isinstance(record, list):
            # Only the first record of the list is inspected.
            record = record[0] if record else None
        fields = _lower_keys(record)
        # The wrapper-level siteProfile object is compared as a single column.
        if "siteProfile" in data:
            fields.add("siteprofile")
        return fields

    if table == "stock_goods_category_tree":
        categories = data.get("goodsCategoryList")
        if isinstance(categories, list) and categories:
            return _lower_keys(categories[0])
        return set()

    # Generic layout: every top-level key is one field.
    return {key.lower() for key in data}
def main():
    """Compare API sample fields with ODS columns for every table in ``TABLES``.

    ODS columns are read from ``ods_columns.json`` located next to this
    script; for a table missing from that file the ODS side is empty.
    ``ODS_META`` columns are removed from the ODS side before comparing.
    One summary line per table is printed, followed by the lists of
    unmatched names when there are any, and the full result is written to
    ``<REPORT_DIR>/api_ods_comparison_v3.json``.
    """
    script_dir = os.path.dirname(__file__)
    with open(os.path.join(script_dir, "ods_columns.json"), "r", encoding="utf-8") as src:
        ods_columns_by_table = json.load(src)

    report = []
    for table in TABLES:
        api_fields = extract_fields(table)
        ods_cols = set(ods_columns_by_table.get(table, [])) - ODS_META

        api_only = sorted(api_fields - ods_cols)
        ods_only = sorted(ods_cols - api_fields)
        n_api = len(api_fields)
        n_ods = len(ods_cols)
        n_matched = len(api_fields & ods_cols)

        report.append(
            {
                "table": table,
                "api_count": n_api,
                "ods_count": n_ods,
                "matched": n_matched,
                "api_only": api_only,
                "ods_only": ods_only,
            }
        )

        # The check mark is shown only when neither side has leftovers.
        if api_only or ods_only:
            status = ""
        else:
            status = "✓ 完全对齐"
        print(f"{table}: API={n_api} ODS={n_ods} 匹配={n_matched} API独有={len(api_only)} ODS独有={len(ods_only)} {status}")
        if api_only:
            print(f" API独有: {api_only}")
        if ods_only:
            print(f" ODS独有: {ods_only}")

    # Write the JSON report.
    os.makedirs(REPORT_DIR, exist_ok=True)
    report_path = os.path.join(REPORT_DIR, "api_ods_comparison_v3.json")
    with open(report_path, "w", encoding="utf-8") as dst:
        json.dump(report, dst, ensure_ascii=False, indent=2)
    print(f"\nJSON 报告: {report_path}")
if __name__ == "__main__":
main()
# ──────────────────────────────────────────────────────────────────
# AI_CHANGELOG:
# - 日期: 2026-02-14
# Prompt: P20260214-000000 — "还是不准。现在拆解任务,所有表,每个表当作一个任务进行比对。"
# 直接原因: v2 比对脚本结果不准确,需从 JSON 样本直接提取字段与数据库实际列精确比对
# 变更摘要: 新建脚本,读取 samples/*.json 提取 API 字段,读取 ods_columns.json 获取 ODS 列,
# 处理 settleList 嵌套/goodsCategoryList 数组/siteProfile 嵌套对象等特殊结构,逐表输出比对结果
# 风险与验证: 纯分析脚本,不修改数据库;验证方式:运行脚本确认输出与 v3 报告一致
# ──────────────────────────────────────────────────────────────────