在准备环境前提交次全部更改。
This commit is contained in:
113
apps/etl/connectors/feiqiu/scripts/run_compare_v3.py
Normal file
113
apps/etl/connectors/feiqiu/scripts/run_compare_v3.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
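v3 comparison script: extracts fields directly from the JSON samples and compares them with the ODS columns read from ods_columns.json.
The ODS column data comes from information_schema.columns WHERE table_schema = 'ods'.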
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
|
||||
# Input samples and output reports both live under ../docs relative to this script.
SAMPLES_DIR = os.path.join(
    os.path.dirname(__file__), "..", "docs", "api-reference", "samples"
)
REPORT_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "reports")

# ODS columns that are removed from the ODS side before comparing.
ODS_META = {
    "source_file",
    "source_endpoint",
    "fetched_at",
    "payload",
    "content_hash",
}

# Lower-cased names of nested API objects that count as a single column.
NESTED_OBJECTS = {"siteprofile", "tableprofile"}

# The 22 tables to compare.
TABLES = [
    "assistant_accounts_master",
    "settlement_records",
    "assistant_service_records",
    "assistant_cancellation_records",
    "table_fee_transactions",
    "table_fee_discount_records",
    "payment_transactions",
    "refund_transactions",
    "platform_coupon_redemption_records",
    "tenant_goods_master",
    "store_goods_sales_records",
    "store_goods_master",
    "stock_goods_category_tree",
    "goods_stock_movements",
    "member_profiles",
    "member_stored_value_cards",
    "recharge_settlements",
    "member_balance_changes",
    "group_buy_packages",
    "group_buy_redemption_records",
    "goods_stock_summary",
    "site_tables_master",
]
|
||||
|
||||
def load_json(table):
    """Load and return the parsed JSON sample for *table*.

    The sample is read from ``<SAMPLES_DIR>/<table>.json`` as UTF-8.
    """
    sample_path = os.path.join(SAMPLES_DIR, "{}.json".format(table))
    with open(sample_path, encoding="utf-8") as sample_file:
        payload = json.load(sample_file)
    return payload
|
||||
|
||||
def _lowercased_keys(record):
    """Return the lower-cased keys of *record*, or an empty set if it is not a dict."""
    if not isinstance(record, dict):
        return set()
    return {key.lower() for key in record}


def extract_fields(table):
    """Return the set of lower-cased API field names found in the sample for *table*.

    The JSON sample is loaded with ``load_json`` and interpreted per table:

    * ``settlement_records`` / ``recharge_settlements``: fields come from the
      inner ``settleList`` entry (the first element when it is a list), plus
      ``"siteprofile"`` when the sample has a top-level ``siteProfile`` key.
    * ``stock_goods_category_tree``: fields come from the first element of
      ``goodsCategoryList``; an empty or missing list yields an empty set.
    * every other table: the top-level keys of the sample.

    A nested record that is missing, ``null`` or not a JSON object contributes
    no fields instead of raising, matching how an empty list is handled.
    """
    data = load_json(table)

    # settlement_records / recharge_settlements: use the inner settleList record.
    if table in ("settlement_records", "recharge_settlements"):
        record = data.get("settleList", {})
        if isinstance(record, list):
            record = record[0] if record else {}
        fields = _lowercased_keys(record)
        # siteProfile is a top-level nested object and counts as one column.
        if "siteProfile" in data:
            fields.add("siteprofile")
        return fields

    # stock_goods_category_tree: use the first element of goodsCategoryList.
    if table == "stock_goods_category_tree":
        categories = data.get("goodsCategoryList", [])
        return _lowercased_keys(categories[0]) if categories else set()

    # Generic case: top-level keys only. Nested objects (siteProfile,
    # tableProfile) are therefore counted as a single column each, so no
    # special-casing is needed here.
    return {key.lower() for key in data}
|
||||
|
||||
def main():
    """Compare API sample fields with ODS columns per table and write a JSON report.

    ODS columns are read from ``ods_columns.json`` next to this script; a table
    absent from that file is treated as having no columns. For every entry in
    ``TABLES`` the ``ODS_META`` columns are removed from the ODS side, then the
    matched, API-only and ODS-only names are computed, printed, and collected.
    The collected results are written to ``api_ods_comparison_v3.json`` inside
    ``REPORT_DIR``, which is created if it does not exist.
    """
    ods_cols_path = os.path.join(os.path.dirname(__file__), "ods_columns.json")
    with open(ods_cols_path, "r", encoding="utf-8") as ods_file:
        ods_all = json.load(ods_file)

    results = []
    for table in TABLES:
        api_fields = extract_fields(table)
        ods_cols = set(ods_all.get(table, [])).difference(ODS_META)

        matched = sorted(api_fields.intersection(ods_cols))
        api_only = sorted(api_fields.difference(ods_cols))
        ods_only = sorted(ods_cols.difference(api_fields))

        entry = {
            "table": table,
            "api_count": len(api_fields),
            "ods_count": len(ods_cols),
            "matched": len(matched),
            "api_only": api_only,
            "ods_only": ods_only,
        }
        results.append(entry)

        # The "fully aligned" marker is shown only when neither side has extras.
        if api_only or ods_only:
            status = ""
        else:
            status = "✓ 完全对齐"
        print(f"{table}: API={len(api_fields)} ODS={len(ods_cols)} 匹配={len(matched)} API独有={len(api_only)} ODS独有={len(ods_only)} {status}")
        if api_only:
            print(f" API独有: {api_only}")
        if ods_only:
            print(f" ODS独有: {ods_only}")

    # Write the JSON report.
    os.makedirs(REPORT_DIR, exist_ok=True)
    out = os.path.join(REPORT_DIR, "api_ods_comparison_v3.json")
    with open(out, "w", encoding="utf-8") as report_file:
        json.dump(results, report_file, ensure_ascii=False, indent=2)
    print(f"\nJSON 报告: {out}")
|
||||
|
||||
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# AI_CHANGELOG:
|
||||
# - 日期: 2026-02-14
|
||||
# Prompt: P20260214-000000 — "还是不准。现在拆解任务,所有表,每个表当作一个任务进行比对。"
|
||||
# 直接原因: v2 比对脚本结果不准确,需从 JSON 样本直接提取字段与数据库实际列精确比对
|
||||
# 变更摘要: 新建脚本,读取 samples/*.json 提取 API 字段,读取 ods_columns.json 获取 ODS 列,
|
||||
# 处理 settleList 嵌套/goodsCategoryList 数组/siteProfile 嵌套对象等特殊结构,逐表输出比对结果
|
||||
# 风险与验证: 纯分析脚本,不修改数据库;验证方式:运行脚本确认输出与 v3 报告一致
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
Reference in New Issue
Block a user