Files
Neo-ZQYY/apps/etl/pipelines/feiqiu/scripts/refresh_json_and_audit.py

524 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
重新获取全部 API 接口的 JSON 数据(最多 100 条),
遍历所有记录提取最全字段集合,
与 .md 文档比对并输出差异报告。
时间范围: 2026-01-01 00:00:00 ~ 2026-02-13 00:00:00
用法: python scripts/refresh_json_and_audit.py
"""
import json
import os
import re
import sys
import time
import requests
# ── Configuration ─────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"

# Token lookup order: the API_TOKEN environment variable first; if that is
# empty, the first `API_TOKEN=` line of the .env file located one directory
# above this script.
API_TOKEN = os.environ.get("API_TOKEN", "")
if not API_TOKEN:
    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
    if os.path.exists(env_path):
        with open(env_path, encoding="utf-8") as env_file:
            for raw_line in env_file:
                setting = raw_line.strip()
                if not setting.startswith("API_TOKEN="):
                    continue
                # Keep everything after the first "=" so that a token which
                # itself contains "=" is not truncated.
                API_TOKEN = setting.partition("=")[2].strip()
                break

SITE_ID = 2790685415443269

# Query window and page size sent to every endpoint that supports them.
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100

# Input/output locations, relative to the current working directory.
DOCS_DIR = os.path.join("docs", "api-reference")
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")
REPORT_DIR = os.path.join("docs", "reports")

HEADERS = {
    "Authorization": "Bearer " + API_TOKEN,
    "Content-Type": "application/json",
}
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")

# Envelope / container keys that are not business fields. Each camelCase
# container name is listed together with its all-lowercase spelling because
# field names are lower-cased before they are compared against this set.
_WRAPPER_CONTAINERS = (
    "settleList",
    "siteProfile",
    "tableProfile",
    "goodsCategoryList",
)
WRAPPER_FIELDS = {
    "data",
    "code",
    "msg",
    *_WRAPPER_CONTAINERS,
    *(container.lower() for container in _WRAPPER_CONTAINERS),
}

# First-column values that are table headers in the .md docs rather than
# real field names.
CROSS_REF_HEADERS = {
    "字段名",
    "类型",
    "示例值",
    "说明",
    "field",
    "example",
    "description",
}
# Name of the list-valued key each endpoint actually returns under `data`
# (collected while debugging the live responses).
ACTUAL_LIST_KEY = {
    "assistant_accounts_master": "assistantInfos",
    "assistant_service_records": "orderAssistantDetails",
    "assistant_cancellation_records": "abolitionAssistants",
    "table_fee_transactions": "siteTableUseDetailsList",
    "table_fee_discount_records": "taiFeeAdjustInfos",
    "tenant_goods_master": "tenantGoodsList",
    "store_goods_sales_records": "orderGoodsLedgers",
    "store_goods_master": "orderGoodsList",
    "goods_stock_movements": "queryDeliveryRecordsList",
    "member_profiles": "tenantMemberInfos",
    "member_stored_value_cards": "tenantMemberCards",
    "member_balance_changes": "tenantMemberCardLogs",
    "group_buy_packages": "packageCouponList",
    "group_buy_redemption_records": "siteTableUseDetailsList",
    "site_tables_master": "siteTables",
    # The remaining endpoints use the generic "list" key or a special path.
    **dict.fromkeys(
        (
            "payment_transactions",
            "refund_transactions",
            "platform_coupon_redemption_records",
            "goods_stock_summary",
        ),
        "list",
    ),
    **dict.fromkeys(
        ("settlement_records", "recharge_settlements"),
        "settleList",
    ),
}
def load_registry():
    """Read the API registry file at ``REGISTRY_PATH`` and return the parsed JSON."""
    with open(REGISTRY_PATH, encoding="utf-8") as registry_file:
        registry = json.load(registry_file)
    return registry
def call_api(module, action, body):
    """POST ``body`` as JSON to ``{API_BASE}{module}/{action}``.

    Returns the decoded JSON response. Any exception raised while sending the
    request, checking the HTTP status or decoding the body is printed and
    turned into a ``None`` return value, so the caller can carry on with the
    next endpoint.
    """
    endpoint = "{}{}/{}".format(API_BASE, module, action)
    try:
        response = requests.post(endpoint, json=body, headers=HEADERS, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:
        print(f" ❌ 请求失败: {exc}")
        return None
    return payload
def unwrap_records(raw_json, table_name):
    """Extract the list of business records from a raw API response.

    Args:
        raw_json: Decoded response body. Anything that is not a dict
            (including ``None``) yields an empty list.
        table_name: Registry id of the endpoint; selects the unwrap rule.

    Returns:
        A list of records, possibly empty. For
        ``tenant_member_balance_overview`` the first element is the ``data``
        summary object itself, followed by the items of ``rechargeCardList``
        and ``giveCardList``.
    """
    if not isinstance(raw_json, dict):
        return []
    data = raw_json.get("data")
    if data is None:
        return []

    # Tables whose records sit under one fixed key of a dict-shaped `data`.
    # For the settlement tables each element of data.settleList may itself
    # carry an inner settleList object; that is unpacked by the field
    # extraction step, not here.
    fixed_list_keys = {
        "stock_goods_category_tree": "goodsCategoryList",
        "role_area_association": "roleAreaRelations",
        "settlement_records": "settleList",
        "recharge_settlements": "settleList",
    }
    if table_name in fixed_list_keys:
        if not isinstance(data, dict):
            return []
        items = data.get(fixed_list_keys[table_name], [])
        return items if isinstance(items, list) else []

    # tenant_member_balance_overview returns one summary object plus the
    # rechargeCardList / giveCardList sub-lists; treat the summary as a
    # record and append the entries of both lists.
    if table_name == "tenant_member_balance_overview":
        if not isinstance(data, dict):
            return []
        records = [data]
        for list_key in ("rechargeCardList", "giveCardList"):
            items = data.get(list_key, [])
            if isinstance(items, list):
                records.extend(items)
        return records

    # Generic case: `data` is a dict that holds the list under a known key.
    if isinstance(data, dict):
        list_key = ACTUAL_LIST_KEY.get(table_name, "list")
        # No `[]` default here: a default list would be returned immediately
        # and the fallback below would never run when the key is absent.
        items = data.get(list_key)
        if isinstance(items, list):
            return items
        # Fallback: the first list-valued field of `data`.
        for key, value in data.items():
            if isinstance(value, list) and key != "total":
                return value
        return []

    if isinstance(data, list):
        return data
    return []
def extract_all_fields(records, table_name):
    """Return the union of lower-cased field names found across ``records``.

    Non-dict records are ignored. Three rule sets apply:

    * settlement_records / recharge_settlements: the inner ``settleList``
      object is inspected (its first element when it is a list, the record
      itself when the key is absent); wrapper keys are dropped except
      ``siteprofile``.
    * tenant_member_balance_overview: every key is kept except the two
      nested list keys ``rechargecardlist`` and ``givecardlist``.
    * all other tables: wrapper keys are dropped except ``siteprofile`` and
      ``tableprofile``.
    """
    settlement_tables = ("settlement_records", "recharge_settlements")
    nested_list_keys = ("rechargecardlist", "givecardlist")
    kept_wrappers = ("siteprofile", "tableprofile")

    collected = set()
    for row in records:
        if not isinstance(row, dict):
            continue

        if table_name in settlement_tables:
            # Unpack the inner settleList object of each outer element.
            inner = row.get("settleList", row)
            if isinstance(inner, list):
                inner = inner[0] if inner else {}
            if not isinstance(inner, dict):
                continue
            collected.update(
                name
                for name in (key.lower() for key in inner)
                if name == "siteprofile" or name not in WRAPPER_FIELDS
            )
        elif table_name == "tenant_member_balance_overview":
            # Skip the names of the nested lists themselves.
            collected.update(
                name
                for name in (key.lower() for key in row)
                if name not in nested_list_keys
            )
        else:
            collected.update(
                name
                for name in (key.lower() for key in row)
                if name in kept_wrappers or name not in WRAPPER_FIELDS
            )
    return collected
def extract_md_fields(table_name):
    """Collect lower-cased field names from the table's .md document.

    Only the section whose heading starts with "## 四、" and contains
    "响应字段" is scanned, up to the next "## " heading. A field is the
    back-ticked value in the first column of a table row. Rows whose first
    column is a known table-header word are skipped, as are wrapper names
    other than ``siteprofile`` / ``tableprofile``. For the two settlement
    tables, rows under a "### ...siteProfile" sub-heading and names prefixed
    with ``siteProfile.`` are skipped as well.

    Returns an empty set when the document does not exist.
    """
    doc_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(doc_path):
        return set()
    with open(doc_path, "r", encoding="utf-8") as doc:
        doc_lines = doc.readlines()

    row_re = re.compile(r'^\|\s*`([^`]+)`\s*\|')
    site_heading_re = re.compile(r'^###.*siteProfile', re.IGNORECASE)
    header_words = {h.lower() for h in CROSS_REF_HEADERS}
    kept_wrappers = ("siteprofile", "tableprofile")
    is_settlement = table_name in ("settlement_records", "recharge_settlements")

    found = set()
    inside_section = False
    inside_siteprofile = False
    for doc_line in doc_lines:
        text = doc_line.strip()

        if text.startswith("## 四、") and "响应字段" in text:
            inside_section = True
            inside_siteprofile = False
            continue
        if not inside_section:
            continue
        # Any other level-2 heading ends the response-fields section.
        if text.startswith("## ") and not text.startswith("## 四"):
            break

        if is_settlement:
            if site_heading_re.search(text):
                inside_siteprofile = True
                continue
            # A different level-3 heading closes the siteProfile sub-section.
            if inside_siteprofile and text.startswith("### "):
                inside_siteprofile = False

        hit = row_re.match(text)
        if hit is None:
            continue
        name = hit.group(1).strip()
        lowered = name.lower()
        if lowered in header_words:
            continue
        if is_settlement and (inside_siteprofile or name.startswith("siteProfile.")):
            continue
        if lowered in WRAPPER_FIELDS and lowered not in kept_wrappers:
            continue
        found.add(lowered)
    return found
def build_body(entry):
    """Build the POST body for one registry entry.

    Starts from a shallow copy of ``entry["body"]`` (empty when missing or
    falsy). When ``time_range`` is truthy and ``time_keys`` names at least two
    keys, the first two are set to ``START_TIME`` and ``END_TIME``. When a
    non-empty ``pagination`` mapping is present, its page key (default
    ``"page"``) is set to 1 and its limit key (default ``"limit"``) to
    ``LIMIT``.
    """
    payload = dict(entry.get("body") or {})

    time_keys = entry.get("time_keys")
    if entry.get("time_range") and time_keys and len(time_keys) >= 2:
        start_key, end_key = time_keys[0], time_keys[1]
        payload[start_key] = START_TIME
        payload[end_key] = END_TIME

    paging = entry.get("pagination")
    if paging:
        payload[paging.get("page_key", "page")] = 1
        payload[paging.get("limit_key", "limit")] = LIMIT

    return payload
def save_sample(table_name, records):
    """Save the first record as the table's JSON sample file.

    Nothing is written when ``records`` is empty or its first element is not
    a dict. The samples directory is created on demand so that a missing
    directory does not abort the run with ``FileNotFoundError``.

    Returns:
        The sample file path, whether or not a file was written.
    """
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    if records and isinstance(records[0], dict):
        os.makedirs(SAMPLES_DIR, exist_ok=True)
        with open(sample_path, "w", encoding="utf-8") as f:
            json.dump(records[0], f, ensure_ascii=False, indent=2)
    return sample_path
def discover_actual_data_path(raw_json, table_name):
    """Work out the dotted path at which the response carries its records.

    Returns ``None`` when the response is empty or has no ``data``. Tables
    with a special layout map to a fixed path. Otherwise, for a dict-shaped
    ``data``, the key registered in ``ACTUAL_LIST_KEY`` is used when present,
    falling back to the first list-valued key other than "total".
    """
    data = raw_json.get("data") if raw_json else None
    if data is None:
        return None

    # Special-layout tables always report the same path.
    fixed_paths = {
        "stock_goods_category_tree": "data.goodsCategoryList",
        "role_area_association": "data.roleAreaRelations",
        "tenant_member_balance_overview": "data",  # top-level summary object
        "settlement_records": "data.settleList",
        "recharge_settlements": "data.settleList",
    }
    fixed = fixed_paths.get(table_name)
    if fixed is not None:
        return fixed

    if not isinstance(data, dict):
        return None

    expected_key = ACTUAL_LIST_KEY.get(table_name)
    if expected_key and expected_key in data:
        return f"data.{expected_key}"

    # Fallback: first list-valued key.
    return next(
        (
            f"data.{key}"
            for key, value in data.items()
            if isinstance(value, list) and key.lower() != "total"
        ),
        None,
    )
def update_md_data_path(table_name, actual_path):
    """Update or add the response data-path row in the table's .md document.

    If the document already has a table row labelled 数据路径, 响应数据路径 or
    data_path with a back-ticked value, that value is replaced. Otherwise a
    new "响应数据路径" row is inserted after the last table row of the
    "## 一、…接口概述" section.

    Args:
        table_name: Registry id; the document is ``<DOCS_DIR>/<table_name>.md``.
        actual_path: Dotted path to write, e.g. ``data.settleList``.

    Returns:
        ``True`` when the file was rewritten; ``False`` when the document is
        missing, already holds ``actual_path``, or has no overview table to
        append to.
    """
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(md_path):
        return False
    with open(md_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Try to update an existing row first.
    row_pattern = re.compile(
        r'(\|\s*(?:数据路径|响应数据路径|data_path)\s*\|\s*)`[^`]*`(\s*\|)',
        re.IGNORECASE,
    )
    if row_pattern.search(content):
        # A callable replacement inserts actual_path literally; in a template
        # string any backslash or group reference in it would be interpreted.
        new_content = row_pattern.sub(
            lambda m: f"{m.group(1)}`{actual_path}`{m.group(2)}", content
        )
        if new_content == content:
            return False  # already up to date
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(new_content)
        return True

    # No data-path row yet: append one after the last row (separator rows
    # excluded) of the table in the "## 一、…接口概述" section.
    lines = content.split("\n")
    in_overview = False
    last_table_row = None
    for i, line in enumerate(lines):
        s = line.strip()
        if "## 一、" in s and "接口概述" in s:
            in_overview = True
            continue
        if in_overview and s.startswith("## "):
            break
        if in_overview and s.startswith("|") and "---" not in s:
            last_table_row = i

    if last_table_row is None:
        return False
    lines.insert(last_table_row + 1, f"| 响应数据路径 | `{actual_path}` |")
    with open(md_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    return True
def _empty_result(table_name, status):
    """Build the audit entry for an endpoint that produced no data."""
    return {
        "table": table_name,
        "status": status,
        "record_count": 0,
        "json_field_count": 0,
        "md_field_count": 0,
        "json_fields": [],
        "md_fields": [],
        "json_only": [],
        "md_only": [],
        "actual_data_path": None,
    }


def main():
    """Refresh JSON samples for every registered endpoint and audit the docs.

    For each registry entry not marked ``skip``: call the API, work out the
    actual data path, save the first record as a sample, compare the JSON
    field set with the fields documented in the .md file, and update the
    data-path row of the .md file. Afterwards, changed data paths are written
    back to api_registry.json, a summary is printed, and the detailed results
    are written to ``<REPORT_DIR>/json_refresh_audit.json``.
    """
    registry = load_registry()
    print(f"加载 API 注册表: {len(registry)} 个端点")
    print(f"时间范围: {START_TIME} ~ {END_TIME}")
    print(f"每接口获取: {LIMIT}")
    print("=" * 80)

    results = []
    all_gaps = []
    registry_updates = {}  # table_name -> actual_data_path

    for entry in registry:
        table_name = entry["id"]
        name_zh = entry.get("name_zh", "")
        module = entry["module"]
        action = entry["action"]

        print(f"\n{'-' * 60}")
        print(f"[{table_name}] {name_zh} - {module}/{action}")

        if entry.get("skip", False):
            print(" ⏭️ 跳过(标记为 skip)")
            results.append(_empty_result(table_name, "skipped"))
            continue

        body = build_body(entry)
        print(f" 请求: POST {module}/{action}")
        raw = call_api(module, action, body)
        if raw is None:
            results.append(_empty_result(table_name, "error"))
            continue

        # Discover where the records actually live in the response.
        actual_path = discover_actual_data_path(raw, table_name)
        old_path = entry.get("data_path", "")
        if actual_path and actual_path != old_path:
            print(f" 📍 数据路径: {old_path} -> {actual_path}")
            registry_updates[table_name] = actual_path
        else:
            print(f" 📍 数据路径: {actual_path or old_path}")

        records = unwrap_records(raw, table_name)
        print(f" 获取记录数: {len(records)}")

        # Save the first record as the sample.
        save_sample(table_name, records)

        # Union of fields over all records vs. fields documented in the .md.
        json_fields = extract_all_fields(records, table_name)
        md_fields = extract_md_fields(table_name)
        json_only = json_fields - md_fields
        md_only = md_fields - json_fields

        status = "ok"
        if json_only:
            status = "gap"
            print(f" ❌ JSON 有但 .md 缺失 ({len(json_only)} 个): {sorted(json_only)}")
            all_gaps.append((table_name, name_zh, sorted(json_only)))
        elif md_only:
            print(f" ⚠️ .md 多 {len(md_only)} 个条件性字段")
        else:
            print(f" ✅ 完全一致 ({len(json_fields)} 个字段)")

        # Update the data-path row in the .md document.
        if actual_path and update_md_data_path(table_name, actual_path):
            print(" 📝 已更新 .md 文档数据路径")

        results.append({
            "table": table_name,
            "status": status,
            "record_count": len(records),
            "json_field_count": len(json_fields),
            "md_field_count": len(md_fields),
            "json_fields": sorted(json_fields),
            "md_fields": sorted(md_fields),
            "json_only": sorted(json_only),
            "md_only": sorted(md_only),
            "actual_data_path": actual_path,
        })
        # Short pause between endpoints that were actually called.
        time.sleep(0.3)

    # ── Write changed data paths back to api_registry.json ──
    if registry_updates:
        print(f"\n{'-' * 60}")
        print(f"更新 api_registry.json 中 {len(registry_updates)} 个 data_path...")
        for entry in registry:
            tid = entry["id"]
            if tid in registry_updates:
                entry["data_path"] = registry_updates[tid]
        with open(REGISTRY_PATH, "w", encoding="utf-8") as f:
            json.dump(registry, f, ensure_ascii=False, indent=2)
        print(" ✅ api_registry.json 已更新")

    # ── Summary ──
    print(f"\n{'=' * 80}")
    print("汇总报告")
    print(f"{'=' * 80}")
    gap_count = sum(1 for r in results if r["status"] == "gap")
    ok_count = sum(1 for r in results if r["status"] == "ok")
    skip_count = sum(1 for r in results if r["status"] == "skipped")
    err_count = sum(1 for r in results if r["status"] == "error")
    print(f" 完全一致: {ok_count}")
    print(f" 有缺失: {gap_count}")
    print(f" 跳过: {skip_count}")
    print(f" 错误: {err_count}")

    if all_gaps:
        print("\n需要补充到 .md 文档的字段:")
        for table, name_zh, fields in all_gaps:
            print(f" {table} ({name_zh}): {fields}")

    # Save the detailed results.
    out_path = os.path.join(REPORT_DIR, "json_refresh_audit.json")
    os.makedirs(REPORT_DIR, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已写入: {out_path}")


if __name__ == "__main__":
    main()
# AI_CHANGELOG:
# - 日期: 2026-02-14
# - Prompt: P20260214-060000 — 全量 JSON 刷新 + MD 文档补全 + 数据路径修正
# - 直接原因: 旧 JSON 样本仅含单条记录,缺少条件性字段;需重新获取 100 条数据并遍历提取最全字段
# - 变更摘要: 新建脚本,实现:(1) 调用全部 24 个 API 端点获取 100 条数据 (2) 遍历所有记录提取字段并集
# (3) 与 .md 文档比对找出缺失字段 (4) 更新 JSON 样本和 api_registry.json data_path (5) 更新 .md 文档响应数据路径行
# - 风险与验证: 脚本需要有效的 API_TOKEN 和网络连接;验证:运行后检查 json_refresh_audit.json 中 24/24 通过