init: 项目初始提交 - NeoZQYY Monorepo 完整代码
This commit is contained in:
523
apps/etl/pipelines/feiqiu/scripts/refresh_json_and_audit.py
Normal file
523
apps/etl/pipelines/feiqiu/scripts/refresh_json_and_audit.py
Normal file
@@ -0,0 +1,523 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
重新获取全部 API 接口的 JSON 数据(最多 100 条),
|
||||
遍历所有记录提取最全字段集合,
|
||||
与 .md 文档比对并输出差异报告。
|
||||
|
||||
时间范围:2026-01-01 00:00:00 ~ 2026-02-13 00:00:00
|
||||
|
||||
用法:python scripts/refresh_json_and_audit.py
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
|
||||
# ── Configuration ─────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"


def parse_env_value(raw):
    """Normalize the right-hand side of a ``KEY=value`` line from a .env file.

    Surrounding whitespace is removed first; then, if the value is wrapped in
    one pair of matching single or double quotes, that pair is dropped.  This
    makes ``API_TOKEN="abc"`` and ``API_TOKEN=abc`` both yield ``abc`` instead
    of sending the quotes as part of the bearer token.
    """
    value = raw.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
        value = value[1:-1]
    return value


# The environment variable takes precedence; the .env file one directory above
# this script is consulted only when the variable is unset or empty.
API_TOKEN = os.environ.get("API_TOKEN", "")
if not API_TOKEN:
    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
    if os.path.exists(env_path):
        with open(env_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line.startswith("API_TOKEN="):
                    # Only the first matching line is used.
                    API_TOKEN = parse_env_value(line.split("=", 1)[1])
                    break
|
||||
|
||||
# Fixed request parameters for this audit run.
SITE_ID = 2790685415443269
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100

# Relative locations of the API docs, the JSON samples, the registry file and
# the generated reports.
DOCS_DIR = os.path.join("docs", "api-reference")
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")
REGISTRY_PATH = os.path.join(DOCS_DIR, "api_registry.json")
REPORT_DIR = os.path.join("docs", "reports")

# HTTP headers sent with every API request.
HEADERS = {
    "Authorization": "Bearer " + API_TOKEN,
    "Content-Type": "application/json",
}

# Envelope / container keys that are filtered out when collecting field
# names (both the original spelling and the lowercase spelling are listed).
WRAPPER_FIELDS = {
    "settleList", "settlelist",
    "siteProfile", "siteprofile",
    "tableProfile", "tableprofile",
    "goodsCategoryList", "goodscategorylist",
    "data", "code", "msg",
}

# Header-cell texts of the markdown tables; a backticked first cell equal to
# one of these (case-insensitively) is not treated as a field name.
CROSS_REF_HEADERS = {
    "字段名", "类型", "示例值", "说明",
    "field", "example", "description",
}

# Name of the list field each endpoint actually returns (found by debugging).
ACTUAL_LIST_KEY = {
    "assistant_accounts_master": "assistantInfos",
    "assistant_service_records": "orderAssistantDetails",
    "assistant_cancellation_records": "abolitionAssistants",
    "table_fee_transactions": "siteTableUseDetailsList",
    "table_fee_discount_records": "taiFeeAdjustInfos",
    "tenant_goods_master": "tenantGoodsList",
    "store_goods_sales_records": "orderGoodsLedgers",
    "store_goods_master": "orderGoodsList",
    "goods_stock_movements": "queryDeliveryRecordsList",
    "member_profiles": "tenantMemberInfos",
    "member_stored_value_cards": "tenantMemberCards",
    "member_balance_changes": "tenantMemberCardLogs",
    "group_buy_packages": "packageCouponList",
    "group_buy_redemption_records": "siteTableUseDetailsList",
    "site_tables_master": "siteTables",
    # The entries below use "list" or a special path.
    "payment_transactions": "list",
    "refund_transactions": "list",
    "platform_coupon_redemption_records": "list",
    "goods_stock_summary": "list",
    "settlement_records": "settleList",
    "recharge_settlements": "settleList",
}
|
||||
|
||||
|
||||
def load_registry():
    """Return the parsed contents of the JSON file at ``REGISTRY_PATH``."""
    with open(REGISTRY_PATH, encoding="utf-8") as registry_file:
        return json.loads(registry_file.read())
|
||||
|
||||
|
||||
def call_api(module, action, body):
    """POST ``body`` as JSON to ``{API_BASE}{module}/{action}``.

    Args:
        module: Path segment appended directly to ``API_BASE``.
        action: Path segment appended after ``module``.
        body: Object sent as the JSON request body.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the reply is not valid
        JSON.  The failure is printed rather than raised so that one failing
        endpoint does not abort the caller's loop.
    """
    url = f"{API_BASE}{module}/{action}"
    try:
        resp = requests.post(url, json=body, headers=HEADERS, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors, timeouts and the HTTPError
        # raised by raise_for_status(); ValueError covers an undecodable body.
        # Anything else is a programming error and is allowed to propagate.
        print(f" ❌ 请求失败: {e}")
        return None
|
||||
|
||||
|
||||
def unwrap_records(raw_json, table_name):
    """Extract the list of business records from a raw API response.

    Args:
        raw_json: Decoded response body; anything that is not a dict
            (including ``None``) yields an empty list.
        table_name: Registry id of the endpoint, used to pick the unwrapping
            rule.

    Returns:
        A list of records.  Lookup order:

        1. Tables with a fixed container key (``goodsCategoryList``,
           ``roleAreaRelations``, ``settleList``) return that list.
        2. ``tenant_member_balance_overview`` returns the ``data`` object
           itself followed by the items of ``rechargeCardList`` and
           ``giveCardList``.
        3. Otherwise, when ``data`` is a dict, the key configured in
           ``ACTUAL_LIST_KEY`` (default ``"list"``) is used; if it does not
           hold a list, the first list-valued field other than ``total`` is
           returned.
        4. When ``data`` is itself a list it is returned unchanged.
    """
    if not isinstance(raw_json, dict):
        return []

    data = raw_json.get("data")
    if data is None:
        return []

    # Tables whose records always live under one fixed key of ``data``.
    fixed_keys = {
        "stock_goods_category_tree": "goodsCategoryList",
        "role_area_association": "roleAreaRelations",
        # data.settleList is a list; each element carries an inner settleList.
        "settlement_records": "settleList",
        "recharge_settlements": "settleList",
    }
    if table_name in fixed_keys:
        if not isinstance(data, dict):
            return []
        items = data.get(fixed_keys[table_name], [])
        return items if isinstance(items, list) else []

    # Summary object plus rechargeCardList / giveCardList.
    if table_name == "tenant_member_balance_overview":
        if not isinstance(data, dict):
            return []
        records = [data]  # the top-level object counts as one record
        for list_key in ("rechargeCardList", "giveCardList"):
            items = data.get(list_key, [])
            if isinstance(items, list):
                records.extend(items)
        return records

    if isinstance(data, list):
        return data
    if not isinstance(data, dict):
        return []

    list_key = ACTUAL_LIST_KEY.get(table_name, "list")
    # No default here: a missing key must fall through to the scan below.
    # With a ``[]`` default the fallback could never run for a missing key.
    items = data.get(list_key)
    if isinstance(items, list):
        return items

    # Fallback: first list-valued field.
    for k, v in data.items():
        if isinstance(v, list) and k.lower() != "total":
            return v
    return []
|
||||
|
||||
|
||||
def extract_all_fields(records, table_name):
    """Return the union of lowercase field names found across ``records``.

    Non-dict records are ignored.  Per table:

    * ``settlement_records`` / ``recharge_settlements``: keys are read from
      the record's inner ``settleList`` (its first element when it is a list,
      the record itself when the key is absent).  Names in
      ``WRAPPER_FIELDS`` are dropped, except ``siteprofile``.
    * ``tenant_member_balance_overview``: every key is kept except the
      nested list containers ``rechargecardlist`` and ``givecardlist``.
    * any other table: names in ``WRAPPER_FIELDS`` are dropped, except
      ``siteprofile`` and ``tableprofile``.
    """
    is_settlement = table_name in ("settlement_records", "recharge_settlements")
    is_overview = table_name == "tenant_member_balance_overview"
    if is_settlement:
        kept_wrappers = ("siteprofile",)
    else:
        kept_wrappers = ("siteprofile", "tableprofile")
    nested_containers = ("rechargecardlist", "givecardlist")

    seen = set()
    for rec in records:
        if not isinstance(rec, dict):
            continue

        source = rec
        if is_settlement:
            # Expand the inner settleList object.
            source = rec.get("settleList", rec)
            if isinstance(source, list):
                source = source[0] if source else {}
            if not isinstance(source, dict):
                continue

        lowered = (key.lower() for key in source)
        if is_overview:
            # Skip the names of the nested list keys themselves.
            seen.update(n for n in lowered if n not in nested_containers)
        else:
            seen.update(
                n for n in lowered
                if n not in WRAPPER_FIELDS or n in kept_wrappers
            )
    return seen
|
||||
|
||||
|
||||
def extract_md_fields(table_name):
    """Collect lowercase field names documented in ``<DOCS_DIR>/<table_name>.md``.

    Only the section opened by a heading that starts with ``## 四、`` and
    contains ``响应字段`` is scanned; scanning stops at the next ``## ``
    heading that does not start with ``## 四``.  A field is the backticked
    text in the first cell of a table row.  Rows whose first cell is a table
    header word (``CROSS_REF_HEADERS``) or a wrapper name (``WRAPPER_FIELDS``,
    except ``siteprofile`` / ``tableprofile``) are skipped.  For the two
    settlement tables, rows under a ``###`` heading mentioning siteProfile and
    rows named ``siteProfile.*`` are skipped as well.

    Returns an empty set when the document does not exist.
    """
    doc_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(doc_path):
        return set()

    with open(doc_path, "r", encoding="utf-8") as doc:
        doc_lines = doc.readlines()

    row_re = re.compile(r'^\|\s*`([^`]+)`\s*\|')
    site_heading_re = re.compile(r'^###.*siteProfile', re.IGNORECASE)
    header_words = {h.lower() for h in CROSS_REF_HEADERS}
    kept_wrappers = ("siteprofile", "tableprofile")
    is_settlement = table_name in ("settlement_records", "recharge_settlements")

    found = set()
    inside = False
    in_site_block = False
    for raw_line in doc_lines:
        text = raw_line.strip()

        if text.startswith("## 四、") and "响应字段" in text:
            inside, in_site_block = True, False
            continue
        if not inside:
            continue
        if text.startswith("## ") and not text.startswith("## 四"):
            break

        if is_settlement:
            if site_heading_re.search(text):
                in_site_block = True
                continue
            # Any other level-3 heading closes the siteProfile sub-section.
            if in_site_block and text.startswith("### "):
                in_site_block = False

        hit = row_re.match(text)
        if hit is None:
            continue

        name = hit.group(1).strip()
        lowered = name.lower()
        if lowered in header_words:
            continue
        if is_settlement and (in_site_block or name.startswith("siteProfile.")):
            continue
        if lowered in WRAPPER_FIELDS and lowered not in kept_wrappers:
            continue
        found.add(lowered)

    return found
|
||||
|
||||
|
||||
def build_body(entry):
    """Build the request body for one registry entry.

    Starts from a shallow copy of ``entry["body"]`` (empty when missing or
    falsy).  When the entry has a truthy ``time_range`` and at least two
    ``time_keys``, the first two keys receive ``START_TIME`` and ``END_TIME``.
    When the entry has a truthy ``pagination`` mapping, its ``page_key``
    (default ``"page"``) is set to 1 and its ``limit_key`` (default
    ``"limit"``) to ``LIMIT``.
    """
    payload = dict(entry.get("body") or {})

    time_keys = entry.get("time_keys")
    if entry.get("time_range") and time_keys and len(time_keys) >= 2:
        payload[time_keys[0]] = START_TIME
        payload[time_keys[1]] = END_TIME

    paging = entry.get("pagination")
    if paging:
        payload[paging.get("page_key", "page")] = 1
        payload[paging.get("limit_key", "limit")] = LIMIT

    return payload
|
||||
|
||||
|
||||
def save_sample(table_name, records):
    """Save the first record as the JSON sample for ``table_name``.

    The sample is written to ``<SAMPLES_DIR>/<table_name>.json`` only when
    ``records`` is non-empty and its first element is a dict.  The samples
    directory is created on demand so a fresh checkout does not fail on the
    first write.

    Returns:
        The sample path, whether or not a file was written.
    """
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    if records and isinstance(records[0], dict):
        os.makedirs(SAMPLES_DIR, exist_ok=True)
        with open(sample_path, "w", encoding="utf-8") as f:
            json.dump(records[0], f, ensure_ascii=False, indent=2)
    return sample_path
|
||||
|
||||
|
||||
def discover_actual_data_path(raw_json, table_name):
    """Return the dotted path under which the response carries its records.

    Args:
        raw_json: Decoded response body; anything that is not a dict
            (including ``None``) yields ``None``.
        table_name: Registry id of the endpoint.

    Returns:
        ``None`` when there is no ``data`` payload or no list can be located.
        Otherwise, in this order:

        * a fixed path for the special tables (``data.goodsCategoryList``,
          ``data.roleAreaRelations``, ``data`` for the balance overview,
          ``data.settleList``);
        * ``"data"`` when the payload itself is a list;
        * ``data.<key>`` for the key configured in ``ACTUAL_LIST_KEY``
          (default ``"list"``) when that key holds a list;
        * ``data.<first list-valued field>`` other than ``total``.

        The key resolution matches the one ``unwrap_records`` uses, so the
        reported path names the field the records are read from.
    """
    data = raw_json.get("data") if isinstance(raw_json, dict) else None
    if data is None:
        return None

    # Special tables have a fixed location.
    fixed_paths = {
        "stock_goods_category_tree": "data.goodsCategoryList",
        "role_area_association": "data.roleAreaRelations",
        "tenant_member_balance_overview": "data",  # top-level summary object
        "settlement_records": "data.settleList",
        "recharge_settlements": "data.settleList",
    }
    if table_name in fixed_paths:
        return fixed_paths[table_name]

    if isinstance(data, list):
        # The payload itself is the record list.
        return "data"

    if isinstance(data, dict):
        list_key = ACTUAL_LIST_KEY.get(table_name, "list")
        # Only report the configured key when it really holds the records.
        if isinstance(data.get(list_key), list):
            return f"data.{list_key}"
        # Fallback: first list-valued field.
        for k, v in data.items():
            if isinstance(v, list) and k.lower() != "total":
                return f"data.{k}"
    return None
|
||||
|
||||
|
||||
def update_md_data_path(table_name, actual_path):
    """Update or add the response-data-path row in ``<DOCS_DIR>/<table_name>.md``.

    If the document already has a table row labelled ``数据路径``,
    ``响应数据路径`` or ``data_path`` whose value is in backticks, the value is
    replaced with ``actual_path``.  Otherwise a new row
    ``| 响应数据路径 | `<actual_path>` |`` is inserted after the last table row
    of the ``## 一、…接口概述`` section.

    Args:
        table_name: Registry id; selects the markdown file.
        actual_path: Path text to record, inserted literally.

    Returns:
        ``True`` when the file was rewritten; ``False`` when the file is
        missing, the row already holds ``actual_path``, or no overview table
        was found.
    """
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(md_path):
        return False

    with open(md_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Is there already a data-path row?
    if "数据路径" in content or "data_path" in content.lower():
        # Try to update the existing row.
        pattern = re.compile(
            r'(\|\s*(?:数据路径|响应数据路径|data_path)\s*\|\s*)`[^`]*`(\s*\|)',
            re.IGNORECASE
        )
        if pattern.search(content):
            # A callable replacement inserts actual_path verbatim; a template
            # string would interpret backslashes and group references in it.
            new_content = pattern.sub(
                lambda m: f"{m.group(1)}`{actual_path}`{m.group(2)}", content
            )
            if new_content != content:
                with open(md_path, "w", encoding="utf-8") as f:
                    f.write(new_content)
                return True
            return False  # already up to date

    # No data-path row was updated: append one to the overview table, i.e.
    # after the last "|" row that follows the "## 一、…接口概述" heading.
    lines = content.split("\n")
    in_overview = False
    last_table_row = None

    for i, line in enumerate(lines):
        s = line.strip()
        if "## 一、" in s and "接口概述" in s:
            in_overview = True
            continue
        if in_overview and s.startswith("## "):
            break
        # Rows containing "---" are table separators, not content rows.
        if in_overview and s.startswith("|") and "---" not in s:
            last_table_row = i

    if last_table_row is not None:
        new_line = f"| 响应数据路径 | `{actual_path}` |"
        lines.insert(last_table_row + 1, new_line)
        with open(md_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))
        return True

    return False
|
||||
|
||||
|
||||
def main():
    """Run the refresh-and-audit pass over every endpoint in the registry.

    For each registry entry not marked ``skip``: call the API, determine the
    actual data path, unwrap the records, save the first record as a sample,
    compare the JSON field names with the ones documented in the .md file and
    update the data-path row of that .md file.  Afterwards the changed data
    paths are written back to the registry file, a summary is printed and the
    per-table results are dumped to ``<REPORT_DIR>/json_refresh_audit.json``.
    """

    def _placeholder(table, status):
        # Result row for an endpoint that produced no data (skipped / failed).
        return {
            "table": table,
            "status": status,
            "record_count": 0,
            "json_field_count": 0,
            "md_field_count": 0,
            "json_fields": [],
            "md_fields": [],
            "json_only": [],
            "md_only": [],
            "actual_data_path": None,
        }

    registry = load_registry()
    print(f"加载 API 注册表: {len(registry)} 个端点")
    print(f"时间范围: {START_TIME} ~ {END_TIME}")
    print(f"每接口获取: {LIMIT} 条")
    print("=" * 80)

    results = []
    all_gaps = []
    registry_updates = {}  # table_name -> actual_data_path

    for entry in registry:
        table_name = entry["id"]
        name_zh = entry.get("name_zh", "")
        module = entry["module"]
        action = entry["action"]

        print("\n" + "─" * 60)
        print(f"[{table_name}] {name_zh} — {module}/{action}")

        if entry.get("skip", False):
            print(" ⏭️ 跳过(标记为 skip)")
            results.append(_placeholder(table_name, "skipped"))
            continue

        request_body = build_body(entry)
        print(f" 请求: POST {module}/{action}")
        raw = call_api(module, action, request_body)
        if raw is None:
            results.append(_placeholder(table_name, "error"))
            continue

        # Determine where the records actually live in the response.
        actual_path = discover_actual_data_path(raw, table_name)
        old_path = entry.get("data_path", "")
        if actual_path and actual_path != old_path:
            print(f" 📍 数据路径: {old_path} → {actual_path}")
            registry_updates[table_name] = actual_path
        else:
            print(f" 📍 数据路径: {actual_path or old_path}")

        records = unwrap_records(raw, table_name)
        print(f" 获取记录数: {len(records)}")

        # Keep the first record as the sample.
        save_sample(table_name, records)

        # Union of field names over all records vs. the documented ones.
        json_fields = extract_all_fields(records, table_name)
        md_fields = extract_md_fields(table_name)
        json_only = json_fields - md_fields
        md_only = md_fields - json_fields

        if json_only:
            status = "gap"
            missing = sorted(json_only)
            print(f" ❌ JSON 有但 .md 缺失 ({len(json_only)} 个): {missing}")
            all_gaps.append((table_name, name_zh, missing))
        elif md_only:
            status = "ok"
            print(f" ⚠️ .md 多 {len(md_only)} 个条件性字段")
        else:
            status = "ok"
            print(f" ✅ 完全一致 ({len(json_fields)} 个字段)")

        # Record the data path in the .md document.
        if actual_path and update_md_data_path(table_name, actual_path):
            print(" 📝 已更新 .md 文档数据路径")

        results.append({
            "table": table_name,
            "status": status,
            "record_count": len(records),
            "json_field_count": len(json_fields),
            "md_field_count": len(md_fields),
            "json_fields": sorted(json_fields),
            "md_fields": sorted(md_fields),
            "json_only": sorted(json_only),
            "md_only": sorted(md_only),
            "actual_data_path": actual_path,
        })

        time.sleep(0.3)

    # ── Write changed data_path values back to api_registry.json ──
    if registry_updates:
        print("\n" + "─" * 60)
        print(f"更新 api_registry.json 中 {len(registry_updates)} 个 data_path...")
        for entry in registry:
            new_path = registry_updates.get(entry["id"])
            if entry["id"] in registry_updates:
                entry["data_path"] = new_path
        with open(REGISTRY_PATH, "w", encoding="utf-8") as f:
            json.dump(registry, f, ensure_ascii=False, indent=2)
        print(" ✅ api_registry.json 已更新")

    # ── Summary ──
    banner = "=" * 80
    print("\n" + banner)
    print("汇总报告")
    print(banner)

    tally = {"ok": 0, "gap": 0, "skipped": 0, "error": 0}
    for row in results:
        tally[row["status"]] += 1

    print(f" 完全一致: {tally['ok']}")
    print(f" 有缺失: {tally['gap']}")
    print(f" 跳过: {tally['skipped']}")
    print(f" 错误: {tally['error']}")

    if all_gaps:
        print("\n需要补充到 .md 文档的字段:")
        for gap_table, gap_name, gap_fields in all_gaps:
            print(f" {gap_table} ({gap_name}): {gap_fields}")

    # Persist the detailed per-table results.
    os.makedirs(REPORT_DIR, exist_ok=True)
    out_path = os.path.join(REPORT_DIR, "json_refresh_audit.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已写入: {out_path}")
|
||||
|
||||
|
||||
# Run the audit only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
||||
# AI_CHANGELOG:
|
||||
# - 日期: 2026-02-14
|
||||
# - Prompt: P20260214-060000 — 全量 JSON 刷新 + MD 文档补全 + 数据路径修正
|
||||
# - 直接原因: 旧 JSON 样本仅含单条记录,缺少条件性字段;需重新获取 100 条数据并遍历提取最全字段
|
||||
# - 变更摘要: 新建脚本,实现:(1) 调用全部 24 个 API 端点获取 100 条数据 (2) 遍历所有记录提取字段并集
|
||||
# (3) 与 .md 文档比对找出缺失字段 (4) 更新 JSON 样本和 api_registry.json data_path (5) 更新 .md 文档响应数据路径行
|
||||
# - 风险与验证: 脚本需要有效的 API_TOKEN 和网络连接;验证:运行后检查 json_refresh_audit.json 中 24/24 通过
|
||||
Reference in New Issue
Block a user