# -*- coding: utf-8 -*-
"""
Re-fetch the JSON payload of every registered API endpoint (up to 100 records
each), walk all returned records to collect the fullest set of field names,
compare that set with the endpoint's .md document and write a diff report.

Time range: 2026-01-01 00:00:00 ~ 2026-02-13 00:00:00
Usage: python scripts/refresh_json_and_audit.py
"""
import json
import os
import re
import sys
import time

try:
    import requests
except ImportError:
    # Only call_api() needs the HTTP client. Tolerating its absence keeps the
    # pure parsing helpers below importable (e.g. from unit tests).
    requests = None

# ── Configuration ─────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"


def _parse_env_token(lines):
    """Return the value of the first ``API_TOKEN=`` line in *lines*, or ""."""
    for raw_line in lines:
        entry = raw_line.strip()
        if entry.startswith("API_TOKEN="):
            value = entry.split("=", 1)[1].strip()
            # .env files commonly quote values; the quotes are not part of the token.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            return value
    return ""


def _load_api_token():
    """Read the token from the environment, falling back to ``../.env``."""
    token = os.environ.get("API_TOKEN", "")
    if token:
        return token
    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
    try:
        with open(env_path, "r", encoding="utf-8") as env_file:
            return _parse_env_token(env_file)
    except FileNotFoundError:
        return ""


API_TOKEN = _load_api_token()

SITE_ID = 2790685415443269
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
REPORT_DIR = os.path.join("docs", "reports")

HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json",
}

REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")

WRAPPER_FIELDS = {
    "settleList", "siteProfile", "tableProfile", "goodsCategoryList",
    "data", "code", "msg",
    "settlelist", "siteprofile", "tableprofile", "goodscategorylist",
}
CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "example", "description"}

# Name of the list field each endpoint actually returns (obtained while debugging).
ACTUAL_LIST_KEY = {
    "assistant_accounts_master": "assistantInfos",
    "assistant_service_records": "orderAssistantDetails",
    "assistant_cancellation_records": "abolitionAssistants",
    "table_fee_transactions": "siteTableUseDetailsList",
    "table_fee_discount_records": "taiFeeAdjustInfos",
    "tenant_goods_master": "tenantGoodsList",
    "store_goods_sales_records": "orderGoodsLedgers",
    "store_goods_master": "orderGoodsList",
    "goods_stock_movements": "queryDeliveryRecordsList",
    "member_profiles": "tenantMemberInfos",
    "member_stored_value_cards": "tenantMemberCards",
    "member_balance_changes": "tenantMemberCardLogs",
    "group_buy_packages": "packageCouponList",
    "group_buy_redemption_records": "siteTableUseDetailsList",
    "site_tables_master": "siteTables",
    # The entries below use "list" or a special path.
    "payment_transactions": "list",
    "refund_transactions": "list",
    "platform_coupon_redemption_records": "list",
    "goods_stock_summary": "list",
    "settlement_records": "settleList",
    "recharge_settlements": "settleList",
}

# Tables whose records wrap the real payload in an inner ``settleList`` object.
_SETTLEMENT_TABLES = ("settlement_records", "recharge_settlements")
# Tables whose record list lives under one fixed key, with no fallback search.
_SPECIAL_LIST_KEY = {
    "stock_goods_category_tree": "goodsCategoryList",
    "role_area_association": "roleAreaRelations",
    "settlement_records": "settleList",
    "recharge_settlements": "settleList",
}
# Table whose ``data`` is a summary object plus two nested card lists.
_OVERVIEW_TABLE = "tenant_member_balance_overview"
_OVERVIEW_LIST_KEYS = ("rechargeCardList", "giveCardList")
# Wrapper names that are nevertheless reported as fields.
_KEPT_WRAPPERS = ("siteprofile", "tableprofile")

_MD_FIELD_ROW = re.compile(r'^\|\s*`([^`]+)`\s*\|')
_MD_SITEPROFILE_HEADING = re.compile(r'^###.*siteProfile', re.IGNORECASE)
_CROSS_REF_HEADERS_LOWER = frozenset(h.lower() for h in CROSS_REF_HEADERS)


def load_registry():
    """Load and return the parsed contents of ``REGISTRY_PATH``."""
    with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
        return json.load(f)


def call_api(module, action, body):
    """POST *body* as JSON to ``{API_BASE}{module}/{action}``.

    Returns the decoded JSON response, or ``None`` (after printing the
    reason) when the request fails, the server answers with an HTTP error
    status, or the response body is not valid JSON.
    """
    if requests is None:
        print(" ❌ 请求失败: 未安装 requests 库")
        return None
    url = f"{API_BASE}{module}/{action}"
    try:
        resp = requests.post(url, json=body, headers=HEADERS, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f" ❌ 请求失败: {e}")
        return None


def unwrap_records(raw_json, table_name):
    """Extract the list of business records from a raw API response.

    Args:
        raw_json: Decoded response body; anything that is not a dict yields [].
        table_name: Registry id of the endpoint; selects the unwrapping rule.

    Returns:
        A list of records (possibly empty). For the balance-overview table the
        first element is the summary object itself, followed by the items of
        its nested card lists.
    """
    if not isinstance(raw_json, dict):
        return []
    data = raw_json.get("data")
    if data is None:
        return []

    if table_name == _OVERVIEW_TABLE:
        if not isinstance(data, dict):
            return []
        records = [data]  # the top-level summary counts as one record
        for list_key in _OVERVIEW_LIST_KEYS:
            items = data.get(list_key, [])
            if isinstance(items, list):
                records.extend(items)
        return records

    special_key = _SPECIAL_LIST_KEY.get(table_name)
    if special_key is not None:
        items = data.get(special_key) if isinstance(data, dict) else None
        return items if isinstance(items, list) else []

    if isinstance(data, list):
        return data
    if not isinstance(data, dict):
        return []

    # No default here: a missing key must fall through to the search below
    # instead of being mistaken for an empty result.
    items = data.get(ACTUAL_LIST_KEY.get(table_name, "list"))
    if isinstance(items, list):
        return items
    # Fallback: the first list-valued field.
    for key, value in data.items():
        if isinstance(value, list) and key != "total":
            return value
    return []


def extract_all_fields(records, table_name):
    """Collect the union of field names (lower-cased) over all *records*.

    Non-dict records are ignored. Names in ``WRAPPER_FIELDS`` are skipped,
    except ``siteprofile`` (and, for ordinary tables, ``tableprofile``).
    """
    all_fields = set()
    for record in records:
        if not isinstance(record, dict):
            continue

        if table_name in _SETTLEMENT_TABLES:
            # The payload sits in the inner ``settleList``; when that is a
            # list, every dict element contributes to the union.
            inner = record.get("settleList", record)
            candidates = inner if isinstance(inner, list) else [inner]
            for settle in candidates:
                if not isinstance(settle, dict):
                    continue
                for key in settle:
                    lowered = key.lower()
                    if lowered == "siteprofile" or lowered not in WRAPPER_FIELDS:
                        all_fields.add(lowered)
            continue

        if table_name == _OVERVIEW_TABLE:
            # Skip the names of the nested lists themselves.
            skipped = {name.lower() for name in _OVERVIEW_LIST_KEYS}
            all_fields.update(
                key.lower() for key in record if key.lower() not in skipped
            )
            continue

        for key in record:
            lowered = key.lower()
            if lowered not in WRAPPER_FIELDS or lowered in _KEPT_WRAPPERS:
                all_fields.add(lowered)
    return all_fields


def _parse_md_fields(lines, table_name):
    """Return the lower-cased field names found in the "## 四、…响应字段" section."""
    is_settlement = table_name in _SETTLEMENT_TABLES
    fields = set()
    in_section = False
    in_siteprofile = False

    for line in lines:
        s = line.strip()
        if s.startswith("## 四、") and "响应字段" in s:
            in_section = True
            in_siteprofile = False
            continue
        if not in_section:
            continue
        if s.startswith("## ") and not s.startswith("## 四"):
            break  # next top-level chapter: the field section is over

        if is_settlement:
            if _MD_SITEPROFILE_HEADING.search(s):
                in_siteprofile = True
                continue
            if in_siteprofile and s.startswith("### "):
                # Any other third-level heading ends the siteProfile subsection.
                in_siteprofile = False

        m = _MD_FIELD_ROW.match(s)
        if not m:
            continue
        raw = m.group(1).strip()
        lowered = raw.lower()
        if lowered in _CROSS_REF_HEADERS_LOWER:
            continue
        if is_settlement and (in_siteprofile or raw.startswith("siteProfile.")):
            continue
        if lowered in WRAPPER_FIELDS and lowered not in _KEPT_WRAPPERS:
            continue
        fields.add(lowered)
    return fields


def extract_md_fields(table_name):
    """Extract field names (lower-cased) from the "四、响应字段详解" chapter
    of ``DOCS_DIR/<table_name>.md``; returns an empty set if the file is missing.
    """
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    try:
        with open(md_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        return set()
    return _parse_md_fields(lines, table_name)


def build_body(entry):
    """Build the request body for a registry *entry*.

    Starts from a copy of ``entry["body"]``, then fills in the configured time
    range (when the entry declares at least two ``time_keys``) and first-page
    pagination parameters (when the entry declares ``pagination``).
    """
    body = dict(entry.get("body") or {})
    time_keys = entry.get("time_keys")
    if entry.get("time_range") and time_keys and len(time_keys) >= 2:
        body[time_keys[0]] = START_TIME
        body[time_keys[1]] = END_TIME
    pagination = entry.get("pagination")
    if pagination:
        body[pagination.get("page_key", "page")] = 1
        body[pagination.get("limit_key", "limit")] = LIMIT
    return body


def save_sample(table_name, records):
    """Save the first record as the endpoint's JSON sample.

    Nothing is written when *records* is empty or its first element is not a
    dict. Returns the sample path either way.
    """
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    if records and isinstance(records[0], dict):
        os.makedirs(SAMPLES_DIR, exist_ok=True)
        with open(sample_path, "w", encoding="utf-8") as f:
            json.dump(records[0], f, ensure_ascii=False, indent=2)
    return sample_path


def discover_actual_data_path(raw_json, table_name):
    """Work out the path under which the API actually returned its records.

    Returns a dotted path such as ``"data.list"`` or ``"data"``, or ``None``
    when the response has no ``data`` or no list can be located.
    """
    if not isinstance(raw_json, dict):
        return None
    data = raw_json.get("data")
    if data is None:
        return None

    if table_name == _OVERVIEW_TABLE:
        return "data"  # top-level summary object
    special_key = _SPECIAL_LIST_KEY.get(table_name)
    if special_key is not None:
        return f"data.{special_key}"

    if isinstance(data, list):
        return "data"  # unwrap_records() reads the records straight from data
    if isinstance(data, dict):
        # Same default key as unwrap_records() so both agree on the source.
        list_key = ACTUAL_LIST_KEY.get(table_name, "list")
        if list_key in data:
            return f"data.{list_key}"
        # Fallback: the first list-valued field.
        for key, value in data.items():
            if isinstance(value, list) and key.lower() != "total":
                return f"data.{key}"
    return None
# Overview-table row holding the response data path: | <label> | `<path>` |
_DATA_PATH_ROW = re.compile(
    r'(\|\s*(?:数据路径|响应数据路径|data_path)\s*\|\s*)`[^`]*`(\s*\|)',
    re.IGNORECASE,
)


def _render_data_path(content, actual_path):
    """Return *content* with the data-path row set to *actual_path*.

    Returns ``None`` when nothing needs writing: the row already holds that
    value, or there is no row and no overview table to append one to.
    """
    if _DATA_PATH_ROW.search(content):
        # A callable replacement inserts actual_path literally; a template
        # string would interpret backslashes / group references inside it.
        updated = _DATA_PATH_ROW.sub(
            lambda m: f"{m.group(1)}`{actual_path}`{m.group(2)}", content
        )
        return None if updated == content else updated

    # No data-path row yet: append one after the last table row of the
    # "## 一、接口概述" chapter (rows start with "|"; separator rows are skipped).
    lines = content.split("\n")
    in_overview = False
    last_table_row = None
    for i, line in enumerate(lines):
        s = line.strip()
        if "## 一、" in s and "接口概述" in s:
            in_overview = True
            continue
        if not in_overview:
            continue
        if s.startswith("## "):
            break
        if s.startswith("|") and "---" not in s:
            last_table_row = i

    if last_table_row is None:
        return None
    lines.insert(last_table_row + 1, f"| 响应数据路径 | `{actual_path}` |")
    return "\n".join(lines)


def update_md_data_path(table_name, actual_path):
    """Update or add the actual data path in the overview table of the .md doc.

    Returns ``True`` when ``DOCS_DIR/<table_name>.md`` was rewritten and
    ``False`` when the file is missing, already up to date, or has no
    overview table to extend.
    """
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    try:
        with open(md_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        return False

    new_content = _render_data_path(content, actual_path)
    if new_content is None:
        return False
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(new_content)
    return True


def _make_result(table_name, status, *, record_count=0, json_fields=(),
                 md_fields=(), actual_data_path=None):
    """Build one report row; field collections are stored as sorted lists."""
    json_set = set(json_fields)
    md_set = set(md_fields)
    return {
        "table": table_name,
        "status": status,
        "record_count": record_count,
        "json_field_count": len(json_set),
        "md_field_count": len(md_set),
        "json_fields": sorted(json_set),
        "md_fields": sorted(md_set),
        "json_only": sorted(json_set - md_set),
        "md_only": sorted(md_set - json_set),
        "actual_data_path": actual_data_path,
    }


def _audit_endpoint(entry):
    """Fetch one registry *entry*, refresh its sample/doc and return its report row."""
    table_name = entry["id"]
    name_zh = entry.get("name_zh", "")
    module = entry["module"]
    action = entry["action"]

    print(f"\n{'─' * 60}")
    print(f"[{table_name}] {name_zh} — {module}/{action}")

    if entry.get("skip", False):
        print(" ⏭️ 跳过(标记为 skip)")
        return _make_result(table_name, "skipped")

    body = build_body(entry)
    print(f" 请求: POST {module}/{action}")
    raw = call_api(module, action, body)
    if raw is None:
        return _make_result(table_name, "error")

    # Discover where the records actually live in the response.
    actual_path = discover_actual_data_path(raw, table_name)
    old_path = entry.get("data_path", "")
    if actual_path and actual_path != old_path:
        print(f" 📍 数据路径: {old_path} → {actual_path}")
    else:
        print(f" 📍 数据路径: {actual_path or old_path}")

    records = unwrap_records(raw, table_name)
    print(f" 获取记录数: {len(records)}")
    if not records:
        # With no records there are no JSON fields, so the comparison below
        # cannot detect missing documentation.
        print(" ⚠️ 未获取到任何记录,字段比对结果不可靠")

    # Save the first record as the sample.
    save_sample(table_name, records)

    # Union of fields over all records versus the documented fields.
    json_fields = extract_all_fields(records, table_name)
    md_fields = extract_md_fields(table_name)
    json_only = json_fields - md_fields
    md_only = md_fields - json_fields

    status = "ok"
    if json_only:
        status = "gap"
        print(f" ❌ JSON 有但 .md 缺失 ({len(json_only)} 个): {sorted(json_only)}")
    elif md_only:
        print(f" ⚠️ .md 多 {len(md_only)} 个条件性字段")
    else:
        print(f" ✅ 完全一致 ({len(json_fields)} 个字段)")

    # Keep the data path in the .md document current.
    if actual_path and update_md_data_path(table_name, actual_path):
        print(" 📝 已更新 .md 文档数据路径")

    time.sleep(0.3)  # brief pause between successive API calls
    return _make_result(
        table_name,
        status,
        record_count=len(records),
        json_fields=json_fields,
        md_fields=md_fields,
        actual_data_path=actual_path,
    )


def main():
    """Audit every registry endpoint, sync data paths and write the report."""
    registry = load_registry()
    print(f"加载 API 注册表: {len(registry)} 个端点")
    print(f"时间范围: {START_TIME} ~ {END_TIME}")
    print(f"每接口获取: {LIMIT} 条")
    if not API_TOKEN:
        print(" ⚠️ 未配置 API_TOKEN,请求很可能会失败")
    print("=" * 80)

    results = []
    all_gaps = []
    registry_updates = {}  # table_name -> actual_data_path

    for entry in registry:
        result = _audit_endpoint(entry)
        results.append(result)
        if result["status"] == "gap":
            all_gaps.append(
                (result["table"], entry.get("name_zh", ""), result["json_only"])
            )
        new_path = result["actual_data_path"]
        if new_path and new_path != entry.get("data_path", ""):
            registry_updates[result["table"]] = new_path

    # ── Update data_path in api_registry.json ──
    if registry_updates:
        print(f"\n{'─' * 60}")
        print(f"更新 api_registry.json 中 {len(registry_updates)} 个 data_path...")
        for entry in registry:
            tid = entry["id"]
            if tid in registry_updates:
                entry["data_path"] = registry_updates[tid]
        with open(REGISTRY_PATH, "w", encoding="utf-8") as f:
            json.dump(registry, f, ensure_ascii=False, indent=2)
        print(" ✅ api_registry.json 已更新")

    # ── Summary ──
    print(f"\n{'=' * 80}")
    print("汇总报告")
    print(f"{'=' * 80}")

    gap_count = sum(1 for r in results if r["status"] == "gap")
    ok_count = sum(1 for r in results if r["status"] == "ok")
    skip_count = sum(1 for r in results if r["status"] == "skipped")
    err_count = sum(1 for r in results if r["status"] == "error")

    print(f" 完全一致: {ok_count}")
    print(f" 有缺失: {gap_count}")
    print(f" 跳过: {skip_count}")
    print(f" 错误: {err_count}")

    if all_gaps:
        print("\n需要补充到 .md 文档的字段:")
        for table, name_zh, fields in all_gaps:
            print(f" {table} ({name_zh}): {fields}")

    # Save the detailed results.
    out_path = os.path.join(REPORT_DIR, "json_refresh_audit.json")
    os.makedirs(REPORT_DIR, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已写入: {out_path}")


if __name__ == "__main__":
    main()

# AI_CHANGELOG:
# - Date: 2026-02-14
# - Prompt: P20260214-060000 — full JSON refresh + MD document completion + data path correction
# - Direct cause: the old JSON samples held a single record each and lacked conditional fields;
#   100 records must be re-fetched and walked to extract the fullest field set
# - Change summary: new script that (1) calls all 24 API endpoints for 100 records
#   (2) walks every record to extract the union of fields
#   (3) compares against the .md documents to find missing fields
#   (4) updates the JSON samples and data_path in api_registry.json
#   (5) updates the response-data-path row in the .md documents
# - Risk and verification: the script needs a valid API_TOKEN and network access;
#   verify by running it and checking that json_refresh_audit.json shows 24/24 passing