# -*- coding: utf-8 -*-
"""Full API JSON refresh + field analysis + MD doc completion + diff report (v2).

Time range: 2026-01-01 00:00:00 ~ 2026-02-13 00:00:00, 100 records per endpoint.

Improvements over v1:
- Nested objects such as siteProfile/tableProfile: when the MD doc already
  records them as ``object``, their sub-fields are not expanded.
- Request parameters and response fields are compared separately.
- Only top-level business fields are compared.
- Only genuinely missing new fields are appended to the MD docs.

Usage: python scripts/full_api_refresh_v2.py
"""
import json
import os
import re
import sys   # NOTE(review): unused in this file; kept in case callers import it transitively
import time  # NOTE(review): unused in this file; kept for the same reason
from datetime import datetime

import requests

# ── Configuration ─────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")
if not API_TOKEN:
    # Fall back to a repo-local .env file (KEY=VALUE lines, one per line).
    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
    if os.path.exists(env_path):
        with open(env_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line.startswith("API_TOKEN="):
                    API_TOKEN = line.split("=", 1)[1].strip()
                    break

SITE_ID = 2790685415443269  # NOTE(review): not referenced below; kept for reference
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100  # records requested per endpoint

SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")
REPORT_DIR = os.path.join("docs", "reports")
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")

HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json",
}

# Known nested-object field names: the MD docs record these as ``object`` and
# their sub-fields are not expanded during comparison.
KNOWN_NESTED_OBJECTS = {
    "siteProfile",
    "tableProfile",
    "settleList",
    "goodsStockWarningInfo",
    "goodsCategoryList",
}


def load_registry():
    """Load and return the API endpoint registry (a list of entry dicts)."""
    with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
        return json.load(f)


def call_api(module, action, body):
    """POST ``body`` to ``{API_BASE}{module}/{action}``.

    Returns the decoded JSON response, or None on any request/decoding
    failure (logged to stdout).
    """
    url = f"{API_BASE}{module}/{action}"
    try:
        resp = requests.post(url, json=body, headers=HEADERS, timeout=30)
        resp.raise_for_status()
        return resp.json()
    # Narrowed from a bare ``except Exception``: RequestException covers
    # connection/timeout/HTTP errors; ValueError covers JSON decoding on
    # older requests versions (newer ones raise a RequestException subclass).
    except (requests.RequestException, ValueError) as e:
        print(f" ❌ 请求失败: {e}")
        return None


def build_body(entry):
    """Build the request body for a registry entry.

    Starts from the entry's static ``body``, then fills in the time-range
    keys (first key = start, second = end) and pagination keys when the
    entry declares them.
    """
    body = dict(entry.get("body") or {})
    if entry.get("time_range") and entry.get("time_keys"):
        keys = entry["time_keys"]
        if len(keys) >= 2:
            body[keys[0]] = START_TIME
            body[keys[1]] = END_TIME
    if entry.get("pagination"):
        body[entry["pagination"].get("page_key", "page")] = 1
        body[entry["pagination"].get("limit_key", "limit")] = LIMIT
    return body


def unwrap_records(raw_json, entry):
    """Extract the list of business records from a raw API response.

    Resolution order:
    1. special-case ``tenant_member_balance_overview`` (``data`` is itself
       a summary object, wrapped in a one-element list);
    2. follow the entry's dotted ``data_path`` (``data.x.y`` → list);
    3. fallback: first list value inside the ``data`` dict (skipping
       "total"), or ``data`` itself when it is already a list.
    Returns [] when nothing matches.
    """
    if raw_json is None:
        return []
    data = raw_json.get("data")
    if data is None:
        return []
    table_name = entry["id"]
    data_path = entry.get("data_path", "")
    # tenant_member_balance_overview: ``data`` is the summary object itself.
    if table_name == "tenant_member_balance_overview":
        if isinstance(data, dict):
            return [data]
        return []
    # Resolve via data_path.
    if data_path and data_path.startswith("data."):
        path_parts = data_path.split(".")[1:]
        current = data
        for part in path_parts:
            if isinstance(current, dict):
                current = current.get(part)
            else:
                current = None
                break
        if isinstance(current, list):
            return current
    # Fallback.
    if isinstance(data, dict):
        for k, v in data.items():
            if isinstance(v, list) and k.lower() not in ("total",):
                return v
    if isinstance(data, list):
        return data
    return []


def get_top_level_fields(record):
    """Return {field name: coarse type} for a record's top-level fields only.

    Nested dicts/lists are reported as "object"/"array" without recursion.
    ``bool`` is tested before ``int`` because bool is an int subclass.
    """
    fields = {}
    if not isinstance(record, dict):
        return fields
    for k, v in record.items():
        if isinstance(v, dict):
            fields[k] = "object"
        elif isinstance(v, list):
            fields[k] = "array"
        elif isinstance(v, bool):
            fields[k] = "boolean"
        elif isinstance(v, int):
            fields[k] = "integer"
        elif isinstance(v, float):
            fields[k] = "number"
        elif v is None:
            fields[k] = "null"
        else:
            fields[k] = "string"
    return fields


def get_nested_fields(record, parent_key):
    """Return {"parent.child": coarse type} for one nested object's sub-fields.

    Returns {} when ``record[parent_key]`` is missing or not a dict.
    """
    obj = record.get(parent_key)
    if not isinstance(obj, dict):
        return {}
    fields = {}
    for k, v in obj.items():
        path = f"{parent_key}.{k}"
        if isinstance(v, dict):
            fields[path] = "object"
        elif isinstance(v, list):
            fields[path] = "array"
        elif isinstance(v, bool):
            fields[path] = "boolean"
        elif isinstance(v, int):
            fields[path] = "integer"
        elif isinstance(v, float):
            fields[path] = "number"
        elif v is None:
            fields[path] = "null"
        else:
            fields[path] = "string"
    return fields


def select_top5_richest(records):
    """Pick the 5 records with the most fields.

    Ties on field count break on serialized JSON length, then the original
    index keeps the sort stable. Non-dict records are ignored.
    """
    if not records:
        return []
    scored = []
    for i, rec in enumerate(records):
        if not isinstance(rec, dict):
            continue
        field_count = len(rec)
        json_len = len(json.dumps(rec, ensure_ascii=False))
        scored.append((field_count, json_len, i, rec))
    scored.sort(key=lambda x: (x[0], x[1]), reverse=True)
    return [item[3] for item in scored[:5]]


def collect_all_top_fields(records):
    """Aggregate top-level fields across all records.

    Returns {name: {"type", "count", "example"}} where ``count`` is the
    number of records containing the field and ``example`` is the first
    non-empty scalar value seen (truncated to 80 chars). ``0`` is treated
    as "empty" on purpose so a more meaningful example can still be found.
    """
    all_fields = {}
    for rec in records:
        if not isinstance(rec, dict):
            continue
        fields = get_top_level_fields(rec)
        for name, typ in fields.items():
            if name not in all_fields:
                all_fields[name] = {"type": typ, "count": 0, "example": None}
            all_fields[name]["count"] += 1
            if all_fields[name]["example"] is None:
                val = rec.get(name)
                if val is not None and val != "" and val != 0 and not isinstance(val, (dict, list)):
                    ex = str(val)
                    if len(ex) > 80:
                        ex = ex[:77] + "..."
                    all_fields[name]["example"] = ex
    return all_fields


def collect_nested_fields(records, parent_key):
    """Aggregate the sub-fields of one nested object across all records.

    Same shape as collect_all_top_fields, keyed by "parent.child" paths.
    """
    all_fields = {}
    for rec in records:
        if not isinstance(rec, dict):
            continue
        fields = get_nested_fields(rec, parent_key)
        for path, typ in fields.items():
            if path not in all_fields:
                all_fields[path] = {"type": typ, "count": 0, "example": None}
            all_fields[path]["count"] += 1
            if all_fields[path]["example"] is None:
                obj = rec.get(parent_key, {})
                k = path.split(".")[-1]
                val = obj.get(k) if isinstance(obj, dict) else None
                if val is not None and val != "" and val != 0 and not isinstance(val, (dict, list)):
                    ex = str(val)
                    if len(ex) > 80:
                        ex = ex[:77] + "..."
                    all_fields[path]["example"] = ex
    return all_fields


def extract_md_response_fields(table_name):
    """Extract field names from the "response fields" section of an MD doc.

    Returns (top_level_field_names, nested_field_paths, full_md_content).
    Field names are read from the first backtick-quoted cell of each table
    row; table-header words are skipped. When no explicit "响应字段" section
    heading is found, the whole document is scanned as a fallback.
    """
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(md_path):
        return set(), set(), ""
    with open(md_path, "r", encoding="utf-8") as f:
        content = f.read()
    response_fields = set()
    nested_fields = set()  # dotted paths such as siteProfile.xxx
    field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|', re.MULTILINE)
    header_fields = {"字段名", "类型", "示例值", "说明", "field", "example", "description", "type", "路径", "参数", "必填", "属性", "值"}
    # Hoisted: build the lower-cased header set once instead of per match.
    header_lower = {h.lower() for h in header_fields}

    def _collect(text):
        # Classify each `field` cell: dotted names are nested sub-fields,
        # everything else is a top-level response field.
        for m in field_pattern.finditer(text):
            raw = m.group(1).strip()
            if raw.lower() in header_lower:
                continue
            if "." in raw:
                nested_fields.add(raw)
            else:
                response_fields.add(raw)

    # Locate the bounds of the "四、响应字段" section.
    in_response = False
    lines = content.split("\n")
    response_start = None
    response_end = len(lines)
    for i, line in enumerate(lines):
        s = line.strip()
        if ("## 四" in s or "## 4" in s) and "响应字段" in s:
            in_response = True
            response_start = i
            continue
        if in_response and s.startswith("## ") and "响应字段" not in s:
            response_end = i
            break
    if response_start is None:
        # No explicit response-fields section; scan the whole document.
        _collect(content)
        return response_fields, nested_fields, content
    # Only extract from the response-fields section.
    response_section = "\n".join(lines[response_start:response_end])
    _collect(response_section)
    return response_fields, nested_fields, content


def compare_fields(json_fields, md_fields, md_nested_fields, table_name):
    """Compare observed JSON fields against documented MD fields.

    Returns (missing_in_md, extra_in_md):
    - missing_in_md: sorted [(name, info)] present in JSON but not in MD;
    - extra_in_md: sorted names present in MD but not in this JSON batch.

    ``md_nested_fields`` and ``table_name`` are accepted for interface
    compatibility but unused. The original version also tried to skip
    KNOWN_NESTED_OBJECTS here, but that check could never fire: names drawn
    from ``json_names - md_names`` are by definition not in ``md_names``,
    so the dead branch has been removed without changing behavior.
    """
    json_names = set(json_fields.keys())
    md_names = set(md_fields) if isinstance(md_fields, set) else set(md_fields)
    missing_in_md = [(name, json_fields[name]) for name in sorted(json_names - md_names)]
    extra_in_md = sorted(md_names - json_names)
    return missing_in_md, extra_in_md


def save_top5_sample(table_name, top5):
    """Write the 5 richest records to the samples dir; return the file path."""
    # Robustness: the samples directory may not exist on a fresh checkout.
    os.makedirs(SAMPLES_DIR, exist_ok=True)
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    with open(sample_path, "w", encoding="utf-8") as f:
        json.dump(top5, f, ensure_ascii=False, indent=2)
    return sample_path


def update_md_with_missing_fields(table_name, missing_fields, md_content):
    """Append genuinely missing fields to the end of the MD response table.

    Inserts one table row per (name, info) pair right after the last data
    row of the response-fields section. Returns True when the file was
    rewritten, False when there was nothing to add or no insertion point.
    """
    if not missing_fields:
        return False
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(md_path):
        return False
    lines = md_content.split("\n")
    # Find the last table row of the response-fields section.
    insert_idx = None
    in_response = False
    last_table_row = None
    for i, line in enumerate(lines):
        s = line.strip()
        if ("## 四" in s or "## 4" in s) and "响应字段" in s:
            in_response = True
            continue
        if in_response and s.startswith("## ") and "响应字段" not in s:
            insert_idx = last_table_row
            break
        if in_response and s.startswith("|") and "---" not in s:
            # Is this a header row? Data rows win; a header row is only
            # accepted as anchor when no row has been seen yet (so a table
            # that has headers but no data rows still gets an insert point).
            if not any(h in s for h in ["字段名", "字段", "类型", "说明"]):
                last_table_row = i
            elif last_table_row is None:
                last_table_row = i
    if insert_idx is None and last_table_row is not None:
        insert_idx = last_table_row
    if insert_idx is None:
        return False
    new_rows = []
    for name, info in missing_fields:
        typ = info["type"]
        example = info["example"] or ""
        count = info["count"]
        new_rows.append(
            f"| `{name}` | {typ} | {example} | "
            f"(新发现字段,{count}/{LIMIT} 条记录中出现) |"
        )
    # Insert in reverse so the rows end up in their original order.
    for row in reversed(new_rows):
        lines.insert(insert_idx + 1, row)
    with open(md_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    return True


def generate_report(results):
    """Render the final JSON-vs-MD comparison report as Markdown text."""
    lines = []
    lines.append("# API JSON 字段 vs MD 文档对比报告")
    lines.append("")
    # NOTE(review): datetime.now() is local time; the "(Asia/Shanghai)" label
    # is only accurate when the host runs in that timezone — confirm.
    lines.append(f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (Asia/Shanghai)")
    lines.append(f"数据范围:{START_TIME} ~ {END_TIME}")
    lines.append(f"每接口获取:{LIMIT} 条")
    lines.append("")
    # Summary table.
    ok = sum(1 for r in results if r["status"] == "ok")
    gap = sum(1 for r in results if r["status"] == "gap")
    skip = sum(1 for r in results if r["status"] == "skipped")
    err = sum(1 for r in results if r["status"] == "error")
    lines.append("## 汇总")
    lines.append("")
    lines.append("| 状态 | 数量 |")
    lines.append("|------|------|")
    lines.append(f"| ✅ 完全一致 | {ok} |")
    lines.append(f"| ⚠️ 有新字段(已补充) | {gap} |")
    lines.append(f"| ⏭️ 跳过 | {skip} |")
    lines.append(f"| 💥 错误 | {err} |")
    lines.append(f"| 合计 | {len(results)} |")
    lines.append("")
    # Per-endpoint details.
    lines.append("## 各接口详情")
    lines.append("")
    for r in results:
        icon = {"ok": "✅", "gap": "⚠️", "skipped": "⏭️", "error": "💥"}.get(r["status"], "❓")
        lines.append(f"### {r['table']} ({r.get('name_zh', '')})")
        lines.append("")
        lines.append(f"| 项目 | 值 |")
        lines.append(f"|------|-----|")
        lines.append(f"| 状态 | {icon} {r['status']} |")
        lines.append(f"| 获取记录数 | {r['record_count']} |")
        lines.append(f"| JSON 顶层字段数 | {r['json_field_count']} |")
        lines.append(f"| MD 响应字段数 | {r['md_field_count']} |")
        lines.append(f"| 数据路径 | `{r.get('data_path', 'N/A')}` |")
        if r.get("top5_field_counts"):
            lines.append(f"| 前5条最全记录字段数 | {r['top5_field_counts']} |")
        lines.append("")
        if r.get("missing_in_md"):
            lines.append("新发现字段(已补充到 MD):")
            lines.append("")
            lines.append("| 字段名 | 类型 | 示例 | 出现次数 |")
            lines.append("|--------|------|------|----------|")
            for name, info in r["missing_in_md"]:
                lines.append(f"| `{name}` | {info['type']} | {info.get('example', '')} | {info['count']} |")
            lines.append("")
        if r.get("extra_in_md"):
            lines.append(f"MD 中有但本次 JSON 未出现的字段(可能为条件性字段):`{'`, `'.join(r['extra_in_md'])}`")
            lines.append("")
        # Nested-object sub-field summary.
        if r.get("nested_summary"):
            for parent, count in r["nested_summary"].items():
                lines.append(f"嵌套对象 `{parent}` 含 {count} 个子字段(MD 中已记录为 object,不逐字段展开)")
            lines.append("")
    # Appendix: common siteProfile fields.
    lines.append("## 附录:siteProfile 通用字段参考")
    lines.append("")
    lines.append("以下字段在大多数接口的 `siteProfile` 嵌套对象中出现,为门店信息快照(冗余),各接口结构一致:")
    lines.append("")
    lines.append("| 字段 | 类型 | 说明 |")
    lines.append("|------|------|------|")
    lines.append("| `id` | integer | 门店 ID |")
    lines.append("| `org_id` | integer | 组织 ID |")
    lines.append("| `shop_name` | string | 门店名称 |")
    lines.append("| `avatar` | string | 门店头像 URL |")
    lines.append("| `business_tel` | string | 门店电话 |")
    lines.append("| `full_address` | string | 完整地址 |")
    lines.append("| `address` | string | 简短地址 |")
    lines.append("| `longitude` | number | 经度 |")
    lines.append("| `latitude` | number | 纬度 |")
    lines.append("| `tenant_site_region_id` | integer | 区域 ID |")
    lines.append("| `tenant_id` | integer | 租户 ID |")
    lines.append("| `auto_light` | integer | 自动开灯 |")
    lines.append("| `attendance_distance` | integer | 考勤距离 |")
    lines.append("| `attendance_enabled` | integer | 考勤启用 |")
    lines.append("| `wifi_name` | string | WiFi 名称 |")
    lines.append("| `wifi_password` | string | WiFi 密码 |")
    lines.append("| `customer_service_qrcode` | string | 客服二维码 |")
    lines.append("| `customer_service_wechat` | string | 客服微信 |")
    lines.append("| `fixed_pay_qrCode` | string | 固定支付二维码 |")
    lines.append("| `prod_env` | integer | 生产环境标识 |")
    lines.append("| `light_status` | integer | 灯光状态 |")
    lines.append("| `light_type` | integer | 灯光类型 |")
    lines.append("| `light_token` | string | 灯光控制 token |")
    lines.append("| `site_type` | integer | 门店类型 |")
    lines.append("| `site_label` | string | 门店标签 |")
    lines.append("| `shop_status` | integer | 门店状态 |")
    lines.append("")
    return "\n".join(lines)


def main():
    """Drive the refresh: fetch/cache JSON, compare with MD, write reports."""
    registry = load_registry()
    # Robustness: ensure output directories exist before any file writes.
    os.makedirs(SAMPLES_DIR, exist_ok=True)
    print(f"加载 API 注册表: {len(registry)} 个端点")
    print(f"时间范围: {START_TIME} ~ {END_TIME}")
    print(f"每接口获取: {LIMIT} 条")
    print("=" * 80)
    results = []
    for entry in registry:
        table_name = entry["id"]
        name_zh = entry.get("name_zh", "")
        module = entry["module"]
        action = entry["action"]
        skip = entry.get("skip", False)
        print(f"\n{'─' * 60}")
        print(f"[{table_name}] {name_zh} — {module}/{action}")
        if skip:
            print(" ⏭️ 跳过")
            results.append({
                "table": table_name,
                "name_zh": name_zh,
                "status": "skipped",
                "record_count": 0,
                "json_field_count": 0,
                "md_field_count": 0,
                "data_path": entry.get("data_path"),
            })
            continue
        # Reuse the cached raw JSON from a previous run when present.
        raw_path = os.path.join(SAMPLES_DIR, f"{table_name}_raw.json")
        if os.path.exists(raw_path):
            with open(raw_path, "r", encoding="utf-8") as f:
                raw = json.load(f)
            print(f" 使用已缓存的原始响应")
        else:
            body = build_body(entry)
            print(f" 请求: POST {module}/{action}")
            raw = call_api(module, action, body)
            if raw:
                with open(raw_path, "w", encoding="utf-8") as f:
                    json.dump(raw, f, ensure_ascii=False, indent=2)
        if raw is None:
            results.append({
                "table": table_name,
                "name_zh": name_zh,
                "status": "error",
                "record_count": 0,
                "json_field_count": 0,
                "md_field_count": 0,
                "data_path": entry.get("data_path"),
            })
            continue
        records = unwrap_records(raw, entry)
        print(f" 记录数: {len(records)}")
        if not records:
            results.append({
                "table": table_name,
                "name_zh": name_zh,
                "status": "ok",
                "record_count": 0,
                "json_field_count": 0,
                "md_field_count": 0,
                "data_path": entry.get("data_path"),
            })
            continue
        # Pick the 5 records with the most fields.
        top5 = select_top5_richest(records)
        top5_counts = [len(r) for r in top5]
        print(f" 前 5 条最全记录顶层字段数: {top5_counts}")
        # Persist them as the sample file.
        save_top5_sample(table_name, top5)
        # Collect all top-level fields.
        json_fields = collect_all_top_fields(records)
        print(f" JSON 顶层字段数: {len(json_fields)}")
        # Collect nested sub-fields (report-only; not used for comparison).
        nested_summary = {}
        for name, info in json_fields.items():
            if info["type"] == "object" and name in KNOWN_NESTED_OBJECTS:
                nested = collect_nested_fields(records, name)
                nested_summary[name] = len(nested)
        # Extract the documented response fields from the MD doc.
        md_fields, md_nested, md_content = extract_md_response_fields(table_name)
        print(f" MD 响应字段数: {len(md_fields)}")
        # Compare.
        missing_in_md, extra_in_md = compare_fields(json_fields, md_fields, md_nested, table_name)
        # NOTE(review): this filter is vacuously true — every ``n`` comes from
        # ``missing_in_md`` so ``n not in md_fields`` always holds and nothing
        # is dropped. Known nested objects entirely absent from the MD are
        # therefore still reported (as type "object"); confirm intent before
        # tightening the predicate.
        real_missing = [(n, i) for n, i in missing_in_md if n not in KNOWN_NESTED_OBJECTS or n not in md_fields]
        status = "ok" if not real_missing else "gap"
        if real_missing:
            print(f" ⚠️ 发现 {len(real_missing)} 个新字段:")
            for name, info in real_missing:
                print(f" + {name} ({info['type']}, {info['count']}次)")
            # Append the new rows to the MD doc.
            updated = update_md_with_missing_fields(table_name, real_missing, md_content)
            if updated:
                print(f" 📝 已补充到 MD 文档")
        else:
            print(f" ✅ 字段完全覆盖")
        if extra_in_md:
            print(f" ℹ️ MD 多 {len(extra_in_md)} 个条件性字段")
        results.append({
            "table": table_name,
            "name_zh": name_zh,
            "status": status,
            "record_count": len(records),
            "json_field_count": len(json_fields),
            "md_field_count": len(md_fields),
            "data_path": entry.get("data_path"),
            "missing_in_md": real_missing,
            "extra_in_md": extra_in_md,
            "top5_field_counts": top5_counts,
            "nested_summary": nested_summary,
        })
    # ── Report generation ──
    print(f"\n{'=' * 80}")
    print("生成对比报告...")
    report = generate_report(results)
    os.makedirs(REPORT_DIR, exist_ok=True)
    report_path = os.path.join(REPORT_DIR, "api_json_vs_md_report_20260214.md")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)
    print(f"报告: {report_path}")
    # Detailed results as JSON (examples dropped from missing_in_md to keep
    # the file compact).
    json_path = os.path.join(REPORT_DIR, "api_refresh_detail_20260214.json")
    serializable = []
    for r in results:
        sr = dict(r)
        if "missing_in_md" in sr and sr["missing_in_md"]:
            sr["missing_in_md"] = [(n, {"type": i["type"], "count": i["count"]}) for n, i in sr["missing_in_md"]]
        serializable.append(sr)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(serializable, f, ensure_ascii=False, indent=2)
    # Final console summary.
    ok = sum(1 for r in results if r["status"] == "ok")
    gap = sum(1 for r in results if r["status"] == "gap")
    skip = sum(1 for r in results if r["status"] == "skipped")
    err = sum(1 for r in results if r["status"] == "error")
    print(f"\n汇总: ✅ {ok} | ⚠️ {gap} | ⏭️ {skip} | 💥 {err}")


if __name__ == "__main__":
    main()