init: 项目初始提交 - NeoZQYY Monorepo 完整代码
This commit is contained in:
634
apps/etl/pipelines/feiqiu/scripts/full_api_refresh_v2.py
Normal file
634
apps/etl/pipelines/feiqiu/scripts/full_api_refresh_v2.py
Normal file
@@ -0,0 +1,634 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
全量 API JSON 刷新 + 字段分析 + MD 文档完善 + 对比报告(v2)
|
||||
时间范围:2026-01-01 00:00:00 ~ 2026-02-13 00:00:00,每接口 100 条
|
||||
|
||||
改进点(相比 v1):
|
||||
- siteProfile/tableProfile 等嵌套对象:MD 中已记录为 object 则不展开子字段
|
||||
- 请求参数与响应字段分开对比
|
||||
- 只对比顶层业务字段
|
||||
- 真正缺失的新字段才补充到 MD
|
||||
|
||||
用法:python scripts/full_api_refresh_v2.py
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
|
||||
# ── Configuration ─────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"


def _read_token_from_env_file(env_file):
    """Return the ``API_TOKEN=`` value found in *env_file*, or "" if none.

    Only the first line starting with ``API_TOKEN=`` is considered; the
    value is everything after the first ``=``, stripped of whitespace.
    """
    if not os.path.exists(env_file):
        return ""
    with open(env_file, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry.startswith("API_TOKEN="):
                return entry.split("=", 1)[1].strip()
    return ""


# The environment variable wins; the .env file one directory above this
# script is only consulted when the variable is unset or empty.
API_TOKEN = os.environ.get("API_TOKEN", "") or _read_token_from_env_file(
    os.path.join(os.path.dirname(__file__), "..", ".env")
)

SITE_ID = 2790685415443269
START_TIME = "2026-01-01 00:00:00"
END_TIME = "2026-02-13 00:00:00"
LIMIT = 100

DOCS_DIR = os.path.join("docs", "api-reference")
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
REPORT_DIR = os.path.join("docs", "reports")
REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")

HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json",
}

# Field names known to hold nested objects (documented in the MD files as
# a single `object` entry; their sub-fields are not expanded).
KNOWN_NESTED_OBJECTS = {
    "siteProfile",
    "tableProfile",
    "settleList",
    "goodsStockWarningInfo",
    "goodsCategoryList",
}
|
||||
|
||||
|
||||
def load_registry():
    """Read ``REGISTRY_PATH`` as UTF-8 JSON and return the parsed value."""
    with open(REGISTRY_PATH, "r", encoding="utf-8") as registry_file:
        registry = json.load(registry_file)
    return registry
|
||||
|
||||
|
||||
def call_api(module, action, body):
    """POST *body* as JSON to ``{API_BASE}{module}/{action}``.

    Returns the decoded JSON response. If sending the request, the HTTP
    status check, or JSON decoding raises, the error is printed and
    ``None`` is returned instead (best-effort: one failing endpoint must
    not abort the whole run).
    """
    endpoint = f"{API_BASE}{module}/{action}"
    try:
        response = requests.post(endpoint, json=body, headers=HEADERS, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:
        print(f" ❌ 请求失败: {exc}")
        return None
    return payload
|
||||
|
||||
|
||||
def build_body(entry):
    """Build the request body for one registry *entry*.

    Starts from a copy of ``entry["body"]`` (or an empty dict), then:
    - when ``time_range`` is truthy and ``time_keys`` has at least two
      items, stores ``START_TIME`` / ``END_TIME`` under the first two keys;
    - when ``pagination`` is truthy, stores page ``1`` and ``LIMIT`` under
      its ``page_key`` / ``limit_key`` (defaults ``"page"`` / ``"limit"``).
    """
    payload = dict(entry.get("body") or {})

    time_keys = entry.get("time_keys")
    if entry.get("time_range") and time_keys and len(time_keys) >= 2:
        start_key, end_key = time_keys[0], time_keys[1]
        payload[start_key] = START_TIME
        payload[end_key] = END_TIME

    paging = entry.get("pagination")
    if paging:
        payload[paging.get("page_key", "page")] = 1
        payload[paging.get("limit_key", "limit")] = LIMIT

    return payload
|
||||
|
||||
|
||||
def unwrap_records(raw_json, entry):
    """Extract the list of business records from a raw API response.

    Args:
        raw_json: Decoded response body. Anything that is not a dict
            (``None``, a bare list, a string, ...) yields ``[]``.
        entry: Registry entry; ``entry["id"]`` and the optional
            ``entry["data_path"]`` (e.g. ``"data.list"``) are read.

    Returns:
        A list of records, resolved in this order:
        1. ``tenant_member_balance_overview``: ``data`` itself is the
           summary object and is returned as a one-element list.
        2. ``data_path`` starting with ``"data."``: the path is walked
           through nested dicts and used if it ends at a list.
        3. Fallback: the first list-valued entry of ``data`` whose key is
           not ``total``, or ``data`` itself when it is a list.
        ``[]`` when nothing matches.
    """
    # A non-dict response has no ``.get``; treat it like a missing response
    # instead of raising AttributeError.
    if not isinstance(raw_json, dict):
        return []
    data = raw_json.get("data")
    if data is None:
        return []

    table_name = entry["id"]
    data_path = entry.get("data_path", "")

    # tenant_member_balance_overview: ``data`` is the summary object itself.
    if table_name == "tenant_member_balance_overview":
        return [data] if isinstance(data, dict) else []

    # Resolve via the configured data_path.
    if data_path and data_path.startswith("data."):
        current = data
        for part in data_path.split(".")[1:]:
            if not isinstance(current, dict):
                current = None
                break
            current = current.get(part)
        if isinstance(current, list):
            return current

    # Fallback heuristics when the path is absent or did not hit a list.
    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, list) and key.lower() not in ("total",):
                return value
    if isinstance(data, list):
        return data
    return []
|
||||
|
||||
|
||||
|
||||
def get_top_level_fields(record):
    """Map each top-level key of *record* to a JSON-style type label.

    Nested containers are labelled ``"object"`` / ``"array"`` and never
    descended into. A non-dict *record* yields an empty dict.
    """
    if not isinstance(record, dict):
        return {}

    # Order matters: bool is a subclass of int, so it must be tested first.
    type_table = (
        (dict, "object"),
        (list, "array"),
        (bool, "boolean"),
        (int, "integer"),
        (float, "number"),
    )

    summary = {}
    for key, value in record.items():
        if value is None:
            summary[key] = "null"
            continue
        summary[key] = next(
            (label for py_type, label in type_table if isinstance(value, py_type)),
            "string",
        )
    return summary
|
||||
|
||||
|
||||
def get_nested_fields(record, parent_key):
    """Describe the direct children of the nested object ``record[parent_key]``.

    Returns a dict keyed by ``"<parent_key>.<child>"`` with a JSON-style
    type label as value, or ``{}`` when the parent value is not a dict.
    """
    nested = record.get(parent_key)
    if not isinstance(nested, dict):
        return {}

    def type_label(value):
        if value is None:
            return "null"
        # bool before int: isinstance(True, int) is True.
        if isinstance(value, bool):
            return "boolean"
        for py_type, label in (
            (dict, "object"),
            (list, "array"),
            (int, "integer"),
            (float, "number"),
        ):
            if isinstance(value, py_type):
                return label
        return "string"

    return {
        f"{parent_key}.{child}": type_label(value)
        for child, value in nested.items()
    }
|
||||
|
||||
|
||||
def select_top5_richest(records):
    """Return up to five dict records with the most top-level fields.

    Records are ranked by field count, then by the length of their JSON
    serialisation (``ensure_ascii=False``), both descending; ties keep
    their input order. Non-dict items are ignored.
    """
    if not records:
        return []

    candidates = [rec for rec in records if isinstance(rec, dict)]
    ranked = sorted(
        candidates,
        key=lambda rec: (len(rec), len(json.dumps(rec, ensure_ascii=False))),
        reverse=True,
    )
    return ranked[:5]
|
||||
|
||||
|
||||
def collect_all_top_fields(records):
    """Aggregate the top-level fields seen across all *records*.

    Returns a dict ``name -> {"type", "count", "example"}`` where

    - ``type`` is the label from ``get_top_level_fields`` for the first
      record in which the field is non-null (it stays ``"null"`` only when
      the field is null in every record);
    - ``count`` is the number of dict records containing the field;
    - ``example`` is the first scalar value that is not ``None``, ``""``
      or ``0``, stringified and truncated to 80 characters, or ``None``
      when no such value was seen.

    Non-dict records are skipped.
    """
    all_fields = {}
    for rec in records:
        if not isinstance(rec, dict):
            continue
        for name, typ in get_top_level_fields(rec).items():
            info = all_fields.get(name)
            if info is None:
                info = all_fields[name] = {"type": typ, "count": 0, "example": None}
            elif info["type"] == "null" and typ != "null":
                # The first sighting was null; a later record reveals the
                # real type, which is what the documentation should show.
                info["type"] = typ
            info["count"] += 1

            if info["example"] is not None:
                continue
            val = rec.get(name)
            if val is not None and val != "" and val != 0 and not isinstance(val, (dict, list)):
                ex = str(val)
                if len(ex) > 80:
                    ex = ex[:77] + "..."
                info["example"] = ex
    return all_fields
|
||||
|
||||
|
||||
def collect_nested_fields(records, parent_key):
    """Aggregate the child fields of the nested object *parent_key*.

    Returns a dict ``"<parent_key>.<child>" -> {"type", "count", "example"}``
    built from ``get_nested_fields`` over every dict record:

    - ``type`` is the first non-null label seen for the child (``"null"``
      only when it is null everywhere);
    - ``count`` is the number of records whose nested object has the child;
    - ``example`` is the first scalar value that is not ``None``, ``""``
      or ``0``, stringified and truncated to 80 characters, else ``None``.
    """
    # Paths are "<parent_key>.<child>"; slicing off this prefix recovers the
    # child key even when the child key itself contains a dot.
    prefix_len = len(f"{parent_key}.")

    all_fields = {}
    for rec in records:
        if not isinstance(rec, dict):
            continue
        obj = rec.get(parent_key)
        for path, typ in get_nested_fields(rec, parent_key).items():
            info = all_fields.get(path)
            if info is None:
                info = all_fields[path] = {"type": typ, "count": 0, "example": None}
            elif info["type"] == "null" and typ != "null":
                # Upgrade a placeholder "null" once a real type shows up.
                info["type"] = typ
            info["count"] += 1

            if info["example"] is not None:
                continue
            val = obj.get(path[prefix_len:]) if isinstance(obj, dict) else None
            if val is not None and val != "" and val != 0 and not isinstance(val, (dict, list)):
                ex = str(val)
                if len(ex) > 80:
                    ex = ex[:77] + "..."
                info["example"] = ex
    return all_fields
|
||||
|
||||
|
||||
def extract_md_response_fields(table_name):
    """Extract documented response field names from ``<DOCS_DIR>/<table_name>.md``.

    Only table rows whose first cell is a back-quoted name are considered.
    When the document has a response-fields heading (a line containing
    ``## 四`` or ``## 4`` together with ``响应字段``), only that section, up
    to the next ``## `` heading, is scanned, which keeps request parameters
    out. Otherwise the whole document is scanned.

    Returns:
        ``(top_level_names, dotted_names, content)``, where names containing
        a ``.`` go to the second set and ``content`` is the full file text.
        ``(set(), set(), "")`` when the file does not exist.
    """
    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(md_path):
        return set(), set(), ""

    with open(md_path, "r", encoding="utf-8") as md_file:
        content = md_file.read()

    field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|', re.MULTILINE)
    header_fields = {"字段名", "类型", "示例值", "说明", "field", "example",
                     "description", "type", "路径", "参数", "必填", "属性", "值"}
    # Table-header captions, compared case-insensitively.
    skip_names = {caption.lower() for caption in header_fields}

    # Locate the bounds of the response-fields section, if any.
    doc_lines = content.split("\n")
    section_start = None
    section_end = len(doc_lines)
    for idx, doc_line in enumerate(doc_lines):
        text = doc_line.strip()
        if ("## 四" in text or "## 4" in text) and "响应字段" in text:
            section_start = idx
            continue
        if section_start is not None and text.startswith("## ") and "响应字段" not in text:
            section_end = idx
            break

    if section_start is None:
        # No explicit response section: fall back to the whole document.
        scope = content
    else:
        scope = "\n".join(doc_lines[section_start:section_end])

    top_level, dotted = set(), set()
    for match in field_pattern.finditer(scope):
        name = match.group(1).strip()
        if name.lower() in skip_names:
            continue
        if "." in name:
            dotted.add(name)
        else:
            top_level.add(name)

    return top_level, dotted, content
|
||||
|
||||
|
||||
def compare_fields(json_fields, md_fields, md_nested_fields, table_name):
    """Diff the field names seen in JSON against those documented in MD.

    Args:
        json_fields: Mapping ``name -> info`` of fields observed in JSON.
        md_fields: Iterable of top-level field names documented in the MD.
        md_nested_fields: Unused; kept so existing callers keep working.
        table_name: Unused; kept so existing callers keep working.

    Returns:
        ``(missing_in_md, extra_in_md)``: a name-sorted list of
        ``(name, info)`` pairs present in JSON but not in the MD, and a
        sorted list of names present in the MD but not in JSON.
    """
    json_names = set(json_fields)
    md_names = set(md_fields)

    # Names here are by construction absent from the MD, so no additional
    # "already documented as object" filtering can ever apply.
    missing_in_md = [
        (name, json_fields[name]) for name in sorted(json_names - md_names)
    ]
    extra_in_md = sorted(md_names - json_names)
    return missing_in_md, extra_in_md
|
||||
|
||||
|
||||
def save_top5_sample(table_name, top5):
    """Write *top5* to ``<SAMPLES_DIR>/<table_name>.json`` and return the path.

    The samples directory is created when missing so the write does not
    fail on a fresh checkout. Output is UTF-8, non-ASCII preserved,
    indented by two spaces.
    """
    os.makedirs(SAMPLES_DIR, exist_ok=True)
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    with open(sample_path, "w", encoding="utf-8") as f:
        json.dump(top5, f, ensure_ascii=False, indent=2)
    return sample_path
|
||||
|
||||
|
||||
|
||||
def update_md_with_missing_fields(table_name, missing_fields, md_content):
    """Append newly discovered fields to the MD response-fields table.

    Args:
        table_name: Basename of the document, ``<DOCS_DIR>/<table_name>.md``.
        missing_fields: List of ``(name, info)`` pairs; ``info`` provides
            ``type``, ``example`` and ``count``.
        md_content: Current text of the document (the file is rewritten
            from this text, not re-read).

    Returns:
        ``True`` when the file was rewritten with the new rows, ``False``
        when there was nothing to add, the file does not exist, or the
        response-fields section contains no table.
    """
    if not missing_fields:
        return False

    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(md_path):
        return False

    lines = md_content.split("\n")

    # Anchor on the last table line of the response-fields section. Every
    # line starting with "|" counts (header and separator included), so new
    # rows always land after the table's final line: never between header
    # and separator, and never before a data row whose text merely happens
    # to contain a header word.
    in_response = False
    last_table_row = None
    for i, line in enumerate(lines):
        s = line.strip()
        if ("## 四" in s or "## 4" in s) and "响应字段" in s:
            in_response = True
            continue
        if not in_response:
            continue
        if s.startswith("## ") and "响应字段" not in s:
            break
        if s.startswith("|"):
            last_table_row = i

    if last_table_row is None:
        return False

    new_rows = []
    for name, info in missing_fields:
        # A raw "|" or a line break inside a cell would split or end the
        # table row, so neutralise them in the sampled example value.
        example = (
            str(info["example"] or "")
            .replace("|", "\\|")
            .replace("\r", " ")
            .replace("\n", " ")
        )
        new_rows.append(
            f"| `{name}` | {info['type']} | {example} | "
            f"(新发现字段,{info['count']}/{LIMIT} 条记录中出现) |"
        )

    insert_at = last_table_row + 1
    lines[insert_at:insert_at] = new_rows

    with open(md_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    return True
|
||||
|
||||
|
||||
def generate_report(results):
    """Render the JSON-vs-MD comparison report as Markdown text.

    Args:
        results: List of per-endpoint result dicts as built by ``main``.
            Required keys: ``table``, ``status``, ``record_count``,
            ``json_field_count``, ``md_field_count``. Optional keys:
            ``name_zh``, ``data_path``, ``top5_field_counts``,
            ``missing_in_md``, ``extra_in_md``, ``nested_summary``.

    Returns:
        The full report as one newline-joined string: header, status
        summary table, one section per endpoint, and a static appendix
        describing the common ``siteProfile`` fields.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timedelta, timezone

    # The header labels the timestamp as Asia/Shanghai, so compute it in
    # that zone (fixed UTC+8) rather than in the machine's local time.
    shanghai = timezone(timedelta(hours=8))
    generated_at = datetime.now(shanghai).strftime('%Y-%m-%d %H:%M:%S')

    lines = [
        "# API JSON 字段 vs MD 文档对比报告",
        "",
        f"生成时间:{generated_at} (Asia/Shanghai)",
        f"数据范围:{START_TIME} ~ {END_TIME}",
        f"每接口获取:{LIMIT} 条",
        "",
    ]

    # Summary
    ok = sum(1 for r in results if r["status"] == "ok")
    gap = sum(1 for r in results if r["status"] == "gap")
    skip = sum(1 for r in results if r["status"] == "skipped")
    err = sum(1 for r in results if r["status"] == "error")

    lines.extend([
        "## 汇总",
        "",
        "| 状态 | 数量 |",
        "|------|------|",
        f"| ✅ 完全一致 | {ok} |",
        f"| ⚠️ 有新字段(已补充) | {gap} |",
        f"| ⏭️ 跳过 | {skip} |",
        f"| 💥 错误 | {err} |",
        f"| 合计 | {len(results)} |",
        "",
    ])

    # Per-endpoint details
    lines.append("## 各接口详情")
    lines.append("")

    icons = {"ok": "✅", "gap": "⚠️", "skipped": "⏭️", "error": "💥"}
    for r in results:
        icon = icons.get(r["status"], "❓")
        lines.append(f"### {r['table']} ({r.get('name_zh', '')})")
        lines.append("")
        lines.append("| 项目 | 值 |")
        lines.append("|------|-----|")
        lines.append(f"| 状态 | {icon} {r['status']} |")
        lines.append(f"| 获取记录数 | {r['record_count']} |")
        lines.append(f"| JSON 顶层字段数 | {r['json_field_count']} |")
        lines.append(f"| MD 响应字段数 | {r['md_field_count']} |")
        # `data_path` is usually present but may be None; `or` covers both
        # the missing key and the None value.
        lines.append(f"| 数据路径 | `{r.get('data_path') or 'N/A'}` |")
        if r.get("top5_field_counts"):
            lines.append(f"| 前5条最全记录字段数 | {r['top5_field_counts']} |")
        lines.append("")

        if r.get("missing_in_md"):
            lines.append("新发现字段(已补充到 MD):")
            lines.append("")
            lines.append("| 字段名 | 类型 | 示例 | 出现次数 |")
            lines.append("|--------|------|------|----------|")
            for name, info in r["missing_in_md"]:
                # `example` exists with value None when no sample was
                # found; render an empty cell instead of the text "None".
                example = info.get("example") or ""
                lines.append(f"| `{name}` | {info['type']} | {example} | {info['count']} |")
            lines.append("")

        if r.get("extra_in_md"):
            lines.append(f"MD 中有但本次 JSON 未出现的字段(可能为条件性字段):`{'`, `'.join(r['extra_in_md'])}`")
            lines.append("")

        # Nested-object child-field summary
        if r.get("nested_summary"):
            for parent, count in r["nested_summary"].items():
                lines.append(f"嵌套对象 `{parent}` 含 {count} 个子字段(MD 中已记录为 object,不逐字段展开)")
            lines.append("")

    # Appendix: common siteProfile field reference (static table)
    lines.append("## 附录:siteProfile 通用字段参考")
    lines.append("")
    lines.append("以下字段在大多数接口的 `siteProfile` 嵌套对象中出现,为门店信息快照(冗余),各接口结构一致:")
    lines.append("")
    lines.append("| 字段 | 类型 | 说明 |")
    lines.append("|------|------|------|")
    site_profile_rows = (
        ("id", "integer", "门店 ID"),
        ("org_id", "integer", "组织 ID"),
        ("shop_name", "string", "门店名称"),
        ("avatar", "string", "门店头像 URL"),
        ("business_tel", "string", "门店电话"),
        ("full_address", "string", "完整地址"),
        ("address", "string", "简短地址"),
        ("longitude", "number", "经度"),
        ("latitude", "number", "纬度"),
        ("tenant_site_region_id", "integer", "区域 ID"),
        ("tenant_id", "integer", "租户 ID"),
        ("auto_light", "integer", "自动开灯"),
        ("attendance_distance", "integer", "考勤距离"),
        ("attendance_enabled", "integer", "考勤启用"),
        ("wifi_name", "string", "WiFi 名称"),
        ("wifi_password", "string", "WiFi 密码"),
        ("customer_service_qrcode", "string", "客服二维码"),
        ("customer_service_wechat", "string", "客服微信"),
        ("fixed_pay_qrCode", "string", "固定支付二维码"),
        ("prod_env", "integer", "生产环境标识"),
        ("light_status", "integer", "灯光状态"),
        ("light_type", "integer", "灯光类型"),
        ("light_token", "string", "灯光控制 token"),
        ("site_type", "integer", "门店类型"),
        ("site_label", "string", "门店标签"),
        ("shop_status", "integer", "门店状态"),
    )
    for field, typ, desc in site_profile_rows:
        lines.append(f"| `{field}` | {typ} | {desc} |")
    lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """Run the full refresh: fetch, analyse, patch the MD docs, and report.

    For every registry entry that is not marked ``skip``:
    1. load the cached raw response ``<SAMPLES_DIR>/<id>_raw.json`` or, if
       absent, call the API and cache a truthy response;
    2. unwrap the records, save the five richest as ``<id>.json``;
    3. compare the JSON top-level fields with the MD response fields and
       append any undocumented fields to the MD file.

    Finally writes the Markdown report and a JSON detail file into
    ``REPORT_DIR`` (file names carry a fixed ``20260214`` date stamp) and
    prints a status summary.
    """
    registry = load_registry()
    print(f"加载 API 注册表: {len(registry)} 个端点")
    print(f"时间范围: {START_TIME} ~ {END_TIME}")
    print(f"每接口获取: {LIMIT} 条")
    print("=" * 80)

    # Raw-response caches are written into SAMPLES_DIR below; create it up
    # front so the first write on a fresh checkout does not fail.
    os.makedirs(SAMPLES_DIR, exist_ok=True)

    def empty_result(entry, table_name, name_zh, status):
        """Result record for an endpoint that produced no field analysis."""
        return {
            "table": table_name, "name_zh": name_zh,
            "status": status, "record_count": 0,
            "json_field_count": 0, "md_field_count": 0,
            "data_path": entry.get("data_path"),
        }

    results = []

    for entry in registry:
        table_name = entry["id"]
        name_zh = entry.get("name_zh", "")
        module = entry["module"]
        action = entry["action"]
        skip = entry.get("skip", False)

        print(f"\n{'─' * 60}")
        print(f"[{table_name}] {name_zh} — {module}/{action}")

        if skip:
            print(" ⏭️ 跳过")
            results.append(empty_result(entry, table_name, name_zh, "skipped"))
            continue

        # Prefer the raw JSON cached by an earlier run.
        raw_path = os.path.join(SAMPLES_DIR, f"{table_name}_raw.json")
        if os.path.exists(raw_path):
            with open(raw_path, "r", encoding="utf-8") as f:
                raw = json.load(f)
            print(f" 使用已缓存的原始响应")
        else:
            body = build_body(entry)
            print(f" 请求: POST {module}/{action}")
            raw = call_api(module, action, body)
            if raw:
                with open(raw_path, "w", encoding="utf-8") as f:
                    json.dump(raw, f, ensure_ascii=False, indent=2)

        if raw is None:
            results.append(empty_result(entry, table_name, name_zh, "error"))
            continue

        records = unwrap_records(raw, entry)
        print(f" 记录数: {len(records)}")

        if not records:
            # Nothing to compare; counted as "ok" with zero records.
            results.append(empty_result(entry, table_name, name_zh, "ok"))
            continue

        # Pick the five records with the most fields and save them as sample.
        top5 = select_top5_richest(records)
        top5_counts = [len(r) for r in top5]
        print(f" 前 5 条最全记录顶层字段数: {top5_counts}")
        save_top5_sample(table_name, top5)

        # Collect every top-level field across all records.
        json_fields = collect_all_top_fields(records)
        print(f" JSON 顶层字段数: {len(json_fields)}")

        # Child-field counts of known nested objects (report only, not
        # part of the comparison).
        nested_summary = {}
        for name, info in json_fields.items():
            if info["type"] == "object" and name in KNOWN_NESTED_OBJECTS:
                nested = collect_nested_fields(records, name)
                nested_summary[name] = len(nested)

        # Extract the documented response fields.
        md_fields, md_nested, md_content = extract_md_response_fields(table_name)
        print(f" MD 响应字段数: {len(md_fields)}")

        # Compare. Everything in `missing_in_md` is absent from `md_fields`
        # by construction, so no further filtering is needed.
        missing_in_md, extra_in_md = compare_fields(json_fields, md_fields, md_nested, table_name)
        real_missing = list(missing_in_md)

        status = "ok" if not real_missing else "gap"

        if real_missing:
            print(f" ⚠️ 发现 {len(real_missing)} 个新字段:")
            for name, info in real_missing:
                print(f" + {name} ({info['type']}, {info['count']}次)")
            # Append the new fields to the MD document.
            updated = update_md_with_missing_fields(table_name, real_missing, md_content)
            if updated:
                print(f" 📝 已补充到 MD 文档")
        else:
            print(f" ✅ 字段完全覆盖")

        if extra_in_md:
            print(f" ℹ️ MD 多 {len(extra_in_md)} 个条件性字段")

        results.append({
            "table": table_name, "name_zh": name_zh,
            "status": status,
            "record_count": len(records),
            "json_field_count": len(json_fields),
            "md_field_count": len(md_fields),
            "data_path": entry.get("data_path"),
            "missing_in_md": real_missing,
            "extra_in_md": extra_in_md,
            "top5_field_counts": top5_counts,
            "nested_summary": nested_summary,
        })

    # ── Generate the report ──
    print(f"\n{'=' * 80}")
    print("生成对比报告...")

    report = generate_report(results)
    os.makedirs(REPORT_DIR, exist_ok=True)
    report_path = os.path.join(REPORT_DIR, "api_json_vs_md_report_20260214.md")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)
    print(f"报告: {report_path}")

    # Detailed JSON results (example values are dropped from missing_in_md).
    json_path = os.path.join(REPORT_DIR, "api_refresh_detail_20260214.json")
    serializable = []
    for r in results:
        sr = dict(r)
        if "missing_in_md" in sr and sr["missing_in_md"]:
            sr["missing_in_md"] = [(n, {"type": i["type"], "count": i["count"]})
                                   for n, i in sr["missing_in_md"]]
        serializable.append(sr)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(serializable, f, ensure_ascii=False, indent=2)

    # Summary
    ok = sum(1 for r in results if r["status"] == "ok")
    gap = sum(1 for r in results if r["status"] == "gap")
    skip = sum(1 for r in results if r["status"] == "skipped")
    err = sum(1 for r in results if r["status"] == "error")
    print(f"\n汇总: ✅ {ok} | ⚠️ {gap} | ⏭️ {skip} | 💥 {err}")
||||
|
||||
|
||||
# Run the refresh only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user