206 lines
7.2 KiB
Python
206 lines
7.2 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
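Compare the fields of the JSON samples against the fields of the API reference docs (.md).

Find the fields that exist in the JSON but are missing from the
"四、响应字段详解" (Section 4, response field details) section of the .md document.

Special handling:
- settlement_records / recharge_settlements: fields are extracted from the inner level of settleList;
  siteProfile sub-fields are not extracted (stored in ODS as the siteprofile jsonb column)
- stock_goods_category_tree: fields are extracted from the inner level of goodsCategoryList
- Nested objects (siteProfile, tableProfile) are treated as a single field name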
|
||
"""
|
||
import json
|
||
import os
|
||
import re
|
||
import sys
|
||
|
||
# Directory holding the Markdown API reference pages, and the sub-directory
# holding the JSON samples they are compared against.
DOCS_DIR = os.path.join("docs", "api-reference")
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")

# Structural wrapper fields (they must not take part in the comparison).
# The camelCase wrappers are registered together with their lowercase form.
_CAMEL_CASE_WRAPPERS = (
    "settleList",
    "siteProfile",
    "tableProfile",
    "goodsCategoryList",
)
WRAPPER_FIELDS = {"data", "code", "msg"}
WRAPPER_FIELDS.update(_CAMEL_CASE_WRAPPERS)
WRAPPER_FIELDS.update(name.lower() for name in _CAMEL_CASE_WRAPPERS)

# Table-header keywords (skipped).  Note: "type" must NOT be listed here,
# because some tables have a business field that is really named `type`.
CROSS_REF_HEADERS = {
    "字段名",
    "类型",
    "示例值",
    "说明",
    "field",
    "example",
    "description",
}
|
||
|
||
|
||
def extract_json_fields(table_name: str) -> set:
    """Collect the lowercase field names found in a table's JSON sample.

    The sample is read from ``<SAMPLES_DIR>/<table_name>.json``.

    Args:
        table_name: Sample base name (file name without ``.json``).

    Returns:
        A set of lowercased key names.  It is empty when the sample file does
        not exist, or when the inner list/object a table is read from is
        empty or absent.

    Which level of the document the keys are taken from depends on the table:

    * ``settlement_records`` / ``recharge_settlements``: the keys of
      ``settleList`` (its first element when it is a list).  No wrapper
      filtering is applied, so ``siteProfile`` is kept as one field.
    * ``stock_goods_category_tree``: the keys of the first element of
      ``goodsCategoryList``, minus anything in ``WRAPPER_FIELDS``.
    * ``role_area_association``: the keys of the first element of
      ``roleAreaRelations``, minus anything in ``WRAPPER_FIELDS``.
    * every other table: the top-level keys, minus ``WRAPPER_FIELDS`` except
      that ``siteprofile`` / ``tableprofile`` are kept as whole fields.
    """
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    if not os.path.exists(sample_path):
        return set()

    with open(sample_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    # settlement_records / recharge_settlements: look inside settleList.
    if table_name in ("settlement_records", "recharge_settlements"):
        record = payload.get("settleList", {})
        if isinstance(record, list):
            record = record[0] if record else {}
        # Every key is taken as-is; siteProfile therefore appears as a single
        # field (the original note describes it as one whole jsonb column).
        return {key.lower() for key in record.keys()}

    # Tables whose fields live in the first element of an inner list.
    inner_list_key = {
        "stock_goods_category_tree": "goodsCategoryList",
        "role_area_association": "roleAreaRelations",
    }.get(table_name)
    if inner_list_key is not None:
        rows = payload.get(inner_list_key, [])
        if not rows:
            return set()
        lowered_keys = (key.lower() for key in rows[0].keys())
        return {name for name in lowered_keys if name not in WRAPPER_FIELDS}

    # Generic case: top-level keys.  Wrapper keys are dropped, except the
    # nested objects that are compared as whole fields.
    names = set()
    for key in payload.keys():
        lowered = key.lower()
        if lowered not in WRAPPER_FIELDS or lowered in ("siteprofile", "tableprofile"):
            names.add(lowered)
    return names
|
||
|
||
|
||
def extract_md_fields(table_name: str) -> set:
    """Collect the lowercase field names documented in a table's .md page.

    Only the section whose heading starts with ``## 四、`` and contains
    ``响应字段`` is scanned; scanning stops at the next ``## `` heading that
    does not start with ``## 四``.  A field is any table row whose first cell
    is a back-quoted name.

    Args:
        table_name: Document base name (file name without ``.md``), read from
            ``<DOCS_DIR>/<table_name>.md``.

    Returns:
        A set of lowercased field names; empty when the file does not exist
        or the section is not found.

    Rows are skipped when:

    * the name is one of ``CROSS_REF_HEADERS`` (table header keywords);
    * the table is ``settlement_records`` / ``recharge_settlements`` and the
      row sits under a ``###`` heading mentioning siteProfile, or the name
      starts with ``siteProfile.``;
    * the name is in ``WRAPPER_FIELDS`` and is not ``siteprofile`` /
      ``tableprofile``.
    """
    doc_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(doc_path):
        return set()

    with open(doc_path, "r", encoding="utf-8") as handle:
        doc_lines = handle.readlines()

    row_re = re.compile(r'^\|\s*`([^`]+)`\s*\|')
    site_heading_re = re.compile(r'^###.*siteProfile', re.IGNORECASE)
    header_words = {word.lower() for word in CROSS_REF_HEADERS}
    is_settlement = table_name in ("settlement_records", "recharge_settlements")
    kept_wrappers = ("siteprofile", "tableprofile")

    found = set()
    inside_section = False
    inside_siteprofile = False

    for raw_line in doc_lines:
        text = raw_line.strip()

        # Opening heading of the response-field section.
        if text.startswith("## 四、") and "响应字段" in text:
            inside_section = True
            inside_siteprofile = False
            continue

        if not inside_section:
            continue

        # The next level-2 heading ends the section.
        if text.startswith("## ") and not text.startswith("## 四"):
            break

        # Track the siteProfile sub-section (settlement tables only).
        if is_settlement:
            if site_heading_re.search(text):
                inside_siteprofile = True
                continue
            if inside_siteprofile and text.startswith("### "):
                # Any other level-3 heading closes the siteProfile sub-section.
                inside_siteprofile = False

        match = row_re.match(text)
        if match is None:
            continue

        name = match.group(1).strip()
        lowered = name.lower()

        if lowered in header_words:
            continue
        if is_settlement and (inside_siteprofile or name.startswith("siteProfile.")):
            continue
        if lowered in WRAPPER_FIELDS and lowered not in kept_wrappers:
            continue

        found.add(lowered)

    return found
|
||
|
||
|
||
def main():
    """Compare every JSON sample with its .md page and report the gaps.

    For each ``*.json`` file in ``SAMPLES_DIR`` (sorted by table name) the
    JSON fields and the documented fields are extracted and diffed in both
    directions.  A human-readable report is printed to stdout, and the full
    result list is written to ``docs/reports/json_vs_md_gaps.json``; the
    report directory is created when it does not exist yet.

    Raises:
        OSError: if ``SAMPLES_DIR`` cannot be listed or the report file
            cannot be written.
    """
    suffix = ".json"
    # Strip only the trailing suffix: str.replace() would also remove a
    # ".json" occurring in the middle of a file name and yield a wrong table.
    samples = sorted(
        name[: -len(suffix)]
        for name in os.listdir(SAMPLES_DIR)
        if name.endswith(suffix)
    )

    results = []
    for table in samples:
        json_fields = extract_json_fields(table)
        md_fields = extract_md_fields(table)

        # Present in the JSON but missing from the .md.
        json_only = json_fields - md_fields
        # Present in the .md but not in the JSON (possibly conditional
        # fields; informational only).
        md_only = md_fields - json_fields

        results.append({
            "table": table,
            "json_count": len(json_fields),
            "md_count": len(md_fields),
            "json_only": sorted(json_only),
            "md_only": sorted(md_only),
        })

    # Console report.
    print("=" * 80)
    print("JSON 样本 vs .md 文档 字段比对报告")
    print("=" * 80)

    issues = 0
    for r in results:
        if r["json_only"]:
            issues += 1
            print(f"\n❌ {r['table']} — JSON={r['json_count']}, MD={r['md_count']}")
            print(f" JSON 中有但 .md 缺失 ({len(r['json_only'])} 个):")
            for f in r["json_only"]:
                print(f" - {f}")
            if r["md_only"]:
                print(f" .md 中有但 JSON 无 ({len(r['md_only'])} 个,可能是条件性字段):")
                for f in r["md_only"]:
                    print(f" - {f}")
        else:
            status = "✅" if not r["md_only"] else "⚠️"
            extra = ""
            if r["md_only"]:
                extra = f" (.md 多 {len(r['md_only'])} 个条件性字段)"
            print(f"\n{status} {r['table']} — JSON={r['json_count']}, MD={r['md_count']}{extra}")

    print(f"\n{'=' * 80}")
    print(f"总计: {len(results)} 个表, {issues} 个有 JSON→MD 缺失")

    # Machine-readable copy of the results for later processing.
    out_path = os.path.join("docs", "reports", "json_vs_md_gaps.json")
    # Make sure the report directory exists so the run does not fail at the
    # very last step.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已写入: {out_path}")


if __name__ == "__main__":
    main()
|
||
|
||
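# AI_CHANGELOG:
# - Date: 2026-02-14
# - Prompt: P20260214-044500 — "md文档和json数据不对应!全面排查"
#   (the .md docs and the JSON data do not match; run a full check)
# - Direct cause: the user asked for a full check of field consistency between
#   the JSON samples and the .md docs
# - Change summary: new script that extracts the fields of the JSON samples and
#   compares them with the "响应字段详解" (response field details) section of the .md docs;
#   fixed 3 bugs (type filtering, siteProfile/tableProfile exception, roleAreaRelations wrapper)
# - Risk and verification: analysis-only script, no runtime impact;
#   run `python scripts/check_json_vs_md.py` and check the output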
|