init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

View File

@@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
"""
比对 JSON 样本字段 vs API 参考文档(.md)字段。
找出 JSON 中存在但 .md 文档"四、响应字段详解"中缺失的字段。
特殊处理:
- settlement_records / recharge_settlements: 从 settleList 内层提取字段
  (siteProfile 子字段不提取,ODS 中存为 siteprofile jsonb 列)
- stock_goods_category_tree: 从 goodsCategoryList 内层提取字段
- 嵌套对象(siteProfile, tableProfile)作为整体字段名
"""
import json
import os
import re
import sys
# Folder holding the API reference .md documents; the JSON samples live in
# its "samples" subfolder.
DOCS_DIR = os.path.join("docs", "api-reference")
SAMPLES_DIR = os.path.join(DOCS_DIR, "samples")

# Structural wrapper fields (should not appear in the comparison).
# Each wrapper is listed both in its original casing and in lower case.
WRAPPER_FIELDS = {
    "settleList",
    "siteProfile",
    "tableProfile",
    "goodsCategoryList",
    "data",
    "code",
    "msg",
    "settlelist",
    "siteprofile",
    "tableprofile",
    "goodscategorylist",
}

# Table-header keywords (skipped when parsing the .md tables).
# NOTE: "type" must not be listed here, because some tables have a real
# business field named "type".
CROSS_REF_HEADERS = {
    "字段名",
    "类型",
    "示例值",
    "说明",
    "field",
    "example",
    "description",
}
def extract_json_fields(table_name: str) -> set:
    """Return the lower-cased field names found in a table's JSON sample.

    The sample is read from ``SAMPLES_DIR/<table_name>.json``.

    Args:
        table_name: Base name of the sample file (without ``.json``).

    Returns:
        A set of lower-cased key names; empty when the sample file does not
        exist. Which keys are inspected depends on the table:

        * ``settlement_records`` / ``recharge_settlements``: keys of the
          ``settleList`` value (its first element when it is a list).
          ``siteProfile`` is kept as a single field name.
        * ``stock_goods_category_tree`` / ``role_area_association``: keys of
          the first element of ``goodsCategoryList`` / ``roleAreaRelations``,
          minus anything in ``WRAPPER_FIELDS``.
        * any other table: the top-level keys, minus ``WRAPPER_FIELDS`` except
          ``siteprofile`` and ``tableprofile``, which are kept as whole fields.
    """
    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
    if not os.path.exists(sample_path):
        return set()
    with open(sample_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    # Settlement tables: the real record sits inside "settleList", which may
    # be either a single object or a list of objects.
    if table_name in ("settlement_records", "recharge_settlements"):
        record = payload.get("settleList", {})
        if isinstance(record, list):
            record = record[0] if record else {}
        # Every key is kept, including siteProfile (treated as one column).
        return {key.lower() for key in record.keys()}

    # Tables whose records are wrapped in a list under a known key: only the
    # first element of that list is inspected.
    list_key = {
        "stock_goods_category_tree": "goodsCategoryList",
        "role_area_association": "roleAreaRelations",
    }.get(table_name)
    if list_key is not None:
        items = payload.get(list_key, [])
        if not items:
            return set()
        return {
            key.lower()
            for key in items[0].keys()
            if key.lower() not in WRAPPER_FIELDS
        }

    # Generic case: top-level keys. Wrapper names are dropped, except the
    # nested objects that are compared as a single field.
    whole_object_fields = ("siteprofile", "tableprofile")
    return {
        lowered
        for lowered in (key.lower() for key in payload.keys())
        if lowered not in WRAPPER_FIELDS or lowered in whole_object_fields
    }
def extract_md_fields(table_name: str) -> set:
    """Return the lower-cased field names documented in a table's .md file.

    Only the chapter whose heading starts with ``## 四、`` and contains
    ``响应字段`` is scanned; scanning stops at the next ``## `` heading that
    does not start with ``## 四``. A field is any table row whose first cell
    is a back-quoted name.

    Args:
        table_name: Base name of the document (without ``.md``), looked up
            under ``DOCS_DIR``.

    Returns:
        A set of lower-cased field names; empty when the document does not
        exist. Header keywords (``CROSS_REF_HEADERS``) and wrapper names
        (``WRAPPER_FIELDS``, except ``siteprofile`` / ``tableprofile``) are
        excluded. For the settlement tables, rows inside a ``### ...
        siteProfile`` sub-section and names starting with ``siteProfile.``
        are excluded as well.
    """
    doc_path = os.path.join(DOCS_DIR, f"{table_name}.md")
    if not os.path.exists(doc_path):
        return set()
    with open(doc_path, "r", encoding="utf-8") as handle:
        raw_lines = handle.readlines()

    row_re = re.compile(r'^\|\s*`([^`]+)`\s*\|')
    siteprofile_heading_re = re.compile(r'^###.*siteProfile', re.IGNORECASE)
    header_words = {word.lower() for word in CROSS_REF_HEADERS}
    whole_object_fields = ("siteprofile", "tableprofile")
    is_settlement = table_name in ("settlement_records", "recharge_settlements")

    found = set()
    inside_section = False
    inside_siteprofile = False
    for raw_line in raw_lines:
        text = raw_line.strip()

        # Entering the response-field chapter.
        if text.startswith("## 四、") and "响应字段" in text:
            inside_section = True
            inside_siteprofile = False
            continue
        if not inside_section:
            continue
        # Any other level-2 heading ends the chapter.
        if text.startswith("## ") and not text.startswith("## 四"):
            break

        # Track the siteProfile sub-section (settlement tables only).
        if is_settlement:
            if siteprofile_heading_re.search(text):
                inside_siteprofile = True
                continue
            # A different level-3 heading closes the siteProfile sub-section.
            if inside_siteprofile and text.startswith("### "):
                inside_siteprofile = False

        hit = row_re.match(text)
        if hit is None:
            continue
        name = hit.group(1).strip()
        lowered = name.lower()

        if lowered in header_words:
            continue
        if is_settlement and (inside_siteprofile or name.startswith("siteProfile.")):
            continue
        if lowered in WRAPPER_FIELDS and lowered not in whole_object_fields:
            continue
        found.add(lowered)
    return found
def main():
    """Compare every JSON sample with its .md document and report the gaps.

    For each ``*.json`` file in ``SAMPLES_DIR`` the field sets returned by
    ``extract_json_fields`` and ``extract_md_fields`` are diffed in both
    directions. A human-readable report is printed to stdout and the full
    result list is written to ``docs/reports/json_vs_md_gaps.json``.

    A table counts as an issue only when the JSON sample has fields that the
    document lacks; fields present only in the document are reported as
    possibly conditional fields.
    """
    suffix = ".json"
    # Strip only the trailing extension: str.replace would also remove any
    # ".json" occurring earlier in the file name.
    tables = sorted(
        entry[:-len(suffix)]
        for entry in os.listdir(SAMPLES_DIR)
        if entry.endswith(suffix)
    )

    results = []
    for table in tables:
        json_fields = extract_json_fields(table)
        md_fields = extract_md_fields(table)
        results.append({
            "table": table,
            "json_count": len(json_fields),
            "md_count": len(md_fields),
            # Present in the JSON sample but missing from the document.
            "json_only": sorted(json_fields - md_fields),
            # Present in the document only (may be conditional fields;
            # informational).
            "md_only": sorted(md_fields - json_fields),
        })

    # Console report.
    print("=" * 80)
    print("JSON 样本 vs .md 文档 字段比对报告")
    print("=" * 80)
    issues = 0
    for r in results:
        if r["json_only"]:
            issues += 1
            print(f"\n{r['table']} — JSON={r['json_count']}, MD={r['md_count']}")
            print(f" JSON 中有但 .md 缺失 ({len(r['json_only'])} 个):")
            for name in r["json_only"]:
                print(f" - {name}")
            if r["md_only"]:
                print(f" .md 中有但 JSON 无 ({len(r['md_only'])} 个,可能是条件性字段):")
                for name in r["md_only"]:
                    print(f" - {name}")
        else:
            status = "" if not r["md_only"] else "⚠️"
            extra = ""
            if r["md_only"]:
                extra = f" (.md 多 {len(r['md_only'])} 个条件性字段)"
            print(f"\n{status} {r['table']} — JSON={r['json_count']}, MD={r['md_count']}{extra}")
    print(f"\n{'=' * 80}")
    print(f"总计: {len(results)} 个表, {issues} 个有 JSON→MD 缺失")

    # Machine-readable output for follow-up processing.
    out_path = os.path.join("docs", "reports", "json_vs_md_gaps.json")
    # The reports directory may not exist yet; create it so that open() does
    # not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as out_file:
        json.dump(results, out_file, ensure_ascii=False, indent=2)
    print(f"\n详细结果已写入: {out_path}")


if __name__ == "__main__":
    main()
# AI_CHANGELOG:
# - 日期: 2026-02-14
# - Prompt: P20260214-044500 — "md文档和json数据不对应全面排查"
# - 直接原因: 用户要求全面排查 JSON 样本与 .md 文档的字段一致性
# - 变更摘要: 新建脚本,从 JSON 样本提取字段与 .md 文档"响应字段详解"章节比对;
#   修复 3 个 bug(type 过滤、siteProfile/tableProfile 例外、roleAreaRelations 包装器)
# - 风险与验证: 纯分析脚本,无运行时影响;运行 `python scripts/check_json_vs_md.py` 验证输出