init: 项目初始提交 - NeoZQYY Monorepo 完整代码

2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions
--- a/apps/etl/pipelines/feiqiu/scripts/refresh_json_and_audit.py
+++ b/apps/etl/pipelines/feiqiu/scripts/refresh_json_and_audit.py
@@ -0,0 +1,523 @@
+# -*- coding: utf-8 -*-
+"""
+重新获取全部 API 接口的 JSON 数据（最多 100 条），
+遍历所有记录提取最全字段集合，
+与 .md 文档比对并输出差异报告。
+
+时间范围：2026-01-01 00:00:00 ~ 2026-02-13 00:00:00
+
+用法：python scripts/refresh_json_and_audit.py
+"""
+import json
+import os
+import re
+import sys
+import time
+import requests
+
+# ── Configuration ─────────────────────────────────────────────────────────
+# Base URL that call_api() prefixes to every "<module>/<action>" endpoint.
+API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
+# The token is read from the environment first; when it is unset or empty,
+# fall back to the first "API_TOKEN=" line of the .env file located one
+# directory above this script.
+API_TOKEN = os.environ.get("API_TOKEN", "")
+if not API_TOKEN:
+    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
+    if os.path.exists(env_path):
+        with open(env_path, "r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if line.startswith("API_TOKEN="):
+                    # Everything after the first "=" is the value (no quote stripping).
+                    API_TOKEN = line.split("=", 1)[1].strip()
+                    break
+
+# NOTE(review): SITE_ID is not referenced by any function visible in this
+# file — presumably kept for reference; confirm before removing.
+SITE_ID = 2790685415443269
+# Time window written into the request body by build_body().
+START_TIME = "2026-01-01 00:00:00"
+END_TIME = "2026-02-13 00:00:00"
+# Page size written into the request body by build_body().
+LIMIT = 100
+
+# Paths below are relative, so they resolve against the current working directory.
+SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
+DOCS_DIR = os.path.join("docs", "api-reference")
+REPORT_DIR = os.path.join("docs", "reports")
+
+# HTTP headers sent with every request by call_api().
+HEADERS = {
+    "Authorization": f"Bearer {API_TOKEN}",
+    "Content-Type": "application/json",
+}
+
+REGISTRY_PATH = os.path.join("docs", "api-reference", "api_registry.json")
+
+# Envelope/wrapper key names (original and lowercase spellings) that the field
+# extractors skip; "siteprofile"/"tableprofile" are special-cased by them.
+WRAPPER_FIELDS = {"settleList", "siteProfile", "tableProfile",
+                  "goodsCategoryList", "data", "code", "msg",
+                  "settlelist", "siteprofile", "tableprofile",
+                  "goodscategorylist"}
+
+# Table header cell names that extract_md_fields() must not treat as fields.
+CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "example",
+                     "description"}
+
+# Name of the list field each endpoint actually returns under "data"
+# (determined while debugging).
+ACTUAL_LIST_KEY = {
+    "assistant_accounts_master": "assistantInfos",
+    "assistant_service_records": "orderAssistantDetails",
+    "assistant_cancellation_records": "abolitionAssistants",
+    "table_fee_transactions": "siteTableUseDetailsList",
+    "table_fee_discount_records": "taiFeeAdjustInfos",
+    "tenant_goods_master": "tenantGoodsList",
+    "store_goods_sales_records": "orderGoodsLedgers",
+    "store_goods_master": "orderGoodsList",
+    "goods_stock_movements": "queryDeliveryRecordsList",
+    "member_profiles": "tenantMemberInfos",
+    "member_stored_value_cards": "tenantMemberCards",
+    "member_balance_changes": "tenantMemberCardLogs",
+    "group_buy_packages": "packageCouponList",
+    "group_buy_redemption_records": "siteTableUseDetailsList",
+    "site_tables_master": "siteTables",
+    # The entries below use "list" or a special path.
+    "payment_transactions": "list",
+    "refund_transactions": "list",
+    "platform_coupon_redemption_records": "list",
+    "goods_stock_summary": "list",
+    "settlement_records": "settleList",
+    "recharge_settlements": "settleList",
+}
+
+
+def load_registry():
+    with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def call_api(module, action, body):
+    url = f"{API_BASE}{module}/{action}"
+    try:
+        resp = requests.post(url, json=body, headers=HEADERS, timeout=30)
+        resp.raise_for_status()
+        return resp.json()
+    except Exception as e:
+        print(f"  ❌ 请求失败: {e}")
+        return None
+
+
+def unwrap_records(raw_json, table_name):
+    """从原始 API 响应中提取业务记录列表"""
+    if raw_json is None:
+        return []
+
+    data = raw_json.get("data")
+    if data is None:
+        return []
+
+    # ── 特殊表：stock_goods_category_tree ──
+    if table_name == "stock_goods_category_tree":
+        if isinstance(data, dict):
+            cats = data.get("goodsCategoryList", [])
+            return cats if isinstance(cats, list) else []
+        return []
+
+    # ── 特殊表：role_area_association ──
+    if table_name == "role_area_association":
+        if isinstance(data, dict):
+            rels = data.get("roleAreaRelations", [])
+            return rels if isinstance(rels, list) else []
+        return []
+
+    # ── 特殊表：tenant_member_balance_overview ──
+    # 返回的是汇总对象 + rechargeCardList/giveCardList
+    if table_name == "tenant_member_balance_overview":
+        if isinstance(data, dict):
+            # 合并顶层标量字段 + 列表中的字段
+            records = [data]  # 顶层作为一条记录
+            for list_key in ("rechargeCardList", "giveCardList"):
+                items = data.get(list_key, [])
+                if isinstance(items, list):
+                    records.extend(items)
+            return records
+        return []
+
+    # ── settlement_records / recharge_settlements ──
+    # data.settleList 是列表，每个元素内部有 settleList 子对象
+    if table_name in ("settlement_records", "recharge_settlements"):
+        if isinstance(data, dict):
+            settle_list = data.get("settleList", [])
+            if isinstance(settle_list, list):
+                return settle_list
+        return []
+
+    # ── 通用：data 是 dict，从中找列表字段 ──
+    if isinstance(data, dict):
+        list_key = ACTUAL_LIST_KEY.get(table_name, "list")
+        items = data.get(list_key, [])
+        if isinstance(items, list):
+            return items
+        # fallback: 找第一个列表字段
+        for k, v in data.items():
+            if isinstance(v, list) and k != "total":
+                return v
+        return []
+
+    if isinstance(data, list):
+        return data
+
+    return []
+
+
+def extract_all_fields(records, table_name):
+    """从多条记录中提取所有唯一字段名（小写）"""
+    all_fields = set()
+    for record in records:
+        if not isinstance(record, dict):
+            continue
+
+        # settlement_records / recharge_settlements: 内层 settleList 展开
+        if table_name in ("settlement_records", "recharge_settlements"):
+            settle = record.get("settleList", record)
+            if isinstance(settle, list):
+                settle = settle[0] if settle else {}
+            if isinstance(settle, dict):
+                for k in settle.keys():
+                    kl = k.lower()
+                    if kl == "siteprofile":
+                        all_fields.add("siteprofile")
+                    elif kl in WRAPPER_FIELDS:
+                        continue
+                    else:
+                        all_fields.add(kl)
+            continue
+
+        # tenant_member_balance_overview: 特殊处理
+        if table_name == "tenant_member_balance_overview":
+            for k in record.keys():
+                kl = k.lower()
+                # 跳过嵌套列表键名本身
+                if kl in ("rechargecardlist", "givecardlist"):
+                    continue
+                all_fields.add(kl)
+            continue
+
+        # 通用
+        for k in record.keys():
+            kl = k.lower()
+            if kl in WRAPPER_FIELDS:
+                if kl in ("siteprofile", "tableprofile"):
+                    all_fields.add(kl)
+                continue
+            all_fields.add(kl)
+
+    return all_fields
+
+
+def extract_md_fields(table_name):
+    """从 .md 文档的"四、响应字段详解"章节提取字段名（小写）"""
+    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
+    if not os.path.exists(md_path):
+        return set()
+
+    with open(md_path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+
+    fields = set()
+    in_section = False
+    in_siteprofile = False
+    field_pattern = re.compile(r'^\|\s*`([^`]+)`\s*\|')
+    siteprofile_header = re.compile(r'^###.*siteProfile', re.IGNORECASE)
+
+    for line in lines:
+        s = line.strip()
+
+        if s.startswith("## 四、") and "响应字段" in s:
+            in_section = True
+            in_siteprofile = False
+            continue
+
+        if in_section and s.startswith("## ") and not s.startswith("## 四"):
+            break
+
+        if not in_section:
+            continue
+
+        if table_name in ("settlement_records", "recharge_settlements"):
+            if siteprofile_header.search(s):
+                in_siteprofile = True
+                continue
+            if s.startswith("### ") and in_siteprofile:
+                if not siteprofile_header.search(s):
+                    in_siteprofile = False
+
+        m = field_pattern.match(s)
+        if m:
+            raw = m.group(1).strip()
+            if raw.lower() in {h.lower() for h in CROSS_REF_HEADERS}:
+                continue
+            if table_name in ("settlement_records", "recharge_settlements"):
+                if in_siteprofile:
+                    continue
+                if raw.startswith("siteProfile."):
+                    continue
+            if raw.lower() in WRAPPER_FIELDS and raw.lower() not in (
+                    "siteprofile", "tableprofile"):
+                continue
+            fields.add(raw.lower())
+
+    return fields
+
+
+def build_body(entry):
+    body = dict(entry.get("body") or {})
+    if entry.get("time_range") and entry.get("time_keys"):
+        keys = entry["time_keys"]
+        if len(keys) >= 2:
+            body[keys[0]] = START_TIME
+            body[keys[1]] = END_TIME
+    if entry.get("pagination"):
+        body[entry["pagination"].get("page_key", "page")] = 1
+        body[entry["pagination"].get("limit_key", "limit")] = LIMIT
+    return body
+
+
+def save_sample(table_name, records):
+    """保存第一条记录作为 JSON 样本"""
+    sample_path = os.path.join(SAMPLES_DIR, f"{table_name}.json")
+    if records and isinstance(records[0], dict):
+        with open(sample_path, "w", encoding="utf-8") as f:
+            json.dump(records[0], f, ensure_ascii=False, indent=2)
+    return sample_path
+
+
+def discover_actual_data_path(raw_json, table_name):
+    """发现 API 实际返回的数据路径"""
+    data = raw_json.get("data") if raw_json else None
+    if data is None:
+        return None
+
+    # 特殊表
+    if table_name == "stock_goods_category_tree":
+        return "data.goodsCategoryList"
+    if table_name == "role_area_association":
+        return "data.roleAreaRelations"
+    if table_name == "tenant_member_balance_overview":
+        return "data"  # 顶层汇总对象
+    if table_name in ("settlement_records", "recharge_settlements"):
+        return "data.settleList"
+
+    if isinstance(data, dict):
+        list_key = ACTUAL_LIST_KEY.get(table_name)
+        if list_key and list_key in data:
+            return f"data.{list_key}"
+        # fallback
+        for k, v in data.items():
+            if isinstance(v, list) and k.lower() != "total":
+                return f"data.{k}"
+    return None
+
+
+def update_md_data_path(table_name, actual_path):
+    """在 .md 文档的接口概述表格中更新/添加实际数据路径"""
+    md_path = os.path.join(DOCS_DIR, f"{table_name}.md")
+    if not os.path.exists(md_path):
+        return False
+
+    with open(md_path, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    # 检查是否已有"数据路径"或"响应数据路径"行
+    if "数据路径" in content or "data_path" in content.lower():
+        # 尝试更新已有行
+        pattern = re.compile(
+            r'(\|\s*(?:数据路径|响应数据路径|data_path)\s*\|\s*)`[^`]*`(\s*\|)',
+            re.IGNORECASE
+        )
+        if pattern.search(content):
+            new_content = pattern.sub(
+                rf'\g<1>`{actual_path}`\g<2>', content
+            )
+            if new_content != content:
+                with open(md_path, "w", encoding="utf-8") as f:
+                    f.write(new_content)
+                return True
+            return False  # 已经是最新值
+
+    # 没有数据路径行，在接口概述表格末尾添加
+    # 找到"## 一、接口概述"后的表格最后一行（以 | 开头）
+    lines = content.split("\n")
+    insert_idx = None
+    in_overview = False
+    last_table_row = None
+
+    for i, line in enumerate(lines):
+        s = line.strip()
+        if "## 一、" in s and "接口概述" in s:
+            in_overview = True
+            continue
+        if in_overview and s.startswith("## "):
+            break
+        if in_overview and s.startswith("|") and "---" not in s:
+            last_table_row = i
+
+    if last_table_row is not None:
+        new_line = f"| 响应数据路径 | `{actual_path}` |"
+        lines.insert(last_table_row + 1, new_line)
+        with open(md_path, "w", encoding="utf-8") as f:
+            f.write("\n".join(lines))
+        return True
+
+    return False
+
+
+def main():
+    registry = load_registry()
+    print(f"加载 API 注册表: {len(registry)} 个端点")
+    print(f"时间范围: {START_TIME} ~ {END_TIME}")
+    print(f"每接口获取: {LIMIT} 条")
+    print("=" * 80)
+
+    results = []
+    all_gaps = []
+    registry_updates = {}  # table_name -> actual_data_path
+
+    for entry in registry:
+        table_name = entry["id"]
+        name_zh = entry.get("name_zh", "")
+        module = entry["module"]
+        action = entry["action"]
+        skip = entry.get("skip", False)
+
+        print(f"\n{'─' * 60}")
+        print(f"[{table_name}] {name_zh} — {module}/{action}")
+
+        if skip:
+            print("  ⏭️ 跳过（标记为 skip）")
+            results.append({
+                "table": table_name,
+                "status": "skipped",
+                "record_count": 0,
+                "json_field_count": 0,
+                "md_field_count": 0,
+                "json_fields": [],
+                "md_fields": [],
+                "json_only": [],
+                "md_only": [],
+                "actual_data_path": None,
+            })
+            continue
+
+        body = build_body(entry)
+
+        print(f"  请求: POST {module}/{action}")
+        raw = call_api(module, action, body)
+
+        if raw is None:
+            results.append({
+                "table": table_name,
+                "status": "error",
+                "record_count": 0,
+                "json_field_count": 0,
+                "md_field_count": 0,
+                "json_fields": [],
+                "md_fields": [],
+                "json_only": [],
+                "md_only": [],
+                "actual_data_path": None,
+            })
+            continue
+
+        # 发现实际数据路径
+        actual_path = discover_actual_data_path(raw, table_name)
+        old_path = entry.get("data_path", "")
+        if actual_path and actual_path != old_path:
+            print(f"  📍 数据路径: {old_path} → {actual_path}")
+            registry_updates[table_name] = actual_path
+        else:
+            print(f"  📍 数据路径: {actual_path or old_path}")
+
+        records = unwrap_records(raw, table_name)
+        print(f"  获取记录数: {len(records)}")
+
+        # 保存样本（第一条）
+        save_sample(table_name, records)
+
+        # 遍历所有记录提取全字段
+        json_fields = extract_all_fields(records, table_name)
+        md_fields = extract_md_fields(table_name)
+
+        json_only = json_fields - md_fields
+        md_only = md_fields - json_fields
+
+        status = "ok"
+        if json_only:
+            status = "gap"
+            print(f"  ❌ JSON 有但 .md 缺失 ({len(json_only)} 个): {sorted(json_only)}")
+            all_gaps.append((table_name, name_zh, sorted(json_only)))
+        else:
+            if md_only:
+                print(f"  ⚠️ .md 多 {len(md_only)} 个条件性字段")
+            else:
+                print(f"  ✅ 完全一致 ({len(json_fields)} 个字段)")
+
+        # 更新 .md 文档中的数据路径
+        if actual_path:
+            updated = update_md_data_path(table_name, actual_path)
+            if updated:
+                print(f"  📝 已更新 .md 文档数据路径")
+
+        results.append({
+            "table": table_name,
+            "status": status,
+            "record_count": len(records),
+            "json_field_count": len(json_fields),
+            "md_field_count": len(md_fields),
+            "json_fields": sorted(json_fields),
+            "md_fields": sorted(md_fields),
+            "json_only": sorted(json_only),
+            "md_only": sorted(md_only),
+            "actual_data_path": actual_path,
+        })
+
+        time.sleep(0.3)
+
+    # ── 更新 api_registry.json 中的 data_path ──
+    if registry_updates:
+        print(f"\n{'─' * 60}")
+        print(f"更新 api_registry.json 中 {len(registry_updates)} 个 data_path...")
+        for entry in registry:
+            tid = entry["id"]
+            if tid in registry_updates:
+                entry["data_path"] = registry_updates[tid]
+        with open(REGISTRY_PATH, "w", encoding="utf-8") as f:
+            json.dump(registry, f, ensure_ascii=False, indent=2)
+        print("  ✅ api_registry.json 已更新")
+
+    # ── 汇总 ──
+    print(f"\n{'=' * 80}")
+    print("汇总报告")
+    print(f"{'=' * 80}")
+
+    gap_count = sum(1 for r in results if r["status"] == "gap")
+    ok_count = sum(1 for r in results if r["status"] == "ok")
+    skip_count = sum(1 for r in results if r["status"] == "skipped")
+    err_count = sum(1 for r in results if r["status"] == "error")
+
+    print(f"  完全一致: {ok_count}")
+    print(f"  有缺失:   {gap_count}")
+    print(f"  跳过:     {skip_count}")
+    print(f"  错误:     {err_count}")
+
+    if all_gaps:
+        print(f"\n需要补充到 .md 文档的字段:")
+        for table, name_zh, fields in all_gaps:
+            print(f"  {table} ({name_zh}): {fields}")
+
+    # 保存详细结果
+    out_path = os.path.join(REPORT_DIR, "json_refresh_audit.json")
+    os.makedirs(REPORT_DIR, exist_ok=True)
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(results, f, ensure_ascii=False, indent=2)
+    print(f"\n详细结果已写入: {out_path}")
+
+
+# Run the refresh/audit only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()
+
+# AI_CHANGELOG:
+# - 日期: 2026-02-14
+# - Prompt: P20260214-060000 — 全量 JSON 刷新 + MD 文档补全 + 数据路径修正
+# - 直接原因: 旧 JSON 样本仅含单条记录，缺少条件性字段；需重新获取 100 条数据并遍历提取最全字段
+# - 变更摘要: 新建脚本，实现：(1) 调用全部 24 个 API 端点获取 100 条数据 (2) 遍历所有记录提取字段并集
+#   (3) 与 .md 文档比对找出缺失字段 (4) 更新 JSON 样本和 api_registry.json data_path (5) 更新 .md 文档响应数据路径行
+# - 风险与验证: 脚本需要有效的 API_TOKEN 和网络连接；验证：运行后检查 json_refresh_audit.json 中 24/24 通过