init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

View File

@@ -0,0 +1,461 @@
# -*- coding: utf-8 -*-
"""
API 参考文档 vs ODS 实际表结构 对比脚本 (v2)
从 docs/api-reference/*.md 的 JSON 样例中提取字段,
查询 PostgreSQL billiards_ods 的实际列,
输出差异报告 JSON 和 Markdown + ALTER SQL。
用法: python scripts/compare_api_ods_v2.py
"""
import json
import os
import re
import sys
from datetime import datetime
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOT)
from dotenv import load_dotenv
load_dotenv(os.path.join(ROOT, ".env"))
import psycopg2
# ODS metadata columns: ETL bookkeeping columns added by the pipeline,
# not sourced from the API payloads, so they are excluded from the diff.
ODS_META_COLS = {
    "source_file", "source_endpoint", "fetched_at",
    "payload", "content_hash",
}
def load_registry():
    """Load the API registry from docs/api-reference/api_registry.json."""
    registry_path = os.path.join(ROOT, "docs", "api-reference", "api_registry.json")
    with open(registry_path, "r", encoding="utf-8") as fh:
        return json.load(fh)
def extract_fields_from_md(md_path, api_id):
    """
    Extract field names (lowercased) from the JSON samples embedded in an
    API reference markdown file.

    Three response shapes are handled:
      * standard flat response  -> top-level keys
      * settlement responses    -> keys of the nested ``settleList`` object
        (``siteProfile`` is detected but its keys are NOT extracted)
      * category-tree responses -> keys of the first ``goodsCategoryList`` item

    Returns ``(fields, has_nested, error)`` where *fields* is a set (or
    None on failure) and *error* is a message string when extraction fails.
    NOTE: *api_id* is accepted for signature compatibility but unused here.
    """
    with open(md_path, "r", encoding="utf-8") as fh:
        text = fh.read()
    # Collect every fenced ```json ... ``` code block.
    blocks = re.findall(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
    if not blocks:
        return None, None, "无 JSON 样例"
    # The response sample is assumed to be the largest parseable JSON object.
    best = None
    for raw in blocks:
        try:
            candidate = json.loads(raw)
        except json.JSONDecodeError:
            continue
        if not isinstance(candidate, dict):
            continue
        if best is None or len(str(candidate)) > len(str(best)):
            best = candidate
    if best is None:
        return None, None, "无法解析 JSON 样例"
    names = set()
    # Nested settlement shape: {"siteProfile": {...}, "settleList": {...}} —
    # the ODS columns come from the inner settleList object only.
    if "siteProfile" in best and "settleList" in best:
        inner = best.get("settleList", {})
        if isinstance(inner, dict):
            names.update(key.lower() for key in inner)
        return names, True, None
    # Category-tree shape: goodsCategoryList wraps an array of category
    # nodes; ODS stores the flattened node fields, so sample the first node.
    wrapped = best.get("goodsCategoryList")
    if isinstance(wrapped, list):
        if wrapped and isinstance(wrapped[0], dict):
            _extract_flat(wrapped[0], names)
        return names, True, None
    # Standard flat response: every top-level key is a column candidate.
    names.update(key.lower() for key in best)
    return names, False, None
def _extract_flat(obj, fields):
    """Add the lowercased top-level key names of *obj* (a dict) to *fields*.

    Values are not descended into: a key whose value is a list or dict still
    contributes its own name, but nothing from inside that value is taken.
    Non-dict inputs are silently ignored.
    """
    if isinstance(obj, dict):
        for key in obj:
            fields.add(key.lower())
def get_all_ods_columns(conn):
    """Query column metadata for every table in the ``billiards_ods`` schema.

    Returns ``{table_name: {column_name: {"data_type", "ordinal_position"}}}``,
    with columns kept in their declared order (ordinal_position).
    """
    cursor = conn.cursor()
    cursor.execute("""
        SELECT table_name, column_name, data_type, ordinal_position
        FROM information_schema.columns
        WHERE table_schema = 'billiards_ods'
        ORDER BY table_name, ordinal_position
    """)
    records = cursor.fetchall()
    cursor.close()
    schema = {}
    for table, column, dtype, position in records:
        schema.setdefault(table, {})[column] = {
            "data_type": dtype,
            "ordinal_position": position,
        }
    return schema
def guess_pg_type(name):
    """Guess a PostgreSQL column type from an API field name.

    Used only to draft ``ALTER TABLE ... ADD COLUMN`` statements; the result
    is a heuristic and is expected to be reviewed by a human before running.

    Precedence: id-like -> bigint, money/quantity-like -> numeric(18,2),
    time/date -> timestamp, is*/able_*/can* -> boolean, enum/counter-like
    keywords -> integer, otherwise text.
    """
    n = name.lower()
    # Anything ending in "id" (id, member_id, tableId, ...) is treated as a
    # numeric identifier. (This subsumes the exact-"id" and "_id" cases.)
    if n.endswith("id"):
        return "bigint"
    money_kw = ["amount", "money", "price", "cost", "fee", "discount",
                "deduct", "balance", "charge", "sale", "refund",
                "promotion", "adjust", "rounding", "prepay", "income",
                "royalty", "grade", "point", "stock", "num"]
    for kw in money_kw:
        if kw in n:
            return "numeric(18,2)"
    if "time" in n or "date" in n:
        return "timestamp without time zone"
    # Boolean flags: snake_case "is_xxx" or camelCase "isXxx".  The camelCase
    # probe must inspect the ORIGINAL name — the previous code tested
    # n[2].isupper() on the lowercased copy, which can never be true.
    if n.startswith("is_") or (n.startswith("is") and len(name) > 2 and name[2].isupper()):
        return "boolean"
    if n.startswith("able_") or n.startswith("can"):
        return "boolean"
    int_kw = ["status", "type", "sort", "count", "seconds", "level",
              "channel", "method", "way", "enabled", "switch", "delete",
              "first", "single", "trash", "confirm", "clock", "cycle",
              "delay", "free", "virtual", "online", "show", "audit",
              "freeze", "send", "required", "scene", "range", "tag",
              "on", "minutes", "number", "duration"]
    for kw in int_kw:
        if kw in n:
            return "integer"
    return "text"
def compare_one(api_entry, md_path, ods_tables):
    """Diff one API's documented sample fields against its ODS table columns.

    Returns a result dict whose ``status`` is one of:
      skip  – no ODS table mapped, or the endpoint is flagged skip
      error – sample extraction failed, or the table does not exist
      ok    – every documented API field has a matching ODS column
      drift – at least one API field has no ODS column
    """
    result = {
        "api_id": api_entry["id"],
        "name_zh": api_entry.get("name_zh", ""),
        "ods_table": api_entry.get("ods_table"),
    }
    table = result["ods_table"]
    if not table:
        result["status"] = "skip"
        result["reason"] = "无对应 ODS 表ods_table=null"
        return result
    if api_entry.get("skip"):
        result["status"] = "skip"
        result["reason"] = "接口标记为 skip暂不可用"
        return result
    # Pull the documented field names out of the markdown JSON sample.
    api_fields, has_nested, err = extract_fields_from_md(md_path, api_entry["id"])
    if err:
        result["status"] = "error"
        result["reason"] = err
        return result
    if table not in ods_tables:
        result["status"] = "error"
        result["reason"] = f"ODS 表 {table} 不存在"
        return result
    columns = ods_tables[table]
    business_cols = {c for c in columns if c not in ODS_META_COLS}
    # Compare lowercased name sets in both directions.
    api_lower = {f.lower() for f in api_fields}
    ods_lower = {c.lower() for c in business_cols}
    api_only = sorted(api_lower - ods_lower)   # documented but missing in ODS
    ods_only = sorted(ods_lower - api_lower)   # ODS-only business columns
    matched = sorted(api_lower & ods_lower)
    result.update({
        "status": "drift" if api_only else "ok",
        "has_nested_structure": has_nested,
        "api_field_count": len(api_lower),
        "ods_biz_col_count": len(business_cols),
        "ods_total_col_count": len(columns),
        "matched_count": len(matched),
        "api_only": api_only,
        "api_only_count": len(api_only),
        "ods_only": ods_only,
        "ods_only_count": len(ods_only),
        "matched": matched,
    })
    return result
def generate_alter_sql(results, ods_tables):
    """Build ``ALTER TABLE ... ADD COLUMN`` statements for drifted tables.

    One statement per API-only field, with the type guessed from the field
    name.  NOTE: *ods_tables* is accepted for signature compatibility but is
    not consulted — the drift info carried by *results* is sufficient.
    """
    statements = []
    drifted = (r for r in results
               if r.get("status") == "drift" and r.get("api_only"))
    for report in drifted:
        table = report["ods_table"]
        statements.extend(
            f"ALTER TABLE billiards_ods.{table} "
            f"ADD COLUMN IF NOT EXISTS {field} {guess_pg_type(field)};"
            for field in report["api_only"]
        )
    return statements
def generate_markdown_report(results, alter_sqls):
    """Render the comparison results as a Markdown report string.

    Sections: (1) summary table with per-API counts, (2) per-table drift
    details, (3) ODS-only column details, (4) the generated ALTER SQL in a
    fenced block (only when *alter_sqls* is non-empty).
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        "# API 参考文档 vs ODS 实际表结构 对比报告 (v2)",
        "",
        f"> 生成时间:{now}",
        "> 数据来源:`docs/api-reference/*.md` JSON 样例 vs `billiards_ods` 实际列",
        "",
        "---",
        "",
        "## 一、汇总",
        "",
        "| API 接口 | 中文名 | ODS 表 | 状态 | API 字段数 | ODS 业务列数 | 匹配 | API 独有 | ODS 独有 |",
        "|----------|--------|--------|------|-----------|-------------|------|---------|---------|",
    ]
    total_api_only = 0
    total_ods_only = 0
    ok_count = 0
    drift_count = 0
    skip_count = 0
    error_count = 0
    for r in results:
        status = r.get("status", "?")
        if status == "skip":
            skip_count += 1
            lines.append(
                f"| {r['api_id']} | {r['name_zh']} | {r.get('ods_table', '-')} "
                f"| ⏭️ 跳过 | - | - | - | - | - |"
            )
            continue
        if status == "error":
            error_count += 1
            lines.append(
                f"| {r['api_id']} | {r['name_zh']} | {r.get('ods_table', '-')} "
                f"| ❌ 错误 | - | - | - | - | - |"
            )
            continue
        api_only_n = r.get("api_only_count", 0)
        ods_only_n = r.get("ods_only_count", 0)
        total_api_only += api_only_n
        total_ods_only += ods_only_n
        if status == "ok":
            ok_count += 1
            badge = "✅ 对齐"
        else:
            drift_count += 1
            badge = "⚠️ 漂移"
        lines.append(
            f"| {r['api_id']} | {r['name_zh']} | {r['ods_table']} "
            f"| {badge} | {r['api_field_count']} | {r['ods_biz_col_count']} "
            f"| {r['matched_count']} | {api_only_n} | {ods_only_n} |"
        )
    lines.extend([
        "",
        f"**统计**:对齐 {ok_count} / 漂移 {drift_count} / 跳过 {skip_count} / 错误 {error_count}",
        f"**API 独有字段总计**{total_api_only}(需要 ALTER TABLE ADD COLUMN",
        f"**ODS 独有列总计**{total_ods_only}API 中不存在,可能是历史遗留或 ETL 派生列)",
        "",
    ])
    # Details: per-table field differences for every drifted table.
    drift_results = [r for r in results if r.get("status") == "drift"]
    if drift_results:
        lines.extend(["---", "", "## 二、漂移详情", ""])
        for r in drift_results:
            lines.extend([
                f"### {r['api_id']}{r['name_zh']})→ `{r['ods_table']}`",
                "",
            ])
            if r["api_only"]:
                lines.append("**API 有 / ODS 缺**")
                for f in r["api_only"]:
                    pg_type = guess_pg_type(f)
                    lines.append(f"- `{f}` → 建议类型 `{pg_type}`")
                lines.append("")
            if r["ods_only"]:
                lines.append("**ODS 有 / API 无**(非元列):")
                for f in r["ods_only"]:
                    lines.append(f"- `{f}`")
                lines.append("")
    # ODS-only column details, across all non-skip/non-error tables.
    ods_only_results = [r for r in results if r.get("ods_only") and r.get("status") in ("ok", "drift")]
    if ods_only_results:
        lines.extend(["---", "", "## 三、ODS 独有列详情API 中不存在)", ""])
        for r in ods_only_results:
            if not r["ods_only"]:
                continue
            lines.extend([
                f"### `{r['ods_table']}`{r['name_zh']}",
                "",
                "| 列名 | 说明 |",
                "|------|------|",
            ])
            for f in r["ods_only"]:
                lines.append(f"| `{f}` | ODS 独有API JSON 样例中不存在 |")
            lines.append("")
    # Generated ALTER SQL, appended as a fenced sql block.
    if alter_sqls:
        lines.extend([
            "---", "",
            "## 四、ALTER SQL对齐 ODS 表结构)", "",
            "```sql",
            "-- 自动生成的 ALTER TABLE 语句",
            f"-- 生成时间:{now}",
            "-- 注意:类型为根据字段名猜测,请人工复核后执行",
            "",
        ])
        lines.extend(alter_sqls)
        lines.extend(["", "```", ""])
    return "\n".join(lines)
def main():
    """Entry point.

    Connects to PostgreSQL (``PG_DSN`` env var), compares every registered
    API endpoint's documented JSON sample against its ODS table, prints a
    per-endpoint progress line, and writes three artifacts: a JSON report,
    a Markdown report, and (only when drift exists) an ALTER-TABLE
    migration script wrapped in a transaction.
    """
    dsn = os.environ.get("PG_DSN")
    if not dsn:
        print("错误:未设置 PG_DSN 环境变量", file=sys.stderr)
        sys.exit(1)
    print("连接数据库...")
    conn = psycopg2.connect(dsn)
    print("查询 ODS 表结构...")
    ods_tables = get_all_ods_columns(conn)
    print(f"{len(ods_tables)} 张 ODS 表")
    print("加载 API 注册表...")
    registry = load_registry()
    print(f"{len(registry)} 个 API 端点")
    results = []
    for entry in registry:
        api_id = entry["id"]
        ods_table = entry.get("ods_table")
        md_path = os.path.join(ROOT, "docs", "api-reference", f"{api_id}.md")
        if not os.path.exists(md_path):
            # Registry entry without a matching reference doc: record as error.
            results.append({
                "api_id": api_id,
                "name_zh": entry.get("name_zh", ""),
                "ods_table": ods_table,
                "status": "error",
                "reason": f"文档不存在: {md_path}",
            })
            continue
        r = compare_one(entry, md_path, ods_tables)
        results.append(r)
        # NOTE(review): the "ok"/"error" icons are empty strings here while
        # other statuses have emoji — possibly lost in transcription; confirm.
        status_icon = {"ok": "", "drift": "⚠️", "skip": "⏭️", "error": ""}.get(r["status"], "?")
        extra = ""
        if r.get("api_only_count"):
            extra = f" (API独有: {r['api_only_count']})"
        if r.get("ods_only_count"):
            extra += f" (ODS独有: {r['ods_only_count']})"
        print(f" {status_icon} {api_id}{ods_table or '-'}{extra}")
    conn.close()
    # Draft ALTER statements for drifted tables.
    alter_sqls = generate_alter_sql(results, ods_tables)
    # JSON report (machine-readable).
    json_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.json")
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\nJSON 报告: {json_path}")
    # Markdown report (human-readable).
    md_report = generate_markdown_report(results, alter_sqls)
    md_path = os.path.join(ROOT, "docs", "reports", "api_ods_comparison_v2.md")
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md_report)
    print(f"Markdown 报告: {md_path}")
    # Migration script; only written when there is something to apply.
    if alter_sqls:
        sql_path = os.path.join(ROOT, "database", "migrations",
                                "20260213_align_ods_with_api_v2.sql")
        os.makedirs(os.path.dirname(sql_path), exist_ok=True)
        with open(sql_path, "w", encoding="utf-8") as f:
            f.write("-- API vs ODS 对齐迁移脚本 (v2)\n")
            f.write(f"-- 生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write("-- 注意:类型为根据字段名猜测,请人工复核后执行\n\n")
            f.write("BEGIN;\n\n")
            for sql in alter_sqls:
                f.write(sql + "\n")
            f.write("\nCOMMIT;\n")
        print(f"ALTER SQL: {sql_path}")
    else:
        print("无需 ALTER SQL所有表已对齐")
    # Final summary counters.
    ok_n = sum(1 for r in results if r.get("status") == "ok")
    drift_n = sum(1 for r in results if r.get("status") == "drift")
    skip_n = sum(1 for r in results if r.get("status") == "skip")
    err_n = sum(1 for r in results if r.get("status") == "error")
    print(f"\n汇总:对齐 {ok_n} / 漂移 {drift_n} / 跳过 {skip_n} / 错误 {err_n}")
    print(f"ALTER SQL 语句数:{len(alter_sqls)}")
if __name__ == "__main__":
    main()
# ──────────────────────────────────────────────
# AI_CHANGELOG:
# - 日期: 2026-02-13
# Prompt: P20260213-223000 — 用 API 参考文档比对数据库 ODS 实际表结构(重做,不依赖 DDL
# 直接原因: 前次比对脚本 stock_goods_category_tree 嵌套结构解析 bug需重写脚本
# 变更摘要: 完整重写脚本,从 api-reference/*.md JSON 样例提取字段,查询 PG billiards_ods 实际列,
# 处理三种特殊结构(标准/settleList 嵌套/goodsCategoryList 数组包装),输出 JSON+MD 报告
# 风险与验证: 纯分析脚本,不修改数据库;验证方式:运行脚本确认 "对齐 22 / 漂移 0"
# ──────────────────────────────────────────────