在前后端开发联调前的提交 20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -4,8 +4,8 @@
从真实 API 获取 JSON 样本,结合 DDL 和 ETL 源码,生成带跨层跳转链接的 Markdown 文档。
用法: python scripts/ops/gen_full_dataflow_doc.py
输出: docs/reports/dataflow_api_ods_dwd.md
tmp/api_samples/*.json(API 原始响应缓存)
输出: $FULL_DATAFLOW_DOC_ROOT/dataflow_api_ods_dwd.md(由 .env 配置)
$API_SAMPLE_CACHE_ROOT/*.json(API 原始响应缓存)
"""
import json
import os
@@ -24,8 +24,10 @@ from dotenv import load_dotenv
ROOT = Path(__file__).resolve().parents[2]
ETL = ROOT / "apps" / "etl" / "pipelines" / "feiqiu"
DB = ROOT / "db" / "etl_feiqiu" / "schemas"
OUT = ROOT / "docs" / "reports" / "dataflow_api_ods_dwd.md"
SAMPLE_DIR = ROOT / "tmp" / "api_samples"
# 从 .env 读取输出路径(缺失时抛 KeyError)
from _env_paths import get_output_path as _get_path
OUT = _get_path("FULL_DATAFLOW_DOC_ROOT") / "dataflow_api_ods_dwd.md"
SAMPLE_DIR = _get_path("API_SAMPLE_CACHE_ROOT")
TZ = ZoneInfo("Asia/Shanghai")
@@ -91,17 +93,6 @@ ODS_SPECS = [
"extra_params": {},
"description": "助教服务流水",
},
{
"code": "ODS_ASSISTANT_ABOLISH",
"table": "assistant_cancellation_records",
"endpoint": "/AssistantPerformance/GetAbolitionAssistant",
"data_path": ("data",),
"list_key": "abolitionAssistants",
"time_fields": ("startTime", "endTime"),
"requires_window": True,
"extra_params": {},
"description": "助教废除记录",
},
{
"code": "ODS_STORE_GOODS_SALES",
"table": "store_goods_sales_records",
@@ -289,17 +280,6 @@ ODS_SPECS = [
"extra_params": {},
"description": "租户商品档案",
},
{
"code": "ODS_SETTLEMENT_TICKET",
"table": "settlement_ticket_details",
"endpoint": "/Order/GetOrderSettleTicketNew",
"data_path": (),
"list_key": None,
"time_fields": None,
"requires_window": False,
"extra_params": {},
"description": "结账小票详情(按 orderSettleId 逐条获取,不走常规分页)",
},
]
@@ -373,7 +353,6 @@ def fetch_records(spec: dict, target_count: int = 200) -> list[dict]:
获取 API 记录。
- 有时间字段的表:从今天往回 10 天一批,不够则继续扩展,最多 10 次重试
- 无时间字段的表:单次请求 200 条
- 特殊表settlement_ticket_details跳过
"""
endpoint = spec["endpoint"]
data_path = spec["data_path"]
@@ -381,10 +360,6 @@ def fetch_records(spec: dict, target_count: int = 200) -> list[dict]:
time_fields = spec["time_fields"]
extra_params = spec.get("extra_params", {})
# 结账小票是逐条获取的,跳过
if spec["table"] == "settlement_ticket_details":
return []
all_records = []
if time_fields:
@@ -1103,12 +1078,6 @@ def main():
print(f" [{spec['code']}] {table}: 请求 API...", end=" ", flush=True)
if spec["table"] == "settlement_ticket_details":
print("跳过(逐条获取,不走常规分页)")
api_data[table] = []
api_fields[table] = OrderedDict()
continue
try:
records = fetch_records(spec, target_count=200)
api_data[table] = records
@@ -1129,49 +1098,6 @@ def main():
api_data[table] = []
api_fields[table] = OrderedDict()
# ── 特殊处理settlement_ticket_details 从数据库 payload 获取 ──
# 该表不走常规 API 分页,尝试从已有缓存或跳过
ticket_table = "settlement_ticket_details"
if not api_data.get(ticket_table) and not api_fields.get(ticket_table):
# 尝试从结算记录的 API 响应中获取小票结构(如果有的话)
print(f" [{ticket_table}] 无法通过常规 API 获取,将从数据库 payload 分析")
try:
import psycopg2
dsn = os.environ.get("PG_DSN", "")
if dsn:
conn = psycopg2.connect(dsn)
conn.set_client_encoding("UTF8")
# 自动检测 schema
with conn.cursor() as cur:
cur.execute("""
SELECT schema_name FROM information_schema.schemata
WHERE schema_name IN ('ods', 'billiards_ods')
ORDER BY schema_name
""")
schemas = [r[0] for r in cur.fetchall()]
ods_schema = "ods" if "ods" in schemas else schemas[0] if schemas else "ods"
with conn.cursor() as cur:
cur.execute(f"""
SELECT payload FROM {ods_schema}.{ticket_table}
WHERE payload IS NOT NULL
ORDER BY fetched_at DESC LIMIT 10
""")
rows = cur.fetchall()
payloads = []
for row in rows:
p = row[0]
if isinstance(p, str):
p = json.loads(p)
if isinstance(p, dict):
payloads.append(p)
conn.close()
if payloads:
api_data[ticket_table] = payloads
api_fields[ticket_table] = analyze_json_fields(payloads)
print(f" 从数据库获取 {len(payloads)} 条 payload")
except Exception as e:
print(f" 从数据库获取失败: {e}")
# ── 生成文档 ──
print()
print("生成文档...")