This commit is contained in:
Neo
2026-01-27 22:45:50 +08:00
parent a6ad343092
commit 4c192e921c
476 changed files with 381543 additions and 5819 deletions

View File

@@ -8,6 +8,8 @@ import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from api.endpoint_routing import plan_calls
DEFAULT_BROWSER_HEADERS = {
"Accept": "application/json, text/plain, */*",
"Content-Type": "application/json",
@@ -142,7 +144,7 @@ class APIClient:
raise ValueError(f"API 返回错误 code={code} msg={msg}")
# ------------------------------------------------------------------ 分页
def iter_paginated(
def _iter_paginated_single(
self,
endpoint: str,
params: dict | None,
@@ -155,8 +157,7 @@ class APIClient:
page_end: int | None = None,
) -> Iterable[tuple[int, list, dict, dict]]:
"""
分页迭代器:逐页拉取数据并产出 (page_no, records, request_params, raw_response)
page_size=None 时不附带分页参数,仅拉取一次。
单一 endpoint 的分页迭代器(不包含 recent/former 路由逻辑)
"""
base_params = dict(params or {})
page = page_start
@@ -183,6 +184,42 @@ class APIClient:
page += 1
def iter_paginated(
    self,
    endpoint: str,
    params: dict | None,
    page_size: int | None = 200,
    page_field: str = "page",
    size_field: str = "limit",
    data_path: tuple = ("data",),
    list_key: str | Sequence[str] | None = None,
    page_start: int = 1,
    page_end: int | None = None,
) -> Iterable[tuple[int, list, dict, dict]]:
    """
    Paginated iterator with recent/former endpoint routing.

    Fetches page by page and yields
    ``(page_no, records, request_params, raw_response)`` tuples.
    With ``page_size=None`` no paging parameters are attached and the
    endpoint is fetched only once.

    ``plan_calls(endpoint, params)`` decides which endpoint(s) to hit; when
    it returns more than one call, the calls are consumed in order and their
    pages are yielded as one continuous sequence.

    The yielded ``page_no`` is a running counter that starts at
    ``page_start`` and increases by one per yielded page across *all* routed
    calls. Starting at ``page_start`` (instead of a fixed 1) keeps the
    numbering identical to the single-endpoint iterator when only one call
    is planned, so callers that name files after ``page_no`` neither
    overwrite pages of an earlier segment nor pages fetched by an earlier
    run that stopped before ``page_start``.

    Note: ``page_start`` and ``page_end`` are forwarded unchanged to every
    routed call, i.e. they bound each segment individually.
    """
    global_page = page_start
    for call in plan_calls(endpoint, params):
        segment_pages = self._iter_paginated_single(
            endpoint=call.endpoint,
            params=call.params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
            page_start=page_start,
            page_end=page_end,
        )
        # The per-segment page number is discarded on purpose: it restarts
        # for every routed call and would collide between segments.
        for _, records, request_params, payload in segment_pages:
            yield global_page, records, request_params, payload
            global_page += 1
def get_paginated(
self,
endpoint: str,

View File

@@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
"""
“近期记录 / 历史记录(Former)”接口路由规则。
需求:
- 当请求参数包含可定义时间范围的字段时,根据当前时间(北京时间/上海时区)判断:
- 3个月自然月之前 -> 使用“历史记录”接口
- 3个月以内 -> 使用“近期记录”接口
- 若时间范围跨越边界 -> 拆分为两段分别请求并合并(由上层分页迭代器顺序产出)
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from dateutil import parser as dtparser
from dateutil.relativedelta import relativedelta
from zoneinfo import ZoneInfo
# Default time zone used when parsing and formatting time-window parameters.
ROUTING_TZ = ZoneInfo("Asia/Shanghai")
# Default number of calendar months that count as "recent".
RECENT_MONTHS = 3
# Updated per `fetch-test/recent_vs_former_report.md`.
# A value of None means there is no history ("former") endpoint; a value equal
# to the key means the same endpoint can also be used to query history.
RECENT_TO_FORMER_OVERRIDES: dict[str, str | None] = {
    "/AssistantPerformance/GetAbolitionAssistant": None,
    "/Site/GetSiteTableUseDetails": "/Site/GetSiteTableUseDetails",
    "/GoodsStockManage/QueryGoodsOutboundReceipt": "/GoodsStockManage/QueryFormerGoodsOutboundReceipt",
    "/Promotion/GetOfflineCouponConsumePageList": "/Promotion/GetOfflineCouponConsumePageList",
    "/Order/GetRefundPayLogList": None,
    # Known special cases
    "/Site/GetAllOrderSettleList": "/Site/GetFormerOrderSettleList",
    "/PayLog/GetPayLogListPage": "/PayLog/GetFormerPayLogListPage",
}
# (start_key, end_key) parameter-name pairs that are checked, in this order,
# when looking for a time window in the request parameters.
TIME_WINDOW_KEYS: tuple[tuple[str, str], ...] = (
    ("startTime", "endTime"),
    ("rangeStartTime", "rangeEndTime"),
    ("StartPayTime", "EndPayTime"),
)
@dataclass(frozen=True)
class WindowSpec:
    """Immutable description of a time window found in request parameters."""

    # Names of the parameters the window bounds were read from.
    start_key: str
    end_key: str
    # Parsed window bounds.
    start: datetime
    end: datetime
@dataclass(frozen=True)
class RoutedCall:
    """One planned request: the endpoint to call and the parameters to send."""

    endpoint: str
    params: dict
def is_former_endpoint(endpoint: str) -> bool:
    """Return True when the endpoint path contains the substring "Former".

    Falsy input (``None``, ``""``) is treated as an empty path.
    """
    path = str(endpoint) if endpoint else ""
    return path.find("Former") != -1
def _parse_dt(value: object, tz: ZoneInfo) -> datetime | None:
if value is None:
return None
s = str(value).strip()
if not s:
return None
dt = dtparser.parse(s)
if dt.tzinfo is None:
return dt.replace(tzinfo=tz)
return dt.astimezone(tz)
def _fmt_dt(dt: datetime, tz: ZoneInfo) -> str:
return dt.astimezone(tz).strftime("%Y-%m-%d %H:%M:%S")
def extract_window_spec(params: dict | None, tz: ZoneInfo = ROUTING_TZ) -> WindowSpec | None:
    """Find the first complete time window in ``params``.

    The key pairs in ``TIME_WINDOW_KEYS`` are tried in order. A pair is
    considered when at least one of its keys is present, and it is accepted
    only when both values parse to a datetime. Returns ``None`` when
    ``params`` is not a non-empty dict or when no pair yields both bounds.
    """
    if not (isinstance(params, dict) and params):
        return None

    for start_key, end_key in TIME_WINDOW_KEYS:
        if start_key not in params and end_key not in params:
            continue
        # Both values are always parsed, even if the first one is missing.
        start = _parse_dt(params.get(start_key), tz)
        end = _parse_dt(params.get(end_key), tz)
        if start is None or end is None:
            continue
        return WindowSpec(start_key=start_key, end_key=end_key, start=start, end=end)

    return None
def derive_former_endpoint(recent_endpoint: str) -> str | None:
    """Map a "recent" endpoint path to its history ("former") counterpart.

    Resolution order:
      1. Empty/blank input -> ``None``.
      2. An entry in ``RECENT_TO_FORMER_OVERRIDES`` wins (its value may be
         ``None`` or the same path).
      3. A path that already is a former endpoint is returned unchanged.
      4. Otherwise the first occurrence of ``"Get"`` is replaced with
         ``"GetFormer"``; a path without ``"Get"`` is returned unchanged.
    """
    path = str(recent_endpoint or "").strip()
    if not path:
        return None

    if path in RECENT_TO_FORMER_OVERRIDES:
        return RECENT_TO_FORMER_OVERRIDES[path]

    if is_former_endpoint(path):
        return path

    # partition() splits on the first "Get"; an empty separator means no match.
    prefix, marker, suffix = path.partition("Get")
    if not marker:
        return path
    return f"{prefix}GetFormer{suffix}"
def recent_boundary(now: datetime, months: int = RECENT_MONTHS) -> datetime:
    """
    Return the calendar-month boundary of the "recent" period.

    The boundary is 00:00:00 on the 1st day of the month that contains
    ``now`` minus ``months`` months.

    Raises:
        ValueError: if ``now`` carries no time zone information.
    """
    if now.tzinfo is None:
        raise ValueError("now 必须为时区时间")

    shifted = now - relativedelta(months=months)
    month_start = {"day": 1, "hour": 0, "minute": 0, "second": 0, "microsecond": 0}
    return shifted.replace(**month_start)
def plan_calls(
    endpoint: str,
    params: dict | None,
    *,
    now: datetime | None = None,
    tz: ZoneInfo = ROUTING_TZ,
    months: int = RECENT_MONTHS,
) -> list[RoutedCall]:
    """
    Decide which endpoint(s) to call for ``endpoint`` + ``params``.

    Returns a list with one call, or two calls when the time window in
    ``params`` straddles the recent/former boundary.

    The request is passed through unchanged (single call to ``endpoint``)
    when any of the following holds:
      - ``params`` is empty;
      - ``endpoint`` already is a former endpoint (no second routing);
      - no complete time window is found in ``params``;
      - no distinct former endpoint can be derived.

    Otherwise, with ``boundary = recent_boundary(now, months)``:
      - window ends at or before the boundary -> former endpoint only;
      - window starts at or after the boundary -> recent endpoint only;
      - window crosses the boundary -> two calls, former first
        (start..boundary), then recent (boundary..end), with the window
        parameters rewritten as ``YYYY-MM-DD HH:MM:SS`` strings in ``tz``.
    """
    base_params = dict(params or {})
    passthrough = [RoutedCall(endpoint=endpoint, params=base_params)]

    # An explicitly requested former endpoint is never routed a second time.
    if not base_params or is_former_endpoint(endpoint):
        return passthrough

    window = extract_window_spec(base_params, tz)
    if window is None:
        return passthrough

    former_endpoint = derive_former_endpoint(endpoint)
    if former_endpoint is None or former_endpoint == endpoint:
        return passthrough

    current = datetime.now(tz) if now is None else now
    boundary = recent_boundary(current.astimezone(tz), months=months)

    if window.end <= boundary:
        return [RoutedCall(endpoint=former_endpoint, params=base_params)]
    if window.start >= boundary:
        return passthrough

    # Window crosses the boundary: split into two segments
    # (old data -> former endpoint, new data -> recent endpoint).
    cut = _fmt_dt(boundary, tz)
    older_params = {
        **base_params,
        window.start_key: _fmt_dt(window.start, tz),
        window.end_key: cut,
    }
    newer_params = {
        **base_params,
        window.start_key: cut,
        window.end_key: _fmt_dt(window.end, tz),
    }
    return [
        RoutedCall(endpoint=former_endpoint, params=older_params),
        RoutedCall(endpoint=endpoint, params=newer_params),
    ]

View File

@@ -20,6 +20,10 @@ class LocalJsonClient:
if not self.base_dir.exists():
raise FileNotFoundError(f"JSON 目录不存在: {self.base_dir}")
def get_source_hint(self, endpoint: str) -> str:
    """Return the path of the JSON file for ``endpoint`` under ``base_dir``.

    Used as the ``source_file`` lineage hint.
    """
    filename = endpoint_to_filename(endpoint)
    return str(self.base_dir / filename)
def iter_paginated(
self,
endpoint: str,

View File

@@ -7,6 +7,7 @@ from pathlib import Path
from typing import Any, Iterable, Tuple
from api.client import APIClient
from api.endpoint_routing import plan_calls
from utils.json_store import dump_json, endpoint_to_filename
@@ -33,6 +34,10 @@ class RecordingAPIClient:
self.last_dump: dict[str, Any] | None = None
# ------------------------------------------------------------------ public API
def get_source_hint(self, endpoint: str) -> str:
    """Return the path of the JSON dump for ``endpoint`` under ``output_dir``.

    Used as the ``source_file`` lineage hint.
    """
    filename = endpoint_to_filename(endpoint)
    return str(self.output_dir / filename)
def iter_paginated(
self,
endpoint: str,
@@ -99,11 +104,18 @@ class RecordingAPIClient:
):
filename = endpoint_to_filename(endpoint)
path = self.output_dir / filename
routing_calls = []
try:
for call in plan_calls(endpoint, params):
routing_calls.append({"endpoint": call.endpoint, "params": call.params})
except Exception:
routing_calls = []
payload = {
"task_code": self.task_code,
"run_id": self.run_id,
"endpoint": endpoint,
"params": params or {},
"endpoint_routing": {"calls": routing_calls} if routing_calls else None,
"page_size": page_size,
"pages": pages,
"total_records": total_records,