This commit is contained in:
Neo
2026-01-27 22:45:50 +08:00
parent a6ad343092
commit 4c192e921c
476 changed files with 381543 additions and 5819 deletions

View File

@@ -8,6 +8,8 @@ import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from api.endpoint_routing import plan_calls
DEFAULT_BROWSER_HEADERS = {
"Accept": "application/json, text/plain, */*",
"Content-Type": "application/json",
@@ -142,7 +144,7 @@ class APIClient:
raise ValueError(f"API 返回错误 code={code} msg={msg}")
# ------------------------------------------------------------------ 分页
def iter_paginated(
def _iter_paginated_single(
self,
endpoint: str,
params: dict | None,
@@ -155,8 +157,7 @@ class APIClient:
page_end: int | None = None,
) -> Iterable[tuple[int, list, dict, dict]]:
"""
分页迭代器:逐页拉取数据并产出 (page_no, records, request_params, raw_response)
page_size=None 时不附带分页参数,仅拉取一次。
单一 endpoint 的分页迭代器(不包含 recent/former 路由逻辑)
"""
base_params = dict(params or {})
page = page_start
@@ -183,6 +184,42 @@ class APIClient:
page += 1
def iter_paginated(
    self,
    endpoint: str,
    params: dict | None,
    page_size: int | None = 200,
    page_field: str = "page",
    size_field: str = "limit",
    data_path: tuple = ("data",),
    list_key: str | Sequence[str] | None = None,
    page_start: int = 1,
    page_end: int | None = None,
) -> Iterable[tuple[int, list, dict, dict]]:
    """
    Paginated iterator: fetch page by page and yield
    ``(page_no, records, request_params, raw_response)``.

    The request is first expanded by ``plan_calls(endpoint, params)`` into
    one or more planned calls, each exposing ``.endpoint`` and ``.params``.
    Every planned call is paged through ``_iter_paginated_single`` in plan
    order, with all pagination arguments forwarded unchanged (including
    ``page_size=None``, whose handling lives in that helper).

    ``page_no`` is a running counter across *all* planned calls, not the
    per-endpoint page number. It starts at ``page_start`` and increases by
    one for every yielded page, so it stays unique when a request is split
    into several segments, and a caller that resumes from a later
    ``page_start`` does not get numbers that collide with earlier pages.

    NOTE(review): ``page_start`` / ``page_end`` are applied to every
    planned call individually -- confirm that is intended when a request
    is split into more than one segment.
    """
    # recent/former routing (per the original note): when params carry a
    # time-range field, plan_calls picks the endpoint and may split a range
    # that crosses the boundary into two sequential requests.
    # The generator expression evaluates plan_calls(...) once up front and
    # then pulls pages lazily, segment after segment.
    pages = (
        page
        for call in plan_calls(endpoint, params)
        for page in self._iter_paginated_single(
            endpoint=call.endpoint,
            params=call.params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
            page_start=page_start,
            page_end=page_end,
        )
    )
    # Number pages from page_start (not a hard-coded 1) so callers that name
    # output files by page_no never overwrite pages fetched earlier.
    for global_page, (_, records, request_params, payload) in enumerate(
        pages, start=page_start
    ):
        yield global_page, records, request_params, payload
def get_paginated(
self,
endpoint: str,