init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

View File

@@ -0,0 +1,293 @@
# -*- coding: utf-8 -*-
"""API客户端统一封装 POST/重试/分页与列表提取逻辑。"""
from __future__ import annotations
from typing import Iterable, Sequence, Tuple
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from api.endpoint_routing import plan_calls
# Browser-like default request headers. APIClient._build_headers copies this
# dict, then layers ``headers_extra`` and the Authorization token on top.
DEFAULT_BROWSER_HEADERS = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Origin": "https://pc.ficoo.vip",
    "Referer": "https://pc.ficoo.vip/",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9",
    "sec-ch-ua": '"Google Chrome";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
    "sec-ch-ua-platform": '"Windows"',
    "sec-ch-ua-mobile": "?0",
    "sec-fetch-site": "same-origin",
    "sec-fetch-mode": "cors",
    "sec-fetch-dest": "empty",
    "priority": "u=1, i",
    "X-Requested-With": "XMLHttpRequest",
    "DNT": "1",
}
# Fallback keys probed in this order by APIClient._extract_list (after any
# caller-supplied ``list_key``) to locate the record list inside a data object.
DEFAULT_LIST_KEYS: Tuple[str, ...] = (
    "list",
    "rows",
    "records",
    "items",
    "dataList",
    "data_list",
    "tenantMemberInfos",
    "tenantMemberCardLogs",
    "tenantMemberCards",
    "settleList",
    "orderAssistantDetails",
    "assistantInfos",
    "siteTables",
    "taiFeeAdjustInfos",
    "siteTableUseDetailsList",
    "tenantGoodsList",
    "packageCouponList",
    "queryDeliveryRecordsList",
    "goodsCategoryList",
    "orderGoodsList",
    "orderGoodsLedgers",
)
class APIClient:
    """HTTP API client that sends POST requests with a JSON body by default.

    Combines a retrying ``requests`` session, business-level error detection
    (the ``code`` field of the response), page iteration, and extraction of the
    record list from a response payload.
    """

    def __init__(
        self,
        base_url: str,
        token: str | None = None,
        timeout: int = 20,
        retry_max: int = 3,
        headers_extra: dict | None = None,
    ):
        """Store the connection settings; the HTTP session is created lazily.

        Args:
            base_url: API root; a trailing ``/`` is stripped.
            token: Auth token, with or without a ``Bearer `` prefix.
            timeout: Per-request timeout passed to ``requests``.
            retry_max: Total attempts per request (retries = ``retry_max - 1``).
            headers_extra: Headers merged over ``DEFAULT_BROWSER_HEADERS``.
        """
        self.base_url = (base_url or "").rstrip("/")
        self.token = self._normalize_token(token)
        self.timeout = timeout
        self.retry_max = retry_max
        self.headers_extra = headers_extra or {}
        self._session: requests.Session | None = None

    # ------------------------------------------------------------------ HTTP basics
    def _get_session(self) -> requests.Session:
        """Return the cached session, creating it with retry support on first use."""
        if self._session is None:
            self._session = requests.Session()
            retries = max(0, int(self.retry_max) - 1)
            retry = Retry(
                # total=None leaves the per-category counters below in charge.
                total=None,
                connect=retries,
                read=retries,
                status=retries,
                allowed_methods=frozenset(["GET", "POST"]),
                status_forcelist=(429, 500, 502, 503, 504),
                backoff_factor=0.5,
                respect_retry_after_header=True,
                # Hand the final response back so raise_for_status() reports it.
                raise_on_status=False,
            )
            adapter = HTTPAdapter(max_retries=retry)
            self._session.mount("http://", adapter)
            self._session.mount("https://", adapter)
            self._session.headers.update(self._build_headers())
        return self._session

    def close(self) -> None:
        """Close the underlying session, if any; a later request opens a new one."""
        if self._session is not None:
            self._session.close()
            self._session = None

    def get(self, endpoint: str, params: dict | None = None) -> dict:
        """Legacy-named request entry point; actually issues a POST with a JSON body."""
        return self._post_json(endpoint, params)

    def _post_json(self, endpoint: str, payload: dict | None = None) -> dict:
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded response.

        Raises:
            ValueError: ``base_url`` is empty, or the response carries a
                non-zero ``code``.
            requests.HTTPError: the final HTTP status is an error status.
        """
        if not self.base_url:
            raise ValueError("API base_url 未配置")
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        sess = self._get_session()
        resp = sess.post(url, json=payload or {}, timeout=self.timeout)
        resp.raise_for_status()
        data = resp.json()
        self._ensure_success(data)
        return data

    def _build_headers(self) -> dict:
        """Return default headers overlaid with ``headers_extra`` and the auth token."""
        headers = dict(DEFAULT_BROWSER_HEADERS)
        headers.update(self.headers_extra)
        if self.token:
            headers["Authorization"] = self.token
        return headers

    @staticmethod
    def _normalize_token(token: str | None) -> str | None:
        """Return ``token`` with a ``Bearer `` prefix, or None when it is blank."""
        if not token:
            return None
        t = str(token).strip()
        if not t:
            # A whitespace-only token must not become the header "Bearer ".
            return None
        if not t.lower().startswith("bearer "):
            t = f"Bearer {t}"
        return t

    @staticmethod
    def _ensure_success(payload: dict):
        """Raise ValueError when the response ``code`` is present and non-zero.

        Raising here lets upper layers retry or log the failure.
        """
        if isinstance(payload, dict) and "code" in payload:
            code = payload.get("code")
            if code not in (0, "0", None):
                msg = payload.get("msg") or payload.get("message") or ""
                raise ValueError(f"API 返回错误 code={code} msg={msg}")

    # ------------------------------------------------------------------ Pagination
    def _iter_paginated_single(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | Sequence[str] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> Iterable[tuple[int, list, dict, dict]]:
        """Iterate the pages of one endpoint (no recent/former routing).

        Yields ``(page_no, records, request_params, raw_response)``. Iteration
        stops after a single request when ``page_size`` is None, after
        ``page_end``, or on the first empty or short page.
        """
        base_params = dict(params or {})
        page = page_start
        while True:
            page_params = dict(base_params)
            if page_size is not None:
                page_params[page_field] = page
                page_params[size_field] = page_size
            payload = self._post_json(endpoint, page_params)
            records = self._extract_list(payload, data_path, list_key)
            yield page, records, page_params, payload
            if page_size is None:
                break
            if page_end is not None and page >= page_end:
                break
            # An empty or short page means the server has no more data.
            if not records or len(records) < page_size:
                break
            page += 1

    def iter_paginated(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | Sequence[str] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> Iterable[tuple[int, list, dict, dict]]:
        """Fetch page by page, yielding ``(page_no, records, request_params, raw_response)``.

        With ``page_size=None`` no paging parameters are sent and only one
        request is made per routed call.
        """
        # recent/former routing: plan_calls decides which endpoint(s) serve the
        # requested time range and may split it into two calls. Pages are
        # renumbered continuously across calls so callers that name files by
        # page_no never overwrite an earlier page.
        call_plan = plan_calls(endpoint, params)
        global_page = 1
        for call in call_plan:
            for _, records, request_params, payload in self._iter_paginated_single(
                endpoint=call.endpoint,
                params=call.params,
                page_size=page_size,
                page_field=page_field,
                size_field=size_field,
                data_path=data_path,
                list_key=list_key,
                page_start=page_start,
                page_end=page_end,
            ):
                yield global_page, records, request_params, payload
                global_page += 1

    def get_paginated(
        self,
        endpoint: str,
        params: dict,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | Sequence[str] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> tuple[list, list]:
        """Fetch every page and return ``(all_records, pages_meta)``.

        ``pages_meta`` holds one ``{"page", "request", "response"}`` dict per page.
        """
        records, pages_meta = [], []
        for page_no, page_records, request_params, response in self.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
            page_start=page_start,
            page_end=page_end,
        ):
            records.extend(page_records)
            pages_meta.append(
                {"page": page_no, "request": request_params, "response": response}
            )
        return records, pages_meta

    # ------------------------------------------------------------------ Response parsing
    @classmethod
    def _extract_list(
        cls, payload: dict | list, data_path: tuple, list_key: str | Sequence[str] | None
    ) -> list:
        """Locate the record list inside ``payload``.

        Walks ``data_path``, then tries ``list_key``, ``DEFAULT_LIST_KEYS`` and
        finally the first list-valued entry. Returns ``[]`` when nothing matches.
        """
        cur: object = payload
        if isinstance(cur, list):
            return cur
        for key in data_path:
            cur = cur.get(key) if isinstance(cur, dict) else None
            if cur is None:
                break
        if isinstance(cur, list):
            return cur
        if isinstance(cur, dict):
            if list_key:
                keys = (list_key,) if isinstance(list_key, str) else tuple(list_key)
                for k in keys:
                    if isinstance(cur.get(k), list):
                        return cur[k]
            for k in DEFAULT_LIST_KEYS:
                if isinstance(cur.get(k), list):
                    return cur[k]
            # Last resort: the first list-valued entry of the data object.
            for v in cur.values():
                if isinstance(v, list):
                    return v
        return []

View File

@@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
"""
“近期记录 / 历史记录(Former)”接口路由规则。
需求:
- 当请求参数包含可定义时间范围的字段时,根据当前时间(北京时间/上海时区)判断:
- 3个月自然月之前 -> 使用“历史记录”接口
- 3个月以内 -> 使用“近期记录”接口
- 若时间范围跨越边界 -> 拆分为两段分别请求并合并(由上层分页迭代器顺序产出)
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from dateutil import parser as dtparser
from dateutil.relativedelta import relativedelta
from zoneinfo import ZoneInfo
# Default timezone in which time windows are parsed and routing is decided.
ROUTING_TZ = ZoneInfo("Asia/Shanghai")
# Default number of months that count as "recent" (see recent_boundary).
RECENT_MONTHS = 3
# Updated per `fetch-test/recent_vs_former_report.md`. A value of None means the
# endpoint has no history counterpart; a value equal to its key means the same
# endpoint also serves historical data.
RECENT_TO_FORMER_OVERRIDES: dict[str, str | None] = {
    "/AssistantPerformance/GetAbolitionAssistant": None,
    "/Site/GetSiteTableUseDetails": "/Site/GetSiteTableUseDetails",
    "/GoodsStockManage/QueryGoodsOutboundReceipt": "/GoodsStockManage/QueryFormerGoodsOutboundReceipt",
    "/Promotion/GetOfflineCouponConsumePageList": "/Promotion/GetOfflineCouponConsumePageList",
    "/Order/GetRefundPayLogList": None,
    # Known special cases
    "/Site/GetAllOrderSettleList": "/Site/GetFormerOrderSettleList",
    "/PayLog/GetPayLogListPage": "/PayLog/GetFormerPayLogListPage",
}
# (start_key, end_key) parameter-name pairs probed in order by
# extract_window_spec to find the request's time window.
TIME_WINDOW_KEYS: tuple[tuple[str, str], ...] = (
    ("startTime", "endTime"),
    ("rangeStartTime", "rangeEndTime"),
    ("StartPayTime", "EndPayTime"),
)
@dataclass(frozen=True)
class WindowSpec:
    """Immutable time window read from request params.

    Holds the two parameter names the window was read from and the parsed
    start/end datetimes.
    """

    start_key: str
    end_key: str
    start: datetime
    end: datetime
@dataclass(frozen=True)
class RoutedCall:
    """One planned request: the endpoint to call and the params to send to it."""

    endpoint: str
    params: dict
def is_former_endpoint(endpoint: str) -> bool:
    """Return True when the endpoint path contains the marker ``Former``."""
    path = str(endpoint) if endpoint else ""
    return path.find("Former") != -1
def _parse_dt(value: object, tz: ZoneInfo) -> datetime | None:
if value is None:
return None
s = str(value).strip()
if not s:
return None
dt = dtparser.parse(s)
if dt.tzinfo is None:
return dt.replace(tzinfo=tz)
return dt.astimezone(tz)
def _fmt_dt(dt: datetime, tz: ZoneInfo) -> str:
return dt.astimezone(tz).strftime("%Y-%m-%d %H:%M:%S")
def extract_window_spec(params: dict | None, tz: ZoneInfo = ROUTING_TZ) -> WindowSpec | None:
    """Return the first complete time window found in ``params``, else None.

    Key pairs are tried in ``TIME_WINDOW_KEYS`` order. A pair is used only when
    both its start and end values parse to a datetime.
    """
    if not (isinstance(params, dict) and params):
        return None
    for start_key, end_key in TIME_WINDOW_KEYS:
        if start_key not in params and end_key not in params:
            continue
        begin = _parse_dt(params.get(start_key), tz)
        finish = _parse_dt(params.get(end_key), tz)
        if begin and finish:
            return WindowSpec(start_key=start_key, end_key=end_key, start=begin, end=finish)
    return None
def derive_former_endpoint(recent_endpoint: str) -> str | None:
    """Map a "recent" endpoint path to its history ("Former") counterpart.

    Returns None for a blank endpoint or when the override table records that
    no history endpoint exists. Returns the endpoint unchanged when it is
    already a Former endpoint or contains no ``Get`` to rewrite.
    """
    path = str(recent_endpoint or "").strip()
    if not path:
        return None
    if path in RECENT_TO_FORMER_OVERRIDES:
        return RECENT_TO_FORMER_OVERRIDES[path]
    if is_former_endpoint(path):
        return path
    # Rewrite only the first "Get" in the path to "GetFormer".
    head, marker, tail = path.partition("Get")
    if not marker:
        return path
    return f"{head}GetFormer{tail}"
def recent_boundary(now: datetime, months: int = RECENT_MONTHS) -> datetime:
    """Return 00:00:00 on day 1 of the month that lies ``months`` months before ``now``.

    Raises:
        ValueError: ``now`` is a naive datetime.
    """
    if now.tzinfo is None:
        raise ValueError("now 必须为时区时间")
    shifted = now - relativedelta(months=months)
    midnight_first = {"day": 1, "hour": 0, "minute": 0, "second": 0, "microsecond": 0}
    return shifted.replace(**midnight_first)
def plan_calls(
    endpoint: str,
    params: dict | None,
    *,
    now: datetime | None = None,
    tz: ZoneInfo = ROUTING_TZ,
    months: int = RECENT_MONTHS,
) -> list[RoutedCall]:
    """Return the endpoint/params calls needed to cover the params' time window.

    The result has one entry unless the window straddles the recent/history
    boundary, in which case it is split into a history call followed by a
    recent call.
    """
    base_params = dict(params or {})
    unrouted = [RoutedCall(endpoint=endpoint, params=base_params)]

    # No params, or the caller explicitly chose a Former endpoint: do not re-route.
    if not base_params or is_former_endpoint(endpoint):
        return unrouted

    window = extract_window_spec(base_params, tz)
    if not window:
        return unrouted

    former_endpoint = derive_former_endpoint(endpoint)
    if former_endpoint is None or former_endpoint == endpoint:
        return unrouted

    current = (now or datetime.now(tz)).astimezone(tz)
    boundary = recent_boundary(current, months=months)

    if window.end <= boundary:
        return [RoutedCall(endpoint=former_endpoint, params=base_params)]
    if window.start >= boundary:
        return unrouted

    # Window straddles the boundary: older part -> former, newer part -> recent.
    boundary_text = _fmt_dt(boundary, tz)
    older = dict(base_params)
    older[window.start_key] = _fmt_dt(window.start, tz)
    older[window.end_key] = boundary_text
    newer = dict(base_params)
    newer[window.start_key] = boundary_text
    newer[window.end_key] = _fmt_dt(window.end, tz)
    return [
        RoutedCall(endpoint=former_endpoint, params=older),
        RoutedCall(endpoint=endpoint, params=newer),
    ]

View File

@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
"""本地 JSON 客户端,模拟 APIClient 的分页接口,从落盘的 JSON 回放数据。"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Iterable, Tuple
from api.client import APIClient
from utils.json_store import endpoint_to_filename
class LocalJsonClient:
    """Replay client that serves pages from JSON files on disk.

    Reads the JSON written by RecordingAPIClient and exposes the same
    ``iter_paginated`` / ``get_paginated`` interface as APIClient.
    """

    def __init__(self, base_dir: str | Path):
        """Remember the directory holding the JSON files.

        Raises:
            FileNotFoundError: ``base_dir`` does not exist.
        """
        self.base_dir = Path(base_dir)
        if not self.base_dir.exists():
            raise FileNotFoundError(f"JSON 目录不存在: {self.base_dir}")

    def get_source_hint(self, endpoint: str) -> str:
        """Return the JSON file path for this endpoint (for source_file lineage)."""
        return str(self.base_dir / endpoint_to_filename(endpoint))

    def iter_paginated(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | list[str] | tuple[str, ...] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> Iterable[Tuple[int, list, dict, dict]]:
        """Yield ``(page_no, records, request_params, raw_response)`` from the endpoint's file.

        The paging arguments (``page_size``, ``page_field``, ``size_field``,
        ``page_start``, ``page_end``) are accepted only for signature
        compatibility with APIClient; every recorded page is replayed.

        Raises:
            FileNotFoundError: no JSON file exists for ``endpoint``.
        """
        file_path = self.base_dir / endpoint_to_filename(endpoint)
        if not file_path.exists():
            raise FileNotFoundError(f"未找到匹配的 JSON 文件: {file_path}")
        with file_path.open("r", encoding="utf-8") as fp:
            payload = json.load(fp)

        # A file may hold a raw response (possibly a bare list) rather than a
        # recorded dump; only dicts can carry a "pages" entry.
        pages = payload.get("pages") if isinstance(payload, dict) else None
        if not isinstance(pages, list) or not pages:
            # No recorded pages: treat the whole file as one response.
            pages = [{"page": 1, "request": params or {}, "response": payload}]

        for idx, page in enumerate(pages, start=1):
            response = page.get("response", {})
            request_params = page.get("request") or {}
            page_no = page.get("page") or idx
            records = APIClient._extract_list(response, data_path, list_key)
            yield page_no, records, request_params, response

    def get_paginated(
        self,
        endpoint: str,
        params: dict,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | list[str] | tuple[str, ...] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> tuple[list, list]:
        """Replay every page and return ``(all_records, pages_meta)``."""
        records: list = []
        pages_meta: list = []
        for page_no, page_records, request_params, response in self.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
            page_start=page_start,
            page_end=page_end,
        ):
            records.extend(page_records)
            pages_meta.append({"page": page_no, "request": request_params, "response": response})
        return records, pages_meta

View File

@@ -0,0 +1,195 @@
# -*- coding: utf-8 -*-
"""包装 APIClient将分页响应落盘便于后续本地清洗。"""
from __future__ import annotations
from datetime import datetime
from pathlib import Path
import time
from typing import Any, Iterable, Tuple
from zoneinfo import ZoneInfo
from api.client import APIClient
from api.endpoint_routing import plan_calls
from utils.json_store import dump_json, endpoint_to_filename
class RecordingAPIClient:
    """Proxy for APIClient that also writes the paged responses to a JSON file.

    Calling ``iter_paginated`` / ``get_paginated`` forwards to the wrapped
    client and, once all pages are consumed, dumps them to a file in
    ``output_dir`` whose name is derived from the endpoint.
    """

    def __init__(
        self,
        base_client: APIClient,
        output_dir: Path | str,
        task_code: str,
        run_id: int,
        write_pretty: bool = False,
    ):
        """Store the wrapped client and dump settings; creates ``output_dir`` if missing."""
        self.base = base_client
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.task_code = task_code
        self.run_id = run_id
        self.write_pretty = write_pretty
        # Summary of the most recent dump (file, endpoint, pages, records).
        self.last_dump: dict[str, Any] | None = None

    # ------------------------------------------------------------------ Public API
    def get_source_hint(self, endpoint: str) -> str:
        """Return the JSON dump path for this endpoint (for source_file lineage)."""
        return str(self.output_dir / endpoint_to_filename(endpoint))

    def iter_paginated(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | list[str] | tuple[str, ...] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> Iterable[Tuple[int, list, dict, dict]]:
        """Yield pages from the wrapped client, then dump them all to disk.

        The dump is written only after the last page has been yielded, so a
        consumer that stops iterating early produces no file.
        """
        pages: list[dict[str, Any]] = []
        total_records = 0
        for page_no, records, request_params, response in self.base.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
            page_start=page_start,
            page_end=page_end,
        ):
            pages.append({"page": page_no, "request": request_params, "response": response})
            total_records += len(records)
            yield page_no, records, request_params, response
        self._dump(endpoint, params, page_size, pages, total_records)

    def get_paginated(
        self,
        endpoint: str,
        params: dict,
        page_size: int | None = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | list[str] | tuple[str, ...] | None = None,
        page_start: int = 1,
        page_end: int | None = None,
    ) -> tuple[list, list]:
        """Fetch every page (recording them) and return ``(all_records, pages_meta)``."""
        records: list = []
        pages_meta: list = []
        for page_no, page_records, request_params, response in self.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
            page_start=page_start,
            page_end=page_end,
        ):
            records.extend(page_records)
            pages_meta.append({"page": page_no, "request": request_params, "response": response})
        return records, pages_meta

    # ------------------------------------------------------------------ Internals
    def _dump(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int | None,
        pages: list[dict[str, Any]],
        total_records: int,
    ):
        """Write the collected pages plus run metadata to the endpoint's JSON file."""
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        filename = endpoint_to_filename(endpoint)
        path = self.output_dir / filename
        try:
            routing_calls = [
                {"endpoint": call.endpoint, "params": call.params}
                for call in plan_calls(endpoint, params)
            ]
        except Exception:
            # Routing info is best-effort metadata; never fail the dump over it.
            routing_calls = []
        # Same "...Z" text as the deprecated datetime.utcnow().isoformat() + "Z".
        dumped_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        payload = {
            "task_code": self.task_code,
            "run_id": self.run_id,
            "endpoint": endpoint,
            "params": params or {},
            "endpoint_routing": {"calls": routing_calls} if routing_calls else None,
            "page_size": page_size,
            "pages": pages,
            "total_records": total_records,
            "dumped_at": dumped_at,
        }
        dump_json(path, payload, pretty=self.write_pretty)
        self.last_dump = {
            "file": str(path),
            "endpoint": endpoint,
            "pages": len(pages),
            "records": total_records,
        }
def _cfg_get(cfg, key: str, default=None):
if isinstance(cfg, dict):
cur = cfg
for part in key.split("."):
if not isinstance(cur, dict) or part not in cur:
return default
cur = cur[part]
return cur
getter = getattr(cfg, "get", None)
if callable(getter):
return getter(key, default)
return default
def build_recording_client(
    cfg,
    *,
    task_code: str,
    output_dir: Path | str | None = None,
    run_id: int | None = None,
    write_pretty: bool | None = None,
):
    """Build a RecordingAPIClient from an AppConfig or a dict config.

    Arguments left as None are filled in: ``write_pretty`` from config,
    ``run_id`` from the current Unix time, and ``output_dir`` as
    ``<fetch_root>/<TASK>/<TASK>-<run_id>-<timestamp>``.
    """
    client_kwargs = {
        "base_url": _cfg_get(cfg, "api.base_url") or "",
        "token": _cfg_get(cfg, "api.token"),
        "timeout": int(_cfg_get(cfg, "api.timeout_sec", 20) or 20),
        "retry_max": int(_cfg_get(cfg, "api.retries.max_attempts", 3) or 3),
        "headers_extra": _cfg_get(cfg, "api.headers_extra") or {},
    }
    base_client = APIClient(**client_kwargs)

    pretty = write_pretty
    if pretty is None:
        pretty = bool(_cfg_get(cfg, "io.write_pretty_json", False))

    resolved_run_id = int(time.time()) if run_id is None else run_id

    target_dir = output_dir
    if target_dir is None:
        # CHANGE [2026-02-14] intent: default timezone corrected from Asia/Taipei
        # to Asia/Shanghai to match the region of operation.
        tz_name = _cfg_get(cfg, "app.timezone", "Asia/Shanghai") or "Asia/Shanghai"
        stamp = datetime.now(ZoneInfo(tz_name)).strftime("%Y%m%d-%H%M%S")
        fetch_root = (
            _cfg_get(cfg, "pipeline.fetch_root")
            or _cfg_get(cfg, "io.export_root")
            or "export/JSON"
        )
        task_upper = str(task_code).upper()
        target_dir = Path(fetch_root) / task_upper / f"{task_upper}-{resolved_run_id}-{stamp}"

    return RecordingAPIClient(
        base_client=base_client,
        output_dir=target_dir,
        task_code=str(task_code),
        run_id=int(resolved_run_id),
        write_pretty=bool(pretty),
    )
# AI_CHANGELOG:
# - 日期: 2026-02-14
# - Prompt: P20260214-040231审计收口补录
# - 直接原因: 默认时区 Asia/Taipei 与运营地区(中国大陆)不符
# - 变更摘要: build_recording_client 默认时区从 Asia/Taipei 改为 Asia/Shanghai
# - 风险与验证: 极低风险,两时区当前 UTC 偏移相同(+08:00)