ETL 完成

This commit is contained in:
Neo
2026-01-18 22:37:38 +08:00
parent 8da6cb6563
commit 7ca19a4a2c
159 changed files with 31225 additions and 467 deletions

View File

@@ -7,6 +7,7 @@ from pathlib import Path
from typing import Any, Iterable, Tuple
from api.client import APIClient
from api.endpoint_routing import plan_calls
from utils.json_store import dump_json, endpoint_to_filename
@@ -33,6 +34,10 @@ class RecordingAPIClient:
self.last_dump: dict[str, Any] | None = None
# ------------------------------------------------------------------ public API
def get_source_hint(self, endpoint: str) -> str:
    """Return the JSON dump path for ``endpoint`` as a string.

    The path is ``self.output_dir`` joined with the file name produced by
    ``endpoint_to_filename(endpoint)``; it is intended for use as the
    ``source_file`` lineage value.
    """
    # Read the directory first so attribute access precedes the helper call.
    base_dir = self.output_dir
    dump_path = base_dir / endpoint_to_filename(endpoint)
    return str(dump_path)
def iter_paginated(
self,
endpoint: str,
@@ -99,11 +104,18 @@ class RecordingAPIClient:
):
filename = endpoint_to_filename(endpoint)
path = self.output_dir / filename
routing_calls = []
try:
for call in plan_calls(endpoint, params):
routing_calls.append({"endpoint": call.endpoint, "params": call.params})
except Exception:
routing_calls = []
payload = {
"task_code": self.task_code,
"run_id": self.run_id,
"endpoint": endpoint,
"params": params or {},
"endpoint_routing": {"calls": routing_calls} if routing_calls else None,
"page_size": page_size,
"pages": pages,
"total_records": total_records,