This commit is contained in:
Neo
2026-01-27 22:47:05 +08:00
parent a6ad343092
commit f5f9a7eb66
476 changed files with 381543 additions and 5819 deletions

View File

@@ -5,6 +5,9 @@ from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from dateutil import parser as dtparser
from utils.windowing import build_window_segments, calc_window_minutes
@dataclass(frozen=True)
@@ -49,35 +52,72 @@ class BaseTask:
# ------------------------------------------------------------------ 主流程
def execute(self, cursor_data: dict | None = None) -> dict:
"""统一 orchestrate Extract → Transform → Load"""
context = self._build_context(cursor_data)
base_context = self._build_context(cursor_data)
task_code = self.get_task_code()
self.logger.info(
"%s: 开始执行,窗口[%s ~ %s]",
task_code,
context.window_start,
context.window_end,
segments = build_window_segments(
self.config,
base_context.window_start,
base_context.window_end,
tz=self.tz,
override_only=True,
)
if not segments:
segments = [(base_context.window_start, base_context.window_end)]
try:
extracted = self.extract(context)
transformed = self.transform(extracted, context)
counts = self.load(transformed, context) or {}
self.db.commit()
except Exception:
self.db.rollback()
self.logger.error("%s: 执行失败", task_code, exc_info=True)
raise
total_segments = len(segments)
if total_segments > 1:
self.logger.info("%s: 窗口拆分为 %s", task_code, total_segments)
result = self._build_result("SUCCESS", counts)
total_counts: dict = {}
segment_results: list[dict] = []
for idx, (window_start, window_end) in enumerate(segments, start=1):
context = self._build_context_for_window(window_start, window_end, cursor_data)
self.logger.info(
"%s: 开始执行(%s/%s),窗口[%s ~ %s]",
task_code,
idx,
total_segments,
context.window_start,
context.window_end,
)
try:
extracted = self.extract(context)
transformed = self.transform(extracted, context)
counts = self.load(transformed, context) or {}
self.db.commit()
except Exception:
self.db.rollback()
self.logger.error("%s: 执行失败", task_code, exc_info=True)
raise
self._accumulate_counts(total_counts, counts)
if total_segments > 1:
segment_results.append(
{
"window": {
"start": context.window_start,
"end": context.window_end,
"minutes": context.window_minutes,
},
"counts": counts,
}
)
overall_start = segments[0][0]
overall_end = segments[-1][1]
result = self._build_result("SUCCESS", total_counts)
result["window"] = {
"start": context.window_start,
"end": context.window_end,
"minutes": context.window_minutes,
"start": overall_start,
"end": overall_end,
"minutes": calc_window_minutes(overall_start, overall_end),
}
if segment_results:
result["segments"] = segment_results
self.logger.info("%s: 完成,统计=%s", task_code, result["counts"])
return result
# ------------------------------------------------------------------ 辅助方法
def _build_context(self, cursor_data: dict | None) -> TaskContext:
window_start, window_end, window_minutes = self._get_time_window(cursor_data)
return TaskContext(
@@ -88,10 +128,63 @@ class BaseTask:
cursor=cursor_data,
)
def _build_context_for_window(
self,
window_start: datetime,
window_end: datetime,
cursor_data: dict | None,
) -> TaskContext:
return TaskContext(
store_id=self.config.get("app.store_id"),
window_start=window_start,
window_end=window_end,
window_minutes=calc_window_minutes(window_start, window_end),
cursor=cursor_data,
)
@staticmethod
def _accumulate_counts(total: dict, current: dict) -> dict:
for key, value in (current or {}).items():
if isinstance(value, (int, float)):
total[key] = (total.get(key) or 0) + value
else:
total.setdefault(key, value)
return total
def _get_time_window(self, cursor_data: dict = None) -> tuple:
"""计算时间窗口"""
now = datetime.now(self.tz)
override_start = self.config.get("run.window_override.start")
override_end = self.config.get("run.window_override.end")
if override_start or override_end:
if not (override_start and override_end):
raise ValueError("run.window_override.start/end 需要同时提供")
window_start = override_start
if isinstance(window_start, str):
window_start = dtparser.parse(window_start)
if isinstance(window_start, datetime) and window_start.tzinfo is None:
window_start = window_start.replace(tzinfo=self.tz)
elif isinstance(window_start, datetime):
window_start = window_start.astimezone(self.tz)
window_end = override_end
if isinstance(window_end, str):
window_end = dtparser.parse(window_end)
if isinstance(window_end, datetime) and window_end.tzinfo is None:
window_end = window_end.replace(tzinfo=self.tz)
elif isinstance(window_end, datetime):
window_end = window_end.astimezone(self.tz)
if not isinstance(window_start, datetime) or not isinstance(window_end, datetime):
raise ValueError("run.window_override.start/end 解析失败")
if window_end <= window_start:
raise ValueError("run.window_override.end 必须大于 start")
window_minutes = max(1, int((window_end - window_start).total_seconds() // 60))
return window_start, window_end, window_minutes
idle_start = self.config.get("run.idle_window.start", "04:00")
idle_end = self.config.get("run.idle_window.end", "16:00")
is_idle = self._is_in_idle_window(now, idle_start, idle_end)