合并
This commit is contained in:
224
etl_billiards/scripts/reload_ods_windowed.py
Normal file
224
etl_billiards/scripts/reload_ods_windowed.py
Normal file
@@ -0,0 +1,224 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Reload ODS tasks by fixed time windows with optional sleep between windows.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
import time as time_mod
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from dateutil import parser as dtparser
|
||||
|
||||
# This script lives in <project>/scripts/, so the project root is two levels up
# from this file. Put it on sys.path (once) so the `config` and `utils`
# packages imported below resolve when the script is run directly.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from utils.windowing import split_window
|
||||
from utils.logging_utils import build_log_path, configure_logging
|
||||
|
||||
# Lower bound, in days, for a fixed-size reload window. When window splitting
# is disabled, main() raises smaller --window-days / --window-hours values to
# this minimum (hours are compared against MIN_RELOAD_WINDOW_DAYS * 24).
MIN_RELOAD_WINDOW_DAYS = 30
|
||||
|
||||
|
||||
def _parse_dt(value: str, tz: ZoneInfo, *, is_end: bool) -> datetime:
    """Parse a date/datetime string into an aware datetime expressed in ``tz``.

    A naive result is stamped with ``tz``; an aware result is converted to
    ``tz``. If the text contains neither ":" nor "T" it is treated as a bare
    date and the time is set to 23:59:59 when ``is_end`` is true, otherwise to
    00:00:00.

    Args:
        value: Text to parse; ``None`` or blank is rejected.
        tz: Target time zone.
        is_end: Whether a bare date denotes the end of a range.

    Raises:
        ValueError: If ``value`` is empty after stripping whitespace.
    """
    text = (value or "").strip()
    if not text:
        raise ValueError("empty datetime")

    # Decide from the raw text, not the parsed value, whether a time was given.
    date_only = ":" not in text and "T" not in text

    parsed = dtparser.parse(text)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=tz)
    else:
        parsed = parsed.astimezone(tz)

    if date_only:
        if is_end:
            parsed = parsed.replace(hour=23, minute=59, second=59, microsecond=0)
        else:
            parsed = parsed.replace(hour=0, minute=0, second=0, microsecond=0)
    return parsed
|
||||
|
||||
|
||||
def _iter_windows(start: datetime, end: datetime, window_size: timedelta):
    """Yield back-to-back ``(window_start, window_end)`` pairs from start to end.

    Every window spans ``window_size`` except the final one, which is clipped
    so that it stops exactly at ``end``. Nothing is yielded when ``start`` is
    not earlier than ``end``.

    Raises:
        ValueError: If ``window_size`` is zero or negative. Because this is a
            generator, the error surfaces on first iteration.
    """
    if window_size <= timedelta(0):
        raise ValueError("window_size must be > 0")

    lower = start
    while lower < end:
        upper = lower + window_size
        if upper > end:
            # Last slice: do not run past the requested end.
            upper = end
        yield lower, upper
        lower = upper
|
||||
|
||||
|
||||
def _run_task_window(
    task_code: str,
    window_start: datetime,
    window_end: datetime,
    api_page_size: int,
    api_timeout: int,
    logger: logging.Logger,
    window_split_unit: str | None = "none",
    window_compensation_hours: int | None = 0,
) -> None:
    """Run one task over one time window by invoking ``cli.main`` in a child process.

    The child is started with the current interpreter (``sys.executable -m
    cli.main``) from ``PROJECT_ROOT``, using ``--pipeline-flow FULL`` and
    ``--force-window-override``. Window bounds are passed formatted as
    ``%Y-%m-%d %H:%M:%S`` (no UTC offset).

    Args:
        task_code: Task code passed to ``--tasks``.
        window_start: Start of the window.
        window_end: End of the window.
        api_page_size: Passed as ``--api-page-size`` only when > 0.
        api_timeout: Passed as ``--api-timeout`` only when > 0.
        logger: Logger receiving the RUN_TASK / CMD messages.
        window_split_unit: Passed as ``--window-split-unit``; ``None`` or empty
            is sent as ``"none"``.
        window_compensation_hours: Passed as ``--window-compensation-hours``;
            ``None`` is sent as ``0``.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status (``check=True``).
    """
    # Local import: only needed to render the command for the debug log.
    import shlex

    cmd = [
        sys.executable,
        "-m",
        "cli.main",
        "--pipeline-flow",
        "FULL",
        "--tasks",
        task_code,
        "--window-start",
        window_start.strftime("%Y-%m-%d %H:%M:%S"),
        "--window-end",
        window_end.strftime("%Y-%m-%d %H:%M:%S"),
        "--force-window-override",
        "--window-split-unit",
        str(window_split_unit or "none"),
        "--window-compensation-hours",
        str(int(window_compensation_hours or 0)),
    ]
    if api_page_size > 0:
        cmd += ["--api-page-size", str(api_page_size)]
    if api_timeout > 0:
        cmd += ["--api-timeout", str(api_timeout)]

    logger.info(
        "RUN_TASK task=%s window_start=%s window_end=%s",
        task_code,
        window_start.isoformat(),
        window_end.isoformat(),
    )
    # The timestamp arguments contain a space, so a plain " ".join() would log
    # an ambiguous command line; shlex.join quotes each argument.
    logger.debug("CMD %s", shlex.join(cmd))
    subprocess.run(cmd, check=True, cwd=str(PROJECT_ROOT))
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: reload ODS tasks window by window.

    Steps:
      1. Parse arguments and set up logging (file, optional console).
      2. Load the app config; resolve the time zone, the ``[start, end]``
         range, the split unit and the compensation hours (CLI values win,
         config values are the fallback).
      3. Build the window list:
         - split enabled (unit not in "", "none", "off", "false", "0"):
           delegate entirely to ``split_window``;
         - split disabled: raise ``--window-hours`` / ``--window-days`` to at
           least ``MIN_RELOAD_WINDOW_DAYS``, let ``split_window`` adjust the
           overall range with ``split_unit="none"``, then cut fixed-size
           slices with ``_iter_windows``.
      4. For every task, run every window via ``_run_task_window`` and
         optionally sleep after each window.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        0 on success.

    Raises:
        SystemExit: On argument errors, or when ``--tasks`` contains no codes.
        subprocess.CalledProcessError: Propagated from ``_run_task_window``
            when a child run fails; remaining windows and tasks are not run.
    """
    ap = argparse.ArgumentParser(description="Reload ODS tasks by window slices.")
    ap.add_argument("--tasks", required=True, help="comma-separated ODS task codes")
    ap.add_argument("--start", required=True, help="start datetime, e.g. 2025-07-01")
    ap.add_argument("--end", default="", help="end datetime (default: now)")
    ap.add_argument("--window-days", type=int, default=1, help="days per window (default: 1)")
    ap.add_argument("--window-hours", type=int, default=0, help="hours per window (default: 0)")
    ap.add_argument("--window-split-unit", default="", help="split unit (month/none), default from config")
    ap.add_argument("--window-compensation-hours", type=int, default=None, help="window compensation hours, default from config")
    ap.add_argument("--sleep-seconds", type=float, default=0, help="sleep seconds after each window")
    ap.add_argument("--api-page-size", type=int, default=200, help="API page size override")
    ap.add_argument("--api-timeout", type=int, default=20, help="API timeout seconds override")
    ap.add_argument("--log-file", default="", help="log file path (default: logs/reload_ods_windowed_YYYYMMDD_HHMMSS.log)")
    ap.add_argument("--log-dir", default="", help="log directory (default: logs)")
    ap.add_argument("--log-level", default="INFO", help="log level (default: INFO)")
    ap.add_argument("--no-log-console", action="store_true", help="disable console logging")
    args = ap.parse_args(argv)

    log_dir = Path(args.log_dir) if args.log_dir else (PROJECT_ROOT / "logs")
    log_file = Path(args.log_file) if args.log_file else build_log_path(log_dir, "reload_ods_windowed")
    log_console = not args.no_log_console

    with configure_logging(
        "reload_ods_windowed",
        log_file,
        level=args.log_level,
        console=log_console,
        tee_std=True,
    ) as logger:
        cfg = AppConfig.load({})
        tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))

        start = _parse_dt(args.start, tz, is_end=False)
        end = datetime.now(tz) if not args.end else _parse_dt(args.end, tz, is_end=True)
        window_days = int(args.window_days)
        window_hours = int(args.window_hours)
        split_unit = (args.window_split_unit or cfg.get("run.window_split.unit", "month") or "month").strip()
        comp_hours = args.window_compensation_hours
        if comp_hours is None:
            comp_hours = cfg.get("run.window_split.compensation_hours", 0)

        use_split = split_unit.lower() not in ("", "none", "off", "false", "0")
        if use_split:
            # NOTE(review): split_window is project-local; it is used here as
            # returning a sequence of (start, end) pairs - confirm.
            windows = split_window(
                start,
                end,
                tz=tz,
                split_unit=split_unit,
                compensation_hours=comp_hours,
            )
        else:
            # Fixed-size slicing: enforce the minimum window size first.
            # --window-hours takes precedence over --window-days when > 0.
            min_hours = MIN_RELOAD_WINDOW_DAYS * 24
            if window_hours > 0:
                if window_hours < min_hours:
                    logger.warning(
                        "window_hours=%s too small; adjust to %s",
                        window_hours,
                        min_hours,
                    )
                    window_hours = min_hours
            elif window_days < MIN_RELOAD_WINDOW_DAYS:
                logger.warning(
                    "window_days=%s too small; adjust to %s",
                    window_days,
                    MIN_RELOAD_WINDOW_DAYS,
                )
                window_days = MIN_RELOAD_WINDOW_DAYS

            # With split_unit="none" this call is used only to obtain the
            # adjusted overall range (presumably applying compensation_hours).
            adjusted = split_window(
                start,
                end,
                tz=tz,
                split_unit="none",
                compensation_hours=comp_hours,
            )
            if adjusted:
                start, end = adjusted[0]
            window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
            windows = list(_iter_windows(start, end, window_size))

        if windows:
            # Report the range actually covered by the windows.
            start, end = windows[0][0], windows[-1][1]
        else:
            # Without this, an empty range (e.g. start not before end) would
            # run nothing yet still log TASK_START / TASK_DONE for each task.
            logger.warning(
                "NO_WINDOWS range=%s~%s; nothing to run",
                start.isoformat(),
                end.isoformat(),
            )

        task_codes = [t.strip().upper() for t in args.tasks.split(",") if t.strip()]
        if not task_codes:
            raise SystemExit("no tasks specified")

        logger.info(
            "START range=%s~%s window_days=%s window_hours=%s split_unit=%s comp_hours=%s sleep=%.2f",
            start.isoformat(),
            end.isoformat(),
            window_days,
            window_hours,
            split_unit,
            comp_hours,
            args.sleep_seconds,
        )

        for task_code in task_codes:
            logger.info("TASK_START task=%s", task_code)
            for window_start, window_end in windows:
                start_ts = time_mod.monotonic()
                # The child always gets split "none" and compensation 0,
                # presumably because both were already applied when building
                # `windows` above.
                _run_task_window(
                    task_code=task_code,
                    window_start=window_start,
                    window_end=window_end,
                    api_page_size=args.api_page_size,
                    api_timeout=args.api_timeout,
                    logger=logger,
                    window_split_unit="none",
                    window_compensation_hours=0,
                )
                elapsed = time_mod.monotonic() - start_ts
                logger.info(
                    "WINDOW_DONE task=%s window_start=%s window_end=%s elapsed=%.2fs",
                    task_code,
                    window_start.isoformat(),
                    window_end.isoformat(),
                    elapsed,
                )
                if args.sleep_seconds > 0:
                    logger.debug("SLEEP seconds=%.2f", args.sleep_seconds)
                    time_mod.sleep(args.sleep_seconds)
            logger.info("TASK_DONE task=%s", task_code)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user