合并
This commit is contained in:
91
etl_billiards/utils/logging_utils.py
Normal file
91
etl_billiards/utils/logging_utils.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterator, TextIO
|
||||
|
||||
|
||||
class TeeStream:
    """Write-only text stream that fans every write out to several streams.

    The wrapped streams are kept in the order they were passed in; that order
    is used for writes and flushes, and the first stream supplies the file
    descriptor reported by :meth:`fileno`.
    """

    def __init__(self, *streams: TextIO) -> None:
        """Remember the target streams (positional, any number)."""
        self._streams = streams

    def write(self, data: str) -> int:
        """Write *data* to every target stream and return ``len(data)``."""
        written = len(data)
        for target in self._streams:
            target.write(data)
        return written

    def flush(self) -> None:
        """Flush every target stream in order."""
        for target in self._streams:
            target.flush()

    def isatty(self) -> bool:
        """Always report a non-interactive stream."""
        return False

    def fileno(self) -> int:
        """Return the file descriptor of the first target stream."""
        primary = self._streams[0]
        return primary.fileno()
|
||||
|
||||
|
||||
def build_log_path(log_dir: Path, prefix: str, tag: str = "") -> Path:
    """Build a timestamped log file path inside *log_dir*.

    The file name is ``<prefix>[_<tag>]_<YYYYmmdd_HHMMSS>.log``; the ``_<tag>``
    part is omitted when *tag* is empty. The timestamp comes from the local
    clock (``datetime.now()``). Nothing is created on disk.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if tag:
        tag_part = f"_{tag}"
    else:
        tag_part = ""
    file_name = f"{prefix}{tag_part}_{timestamp}.log"
    return log_dir / file_name
|
||||
|
||||
|
||||
@contextmanager
def configure_logging(
    name: str,
    log_file: Path | None,
    *,
    level: str = "INFO",
    console: bool = True,
    tee_std: bool = True,
) -> Iterator[logging.Logger]:
    """Configure the logger *name* for the duration of a ``with`` block.

    On entry the logger's existing handlers are dropped, its level is set and
    propagation is disabled. On exit every handler attached to the logger is
    flushed, closed and removed, the log file is closed, and ``sys.stdout`` /
    ``sys.stderr`` are put back to the objects they were on entry.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: File to append log records to (parent directories are
            created). A falsy value disables file logging and redirection.
        level: Name of a ``logging`` level constant, case-insensitive. Unknown
            names fall back to ``INFO``.
        console: Attach a handler that writes to the stdout that was active
            on entry.
        tee_std: When a log file is used, also redirect ``sys.stdout`` and
            ``sys.stderr``: to both the original stream and the file when
            *console* is true, otherwise to the file only.

    Yields:
        The configured logger.
    """
    logger = logging.getLogger(name)
    logger.handlers.clear()

    # Only accept real numeric level constants; a name such as "basic_format"
    # resolves to a non-level module attribute that setLevel() would reject.
    resolved_level = getattr(logging, level.upper(), logging.INFO)
    if not isinstance(resolved_level, int):
        resolved_level = logging.INFO
    logger.setLevel(resolved_level)
    logger.propagate = False

    formatter = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        "%Y-%m-%d %H:%M:%S",
    )

    original_stdout = sys.stdout
    original_stderr = sys.stderr
    log_fp: TextIO | None = None

    try:
        if log_file:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            # Line-buffered so records and teed prints reach the file promptly.
            log_fp = open(log_file, "a", encoding="utf-8", buffering=1)
            if tee_std:
                if console:
                    sys.stdout = TeeStream(original_stdout, log_fp)
                    sys.stderr = TeeStream(original_stderr, log_fp)
                else:
                    sys.stdout = log_fp
                    sys.stderr = log_fp
            file_handler = logging.StreamHandler(log_fp)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        if console:
            # Bind to the original stdout, not the tee, so records are not
            # written to the log file a second time.
            console_handler = logging.StreamHandler(original_stdout)
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)

        yield logger
    finally:
        try:
            for handler in list(logger.handlers):
                handler.flush()
                handler.close()
                logger.removeHandler(handler)
        finally:
            # Runs even if a handler failed above, so the process is never
            # left with redirected standard streams or an open log file.
            sys.stdout = original_stdout
            sys.stderr = original_stderr
            if log_fp is not None:
                log_fp.close()  # close() flushes pending data itself
|
||||
97
etl_billiards/utils/windowing.py
Normal file
97
etl_billiards/utils/windowing.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Time window helpers for ETL and validation tasks."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, time
|
||||
from typing import List, Tuple
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
|
||||
def _ensure_tz(dt: datetime, tz: ZoneInfo | None) -> datetime:
|
||||
if tz is None:
|
||||
return dt
|
||||
if dt.tzinfo is None:
|
||||
return dt.replace(tzinfo=tz)
|
||||
return dt.astimezone(tz)
|
||||
|
||||
|
||||
def _next_month_start(dt: datetime, tz: ZoneInfo | None) -> datetime:
|
||||
year = dt.year
|
||||
month = dt.month
|
||||
if month == 12:
|
||||
year += 1
|
||||
month = 1
|
||||
else:
|
||||
month += 1
|
||||
return datetime(year, month, 1, tzinfo=tz)
|
||||
|
||||
|
||||
def calc_window_minutes(start: datetime, end: datetime) -> int:
    """Return the length of ``[start, end)`` in whole minutes.

    An empty or inverted window gives 0. Any non-empty window gives at least
    1, even when it is shorter than a minute; otherwise the duration is
    rounded down to full minutes.
    """
    if end > start:
        elapsed_seconds = (end - start).total_seconds()
        whole_minutes = int(elapsed_seconds // 60)
        return whole_minutes if whole_minutes > 1 else 1
    return 0
|
||||
|
||||
|
||||
def split_window(
    start: datetime,
    end: datetime,
    *,
    tz: ZoneInfo | None,
    split_unit: str | None,
    compensation_hours: int | float | None,
) -> List[Tuple[datetime, datetime]]:
    """Split ``[start, end)`` into consecutive segments.

    Args:
        start: Window start; normalised to *tz* when *tz* is given.
        end: Window end; normalised the same way.
        tz: Target time zone, or None to leave the datetimes as they are.
        split_unit: ``"month"`` / ``"monthly"`` (case-insensitive, surrounding
            whitespace ignored) cuts the window at calendar-month boundaries.
            Any other value, including None and the empty string, returns
            the window as a single segment.
        compensation_hours: Hours by which the window is widened on both
            sides before splitting. Fractional values are honoured; None or
            0 leaves the window untouched.

    Returns:
        Ordered, contiguous ``(segment_start, segment_end)`` pairs, or an
        empty list when the (widened) window is empty or inverted.
    """
    start = _ensure_tz(start, tz)
    end = _ensure_tz(end, tz)

    # float(), not int(): the signature accepts fractional hours and int()
    # would silently truncate e.g. 0.5 to no compensation at all.
    comp_hours = float(compensation_hours or 0)
    if comp_hours:
        padding = timedelta(hours=comp_hours)
        start = start - padding
        end = end + padding

    if end <= start:
        return []

    unit = (split_unit or "").strip().lower()
    # Only monthly splitting is implemented; every other unit (disabled or
    # unrecognised) keeps the window in one piece.
    if unit not in ("month", "monthly"):
        return [(start, end)]

    windows: List[Tuple[datetime, datetime]] = []
    cur = start
    while cur < end:
        # When tz is None, reuse the window's own tzinfo so the boundary stays
        # comparable with `end` (naive vs aware comparison raises TypeError).
        boundary = _next_month_start(cur, tz if tz is not None else cur.tzinfo)
        nxt = boundary if boundary < end else end
        if nxt <= cur:
            # Defensive: never loop without making progress.
            break
        windows.append((cur, nxt))
        cur = nxt
    return windows
|
||||
|
||||
|
||||
def build_window_segments(
    cfg,
    start: datetime,
    end: datetime,
    *,
    tz: ZoneInfo | None,
    override_only: bool,
) -> List[Tuple[datetime, datetime]]:
    """Split ``[start, end)`` according to the window settings read from *cfg*.

    The split unit (default ``"month"``) and compensation hours (default 0)
    are read from ``run.window_split.*``. When *override_only* is true and
    ``run.window_override.start`` / ``run.window_override.end`` are not both
    set, splitting and compensation are turned off.

    *cfg* only needs a ``get(key, default=None)`` method taking dotted keys.
    """
    unit_setting = cfg.get("run.window_split.unit", "month")
    comp_setting = cfg.get("run.window_split.compensation_hours", 0)

    override_missing = False
    if override_only:
        override_bounds = (
            cfg.get("run.window_override.start"),
            cfg.get("run.window_override.end"),
        )
        override_missing = not all(override_bounds)

    if override_missing:
        unit_setting, comp_setting = "none", 0

    return split_window(
        start,
        end,
        tz=tz,
        split_unit=unit_setting,
        compensation_hours=comp_setting,
    )
|
||||
Reference in New Issue
Block a user