Switch to relative paths; complete the client

This commit is contained in:
Neo
2026-01-27 22:14:01 +08:00
parent 04c064793a
commit 9f8976e75a
292 changed files with 307062 additions and 678 deletions


@@ -34,6 +34,7 @@ from database.connection import DatabaseConnection
from models.parsers import TypeParser
from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
from utils.logging_utils import build_log_path, configure_logging
from utils.windowing import split_window
DEFAULT_START = "2025-07-01"
MIN_COMPLETENESS_WINDOW_DAYS = 30
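The hunk above wires in the new utils.windowing.split_window helper. Its implementation is not part of this diff; judging from the call sites later in the commit, it takes a start/end pair and returns a list of (window_start, window_end) tuples, split on calendar-month boundaries when split_unit="month" and left as a single window when split_unit="none". A minimal sketch under those assumptions (how compensation_hours is applied is a guess):

from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

def split_window(start, end, *, tz, split_unit="month", compensation_hours=0):
    """Sketch of the assumed contract: return [(start, end), ...] sub-windows."""
    comp = timedelta(hours=int(compensation_hours or 0))
    start = start - comp  # assumption: compensation widens the range backwards
    if str(split_unit).lower() in ("", "none", "off", "false", "0"):
        return [(start, end)]
    windows = []
    cur = start
    while cur < end:
        # first instant of the following calendar month in the target timezone
        nxt = datetime(cur.year + cur.month // 12, cur.month % 12 + 1, 1, tzinfo=tz)
        windows.append((cur, min(nxt, end)))
        cur = nxt
    return windows

tz = ZoneInfo("Asia/Taipei")
print(split_window(datetime(2025, 7, 1, tzinfo=tz), datetime(2025, 9, 15, tzinfo=tz), tz=tz))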
@@ -214,8 +215,7 @@ def _check_spec(
store_id: int,
start: datetime | None,
end: datetime | None,
window_days: int,
window_hours: int,
windows: list[tuple[datetime, datetime]] | None,
page_size: int,
chunk_size: int,
sample_limit: int,
@@ -249,8 +249,7 @@ def _check_spec(
result["errors"] = 1
result["error_detail"] = "missing start/end for windowed endpoint"
return result
window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
windows = list(_iter_windows(start, end, window_size))
windows = list(windows or [(start, end)])
else:
windows = [(None, None)]
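These hunks swap the per-check window_days/window_hours slicing for a precomputed windows list, falling back to a single (start, end) window when none is supplied. The pre-existing _iter_windows helper (still used by run_gap_check below) is not shown in this diff; from its call signature it presumably steps through the range in fixed-size slices, roughly:

from datetime import datetime, timedelta
from collections.abc import Iterator

def _iter_windows(start: datetime, end: datetime, size: timedelta) -> Iterator[tuple[datetime, datetime]]:
    # step through [start, end) in fixed-size slices; the last slice is clamped to end
    cur = start
    while cur < end:
        nxt = min(cur + size, end)
        yield cur, nxt
        cur = nxt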
@@ -377,8 +376,7 @@ def _check_settlement_tickets(
store_id: int,
start: datetime | None,
end: datetime | None,
window_days: int,
window_hours: int,
windows: list[tuple[datetime, datetime]] | None,
page_size: int,
chunk_size: int,
sample_limit: int,
@@ -415,8 +413,7 @@ def _check_settlement_tickets(
missing_seen: set[tuple] = set()
pay_endpoint = "/PayLog/GetPayLogListPage"
window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
windows = list(_iter_windows(start, end, window_size))
windows = list(windows or [(start, end)])
logger.info(
"CHECK_START task=%s table=%s windows=%s start=%s end=%s",
result["task_code"],
@@ -571,6 +568,222 @@ def _resolve_window_from_cutoff(
return start, now, cutoff
def run_gap_check(
*,
cfg: AppConfig | None,
start: datetime | str | None,
end: datetime | str | None,
window_days: int,
window_hours: int,
page_size: int,
chunk_size: int,
sample_limit: int,
sleep_per_window: float,
sleep_per_page: float,
task_codes: str,
from_cutoff: bool,
cutoff_overlap_hours: int,
allow_small_window: bool,
logger: logging.Logger,
window_split_unit: str | None = None,
window_compensation_hours: int | None = None,
) -> dict:
cfg = cfg or AppConfig.load({})
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
store_id = int(cfg.get("app.store_id") or 0)
if not cfg.get("api.token"):
raise ValueError("missing api.token; please set API_TOKEN in .env")
window_days = int(window_days)
window_hours = int(window_hours)
split_unit = (window_split_unit or cfg.get("run.window_split.unit", "month") or "month").strip()
comp_hours = window_compensation_hours
if comp_hours is None:
comp_hours = cfg.get("run.window_split.compensation_hours", 0)
use_split = split_unit.lower() not in ("", "none", "off", "false", "0")
if not use_split and not from_cutoff and not allow_small_window:
min_hours = MIN_COMPLETENESS_WINDOW_DAYS * 24
if window_hours > 0:
if window_hours < min_hours:
logger.warning(
"window_hours=%s too small for completeness check; adjust to %s",
window_hours,
min_hours,
)
window_hours = min_hours
elif window_days < MIN_COMPLETENESS_WINDOW_DAYS:
logger.warning(
"window_days=%s too small for completeness check; adjust to %s",
window_days,
MIN_COMPLETENESS_WINDOW_DAYS,
)
window_days = MIN_COMPLETENESS_WINDOW_DAYS
cutoff = None
if from_cutoff:
db_tmp = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
ods_tables = [s.table_name for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
start, end, cutoff = _resolve_window_from_cutoff(
conn=db_tmp.conn,
ods_tables=ods_tables,
tz=tz,
overlap_hours=cutoff_overlap_hours,
)
db_tmp.close()
else:
if not start:
start = DEFAULT_START
if not end:
end = datetime.now(tz)
if isinstance(start, str):
start = _parse_dt(start, tz, is_end=False)
if isinstance(end, str):
end = _parse_dt(end, tz, is_end=True)
windows = None
if use_split:
windows = split_window(
start,
end,
tz=tz,
split_unit=split_unit,
compensation_hours=comp_hours,
)
else:
adjusted = split_window(
start,
end,
tz=tz,
split_unit="none",
compensation_hours=comp_hours,
)
if adjusted:
start, end = adjusted[0]
window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
windows = list(_iter_windows(start, end, window_size))
if windows:
start, end = windows[0][0], windows[-1][1]
logger.info(
"START range=%s~%s window_days=%s window_hours=%s split_unit=%s comp_hours=%s page_size=%s chunk_size=%s",
start.isoformat() if isinstance(start, datetime) else None,
end.isoformat() if isinstance(end, datetime) else None,
window_days,
window_hours,
split_unit,
comp_hours,
page_size,
chunk_size,
)
if cutoff:
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), cutoff_overlap_hours)
client = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
try:
db_conn.conn.rollback()
except Exception:
pass
db_conn.conn.autocommit = True
try:
task_filter = {t.strip().upper() for t in (task_codes or "").split(",") if t.strip()}
specs = [s for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
if task_filter:
specs = [s for s in specs if s.code in task_filter]
results: list[dict] = []
for spec in specs:
if spec.code == "ODS_SETTLEMENT_TICKET":
continue
result = _check_spec(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
spec=spec,
store_id=store_id,
start=start,
end=end,
windows=windows,
page_size=page_size,
chunk_size=chunk_size,
sample_limit=sample_limit,
sleep_per_window=sleep_per_window,
sleep_per_page=sleep_per_page,
)
results.append(result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
result.get("task_code"),
result.get("missing"),
result.get("records"),
result.get("errors"),
)
if (not task_filter) or ("ODS_SETTLEMENT_TICKET" in task_filter):
ticket_result = _check_settlement_tickets(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
store_id=store_id,
start=start,
end=end,
windows=windows,
page_size=page_size,
chunk_size=chunk_size,
sample_limit=sample_limit,
sleep_per_window=sleep_per_window,
sleep_per_page=sleep_per_page,
)
results.append(ticket_result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
ticket_result.get("task_code"),
ticket_result.get("missing"),
ticket_result.get("records"),
ticket_result.get("errors"),
)
total_missing = sum(int(r.get("missing") or 0) for r in results)
total_errors = sum(int(r.get("errors") or 0) for r in results)
payload = {
"window_split_unit": split_unit,
"window_compensation_hours": comp_hours,
"start": start.isoformat() if isinstance(start, datetime) else None,
"end": end.isoformat() if isinstance(end, datetime) else None,
"cutoff": cutoff.isoformat() if cutoff else None,
"window_days": window_days,
"window_hours": window_hours,
"page_size": page_size,
"chunk_size": chunk_size,
"sample_limit": sample_limit,
"store_id": store_id,
"base_url": cfg.get("api.base_url"),
"results": results,
"total_missing": total_missing,
"total_errors": total_errors,
"generated_at": datetime.now(tz).isoformat(),
}
return payload
finally:
db_conn.close()
def main() -> int:
_reconfigure_stdout_utf8()
ap = argparse.ArgumentParser(description="Check missing ODS records by comparing API vs ODS PKs.")
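The core of this commit is the hunk above: the body of main() is extracted into run_gap_check, so the client named in the commit message can trigger a gap check without going through argparse. A minimal programmatic call might look like the sketch below; the module name and the zero sleep values are assumptions, everything else mirrors what main() now passes through.

import logging
from check_ods_gaps import run_gap_check  # module name assumed from the default log prefix

logging.basicConfig(level=logging.INFO)
payload = run_gap_check(
    cfg=None,                        # falls back to AppConfig.load({})
    start="2025-07-01",              # strings are parsed in the configured timezone
    end=None,                        # defaults to "now"
    window_days=1,
    window_hours=0,
    page_size=200,
    chunk_size=500,
    sample_limit=50,
    sleep_per_window=0.0,
    sleep_per_page=0.0,
    task_codes="",                   # empty = all enabled ODS tasks
    from_cutoff=False,
    cutoff_overlap_hours=24,
    allow_small_window=False,
    logger=logging.getLogger("check_ods_gaps"),
    window_split_unit="month",
    window_compensation_hours=None,  # None = read run.window_split.compensation_hours from config
)
print(payload["total_missing"], payload["total_errors"])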
@@ -578,6 +791,8 @@ def main() -> int:
ap.add_argument("--end", default="", help="end datetime (default: now)")
ap.add_argument("--window-days", type=int, default=1, help="days per API window (default: 1)")
ap.add_argument("--window-hours", type=int, default=0, help="hours per API window (default: 0)")
ap.add_argument("--window-split-unit", default="", help="split unit (month/none), default from config")
ap.add_argument("--window-compensation-hours", type=int, default=None, help="window compensation hours, default from config")
ap.add_argument("--page-size", type=int, default=200, help="API page size (default: 200)")
ap.add_argument("--chunk-size", type=int, default=500, help="DB query chunk size (default: 500)")
ap.add_argument("--sample-limit", type=int, default=50, help="max missing PK samples per table")
@@ -593,6 +808,11 @@ def main() -> int:
default=24,
help="overlap hours when using --from-cutoff (default: 24)",
)
ap.add_argument(
"--allow-small-window",
action="store_true",
help="allow windows smaller than default completeness guard",
)
ap.add_argument("--log-file", default="", help="log file path (default: logs/check_ods_gaps_YYYYMMDD_HHMMSS.log)")
ap.add_argument("--log-dir", default="", help="log directory (default: logs)")
ap.add_argument("--log-level", default="INFO", help="log level (default: INFO)")
@@ -611,170 +831,41 @@ def main() -> int:
tee_std=True,
) as logger:
cfg = AppConfig.load({})
payload = run_gap_check(
cfg=cfg,
start=args.start,
end=args.end,
window_days=args.window_days,
window_hours=args.window_hours,
page_size=args.page_size,
chunk_size=args.chunk_size,
sample_limit=args.sample_limit,
sleep_per_window=args.sleep_per_window_seconds,
sleep_per_page=args.sleep_per_page_seconds,
task_codes=args.task_codes,
from_cutoff=args.from_cutoff,
cutoff_overlap_hours=args.cutoff_overlap_hours,
allow_small_window=args.allow_small_window,
logger=logger,
window_split_unit=args.window_split_unit or None,
window_compensation_hours=args.window_compensation_hours,
)
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
store_id = int(cfg.get("app.store_id"))
if not cfg.get("api.token"):
logger.error("missing api.token; please set API_TOKEN in .env")
raise SystemExit("missing api.token; please set API_TOKEN in .env")
window_days = int(args.window_days)
window_hours = int(args.window_hours)
if not args.from_cutoff:
min_hours = MIN_COMPLETENESS_WINDOW_DAYS * 24
if window_hours > 0:
if window_hours < min_hours:
logger.warning(
"window_hours=%s too small for completeness check; adjust to %s",
window_hours,
min_hours,
)
window_hours = min_hours
elif window_days < MIN_COMPLETENESS_WINDOW_DAYS:
logger.warning(
"window_days=%s too small for completeness check; adjust to %s",
window_days,
MIN_COMPLETENESS_WINDOW_DAYS,
)
window_days = MIN_COMPLETENESS_WINDOW_DAYS
end = datetime.now(tz) if not args.end else _parse_dt(args.end, tz, is_end=True)
if args.from_cutoff:
db_tmp = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
ods_tables = [s.table_name for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
start, end, cutoff = _resolve_window_from_cutoff(
conn=db_tmp.conn,
ods_tables=ods_tables,
tz=tz,
overlap_hours=args.cutoff_overlap_hours,
)
db_tmp.close()
else:
start = _parse_dt(args.start, tz, is_end=False)
cutoff = None
logger.info(
"START range=%s~%s window_days=%s window_hours=%s page_size=%s chunk_size=%s",
start.isoformat() if start else None,
end.isoformat() if end else None,
window_days,
window_hours,
args.page_size,
args.chunk_size,
)
if args.out:
out_path = Path(args.out)
else:
tag = f"_{args.tag}" if args.tag else ""
stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
logger.info("REPORT_WRITTEN path=%s", out_path)
logger.info(
"SUMMARY missing=%s errors=%s",
payload.get("total_missing"),
payload.get("total_errors"),
)
if cutoff:
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), args.cutoff_overlap_hours)
client = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
try:
db_conn.conn.rollback()
except Exception:
pass
db_conn.conn.autocommit = True
try:
task_filter = {t.strip().upper() for t in args.task_codes.split(",") if t.strip()}
specs = [s for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
if task_filter:
specs = [s for s in specs if s.code in task_filter]
results: list[dict] = []
for spec in specs:
if spec.code == "ODS_SETTLEMENT_TICKET":
continue
result = _check_spec(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
spec=spec,
store_id=store_id,
start=start,
end=end,
window_days=window_days,
window_hours=window_hours,
page_size=args.page_size,
chunk_size=args.chunk_size,
sample_limit=args.sample_limit,
sleep_per_window=args.sleep_per_window_seconds,
sleep_per_page=args.sleep_per_page_seconds,
)
results.append(result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
result.get("task_code"),
result.get("missing"),
result.get("records"),
result.get("errors"),
)
if (not task_filter) or ("ODS_SETTLEMENT_TICKET" in task_filter):
ticket_result = _check_settlement_tickets(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
store_id=store_id,
start=start,
end=end,
window_days=window_days,
window_hours=window_hours,
page_size=args.page_size,
chunk_size=args.chunk_size,
sample_limit=args.sample_limit,
sleep_per_window=args.sleep_per_window_seconds,
sleep_per_page=args.sleep_per_page_seconds,
)
results.append(ticket_result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
ticket_result.get("task_code"),
ticket_result.get("missing"),
ticket_result.get("records"),
ticket_result.get("errors"),
)
total_missing = sum(int(r.get("missing") or 0) for r in results)
total_errors = sum(int(r.get("errors") or 0) for r in results)
if args.out:
out_path = Path(args.out)
else:
tag = f"_{args.tag}" if args.tag else ""
stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
out_path.parent.mkdir(parents=True, exist_ok=True)
payload = {
"start": start.isoformat(),
"end": end.isoformat(),
"cutoff": cutoff.isoformat() if cutoff else None,
"window_days": window_days,
"window_hours": window_hours,
"page_size": args.page_size,
"chunk_size": args.chunk_size,
"sample_limit": args.sample_limit,
"store_id": store_id,
"base_url": cfg.get("api.base_url"),
"results": results,
"total_missing": total_missing,
"total_errors": total_errors,
"generated_at": datetime.now(tz).isoformat(),
}
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
logger.info("REPORT_WRITTEN path=%s", out_path)
logger.info("SUMMARY missing=%s errors=%s", total_missing, total_errors)
finally:
db_conn.close()
return 0
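For reference, a consumer of the JSON report written by this script only needs the keys assembled in the payload dict above. The path below is a hypothetical instance of the reports/ods_gap_check{tag}_{stamp}.json naming scheme:

import json
from pathlib import Path

# hypothetical report path; real files are stamped reports/ods_gap_check{tag}_{YYYYMMDD_HHMMSS}.json
report = json.loads(Path("reports/ods_gap_check_20260127_221401.json").read_text(encoding="utf-8"))

print("range:", report["start"], "~", report["end"])
for r in report["results"]:
    print(r.get("task_code"), "missing:", r.get("missing"), "errors:", r.get("errors"))
print("total_missing:", report["total_missing"], "total_errors:", report["total_errors"])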