# -*- coding: utf-8 -*-
"""
Check missing ODS records by comparing API primary keys vs ODS table primary keys.

Default range:
    start = 2025-07-01 00:00:00
    end = now

For update runs, use --from-cutoff to derive the start time from ODS max(fetched_at),
then backtrack by --cutoff-overlap-hours.
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
import time as time_mod
from datetime import datetime, time, timedelta
from pathlib import Path
from typing import Iterable, Sequence
from zoneinfo import ZoneInfo

from dateutil import parser as dtparser
from psycopg2.extras import execute_values

PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from api.client import APIClient
from config.settings import AppConfig
from database.connection import DatabaseConnection
from models.parsers import TypeParser
from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
from utils.logging_utils import build_log_path, configure_logging

DEFAULT_START = "2025-07-01"
MIN_COMPLETENESS_WINDOW_DAYS = 30


def _reconfigure_stdout_utf8() -> None:
    if hasattr(sys.stdout, "reconfigure"):
        try:
            sys.stdout.reconfigure(encoding="utf-8")
        except Exception:
            pass


def _parse_dt(value: str, tz: ZoneInfo, *, is_end: bool) -> datetime:
    raw = (value or "").strip()
    if not raw:
        raise ValueError("empty datetime")
    has_time = any(ch in raw for ch in (":", "T"))
    dt = dtparser.parse(raw)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=tz)
    else:
        dt = dt.astimezone(tz)
    if not has_time:
        dt = dt.replace(hour=23 if is_end else 0, minute=59 if is_end else 0, second=59 if is_end else 0, microsecond=0)
    return dt


def _iter_windows(start: datetime, end: datetime, window_size: timedelta) -> Iterable[tuple[datetime, datetime]]:
    if window_size.total_seconds() <= 0:
        raise ValueError("window_size must be > 0")
    cur = start
    while cur < end:
        nxt = min(cur + window_size, end)
        yield cur, nxt
        cur = nxt


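# _merge_record_layers flattens nested API payloads so PK lookups can stay flat.
# A sketch of the assumed payload shape (field names illustrative, not a contract):
#   {"id": 1, "data": {"orderId": 9, "data": {...}}, "settleList": {"settleId": 3}}
# collapses into a single dict in which top-level keys win over nested "data" /
# "settleList" keys.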
def _merge_record_layers(record: dict) -> dict:
    merged = record
    data_part = merged.get("data")
    while isinstance(data_part, dict):
        merged = {**data_part, **merged}
        data_part = data_part.get("data")
    settle_inner = merged.get("settleList")
    if isinstance(settle_inner, dict):
        merged = {**settle_inner, **merged}
    return merged


def _get_value_case_insensitive(record: dict | None, col: str | None):
    if record is None or col is None:
        return None
    if col in record:
        return record.get(col)
    col_lower = col.lower()
    for k, v in record.items():
        if isinstance(k, str) and k.lower() == col_lower:
            return v
    return None


def _normalize_pk_value(value):
    if value is None:
        return None
    if isinstance(value, str) and value.isdigit():
        try:
            return int(value)
        except Exception:
            return value
    return value


def _chunked(seq: Sequence, size: int) -> Iterable[Sequence]:
    if size <= 0:
        size = 500
    for i in range(0, len(seq), size):
        yield seq[i : i + size]


def _get_table_pk_columns(conn, table: str) -> list[str]:
    if "." in table:
        schema, name = table.split(".", 1)
    else:
        schema, name = "public", table
    sql = """
        SELECT kcu.column_name
        FROM information_schema.table_constraints tc
        JOIN information_schema.key_column_usage kcu
            ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
        WHERE tc.constraint_type = 'PRIMARY KEY'
            AND tc.table_schema = %s
            AND tc.table_name = %s
        ORDER BY kcu.ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(sql, (schema, name))
        return [r[0] for r in cur.fetchall()]


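# _fetch_existing_pk_set probes which PK tuples already exist by joining the table
# against a VALUES list. For pk_cols ("id", "siteId") the generated statement is
# roughly:
#   SELECT t."id", t."siteId" FROM <table> t
#   JOIN (VALUES %s) AS v("id", "siteId")
#     ON t."id" = v."id" AND t."siteId" = v."siteId"
# with %s expanded by psycopg2's execute_values. The column names above are only an
# example; the real ones come from information_schema at runtime.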
def _fetch_existing_pk_set(conn, table: str, pk_cols: Sequence[str], pk_values: list[tuple], chunk_size: int) -> set[tuple]:
    if not pk_values:
        return set()
    select_cols = ", ".join(f't."{c}"' for c in pk_cols)
    value_cols = ", ".join(f'"{c}"' for c in pk_cols)
    join_cond = " AND ".join(f't."{c}" = v."{c}"' for c in pk_cols)
    sql = (
        f"SELECT {select_cols} FROM {table} t "
        f"JOIN (VALUES %s) AS v({value_cols}) ON {join_cond}"
    )
    existing: set[tuple] = set()
    with conn.cursor() as cur:
        for chunk in _chunked(pk_values, chunk_size):
            execute_values(cur, sql, chunk, page_size=len(chunk))
            for row in cur.fetchall():
                existing.add(tuple(row))
    return existing


def _merge_common_params(cfg: AppConfig, task_code: str, base: dict) -> dict:
    merged: dict = {}
    common = cfg.get("api.params", {}) or {}
    if isinstance(common, dict):
        merged.update(common)
    scoped = cfg.get(f"api.params.{task_code.lower()}", {}) or {}
    if isinstance(scoped, dict):
        merged.update(scoped)
    merged.update(base)
    return merged


def _build_params(cfg: AppConfig, spec, store_id: int, window_start: datetime | None, window_end: datetime | None) -> dict:
    base: dict = {}
    if spec.include_site_id:
        if spec.endpoint == "/TenantGoods/GetGoodsInventoryList":
            base["siteId"] = [store_id]
        else:
            base["siteId"] = store_id
    if spec.requires_window and spec.time_fields and window_start and window_end:
        start_key, end_key = spec.time_fields
        base[start_key] = TypeParser.format_timestamp(window_start, ZoneInfo(cfg.get("app.timezone", "Asia/Taipei")))
        base[end_key] = TypeParser.format_timestamp(window_end, ZoneInfo(cfg.get("app.timezone", "Asia/Taipei")))
    base.update(spec.extra_params or {})
    return _merge_common_params(cfg, spec.code, base)


def _pk_tuple_from_record(record: dict, pk_cols: Sequence[str]) -> tuple | None:
    merged = _merge_record_layers(record)
    values = []
    for col in pk_cols:
        val = _normalize_pk_value(_get_value_case_insensitive(merged, col))
        if val is None or val == "":
            return None
        values.append(val)
    return tuple(values)


def _pk_tuple_from_ticket_candidate(value) -> tuple | None:
    val = _normalize_pk_value(value)
    if val is None or val == "":
        return None
    return (val,)


def _format_missing_sample(pk_cols: Sequence[str], pk_tuple: tuple) -> dict:
    return {col: pk_tuple[idx] for idx, col in enumerate(pk_cols)}


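# _check_spec walks one task spec end to end: split [start, end) into windows,
# page through the API for each window, build PK tuples from the returned records,
# probe the ODS table for those tuples, and count/sample the ones that are absent.
# Errors abort the current spec (the window loop breaks) but are recorded in the
# result dict rather than raised.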
def _check_spec(
    *,
    client: APIClient,
    db_conn,
    cfg: AppConfig,
    tz: ZoneInfo,
    logger: logging.Logger,
    spec,
    store_id: int,
    start: datetime | None,
    end: datetime | None,
    window_days: int,
    window_hours: int,
    page_size: int,
    chunk_size: int,
    sample_limit: int,
    sleep_per_window: float,
    sleep_per_page: float,
) -> dict:
    result = {
        "task_code": spec.code,
        "table": spec.table_name,
        "endpoint": spec.endpoint,
        "pk_columns": [],
        "records": 0,
        "records_with_pk": 0,
        "missing": 0,
        "missing_samples": [],
        "pages": 0,
        "skipped_missing_pk": 0,
        "errors": 0,
        "error_detail": None,
    }

    pk_cols = _get_table_pk_columns(db_conn, spec.table_name)
    result["pk_columns"] = pk_cols
    if not pk_cols:
        result["errors"] = 1
        result["error_detail"] = "no primary key columns found"
        return result

    if spec.requires_window and spec.time_fields:
        if not start or not end:
            result["errors"] = 1
            result["error_detail"] = "missing start/end for windowed endpoint"
            return result
        window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
        windows = list(_iter_windows(start, end, window_size))
    else:
        windows = [(None, None)]

    logger.info(
        "CHECK_START task=%s table=%s windows=%s start=%s end=%s",
        spec.code,
        spec.table_name,
        len(windows),
        start.isoformat() if start else None,
        end.isoformat() if end else None,
    )
    missing_seen: set[tuple] = set()

    for window_idx, (window_start, window_end) in enumerate(windows, start=1):
        window_label = (
            f"{window_start.isoformat()}~{window_end.isoformat()}"
            if window_start and window_end
            else "FULL"
        )
        logger.info(
            "WINDOW_START task=%s idx=%s window=%s",
            spec.code,
            window_idx,
            window_label,
        )
        window_pages = 0
        window_records = 0
        window_missing = 0
        window_skipped = 0
        params = _build_params(cfg, spec, store_id, window_start, window_end)
        try:
            for page_no, records, _, _ in client.iter_paginated(
                endpoint=spec.endpoint,
                params=params,
                page_size=page_size,
                data_path=spec.data_path,
                list_key=spec.list_key,
            ):
                window_pages += 1
                window_records += len(records)
                result["pages"] += 1
                result["records"] += len(records)
                pk_tuples: list[tuple] = []
                for rec in records:
                    if not isinstance(rec, dict):
                        result["skipped_missing_pk"] += 1
                        window_skipped += 1
                        continue
                    pk_tuple = _pk_tuple_from_record(rec, pk_cols)
                    if not pk_tuple:
                        result["skipped_missing_pk"] += 1
                        window_skipped += 1
                        continue
                    pk_tuples.append(pk_tuple)

                if not pk_tuples:
                    continue

                result["records_with_pk"] += len(pk_tuples)
                pk_unique = list(dict.fromkeys(pk_tuples))
                existing = _fetch_existing_pk_set(db_conn, spec.table_name, pk_cols, pk_unique, chunk_size)
                for pk_tuple in pk_unique:
                    if pk_tuple in existing:
                        continue
                    if pk_tuple in missing_seen:
                        continue
                    missing_seen.add(pk_tuple)
                    result["missing"] += 1
                    window_missing += 1
                    if len(result["missing_samples"]) < sample_limit:
                        result["missing_samples"].append(_format_missing_sample(pk_cols, pk_tuple))
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "PAGE task=%s idx=%s page=%s records=%s missing=%s skipped=%s",
                        spec.code,
                        window_idx,
                        page_no,
                        len(records),
                        window_missing,
                        window_skipped,
                    )
                if sleep_per_page > 0:
                    time_mod.sleep(sleep_per_page)
        except Exception as exc:
            result["errors"] += 1
            result["error_detail"] = f"{type(exc).__name__}: {exc}"
            logger.exception(
                "WINDOW_ERROR task=%s idx=%s window=%s error=%s",
                spec.code,
                window_idx,
                window_label,
                result["error_detail"],
            )
            break
        logger.info(
            "WINDOW_DONE task=%s idx=%s window=%s pages=%s records=%s missing=%s skipped=%s",
            spec.code,
            window_idx,
            window_label,
            window_pages,
            window_records,
            window_missing,
            window_skipped,
        )
        if sleep_per_window > 0:
            logger.debug(
                "SLEEP_WINDOW task=%s idx=%s seconds=%.2f",
                spec.code,
                window_idx,
                sleep_per_window,
            )
            time_mod.sleep(sleep_per_window)

    return result


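# Settlement ticket details are checked indirectly: candidate keys come from the
# pay-log listing (/PayLog/GetPayLogListPage), where each row's relateId /
# orderSettleId / order_settle_id is taken as the expected ticket key and probed
# against billiards_ods.settlement_ticket_details.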
def _check_settlement_tickets(
    *,
    client: APIClient,
    db_conn,
    cfg: AppConfig,
    tz: ZoneInfo,
    logger: logging.Logger,
    store_id: int,
    start: datetime | None,
    end: datetime | None,
    window_days: int,
    window_hours: int,
    page_size: int,
    chunk_size: int,
    sample_limit: int,
    sleep_per_window: float,
    sleep_per_page: float,
) -> dict:
    table_name = "billiards_ods.settlement_ticket_details"
    pk_cols = _get_table_pk_columns(db_conn, table_name)
    result = {
        "task_code": "ODS_SETTLEMENT_TICKET",
        "table": table_name,
        "endpoint": "/Order/GetOrderSettleTicketNew",
        "pk_columns": pk_cols,
        "records": 0,
        "records_with_pk": 0,
        "missing": 0,
        "missing_samples": [],
        "pages": 0,
        "skipped_missing_pk": 0,
        "errors": 0,
        "error_detail": None,
        "source_endpoint": "/PayLog/GetPayLogListPage",
    }

    if not pk_cols:
        result["errors"] = 1
        result["error_detail"] = "no primary key columns found"
        return result
    if not start or not end:
        result["errors"] = 1
        result["error_detail"] = "missing start/end for ticket check"
        return result

    missing_seen: set[tuple] = set()
    pay_endpoint = "/PayLog/GetPayLogListPage"

    window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
    windows = list(_iter_windows(start, end, window_size))
    logger.info(
        "CHECK_START task=%s table=%s windows=%s start=%s end=%s",
        result["task_code"],
        table_name,
        len(windows),
        start.isoformat() if start else None,
        end.isoformat() if end else None,
    )

    for window_idx, (window_start, window_end) in enumerate(windows, start=1):
        window_label = f"{window_start.isoformat()}~{window_end.isoformat()}"
        logger.info(
            "WINDOW_START task=%s idx=%s window=%s",
            result["task_code"],
            window_idx,
            window_label,
        )
        window_pages = 0
        window_records = 0
        window_missing = 0
        window_skipped = 0
        base = {
            "siteId": store_id,
            "StartPayTime": TypeParser.format_timestamp(window_start, tz),
            "EndPayTime": TypeParser.format_timestamp(window_end, tz),
        }
        params = _merge_common_params(cfg, "ODS_PAYMENT", base)
        try:
            for page_no, records, _, _ in client.iter_paginated(
                endpoint=pay_endpoint,
                params=params,
                page_size=page_size,
                data_path=("data",),
                list_key=None,
            ):
                window_pages += 1
                window_records += len(records)
                result["pages"] += 1
                result["records"] += len(records)
                pk_tuples: list[tuple] = []
                for rec in records:
                    if not isinstance(rec, dict):
                        result["skipped_missing_pk"] += 1
                        window_skipped += 1
                        continue
                    relate_id = TypeParser.parse_int(
                        (rec or {}).get("relateId")
                        or (rec or {}).get("orderSettleId")
                        or (rec or {}).get("order_settle_id")
                    )
                    pk_tuple = _pk_tuple_from_ticket_candidate(relate_id)
                    if not pk_tuple:
                        result["skipped_missing_pk"] += 1
                        window_skipped += 1
                        continue
                    pk_tuples.append(pk_tuple)

                if not pk_tuples:
                    continue

                result["records_with_pk"] += len(pk_tuples)
                pk_unique = list(dict.fromkeys(pk_tuples))
                existing = _fetch_existing_pk_set(db_conn, table_name, pk_cols, pk_unique, chunk_size)
                for pk_tuple in pk_unique:
                    if pk_tuple in existing:
                        continue
                    if pk_tuple in missing_seen:
                        continue
                    missing_seen.add(pk_tuple)
                    result["missing"] += 1
                    window_missing += 1
                    if len(result["missing_samples"]) < sample_limit:
                        result["missing_samples"].append(_format_missing_sample(pk_cols, pk_tuple))
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "PAGE task=%s idx=%s page=%s records=%s missing=%s skipped=%s",
                        result["task_code"],
                        window_idx,
                        page_no,
                        len(records),
                        window_missing,
                        window_skipped,
                    )
                if sleep_per_page > 0:
                    time_mod.sleep(sleep_per_page)
        except Exception as exc:
            result["errors"] += 1
            result["error_detail"] = f"{type(exc).__name__}: {exc}"
            logger.exception(
                "WINDOW_ERROR task=%s idx=%s window=%s error=%s",
                result["task_code"],
                window_idx,
                window_label,
                result["error_detail"],
            )
            break
        logger.info(
            "WINDOW_DONE task=%s idx=%s window=%s pages=%s records=%s missing=%s skipped=%s",
            result["task_code"],
            window_idx,
            window_label,
            window_pages,
            window_records,
            window_missing,
            window_skipped,
        )
        if sleep_per_window > 0:
            logger.debug(
                "SLEEP_WINDOW task=%s idx=%s seconds=%.2f",
                result["task_code"],
                window_idx,
                sleep_per_window,
            )
            time_mod.sleep(sleep_per_window)

    return result


def _compute_ods_cutoff(conn, ods_tables: Sequence[str]) -> datetime | None:
    values: list[datetime] = []
    with conn.cursor() as cur:
        for table in ods_tables:
            try:
                cur.execute(f"SELECT MAX(fetched_at) FROM {table}")
                row = cur.fetchone()
                if row and row[0]:
                    values.append(row[0])
            except Exception:
                continue
    if not values:
        return None
    return min(values)


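# The cutoff is the minimum of the per-table MAX(fetched_at) values, i.e. the point
# up to which every enabled ODS table has been loaded. The resolved window then
# starts overlap_hours before that cutoff so late-arriving rows are re-checked; when
# no cutoff is available it falls back to roughly the last overlap_hours (at least
# one hour) before now.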
def _resolve_window_from_cutoff(
    *,
    conn,
    ods_tables: Sequence[str],
    tz: ZoneInfo,
    overlap_hours: int,
) -> tuple[datetime, datetime, datetime | None]:
    cutoff = _compute_ods_cutoff(conn, ods_tables)
    now = datetime.now(tz)
    if cutoff is None:
        start = now - timedelta(hours=max(1, overlap_hours))
        return start, now, None
    if cutoff.tzinfo is None:
        cutoff = cutoff.replace(tzinfo=tz)
    else:
        cutoff = cutoff.astimezone(tz)
    start = cutoff - timedelta(hours=max(0, overlap_hours))
    return start, now, cutoff


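# main() wires everything together: parse CLI flags, resolve the check window
# (fixed range or --from-cutoff), run _check_spec for each selected spec, run the
# settlement-ticket check, and write a JSON report, by default something like
# reports/ods_gap_check_<tag>_<YYYYMMDD_HHMMSS>.json (the sample name is
# illustrative; the exact path depends on --out / --tag at runtime). Returns 0.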
def main() -> int:
    _reconfigure_stdout_utf8()
    ap = argparse.ArgumentParser(description="Check missing ODS records by comparing API vs ODS PKs.")
    ap.add_argument("--start", default=DEFAULT_START, help="start datetime (default: 2025-07-01)")
    ap.add_argument("--end", default="", help="end datetime (default: now)")
    ap.add_argument("--window-days", type=int, default=1, help="days per API window (default: 1)")
    ap.add_argument("--window-hours", type=int, default=0, help="hours per API window (default: 0)")
    ap.add_argument("--page-size", type=int, default=200, help="API page size (default: 200)")
    ap.add_argument("--chunk-size", type=int, default=500, help="DB query chunk size (default: 500)")
    ap.add_argument("--sample-limit", type=int, default=50, help="max missing PK samples per table")
    ap.add_argument("--sleep-per-window-seconds", type=float, default=0, help="sleep seconds after each window")
    ap.add_argument("--sleep-per-page-seconds", type=float, default=0, help="sleep seconds after each page")
    ap.add_argument("--task-codes", default="", help="comma-separated task codes to check (optional)")
    ap.add_argument("--out", default="", help="output JSON path (optional)")
    ap.add_argument("--tag", default="", help="tag suffix for output filename")
    ap.add_argument("--from-cutoff", action="store_true", help="derive start from ODS cutoff")
    ap.add_argument(
        "--cutoff-overlap-hours",
        type=int,
        default=24,
        help="overlap hours when using --from-cutoff (default: 24)",
    )
    ap.add_argument("--log-file", default="", help="log file path (default: logs/check_ods_gaps_YYYYMMDD_HHMMSS.log)")
    ap.add_argument("--log-dir", default="", help="log directory (default: logs)")
    ap.add_argument("--log-level", default="INFO", help="log level (default: INFO)")
    ap.add_argument("--no-log-console", action="store_true", help="disable console logging")
    args = ap.parse_args()

    log_dir = Path(args.log_dir) if args.log_dir else (PROJECT_ROOT / "logs")
    log_file = Path(args.log_file) if args.log_file else build_log_path(log_dir, "check_ods_gaps", args.tag)
    log_console = not args.no_log_console

    with configure_logging(
        "ods_gap_check",
        log_file,
        level=args.log_level,
        console=log_console,
        tee_std=True,
    ) as logger:
        cfg = AppConfig.load({})
        tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
        store_id = int(cfg.get("app.store_id"))

        if not cfg.get("api.token"):
            logger.error("missing api.token; please set API_TOKEN in .env")
            raise SystemExit("missing api.token; please set API_TOKEN in .env")

        window_days = int(args.window_days)
        window_hours = int(args.window_hours)
        if not args.from_cutoff:
            min_hours = MIN_COMPLETENESS_WINDOW_DAYS * 24
            if window_hours > 0:
                if window_hours < min_hours:
                    logger.warning(
                        "window_hours=%s too small for completeness check; adjust to %s",
                        window_hours,
                        min_hours,
                    )
                    window_hours = min_hours
            elif window_days < MIN_COMPLETENESS_WINDOW_DAYS:
                logger.warning(
                    "window_days=%s too small for completeness check; adjust to %s",
                    window_days,
                    MIN_COMPLETENESS_WINDOW_DAYS,
                )
                window_days = MIN_COMPLETENESS_WINDOW_DAYS

        end = datetime.now(tz) if not args.end else _parse_dt(args.end, tz, is_end=True)
        if args.from_cutoff:
            db_tmp = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
            ods_tables = [s.table_name for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
            start, end, cutoff = _resolve_window_from_cutoff(
                conn=db_tmp.conn,
                ods_tables=ods_tables,
                tz=tz,
                overlap_hours=args.cutoff_overlap_hours,
            )
            db_tmp.close()
        else:
            start = _parse_dt(args.start, tz, is_end=False)
            cutoff = None

        logger.info(
            "START range=%s~%s window_days=%s window_hours=%s page_size=%s chunk_size=%s",
            start.isoformat() if start else None,
            end.isoformat() if end else None,
            window_days,
            window_hours,
            args.page_size,
            args.chunk_size,
        )
        if cutoff:
            logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), args.cutoff_overlap_hours)

        client = APIClient(
            base_url=cfg["api"]["base_url"],
            token=cfg["api"]["token"],
            timeout=int(cfg["api"].get("timeout_sec") or 20),
            retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
            headers_extra=cfg["api"].get("headers_extra") or {},
        )

        db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
        try:
            db_conn.conn.rollback()
        except Exception:
            pass
        db_conn.conn.autocommit = True
        try:
            task_filter = {t.strip().upper() for t in args.task_codes.split(",") if t.strip()}
            specs = [s for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
            if task_filter:
                specs = [s for s in specs if s.code in task_filter]

            results: list[dict] = []
            for spec in specs:
                if spec.code == "ODS_SETTLEMENT_TICKET":
                    continue
                result = _check_spec(
                    client=client,
                    db_conn=db_conn.conn,
                    cfg=cfg,
                    tz=tz,
                    logger=logger,
                    spec=spec,
                    store_id=store_id,
                    start=start,
                    end=end,
                    window_days=window_days,
                    window_hours=window_hours,
                    page_size=args.page_size,
                    chunk_size=args.chunk_size,
                    sample_limit=args.sample_limit,
                    sleep_per_window=args.sleep_per_window_seconds,
                    sleep_per_page=args.sleep_per_page_seconds,
                )
                results.append(result)
                logger.info(
                    "CHECK_DONE task=%s missing=%s records=%s errors=%s",
                    result.get("task_code"),
                    result.get("missing"),
                    result.get("records"),
                    result.get("errors"),
                )

            if (not task_filter) or ("ODS_SETTLEMENT_TICKET" in task_filter):
                ticket_result = _check_settlement_tickets(
                    client=client,
                    db_conn=db_conn.conn,
                    cfg=cfg,
                    tz=tz,
                    logger=logger,
                    store_id=store_id,
                    start=start,
                    end=end,
                    window_days=window_days,
                    window_hours=window_hours,
                    page_size=args.page_size,
                    chunk_size=args.chunk_size,
                    sample_limit=args.sample_limit,
                    sleep_per_window=args.sleep_per_window_seconds,
                    sleep_per_page=args.sleep_per_page_seconds,
                )
                results.append(ticket_result)
                logger.info(
                    "CHECK_DONE task=%s missing=%s records=%s errors=%s",
                    ticket_result.get("task_code"),
                    ticket_result.get("missing"),
                    ticket_result.get("records"),
                    ticket_result.get("errors"),
                )

            total_missing = sum(int(r.get("missing") or 0) for r in results)
            total_errors = sum(int(r.get("errors") or 0) for r in results)

            if args.out:
                out_path = Path(args.out)
            else:
                tag = f"_{args.tag}" if args.tag else ""
                stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
                out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
            out_path.parent.mkdir(parents=True, exist_ok=True)

            payload = {
                "start": start.isoformat(),
                "end": end.isoformat(),
                "cutoff": cutoff.isoformat() if cutoff else None,
                "window_days": window_days,
                "window_hours": window_hours,
                "page_size": args.page_size,
                "chunk_size": args.chunk_size,
                "sample_limit": args.sample_limit,
                "store_id": store_id,
                "base_url": cfg.get("api.base_url"),
                "results": results,
                "total_missing": total_missing,
                "total_errors": total_errors,
                "generated_at": datetime.now(tz).isoformat(),
            }
            out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
            logger.info("REPORT_WRITTEN path=%s", out_path)
            logger.info("SUMMARY missing=%s errors=%s", total_missing, total_errors)
        finally:
            db_conn.close()

    return 0


if __name__ == "__main__":
    raise SystemExit(main())