Switch to relative paths; complete the client

This commit is contained in:
Neo
2026-01-27 22:14:01 +08:00
parent 04c064793a
commit 9f8976e75a
292 changed files with 307062 additions and 678 deletions


@@ -34,6 +34,7 @@ from database.connection import DatabaseConnection
from models.parsers import TypeParser
from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
from utils.logging_utils import build_log_path, configure_logging
from utils.windowing import split_window
DEFAULT_START = "2025-07-01"
MIN_COMPLETENESS_WINDOW_DAYS = 30
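The hunk above wires in the new utils.windowing.split_window helper. Its implementation is not part of this diff; judging from the call sites later in the commit, it takes a start/end pair and returns a list of (window_start, window_end) tuples, split on calendar-month boundaries when split_unit="month" and left as a single window when split_unit="none". A minimal sketch under those assumptions (how compensation_hours is applied is a guess):

from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

def split_window(start, end, *, tz, split_unit="month", compensation_hours=0):
    """Sketch of the assumed contract: return [(start, end), ...] sub-windows."""
    comp = timedelta(hours=int(compensation_hours or 0))
    start = start - comp  # assumption: compensation widens the range backwards
    if str(split_unit).lower() in ("", "none", "off", "false", "0"):
        return [(start, end)]
    windows = []
    cur = start
    while cur < end:
        # first instant of the following calendar month in the target timezone
        nxt = datetime(cur.year + cur.month // 12, cur.month % 12 + 1, 1, tzinfo=tz)
        windows.append((cur, min(nxt, end)))
        cur = nxt
    return windows

tz = ZoneInfo("Asia/Taipei")
print(split_window(datetime(2025, 7, 1, tzinfo=tz), datetime(2025, 9, 15, tzinfo=tz), tz=tz))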
@@ -214,8 +215,7 @@ def _check_spec(
store_id: int,
start: datetime | None,
end: datetime | None,
window_days: int,
window_hours: int,
windows: list[tuple[datetime, datetime]] | None,
page_size: int,
chunk_size: int,
sample_limit: int,
@@ -249,8 +249,7 @@ def _check_spec(
result["errors"] = 1
result["error_detail"] = "missing start/end for windowed endpoint"
return result
window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
windows = list(_iter_windows(start, end, window_size))
windows = list(windows or [(start, end)])
else:
windows = [(None, None)]
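These hunks swap the per-check window_days/window_hours slicing for a precomputed windows list, falling back to a single (start, end) window when none is supplied. The pre-existing _iter_windows helper (still used by run_gap_check below) is not shown in this diff; from its call signature it presumably steps through the range in fixed-size slices, roughly:

from datetime import datetime, timedelta
from collections.abc import Iterator

def _iter_windows(start: datetime, end: datetime, size: timedelta) -> Iterator[tuple[datetime, datetime]]:
    # step through [start, end) in fixed-size slices; the last slice is clamped to end
    cur = start
    while cur < end:
        nxt = min(cur + size, end)
        yield cur, nxt
        cur = nxt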
@@ -377,8 +376,7 @@ def _check_settlement_tickets(
store_id: int,
start: datetime | None,
end: datetime | None,
window_days: int,
window_hours: int,
windows: list[tuple[datetime, datetime]] | None,
page_size: int,
chunk_size: int,
sample_limit: int,
@@ -415,8 +413,7 @@ def _check_settlement_tickets(
missing_seen: set[tuple] = set()
pay_endpoint = "/PayLog/GetPayLogListPage"
window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
windows = list(_iter_windows(start, end, window_size))
windows = list(windows or [(start, end)])
logger.info(
"CHECK_START task=%s table=%s windows=%s start=%s end=%s",
result["task_code"],
@@ -571,6 +568,222 @@ def _resolve_window_from_cutoff(
return start, now, cutoff
def run_gap_check(
*,
cfg: AppConfig | None,
start: datetime | str | None,
end: datetime | str | None,
window_days: int,
window_hours: int,
page_size: int,
chunk_size: int,
sample_limit: int,
sleep_per_window: float,
sleep_per_page: float,
task_codes: str,
from_cutoff: bool,
cutoff_overlap_hours: int,
allow_small_window: bool,
logger: logging.Logger,
window_split_unit: str | None = None,
window_compensation_hours: int | None = None,
) -> dict:
cfg = cfg or AppConfig.load({})
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
store_id = int(cfg.get("app.store_id") or 0)
if not cfg.get("api.token"):
raise ValueError("missing api.token; please set API_TOKEN in .env")
window_days = int(window_days)
window_hours = int(window_hours)
split_unit = (window_split_unit or cfg.get("run.window_split.unit", "month") or "month").strip()
comp_hours = window_compensation_hours
if comp_hours is None:
comp_hours = cfg.get("run.window_split.compensation_hours", 0)
use_split = split_unit.lower() not in ("", "none", "off", "false", "0")
if not use_split and not from_cutoff and not allow_small_window:
min_hours = MIN_COMPLETENESS_WINDOW_DAYS * 24
if window_hours > 0:
if window_hours < min_hours:
logger.warning(
"window_hours=%s too small for completeness check; adjust to %s",
window_hours,
min_hours,
)
window_hours = min_hours
elif window_days < MIN_COMPLETENESS_WINDOW_DAYS:
logger.warning(
"window_days=%s too small for completeness check; adjust to %s",
window_days,
MIN_COMPLETENESS_WINDOW_DAYS,
)
window_days = MIN_COMPLETENESS_WINDOW_DAYS
cutoff = None
if from_cutoff:
db_tmp = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
ods_tables = [s.table_name for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
start, end, cutoff = _resolve_window_from_cutoff(
conn=db_tmp.conn,
ods_tables=ods_tables,
tz=tz,
overlap_hours=cutoff_overlap_hours,
)
db_tmp.close()
else:
if not start:
start = DEFAULT_START
if not end:
end = datetime.now(tz)
if isinstance(start, str):
start = _parse_dt(start, tz, is_end=False)
if isinstance(end, str):
end = _parse_dt(end, tz, is_end=True)
windows = None
if use_split:
windows = split_window(
start,
end,
tz=tz,
split_unit=split_unit,
compensation_hours=comp_hours,
)
else:
adjusted = split_window(
start,
end,
tz=tz,
split_unit="none",
compensation_hours=comp_hours,
)
if adjusted:
start, end = adjusted[0]
window_size = timedelta(hours=window_hours) if window_hours > 0 else timedelta(days=window_days)
windows = list(_iter_windows(start, end, window_size))
if windows:
start, end = windows[0][0], windows[-1][1]
logger.info(
"START range=%s~%s window_days=%s window_hours=%s split_unit=%s comp_hours=%s page_size=%s chunk_size=%s",
start.isoformat() if isinstance(start, datetime) else None,
end.isoformat() if isinstance(end, datetime) else None,
window_days,
window_hours,
split_unit,
comp_hours,
page_size,
chunk_size,
)
if cutoff:
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), cutoff_overlap_hours)
client = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
try:
db_conn.conn.rollback()
except Exception:
pass
db_conn.conn.autocommit = True
try:
task_filter = {t.strip().upper() for t in (task_codes or "").split(",") if t.strip()}
specs = [s for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
if task_filter:
specs = [s for s in specs if s.code in task_filter]
results: list[dict] = []
for spec in specs:
if spec.code == "ODS_SETTLEMENT_TICKET":
continue
result = _check_spec(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
spec=spec,
store_id=store_id,
start=start,
end=end,
windows=windows,
page_size=page_size,
chunk_size=chunk_size,
sample_limit=sample_limit,
sleep_per_window=sleep_per_window,
sleep_per_page=sleep_per_page,
)
results.append(result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
result.get("task_code"),
result.get("missing"),
result.get("records"),
result.get("errors"),
)
if (not task_filter) or ("ODS_SETTLEMENT_TICKET" in task_filter):
ticket_result = _check_settlement_tickets(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
store_id=store_id,
start=start,
end=end,
windows=windows,
page_size=page_size,
chunk_size=chunk_size,
sample_limit=sample_limit,
sleep_per_window=sleep_per_window,
sleep_per_page=sleep_per_page,
)
results.append(ticket_result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
ticket_result.get("task_code"),
ticket_result.get("missing"),
ticket_result.get("records"),
ticket_result.get("errors"),
)
total_missing = sum(int(r.get("missing") or 0) for r in results)
total_errors = sum(int(r.get("errors") or 0) for r in results)
payload = {
"window_split_unit": split_unit,
"window_compensation_hours": comp_hours,
"start": start.isoformat() if isinstance(start, datetime) else None,
"end": end.isoformat() if isinstance(end, datetime) else None,
"cutoff": cutoff.isoformat() if cutoff else None,
"window_days": window_days,
"window_hours": window_hours,
"page_size": page_size,
"chunk_size": chunk_size,
"sample_limit": sample_limit,
"store_id": store_id,
"base_url": cfg.get("api.base_url"),
"results": results,
"total_missing": total_missing,
"total_errors": total_errors,
"generated_at": datetime.now(tz).isoformat(),
}
return payload
finally:
db_conn.close()
def main() -> int:
_reconfigure_stdout_utf8()
ap = argparse.ArgumentParser(description="Check missing ODS records by comparing API vs ODS PKs.")
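The core of this commit is the hunk above: the body of main() is extracted into run_gap_check, so the client named in the commit message can trigger a gap check without going through argparse. A minimal programmatic call might look like the sketch below; the module name and the zero sleep values are assumptions, everything else mirrors what main() now passes through.

import logging
from check_ods_gaps import run_gap_check  # module name assumed from the default log prefix

logging.basicConfig(level=logging.INFO)
payload = run_gap_check(
    cfg=None,                        # falls back to AppConfig.load({})
    start="2025-07-01",              # strings are parsed in the configured timezone
    end=None,                        # defaults to "now"
    window_days=1,
    window_hours=0,
    page_size=200,
    chunk_size=500,
    sample_limit=50,
    sleep_per_window=0.0,
    sleep_per_page=0.0,
    task_codes="",                   # empty = all enabled ODS tasks
    from_cutoff=False,
    cutoff_overlap_hours=24,
    allow_small_window=False,
    logger=logging.getLogger("check_ods_gaps"),
    window_split_unit="month",
    window_compensation_hours=None,  # None = read run.window_split.compensation_hours from config
)
print(payload["total_missing"], payload["total_errors"])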
@@ -578,6 +791,8 @@ def main() -> int:
ap.add_argument("--end", default="", help="end datetime (default: now)")
ap.add_argument("--window-days", type=int, default=1, help="days per API window (default: 1)")
ap.add_argument("--window-hours", type=int, default=0, help="hours per API window (default: 0)")
ap.add_argument("--window-split-unit", default="", help="split unit (month/none), default from config")
ap.add_argument("--window-compensation-hours", type=int, default=None, help="window compensation hours, default from config")
ap.add_argument("--page-size", type=int, default=200, help="API page size (default: 200)")
ap.add_argument("--chunk-size", type=int, default=500, help="DB query chunk size (default: 500)")
ap.add_argument("--sample-limit", type=int, default=50, help="max missing PK samples per table")
@@ -593,6 +808,11 @@ def main() -> int:
default=24,
help="overlap hours when using --from-cutoff (default: 24)",
)
ap.add_argument(
"--allow-small-window",
action="store_true",
help="allow windows smaller than default completeness guard",
)
ap.add_argument("--log-file", default="", help="log file path (default: logs/check_ods_gaps_YYYYMMDD_HHMMSS.log)")
ap.add_argument("--log-dir", default="", help="log directory (default: logs)")
ap.add_argument("--log-level", default="INFO", help="log level (default: INFO)")
@@ -611,170 +831,41 @@ def main() -> int:
tee_std=True,
) as logger:
cfg = AppConfig.load({})
payload = run_gap_check(
cfg=cfg,
start=args.start,
end=args.end,
window_days=args.window_days,
window_hours=args.window_hours,
page_size=args.page_size,
chunk_size=args.chunk_size,
sample_limit=args.sample_limit,
sleep_per_window=args.sleep_per_window_seconds,
sleep_per_page=args.sleep_per_page_seconds,
task_codes=args.task_codes,
from_cutoff=args.from_cutoff,
cutoff_overlap_hours=args.cutoff_overlap_hours,
allow_small_window=args.allow_small_window,
logger=logger,
window_split_unit=args.window_split_unit or None,
window_compensation_hours=args.window_compensation_hours,
)
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
store_id = int(cfg.get("app.store_id"))
if not cfg.get("api.token"):
logger.error("missing api.token; please set API_TOKEN in .env")
raise SystemExit("missing api.token; please set API_TOKEN in .env")
window_days = int(args.window_days)
window_hours = int(args.window_hours)
if not args.from_cutoff:
min_hours = MIN_COMPLETENESS_WINDOW_DAYS * 24
if window_hours > 0:
if window_hours < min_hours:
logger.warning(
"window_hours=%s too small for completeness check; adjust to %s",
window_hours,
min_hours,
)
window_hours = min_hours
elif window_days < MIN_COMPLETENESS_WINDOW_DAYS:
logger.warning(
"window_days=%s too small for completeness check; adjust to %s",
window_days,
MIN_COMPLETENESS_WINDOW_DAYS,
)
window_days = MIN_COMPLETENESS_WINDOW_DAYS
end = datetime.now(tz) if not args.end else _parse_dt(args.end, tz, is_end=True)
if args.from_cutoff:
db_tmp = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
ods_tables = [s.table_name for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
start, end, cutoff = _resolve_window_from_cutoff(
conn=db_tmp.conn,
ods_tables=ods_tables,
tz=tz,
overlap_hours=args.cutoff_overlap_hours,
)
db_tmp.close()
else:
start = _parse_dt(args.start, tz, is_end=False)
cutoff = None
logger.info(
"START range=%s~%s window_days=%s window_hours=%s page_size=%s chunk_size=%s",
start.isoformat() if start else None,
end.isoformat() if end else None,
window_days,
window_hours,
args.page_size,
args.chunk_size,
)
if args.out:
out_path = Path(args.out)
else:
tag = f"_{args.tag}" if args.tag else ""
stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
logger.info("REPORT_WRITTEN path=%s", out_path)
logger.info(
"SUMMARY missing=%s errors=%s",
payload.get("total_missing"),
payload.get("total_errors"),
)
if cutoff:
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), args.cutoff_overlap_hours)
client = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
try:
db_conn.conn.rollback()
except Exception:
pass
db_conn.conn.autocommit = True
try:
task_filter = {t.strip().upper() for t in args.task_codes.split(",") if t.strip()}
specs = [s for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
if task_filter:
specs = [s for s in specs if s.code in task_filter]
results: list[dict] = []
for spec in specs:
if spec.code == "ODS_SETTLEMENT_TICKET":
continue
result = _check_spec(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
spec=spec,
store_id=store_id,
start=start,
end=end,
window_days=window_days,
window_hours=window_hours,
page_size=args.page_size,
chunk_size=args.chunk_size,
sample_limit=args.sample_limit,
sleep_per_window=args.sleep_per_window_seconds,
sleep_per_page=args.sleep_per_page_seconds,
)
results.append(result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
result.get("task_code"),
result.get("missing"),
result.get("records"),
result.get("errors"),
)
if (not task_filter) or ("ODS_SETTLEMENT_TICKET" in task_filter):
ticket_result = _check_settlement_tickets(
client=client,
db_conn=db_conn.conn,
cfg=cfg,
tz=tz,
logger=logger,
store_id=store_id,
start=start,
end=end,
window_days=window_days,
window_hours=window_hours,
page_size=args.page_size,
chunk_size=args.chunk_size,
sample_limit=args.sample_limit,
sleep_per_window=args.sleep_per_window_seconds,
sleep_per_page=args.sleep_per_page_seconds,
)
results.append(ticket_result)
logger.info(
"CHECK_DONE task=%s missing=%s records=%s errors=%s",
ticket_result.get("task_code"),
ticket_result.get("missing"),
ticket_result.get("records"),
ticket_result.get("errors"),
)
total_missing = sum(int(r.get("missing") or 0) for r in results)
total_errors = sum(int(r.get("errors") or 0) for r in results)
if args.out:
out_path = Path(args.out)
else:
tag = f"_{args.tag}" if args.tag else ""
stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
out_path = PROJECT_ROOT / "reports" / f"ods_gap_check{tag}_{stamp}.json"
out_path.parent.mkdir(parents=True, exist_ok=True)
payload = {
"start": start.isoformat(),
"end": end.isoformat(),
"cutoff": cutoff.isoformat() if cutoff else None,
"window_days": window_days,
"window_hours": window_hours,
"page_size": args.page_size,
"chunk_size": args.chunk_size,
"sample_limit": args.sample_limit,
"store_id": store_id,
"base_url": cfg.get("api.base_url"),
"results": results,
"total_missing": total_missing,
"total_errors": total_errors,
"generated_at": datetime.now(tz).isoformat(),
}
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
logger.info("REPORT_WRITTEN path=%s", out_path)
logger.info("SUMMARY missing=%s errors=%s", total_missing, total_errors)
finally:
db_conn.close()
return 0
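For reference, a consumer of the JSON report written by this script only needs the keys assembled in the payload dict above. The path below is a hypothetical instance of the reports/ods_gap_check{tag}_{stamp}.json naming scheme:

import json
from pathlib import Path

# hypothetical report path; real files are stamped reports/ods_gap_check{tag}_{YYYYMMDD_HHMMSS}.json
report = json.loads(Path("reports/ods_gap_check_20260127_221401.json").read_text(encoding="utf-8"))

print("range:", report["start"], "~", report["end"])
for r in report["results"]:
    print(r.get("task_code"), "missing:", r.get("missing"), "errors:", r.get("errors"))
print("total_missing:", report["total_missing"], "total_errors:", report["total_errors"])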