Files
feiqiu-ETL/etl_billiards/orchestration/run_tracker.py
2026-01-18 22:37:38 +08:00

128 lines
4.4 KiB
Python

# -*- coding: utf-8 -*-
"""运行记录追踪器"""
import json
from datetime import datetime
class RunTracker:
"""ETL运行记录管理"""
def __init__(self, db_connection):
self.db = db_connection
def create_run(self, task_id: int, store_id: int, run_uuid: str,
export_dir: str, log_path: str, status: str,
window_start: datetime = None, window_end: datetime = None,
window_minutes: int = None, overlap_seconds: int = None,
request_params: dict = None) -> int:
"""创建运行记录"""
sql = """
INSERT INTO etl_admin.etl_run(
run_uuid, task_id, store_id, status, started_at, window_start, window_end,
window_minutes, overlap_seconds, fetched_count, loaded_count, updated_count,
skipped_count, error_count, unknown_fields, export_dir, log_path,
request_params, manifest, error_message, extra
) VALUES (
%s, %s, %s, %s, now(), %s, %s, %s, %s, 0, 0, 0, 0, 0, 0, %s, %s, %s,
'{}'::jsonb, NULL, '{}'::jsonb
)
RETURNING run_id
"""
result = self.db.query(
sql,
(run_uuid, task_id, store_id, status, window_start, window_end,
window_minutes, overlap_seconds, export_dir, log_path,
json.dumps(request_params or {}, ensure_ascii=False))
)
run_id = result[0]["run_id"]
self.db.commit()
return run_id
def update_run(
self,
run_id: int,
counts: dict,
status: str,
ended_at: datetime = None,
manifest: dict = None,
error_message: str = None,
window: dict | None = None,
request_params: dict | None = None,
overlap_seconds: int | None = None,
):
"""更新运行记录"""
sql = """
UPDATE etl_admin.etl_run
SET fetched_count = %s,
loaded_count = %s,
updated_count = %s,
skipped_count = %s,
error_count = %s,
unknown_fields = %s,
status = %s,
ended_at = %s,
manifest = %s,
error_message = %s,
window_start = COALESCE(%s, window_start),
window_end = COALESCE(%s, window_end),
window_minutes = COALESCE(%s, window_minutes),
overlap_seconds = COALESCE(%s, overlap_seconds),
request_params = CASE WHEN %s IS NULL THEN request_params ELSE %s::jsonb END
WHERE run_id = %s
"""
def _count(v, default: int = 0) -> int:
if v is None:
return default
if isinstance(v, bool):
return int(v)
if isinstance(v, int):
return int(v)
if isinstance(v, str):
try:
return int(v)
except Exception:
return default
if isinstance(v, (list, tuple, set, dict)):
try:
return len(v)
except Exception:
return default
return default
safe_counts = counts or {}
window_start = None
window_end = None
window_minutes = None
if isinstance(window, dict):
window_start = window.get("start") or window.get("window_start")
window_end = window.get("end") or window.get("window_end")
window_minutes = window.get("minutes") or window.get("window_minutes")
request_json = None if request_params is None else json.dumps(request_params or {}, ensure_ascii=False)
self.db.execute(
sql,
(
_count(safe_counts.get("fetched", 0)),
_count(safe_counts.get("inserted", 0)),
_count(safe_counts.get("updated", 0)),
_count(safe_counts.get("skipped", 0)),
_count(safe_counts.get("errors", 0)),
_count(safe_counts.get("unknown_fields", 0)),
status,
ended_at,
json.dumps(manifest or {}, ensure_ascii=False),
error_message,
window_start,
window_end,
window_minutes,
overlap_seconds,
request_json,
request_json,
run_id,
),
)
self.db.commit()