Files
feiqiu-ETL/etl_billiards/tasks/dws_build_order_summary_task.py
2026-01-27 22:47:05 +08:00

193 lines
7.3 KiB
Python

# -*- coding: utf-8 -*-
"""Build DWS order summary table from DWD fact tables."""
from __future__ import annotations
from datetime import date
from typing import Any
from .base_task import BaseTask, TaskContext
from utils.windowing import build_window_segments, calc_window_minutes
from scripts.build_dws_order_summary import SQL_BUILD_SUMMARY
class DwsBuildOrderSummaryTask(BaseTask):
    """Recompute/refresh `billiards_dws.dws_order_summary` for a date window.

    The task deletes (or truncates) the affected slice of the summary table
    and then re-runs ``SQL_BUILD_SUMMARY`` for the same slice. A window may
    be split into several segments; each segment is committed on its own.

    NOTE(review): ``_build_context``, ``_build_context_for_window``,
    ``transform``, ``_accumulate_counts``, ``config``, ``db``, ``logger`` and
    ``tz`` are not defined in this class -- presumably they come from
    ``BaseTask``; confirm there.
    """

    def get_task_code(self) -> str:
        """Return the identifier of this task."""
        return "DWS_BUILD_ORDER_SUMMARY"

    @staticmethod
    def _coerce_flag(value: Any) -> bool:
        """Interpret a configuration value as a boolean switch.

        Plain ``bool()`` treats every non-empty string as true, so a textual
        ``"false"`` or ``"0"`` would silently enable a destructive option.
        Common textual false spellings are therefore mapped to ``False``;
        every other value keeps its ordinary truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in (
                "",
                "0",
                "false",
                "f",
                "no",
                "n",
                "off",
                "none",
                "null",
            )
        return bool(value)

    def execute(self, cursor_data: dict | None = None) -> dict:
        """Run extract/transform/load for every window segment.

        Args:
            cursor_data: Optional cursor state forwarded to the context
                builders.

        Returns:
            A dict with ``status``, the accumulated ``counts`` and the overall
            ``window``; plus ``segments`` (only when more than one segment
            ran), ``request_params`` (a single dict, or a list when several
            segments reported one) and ``extra.deleted`` (only when rows were
            deleted).

        Raises:
            Exception: Whatever a segment raised, re-raised after the
                transaction has been rolled back and the failure logged.
        """
        base_context = self._build_context(cursor_data)
        task_code = self.get_task_code()
        segments = build_window_segments(
            self.config,
            base_context.window_start,
            base_context.window_end,
            tz=self.tz,
            override_only=True,
        )
        if not segments:
            # No segmentation produced: process the base window in one pass.
            segments = [(base_context.window_start, base_context.window_end)]

        total_segments = len(segments)
        if total_segments > 1:
            self.logger.info("%s: 分段执行 共%s", task_code, total_segments)

        total_counts: dict = {}
        segment_results: list[dict] = []
        request_params_list: list[dict] = []
        total_deleted = 0

        for idx, (window_start, window_end) in enumerate(segments, start=1):
            context = self._build_context_for_window(window_start, window_end, cursor_data)
            self.logger.info(
                "%s: 开始执行(%s/%s), 窗口[%s ~ %s]",
                task_code,
                idx,
                total_segments,
                context.window_start,
                context.window_end,
            )

            try:
                extracted = self.extract(context)
                transformed = self.transform(extracted, context)
                load_result = self.load(transformed, context) or {}
                # Commit per segment so completed segments survive a later failure.
                self.db.commit()
            except Exception:
                self.db.rollback()
                self.logger.error("%s: 执行失败", task_code, exc_info=True)
                raise

            counts = load_result.get("counts") or {}
            self._accumulate_counts(total_counts, counts)

            extra = load_result.get("extra") or {}
            deleted = int(extra.get("deleted") or 0)
            total_deleted += deleted

            request_params = load_result.get("request_params")
            if request_params:
                request_params_list.append(request_params)

            # Per-segment detail is only reported when the run was actually split.
            if total_segments > 1:
                segment_results.append(
                    {
                        "window": {
                            "start": context.window_start,
                            "end": context.window_end,
                            "minutes": context.window_minutes,
                        },
                        "counts": counts,
                        "extra": extra,
                    }
                )

        overall_start = segments[0][0]
        overall_end = segments[-1][1]
        result = {"status": "SUCCESS", "counts": total_counts}
        result["window"] = {
            "start": overall_start,
            "end": overall_end,
            "minutes": calc_window_minutes(overall_start, overall_end),
        }
        if segment_results:
            result["segments"] = segment_results
        if request_params_list:
            result["request_params"] = (
                request_params_list[0] if len(request_params_list) == 1 else request_params_list
            )
        if total_deleted:
            result["extra"] = {"deleted": total_deleted}

        self.logger.info("%s: 完成, 统计=%s", task_code, total_counts)
        return result

    def extract(self, context: TaskContext) -> dict[str, Any]:
        """Resolve the parameters of one refresh run from config and window.

        Reads ``app.store_id`` and the ``dws.order_summary.*`` keys
        (``full_refresh``, ``site_id``, ``start_date``, ``end_date``,
        ``delete_before_insert``). Nothing is read from the database here.

        Args:
            context: Supplies ``window_start`` / ``window_end``; their dates
                are used as fallbacks for unset date bounds.

        Returns:
            A dict with ``site_id``, ``start_date``, ``end_date``,
            ``full_refresh`` and ``delete_before_insert``.
        """
        store_id = int(self.config.get("app.store_id"))

        # Flags may arrive as text, so they are parsed instead of passed to
        # bool(), which would read "false" as True.
        full_refresh = self._coerce_flag(
            self.config.get("dws.order_summary.full_refresh", False)
        )

        site_id = self.config.get("dws.order_summary.site_id", store_id)
        if site_id in ("", None, "null", "NULL"):
            # Blank/null spellings mean "no site filter".
            site_id = None

        start_date = self.config.get("dws.order_summary.start_date")
        end_date = self.config.get("dws.order_summary.end_date")
        if not full_refresh:
            # Explicit config dates win; otherwise fall back to the window.
            if not start_date:
                start_date = context.window_start.date()
            if not end_date:
                end_date = context.window_end.date()
        else:
            # A full refresh is unbounded: configured dates are ignored.
            start_date = None
            end_date = None

        delete_before_insert = self._coerce_flag(
            self.config.get("dws.order_summary.delete_before_insert", True)
        )

        return {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
            "full_refresh": full_refresh,
            "delete_before_insert": delete_before_insert,
        }

    def load(self, extracted: dict[str, Any], context: TaskContext) -> dict:
        """Clear the target slice (optionally) and rebuild the summary rows.

        Args:
            extracted: The parameter dict in the shape produced by
                ``extract``.
            context: Unused here; kept for the task interface.

        Returns:
            A dict with ``counts`` (``inserted`` is the cursor row count of
            the build statement), JSON-friendly ``request_params`` and
            ``extra.deleted``.

        Raises:
            RuntimeError: If ``billiards_dws.dws_order_summary`` does not
                exist.
        """
        sql_params = {
            "site_id": extracted["site_id"],
            "start_date": extracted["start_date"],
            "end_date": extracted["end_date"],
        }
        request_params = {
            "site_id": extracted["site_id"],
            "start_date": _jsonable_date(extracted["start_date"]),
            "end_date": _jsonable_date(extracted["end_date"]),
        }

        with self.db.conn.cursor() as cur:
            # Fail early with a clear message when the table is missing.
            cur.execute("SELECT to_regclass('billiards_dws.dws_order_summary') AS reg;")
            row = cur.fetchone()
            reg = row[0] if row else None
            if not reg:
                raise RuntimeError("DWS 表不存在:请先运行任务 INIT_DWS_SCHEMA")

            deleted = 0
            if extracted["delete_before_insert"]:
                if extracted["full_refresh"] and extracted["site_id"] is None:
                    # Unfiltered full refresh: wipe the whole table.
                    cur.execute("TRUNCATE TABLE billiards_dws.dws_order_summary;")
                    self.logger.info("DWS订单汇总: 已清空 billiards_dws.dws_order_summary")
                else:
                    # Build a parameterized DELETE limited to the given filters.
                    delete_sql = "DELETE FROM billiards_dws.dws_order_summary WHERE 1=1"
                    delete_args: list[Any] = []
                    if extracted["site_id"] is not None:
                        delete_sql += " AND site_id = %s"
                        delete_args.append(extracted["site_id"])
                    if extracted["start_date"] is not None:
                        delete_sql += " AND order_date >= %s"
                        delete_args.append(_as_date(extracted["start_date"]))
                    if extracted["end_date"] is not None:
                        delete_sql += " AND order_date <= %s"
                        delete_args.append(_as_date(extracted["end_date"]))
                    cur.execute(delete_sql, delete_args)
                    deleted = cur.rowcount
                    self.logger.info("DWS订单汇总: 删除=%s 语句=%s", deleted, delete_sql)

            cur.execute(SQL_BUILD_SUMMARY, sql_params)
            affected = cur.rowcount

        return {
            "counts": {"fetched": 0, "inserted": affected, "updated": 0, "skipped": 0, "errors": 0},
            "request_params": request_params,
            "extra": {"deleted": deleted},
        }
def _as_date(v: Any) -> date:
if isinstance(v, date):
return v
return date.fromisoformat(str(v))
def _jsonable_date(v: Any):
if v is None:
return None
if isinstance(v, date):
return v.isoformat()
return str(v)