ODS 完成

This commit is contained in:
Neo
2025-11-30 07:18:55 +08:00
parent cbd16a39ba
commit b9b050bb5d
28 changed files with 41867 additions and 977 deletions

View File

@@ -1,80 +1,77 @@
# -*- coding: utf-8 -*-
"""订单ETL任务"""
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.order import OrderLoader
from models.parsers import TypeParser
class OrdersTask(BaseTask):
"""订单数据ETL任务"""
def get_task_code(self) -> str:
return "ORDERS"
def execute(self) -> dict:
"""执行订单数据ETL"""
self.logger.info(f"开始执行 {self.get_task_code()} 任务")
# 1. 获取时间窗口
window_start, window_end, window_minutes = self._get_time_window()
# 2. 调用API获取数据
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
}
try:
records, pages_meta = self.api.get_paginated(
endpoint="/order/list",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",)
)
# 3. 解析并清洗数据
parsed_records = []
for rec in records:
parsed = self._parse_order(rec)
if parsed:
parsed_records.append(parsed)
# 4. 加载数据
loader = OrderLoader(self.db)
store_id = self.config.get("app.store_id")
inserted, updated, skipped = loader.upsert_orders(
parsed_records,
store_id
)
# 5. 提交事务
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0
# ------------------------------------------------------------------ E/T/L hooks
def extract(self, context: TaskContext) -> dict:
"""调用 API 拉取订单记录"""
params = self._merge_common_params(
{
"siteId": context.store_id,
"rangeStartTime": TypeParser.format_timestamp(context.window_start, self.tz),
"rangeEndTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
self.logger.info(
f"{self.get_task_code()} 完成: {counts}"
)
return self._build_result("SUCCESS", counts)
except Exception as e:
self.db.rollback()
self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
raise
def _parse_order(self, raw: dict) -> dict:
)
records, pages_meta = self.api.get_paginated(
endpoint="/Site/GetAllOrderSettleList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="settleList",
)
return {"records": records, "meta": pages_meta}
def transform(self, extracted: dict, context: TaskContext) -> dict:
"""解析原始订单 JSON"""
parsed_records = []
skipped = 0
for rec in extracted.get("records", []):
parsed = self._parse_order(rec, context.store_id)
if parsed:
parsed_records.append(parsed)
else:
skipped += 1
return {
"records": parsed_records,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
def load(self, transformed: dict, context: TaskContext) -> dict:
"""写入 fact_order"""
loader = OrderLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_orders(
transformed["records"], context.store_id
)
counts = {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
return counts
# ------------------------------------------------------------------ helpers
def _parse_order(self, raw: dict, store_id: int) -> dict | None:
"""解析单条订单记录"""
try:
return {
"store_id": self.config.get("app.store_id"),
"store_id": store_id,
"order_id": TypeParser.parse_int(raw.get("orderId")),
"order_no": raw.get("orderNo"),
"member_id": TypeParser.parse_int(raw.get("memberId")),
@@ -87,8 +84,8 @@ class OrdersTask(BaseTask):
"pay_status": raw.get("payStatus"),
"order_status": raw.get("orderStatus"),
"remark": raw.get("remark"),
"raw_data": json.dumps(raw, ensure_ascii=False)
"raw_data": json.dumps(raw, ensure_ascii=False),
}
except Exception as e:
self.logger.warning(f"解析订单失败: {e}, 原始数据: {raw}")
except Exception as exc:
self.logger.warning("解析订单失败: %s, 原始数据: %s", exc, raw)
return None