ODS 完成

This commit is contained in:
Neo
2025-11-30 07:18:55 +08:00
parent cbd16a39ba
commit b9b050bb5d
28 changed files with 41867 additions and 977 deletions

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.dimensions.product import ProductLoader
from models.parsers import TypeParser
@@ -12,95 +12,56 @@ class ProductsTask(BaseTask):
"""商品维度 ETL 任务"""
def get_task_code(self) -> str:
"""任务代码,应与 etl_admin.etl_task.task_code 一致"""
return "PRODUCTS"
def execute(self) -> dict:
"""
执行商品档案 ETL
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params({"siteId": context.store_id})
records, _ = self.api.get_paginated(
endpoint="/TenantGoods/QueryTenantGoods",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="tenantGoodsList",
)
return {"records": records}
流程:
1. 调用上游 /TenantGoods/QueryTenantGoods 分页拉取商品列表
2. 解析/清洗字段
3. 通过 ProductLoader 写入 dim_product 和 dim_product_price_scd
"""
self.logger.info(f"开始执行 {self.get_task_code()} 任务")
params = {
"storeId": self.config.get("app.store_id"),
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
parsed_row = self._parse_product(raw, context.store_id)
if parsed_row:
parsed.append(parsed_row)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = ProductLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_products(
transformed["records"], context.store_id
)
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
def _parse_product(self, raw: dict, store_id: int) -> dict | None:
try:
# 1. 分页拉取数据
records, pages_meta = self.api.get_paginated(
endpoint="/TenantGoods/QueryTenantGoods",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
)
# 2. 解析/清洗
parsed_records = []
for raw in records:
parsed = self._parse_product(raw)
if parsed:
parsed_records.append(parsed)
# 3. 加载入库(维度主表 + 价格SCD2
loader = ProductLoader(self.db)
store_id = self.config.get("app.store_id")
inserted, updated, skipped = loader.upsert_products(
parsed_records, store_id
)
# 4. 提交事务
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"{self.get_task_code()} 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
# 明确回滚,避免部分成功
self.db.rollback()
self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
raise
def _parse_product(self, raw: dict) -> dict | None:
"""
解析单条商品记录,字段映射参考旧版 upsert_dim_product_and_price_scd
上游字段示例:
- siteGoodsId / tenantGoodsId / productId
- goodsName / productName
- tenantGoodsCategoryId / goodsCategoryId / categoryName / goodsCategorySecondId
- goodsUnit
- costPrice / goodsPrice / salePrice
- goodsState / status
- supplierId / barcode / isCombo
- createTime / updateTime
"""
try:
product_id = (
TypeParser.parse_int(
raw.get("siteGoodsId")
or raw.get("tenantGoodsId")
or raw.get("productId")
)
product_id = TypeParser.parse_int(
raw.get("siteGoodsId") or raw.get("tenantGoodsId") or raw.get("productId")
)
if not product_id:
# 主键缺失,直接跳过
return None
return {
"store_id": self.config.get("app.store_id"),
"store_id": store_id,
"product_id": product_id,
"site_product_id": TypeParser.parse_int(raw.get("siteGoodsId")),
"product_name": raw.get("goodsName") or raw.get("productName"),
@@ -108,15 +69,12 @@ class ProductsTask(BaseTask):
raw.get("tenantGoodsCategoryId") or raw.get("goodsCategoryId")
),
"category_name": raw.get("categoryName"),
"second_category_id": TypeParser.parse_int(
raw.get("goodsCategorySecondId")
),
"second_category_id": TypeParser.parse_int(raw.get("goodsCategorySecondId")),
"unit": raw.get("goodsUnit"),
"cost_price": TypeParser.parse_decimal(raw.get("costPrice")),
"sale_price": TypeParser.parse_decimal(
raw.get("goodsPrice") or raw.get("salePrice")
),
# 旧版这里就是 None如后面有明确字段可以再补
"allow_discount": None,
"status": raw.get("goodsState") or raw.get("status"),
"supplier_id": TypeParser.parse_int(raw.get("supplierId"))
@@ -126,14 +84,10 @@ class ProductsTask(BaseTask):
"is_combo": bool(raw.get("isCombo"))
if raw.get("isCombo") is not None
else None,
"created_time": TypeParser.parse_timestamp(
raw.get("createTime"), self.tz
),
"updated_time": TypeParser.parse_timestamp(
raw.get("updateTime"), self.tz
),
"created_time": TypeParser.parse_timestamp(raw.get("createTime"), self.tz),
"updated_time": TypeParser.parse_timestamp(raw.get("updateTime"), self.tz),
"raw_data": json.dumps(raw, ensure_ascii=False),
}
except Exception as e:
self.logger.warning(f"解析商品记录失败: {e}, 原始数据: {raw}")
return None
except Exception as exc:
self.logger.warning("解析商品记录失败: %s, 原始数据: %s", exc, raw)
return None