139 lines
5.0 KiB
Python
139 lines
5.0 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""商品档案(PRODUCTS)ETL任务"""
|
||
|
||
import json
|
||
|
||
from .base_task import BaseTask
|
||
from loaders.dimensions.product import ProductLoader
|
||
from models.parsers import TypeParser
|
||
|
||
|
||
class ProductsTask(BaseTask):
    """Product dimension ETL task."""

    def get_task_code(self) -> str:
        """Return the task code; it should match etl_admin.etl_task.task_code."""
        return "PRODUCTS"

    def execute(self) -> dict:
        """
        Run the product catalog ETL.

        Steps:
        1. Page through the upstream /TenantGoods/QueryTenantGoods endpoint.
        2. Parse and clean every raw record with ``_parse_product``.
        3. Hand the parsed records to ``ProductLoader.upsert_products``
           (per the original author's notes this writes dim_product and
           dim_product_price_scd).
        4. Commit the transaction.

        Returns:
            The value of ``self._build_result("SUCCESS", counts)``, where
            ``counts`` holds the fetched / inserted / updated / skipped /
            errors figures.

        Raises:
            Exception: any failure is logged, a rollback is attempted, and
                the original exception is re-raised to the caller.
        """
        task_code = self.get_task_code()
        self.logger.info(f"开始执行 {task_code} 任务")

        # Read the store id once; it is used both as the API filter and as
        # the loader argument.
        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
        }

        try:
            # 1. Fetch all pages. The per-page metadata is not used here.
            records, _pages_meta = self.api.get_paginated(
                endpoint="/TenantGoods/QueryTenantGoods",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # 2. Parse / clean. _parse_product returns None for records
            #    that must be dropped (missing key or parse failure).
            parsed_records = [
                parsed
                for parsed in (self._parse_product(raw) for raw in records)
                if parsed
            ]

            # Surface silently dropped records; the counts dict itself is
            # left unchanged so consumers of the result see the same keys.
            dropped = len(records) - len(parsed_records)
            if dropped:
                self.logger.warning(
                    f"{task_code} 有 {dropped} 条记录在解析阶段被丢弃"
                )

            # 3. Load (dimension table + price SCD2, per the original notes).
            loader = ProductLoader(self.db)
            inserted, updated, skipped = loader.upsert_products(
                parsed_records, store_id
            )

            # 4. Commit the transaction.
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                "errors": 0,
            }

            self.logger.info(f"{task_code} 完成: {counts}")
            return self._build_result("SUCCESS", counts)

        except Exception:
            # Log the root cause first so it is recorded even if the
            # rollback below fails as well.
            self.logger.error(f"{task_code} 失败", exc_info=True)
            # Roll back explicitly to avoid a partially applied load. A
            # failing rollback must not replace the original exception.
            try:
                self.db.rollback()
            except Exception:
                self.logger.error(f"{task_code} 回滚失败", exc_info=True)
            # Re-raises the original exception caught by the outer handler.
            raise

    @staticmethod
    def _first_present(*values):
        """Return the first value that is neither None nor an empty string.

        Unlike an ``or`` chain this keeps legitimate falsy values such as
        ``0``, so a zero price or a zero state code is not mistaken for a
        missing field. Returns None when no candidate qualifies.
        """
        for value in values:
            if value is not None and value != "":
                return value
        return None

    def _parse_product(self, raw: dict) -> dict | None:
        """
        Parse one raw product record into the dict passed to the loader.

        The field mapping follows the legacy
        upsert_dim_product_and_price_scd implementation (per the original
        author's note).

        Upstream field examples:
        - siteGoodsId / tenantGoodsId / productId
        - goodsName / productName
        - tenantGoodsCategoryId / goodsCategoryId / categoryName /
          goodsCategorySecondId
        - goodsUnit
        - costPrice / goodsPrice / salePrice
        - goodsState / status
        - supplierId / barcode / isCombo
        - createTime / updateTime

        Args:
            raw: One record as returned by the upstream API.

        Returns:
            The parsed record, or None when the product id is missing or
            the record cannot be parsed (a warning is logged in that case).
        """
        try:
            # IDs deliberately keep the ``or`` fallback: a zero id is
            # rejected just below, so it is treated as "not provided".
            product_id = TypeParser.parse_int(
                raw.get("siteGoodsId")
                or raw.get("tenantGoodsId")
                or raw.get("productId")
            )
            if not product_id:
                # Primary key missing: skip the record.
                return None

            supplier_raw = raw.get("supplierId")
            combo_flag = raw.get("isCombo")

            return {
                "store_id": self.config.get("app.store_id"),
                "product_id": product_id,
                "site_product_id": TypeParser.parse_int(raw.get("siteGoodsId")),
                "product_name": raw.get("goodsName") or raw.get("productName"),
                "category_id": TypeParser.parse_int(
                    raw.get("tenantGoodsCategoryId") or raw.get("goodsCategoryId")
                ),
                "category_name": raw.get("categoryName"),
                "second_category_id": TypeParser.parse_int(
                    raw.get("goodsCategorySecondId")
                ),
                "unit": raw.get("goodsUnit"),
                "cost_price": TypeParser.parse_decimal(raw.get("costPrice")),
                # A price of 0 is a real value; only fall back to salePrice
                # when goodsPrice is absent or empty.
                "sale_price": TypeParser.parse_decimal(
                    self._first_present(
                        raw.get("goodsPrice"), raw.get("salePrice")
                    )
                ),
                # The legacy version also left this as None; fill it in
                # once a definite upstream field is known.
                "allow_discount": None,
                # A state code of 0 must not be replaced by the fallback.
                "status": self._first_present(
                    raw.get("goodsState"), raw.get("status")
                ),
                "supplier_id": TypeParser.parse_int(supplier_raw)
                if supplier_raw
                else None,
                "barcode": raw.get("barcode"),
                # NOTE(review): bool() of a non-empty string such as "0" is
                # True — confirm upstream sends a bool/int here.
                "is_combo": bool(combo_flag) if combo_flag is not None else None,
                "created_time": TypeParser.parse_timestamp(
                    raw.get("createTime"), self.tz
                ),
                "updated_time": TypeParser.parse_timestamp(
                    raw.get("updateTime"), self.tz
                ),
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as exc:
            # Best effort by design: one bad record must not abort the run.
            self.logger.warning(f"解析商品记录失败: {exc}, 原始数据: {raw}")
            return None