Files
feiqiu-ETL/etl_billiards/tasks/products_task.py
2025-11-18 02:32:00 +08:00

139 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""商品档案PRODUCTSETL任务"""
import json
from .base_task import BaseTask
from loaders.dimensions.product import ProductLoader
from models.parsers import TypeParser
class ProductsTask(BaseTask):
    """ETL task that loads the product dimension from the upstream goods API."""

    def get_task_code(self) -> str:
        """Return the task code; it should match etl_admin.etl_task.task_code."""
        return "PRODUCTS"

    def execute(self) -> dict:
        """
        Run the product-archive ETL.

        Steps:
            1. Page through the upstream /TenantGoods/QueryTenantGoods endpoint.
            2. Parse / clean every raw record with ``_parse_product``.
            3. Hand the parsed records to ``ProductLoader.upsert_products``
               (per the original notes this writes dim_product and
               dim_product_price_scd).
            4. Commit the transaction.

        Returns:
            The value produced by ``self._build_result("SUCCESS", counts)``,
            where ``counts`` holds ``fetched``, ``inserted``, ``updated``,
            ``skipped`` and ``errors``.

        Raises:
            Exception: any failure is re-raised after the DB transaction has
                been rolled back and the error has been logged.
        """
        task_code = self.get_task_code()
        self.logger.info(f"开始执行 {task_code} 任务")

        # Read the store id once; it is used both as the API filter and as
        # the loader argument.
        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
        }

        try:
            # 1. Fetch all pages. The page metadata is not used here.
            records, _pages_meta = self.api.get_paginated(
                endpoint="/TenantGoods/QueryTenantGoods",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # 2. Parse / clean. _parse_product returns None for records that
            #    lack a primary key or fail to parse; those are dropped.
            parsed_records = [
                parsed
                for raw in records
                if (parsed := self._parse_product(raw))
            ]
            dropped = len(records) - len(parsed_records)
            if dropped:
                # Surface dropped rows so that a gap between "fetched" and the
                # loader counts is explainable from the log.
                self.logger.warning(
                    f"{task_code} 有 {dropped} 条记录因主键缺失或解析失败被丢弃"
                )

            # 3. Load (dimension main table + price SCD2).
            loader = ProductLoader(self.db)
            inserted, updated, skipped = loader.upsert_products(
                parsed_records, store_id
            )

            # 4. Commit the transaction.
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                "errors": 0,
            }
            self.logger.info(f"{task_code} 完成: {counts}")
            return self._build_result("SUCCESS", counts)
        except Exception:
            # Roll back explicitly so a failure never leaves a partial load.
            self.db.rollback()
            self.logger.error(f"{task_code} 失败", exc_info=True)
            raise

    @staticmethod
    def _first_present(raw: dict, *keys: str):
        """
        Return the first value in ``raw`` among ``keys`` that is actually set.

        A value counts as missing only when it is ``None`` or an empty string,
        so legitimate falsy values such as ``0`` are kept (unlike an ``or``
        chain, which would skip them). Returns ``None`` when no key is set.
        """
        for key in keys:
            value = raw.get(key)
            if value is not None and value != "":
                return value
        return None

    def _parse_product(self, raw: dict) -> dict | None:
        """
        Parse a single upstream product record into a loader-ready dict.

        The field mapping follows the legacy upsert_dim_product_and_price_scd.
        Upstream field examples:
            - siteGoodsId / tenantGoodsId / productId
            - goodsName / productName
            - tenantGoodsCategoryId / goodsCategoryId / categoryName /
              goodsCategorySecondId
            - goodsUnit
            - costPrice / goodsPrice / salePrice
            - goodsState / status
            - supplierId / barcode / isCombo
            - createTime / updateTime

        Args:
            raw: One record as returned by the upstream API.

        Returns:
            The parsed record, or ``None`` when the primary key is missing or
            any exception occurs while parsing (the exception is logged as a
            warning, not raised).
        """
        try:
            # For ids the `or` chain is intentional: a 0 / empty id is treated
            # as "not set" and the next candidate field is tried.
            product_id = TypeParser.parse_int(
                raw.get("siteGoodsId")
                or raw.get("tenantGoodsId")
                or raw.get("productId")
            )
            if not product_id:
                # Primary key missing: skip the record.
                return None

            supplier_raw = raw.get("supplierId")
            combo_raw = raw.get("isCombo")

            return {
                "store_id": self.config.get("app.store_id"),
                "product_id": product_id,
                "site_product_id": TypeParser.parse_int(raw.get("siteGoodsId")),
                "product_name": raw.get("goodsName") or raw.get("productName"),
                "category_id": TypeParser.parse_int(
                    raw.get("tenantGoodsCategoryId") or raw.get("goodsCategoryId")
                ),
                "category_name": raw.get("categoryName"),
                "second_category_id": TypeParser.parse_int(
                    raw.get("goodsCategorySecondId")
                ),
                "unit": raw.get("goodsUnit"),
                "cost_price": TypeParser.parse_decimal(raw.get("costPrice")),
                # A price of 0 is a valid value, so pick the first field that
                # is present instead of the first one that is truthy.
                "sale_price": TypeParser.parse_decimal(
                    self._first_present(raw, "goodsPrice", "salePrice")
                ),
                # The legacy version always stored None here; fill it in once
                # the upstream exposes a definite field.
                "allow_discount": None,
                # A state code of 0 must not be replaced by the fallback field.
                "status": self._first_present(raw, "goodsState", "status"),
                "supplier_id": (
                    TypeParser.parse_int(supplier_raw) if supplier_raw else None
                ),
                "barcode": raw.get("barcode"),
                # NOTE(review): bool() maps any non-empty string (e.g. "0" or
                # "false") to True; confirm the upstream sends a real
                # boolean / integer here.
                "is_combo": bool(combo_raw) if combo_raw is not None else None,
                "created_time": TypeParser.parse_timestamp(
                    raw.get("createTime"), self.tz
                ),
                "updated_time": TypeParser.parse_timestamp(
                    raw.get("updateTime"), self.tz
                ),
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as e:
            # Best-effort parsing: one bad record must not abort the batch.
            self.logger.warning(f"解析商品记录失败: {e}, 原始数据: {raw}")
            return None