# -*- coding: utf-8 -*-
"""ETL task for the product catalogue (PRODUCTS)."""
import json

from .base_task import BaseTask
from loaders.dimensions.product import ProductLoader
from models.parsers import TypeParser


def _first_present(raw: dict, *keys: str):
    """Return the first value among ``keys`` that is neither ``None`` nor ``""``.

    Unlike chaining ``raw.get(a) or raw.get(b)``, a falsy-but-meaningful value
    such as ``0`` (a free item, a state code of 0) is kept instead of being
    treated as missing.

    Args:
        raw: Upstream record to read from.
        *keys: Candidate field names, in priority order.

    Returns:
        The first present value, or ``None`` when every candidate is absent.
    """
    for key in keys:
        value = raw.get(key)
        if value is not None and value != "":
            return value
    return None


class ProductsTask(BaseTask):
    """Product-dimension ETL task."""

    def get_task_code(self) -> str:
        """Return the task code; it should match etl_admin.etl_task.task_code."""
        return "PRODUCTS"

    def execute(self) -> dict:
        """Run the product catalogue ETL flow.

        Steps:
            1. Page through the upstream /TenantGoods/QueryTenantGoods endpoint.
            2. Parse/clean every record with ``_parse_product``.
            3. Hand the parsed records to ``ProductLoader.upsert_products``
               (per the original notes this writes dim_product and
               dim_product_price_scd).
            4. Commit the transaction.

        Returns:
            Whatever ``self._build_result("SUCCESS", counts)`` produces, where
            ``counts`` holds the fetched/inserted/updated/skipped/errors totals.

        Raises:
            Exception: Any failure is logged, the transaction is rolled back,
                and the original exception is re-raised.
        """
        self.logger.info(f"开始执行 {self.get_task_code()} 任务")

        # Read the store id once; it is needed both for the request and the loader.
        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
        }

        try:
            # 1. Fetch all pages (the per-page metadata is not used here).
            records, _pages_meta = self.api.get_paginated(
                endpoint="/TenantGoods/QueryTenantGoods",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # 2. Parse/clean; records that cannot be parsed come back falsy.
            parsed_records = [
                parsed
                for parsed in map(self._parse_product, records)
                if parsed
            ]

            # Records without a primary key are dropped without a per-record
            # log line, so surface the total to keep the drop visible.
            dropped = len(records) - len(parsed_records)
            if dropped:
                self.logger.warning(
                    f"{self.get_task_code()} 解析阶段丢弃 {dropped} 条记录"
                )

            # 3. Load (dimension table + price SCD2).
            loader = ProductLoader(self.db)
            inserted, updated, skipped = loader.upsert_products(
                parsed_records, store_id
            )

            # 4. Commit the transaction.
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                "errors": 0,
            }
            self.logger.info(f"{self.get_task_code()} 完成: {counts}")
            return self._build_result("SUCCESS", counts)

        except Exception:
            # Log before rolling back so the root cause is recorded even when
            # the rollback itself fails (e.g. the connection is already gone).
            self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
            try:
                # Explicit rollback to avoid a partially applied load.
                self.db.rollback()
            except Exception:
                self.logger.error(
                    f"{self.get_task_code()} 回滚失败", exc_info=True
                )
            # Re-raise the original failure, not a rollback error.
            raise

    def _parse_product(self, raw: dict) -> dict | None:
        """Parse a single upstream product record into a loader-ready dict.

        The field mapping follows the legacy upsert_dim_product_and_price_scd.

        Upstream fields read:
            - siteGoodsId / tenantGoodsId / productId
            - goodsName / productName
            - tenantGoodsCategoryId / goodsCategoryId / categoryName /
              goodsCategorySecondId
            - goodsUnit
            - costPrice / goodsPrice / salePrice
            - goodsState / status
            - supplierId / barcode / isCombo
            - createTime / updateTime

        Args:
            raw: One record as returned by the upstream API.

        Returns:
            The parsed record, or ``None`` when the primary key is missing or
            any exception is raised while parsing (the latter is logged as a
            warning).
        """
        try:
            # For ids a value of 0 is not usable, so plain ``or`` chaining is
            # intentional here: it falls through to the next candidate field.
            product_id = TypeParser.parse_int(
                raw.get("siteGoodsId")
                or raw.get("tenantGoodsId")
                or raw.get("productId")
            )
            if not product_id:
                # Primary key missing: skip the record.
                return None

            supplier_raw = raw.get("supplierId")
            combo_raw = raw.get("isCombo")

            return {
                "store_id": self.config.get("app.store_id"),
                "product_id": product_id,
                "site_product_id": TypeParser.parse_int(raw.get("siteGoodsId")),
                "product_name": raw.get("goodsName") or raw.get("productName"),
                "category_id": TypeParser.parse_int(
                    raw.get("tenantGoodsCategoryId")
                    or raw.get("goodsCategoryId")
                ),
                "category_name": raw.get("categoryName"),
                "second_category_id": TypeParser.parse_int(
                    raw.get("goodsCategorySecondId")
                ),
                "unit": raw.get("goodsUnit"),
                "cost_price": TypeParser.parse_decimal(raw.get("costPrice")),
                # A price of 0 is a real value; only fall back to salePrice
                # when goodsPrice is actually absent.
                "sale_price": TypeParser.parse_decimal(
                    _first_present(raw, "goodsPrice", "salePrice")
                ),
                # The legacy version hard-coded None here; fill it in once the
                # upstream exposes a definite field.
                "allow_discount": None,
                # A state code of 0 must survive, so do not use ``or`` here.
                "status": _first_present(raw, "goodsState", "status"),
                "supplier_id": (
                    TypeParser.parse_int(supplier_raw) if supplier_raw else None
                ),
                "barcode": raw.get("barcode"),
                "is_combo": bool(combo_raw) if combo_raw is not None else None,
                "created_time": TypeParser.parse_timestamp(
                    raw.get("createTime"), self.tz
                ),
                "updated_time": TypeParser.parse_timestamp(
                    raw.get("updateTime"), self.tz
                ),
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as e:
            # Best effort: one malformed record must not abort the whole batch.
            self.logger.warning(f"解析商品记录失败: {e}, 原始数据: {raw}")
            return None