# -*- coding: utf-8 -*- """商品维度 + 价格SCD2 加载器""" from ..base_loader import BaseLoader from scd.scd2_handler import SCD2Handler class ProductLoader(BaseLoader): """商品维度加载器(dim_product + dim_product_price_scd)""" def __init__(self, db_ops): super().__init__(db_ops) # SCD2 处理器,复用通用逻辑 self.scd_handler = SCD2Handler(db_ops) def upsert_products(self, records: list, store_id: int) -> tuple: """ 加载商品维度及价格SCD 返回: (inserted_count, updated_count, skipped_count) """ if not records: return (0, 0, 0) # 1) 维度主表:billiards.dim_product sql_base = """ INSERT INTO billiards.dim_product ( store_id, product_id, site_product_id, product_name, category_id, category_name, second_category_id, unit, cost_price, sale_price, allow_discount, status, supplier_id, barcode, is_combo, created_time, updated_time, raw_data ) VALUES ( %(store_id)s, %(product_id)s, %(site_product_id)s, %(product_name)s, %(category_id)s, %(category_name)s, %(second_category_id)s, %(unit)s, %(cost_price)s, %(sale_price)s, %(allow_discount)s, %(status)s, %(supplier_id)s, %(barcode)s, %(is_combo)s, %(created_time)s, %(updated_time)s, %(raw_data)s ) ON CONFLICT (store_id, product_id) DO UPDATE SET site_product_id = EXCLUDED.site_product_id, product_name = EXCLUDED.product_name, category_id = EXCLUDED.category_id, category_name = EXCLUDED.category_name, second_category_id = EXCLUDED.second_category_id, unit = EXCLUDED.unit, cost_price = EXCLUDED.cost_price, sale_price = EXCLUDED.sale_price, allow_discount = EXCLUDED.allow_discount, status = EXCLUDED.status, supplier_id = EXCLUDED.supplier_id, barcode = EXCLUDED.barcode, is_combo = EXCLUDED.is_combo, updated_time = COALESCE(EXCLUDED.updated_time, now()), raw_data = EXCLUDED.raw_data RETURNING (xmax = 0) AS inserted """ inserted, updated = self.db.batch_upsert_with_returning( sql_base, records, page_size=self._batch_size(), ) # 2) 价格 SCD2:billiards.dim_product_price_scd # 只追踪 price + 类目 + 名称等字段的历史 tracked_fields = [ "product_name", "category_id", "category_name", "second_category_id", "cost_price", "sale_price", "allow_discount", "status", ] natural_key = ["store_id", "product_id"] for rec in records: effective_date = rec.get("updated_time") or rec.get("created_time") scd_record = { "store_id": rec["store_id"], "product_id": rec["product_id"], "product_name": rec.get("product_name"), "category_id": rec.get("category_id"), "category_name": rec.get("category_name"), "second_category_id": rec.get("second_category_id"), "cost_price": rec.get("cost_price"), "sale_price": rec.get("sale_price"), "allow_discount": rec.get("allow_discount"), "status": rec.get("status"), # 原表中有 raw_data jsonb 字段,这里直接复用 task 传入的 raw_data "raw_data": rec.get("raw_data"), } # 这里我们不强行区分 INSERT/UPDATE/SKIP,对 ETL 统计来说意义不大 self.scd_handler.upsert( table_name="billiards.dim_product_price_scd", natural_key=natural_key, tracked_fields=tracked_fields, record=scd_record, effective_date=effective_date, ) # skipped_count 统一按 0 返回(真正被丢弃的记录在 Task 端已经过滤) return (inserted, updated, 0)