Files
feiqiu-ETL/etl_billiards/loaders/dimensions/product.py
2025-11-18 02:32:00 +08:00

135 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""商品维度 + 价格SCD2 加载器"""
from ..base_loader import BaseLoader
from scd.scd2_handler import SCD2Handler
class ProductLoader(BaseLoader):
"""商品维度加载器dim_product + dim_product_price_scd"""
def __init__(self, db_ops):
super().__init__(db_ops)
# SCD2 处理器,复用通用逻辑
self.scd_handler = SCD2Handler(db_ops)
def upsert_products(self, records: list, store_id: int) -> tuple:
"""
加载商品维度及价格SCD
返回: (inserted_count, updated_count, skipped_count)
"""
if not records:
return (0, 0, 0)
# 1) 维度主表billiards.dim_product
sql_base = """
INSERT INTO billiards.dim_product (
store_id,
product_id,
site_product_id,
product_name,
category_id,
category_name,
second_category_id,
unit,
cost_price,
sale_price,
allow_discount,
status,
supplier_id,
barcode,
is_combo,
created_time,
updated_time,
raw_data
)
VALUES (
%(store_id)s,
%(product_id)s,
%(site_product_id)s,
%(product_name)s,
%(category_id)s,
%(category_name)s,
%(second_category_id)s,
%(unit)s,
%(cost_price)s,
%(sale_price)s,
%(allow_discount)s,
%(status)s,
%(supplier_id)s,
%(barcode)s,
%(is_combo)s,
%(created_time)s,
%(updated_time)s,
%(raw_data)s
)
ON CONFLICT (store_id, product_id) DO UPDATE SET
site_product_id = EXCLUDED.site_product_id,
product_name = EXCLUDED.product_name,
category_id = EXCLUDED.category_id,
category_name = EXCLUDED.category_name,
second_category_id = EXCLUDED.second_category_id,
unit = EXCLUDED.unit,
cost_price = EXCLUDED.cost_price,
sale_price = EXCLUDED.sale_price,
allow_discount = EXCLUDED.allow_discount,
status = EXCLUDED.status,
supplier_id = EXCLUDED.supplier_id,
barcode = EXCLUDED.barcode,
is_combo = EXCLUDED.is_combo,
updated_time = COALESCE(EXCLUDED.updated_time, now()),
raw_data = EXCLUDED.raw_data
RETURNING (xmax = 0) AS inserted
"""
inserted, updated = self.db.batch_upsert_with_returning(
sql_base,
records,
page_size=self._batch_size(),
)
# 2) 价格 SCD2billiards.dim_product_price_scd
# 只追踪 price + 类目 + 名称等字段的历史
tracked_fields = [
"product_name",
"category_id",
"category_name",
"second_category_id",
"cost_price",
"sale_price",
"allow_discount",
"status",
]
natural_key = ["store_id", "product_id"]
for rec in records:
effective_date = rec.get("updated_time") or rec.get("created_time")
scd_record = {
"store_id": rec["store_id"],
"product_id": rec["product_id"],
"product_name": rec.get("product_name"),
"category_id": rec.get("category_id"),
"category_name": rec.get("category_name"),
"second_category_id": rec.get("second_category_id"),
"cost_price": rec.get("cost_price"),
"sale_price": rec.get("sale_price"),
"allow_discount": rec.get("allow_discount"),
"status": rec.get("status"),
# 原表中有 raw_data jsonb 字段,这里直接复用 task 传入的 raw_data
"raw_data": rec.get("raw_data"),
}
# 这里我们不强行区分 INSERT/UPDATE/SKIP对 ETL 统计来说意义不大
self.scd_handler.upsert(
table_name="billiards.dim_product_price_scd",
natural_key=natural_key,
tracked_fields=tracked_fields,
record=scd_record,
effective_date=effective_date,
)
# skipped_count 统一按 0 返回(真正被丢弃的记录在 Task 端已经过滤)
return (inserted, updated, 0)