# -*- coding: utf-8 -*-
import os
import json
from datetime import datetime

from .base_task import BaseTask
from loaders.ods.generic import GenericODSLoader  # currently unused; kept as the reference this task mimics


class ManualIngestTask(BaseTask):
    """
    Task to ingest manually fetched JSON files from a directory into ODS tables.
    """

    # Map a Chinese keyword in the file name to its target ODS table.
    FILE_MAPPING = {
        "小票详情": "billiards_ods.ods_ticket_detail",
        "结账记录": "billiards_ods.ods_order_settle",
        "支付记录": "billiards_ods.ods_payment",
        "助教流水": "billiards_ods.ods_assistant_ledger",
        "助教废除": "billiards_ods.ods_assistant_abolish",
        "商品档案": "billiards_ods.ods_goods_ledger",  # Possibly the dim_product source; mapped to the sales ledger for now
        "库存变化": "billiards_ods.ods_inventory_change",
        "会员档案": "billiards_ods.ods_member",
        "充值记录": "billiards_ods.ods_member_card",  # Approximate mapping
        "团购套餐": "billiards_ods.ods_package_coupon",
        "库存汇总": "billiards_ods.ods_inventory_stock",
    }

    def get_task_code(self) -> str:
        return "MANUAL_INGEST"

    def execute(self) -> dict:
        self.logger.info("Starting Manual Ingest Task")

        # Configurable directory; defaults to the local test data for now.
        data_dir = self.config.get(
            "manual.data_dir",
            r"c:\dev\LLTQ\ETL\feiqiu-ETL\etl_billiards\tests\testdata_json",
        )

        if not os.path.exists(data_dir):
            self.logger.error(f"Data directory not found: {data_dir}")
            return {"status": "error", "message": "Directory not found"}

        total_files = 0
        total_rows = 0

        for filename in os.listdir(data_dir):
            if not filename.endswith(".json"):
                continue

            # Determine the target table from the file name.
            target_table = None
            for key, table in self.FILE_MAPPING.items():
                if key in filename:
                    target_table = table
                    break

            if not target_table:
                self.logger.warning(f"No mapping found for file: {filename}, skipping.")
                continue

            self.logger.info(f"Ingesting {filename} into {target_table}")

            try:
                with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f:
                    data = json.load(f)

                if not isinstance(data, list):
                    data = [data]

                # GenericODSLoader.upsert_rows expects the raw API result list and relies on
                # _normalize_row plus per-table primary-key configuration (normally defined
                # in ODS_TASK_CLASSES). Because this task picks its target table dynamically
                # from the file name, reusing it would mean wiring that PK configuration up
                # for every table. For simplicity, ingest with a file-aware routine that
                # mimics GenericODSLoader instead: store each item as a JSON payload and
                # extract the store id and primary key explicitly.
                rows_to_insert = []
                for item in data:
                    # Extract the store id (usually in siteProfile or at the data root).
                    store_id = self._extract_store_id(item) or self.config.get("app.store_id")

                    # Extract the primary key (id, orderSettleId, etc.).
                    pk_val = self._extract_pk(item, target_table)
                    if not pk_val:
                        # Fall back to a plain 'id' field in the item.
                        pk_val = item.get("id")

                    if not pk_val:
                        # Special case: ticket detail records are keyed by orderSettleId.
                        if "ods_ticket_detail" in target_table:
                            pk_val = item.get("orderSettleId")

                    if not pk_val:
                        continue

                    row = {
                        "store_id": store_id,
                        "payload": json.dumps(item, ensure_ascii=False),
                        "source_file": filename,
                        "fetched_at": datetime.now(),
                    }

                    # Add the table-specific primary key column.
                    pk_col = self._get_pk_column(target_table)
                    row[pk_col] = pk_val

                    rows_to_insert.append(row)

                if rows_to_insert:
                    self._bulk_insert(target_table, rows_to_insert)
                    total_rows += len(rows_to_insert)

                total_files += 1

            except Exception as e:
                self.logger.error(f"Error processing {filename}: {e}", exc_info=True)

        return {"status": "success", "files_processed": total_files, "rows_inserted": total_rows}

    def _extract_store_id(self, item):
        # Try the common locations for the store id.
        if "store_id" in item:
            return item["store_id"]
        if "siteProfile" in item and "id" in item["siteProfile"]:
            return item["siteProfile"]["id"]
        # Some exports wrap the payload twice: {"data": {"data": {...}}}.
        if "data" in item and "data" in item["data"] and "siteId" in item["data"]["data"]:
            return item["data"]["data"]["siteId"]
        return None

    def _extract_pk(self, item, table):
        # Find the primary key value based on the target table.
        if "ods_order_settle" in table:
            # Some settle files nest the record as {"settleList": {"settleList": {...}}}.
            if "settleList" in item and "settleList" in item["settleList"]:
                return item["settleList"]["settleList"].get("id")
            return item.get("id")
        return item.get("id")

    def _get_pk_column(self, table):
        # Matching is by substring, so more specific table names must be checked
        # before their prefixes (e.g. ods_member_card before ods_member).
        if "ods_ticket_detail" in table:
            return "order_settle_id"
        if "ods_order_settle" in table:
            return "order_settle_id"
        if "ods_payment" in table:
            return "pay_id"
        if "ods_member_card" in table:
            return "card_id"
        if "ods_member" in table:
            return "member_id"
        if "ods_assistant_ledger" in table:
            return "ledger_id"
        if "ods_goods_ledger" in table:
            return "order_goods_id"
        if "ods_inventory_change" in table:
            return "change_id"
        if "ods_assistant_abolish" in table:
            return "abolish_id"
        if "ods_coupon_verify" in table:
            return "coupon_id"
        if "ods_package_coupon" in table:
            return "package_id"
        return "id"  # Fallback

    def _bulk_insert(self, table, rows):
        if not rows:
            return
        keys = list(rows[0].keys())
        cols = ", ".join(keys)
        vals = ", ".join([f"%({k})s" for k in keys])

        # The conflict target assumes a unique constraint on (store_id, <pk_col>).
        pk_col = self._get_pk_column(table)

        sql = f"""
            INSERT INTO {table} ({cols})
            VALUES ({vals})
            ON CONFLICT (store_id, {pk_col})
            DO UPDATE SET
                payload = EXCLUDED.payload,
                fetched_at = EXCLUDED.fetched_at,
                source_file = EXCLUDED.source_file;
        """
        self.db.batch_execute(sql, rows)
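

# Illustration only (not executed): for a file whose name contains "支付记录", the rows built
# in execute() carry the keys store_id, payload, source_file, fetched_at, pay_id, so
# _bulk_insert would issue an upsert shaped roughly like the statement below. The target
# table's unique constraint on (store_id, pay_id) is an assumption this sketch shares with
# the ON CONFLICT clause in _bulk_insert.
#
#   INSERT INTO billiards_ods.ods_payment (store_id, payload, source_file, fetched_at, pay_id)
#   VALUES (%(store_id)s, %(payload)s, %(source_file)s, %(fetched_at)s, %(pay_id)s)
#   ON CONFLICT (store_id, pay_id)
#   DO UPDATE SET
#       payload = EXCLUDED.payload,
#       fetched_at = EXCLUDED.fetched_at,
#       source_file = EXCLUDED.source_file;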