# -*- coding: utf-8 -*-
import os
import json
from datetime import datetime

from .base_task import BaseTask


class ManualIngestTask(BaseTask):
    """Ingest manually fetched JSON files from a directory into ODS tables.

    File names are matched against FILE_MAPPING by substring; each JSON item
    is wrapped into a (store_id, <pk>, payload) row and upserted.
    """

    # Keys are substrings matched against incoming file names (the upstream
    # exports use Chinese names); values are fully qualified ODS tables.
    FILE_MAPPING = {
        "小票详情": "billiards_ods.ods_ticket_detail",      # ticket detail
        "结账记录": "billiards_ods.ods_order_settle",       # settlement record
        "支付记录": "billiards_ods.ods_payment",            # payment record
        "助教流水": "billiards_ods.ods_assistant_ledger",   # assistant ledger
        "助教废除": "billiards_ods.ods_assistant_abolish",  # assistant abolishment
        # Note: this may be the dim_product source; mapped to the ledger for
        # now on the assumption that these rows are sales.
        "商品档案": "billiards_ods.ods_goods_ledger",       # goods archive
        "库存变化": "billiards_ods.ods_inventory_change",   # inventory change
        "会员档案": "billiards_ods.ods_member",             # member archive
        "充值记录": "billiards_ods.ods_member_card",        # recharge record (approximate mapping)
        "团购套餐": "billiards_ods.ods_package_coupon",     # group-buy package
        "库存汇总": "billiards_ods.ods_inventory_stock",    # inventory summary
    }
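    # For example (hypothetical file name, not taken from a real export): a
    # download saved as "小票详情_2024-01-01.json" contains the key "小票详情"
    # and therefore lands in billiards_ods.ods_ticket_detail; files matching
    # no key are skipped with a warning in execute() below.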

    def get_task_code(self) -> str:
        return "MANUAL_INGEST"

    def execute(self) -> dict:
        self.logger.info("Starting Manual Ingest Task")

        # Configurable directory; defaults to the local test data for now.
        data_dir = self.config.get(
            "manual.data_dir",
            r"c:\dev\LLTQ\ETL\feiqiu-ETL\etl_billiards\tests\testdata_json",
        )

        if not os.path.exists(data_dir):
            self.logger.error(f"Data directory not found: {data_dir}")
            return {"status": "error", "message": "Directory not found"}

        total_files = 0
        total_rows = 0

        for filename in os.listdir(data_dir):
            if not filename.endswith(".json"):
                continue

            # Resolve the target table by substring match on the file name.
            target_table = None
            for key, table in self.FILE_MAPPING.items():
                if key in filename:
                    target_table = table
                    break

            if not target_table:
                self.logger.warning(f"No mapping found for file: {filename}, skipping.")
                continue

            self.logger.info(f"Ingesting {filename} into {target_table}")

            try:
                with open(os.path.join(data_dir, filename), 'r', encoding='utf-8') as f:
                    data = json.load(f)

                if not isinstance(data, list):
                    data = [data]

                # GenericODSLoader.upsert_rows would normalize a raw API result
                # list itself, but it assumes a standard primary-key layout
                # (usually configured per task in ODS_TASK_CLASSES), and the
                # tables here are resolved dynamically. Instead, mimic it with
                # a file-aware ingestion: extract store_id and the PK for each
                # item, then wrap the original item as the payload.
                rows_to_insert = []
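                # Illustrative only (hypothetical values, not from a real
                # export): an item such as
                #     {"id": 123, "siteProfile": {"id": 9}, "amount": 5800}
                # bound for billiards_ods.ods_payment would become the row
                #     {"store_id": 9, "payload": '{"id": 123, ...}',
                #      "source_file": filename, "fetched_at": <now>,
                #      "pay_id": 123}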

                for item in data:
                    # Store ID usually sits in siteProfile or at the data
                    # root; fall back to the configured store.
                    store_id = self._extract_store_id(item) or self.config.get("app.store_id")

                    # Primary key (id, orderSettleId, etc. depending on table).
                    pk_val = self._extract_pk(item, target_table)
                    if not pk_val and "ods_ticket_detail" in target_table:
                        # Ticket details key on the settlement ID instead.
                        pk_val = item.get("orderSettleId")
                    if not pk_val:
                        continue

                    row = {
                        "store_id": store_id,
                        "payload": json.dumps(item, ensure_ascii=False),
                        "source_file": filename,
                        "fetched_at": datetime.now(),
                    }
                    # Add the table-specific PK column.
                    row[self._get_pk_column(target_table)] = pk_val
                    rows_to_insert.append(row)

                if rows_to_insert:
                    self._bulk_insert(target_table, rows_to_insert)
                    total_rows += len(rows_to_insert)
                    total_files += 1

            except Exception as e:
                self.logger.error(f"Error processing {filename}: {e}", exc_info=True)

        return {"status": "success", "files_processed": total_files, "rows_inserted": total_rows}

    def _extract_store_id(self, item):
        """Probe the common locations a store ID appears in these exports."""
        if "store_id" in item:
            return item["store_id"]
        if "siteProfile" in item and "id" in item["siteProfile"]:
            return item["siteProfile"]["id"]
        if "data" in item and "data" in item["data"] and "siteId" in item["data"]["data"]:
            return item["data"]["data"]["siteId"]
        return None
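    # Shapes probed above, sketched with made-up values:
    #     {"store_id": 9, ...}
    #     {"siteProfile": {"id": 9, ...}, ...}
    #     {"data": {"data": {"siteId": 9, ...}, ...}, ...}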

    def _extract_pk(self, item, table):
        """Find the row's primary-key value for this table; default to item['id']."""
        if "ods_order_settle" in table:
            # Some files nest the settlement record under settleList.settleList.
            if "settleList" in item and "settleList" in item["settleList"]:
                return item["settleList"]["settleList"].get("id")
        return item.get("id")

    def _get_pk_column(self, table):
        """Map a target table to the name of its primary-key column."""
        # Match on the bare table name: a substring check would wrongly
        # resolve ods_member_card to member_id via the shorter ods_member.
        name = table.split(".")[-1]
        return {
            "ods_ticket_detail": "order_settle_id",
            "ods_order_settle": "order_settle_id",
            "ods_payment": "pay_id",
            "ods_member": "member_id",
            "ods_assistant_ledger": "ledger_id",
            "ods_goods_ledger": "order_goods_id",
            "ods_inventory_change": "change_id",
            "ods_assistant_abolish": "abolish_id",
            "ods_coupon_verify": "coupon_id",
            "ods_member_card": "card_id",
            "ods_package_coupon": "package_id",
        }.get(name, "id")  # Fallback for unmapped tables

    def _bulk_insert(self, table, rows):
        if not rows:
            return

        keys = list(rows[0].keys())
        cols = ", ".join(keys)
        vals = ", ".join(f"%({k})s" for k in keys)

        # Upsert on (store_id, <pk>): re-ingesting a file refreshes the payload.
        pk_col = self._get_pk_column(table)

        sql = f"""
        INSERT INTO {table} ({cols})
        VALUES ({vals})
        ON CONFLICT (store_id, {pk_col}) DO UPDATE SET
            payload = EXCLUDED.payload,
            fetched_at = EXCLUDED.fetched_at,
            source_file = EXCLUDED.source_file;
        """
        self.db.batch_execute(sql, rows)
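

# A minimal sketch of what _bulk_insert renders, assuming the row shape built
# in execute() for billiards_ods.ods_payment (hypothetical example; the target
# table must carry a unique constraint on (store_id, pay_id) for the upsert):
#
#     INSERT INTO billiards_ods.ods_payment
#         (store_id, payload, source_file, fetched_at, pay_id)
#     VALUES (%(store_id)s, %(payload)s, %(source_file)s, %(fetched_at)s, %(pay_id)s)
#     ON CONFLICT (store_id, pay_id) DO UPDATE SET
#         payload = EXCLUDED.payload,
#         fetched_at = EXCLUDED.fetched_at,
#         source_file = EXCLUDED.source_file;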