Incremental update
176
etl_billiards/tasks/manual_ingest_task.py
Normal file
@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
import os
import json
from datetime import datetime

from .base_task import BaseTask

class ManualIngestTask(BaseTask):
    """
    Task to ingest manually fetched JSON files from a directory into ODS tables.
    """

    FILE_MAPPING = {
        "小票详情": "billiards_ods.ods_ticket_detail",
        "结账记录": "billiards_ods.ods_order_settle",
        "支付记录": "billiards_ods.ods_payment",
        "助教流水": "billiards_ods.ods_assistant_ledger",
        "助教废除": "billiards_ods.ods_assistant_abolish",
        "商品档案": "billiards_ods.ods_goods_ledger",  # may belong to dim_product; mapped to the ledger for now since it looks like sales data
        "库存变化": "billiards_ods.ods_inventory_change",
        "会员档案": "billiards_ods.ods_member",
        "充值记录": "billiards_ods.ods_member_card",  # approximate mapping
        "团购套餐": "billiards_ods.ods_package_coupon",
        "库存汇总": "billiards_ods.ods_inventory_stock",
    }

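    # Matching is by substring (see execute): e.g. a dump named
    # "结账记录_2024-06.json" (hypothetical filename) would be routed to
    # billiards_ods.ods_order_settle.
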
    def get_task_code(self) -> str:
        return "MANUAL_INGEST"

    def execute(self) -> dict:
        self.logger.info("Starting Manual Ingest Task")

        # Configurable directory; defaults to tests/testdata_json for now.
        data_dir = self.config.get(
            "manual.data_dir",
            r"c:\dev\LLTQ\ETL\feiqiu-ETL\etl_billiards\tests\testdata_json",
        )

        if not os.path.exists(data_dir):
            self.logger.error(f"Data directory not found: {data_dir}")
            return {"status": "error", "message": "Directory not found"}

        total_files = 0
        total_rows = 0

        for filename in os.listdir(data_dir):
            if not filename.endswith(".json"):
                continue

            # Determine the target table by substring match on the filename.
            target_table = None
            for key, table in self.FILE_MAPPING.items():
                if key in filename:
                    target_table = table
                    break

            if not target_table:
                self.logger.warning(f"No mapping found for file: {filename}, skipping.")
                continue

            self.logger.info(f"Ingesting {filename} into {target_table}")

            try:
                with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f:
                    data = json.load(f)

                if not isinstance(data, list):
                    data = [data]

                # GenericODSLoader.upsert_rows expects a raw API result list and
                # normalizes each row itself, but it relies on per-table PK
                # configuration (normally defined in ODS_TASK_CLASSES) that this
                # dynamic, file-driven task does not have. So instead of reusing
                # the loader, do a small file-aware ingestion that mimics it:
                # wrap each item as a JSON payload row and extract the PK here.
                rows_to_insert = []
                for item in data:
                    # Extract the store id (usually in siteProfile or the data root).
                    store_id = self._extract_store_id(item) or self.config.get("app.store_id")

                    # Extract the PK (id, orderSettleId, etc.).
                    pk_val = self._extract_pk(item, target_table)

                    if not pk_val:
                        # Fall back to a plain 'id' field on the item.
                        pk_val = item.get("id")

                    if not pk_val:
                        # Special case: ticket details key off orderSettleId.
                        if "ods_ticket_detail" in target_table:
                            pk_val = item.get("orderSettleId")

                    if not pk_val:
                        continue

                    row = {
                        "store_id": store_id,
                        "payload": json.dumps(item, ensure_ascii=False),
                        "source_file": filename,
                        "fetched_at": datetime.now(),
                    }

                    # Add the table-specific PK column.
                    pk_col = self._get_pk_column(target_table)
                    row[pk_col] = pk_val

                    rows_to_insert.append(row)

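                # Each produced row looks roughly like this (values illustrative):
                #   {"store_id": 123, "order_settle_id": 456,
                #    "payload": "{...original item...}",
                #    "source_file": "结账记录.json", "fetched_at": datetime(...)}
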
                if rows_to_insert:
                    self._bulk_insert(target_table, rows_to_insert)
                    total_rows += len(rows_to_insert)
                    total_files += 1

            except Exception as e:
                self.logger.error(f"Error processing {filename}: {e}", exc_info=True)

        return {"status": "success", "files_processed": total_files, "rows_inserted": total_rows}

    def _extract_store_id(self, item):
        # Try common paths.
        if "store_id" in item:
            return item["store_id"]
        if "siteProfile" in item and "id" in item["siteProfile"]:
            return item["siteProfile"]["id"]
        if "data" in item and "data" in item["data"] and "siteId" in item["data"]["data"]:
            return item["data"]["data"]["siteId"]
        return None

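    # The shapes handled above look like (illustrative):
    #   {"store_id": 123, ...}
    #   {"siteProfile": {"id": 123}, ...}
    #   {"data": {"data": {"siteId": 123}}}
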
    def _extract_pk(self, item, table):
        # Resolve the PK based on the target table.
        if "ods_order_settle" in table:
            # Some files nest the record one level deeper.
            if "settleList" in item and "settleList" in item["settleList"]:
                return item["settleList"]["settleList"].get("id")
            return item.get("id")
        return item.get("id")

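    # The nested order-settle shape handled above is roughly (illustrative):
    #   {"settleList": {"settleList": {"id": 789, ...}}}
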
    def _get_pk_column(self, table):
        # Order matters: more specific names first, since "ods_member_card"
        # would otherwise be shadowed by the "ods_member" substring check.
        if "ods_ticket_detail" in table: return "order_settle_id"
        if "ods_order_settle" in table: return "order_settle_id"
        if "ods_payment" in table: return "pay_id"
        if "ods_member_card" in table: return "card_id"
        if "ods_member" in table: return "member_id"
        if "ods_assistant_ledger" in table: return "ledger_id"
        if "ods_assistant_abolish" in table: return "abolish_id"
        if "ods_goods_ledger" in table: return "order_goods_id"
        if "ods_inventory_change" in table: return "change_id"
        if "ods_coupon_verify" in table: return "coupon_id"
        if "ods_package_coupon" in table: return "package_id"
        return "id"  # fallback

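    # e.g. _get_pk_column("billiards_ods.ods_member_card") -> "card_id", while
    # an unmapped table such as ods_inventory_stock falls back to "id".
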
    def _bulk_insert(self, table, rows):
        if not rows:
            return

        keys = list(rows[0].keys())
        cols = ", ".join(keys)
        vals = ", ".join(f"%({k})s" for k in keys)

        # Upsert on (store_id, <pk>) so re-ingesting a file refreshes the payload.
        pk_col = self._get_pk_column(table)

        sql = f"""
            INSERT INTO {table} ({cols})
            VALUES ({vals})
            ON CONFLICT (store_id, {pk_col}) DO UPDATE SET
                payload = EXCLUDED.payload,
                fetched_at = EXCLUDED.fetched_at,
                source_file = EXCLUDED.source_file;
        """
        self.db.batch_execute(sql, rows)
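        # For a payment file, the rendered statement looks roughly like this
        # (illustrative; column order follows the row dict):
        #   INSERT INTO billiards_ods.ods_payment
        #       (store_id, payload, source_file, fetched_at, pay_id)
        #   VALUES (%(store_id)s, %(payload)s, %(source_file)s, %(fetched_at)s, %(pay_id)s)
        #   ON CONFLICT (store_id, pay_id) DO UPDATE SET
        #       payload = EXCLUDED.payload, ...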