Interim update
79
etl_billiards/tasks/base_dwd_task.py
Normal file
@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
"""Base class for DWD tasks."""
import json
from typing import Any, Dict, Iterator, List, Optional, Tuple
from datetime import datetime

from .base_task import BaseTask
from models.parsers import TypeParser


class BaseDwdTask(BaseTask):
    """
    Base class for DWD-layer tasks.

    Reads data from ODS tables so that subclasses can clean it and
    write it to fact/dimension tables.
    """

    def _get_ods_cursor(self, task_code: str) -> Optional[datetime]:
        """
        Get the timestamp (fetched_at) up to which ODS data was last processed.

        Simplified for now; this should eventually read from the etl_cursor
        table. Until then we rely on BaseTask's time-window logic, or let
        subclasses manage their own cursors.
        """
        # TODO: wire up the real CursorManager.
        # For now return None; subclasses can use _get_time_window instead.
        return None

    def iter_ods_rows(
        self,
        table_name: str,
        columns: List[str],
        start_time: datetime,
        end_time: datetime,
        time_col: str = "fetched_at",
        batch_size: int = 1000,
    ) -> Iterator[List[Dict[str, Any]]]:
        """
        Iterate over ODS table rows in batches.

        Args:
            table_name: ODS table name.
            columns: Columns to select (must include payload).
            start_time: Window start (inclusive).
            end_time: Window end (inclusive).
            time_col: Column used for time filtering, defaults to fetched_at.
            batch_size: Rows per batch.
        """
        offset = 0
        cols_str = ", ".join(columns)

        while True:
            sql = f"""
                SELECT {cols_str}
                FROM {table_name}
                WHERE {time_col} >= %s AND {time_col} <= %s
                ORDER BY {time_col} ASC
                LIMIT %s OFFSET %s
            """

            rows = self.db.fetch_all(sql, (start_time, end_time, batch_size, offset))

            if not rows:
                break

            yield [dict(row) for row in rows]

            if len(rows) < batch_size:
                break

            offset += batch_size

    def parse_payload(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Parse the payload JSON of an ODS row."""
        payload = row.get("payload")
        if isinstance(payload, str):
            return json.loads(payload)
        elif isinstance(payload, dict):
            return payload
        return {}
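A minimal subclass sketch, for orientation (ExampleDwdTask, the ods_example table, and the counting body are hypothetical; the window/commit plumbing is BaseTask's, as used by the real tasks below):

class ExampleDwdTask(BaseDwdTask):
    """Hypothetical DWD task illustrating the read-parse-load loop."""

    def get_task_code(self) -> str:
        return "EXAMPLE_DWD"

    def execute(self) -> dict:
        window_start, window_end, _ = self._get_time_window()
        total = 0
        for batch in self.iter_ods_rows(
            table_name="billiards_ods.ods_example",  # hypothetical table
            columns=["store_id", "payload", "fetched_at"],
            start_time=window_start,
            end_time=window_end,
        ):
            for row in batch:
                payload = self.parse_payload(row)
                if payload:
                    total += 1  # a real task would clean and upsert here
        self.db.commit()
        return {"status": "success", "rows": total}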
176
etl_billiards/tasks/manual_ingest_task.py
Normal file
@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
import os
import json
from datetime import datetime
from .base_task import BaseTask
from loaders.ods.generic import GenericODSLoader


class ManualIngestTask(BaseTask):
    """
    Task to ingest manually fetched JSON files from a directory into ODS tables.
    """

    FILE_MAPPING = {
        "小票详情": "billiards_ods.ods_ticket_detail",
        "结账记录": "billiards_ods.ods_order_settle",
        "支付记录": "billiards_ods.ods_payment",
        "助教流水": "billiards_ods.ods_assistant_ledger",
        "助教废除": "billiards_ods.ods_assistant_abolish",
        "商品档案": "billiards_ods.ods_goods_ledger",  # May actually feed dim_product; mapped to the ledger for now.
        "库存变化": "billiards_ods.ods_inventory_change",
        "会员档案": "billiards_ods.ods_member",
        "充值记录": "billiards_ods.ods_member_card",  # Approximate mapping.
        "团购套餐": "billiards_ods.ods_package_coupon",
        "库存汇总": "billiards_ods.ods_inventory_stock"
    }

    def get_task_code(self) -> str:
        return "MANUAL_INGEST"

    def execute(self) -> dict:
        self.logger.info("Starting Manual Ingest Task")

        # Configurable directory; defaults to tests/testdata_json for now.
        data_dir = self.config.get("manual.data_dir", r"c:\dev\LLTQ\ETL\feiqiu-ETL\etl_billiards\tests\testdata_json")

        if not os.path.exists(data_dir):
            self.logger.error(f"Data directory not found: {data_dir}")
            return {"status": "error", "message": "Directory not found"}

        total_files = 0
        total_rows = 0

        for filename in os.listdir(data_dir):
            if not filename.endswith(".json"):
                continue

            # Determine the target table from the filename.
            target_table = None
            for key, table in self.FILE_MAPPING.items():
                if key in filename:
                    target_table = table
                    break

            if not target_table:
                self.logger.warning(f"No mapping found for file: {filename}, skipping.")
                continue

            self.logger.info(f"Ingesting {filename} into {target_table}")

            try:
                with open(os.path.join(data_dir, filename), 'r', encoding='utf-8') as f:
                    data = json.load(f)

                if not isinstance(data, list):
                    data = [data]

                # GenericODSLoader expects the raw API result list and
                # normalizes each row itself, but it needs per-table PK
                # configuration, which is normally defined alongside
                # ODS_TASK_CLASSES. Since this task maps tables dynamically,
                # we use a small file-aware ingestion that mimics the loader
                # instead.
                rows_to_insert = []
                for item in data:
                    # Extract the store ID (usually in siteProfile or the data root).
                    store_id = self._extract_store_id(item) or self.config.get("app.store_id")

                    # Extract the PK (id, orderSettleId, etc.).
                    pk_val = self._extract_pk(item, target_table)

                    if not pk_val:
                        # Fall back to a plain 'id' field.
                        pk_val = item.get("id")

                    if not pk_val:
                        # Special case for ticket details.
                        if "ods_ticket_detail" in target_table:
                            pk_val = item.get("orderSettleId")

                    if not pk_val:
                        continue

                    row = {
                        "store_id": store_id,
                        "payload": json.dumps(item, ensure_ascii=False),
                        "source_file": filename,
                        "fetched_at": datetime.now()
                    }

                    # Add the table-specific PK column.
                    pk_col = self._get_pk_column(target_table)
                    row[pk_col] = pk_val

                    rows_to_insert.append(row)

                if rows_to_insert:
                    self._bulk_insert(target_table, rows_to_insert)
                    total_rows += len(rows_to_insert)
                    total_files += 1

            except Exception as e:
                self.logger.error(f"Error processing {filename}: {e}", exc_info=True)

        return {"status": "success", "files_processed": total_files, "rows_inserted": total_rows}

    def _extract_store_id(self, item):
        # Try common paths.
        if "store_id" in item:
            return item["store_id"]
        if "siteProfile" in item and "id" in item["siteProfile"]:
            return item["siteProfile"]["id"]
        if "data" in item and "data" in item["data"] and "siteId" in item["data"]["data"]:
            return item["data"]["data"]["siteId"]
        return None

    def _extract_pk(self, item, table):
        # Find the PK based on the target table.
        if "ods_order_settle" in table:
            # Some files nest the record under settleList.settleList.
            if "settleList" in item and "settleList" in item["settleList"]:
                return item["settleList"]["settleList"].get("id")
            return item.get("id")
        return item.get("id")

    def _get_pk_column(self, table):
        # Note: substring matching, so more specific names must be checked
        # first (e.g. ods_member_card before ods_member).
        if "ods_ticket_detail" in table: return "order_settle_id"
        if "ods_order_settle" in table: return "order_settle_id"
        if "ods_payment" in table: return "pay_id"
        if "ods_member_card" in table: return "card_id"
        if "ods_member" in table: return "member_id"
        if "ods_assistant_ledger" in table: return "ledger_id"
        if "ods_goods_ledger" in table: return "order_goods_id"
        if "ods_inventory_change" in table: return "change_id"
        if "ods_assistant_abolish" in table: return "abolish_id"
        if "ods_coupon_verify" in table: return "coupon_id"
        if "ods_package_coupon" in table: return "package_id"
        return "id"  # Fallback (e.g. ods_inventory_stock has no explicit mapping yet).

    def _bulk_insert(self, table, rows):
        if not rows:
            return

        keys = list(rows[0].keys())
        cols = ", ".join(keys)
        vals = ", ".join([f"%({k})s" for k in keys])

        # Determine the PK column for conflict resolution.
        pk_col = self._get_pk_column(table)

        sql = f"""
            INSERT INTO {table} ({cols})
            VALUES ({vals})
            ON CONFLICT (store_id, {pk_col}) DO UPDATE SET
                payload = EXCLUDED.payload,
                fetched_at = EXCLUDED.fetched_at,
                source_file = EXCLUDED.source_file;
        """
        self.db.batch_execute(sql, rows)
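For reference, the statement _bulk_insert renders for ods_payment (rows carry the keys store_id, payload, source_file, fetched_at, pay_id, in that order) looks like this:

INSERT INTO billiards_ods.ods_payment (store_id, payload, source_file, fetched_at, pay_id)
VALUES (%(store_id)s, %(payload)s, %(source_file)s, %(fetched_at)s, %(pay_id)s)
ON CONFLICT (store_id, pay_id) DO UPDATE SET
    payload = EXCLUDED.payload,
    fetched_at = EXCLUDED.fetched_at,
    source_file = EXCLUDED.source_file;

Re-running the ingest over the same directory is therefore idempotent: existing (store_id, pay_id) rows are refreshed, not duplicated.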
89
etl_billiards/tasks/members_dwd_task.py
Normal file
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from typing import Optional
from .base_dwd_task import BaseDwdTask
from loaders.dimensions.member import MemberLoader
from models.parsers import TypeParser
import json


class MembersDwdTask(BaseDwdTask):
    """
    DWD Task: Process Member Records from ODS to Dimension Table
    Source: billiards_ods.ods_member
    Target: billiards.dim_member
    """

    def get_task_code(self) -> str:
        return "MEMBERS_DWD"

    def execute(self) -> dict:
        self.logger.info(f"Starting {self.get_task_code()} task")

        window_start, window_end, _ = self._get_time_window()
        self.logger.info(f"Processing window: {window_start} to {window_end}")

        loader = MemberLoader(self.db)
        store_id = self.config.get("app.store_id")

        total_inserted = 0
        total_updated = 0
        total_errors = 0

        # Iterate over ODS data.
        batches = self.iter_ods_rows(
            table_name="billiards_ods.ods_member",
            columns=["store_id", "member_id", "payload", "fetched_at"],
            start_time=window_start,
            end_time=window_end
        )

        for batch in batches:
            if not batch:
                continue

            parsed_rows = []
            for row in batch:
                payload = self.parse_payload(row)
                if not payload:
                    continue

                parsed = self._parse_member(payload, store_id)
                if parsed:
                    parsed_rows.append(parsed)

            if parsed_rows:
                inserted, updated, skipped = loader.upsert_members(parsed_rows, store_id)
                total_inserted += inserted
                total_updated += updated

        self.db.commit()

        self.logger.info(f"Task {self.get_task_code()} completed. Inserted: {total_inserted}, Updated: {total_updated}")

        return {
            "status": "success",
            "inserted": total_inserted,
            "updated": total_updated,
            "window_start": window_start.isoformat(),
            "window_end": window_end.isoformat()
        }

    def _parse_member(self, raw: dict, store_id: int) -> Optional[dict]:
        """Parse an ODS payload into the dimension structure."""
        try:
            # Handle both the API structure (camelCase) and the manual-file structure.
            member_id = raw.get("id") or raw.get("memberId")
            if not member_id:
                return None

            return {
                "store_id": store_id,
                "member_id": member_id,
                "member_name": raw.get("name") or raw.get("memberName"),
                "phone": raw.get("phone") or raw.get("mobile"),
                "balance": raw.get("balance", 0),
                "status": str(raw.get("status", "NORMAL")),
                "register_time": raw.get("createTime") or raw.get("registerTime"),
                "raw_data": json.dumps(raw, ensure_ascii=False)
            }
        except Exception as e:
            self.logger.warning(f"Error parsing member: {e}")
            return None
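Both payload shapes the parser handles normalize to the same dimension row; a small illustration with hypothetical values:

api_payload    = {"memberId": 42, "memberName": "Alice", "mobile": "13800000000"}
manual_payload = {"id": 42, "name": "Alice", "phone": "13800000000"}
# _parse_member maps either shape to the same result:
#   {"member_id": 42, "member_name": "Alice", "phone": "13800000000", ...}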
400
etl_billiards/tasks/ods_tasks.py
Normal file
@@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
"""ODS ingestion tasks."""
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, Iterable, List, Sequence, Tuple, Type

from loaders.ods import GenericODSLoader
from models.parsers import TypeParser
from .base_task import BaseTask


ColumnTransform = Callable[[Any], Any]


@dataclass(frozen=True)
class ColumnSpec:
    """Mapping between a DB column and its source JSON field(s)."""

    column: str
    sources: Tuple[str, ...] = ()
    required: bool = False
    default: Any = None
    transform: ColumnTransform | None = None


@dataclass(frozen=True)
class OdsTaskSpec:
    """Definition of a single ODS ingestion task."""

    code: str
    class_name: str
    table_name: str
    endpoint: str
    data_path: Tuple[str, ...] = ("data",)
    list_key: str | None = None
    pk_columns: Tuple[ColumnSpec, ...] = ()
    extra_columns: Tuple[ColumnSpec, ...] = ()
    include_page_size: bool = False
    include_page_no: bool = True
    include_source_file: bool = True
    include_source_endpoint: bool = True
    requires_window: bool = True
    description: str = ""
    extra_params: Dict[str, Any] = field(default_factory=dict)


class BaseOdsTask(BaseTask):
    """Shared functionality for ODS ingestion tasks."""

    SPEC: OdsTaskSpec

    def get_task_code(self) -> str:
        return self.SPEC.code

    def execute(self) -> dict:
        spec = self.SPEC
        self.logger.info("Starting %s (ODS)", spec.code)

        store_id = TypeParser.parse_int(self.config.get("app.store_id"))
        if not store_id:
            raise ValueError("app.store_id is not configured; ODS tasks cannot run")

        page_size = self.config.get("api.page_size", 200)
        params = self._build_params(spec, store_id)
        columns = self._resolve_columns(spec)
        conflict_columns = ["store_id"] + [col.column for col in spec.pk_columns]
        loader = GenericODSLoader(
            self.db,
            spec.table_name,
            columns,
            conflict_columns,
        )

        counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
        source_file = self._resolve_source_file_hint(spec)

        try:
            for page_no, page_records, _, _ in self.api.iter_paginated(
                endpoint=spec.endpoint,
                params=params,
                page_size=page_size,
                data_path=spec.data_path,
                list_key=spec.list_key,
            ):
                rows: List[dict] = []
                for raw in page_records:
                    row = self._build_row(
                        spec=spec,
                        store_id=store_id,
                        record=raw,
                        page_no=page_no if spec.include_page_no else None,
                        page_size_value=len(page_records)
                        if spec.include_page_size
                        else None,
                        source_file=source_file,
                    )
                    if row is None:
                        counts["skipped"] += 1
                        continue
                    rows.append(row)

                inserted, updated, _ = loader.upsert_rows(rows)
                counts["inserted"] += inserted
                counts["updated"] += updated
                counts["fetched"] += len(page_records)

            self.db.commit()
            self.logger.info("%s ODS task completed: %s", spec.code, counts)
            return self._build_result("SUCCESS", counts)

        except Exception:
            self.db.rollback()
            counts["errors"] += 1
            self.logger.error("%s ODS task failed", spec.code, exc_info=True)
            raise

    def _build_params(self, spec: OdsTaskSpec, store_id: int) -> dict:
        params: dict[str, Any] = {"storeId": store_id}
        params.update(spec.extra_params)
        if spec.requires_window:
            window_start, window_end, _ = self._get_time_window()
            params["startTime"] = TypeParser.format_timestamp(window_start, self.tz)
            params["endTime"] = TypeParser.format_timestamp(window_end, self.tz)
        return params

    def _resolve_columns(self, spec: OdsTaskSpec) -> List[str]:
        columns: List[str] = ["store_id"]
        seen = set(columns)
        for col_spec in list(spec.pk_columns) + list(spec.extra_columns):
            if col_spec.column not in seen:
                columns.append(col_spec.column)
                seen.add(col_spec.column)

        if spec.include_page_no and "page_no" not in seen:
            columns.append("page_no")
            seen.add("page_no")

        if spec.include_page_size and "page_size" not in seen:
            columns.append("page_size")
            seen.add("page_size")

        if spec.include_source_file and "source_file" not in seen:
            columns.append("source_file")
            seen.add("source_file")

        if spec.include_source_endpoint and "source_endpoint" not in seen:
            columns.append("source_endpoint")
            seen.add("source_endpoint")

        if "fetched_at" not in seen:
            columns.append("fetched_at")
            seen.add("fetched_at")
        if "payload" not in seen:
            columns.append("payload")

        return columns

    def _build_row(
        self,
        spec: OdsTaskSpec,
        store_id: int,
        record: dict,
        page_no: int | None,
        page_size_value: int | None,
        source_file: str | None,
    ) -> dict | None:
        row: dict[str, Any] = {"store_id": store_id}

        for col_spec in spec.pk_columns + spec.extra_columns:
            value = self._extract_value(record, col_spec)
            if value is None and col_spec.required:
                self.logger.warning(
                    "%s is missing required field %s; raw record: %s",
                    spec.code,
                    col_spec.column,
                    record,
                )
                return None
            row[col_spec.column] = value

        if spec.include_page_no:
            row["page_no"] = page_no
        if spec.include_page_size:
            row["page_size"] = page_size_value
        if spec.include_source_file:
            row["source_file"] = source_file
        if spec.include_source_endpoint:
            row["source_endpoint"] = spec.endpoint

        row["fetched_at"] = datetime.now(self.tz)
        row["payload"] = record
        return row

    def _extract_value(self, record: dict, spec: ColumnSpec):
        value = None
        for key in spec.sources:
            value = self._dig(record, key)
            if value is not None:
                break
        if value is None and spec.default is not None:
            value = spec.default
        if value is not None and spec.transform:
            value = spec.transform(value)
        return value

    @staticmethod
    def _dig(record: Any, path: str | None):
        if not path:
            return None
        current = record
        for part in path.split("."):
            if isinstance(current, dict):
                current = current.get(part)
            else:
                return None
        return current

    def _resolve_source_file_hint(self, spec: OdsTaskSpec) -> str | None:
        resolver = getattr(self.api, "get_source_hint", None)
        if callable(resolver):
            return resolver(spec.endpoint)
        return None


def _int_col(name: str, *sources: str, required: bool = False) -> ColumnSpec:
    return ColumnSpec(
        column=name,
        sources=sources,
        required=required,
        transform=TypeParser.parse_int,
    )


ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
    OdsTaskSpec(
        code="ODS_ORDER_SETTLE",
        class_name="OdsOrderSettleTask",
        table_name="billiards_ods.ods_order_settle",
        endpoint="/order/list",
        data_path=("data",),
        pk_columns=(_int_col("order_settle_id", "orderSettleId", "order_settle_id", "id", required=True),),
        extra_columns=(_int_col("order_trade_no", "orderTradeNo", "order_trade_no"),),
        include_page_size=True,
        description="Raw order/settlement ODS records",
    ),
    OdsTaskSpec(
        code="ODS_TABLE_USE",
        class_name="OdsTableUseTask",
        table_name="billiards_ods.ods_table_use_detail",
        endpoint="/Table/UseDetailList",
        data_path=("data", "siteTableUseDetailsList"),
        pk_columns=(_int_col("ledger_id", "id", required=True),),
        extra_columns=(
            _int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
            _int_col("order_settle_id", "order_settle_id", "orderSettleId"),
        ),
        description="Table-fee/table-usage ledger ODS",
    ),
    OdsTaskSpec(
        code="ODS_ASSISTANT_LEDGER",
        class_name="OdsAssistantLedgerTask",
        table_name="billiards_ods.ods_assistant_ledger",
        endpoint="/Assistant/LedgerList",
        data_path=("data", "orderAssistantDetails"),
        pk_columns=(_int_col("ledger_id", "id", required=True),),
        extra_columns=(
            _int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
            _int_col("order_settle_id", "order_settle_id", "orderSettleId"),
        ),
        description="Assistant ledger ODS",
    ),
    OdsTaskSpec(
        code="ODS_ASSISTANT_ABOLISH",
        class_name="OdsAssistantAbolishTask",
        table_name="billiards_ods.ods_assistant_abolish",
        endpoint="/Assistant/AbolishList",
        data_path=("data", "abolitionAssistants"),
        pk_columns=(_int_col("abolish_id", "id", required=True),),
        description="Voided assistant records ODS",
    ),
    OdsTaskSpec(
        code="ODS_GOODS_LEDGER",
        class_name="OdsGoodsLedgerTask",
        table_name="billiards_ods.ods_goods_ledger",
        endpoint="/Order/GoodsLedgerList",
        data_path=("data", "orderGoodsLedgers"),
        pk_columns=(_int_col("order_goods_id", "orderGoodsId", "id", required=True),),
        extra_columns=(
            _int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
            _int_col("order_settle_id", "order_settle_id", "orderSettleId"),
        ),
        description="Goods sales ledger ODS",
    ),
    OdsTaskSpec(
        code="ODS_PAYMENT",
        class_name="OdsPaymentTask",
        table_name="billiards_ods.ods_payment",
        endpoint="/pay/records",
        data_path=("data",),
        pk_columns=(_int_col("pay_id", "payId", "id", required=True),),
        extra_columns=(
            ColumnSpec(column="relate_type", sources=("relate_type", "relateType")),
            _int_col("relate_id", "relate_id", "relateId"),
        ),
        include_page_size=False,
        description="Payment ledger ODS",
    ),
    OdsTaskSpec(
        code="ODS_REFUND",
        class_name="OdsRefundTask",
        table_name="billiards_ods.ods_refund",
        endpoint="/Pay/RefundList",
        data_path=(),
        pk_columns=(_int_col("refund_id", "id", required=True),),
        extra_columns=(
            ColumnSpec(column="relate_type", sources=("relate_type", "relateType")),
            _int_col("relate_id", "relate_id", "relateId"),
        ),
        description="Refund ledger ODS",
    ),
    OdsTaskSpec(
        code="ODS_COUPON_VERIFY",
        class_name="OdsCouponVerifyTask",
        table_name="billiards_ods.ods_coupon_verify",
        endpoint="/Coupon/UsageList",
        data_path=(),
        pk_columns=(_int_col("coupon_id", "id", "couponId", required=True),),
        description="Platform coupon-verification/group-buy ledger ODS",
    ),
    OdsTaskSpec(
        code="ODS_MEMBER",
        class_name="OdsMemberTask",
        table_name="billiards_ods.ods_member",
        endpoint="/MemberProfile/GetTenantMemberList",
        data_path=("data",),
        pk_columns=(_int_col("member_id", "memberId", required=True),),
        requires_window=False,
        description="Member profile ODS",
    ),
    OdsTaskSpec(
        code="ODS_MEMBER_CARD",
        class_name="OdsMemberCardTask",
        table_name="billiards_ods.ods_member_card",
        endpoint="/MemberCard/List",
        data_path=("data", "tenantMemberCards"),
        pk_columns=(_int_col("card_id", "tenantMemberCardId", "cardId", required=True),),
        requires_window=False,
        description="Member card / stored-value card ODS",
    ),
    OdsTaskSpec(
        code="ODS_PACKAGE",
        class_name="OdsPackageTask",
        table_name="billiards_ods.ods_package_coupon",
        endpoint="/Package/List",
        data_path=("data", "packageCouponList"),
        pk_columns=(_int_col("package_id", "id", "packageId", required=True),),
        requires_window=False,
        description="Group-buy/package definition ODS",
    ),
    OdsTaskSpec(
        code="ODS_INVENTORY_STOCK",
        class_name="OdsInventoryStockTask",
        table_name="billiards_ods.ods_inventory_stock",
        endpoint="/Inventory/StockSummary",
        data_path=(),
        pk_columns=(
            _int_col("site_goods_id", "siteGoodsId", required=True),
            ColumnSpec(column="snapshot_key", default="default", required=True),
        ),
        requires_window=False,
        description="Inventory summary ODS",
    ),
    OdsTaskSpec(
        code="ODS_INVENTORY_CHANGE",
        class_name="OdsInventoryChangeTask",
        table_name="billiards_ods.ods_inventory_change",
        endpoint="/Inventory/ChangeList",
        data_path=("data", "queryDeliveryRecordsList"),
        pk_columns=(_int_col("change_id", "siteGoodsStockId", "id", required=True),),
        description="Inventory change ODS",
    ),
)


def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
    attrs = {
        "SPEC": spec,
        "__doc__": spec.description or f"ODS ingestion task {spec.code}",
        "__module__": __name__,
    }
    return type(spec.class_name, (BaseOdsTask,), attrs)


ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
    spec.code: _build_task_class(spec) for spec in ODS_TASK_SPECS
}

__all__ = ["ODS_TASK_CLASSES", "ODS_TASK_SPECS", "BaseOdsTask"]
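A short sketch of how this registry is meant to be consumed (the import path is assumed from the repo layout; instantiation is left to the task runner, whose wiring BaseTask defines):

from tasks.ods_tasks import ODS_TASK_CLASSES, BaseOdsTask

task_cls = ODS_TASK_CLASSES["ODS_PAYMENT"]   # dynamically built OdsPaymentTask
assert issubclass(task_cls, BaseOdsTask)
print(task_cls.SPEC.table_name)              # billiards_ods.ods_payment
print(task_cls.SPEC.endpoint)                # /pay/records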
124
etl_billiards/tasks/payments_dwd_task.py
Normal file
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
from typing import Optional
from .base_dwd_task import BaseDwdTask
from loaders.facts.payment import PaymentLoader
from models.parsers import TypeParser
import json


class PaymentsDwdTask(BaseDwdTask):
    """
    DWD Task: Process Payment Records from ODS to Fact Table
    Source: billiards_ods.ods_payment
    Target: billiards.fact_payment
    """

    def get_task_code(self) -> str:
        return "PAYMENTS_DWD"

    def execute(self) -> dict:
        self.logger.info(f"Starting {self.get_task_code()} task")

        window_start, window_end, _ = self._get_time_window()
        self.logger.info(f"Processing window: {window_start} to {window_end}")

        loader = PaymentLoader(self.db)
        store_id = self.config.get("app.store_id")

        total_inserted = 0
        total_errors = 0

        # Iterate over ODS data.
        batches = self.iter_ods_rows(
            table_name="billiards_ods.ods_payment",
            columns=["store_id", "pay_id", "payload", "fetched_at"],
            start_time=window_start,
            end_time=window_end
        )

        for batch in batches:
            if not batch:
                continue

            parsed_rows = []
            for row in batch:
                payload = self.parse_payload(row)
                if not payload:
                    continue

                parsed = self._parse_payment(payload, store_id)
                if parsed:
                    parsed_rows.append(parsed)

            if parsed_rows:
                inserted, errors = loader.upsert_payments(parsed_rows, store_id)
                total_inserted += inserted
                total_errors += errors

        self.db.commit()

        self.logger.info(f"Task {self.get_task_code()} completed. Inserted: {total_inserted}, Errors: {total_errors}")

        return {
            "status": "success",
            "inserted": total_inserted,
            "errors": total_errors,
            "window_start": window_start.isoformat(),
            "window_end": window_end.isoformat()
        }

    def _parse_payment(self, raw: dict, store_id: int) -> Optional[dict]:
        """Parse an ODS payload into the fact structure."""
        try:
            pay_id = TypeParser.parse_int(raw.get("payId") or raw.get("id"))
            if not pay_id:
                return None

            relate_type = str(raw.get("relateType") or raw.get("relate_type") or "")
            relate_id = TypeParser.parse_int(raw.get("relateId") or raw.get("relate_id"))

            # Attempt to populate settlement / trade identifiers.
            order_settle_id = TypeParser.parse_int(
                raw.get("orderSettleId") or raw.get("order_settle_id")
            )
            order_trade_no = TypeParser.parse_int(
                raw.get("orderTradeNo") or raw.get("order_trade_no")
            )

            if relate_type in {"1", "SETTLE", "ORDER"}:
                order_settle_id = order_settle_id or relate_id

            return {
                "store_id": store_id,
                "pay_id": pay_id,
                "order_settle_id": order_settle_id,
                "order_trade_no": order_trade_no,
                "relate_type": relate_type,
                "relate_id": relate_id,
                "site_id": TypeParser.parse_int(
                    raw.get("siteId") or raw.get("site_id") or store_id
                ),
                "tenant_id": TypeParser.parse_int(raw.get("tenantId") or raw.get("tenant_id")),
                "create_time": TypeParser.parse_timestamp(
                    raw.get("createTime") or raw.get("create_time"), self.tz
                ),
                "pay_time": TypeParser.parse_timestamp(raw.get("payTime"), self.tz),
                "pay_amount": TypeParser.parse_decimal(raw.get("payAmount")),
                "fee_amount": TypeParser.parse_decimal(
                    raw.get("feeAmount")
                    or raw.get("serviceFee")
                    or raw.get("channelFee")
                    or raw.get("fee_amount")
                ),
                "discount_amount": TypeParser.parse_decimal(
                    raw.get("discountAmount")
                    or raw.get("couponAmount")
                    or raw.get("discount_amount")
                ),
                "payment_method": str(raw.get("paymentMethod") or raw.get("payment_method") or ""),
                "online_pay_channel": raw.get("onlinePayChannel") or raw.get("online_pay_channel"),
                "pay_terminal": raw.get("payTerminal") or raw.get("pay_terminal"),
                "pay_status": str(raw.get("payStatus") or raw.get("pay_status") or ""),
                "raw_data": json.dumps(raw, ensure_ascii=False)
            }
        except Exception as e:
            self.logger.warning(f"Error parsing payment: {e}")
            return None
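A worked example of the settlement-id backfill above (hypothetical values):

raw = {"payId": 1001, "relateType": "1", "relateId": 555}
# The direct lookups find no orderSettleId, so order_settle_id is None;
# relate_type "1" is in {"1", "SETTLE", "ORDER"}, so the fallback applies:
#   order_settle_id = None or 555  ->  555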
@@ -62,16 +62,42 @@ class PaymentsTask(BaseTask):
     def _parse_payment(self, raw: dict) -> dict:
         """Parse a payment record."""
         try:
+            store_id = self.config.get("app.store_id")
             return {
-                "store_id": self.config.get("app.store_id"),
-                "pay_id": TypeParser.parse_int(raw.get("payId")),
+                "store_id": store_id,
+                "pay_id": TypeParser.parse_int(raw.get("payId") or raw.get("id")),
                 "order_id": TypeParser.parse_int(raw.get("orderId")),
+                "order_settle_id": TypeParser.parse_int(
+                    raw.get("orderSettleId") or raw.get("order_settle_id")
+                ),
+                "order_trade_no": TypeParser.parse_int(
+                    raw.get("orderTradeNo") or raw.get("order_trade_no")
+                ),
+                "relate_type": raw.get("relateType") or raw.get("relate_type"),
+                "relate_id": TypeParser.parse_int(raw.get("relateId") or raw.get("relate_id")),
+                "site_id": TypeParser.parse_int(raw.get("siteId") or raw.get("site_id") or store_id),
+                "tenant_id": TypeParser.parse_int(raw.get("tenantId") or raw.get("tenant_id")),
                 "pay_time": TypeParser.parse_timestamp(raw.get("payTime"), self.tz),
+                "create_time": TypeParser.parse_timestamp(
+                    raw.get("createTime") or raw.get("create_time"), self.tz
+                ),
                 "pay_amount": TypeParser.parse_decimal(raw.get("payAmount")),
+                "fee_amount": TypeParser.parse_decimal(
+                    raw.get("feeAmount")
+                    or raw.get("serviceFee")
+                    or raw.get("channelFee")
+                    or raw.get("fee_amount")
+                ),
+                "discount_amount": TypeParser.parse_decimal(
+                    raw.get("discountAmount") or raw.get("couponAmount") or raw.get("discount_amount")
+                ),
                 "pay_type": raw.get("payType"),
+                "payment_method": raw.get("paymentMethod") or raw.get("payment_method"),
+                "online_pay_channel": raw.get("onlinePayChannel") or raw.get("online_pay_channel"),
                 "pay_status": raw.get("payStatus"),
+                "pay_terminal": raw.get("payTerminal") or raw.get("pay_terminal"),
+                "remark": raw.get("remark"),
-                "raw_data": json.dumps(raw, ensure_ascii=False)
+                "raw_data": json.dumps(raw, ensure_ascii=False),
             }
         except Exception as e:
             self.logger.warning(f"Failed to parse payment record: {e}, raw data: {raw}")
69
etl_billiards/tasks/ticket_dwd_task.py
Normal file
@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
from .base_dwd_task import BaseDwdTask
from loaders.facts.ticket import TicketLoader


class TicketDwdTask(BaseDwdTask):
    """
    DWD Task: Process Ticket Details from ODS to Fact Tables
    Source: billiards_ods.ods_ticket_detail
    Targets:
      - billiards.fact_order
      - billiards.fact_order_goods
      - billiards.fact_table_usage
      - billiards.fact_assistant_service
    """

    def get_task_code(self) -> str:
        return "TICKET_DWD"

    def execute(self) -> dict:
        self.logger.info(f"Starting {self.get_task_code()} task")

        # 1. Get the time window (incremental load).
        window_start, window_end, _ = self._get_time_window()
        self.logger.info(f"Processing window: {window_start} to {window_end}")

        # 2. Initialize the loader.
        loader = TicketLoader(self.db)
        store_id = self.config.get("app.store_id")

        total_inserted = 0
        total_errors = 0

        # 3. Iterate over ODS data, filtering ods_ticket_detail by fetched_at.
        batches = self.iter_ods_rows(
            table_name="billiards_ods.ods_ticket_detail",
            columns=["store_id", "order_settle_id", "payload", "fetched_at"],
            start_time=window_start,
            end_time=window_end
        )

        for batch in batches:
            if not batch:
                continue

            # Extract payloads.
            tickets = []
            for row in batch:
                payload = self.parse_payload(row)
                if payload:
                    tickets.append(payload)

            # Process the batch.
            inserted, errors = loader.process_tickets(tickets, store_id)
            total_inserted += inserted
            total_errors += errors

        # 4. Commit.
        self.db.commit()

        self.logger.info(f"Task {self.get_task_code()} completed. Inserted: {total_inserted}, Errors: {total_errors}")

        return {
            "status": "success",
            "inserted": total_inserted,
            "errors": total_errors,
            "window_start": window_start.isoformat(),
            "window_end": window_end.isoformat()
        }
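Unlike the other DWD tasks, parsing is delegated wholly to TicketLoader.process_tickets, since one ticket payload fans out into four fact tables. A hypothetical outline of the contract this task assumes (only the signature is taken from the call above; the real loader lives in loaders/facts/ticket.py):

class TicketLoader:
    """Sketch only, not the actual implementation."""

    def __init__(self, db):
        self.db = db

    def process_tickets(self, tickets, store_id):
        inserted = errors = 0
        for ticket in tickets:
            try:
                # Split the payload into order / goods / table-usage /
                # assistant rows and upsert each fact table here.
                inserted += 1
            except Exception:
                errors += 1
        return inserted, errors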