Incremental progress update

This commit is contained in:
Neo
2025-11-20 01:27:33 +08:00
parent 92f219b575
commit cbd16a39ba
25 changed files with 7825 additions and 721 deletions


@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
"""Base class for DWD tasks."""
import json
from typing import Any, Dict, Iterator, List, Optional
from datetime import datetime
from .base_task import BaseTask
class BaseDwdTask(BaseTask):
    """
    Base class for DWD-layer tasks.
    Reads from ODS tables so that subclasses can clean the data and write fact/dimension tables.
    """
    def _get_ods_cursor(self, task_code: str) -> Optional[datetime]:
        """
        Return the point in time (fetched_at) up to which ODS data was last processed.
        Simplified for now; the real value should come from the etl_cursor table.
        Until then we rely on BaseTask's time-window logic, or the subclass manages its own cursor.
        """
        # TODO: wire this up to the real CursorManager.
        # For now return None; subclasses obtain their window via _get_time_window.
        return None
def iter_ods_rows(
self,
table_name: str,
columns: List[str],
start_time: datetime,
end_time: datetime,
time_col: str = "fetched_at",
batch_size: int = 1000
) -> Iterator[List[Dict[str, Any]]]:
"""
分批迭代读取 ODS 表数据
Args:
table_name: ODS 表名
columns: 需要查询的字段列表 (必须包含 payload)
start_time: 开始时间 (包含)
end_time: 结束时间 (包含)
time_col: 时间过滤字段,默认 fetched_at
batch_size: 批次大小
"""
offset = 0
cols_str = ", ".join(columns)
while True:
sql = f"""
SELECT {cols_str}
FROM {table_name}
WHERE {time_col} >= %s AND {time_col} <= %s
ORDER BY {time_col} ASC
LIMIT %s OFFSET %s
"""
rows = self.db.fetch_all(sql, (start_time, end_time, batch_size, offset))
if not rows:
break
yield [dict(row) for row in rows]
if len(rows) < batch_size:
break
offset += batch_size
def parse_payload(self, row: Dict[str, Any]) -> Dict[str, Any]:
"""
解析 ODS 行中的 payload JSON
"""
payload = row.get("payload")
if isinstance(payload, str):
return json.loads(payload)
elif isinstance(payload, dict):
return payload
return {}
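
For orientation, a minimal subclass wired onto this base could look like the sketch below; the table name `billiards_ods.ods_example` and the task code are invented for illustration, while `_get_time_window`, `self.db`, and `self.logger` are the BaseTask facilities used throughout this commit.

```python
# Hypothetical sketch of a DWD task built on BaseDwdTask (names are illustrative).
from .base_dwd_task import BaseDwdTask

class ExampleDwdTask(BaseDwdTask):
    def get_task_code(self) -> str:
        return "EXAMPLE_DWD"

    def execute(self) -> dict:
        window_start, window_end, _ = self._get_time_window()
        processed = 0
        for batch in self.iter_ods_rows(
            table_name="billiards_ods.ods_example",  # assumed ODS table
            columns=["store_id", "payload", "fetched_at"],
            start_time=window_start,
            end_time=window_end,
        ):
            for row in batch:
                payload = self.parse_payload(row)
                if payload:
                    processed += 1  # a real task would parse and upsert here
        self.db.commit()
        return {"status": "success", "processed": processed}
```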


@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
import os
import json
from datetime import datetime
from .base_task import BaseTask
from loaders.ods.generic import GenericODSLoader
class ManualIngestTask(BaseTask):
"""
Task to ingest manually fetched JSON files from a directory into ODS tables.
"""
    # Keys are substrings matched against the exported (Chinese) JSON file names.
    FILE_MAPPING = {
"小票详情": "billiards_ods.ods_ticket_detail",
"结账记录": "billiards_ods.ods_order_settle",
"支付记录": "billiards_ods.ods_payment",
"助教流水": "billiards_ods.ods_assistant_ledger",
"助教废除": "billiards_ods.ods_assistant_abolish",
"商品档案": "billiards_ods.ods_goods_ledger", # Note: This might be dim_product source, but mapping to ledger for now if it's sales
"库存变化": "billiards_ods.ods_inventory_change",
"会员档案": "billiards_ods.ods_member",
"充值记录": "billiards_ods.ods_member_card", # Approx
"团购套餐": "billiards_ods.ods_package_coupon",
"库存汇总": "billiards_ods.ods_inventory_stock"
}
def get_task_code(self) -> str:
return "MANUAL_INGEST"
def execute(self) -> dict:
self.logger.info("Starting Manual Ingest Task")
# Configurable directory, default to tests/testdata_json for now
data_dir = self.config.get("manual.data_dir", r"c:\dev\LLTQ\ETL\feiqiu-ETL\etl_billiards\tests\testdata_json")
if not os.path.exists(data_dir):
self.logger.error(f"Data directory not found: {data_dir}")
return {"status": "error", "message": "Directory not found"}
total_files = 0
total_rows = 0
for filename in os.listdir(data_dir):
if not filename.endswith(".json"):
continue
# Determine target table
target_table = None
for key, table in self.FILE_MAPPING.items():
if key in filename:
target_table = table
break
if not target_table:
self.logger.warning(f"No mapping found for file: {filename}, skipping.")
continue
self.logger.info(f"Ingesting {filename} into {target_table}")
try:
with open(os.path.join(data_dir, filename), 'r', encoding='utf-8') as f:
data = json.load(f)
if not isinstance(data, list):
data = [data]
                # GenericODSLoader normalizes raw API rows, but it needs per-table
                # PK configuration that only the registered ODS tasks define. Since
                # this task maps files to tables dynamically, do a small file-aware
                # ingestion here instead: treat each JSON item as the payload,
                # extract store_id and the table's PK ourselves, and upsert row dicts.
rows_to_insert = []
for item in data:
# Extract Store ID (usually in siteProfile or data root)
store_id = self._extract_store_id(item) or self.config.get("app.store_id")
# Extract PK (id, orderSettleId, etc.)
pk_val = self._extract_pk(item, target_table)
if not pk_val:
# Try to find 'id' in the item
pk_val = item.get("id")
if not pk_val:
# Special case for Ticket Detail
if "ods_ticket_detail" in target_table:
pk_val = item.get("orderSettleId")
if not pk_val:
continue
row = {
"store_id": store_id,
"payload": json.dumps(item, ensure_ascii=False),
"source_file": filename,
"fetched_at": datetime.now()
}
# Add specific PK column
pk_col = self._get_pk_column(target_table)
row[pk_col] = pk_val
rows_to_insert.append(row)
if rows_to_insert:
self._bulk_insert(target_table, rows_to_insert)
total_rows += len(rows_to_insert)
total_files += 1
except Exception as e:
self.logger.error(f"Error processing {filename}: {e}", exc_info=True)
return {"status": "success", "files_processed": total_files, "rows_inserted": total_rows}
def _extract_store_id(self, item):
# Try common paths
if "store_id" in item: return item["store_id"]
if "siteProfile" in item and "id" in item["siteProfile"]: return item["siteProfile"]["id"]
if "data" in item and "data" in item["data"] and "siteId" in item["data"]["data"]: return item["data"]["data"]["siteId"]
return None
def _extract_pk(self, item, table):
# Helper to find PK based on table
if "ods_order_settle" in table:
# Check for nested structure in some files
if "settleList" in item and "settleList" in item["settleList"]:
return item["settleList"]["settleList"].get("id")
return item.get("id")
return item.get("id")
def _get_pk_column(self, table):
if "ods_ticket_detail" in table: return "order_settle_id"
if "ods_order_settle" in table: return "order_settle_id"
if "ods_payment" in table: return "pay_id"
if "ods_member" in table: return "member_id"
if "ods_assistant_ledger" in table: return "ledger_id"
if "ods_goods_ledger" in table: return "order_goods_id"
if "ods_inventory_change" in table: return "change_id"
if "ods_assistant_abolish" in table: return "abolish_id"
if "ods_coupon_verify" in table: return "coupon_id"
if "ods_member_card" in table: return "card_id"
if "ods_package_coupon" in table: return "package_id"
return "id" # Fallback
def _bulk_insert(self, table, rows):
if not rows: return
keys = list(rows[0].keys())
cols = ", ".join(keys)
vals = ", ".join([f"%({k})s" for k in keys])
# Determine PK col for conflict
pk_col = self._get_pk_column(table)
sql = f"""
INSERT INTO {table} ({cols})
VALUES ({vals})
ON CONFLICT (store_id, {pk_col}) DO UPDATE SET
payload = EXCLUDED.payload,
fetched_at = EXCLUDED.fetched_at,
source_file = EXCLUDED.source_file;
"""
self.db.batch_execute(sql, rows)
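
For reference, this is roughly the statement `_bulk_insert` renders for `billiards_ods.ods_member` (column order follows the row dict built in `execute`; it runs through the project's `batch_execute` with pyformat parameters):

```python
# Assumed rendering of the upsert for billiards_ods.ods_member.
sql = """
    INSERT INTO billiards_ods.ods_member
        (store_id, payload, source_file, fetched_at, member_id)
    VALUES
        (%(store_id)s, %(payload)s, %(source_file)s, %(fetched_at)s, %(member_id)s)
    ON CONFLICT (store_id, member_id) DO UPDATE SET
        payload = EXCLUDED.payload,
        fetched_at = EXCLUDED.fetched_at,
        source_file = EXCLUDED.source_file;
"""
```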


@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from .base_dwd_task import BaseDwdTask
from loaders.dimensions.member import MemberLoader
from models.parsers import TypeParser
import json
class MembersDwdTask(BaseDwdTask):
"""
DWD Task: Process Member Records from ODS to Dimension Table
Source: billiards_ods.ods_member
Target: billiards.dim_member
"""
def get_task_code(self) -> str:
return "MEMBERS_DWD"
def execute(self) -> dict:
self.logger.info(f"Starting {self.get_task_code()} task")
window_start, window_end, _ = self._get_time_window()
self.logger.info(f"Processing window: {window_start} to {window_end}")
loader = MemberLoader(self.db)
store_id = self.config.get("app.store_id")
        total_inserted = 0
        total_updated = 0
# Iterate ODS Data
batches = self.iter_ods_rows(
table_name="billiards_ods.ods_member",
columns=["store_id", "member_id", "payload", "fetched_at"],
start_time=window_start,
end_time=window_end
)
for batch in batches:
if not batch:
continue
parsed_rows = []
for row in batch:
payload = self.parse_payload(row)
if not payload:
continue
parsed = self._parse_member(payload, store_id)
if parsed:
parsed_rows.append(parsed)
if parsed_rows:
inserted, updated, skipped = loader.upsert_members(parsed_rows, store_id)
total_inserted += inserted
total_updated += updated
self.db.commit()
self.logger.info(f"Task {self.get_task_code()} completed. Inserted: {total_inserted}, Updated: {total_updated}")
return {
"status": "success",
"inserted": total_inserted,
"updated": total_updated,
"window_start": window_start.isoformat(),
"window_end": window_end.isoformat()
}
def _parse_member(self, raw: dict, store_id: int) -> dict:
"""Parse ODS payload into Dim structure"""
try:
# Handle both API structure (camelCase) and manual structure
member_id = raw.get("id") or raw.get("memberId")
if not member_id:
return None
return {
"store_id": store_id,
"member_id": member_id,
"member_name": raw.get("name") or raw.get("memberName"),
"phone": raw.get("phone") or raw.get("mobile"),
"balance": raw.get("balance", 0),
"status": str(raw.get("status", "NORMAL")),
"register_time": raw.get("createTime") or raw.get("registerTime"),
"raw_data": json.dumps(raw, ensure_ascii=False)
}
except Exception as e:
self.logger.warning(f"Error parsing member: {e}")
return None
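
To make the camelCase/manual-shape fallbacks concrete, here is how a toy payload (invented values) resolves through `_parse_member`:

```python
# Invented sample in the camelCase API shape; a manual export would carry
# "memberId"/"memberName"/"mobile" and hit the same or-fallbacks.
raw = {"id": 101, "name": "Alice", "mobile": "13800000000",
       "balance": 250.0, "status": 1, "createTime": "2025-11-01 10:00:00"}

member_id = raw.get("id") or raw.get("memberId")  # -> 101
phone = raw.get("phone") or raw.get("mobile")     # -> "13800000000"
status = str(raw.get("status", "NORMAL"))         # -> "1"
assert (member_id, phone, status) == (101, "13800000000", "1")
```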


@@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
"""ODS ingestion tasks."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, Iterable, List, Sequence, Tuple, Type
from loaders.ods import GenericODSLoader
from models.parsers import TypeParser
from .base_task import BaseTask
ColumnTransform = Callable[[Any], Any]
@dataclass(frozen=True)
class ColumnSpec:
"""Mapping between DB column and source JSON field."""
column: str
sources: Tuple[str, ...] = ()
required: bool = False
default: Any = None
transform: ColumnTransform | None = None
@dataclass(frozen=True)
class OdsTaskSpec:
"""Definition of a single ODS ingestion task."""
code: str
class_name: str
table_name: str
endpoint: str
data_path: Tuple[str, ...] = ("data",)
list_key: str | None = None
pk_columns: Tuple[ColumnSpec, ...] = ()
extra_columns: Tuple[ColumnSpec, ...] = ()
include_page_size: bool = False
include_page_no: bool = True
include_source_file: bool = True
include_source_endpoint: bool = True
requires_window: bool = True
description: str = ""
extra_params: Dict[str, Any] = field(default_factory=dict)
class BaseOdsTask(BaseTask):
"""Shared functionality for ODS ingestion tasks."""
SPEC: OdsTaskSpec
def get_task_code(self) -> str:
return self.SPEC.code
def execute(self) -> dict:
spec = self.SPEC
self.logger.info("开始执行 %s (ODS)", spec.code)
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
if not store_id:
raise ValueError("app.store_id 未配置,无法执行 ODS 任务")
page_size = self.config.get("api.page_size", 200)
params = self._build_params(spec, store_id)
columns = self._resolve_columns(spec)
conflict_columns = ["store_id"] + [col.column for col in spec.pk_columns]
loader = GenericODSLoader(
self.db,
spec.table_name,
columns,
conflict_columns,
)
counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
source_file = self._resolve_source_file_hint(spec)
try:
for page_no, page_records, _, _ in self.api.iter_paginated(
endpoint=spec.endpoint,
params=params,
page_size=page_size,
data_path=spec.data_path,
list_key=spec.list_key,
):
rows: List[dict] = []
for raw in page_records:
row = self._build_row(
spec=spec,
store_id=store_id,
record=raw,
page_no=page_no if spec.include_page_no else None,
page_size_value=len(page_records)
if spec.include_page_size
else None,
source_file=source_file,
)
if row is None:
counts["skipped"] += 1
continue
rows.append(row)
inserted, updated, _ = loader.upsert_rows(rows)
counts["inserted"] += inserted
counts["updated"] += updated
counts["fetched"] += len(page_records)
self.db.commit()
self.logger.info("%s ODS 任务完成: %s", spec.code, counts)
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
counts["errors"] += 1
self.logger.error("%s ODS 任务失败", spec.code, exc_info=True)
raise
def _build_params(self, spec: OdsTaskSpec, store_id: int) -> dict:
params: dict[str, Any] = {"storeId": store_id}
params.update(spec.extra_params)
if spec.requires_window:
window_start, window_end, _ = self._get_time_window()
params["startTime"] = TypeParser.format_timestamp(window_start, self.tz)
params["endTime"] = TypeParser.format_timestamp(window_end, self.tz)
return params
def _resolve_columns(self, spec: OdsTaskSpec) -> List[str]:
columns: List[str] = ["store_id"]
seen = set(columns)
for col_spec in list(spec.pk_columns) + list(spec.extra_columns):
if col_spec.column not in seen:
columns.append(col_spec.column)
seen.add(col_spec.column)
if spec.include_page_no and "page_no" not in seen:
columns.append("page_no")
seen.add("page_no")
if spec.include_page_size and "page_size" not in seen:
columns.append("page_size")
seen.add("page_size")
if spec.include_source_file and "source_file" not in seen:
columns.append("source_file")
seen.add("source_file")
if spec.include_source_endpoint and "source_endpoint" not in seen:
columns.append("source_endpoint")
seen.add("source_endpoint")
if "fetched_at" not in seen:
columns.append("fetched_at")
seen.add("fetched_at")
if "payload" not in seen:
columns.append("payload")
return columns
def _build_row(
self,
spec: OdsTaskSpec,
store_id: int,
record: dict,
page_no: int | None,
page_size_value: int | None,
source_file: str | None,
) -> dict | None:
row: dict[str, Any] = {"store_id": store_id}
for col_spec in spec.pk_columns + spec.extra_columns:
value = self._extract_value(record, col_spec)
if value is None and col_spec.required:
                self.logger.warning(
                    "%s missing required field %s, raw record: %s",
                    spec.code,
                    col_spec.column,
                    record,
                )
return None
row[col_spec.column] = value
if spec.include_page_no:
row["page_no"] = page_no
if spec.include_page_size:
row["page_size"] = page_size_value
if spec.include_source_file:
row["source_file"] = source_file
if spec.include_source_endpoint:
row["source_endpoint"] = spec.endpoint
row["fetched_at"] = datetime.now(self.tz)
row["payload"] = record
return row
def _extract_value(self, record: dict, spec: ColumnSpec):
value = None
for key in spec.sources:
value = self._dig(record, key)
if value is not None:
break
if value is None and spec.default is not None:
value = spec.default
if value is not None and spec.transform:
value = spec.transform(value)
return value
@staticmethod
def _dig(record: Any, path: str | None):
if not path:
return None
current = record
for part in path.split("."):
if isinstance(current, dict):
current = current.get(part)
else:
return None
return current
def _resolve_source_file_hint(self, spec: OdsTaskSpec) -> str | None:
resolver = getattr(self.api, "get_source_hint", None)
if callable(resolver):
return resolver(spec.endpoint)
return None
def _int_col(name: str, *sources: str, required: bool = False) -> ColumnSpec:
return ColumnSpec(
column=name,
sources=sources,
required=required,
transform=TypeParser.parse_int,
)
ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
OdsTaskSpec(
code="ODS_ORDER_SETTLE",
class_name="OdsOrderSettleTask",
table_name="billiards_ods.ods_order_settle",
endpoint="/order/list",
data_path=("data",),
pk_columns=(_int_col("order_settle_id", "orderSettleId", "order_settle_id", "id", required=True),),
extra_columns=(_int_col("order_trade_no", "orderTradeNo", "order_trade_no"),),
include_page_size=True,
description="订单/结算 ODS 原始记录",
),
OdsTaskSpec(
code="ODS_TABLE_USE",
class_name="OdsTableUseTask",
table_name="billiards_ods.ods_table_use_detail",
endpoint="/Table/UseDetailList",
data_path=("data", "siteTableUseDetailsList"),
pk_columns=(_int_col("ledger_id", "id", required=True),),
extra_columns=(
_int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
_int_col("order_settle_id", "order_settle_id", "orderSettleId"),
),
description="台费/开台流水 ODS",
),
OdsTaskSpec(
code="ODS_ASSISTANT_LEDGER",
class_name="OdsAssistantLedgerTask",
table_name="billiards_ods.ods_assistant_ledger",
endpoint="/Assistant/LedgerList",
data_path=("data", "orderAssistantDetails"),
pk_columns=(_int_col("ledger_id", "id", required=True),),
extra_columns=(
_int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
_int_col("order_settle_id", "order_settle_id", "orderSettleId"),
),
description="助教流水 ODS",
),
OdsTaskSpec(
code="ODS_ASSISTANT_ABOLISH",
class_name="OdsAssistantAbolishTask",
table_name="billiards_ods.ods_assistant_abolish",
endpoint="/Assistant/AbolishList",
data_path=("data", "abolitionAssistants"),
pk_columns=(_int_col("abolish_id", "id", required=True),),
description="助教作废记录 ODS",
),
OdsTaskSpec(
code="ODS_GOODS_LEDGER",
class_name="OdsGoodsLedgerTask",
table_name="billiards_ods.ods_goods_ledger",
endpoint="/Order/GoodsLedgerList",
data_path=("data", "orderGoodsLedgers"),
pk_columns=(_int_col("order_goods_id", "orderGoodsId", "id", required=True),),
extra_columns=(
_int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
_int_col("order_settle_id", "order_settle_id", "orderSettleId"),
),
description="商品销售流水 ODS",
),
OdsTaskSpec(
code="ODS_PAYMENT",
class_name="OdsPaymentTask",
table_name="billiards_ods.ods_payment",
endpoint="/pay/records",
data_path=("data",),
pk_columns=(_int_col("pay_id", "payId", "id", required=True),),
extra_columns=(
ColumnSpec(column="relate_type", sources=("relate_type", "relateType")),
_int_col("relate_id", "relate_id", "relateId"),
),
include_page_size=False,
description="支付流水 ODS",
),
OdsTaskSpec(
code="ODS_REFUND",
class_name="OdsRefundTask",
table_name="billiards_ods.ods_refund",
endpoint="/Pay/RefundList",
data_path=(),
pk_columns=(_int_col("refund_id", "id", required=True),),
extra_columns=(
ColumnSpec(column="relate_type", sources=("relate_type", "relateType")),
_int_col("relate_id", "relate_id", "relateId"),
),
description="退款流水 ODS",
),
OdsTaskSpec(
code="ODS_COUPON_VERIFY",
class_name="OdsCouponVerifyTask",
table_name="billiards_ods.ods_coupon_verify",
endpoint="/Coupon/UsageList",
data_path=(),
pk_columns=(_int_col("coupon_id", "id", "couponId", required=True),),
description="平台验券/团购流水 ODS",
),
OdsTaskSpec(
code="ODS_MEMBER",
class_name="OdsMemberTask",
table_name="billiards_ods.ods_member",
endpoint="/MemberProfile/GetTenantMemberList",
data_path=("data",),
pk_columns=(_int_col("member_id", "memberId", required=True),),
requires_window=False,
description="会员档案 ODS",
),
OdsTaskSpec(
code="ODS_MEMBER_CARD",
class_name="OdsMemberCardTask",
table_name="billiards_ods.ods_member_card",
endpoint="/MemberCard/List",
data_path=("data", "tenantMemberCards"),
pk_columns=(_int_col("card_id", "tenantMemberCardId", "cardId", required=True),),
requires_window=False,
description="会员卡/储值卡 ODS",
),
OdsTaskSpec(
code="ODS_PACKAGE",
class_name="OdsPackageTask",
table_name="billiards_ods.ods_package_coupon",
endpoint="/Package/List",
data_path=("data", "packageCouponList"),
pk_columns=(_int_col("package_id", "id", "packageId", required=True),),
requires_window=False,
description="团购/套餐定义 ODS",
),
OdsTaskSpec(
code="ODS_INVENTORY_STOCK",
class_name="OdsInventoryStockTask",
table_name="billiards_ods.ods_inventory_stock",
endpoint="/Inventory/StockSummary",
data_path=(),
pk_columns=(
_int_col("site_goods_id", "siteGoodsId", required=True),
ColumnSpec(column="snapshot_key", default="default", required=True),
),
requires_window=False,
description="库存汇总 ODS",
),
OdsTaskSpec(
code="ODS_INVENTORY_CHANGE",
class_name="OdsInventoryChangeTask",
table_name="billiards_ods.ods_inventory_change",
endpoint="/Inventory/ChangeList",
data_path=("data", "queryDeliveryRecordsList"),
pk_columns=(_int_col("change_id", "siteGoodsStockId", "id", required=True),),
description="库存变动 ODS",
),
)
def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
attrs = {
"SPEC": spec,
"__doc__": spec.description or f"ODS ingestion task {spec.code}",
"__module__": __name__,
}
return type(spec.class_name, (BaseOdsTask,), attrs)
ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
spec.code: _build_task_class(spec) for spec in ODS_TASK_SPECS
}
__all__ = ["ODS_TASK_CLASSES", "ODS_TASK_SPECS", "BaseOdsTask"]
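
The dotted-path lookup in `_dig` is the one non-obvious helper in this module; the sketch below restates the same logic standalone, and notes what the dynamic `type(...)` construction yields.

```python
# Standalone restatement of the _dig dotted-path lookup.
def dig(record, path):
    current = record
    for part in path.split("."):
        if isinstance(current, dict):
            current = current.get(part)
        else:
            return None  # hit a leaf (or None) before the path was exhausted
    return current

record = {"data": {"siteProfile": {"id": 7}}}
assert dig(record, "data.siteProfile.id") == 7
assert dig(record, "data.missing.id") is None

# _build_task_class then turns every spec into a concrete subclass, e.g.
# ODS_TASK_CLASSES["ODS_PAYMENT"] is a BaseOdsTask subclass named OdsPaymentTask.
```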


@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
from .base_dwd_task import BaseDwdTask
from loaders.facts.payment import PaymentLoader
from models.parsers import TypeParser
import json
class PaymentsDwdTask(BaseDwdTask):
"""
DWD Task: Process Payment Records from ODS to Fact Table
Source: billiards_ods.ods_payment
Target: billiards.fact_payment
"""
def get_task_code(self) -> str:
return "PAYMENTS_DWD"
def execute(self) -> dict:
self.logger.info(f"Starting {self.get_task_code()} task")
window_start, window_end, _ = self._get_time_window()
self.logger.info(f"Processing window: {window_start} to {window_end}")
loader = PaymentLoader(self.db)
store_id = self.config.get("app.store_id")
total_inserted = 0
total_errors = 0
# Iterate ODS Data
batches = self.iter_ods_rows(
table_name="billiards_ods.ods_payment",
columns=["store_id", "pay_id", "payload", "fetched_at"],
start_time=window_start,
end_time=window_end
)
for batch in batches:
if not batch:
continue
parsed_rows = []
for row in batch:
payload = self.parse_payload(row)
if not payload:
continue
parsed = self._parse_payment(payload, store_id)
if parsed:
parsed_rows.append(parsed)
if parsed_rows:
inserted, errors = loader.upsert_payments(parsed_rows, store_id)
total_inserted += inserted
total_errors += errors
self.db.commit()
self.logger.info(f"Task {self.get_task_code()} completed. Inserted: {total_inserted}, Errors: {total_errors}")
return {
"status": "success",
"inserted": total_inserted,
"errors": total_errors,
"window_start": window_start.isoformat(),
"window_end": window_end.isoformat()
}
def _parse_payment(self, raw: dict, store_id: int) -> dict:
"""Parse ODS payload into Fact structure"""
try:
pay_id = TypeParser.parse_int(raw.get("payId") or raw.get("id"))
if not pay_id:
return None
relate_type = str(raw.get("relateType") or raw.get("relate_type") or "")
relate_id = TypeParser.parse_int(raw.get("relateId") or raw.get("relate_id"))
# Attempt to populate settlement / trade identifiers
order_settle_id = TypeParser.parse_int(
raw.get("orderSettleId") or raw.get("order_settle_id")
)
order_trade_no = TypeParser.parse_int(
raw.get("orderTradeNo") or raw.get("order_trade_no")
)
if relate_type in {"1", "SETTLE", "ORDER"}:
order_settle_id = order_settle_id or relate_id
return {
"store_id": store_id,
"pay_id": pay_id,
"order_settle_id": order_settle_id,
"order_trade_no": order_trade_no,
"relate_type": relate_type,
"relate_id": relate_id,
"site_id": TypeParser.parse_int(
raw.get("siteId") or raw.get("site_id") or store_id
),
"tenant_id": TypeParser.parse_int(raw.get("tenantId") or raw.get("tenant_id")),
"create_time": TypeParser.parse_timestamp(
raw.get("createTime") or raw.get("create_time"), self.tz
),
"pay_time": TypeParser.parse_timestamp(raw.get("payTime"), self.tz),
"pay_amount": TypeParser.parse_decimal(raw.get("payAmount")),
"fee_amount": TypeParser.parse_decimal(
raw.get("feeAmount")
or raw.get("serviceFee")
or raw.get("channelFee")
or raw.get("fee_amount")
),
"discount_amount": TypeParser.parse_decimal(
raw.get("discountAmount")
or raw.get("couponAmount")
or raw.get("discount_amount")
),
"payment_method": str(raw.get("paymentMethod") or raw.get("payment_method") or ""),
"online_pay_channel": raw.get("onlinePayChannel") or raw.get("online_pay_channel"),
"pay_terminal": raw.get("payTerminal") or raw.get("pay_terminal"),
"pay_status": str(raw.get("payStatus") or raw.get("pay_status") or ""),
"raw_data": json.dumps(raw, ensure_ascii=False)
}
except Exception as e:
self.logger.warning(f"Error parsing payment: {e}")
return None
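
The relate_type fallback reads most clearly in isolation; a small sketch mirroring the logic above (sample values invented):

```python
def resolve_settle_id(relate_type, relate_id, order_settle_id):
    # A payment tied to a settlement/order borrows relate_id when the
    # payload carries no explicit order_settle_id; other relate types
    # (e.g. refunds) leave order_settle_id untouched.
    if relate_type in {"1", "SETTLE", "ORDER"}:
        return order_settle_id or relate_id
    return order_settle_id

assert resolve_settle_id("1", 555, None) == 555    # borrowed from relate_id
assert resolve_settle_id("ORDER", 555, 42) == 42   # explicit id wins
assert resolve_settle_id("REFUND", 555, None) is None
```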


@@ -62,16 +62,42 @@ class PaymentsTask(BaseTask):
def _parse_payment(self, raw: dict) -> dict:
"""解析支付记录"""
try:
store_id = self.config.get("app.store_id")
return {
"store_id": self.config.get("app.store_id"),
"pay_id": TypeParser.parse_int(raw.get("payId")),
"store_id": store_id,
"pay_id": TypeParser.parse_int(raw.get("payId") or raw.get("id")),
"order_id": TypeParser.parse_int(raw.get("orderId")),
"order_settle_id": TypeParser.parse_int(
raw.get("orderSettleId") or raw.get("order_settle_id")
),
"order_trade_no": TypeParser.parse_int(
raw.get("orderTradeNo") or raw.get("order_trade_no")
),
"relate_type": raw.get("relateType") or raw.get("relate_type"),
"relate_id": TypeParser.parse_int(raw.get("relateId") or raw.get("relate_id")),
"site_id": TypeParser.parse_int(raw.get("siteId") or raw.get("site_id") or store_id),
"tenant_id": TypeParser.parse_int(raw.get("tenantId") or raw.get("tenant_id")),
"pay_time": TypeParser.parse_timestamp(raw.get("payTime"), self.tz),
"create_time": TypeParser.parse_timestamp(
raw.get("createTime") or raw.get("create_time"), self.tz
),
"pay_amount": TypeParser.parse_decimal(raw.get("payAmount")),
"fee_amount": TypeParser.parse_decimal(
raw.get("feeAmount")
or raw.get("serviceFee")
or raw.get("channelFee")
or raw.get("fee_amount")
),
"discount_amount": TypeParser.parse_decimal(
raw.get("discountAmount") or raw.get("couponAmount") or raw.get("discount_amount")
),
"pay_type": raw.get("payType"),
"payment_method": raw.get("paymentMethod") or raw.get("payment_method"),
"online_pay_channel": raw.get("onlinePayChannel") or raw.get("online_pay_channel"),
"pay_status": raw.get("payStatus"),
"pay_terminal": raw.get("payTerminal") or raw.get("pay_terminal"),
"remark": raw.get("remark"),
"raw_data": json.dumps(raw, ensure_ascii=False)
"raw_data": json.dumps(raw, ensure_ascii=False),
}
except Exception as e:
self.logger.warning(f"解析支付记录失败: {e}, 原始数据: {raw}")


@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
from .base_dwd_task import BaseDwdTask
from loaders.facts.ticket import TicketLoader
class TicketDwdTask(BaseDwdTask):
"""
DWD Task: Process Ticket Details from ODS to Fact Tables
Source: billiards_ods.ods_ticket_detail
Targets:
- billiards.fact_order
- billiards.fact_order_goods
- billiards.fact_table_usage
- billiards.fact_assistant_service
"""
def get_task_code(self) -> str:
return "TICKET_DWD"
def execute(self) -> dict:
self.logger.info(f"Starting {self.get_task_code()} task")
# 1. Get Time Window (Incremental Load)
window_start, window_end, _ = self._get_time_window()
self.logger.info(f"Processing window: {window_start} to {window_end}")
# 2. Initialize Loader
loader = TicketLoader(self.db)
store_id = self.config.get("app.store_id")
total_inserted = 0
total_errors = 0
# 3. Iterate ODS Data
# We query ods_ticket_detail based on fetched_at
batches = self.iter_ods_rows(
table_name="billiards_ods.ods_ticket_detail",
columns=["store_id", "order_settle_id", "payload", "fetched_at"],
start_time=window_start,
end_time=window_end
)
for batch in batches:
if not batch:
continue
# Extract payloads
tickets = []
for row in batch:
payload = self.parse_payload(row)
if payload:
tickets.append(payload)
# Process Batch
inserted, errors = loader.process_tickets(tickets, store_id)
total_inserted += inserted
total_errors += errors
# 4. Commit
self.db.commit()
self.logger.info(f"Task {self.get_task_code()} completed. Inserted: {total_inserted}, Errors: {total_errors}")
return {
"status": "success",
"inserted": total_inserted,
"errors": total_errors,
"window_start": window_start.isoformat(),
"window_end": window_end.isoformat()
}
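
Taken together, a driver that ingests the manual JSON exports first and then replays the DWD tasks over the same window might look like the sketch below; the constructor signature and the ordering are assumptions, not part of this commit.

```python
# Hypothetical wiring; the real BaseTask constructor may differ.
for task_cls in (ManualIngestTask, TicketDwdTask, PaymentsDwdTask, MembersDwdTask):
    task = task_cls(config=config, db=db, logger=logger)  # assumed signature
    result = task.execute()
    logger.info("%s -> %s", task.get_task_code(), result.get("status"))
```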