迁移代码到Git

This commit is contained in:
Neo
2025-11-18 02:32:00 +08:00
parent 7f87421678
commit c3749474c6
85 changed files with 185478 additions and 0 deletions

View File

View File

@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
"""ETL任务基类"""
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
class BaseTask:
"""ETL任务基类"""
def __init__(self, config, db_connection, api_client, logger):
self.config = config
self.db = db_connection
self.api = api_client
self.logger = logger
self.tz = ZoneInfo(config.get("app.timezone", "Asia/Taipei"))
def get_task_code(self) -> str:
"""获取任务代码"""
raise NotImplementedError("子类需实现 get_task_code 方法")
def execute(self) -> dict:
"""执行任务"""
raise NotImplementedError("子类需实现 execute 方法")
def _get_time_window(self, cursor_data: dict = None) -> tuple:
"""计算时间窗口"""
now = datetime.now(self.tz)
# 判断是否在闲时窗口
idle_start = self.config.get("run.idle_window.start", "04:00")
idle_end = self.config.get("run.idle_window.end", "16:00")
is_idle = self._is_in_idle_window(now, idle_start, idle_end)
# 获取窗口大小
if is_idle:
window_minutes = self.config.get("run.window_minutes.default_idle", 180)
else:
window_minutes = self.config.get("run.window_minutes.default_busy", 30)
# 计算窗口
overlap_seconds = self.config.get("run.overlap_seconds", 120)
if cursor_data and cursor_data.get("last_end"):
window_start = cursor_data["last_end"] - timedelta(seconds=overlap_seconds)
else:
window_start = now - timedelta(minutes=window_minutes)
window_end = now
return window_start, window_end, window_minutes
def _is_in_idle_window(self, dt: datetime, start_time: str, end_time: str) -> bool:
"""判断是否在闲时窗口"""
current_time = dt.strftime("%H:%M")
return start_time <= current_time <= end_time
def _build_result(self, status: str, counts: dict) -> dict:
"""构建结果字典"""
return {
"status": status,
"counts": counts
}

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
"""会员ETL任务"""
import json
from .base_task import BaseTask
from loaders.dimensions.member import MemberLoader
from models.parsers import TypeParser
class MembersTask(BaseTask):
    """Member ETL task: fetch the member list, parse it and upsert it."""

    def get_task_code(self) -> str:
        """Return the task code ``"MEMBERS"``."""
        return "MEMBERS"

    def execute(self) -> dict:
        """Run the member ETL.

        Pulls all pages from ``/MemberProfile/GetTenantMemberList``, parses
        each record, upserts the parsed records through ``MemberLoader`` and
        commits. On any exception the transaction is rolled back, the
        failure is logged and the exception is re-raised.

        Returns:
            The result dictionary built by ``_build_result`` with status
            ``"SUCCESS"`` and the counters ``fetched``, ``inserted``,
            ``updated``, ``skipped`` and ``errors``. ``errors`` is the number
            of fetched records that ``_parse_member`` could not parse.
        """
        self.logger.info(f"开始执行 {self.get_task_code()} 任务")
        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
        }
        try:
            records, _pages_meta = self.api.get_paginated(
                endpoint="/MemberProfile/GetTenantMemberList",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # Records that fail parsing come back as None and are dropped.
            parsed_records = [
                parsed
                for parsed in (self._parse_member(rec) for rec in records)
                if parsed
            ]

            loader = MemberLoader(self.db)
            inserted, updated, skipped = loader.upsert_members(parsed_records, store_id)
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                # Report dropped records instead of a hard-coded 0.
                "errors": len(records) - len(parsed_records),
            }
            self.logger.info(f"{self.get_task_code()} 完成: {counts}")
            return self._build_result("SUCCESS", counts)
        except Exception:
            # Roll back so a failed run leaves no partial writes, then re-raise.
            self.db.rollback()
            self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
            raise

    def _parse_member(self, raw: dict) -> dict | None:
        """Map one upstream member record to the loader's row dictionary.

        Args:
            raw: Member record as returned by the API.

        Returns:
            The parsed row, or None when parsing raised an exception (a
            warning is logged in that case).
        """
        try:
            return {
                "store_id": self.config.get("app.store_id"),
                "member_id": TypeParser.parse_int(raw.get("memberId")),
                "member_name": raw.get("memberName"),
                "phone": raw.get("phone"),
                "balance": TypeParser.parse_decimal(raw.get("balance")),
                "status": raw.get("status"),
                "register_time": TypeParser.parse_timestamp(raw.get("registerTime"), self.tz),
                # Keep the untouched upstream payload alongside the parsed fields.
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as e:
            # NOTE(review): the raw record (including the phone field) is written
            # to the log here — confirm this is acceptable for member data.
            self.logger.warning(f"解析会员记录失败: {e}, 原始数据: {raw}")
            return None

View File

@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
"""订单ETL任务"""
import json
from .base_task import BaseTask
from loaders.facts.order import OrderLoader
from models.parsers import TypeParser
class OrdersTask(BaseTask):
    """Order ETL task: fetch orders for a time window, parse and upsert them."""

    def get_task_code(self) -> str:
        """Return the task code ``"ORDERS"``."""
        return "ORDERS"

    def execute(self) -> dict:
        """Run the order ETL.

        Computes the time window, pulls all pages from ``/order/list`` for
        that window, parses each record, upserts the parsed records through
        ``OrderLoader`` and commits. On any exception raised after the window
        is computed, the transaction is rolled back, the failure is logged
        and the exception is re-raised.

        Returns:
            The result dictionary built by ``_build_result`` with status
            ``"SUCCESS"`` and the counters ``fetched``, ``inserted``,
            ``updated``, ``skipped`` and ``errors``. ``errors`` is the number
            of fetched records that ``_parse_order`` could not parse.
        """
        self.logger.info(f"开始执行 {self.get_task_code()} 任务")

        # 1. Determine the time window (the window length is not needed here).
        window_start, window_end, _window_minutes = self._get_time_window()

        # 2. Fetch the data from the API.
        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
            "startTime": TypeParser.format_timestamp(window_start, self.tz),
            "endTime": TypeParser.format_timestamp(window_end, self.tz),
        }
        try:
            records, _pages_meta = self.api.get_paginated(
                endpoint="/order/list",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # 3. Parse and clean; records that fail parsing come back as None.
            parsed_records = [
                parsed
                for parsed in (self._parse_order(rec) for rec in records)
                if parsed
            ]

            # 4. Load the data.
            loader = OrderLoader(self.db)
            inserted, updated, skipped = loader.upsert_orders(parsed_records, store_id)

            # 5. Commit the transaction.
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                # Report dropped records instead of a hard-coded 0.
                "errors": len(records) - len(parsed_records),
            }
            self.logger.info(f"{self.get_task_code()} 完成: {counts}")
            return self._build_result("SUCCESS", counts)
        except Exception:
            # Roll back so a failed run leaves no partial writes, then re-raise.
            self.db.rollback()
            self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
            raise

    def _parse_order(self, raw: dict) -> dict | None:
        """Map one upstream order record to the loader's row dictionary.

        Args:
            raw: Order record as returned by the API.

        Returns:
            The parsed row, or None when parsing raised an exception (a
            warning is logged in that case).
        """
        try:
            return {
                "store_id": self.config.get("app.store_id"),
                "order_id": TypeParser.parse_int(raw.get("orderId")),
                "order_no": raw.get("orderNo"),
                "member_id": TypeParser.parse_int(raw.get("memberId")),
                "table_id": TypeParser.parse_int(raw.get("tableId")),
                "order_time": TypeParser.parse_timestamp(raw.get("orderTime"), self.tz),
                "end_time": TypeParser.parse_timestamp(raw.get("endTime"), self.tz),
                "total_amount": TypeParser.parse_decimal(raw.get("totalAmount")),
                "discount_amount": TypeParser.parse_decimal(raw.get("discountAmount")),
                "final_amount": TypeParser.parse_decimal(raw.get("finalAmount")),
                "pay_status": raw.get("payStatus"),
                "order_status": raw.get("orderStatus"),
                "remark": raw.get("remark"),
                # Keep the untouched upstream payload alongside the parsed fields.
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as e:
            self.logger.warning(f"解析订单失败: {e}, 原始数据: {raw}")
            return None

View File

@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
"""支付记录ETL任务"""
import json
from .base_task import BaseTask
from loaders.facts.payment import PaymentLoader
from models.parsers import TypeParser
class PaymentsTask(BaseTask):
    """Payment ETL task: fetch payment records for a time window and upsert them."""

    def get_task_code(self) -> str:
        """Return the task code ``"PAYMENTS"``."""
        return "PAYMENTS"

    def execute(self) -> dict:
        """Run the payment ETL.

        Computes the time window, pulls all pages from ``/pay/records`` for
        that window, parses each record, upserts the parsed records through
        ``PaymentLoader`` and commits. On any exception raised after the
        window is computed, the transaction is rolled back, the failure is
        logged and the exception is re-raised.

        Returns:
            The result dictionary built by ``_build_result`` with status
            ``"SUCCESS"`` and the counters ``fetched``, ``inserted``,
            ``updated``, ``skipped`` and ``errors``. ``errors`` is the number
            of fetched records that ``_parse_payment`` could not parse.
        """
        self.logger.info(f"开始执行 {self.get_task_code()} 任务")

        # The window length is not needed here, only its bounds.
        window_start, window_end, _window_minutes = self._get_time_window()

        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
            "startTime": TypeParser.format_timestamp(window_start, self.tz),
            "endTime": TypeParser.format_timestamp(window_end, self.tz),
        }
        try:
            records, _pages_meta = self.api.get_paginated(
                endpoint="/pay/records",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # Records that fail parsing come back as None and are dropped.
            parsed_records = [
                parsed
                for parsed in (self._parse_payment(rec) for rec in records)
                if parsed
            ]

            loader = PaymentLoader(self.db)
            inserted, updated, skipped = loader.upsert_payments(parsed_records, store_id)
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                # Report dropped records instead of a hard-coded 0.
                "errors": len(records) - len(parsed_records),
            }
            self.logger.info(f"{self.get_task_code()} 完成: {counts}")
            return self._build_result("SUCCESS", counts)
        except Exception:
            # Roll back so a failed run leaves no partial writes, then re-raise.
            self.db.rollback()
            self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
            raise

    def _parse_payment(self, raw: dict) -> dict | None:
        """Map one upstream payment record to the loader's row dictionary.

        Args:
            raw: Payment record as returned by the API.

        Returns:
            The parsed row, or None when parsing raised an exception (a
            warning is logged in that case).
        """
        try:
            return {
                "store_id": self.config.get("app.store_id"),
                "pay_id": TypeParser.parse_int(raw.get("payId")),
                "order_id": TypeParser.parse_int(raw.get("orderId")),
                "pay_time": TypeParser.parse_timestamp(raw.get("payTime"), self.tz),
                "pay_amount": TypeParser.parse_decimal(raw.get("payAmount")),
                "pay_type": raw.get("payType"),
                "pay_status": raw.get("payStatus"),
                "remark": raw.get("remark"),
                # Keep the untouched upstream payload alongside the parsed fields.
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as e:
            self.logger.warning(f"解析支付记录失败: {e}, 原始数据: {raw}")
            return None

View File

@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
"""Product catalog (PRODUCTS) ETL task."""
import json
from .base_task import BaseTask
from loaders.dimensions.product import ProductLoader
from models.parsers import TypeParser
class ProductsTask(BaseTask):
    """Product dimension ETL task."""

    def get_task_code(self) -> str:
        """Return the task code; it should match ``etl_admin.etl_task.task_code``."""
        return "PRODUCTS"

    def execute(self) -> dict:
        """Run the product catalog ETL.

        Steps:
            1. Pull the product list page by page from the upstream
               ``/TenantGoods/QueryTenantGoods`` endpoint.
            2. Parse and clean the fields.
            3. Write through ``ProductLoader`` (per the original notes this
               targets ``dim_product`` and ``dim_product_price_scd``).
            4. Commit the transaction.

        On any exception the transaction is rolled back, the failure is
        logged and the exception is re-raised.

        Returns:
            The result dictionary built by ``_build_result`` with status
            ``"SUCCESS"`` and the counters ``fetched``, ``inserted``,
            ``updated``, ``skipped`` and ``errors``. ``errors`` is the number
            of fetched records dropped by ``_parse_product`` (parse failure
            or missing primary key).
        """
        self.logger.info(f"开始执行 {self.get_task_code()} 任务")
        store_id = self.config.get("app.store_id")
        params = {
            "storeId": store_id,
        }
        try:
            # 1. Pull the data page by page.
            records, _pages_meta = self.api.get_paginated(
                endpoint="/TenantGoods/QueryTenantGoods",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            )

            # 2. Parse / clean; unusable records come back as None.
            parsed_records = [
                parsed
                for parsed in (self._parse_product(raw) for raw in records)
                if parsed
            ]

            # 3. Load (dimension main table + price SCD2).
            loader = ProductLoader(self.db)
            inserted, updated, skipped = loader.upsert_products(
                parsed_records, store_id
            )

            # 4. Commit the transaction.
            self.db.commit()

            counts = {
                "fetched": len(records),
                "inserted": inserted,
                "updated": updated,
                "skipped": skipped,
                # Report dropped records instead of a hard-coded 0.
                "errors": len(records) - len(parsed_records),
            }
            self.logger.info(f"{self.get_task_code()} 完成: {counts}")
            return self._build_result("SUCCESS", counts)
        except Exception:
            # Roll back explicitly to avoid a partially successful run.
            self.db.rollback()
            self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
            raise

    def _parse_product(self, raw: dict) -> dict | None:
        """Map one upstream product record to the loader's row dictionary.

        The field mapping follows the legacy
        ``upsert_dim_product_and_price_scd`` implementation.

        Upstream fields read here:
            - siteGoodsId / tenantGoodsId / productId
            - goodsName / productName
            - tenantGoodsCategoryId / goodsCategoryId / categoryName /
              goodsCategorySecondId
            - goodsUnit
            - costPrice / goodsPrice / salePrice
            - goodsState / status
            - supplierId / barcode / isCombo
            - createTime / updateTime

        Args:
            raw: Product record as returned by the API.

        Returns:
            The parsed row, or None when no usable primary key is present or
            when parsing raised an exception (a warning is logged for the
            latter).
        """
        try:
            # The first truthy candidate among the three id fields wins.
            product_id = TypeParser.parse_int(
                raw.get("siteGoodsId")
                or raw.get("tenantGoodsId")
                or raw.get("productId")
            )
            if not product_id:
                # Primary key missing: skip the record.
                return None

            supplier_raw = raw.get("supplierId")
            combo_raw = raw.get("isCombo")
            return {
                "store_id": self.config.get("app.store_id"),
                "product_id": product_id,
                "site_product_id": TypeParser.parse_int(raw.get("siteGoodsId")),
                "product_name": raw.get("goodsName") or raw.get("productName"),
                "category_id": TypeParser.parse_int(
                    raw.get("tenantGoodsCategoryId") or raw.get("goodsCategoryId")
                ),
                "category_name": raw.get("categoryName"),
                "second_category_id": TypeParser.parse_int(
                    raw.get("goodsCategorySecondId")
                ),
                "unit": raw.get("goodsUnit"),
                "cost_price": TypeParser.parse_decimal(raw.get("costPrice")),
                "sale_price": TypeParser.parse_decimal(
                    raw.get("goodsPrice") or raw.get("salePrice")
                ),
                # The legacy version also used None here; fill this in once a
                # definite upstream field is known.
                "allow_discount": None,
                "status": raw.get("goodsState") or raw.get("status"),
                # A falsy supplier id is stored as None without parsing.
                "supplier_id": TypeParser.parse_int(supplier_raw)
                if supplier_raw
                else None,
                "barcode": raw.get("barcode"),
                # Keep "unknown" (None) distinct from an explicit false value.
                "is_combo": bool(combo_raw) if combo_raw is not None else None,
                "created_time": TypeParser.parse_timestamp(
                    raw.get("createTime"), self.tz
                ),
                "updated_time": TypeParser.parse_timestamp(
                    raw.get("updateTime"), self.tz
                ),
                # Keep the untouched upstream payload alongside the parsed fields.
                "raw_data": json.dumps(raw, ensure_ascii=False),
            }
        except Exception as e:
            self.logger.warning(f"解析商品记录失败: {e}, 原始数据: {raw}")
            return None

View File

@@ -0,0 +1,4 @@
# The original skeleton referenced BaseTask without importing it; the sibling
# task modules import it from the same package.
from .base_task import BaseTask


class TablesTask(BaseTask):
    """Table ETL task (skeleton; the extraction logic is not implemented yet).

    The original file held only bodiless method signatures, which is not
    valid Python. The methods are kept with the same signatures and now
    either return the documented value or raise ``NotImplementedError``.
    """

    def get_task_code(self) -> str:
        """Return the task code ``"TABLES"``."""
        return "TABLES"

    def execute(self) -> dict:
        """Run the table ETL (not implemented).

        Per the original outline this is meant to pull
        ``/Table/GetSiteTables``.

        Raises:
            NotImplementedError: Always, until the task is implemented.
        """
        # TODO: pull /Table/GetSiteTables, parse with _parse_table and load.
        raise NotImplementedError("TablesTask.execute is not implemented yet")

    def _parse_table(self, raw: dict) -> dict | None:
        """Parse one upstream table record (not implemented).

        Args:
            raw: Table record as returned by the API.

        Raises:
            NotImplementedError: Always, until the task is implemented.
        """
        raise NotImplementedError("TablesTask._parse_table is not implemented yet")