ODS 完成

This commit is contained in:
Neo
2025-11-30 07:19:05 +08:00
parent b9b050bb5d
commit a6ad343092
81 changed files with 15695 additions and 227180 deletions

View File

@@ -26,6 +26,7 @@ from tasks.refunds_task import RefundsTask
from tasks.table_discount_task import TableDiscountTask
from tasks.tables_task import TablesTask
from tasks.topups_task import TopupsTask
from utils.json_store import endpoint_to_filename
DEFAULT_STORE_ID = 2790685415443269
BASE_TS = "2025-01-01 10:00:00"
@@ -47,12 +48,6 @@ class TaskSpec:
return endpoint_to_filename(self.endpoint)
def endpoint_to_filename(endpoint: str) -> str:
    """Derive a stable, reusable file name from an API endpoint.

    Surrounding slashes are dropped, inner slashes become ``__``, spaces
    become ``_`` and the result is lower-cased, so offline mode can locate
    the archived JSON for an endpoint directly inside a directory. An
    endpoint that reduces to an empty string maps to ``root.json``.
    """
    stem = endpoint.strip("/")
    # Order matters only in that "/" is replaced before " "; the two
    # substitutions never produce each other's input characters.
    for needle, replacement in (("/", "__"), (" ", "_")):
        stem = stem.replace(needle, replacement)
    stem = stem.lower()
    if not stem:
        stem = "root"
    return f"{stem}.json"
def wrap_records(records: List[Dict], data_path: Sequence[str]):
"""按照 data_path 逐层包裹记录列表,使其结构与真实 API 返回体一致,方便离线回放。"""
payload = records
@@ -140,6 +135,8 @@ class FakeDBOperations:
self.commits = 0
self.rollbacks = 0
self.conn = FakeConnection()
# Pre-seeded query results (FIFO) to let tests control DB-returned rows
self.query_results: List[List[Dict]] = []
def batch_upsert_with_returning(self, sql: str, rows: List[Dict], page_size: int = 1000):
self.upserts.append(
@@ -167,6 +164,8 @@ class FakeDBOperations:
def query(self, sql: str, params=None):
    """Record a query call and return the next pre-seeded result set.

    The call is appended to ``self.executes`` with the SQL stripped of
    surrounding whitespace and tagged ``"type": "query"``. Result sets are
    consumed from the front of ``self.query_results``; once that list is
    empty, an empty list is returned.
    """
    call_record = {"sql": sql.strip(), "params": params, "type": "query"}
    self.executes.append(call_record)
    if not self.query_results:
        return []
    # Front of the list first, so tests seed results in call order.
    return self.query_results.pop(0)
def cursor(self):

View File

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
"""验证 14 个任务的 E/T/L 分阶段调用FakeDB/FakeAPI不访问真实接口或数据库"""
import logging
import sys
from pathlib import Path
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
import pytest
PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from tasks.base_task import TaskContext
from tests.unit.task_test_utils import (
TASK_SPECS,
create_test_config,
get_db_operations,
FakeAPIClient,
)
def _build_context(store_id: int) -> TaskContext:
    """Build a TaskContext for ``store_id`` covering the last 30 minutes.

    The window ends at the current time in the Asia/Taipei zone and starts
    30 minutes earlier; no cursor is supplied.
    """
    window_end = datetime.now(ZoneInfo("Asia/Taipei"))
    window_start = window_end - timedelta(minutes=30)
    return TaskContext(
        store_id=store_id,
        window_start=window_start,
        window_end=window_end,
        window_minutes=30,
        cursor=None,
    )
@pytest.mark.parametrize("spec", TASK_SPECS)
def test_etl_stage_flow(spec, tmp_path):
    """For each task spec, call transform/load directly and check the results.

    Verifies the shape of the returned counts and that the DB test double
    recorded at least one write.
    """
    archive_dir = tmp_path / "archive"
    temp_dir = tmp_path / "temp"
    config = create_test_config("ONLINE", archive_dir, temp_dir)
    api = FakeAPIClient({spec.endpoint: spec.sample_records})
    logger = logging.getLogger(f"test_{spec.code.lower()}")
    expected_keys = {"fetched", "inserted", "updated", "skipped", "errors"}

    with get_db_operations() as db_ops:
        task = spec.task_cls(config, db_ops, api, logger)
        ctx = _build_context(config.get("app.store_id"))

        # Skip extract: feed the sample records straight into transform + load.
        transformed = task.transform({"records": spec.sample_records}, ctx)
        counts = task.load(transformed, ctx)

        assert set(counts.keys()) == expected_keys
        assert counts["fetched"] == len(spec.sample_records)
        assert counts["errors"] == 0

        # The DB double must have recorded at least one upsert or execute.
        recorded_upserts = getattr(db_ops, "upserts", [])
        recorded_executes = getattr(db_ops, "executes", [])
        assert recorded_upserts or recorded_executes, "expected db operations to be recorded"

View File

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Unit tests for the new ODS ingestion tasks."""
import logging
import os
import sys
from pathlib import Path
@@ -9,6 +10,8 @@ PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
os.environ.setdefault("ETL_SKIP_DOTENV", "1")
from tasks.ods_tasks import ODS_TASK_CLASSES
from .task_test_utils import create_test_config, get_db_operations, FakeAPIClient
@@ -19,44 +22,80 @@ def _build_config(tmp_path):
return create_test_config("ONLINE", archive_dir, temp_dir)
def test_ods_order_settle_ingest(tmp_path):
"""Ensure ODS_ORDER_SETTLE task writes raw payload + metadata."""
def test_ods_assistant_accounts_ingest(tmp_path):
"""Ensure ODS_ASSISTANT_ACCOUNTS task stores raw payload with record_index dedup keys."""
config = _build_config(tmp_path)
sample = [
{
"orderSettleId": 701,
"orderTradeNo": 8001,
"anyField": "value",
"id": 5001,
"assistant_no": "A01",
"nickname": "小张",
}
]
api = FakeAPIClient({"/Site/GetAllOrderSettleList": sample})
task_cls = ODS_TASK_CLASSES["ODS_ORDER_SETTLE"]
api = FakeAPIClient({"/PersonnelManagement/SearchAssistantInfo": sample})
task_cls = ODS_TASK_CLASSES["ODS_ASSISTANT_ACCOUNTS"]
with get_db_operations() as db_ops:
task = task_cls(config, db_ops, api, logging.getLogger("test_ods_order"))
task = task_cls(config, db_ops, api, logging.getLogger("test_ods_assistant_accounts"))
result = task.execute()
assert result["status"] == "SUCCESS"
assert result["counts"]["fetched"] == 1
assert db_ops.commits == 1
row = db_ops.upserts[0]["rows"][0]
assert row["order_settle_id"] == 701
assert row["order_trade_no"] == 8001
assert row["source_endpoint"] == "/Site/GetAllOrderSettleList"
assert '"orderSettleId": 701' in row["payload"]
assert row["id"] == 5001
assert row["record_index"] == 0
assert row["source_file"] is None or row["source_file"] == ""
assert '"id": 5001' in row["payload"]
def test_ods_payment_ingest(tmp_path):
"""Ensure ODS_PAYMENT task stores relate fields and payload."""
def test_ods_inventory_change_ingest(tmp_path):
"""Ensure ODS_INVENTORY_CHANGE task stores raw payload with record_index dedup keys."""
config = _build_config(tmp_path)
sample = [
{
"payId": 901,
"relateType": "ORDER",
"relateId": 123,
"payAmount": "100.00",
"siteGoodsStockId": 123456,
"stockType": 1,
"goodsName": "测试商品",
}
]
api = FakeAPIClient({"/GoodsStockManage/QueryGoodsOutboundReceipt": sample})
task_cls = ODS_TASK_CLASSES["ODS_INVENTORY_CHANGE"]
with get_db_operations() as db_ops:
task = task_cls(config, db_ops, api, logging.getLogger("test_ods_inventory_change"))
result = task.execute()
assert result["status"] == "SUCCESS"
assert result["counts"]["fetched"] == 1
assert db_ops.commits == 1
row = db_ops.upserts[0]["rows"][0]
assert row["sitegoodsstockid"] == 123456
assert row["record_index"] == 0
assert '"siteGoodsStockId": 123456' in row["payload"]
def test_ods_member_profiles_ingest(tmp_path):
    """Ensure the ODS_MEMBER task stores the tenantMemberInfos payload as raw JSON."""
    endpoint = "/MemberProfile/GetTenantMemberList"
    member = {"id": 101, "mobile": "13800000000"}
    api = FakeAPIClient({endpoint: [{"tenantMemberInfos": [member]}]})
    config = _build_config(tmp_path)
    log = logging.getLogger("test_ods_member")

    with get_db_operations() as db_ops:
        task = ODS_TASK_CLASSES["ODS_MEMBER"](config, db_ops, api, log)
        result = task.execute()

        assert result["status"] == "SUCCESS"
        # First row of the first recorded upsert batch.
        first_row = db_ops.upserts[0]["rows"][0]
        assert first_row["record_index"] == 0
        assert '"id": 101' in first_row["payload"]
def test_ods_payment_ingest(tmp_path):
"""Ensure ODS_PAYMENT task stores payment_transactions raw JSON."""
config = _build_config(tmp_path)
sample = [{"payId": 901, "payAmount": "100.00"}]
api = FakeAPIClient({"/PayLog/GetPayLogListPage": sample})
task_cls = ODS_TASK_CLASSES["ODS_PAYMENT"]
@@ -65,10 +104,57 @@ def test_ods_payment_ingest(tmp_path):
result = task.execute()
assert result["status"] == "SUCCESS"
assert result["counts"]["fetched"] == 1
assert db_ops.commits == 1
row = db_ops.upserts[0]["rows"][0]
assert row["pay_id"] == 901
assert row["relate_type"] == "ORDER"
assert row["relate_id"] == 123
assert row["record_index"] == 0
assert '"payId": 901' in row["payload"]
def test_ods_settlement_records_ingest(tmp_path):
    """Ensure the ODS_ORDER_SETTLE task stores the settleList payload as raw JSON."""
    endpoint = "/Site/GetAllOrderSettleList"
    settle_entry = {"id": 701, "orderTradeNo": 8001}
    api = FakeAPIClient({endpoint: [{"data": {"settleList": [settle_entry]}}]})
    config = _build_config(tmp_path)
    log = logging.getLogger("test_ods_order_settle")

    with get_db_operations() as db_ops:
        task = ODS_TASK_CLASSES["ODS_ORDER_SETTLE"](config, db_ops, api, log)
        result = task.execute()

        assert result["status"] == "SUCCESS"
        # First row of the first recorded upsert batch.
        first_row = db_ops.upserts[0]["rows"][0]
        assert first_row["record_index"] == 0
        assert '"orderTradeNo": 8001' in first_row["payload"]
def test_ods_settlement_ticket_by_payment_relate_ids(tmp_path):
    """Ensure settlement tickets are fetched per payment relate_id and skip existing ones.

    The DB double is seeded with two result sets: one row for an existing
    ticket (9002) and three payment rows (9001, 9002, None). The test expects
    exactly one ticket to be fetched and inserted, and that the API was
    called with ``orderSettleId == 9001``.
    """
    config = _build_config(tmp_path)
    ticket_payload = {"data": {"data": {"orderSettleId": 9001, "orderSettleNumber": "T001"}}}
    api = FakeAPIClient({"/Order/GetOrderSettleTicketNew": [ticket_payload]})
    task_cls = ODS_TASK_CLASSES["ODS_SETTLEMENT_TICKET"]

    with get_db_operations() as db_ops:
        # First query: existing ticket ids; Second query: payment relate_ids
        db_ops.query_results = [
            [{"order_settle_id": 9002}],
            [
                {"order_settle_id": 9001},
                {"order_settle_id": 9002},
                {"order_settle_id": None},
            ],
        ]
        task = task_cls(config, db_ops, api, logging.getLogger("test_ods_settlement_ticket"))
        result = task.execute()

        assert result["status"] == "SUCCESS"
        counts = result["counts"]
        assert counts["fetched"] == 1
        assert counts["inserted"] == 1
        assert counts["updated"] == 0
        assert counts["skipped"] == 0
        assert '"orderSettleId": 9001' in db_ops.upserts[0]["rows"][0]["payload"]
        # `or {}` rather than a .get() default: the default only applies when
        # the key is missing, so a call recorded with params=None would
        # otherwise raise AttributeError instead of failing the assertion.
        assert any(
            call["endpoint"] == "/Order/GetOrderSettleTicketNew"
            and (call.get("params") or {}).get("orderSettleId") == 9001
            for call in api.calls
        )

View File

@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
"""汇总与报告工具的单测。"""
from utils.reporting import summarize_counts, format_report
def test_summarize_counts_and_format():
    """Check the totals/details from summarize_counts and the text from format_report."""
    orders_result = {
        "task_code": "ORDERS",
        "counts": {"fetched": 2, "inserted": 2, "updated": 0, "skipped": 0, "errors": 0},
    }
    payments_result = {
        "task_code": "PAYMENTS",
        "counts": {"fetched": 3, "inserted": 2, "updated": 1, "skipped": 0, "errors": 0},
    }

    summary = summarize_counts([orders_result, payments_result])

    # Totals are the per-key sums over both task results.
    expected_totals = (("fetched", 5), ("inserted", 4), ("updated", 1), ("errors", 0))
    for key, expected in expected_totals:
        assert summary["total"][key] == expected
    assert len(summary["details"]) == 2

    report = format_report(summary)
    for fragment in ("TOTAL fetched=5", "ORDERS:", "PAYMENTS:"):
        assert fragment in report