ODS 完成

This commit is contained in:
Neo
2025-11-30 07:18:55 +08:00
parent cbd16a39ba
commit b9b050bb5d
28 changed files with 41867 additions and 977 deletions

View File

@@ -1,94 +1,199 @@
# -*- coding: utf-8 -*-
"""API客户端"""
"""API客户端:统一封装 POST/重试/分页与列表提取逻辑。"""
from __future__ import annotations
from typing import Iterable, Sequence, Tuple
import requests
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Browser-like default headers sent with every request so the API treats the
# client like the official web frontend (Origin/Referer/UA/sec-ch-* all mimic
# Chrome on Windows). Instances may extend/override these via headers_extra.
DEFAULT_BROWSER_HEADERS = {
"Accept": "application/json, text/plain, */*",
"Content-Type": "application/json",
"Origin": "https://pc.ficoo.vip",
"Referer": "https://pc.ficoo.vip/",
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
),
"Accept-Language": "zh-CN,zh;q=0.9",
"sec-ch-ua": '"Google Chrome";v="120", "Not?A_Brand";v="8", "Chromium";v="120"',
"sec-ch-ua-platform": '"Windows"',
"sec-ch-ua-mobile": "?0",
"sec-fetch-site": "same-origin",
"sec-fetch-mode": "cors",
"sec-fetch-dest": "empty",
"priority": "u=1, i",
"X-Requested-With": "XMLHttpRequest",
}
# Well-known field names under which this API nests its record lists.
# _extract_list tries these, in order, when no explicit list_key matches.
# Generic names first, then endpoint-specific ones observed in responses.
DEFAULT_LIST_KEYS: Tuple[str, ...] = (
"list",
"rows",
"records",
"items",
"dataList",
"data_list",
"tenantMemberInfos",
"tenantMemberCardLogs",
"tenantMemberCards",
"settleList",
"orderAssistantDetails",
"assistantInfos",
"siteTables",
"taiFeeAdjustInfos",
"siteTableUseDetailsList",
"tenantGoodsList",
"packageCouponList",
"queryDeliveryRecordsList",
"goodsCategoryList",
"orderGoodsList",
"orderGoodsLedgers",
)
class APIClient:
    """HTTP API client (sends POST requests with JSON bodies by default)."""

    def __init__(
        self,
        base_url: str,
        token: str | None = None,
        timeout: int = 20,
        retry_max: int = 3,
        headers_extra: dict | None = None,
    ):
        """
        :param base_url: API root; trailing slashes are stripped once here.
        :param token: auth token; normalised to "Bearer <token>" form.
        :param timeout: per-request timeout in seconds.
        :param retry_max: total attempts (first try + retries).
        :param headers_extra: extra headers merged over the browser defaults.
        """
        self.base_url = (base_url or "").rstrip("/")
        self.token = self._normalize_token(token)
        self.timeout = timeout
        self.retry_max = retry_max
        self.headers_extra = headers_extra or {}
        # Session is created lazily so construction never touches the network.
        self._session: requests.Session | None = None

    # ------------------------------------------------------------------ HTTP basics
    def _get_session(self) -> requests.Session:
        """Return the shared Session, creating it with a retry policy on first use."""
        if self._session is None:
            self._session = requests.Session()
            # retry_max counts total attempts while Retry counts *re*-tries.
            retries = max(0, int(self.retry_max) - 1)
            retry = Retry(
                total=None,
                connect=retries,
                read=retries,
                status=retries,
                # POST is retried too: these endpoints are read-only queries.
                allowed_methods=frozenset(["GET", "POST"]),
                status_forcelist=(429, 500, 502, 503, 504),
                backoff_factor=0.5,
                respect_retry_after_header=True,
                raise_on_status=False,
            )
            adapter = HTTPAdapter(max_retries=retry)
            self._session.mount("http://", adapter)
            self._session.mount("https://", adapter)
            # Default headers (browser profile + extras + auth) live on the
            # session so every request carries them.
            self._session.headers.update(self._build_headers())
        return self._session
def get(self, endpoint: str, params: dict | None = None) -> dict:
    """Backward-compatible entry point; actually issues a POST with a JSON body."""
    return self._post_json(endpoint, params)

def _post_json(self, endpoint: str, payload: dict | None = None) -> dict:
    """POST *payload* as JSON to *endpoint* and return the decoded response.

    :raises ValueError: when base_url is unset, or the API envelope reports
        a non-zero error code (see _ensure_success).
    :raises requests.HTTPError: on non-2xx HTTP status.
    """
    if not self.base_url:
        raise ValueError("API base_url 未配置")
    url = f"{self.base_url}/{endpoint.lstrip('/')}"
    sess = self._get_session()
    resp = sess.post(url, json=payload or {}, timeout=self.timeout)
    resp.raise_for_status()
    data = resp.json()
    # Fail loudly on logical errors so upstream retry/logging can react.
    self._ensure_success(data)
    return data
def _build_headers(self) -> dict:
    """Merge the browser default headers, per-instance extras and the auth token."""
    merged = dict(DEFAULT_BROWSER_HEADERS)
    merged.update(self.headers_extra)
    if self.token:
        merged["Authorization"] = self.token
    return merged

@staticmethod
def _normalize_token(token: str | None) -> str | None:
    """Return the token in "Bearer <value>" form; empty/None stays None."""
    if not token:
        return None
    cleaned = str(token).strip()
    # Case-insensitive prefix check: an existing Bearer/bearer prefix is kept.
    if not cleaned.lower().startswith("bearer "):
        cleaned = f"Bearer {cleaned}"
    return cleaned

@staticmethod
def _ensure_success(payload: dict):
    """Raise ValueError when the API envelope carries a non-zero error code."""
    if isinstance(payload, dict) and "code" in payload:
        code = payload.get("code")
        if code not in (0, "0", None):
            msg = payload.get("msg") or payload.get("message") or ""
            raise ValueError(f"API 返回错误 code={code} msg={msg}")
# ------------------------------------------------------------------ paging
def iter_paginated(
    self,
    endpoint: str,
    params: dict | None,
    page_size: int | None = 200,
    page_field: str = "page",
    size_field: str = "limit",
    data_path: tuple = ("data",),
    list_key: str | Sequence[str] | None = None,
    page_start: int = 1,
    page_end: int | None = None,
) -> Iterable[tuple[int, list, dict, dict]]:
    """Page iterator yielding (page_no, records, request_params, raw_response).

    When page_size is None no paging parameters are attached and only a
    single request is made. Iteration stops at page_end (inclusive), on a
    short page, or on an empty page.
    """
    base_params = dict(params or {})
    page = page_start
    while True:
        page_params = dict(base_params)
        if page_size is not None:
            page_params[page_field] = page
            page_params[size_field] = page_size
        payload = self._post_json(endpoint, page_params)
        records = self._extract_list(payload, data_path, list_key)
        yield page, records, page_params, payload
        if page_size is None:
            # Unpaged mode: one request only.
            break
        if page_end is not None and page >= page_end:
            break
        if len(records) < (page_size or 0):
            # Short (or empty) page means the source is exhausted.
            break
        if len(records) == 0:
            break
        page += 1
def get_paginated(
    self,
    endpoint: str,
    params: dict,
    page_size: int | None = 200,
    page_field: str = "page",
    size_field: str = "limit",
    data_path: tuple = ("data",),
    list_key: str | Sequence[str] | None = None,
    page_start: int = 1,
    page_end: int | None = None,
) -> tuple[list, list]:
    """Fetch every page and return (all_records, per_page_metadata).

    NOTE(review): the per-page metadata dict shape is reconstructed from the
    sibling LocalJsonClient/RecordingAPIClient implementations — confirm.
    """
    records, pages_meta = [], []
    for page_no, page_records, request_params, response in self.iter_paginated(
        endpoint=endpoint,
        params=params,
        page_size=page_size,
        page_field=page_field,
        size_field=size_field,
        data_path=data_path,
        list_key=list_key,
        page_start=page_start,
        page_end=page_end,
    ):
        records.extend(page_records)
        pages_meta.append(
            {"page": page_no, "request": request_params, "response": response}
        )
    return records, pages_meta
# ------------------------------------------------------------------ response parsing
@classmethod
def _extract_list(
    cls, payload: dict | list, data_path: tuple, list_key: str | Sequence[str] | None
) -> list:
    """Extract the record list from *payload*, tolerating common field names.

    Resolution order: payload itself when already a list; walk data_path;
    the caller-supplied list_key(s); the well-known DEFAULT_LIST_KEYS;
    finally the first dict value that happens to be a list. Returns [] when
    nothing matches.
    """
    cur: object = payload
    if isinstance(cur, list):
        return cur
    for key in data_path:
        if isinstance(cur, dict):
            cur = cur.get(key)
        if cur is None:
            break
    if isinstance(cur, list):
        return cur
    if isinstance(cur, dict):
        if list_key:
            # Accept a single key or any sequence of candidate keys.
            keys = (list_key,) if isinstance(list_key, str) else tuple(list_key)
            for k in keys:
                if isinstance(cur.get(k), list):
                    return cur[k]
        for k in DEFAULT_LIST_KEYS:
            if isinstance(cur.get(k), list):
                return cur[k]
        # Last resort: any list-valued field.
        for v in cur.values():
            if isinstance(v, list):
                return v
    return []

View File

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
"""本地 JSON 客户端,模拟 APIClient 的分页接口,从落盘的 JSON 回放数据。"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Iterable, Tuple
from api.client import APIClient
from utils.json_store import endpoint_to_filename
class LocalJsonClient:
    """
    Replay client: reads the JSON files produced by RecordingAPIClient and
    exposes the same iter_paginated/get_paginated interface as APIClient.
    """

    def __init__(self, base_dir: str | Path):
        self.base_dir = Path(base_dir)
        if not self.base_dir.exists():
            raise FileNotFoundError(f"JSON 目录不存在: {self.base_dir}")

    def iter_paginated(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | None = None,
    ) -> Iterable[Tuple[int, list, dict, dict]]:
        """Yield (page_no, records, request_params, raw_response) from disk."""
        dump_file = self.base_dir / endpoint_to_filename(endpoint)
        if not dump_file.exists():
            raise FileNotFoundError(f"未找到匹配的 JSON 文件: {dump_file}")
        doc = json.loads(dump_file.read_text(encoding="utf-8"))
        page_entries = doc.get("pages")
        if not isinstance(page_entries, list) or not page_entries:
            # Legacy dumps without a "pages" wrapper: treat the whole
            # document as a single page.
            page_entries = [{"page": 1, "request": params or {}, "response": doc}]
        for fallback_no, entry in enumerate(page_entries, start=1):
            raw_response = entry.get("response", {})
            req = entry.get("request") or {}
            page_no = entry.get("page") or fallback_no
            rows = APIClient._extract_list(raw_response, data_path, list_key)  # type: ignore[attr-defined]
            yield page_no, rows, req, raw_response

    def get_paginated(
        self,
        endpoint: str,
        params: dict,
        page_size: int = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | None = None,
    ) -> tuple[list, list]:
        """Collect all pages into one record list plus per-page metadata."""
        all_rows: list = []
        meta: list = []
        for page_no, rows, req, raw in self.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
        ):
            all_rows.extend(rows)
            meta.append({"page": page_no, "request": req, "response": raw})
        return all_rows, meta

View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""包装 APIClient将分页响应落盘便于后续本地清洗。"""
from __future__ import annotations
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Tuple
from api.client import APIClient
from utils.json_store import dump_json, endpoint_to_filename
class RecordingAPIClient:
    """
    Proxy around APIClient that mirrors iter_paginated/get_paginated while
    dumping every page's response to a JSON file (named from the endpoint)
    under output_dir, for later offline cleaning/replay.
    """

    def __init__(
        self,
        base_client: APIClient,
        output_dir: Path | str,
        task_code: str,
        run_id: int,
        write_pretty: bool = False,
    ):
        self.base = base_client
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.task_code = task_code
        self.run_id = run_id
        self.write_pretty = write_pretty
        # Metadata of the most recent dump (file path, page/record counts).
        self.last_dump: dict[str, Any] | None = None

    # ------------------------------------------------------------------ public API
    def iter_paginated(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | None = None,
    ) -> Iterable[Tuple[int, list, dict, dict]]:
        """Proxy the underlying iterator, recording every page as it passes.

        NOTE(review): the dump is written only after the generator is fully
        exhausted; a consumer that stops early produces no file — confirm
        this is intended.
        """
        pages: list[dict[str, Any]] = []
        total_records = 0
        for page_no, records, request_params, response in self.base.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
        ):
            pages.append({"page": page_no, "request": request_params, "response": response})
            total_records += len(records)
            yield page_no, records, request_params, response
        self._dump(endpoint, params, page_size, pages, total_records)

    def get_paginated(
        self,
        endpoint: str,
        params: dict,
        page_size: int = 200,
        page_field: str = "page",
        size_field: str = "limit",
        data_path: tuple = ("data",),
        list_key: str | None = None,
    ) -> tuple[list, list]:
        """Collect all pages (recording them) and return records plus metadata."""
        records: list = []
        pages_meta: list = []
        for page_no, page_records, request_params, response in self.iter_paginated(
            endpoint=endpoint,
            params=params,
            page_size=page_size,
            page_field=page_field,
            size_field=size_field,
            data_path=data_path,
            list_key=list_key,
        ):
            records.extend(page_records)
            pages_meta.append({"page": page_no, "request": request_params, "response": response})
        return records, pages_meta

    # ------------------------------------------------------------------ internal
    def _dump(
        self,
        endpoint: str,
        params: dict | None,
        page_size: int,
        pages: list[dict[str, Any]],
        total_records: int,
    ):
        """Write the collected pages to disk and remember the dump metadata."""
        # Local import keeps the module import block unchanged.
        from datetime import timezone

        filename = endpoint_to_filename(endpoint)
        path = self.output_dir / filename
        payload = {
            "task_code": self.task_code,
            "run_id": self.run_id,
            "endpoint": endpoint,
            "params": params or {},
            "page_size": page_size,
            "pages": pages,
            "total_records": total_records,
            # Timezone-aware UTC (datetime.utcnow() is deprecated); keep the
            # trailing "Z" convention of the original format.
            "dumped_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        }
        dump_json(path, payload, pretty=self.write_pretty)
        self.last_dump = {
            "file": str(path),
            "endpoint": endpoint,
            "pages": len(pages),
            "records": total_records,
        }

View File

@@ -36,7 +36,7 @@ def parse_args():
# API参数
parser.add_argument("--api-base", help="API基础URL")
parser.add_argument("--api-token", help="API令牌")
parser.add_argument("--api-token", "--token", dest="api_token", help="API令牌Bearer Token")
parser.add_argument("--api-timeout", type=int, help="API超时(秒)")
parser.add_argument("--api-page-size", type=int, help="分页大小")

View File

@@ -15,7 +15,7 @@ DEFAULTS = {
"name": "",
"user": "",
"password": "",
"connect_timeout_sec": 5,
"connect_timeout_sec": 20,
"batch_size": 1000,
"session": {
"timezone": "Asia/Taipei",
@@ -25,10 +25,11 @@ DEFAULTS = {
},
},
"api": {
"base_url": None,
"base_url": "https://pc.ficoo.vip/apiprod/admin/v1",
"token": None,
"timeout_sec": 20,
"page_size": 200,
"params": {},
"retries": {
"max_attempts": 3,
"backoff_sec": [1, 2, 4],
@@ -71,6 +72,14 @@ DEFAULTS = {
"write_pretty_json": False,
"max_file_bytes": 50 * 1024 * 1024,
},
"pipeline": {
# 运行流程(FETCH_ONLY=仅在线抓取落盘、INGEST_ONLY=本地清洗入库、FULL=抓取 + 清洗入库)
"flow": "FULL",
# 在线抓取 JSON 输出根目录按任务、run_id 与时间自动创建子目录)
"fetch_root": r"D:\LLZQ\DB\json_fetch",
# 本地清洗入库时的 JSON 输入目录(为空则默认使用本次抓取目录)
"ingest_source_dir": "",
},
"clean": {
"log_unknown_fields": True,
"unknown_fields_limit": 50,
@@ -86,14 +95,6 @@ DEFAULTS = {
"redact_keys": ["token", "password", "Authorization"],
"echo_token_in_logs": False,
},
"testing": {
# ONLINE: 正常实时 ETLOFFLINE: 读取归档 JSON 做 T/L
"mode": "ONLINE",
# 离线归档 JSON 所在目录(测试/离线回放使用)
"json_archive_dir": "",
# 测试运行时用于生成/复制临时 JSON 的目录
"temp_json_dir": "",
},
}
# 任务代码常量

View File

@@ -2,6 +2,7 @@
"""环境变量解析"""
import os
import json
from pathlib import Path
from copy import deepcopy
ENV_MAP = {
@@ -15,18 +16,22 @@ ENV_MAP = {
"PG_NAME": ("db.name",),
"PG_USER": ("db.user",),
"PG_PASSWORD": ("db.password",),
"PG_CONNECT_TIMEOUT": ("db.connect_timeout_sec",),
"API_BASE": ("api.base_url",),
"API_TOKEN": ("api.token",),
"FICOO_TOKEN": ("api.token",),
"API_TIMEOUT": ("api.timeout_sec",),
"API_PAGE_SIZE": ("api.page_size",),
"API_PARAMS": ("api.params",),
"EXPORT_ROOT": ("io.export_root",),
"LOG_ROOT": ("io.log_root",),
"RUN_TASKS": ("run.tasks",),
"OVERLAP_SECONDS": ("run.overlap_seconds",),
"WINDOW_BUSY_MIN": ("run.window_minutes.default_busy",),
"WINDOW_IDLE_MIN": ("run.window_minutes.default_idle",),
"TEST_MODE": ("testing.mode",),
"TEST_JSON_ARCHIVE_DIR": ("testing.json_archive_dir",),
"TEST_JSON_TEMP_DIR": ("testing.temp_json_dir",),
"PIPELINE_FLOW": ("pipeline.flow",),
"JSON_FETCH_ROOT": ("pipeline.fetch_root",),
"JSON_SOURCE_DIR": ("pipeline.ingest_source_dir",),
}
def _deep_set(d, dotted_keys, value):
@@ -53,13 +58,97 @@ def _coerce_env(v: str):
return s
return s
def load_env_overrides(defaults: dict) -> dict:
cfg = deepcopy(defaults)
def _strip_inline_comment(value: str) -> str:
"""去掉未被引号包裹的内联注释"""
result = []
in_quote = False
quote_char = ""
escape = False
for ch in value:
if escape:
result.append(ch)
escape = False
continue
if ch == "\\":
escape = True
result.append(ch)
continue
if ch in ("'", '"'):
if not in_quote:
in_quote = True
quote_char = ch
elif quote_char == ch:
in_quote = False
quote_char = ""
result.append(ch)
continue
if ch == "#" and not in_quote:
break
result.append(ch)
return "".join(result).rstrip()
def _unquote_value(value: str) -> str:
    """Normalise a dotenv value: drop comments, a trailing comma and quotes.

    Handles plain quoted strings as well as Python-style raw strings
    (``r'...'`` / ``R"..."``).
    """
    v = _strip_inline_comment(value.strip())
    v = v.rstrip(",").rstrip()
    if not v:
        return v
    quoted = len(v) >= 2 and v[0] in "'\"" and v[-1] == v[0]
    if quoted:
        return v[1:-1]
    raw_prefixed = (
        len(v) >= 3 and v[0] in "rR" and v[1] in "'\"" and v[-1] == v[1]
    )
    if raw_prefixed:
        return v[2:-1]
    return v
def _parse_dotenv_line(line: str) -> tuple[str, str] | None:
    """Parse one ``.env`` line into (key, value); None for blanks/comments."""
    text = line.strip()
    if not text or text.startswith("#"):
        return None
    # Tolerate shell-style "export KEY=VALUE".
    if text.startswith("export "):
        text = text[len("export ") :].strip()
    key, sep, raw_value = text.partition("=")
    if not sep:
        return None
    return key.strip(), _unquote_value(raw_value)
def _load_dotenv_values() -> dict:
    """Read KEY=VALUE pairs from the project-root ``.env`` (later lines win)."""
    dotenv_path = Path(__file__).resolve().parents[1] / ".env"
    if not dotenv_path.exists():
        return {}
    pairs: dict[str, str] = {}
    for raw_line in dotenv_path.read_text(encoding="utf-8").splitlines():
        parsed = _parse_dotenv_line(raw_line)
        if parsed:
            pairs[parsed[0]] = parsed[1]
    return pairs
def _apply_env_values(cfg: dict, source: dict):
    """Apply ENV_MAP-mapped values from *source* (str -> str mapping) onto cfg.

    Reads from the given *source* mapping (either .env values or os.environ)
    rather than os.environ directly, so layers can be applied in order.
    """
    for env_key, dotted in ENV_MAP.items():
        val = source.get(env_key)
        if val is None:
            continue
        v2 = _coerce_env(val)
        for path in dotted:
            # run.tasks accepts a comma-separated string and becomes a list.
            if path == "run.tasks" and isinstance(v2, str):
                v2 = [item.strip() for item in v2.split(",") if item.strip()]
            _deep_set(cfg, path.split("."), v2)
def load_env_overrides(defaults: dict) -> dict:
    """Return *defaults* overlaid with .env values, then real env vars.

    Real environment variables override .env entries; CLI handling elsewhere
    remains the highest-priority layer.
    """
    merged = deepcopy(defaults)
    for layer in (_load_dotenv_values(), os.environ):
        _apply_env_values(merged, layer)
    return merged

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,889 @@
-- Data warehouse schema for the entertainment chain (ODS -> DWD -> DWS)
-- ASCII only to keep cross-platform friendly.
-- ---------- Schemas ----------
-- Three-layer design: ODS holds raw (lightly typed) source payloads,
-- DWD holds cleaned dimensions/facts, DWS holds aggregates.
CREATE SCHEMA IF NOT EXISTS billiards_ods;
CREATE SCHEMA IF NOT EXISTS billiards_dwd;
CREATE SCHEMA IF NOT EXISTS billiards_dws;
-- ---------- ODS (raw, lightly typed) ----------
-- Each ODS table keeps the source payload for replay/debug while exposing key fields.
-- Raw member master data. Like every ODS table here: key fields are typed
-- for querying while the full source record is kept in `payload` (JSONB)
-- for replay/debug, plus source_file/source_endpoint/fetched_at lineage.
CREATE TABLE IF NOT EXISTS billiards_ods.ods_member_profile (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
member_id BIGINT NOT NULL,
member_name TEXT,
nickname TEXT,
mobile TEXT,
gender TEXT,
birthday DATE,
register_time TIMESTAMPTZ,
member_type_id BIGINT,
member_type_name TEXT,
status TEXT,
balance NUMERIC(18,2),
points NUMERIC(18,2),
last_visit_time TIMESTAMPTZ,
wechat_id TEXT,
alipay_id TEXT,
member_card_no TEXT,
remarks TEXT,
source_file TEXT, -- lineage: dump file the row came from
source_endpoint TEXT, -- lineage: API endpoint that produced it
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL, -- full source record for replay/debug
PRIMARY KEY (site_id, member_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_member_card (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
card_id BIGINT NOT NULL,
member_id BIGINT,
card_type_id BIGINT,
card_type_name TEXT,
card_balance NUMERIC(18,2),
discount_rate NUMERIC(8,4),
valid_start_date DATE,
valid_end_date DATE,
last_consume_time TIMESTAMPTZ,
status TEXT,
activate_time TIMESTAMPTZ,
deactivate_time TIMESTAMPTZ,
issuer_id BIGINT,
issuer_name TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, card_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_balance_change (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
change_id BIGINT NOT NULL,
member_id BIGINT,
change_amount NUMERIC(18,2),
balance_before NUMERIC(18,2),
balance_after NUMERIC(18,2),
change_type INT,
relate_id BIGINT,
pay_method INT,
remark TEXT,
operator_id BIGINT,
operator_name TEXT,
change_time TIMESTAMPTZ,
is_deleted BOOLEAN DEFAULT FALSE,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, change_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_recharge_record (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
recharge_id BIGINT NOT NULL,
member_id BIGINT,
recharge_amount NUMERIC(18,2),
gift_amount NUMERIC(18,2),
pay_method INT,
pay_trade_no TEXT,
order_trade_no TEXT,
recharge_time TIMESTAMPTZ,
status TEXT,
operator_id BIGINT,
operator_name TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, recharge_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_product (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
goods_id BIGINT NOT NULL,
goods_name TEXT,
goods_code TEXT,
category_id BIGINT,
category_name TEXT,
unit TEXT,
price NUMERIC(18,2),
status TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, goods_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_store_product (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
site_goods_id BIGINT NOT NULL,
goods_id BIGINT,
goods_name TEXT,
category_id BIGINT,
category_name TEXT,
sale_price NUMERIC(18,2),
cost_price NUMERIC(18,2),
status TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, site_goods_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_store_sale_item (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
sale_item_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
goods_id BIGINT,
goods_name TEXT,
category_id BIGINT,
quantity NUMERIC(18,4),
original_amount NUMERIC(18,2),
discount_amount NUMERIC(18,2),
final_amount NUMERIC(18,2),
is_gift BOOLEAN DEFAULT FALSE,
sale_time TIMESTAMPTZ,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, sale_item_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_table_info (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
table_id BIGINT NOT NULL,
table_code TEXT,
table_name TEXT,
table_type TEXT,
area_name TEXT,
status TEXT,
created_time TIMESTAMPTZ,
updated_time TIMESTAMPTZ,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, table_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_table_use_log (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
ledger_id BIGINT NOT NULL,
table_id BIGINT,
order_trade_no TEXT,
order_settle_id BIGINT,
start_time TIMESTAMPTZ,
end_time TIMESTAMPTZ,
duration_minutes INT,
original_table_fee NUMERIC(18,2),
discount_amount NUMERIC(18,2),
final_table_fee NUMERIC(18,2),
member_id BIGINT,
status TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, ledger_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_table_fee_adjust (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
adjust_id BIGINT NOT NULL,
ledger_id BIGINT,
order_trade_no TEXT,
discount_amount NUMERIC(18,2),
reason TEXT,
operator_id BIGINT,
operator_name TEXT,
created_at TIMESTAMPTZ,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, adjust_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_assistant_account (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
assistant_id BIGINT NOT NULL,
assistant_name TEXT,
mobile TEXT,
team_id BIGINT,
team_name TEXT,
status TEXT,
hired_date DATE,
left_date DATE,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, assistant_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_assistant_service_log (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
ledger_id BIGINT NOT NULL,
assistant_id BIGINT,
service_type TEXT,
order_trade_no TEXT,
order_settle_id BIGINT,
start_time TIMESTAMPTZ,
end_time TIMESTAMPTZ,
duration_minutes INT,
original_fee NUMERIC(18,2),
discount_amount NUMERIC(18,2),
final_fee NUMERIC(18,2),
member_id BIGINT,
status TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, ledger_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_assistant_cancel_log (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
cancel_id BIGINT NOT NULL,
ledger_id BIGINT,
assistant_id BIGINT,
order_trade_no TEXT,
reason TEXT,
cancel_time TIMESTAMPTZ,
operator_id BIGINT,
operator_name TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, cancel_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_group_package (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
package_id BIGINT NOT NULL,
package_name TEXT,
platform_code TEXT,
status TEXT,
face_price NUMERIC(18,2),
settle_price NUMERIC(18,2),
valid_from DATE,
valid_to DATE,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, package_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_group_package_log (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
usage_id BIGINT NOT NULL,
package_id BIGINT,
coupon_id BIGINT,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
status TEXT,
used_time TIMESTAMPTZ,
deduct_amount NUMERIC(18,2),
settle_price NUMERIC(18,2),
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, usage_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_platform_coupon_log (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
coupon_id BIGINT NOT NULL,
platform_code TEXT,
verify_code TEXT,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
status TEXT,
used_time TIMESTAMPTZ,
deduct_amount NUMERIC(18,2),
settle_price NUMERIC(18,2),
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, coupon_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_inventory_change (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
change_id BIGINT NOT NULL,
site_goods_id BIGINT,
goods_id BIGINT,
change_amount NUMERIC(18,2),
before_stock NUMERIC(18,2),
after_stock NUMERIC(18,2),
change_type TEXT,
relate_id BIGINT,
remark TEXT,
operator_id BIGINT,
operator_name TEXT,
change_time TIMESTAMPTZ,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, change_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_inventory_stock (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
site_goods_id BIGINT NOT NULL,
goods_id BIGINT,
current_stock NUMERIC(18,2),
cost_price NUMERIC(18,2),
snapshot_key TEXT NOT NULL DEFAULT 'default',
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, site_goods_id, snapshot_key)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_order_settle (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
order_settle_id BIGINT NOT NULL,
settle_relate_id BIGINT,
settle_name TEXT,
settle_type INT,
settle_status INT,
member_id BIGINT,
member_phone TEXT,
table_id BIGINT,
consume_money NUMERIC(18,2),
table_charge_money NUMERIC(18,2),
goods_money NUMERIC(18,2),
service_money NUMERIC(18,2),
assistant_pd_money NUMERIC(18,2),
assistant_cx_money NUMERIC(18,2),
pay_amount NUMERIC(18,2),
coupon_amount NUMERIC(18,2),
card_amount NUMERIC(18,2),
balance_amount NUMERIC(18,2),
refund_amount NUMERIC(18,2),
prepay_money NUMERIC(18,2),
adjust_amount NUMERIC(18,2),
rounding_amount NUMERIC(18,2),
payment_method INT,
create_time TIMESTAMPTZ,
pay_time TIMESTAMPTZ,
operator_id BIGINT,
operator_name TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, order_settle_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_goods_category (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
category_id BIGINT NOT NULL,
category_name TEXT,
parent_id BIGINT,
level_no INT,
status TEXT,
remark TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, category_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_payment_record (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
pay_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
pay_method_code TEXT,
pay_method_name TEXT,
pay_amount NUMERIC(18,2),
pay_time TIMESTAMPTZ,
relate_type TEXT,
relate_id BIGINT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, pay_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_refund_record (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
refund_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
pay_method_code TEXT,
refund_amount NUMERIC(18,2),
refund_time TIMESTAMPTZ,
status TEXT,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, refund_id)
);
CREATE TABLE IF NOT EXISTS billiards_ods.ods_order_receipt_detail (
tenant_id BIGINT,
site_id BIGINT NOT NULL,
order_settle_id BIGINT NOT NULL,
order_trade_no TEXT,
receipt_no TEXT,
receipt_time TIMESTAMPTZ,
total_amount NUMERIC(18,2),
discount_amount NUMERIC(18,2),
final_amount NUMERIC(18,2),
member_id BIGINT,
snapshot_raw JSONB,
source_file TEXT,
source_endpoint TEXT,
fetched_at TIMESTAMPTZ DEFAULT now(),
payload JSONB NOT NULL,
PRIMARY KEY (site_id, order_settle_id)
);
-- ---------- DWD Dimensions ----------
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_tenant (
tenant_id BIGINT PRIMARY KEY,
tenant_name TEXT,
short_name TEXT,
status TEXT,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_site (
site_id BIGINT PRIMARY KEY,
tenant_id BIGINT,
site_code TEXT,
site_name TEXT,
city TEXT,
region TEXT,
status TEXT,
open_date DATE,
close_date DATE,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_member_card_type (
card_type_id BIGINT PRIMARY KEY,
card_type_name TEXT,
discount_rate NUMERIC(8,4),
description TEXT,
remark TEXT
);
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_product_category (
category_id BIGINT PRIMARY KEY,
category_name TEXT,
parent_id BIGINT,
level_no INT,
status TEXT,
remark TEXT
);
-- ---------- DWD Dimensions ----------
-- Product master: one row per goods item; category_name is denormalized
-- alongside the category_id FK for query convenience.
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_product (
goods_id BIGINT PRIMARY KEY,
goods_name TEXT,
goods_code TEXT,
category_id BIGINT REFERENCES billiards_dwd.dim_product_category (category_id),
category_name TEXT,
unit TEXT,
default_price NUMERIC(18,2),
status TEXT,
updated_at TIMESTAMPTZ DEFAULT now()
);
-- Billiard-table master: physical tables per site (table_type/area_name
-- come from the upstream site-table API).
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_table (
table_id BIGINT PRIMARY KEY,
site_id BIGINT,
table_code TEXT,
table_name TEXT,
table_type TEXT,
area_name TEXT,
status TEXT,
created_time TIMESTAMPTZ,
updated_time TIMESTAMPTZ
);
-- Assistant team lookup (referenced by dim_assistant.team_id).
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_assistant_team (
team_id BIGINT PRIMARY KEY,
team_name TEXT,
remark TEXT,
updated_at TIMESTAMPTZ DEFAULT now()
);
-- Assistant (staff) master with employment dates.
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_assistant (
assistant_id BIGINT PRIMARY KEY,
assistant_name TEXT,
mobile TEXT,
team_id BIGINT REFERENCES billiards_dwd.dim_assistant_team (team_id),
status TEXT,
hired_date DATE,
left_date DATE,
updated_at TIMESTAMPTZ DEFAULT now()
);
-- Payment-method lookup; is_stored_value marks member stored-card methods
-- (used by fact_payment / fact_refund / fact_balance_change).
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_pay_method (
pay_method_code TEXT PRIMARY KEY,
pay_method_name TEXT,
is_stored_value BOOLEAN DEFAULT FALSE,
status TEXT,
updated_at TIMESTAMPTZ DEFAULT now()
);
-- Assistant service-type lookup (referenced by fact_assistant_service).
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_order_assist_type (
assist_type_code TEXT PRIMARY KEY,
assist_type_name TEXT,
description TEXT
);
-- Coupon platform lookup (e.g. group-buy platforms; referenced by
-- fact_coupon_usage.platform_code).
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_coupon_platform (
platform_code TEXT PRIMARY KEY,
platform_name TEXT,
description TEXT
);
-- Calendar dimension keyed by date.
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_date (
date_key DATE PRIMARY KEY,
year_no INT,
month_no INT,
day_no INT,
week_no INT,
day_of_week INT,
month_name TEXT
);
-- Member master. Composite PK (site_id, member_id): member ids are only
-- unique within a site. member_type_id references dim_member_card_type
-- (declared earlier in this script, above this chunk).
CREATE TABLE IF NOT EXISTS billiards_dwd.dim_member (
site_id BIGINT,
member_id BIGINT,
tenant_id BIGINT,
member_name TEXT,
nickname TEXT,
gender TEXT,
birthday DATE,
mobile TEXT,
member_type_id BIGINT REFERENCES billiards_dwd.dim_member_card_type (card_type_id),
member_type_name TEXT,
status TEXT,
register_time TIMESTAMPTZ,
valid_from DATE,
valid_to DATE,
last_visit_time TIMESTAMPTZ,
balance NUMERIC(18,2),
total_recharge_amount NUMERIC(18,2),
total_consumed_amount NUMERIC(18,2),
wechat_id TEXT,
alipay_id TEXT,
remark TEXT,
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, member_id)
);
-- ---------- DWD Facts ----------
-- All fact tables share the pattern: composite PK (site_id, <natural id>)
-- so the same upstream id can recur across sites, and created_at/updated_at
-- audit columns defaulting to now() for upsert tracking.
-- Goods sale line items (one row per sold item on a settled order).
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_sale_item (
site_id BIGINT NOT NULL,
sale_item_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
goods_id BIGINT REFERENCES billiards_dwd.dim_product (goods_id),
category_id BIGINT REFERENCES billiards_dwd.dim_product_category (category_id),
quantity NUMERIC(18,4),
original_amount NUMERIC(18,2),
discount_amount NUMERIC(18,2),
final_amount NUMERIC(18,2),
is_gift BOOLEAN DEFAULT FALSE,
sale_time TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, sale_item_id)
);
-- Table-usage (table-fee) ledger: one row per table session, with the
-- fee breakdown original -> member/manual discounts -> final.
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_table_usage (
site_id BIGINT NOT NULL,
ledger_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
table_id BIGINT REFERENCES billiards_dwd.dim_table (table_id),
member_id BIGINT,
start_time TIMESTAMPTZ,
end_time TIMESTAMPTZ,
duration_minutes INT,
original_table_fee NUMERIC(18,2),
member_discount_amount NUMERIC(18,2),
manual_discount_amount NUMERIC(18,2),
final_table_fee NUMERIC(18,2),
is_canceled BOOLEAN DEFAULT FALSE,
cancel_time TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, ledger_id)
);
-- Assistant service ledger: mirrors fact_table_usage but for assistant
-- service fees, keyed to the assistant and service type.
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_assistant_service (
site_id BIGINT NOT NULL,
ledger_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
assistant_id BIGINT REFERENCES billiards_dwd.dim_assistant (assistant_id),
assist_type_code TEXT REFERENCES billiards_dwd.dim_order_assist_type (assist_type_code),
member_id BIGINT,
start_time TIMESTAMPTZ,
end_time TIMESTAMPTZ,
duration_minutes INT,
original_fee NUMERIC(18,2),
member_discount_amount NUMERIC(18,2),
manual_discount_amount NUMERIC(18,2),
final_fee NUMERIC(18,2),
is_canceled BOOLEAN DEFAULT FALSE,
cancel_time TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, ledger_id)
);
-- Platform coupon verification/redemption records.
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_coupon_usage (
site_id BIGINT NOT NULL,
coupon_id BIGINT NOT NULL,
package_id BIGINT,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
platform_code TEXT REFERENCES billiards_dwd.dim_coupon_platform (platform_code),
status TEXT,
deduct_amount NUMERIC(18,2),
settle_price NUMERIC(18,2),
used_time TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, coupon_id)
);
-- Payment records; relate_type/relate_id point at the paid-for entity.
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_payment (
site_id BIGINT NOT NULL,
pay_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
pay_method_code TEXT REFERENCES billiards_dwd.dim_pay_method (pay_method_code),
pay_amount NUMERIC(18,2),
pay_time TIMESTAMPTZ,
relate_type TEXT,
relate_id BIGINT,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, pay_id)
);
-- Refund records (counterpart of fact_payment).
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_refund (
site_id BIGINT NOT NULL,
refund_id BIGINT NOT NULL,
order_trade_no TEXT,
order_settle_id BIGINT,
member_id BIGINT,
pay_method_code TEXT REFERENCES billiards_dwd.dim_pay_method (pay_method_code),
refund_amount NUMERIC(18,2),
refund_time TIMESTAMPTZ,
status TEXT,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, refund_id)
);
-- Member stored-value balance movements, with before/after snapshots.
-- change_type is an upstream integer code (semantics defined by the API).
CREATE TABLE IF NOT EXISTS billiards_dwd.fact_balance_change (
site_id BIGINT NOT NULL,
change_id BIGINT NOT NULL,
member_id BIGINT,
change_type INT,
relate_type TEXT,
relate_id BIGINT,
pay_method_code TEXT REFERENCES billiards_dwd.dim_pay_method (pay_method_code),
change_amount NUMERIC(18,2),
balance_before NUMERIC(18,2),
balance_after NUMERIC(18,2),
change_time TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, change_id)
);
-- ---------- DWS (serving layers) ----------
-- One row per settled order: pre-aggregated amounts across table fees,
-- assistant services, goods and group-buy lines, plus discount/coupon
-- deductions, payment split (stored-card vs external), book-flow figures
-- and refund/net-income rollups. Keyed like the facts: (site_id, order_settle_id).
CREATE TABLE IF NOT EXISTS billiards_dws.dws_order_summary (
site_id BIGINT NOT NULL,
order_settle_id BIGINT NOT NULL,
order_trade_no TEXT,
order_date DATE,
tenant_id BIGINT,
member_id BIGINT,
member_flag BOOLEAN DEFAULT FALSE,
recharge_order_flag BOOLEAN DEFAULT FALSE,
item_count INT,
total_item_quantity NUMERIC(18,4),
table_fee_amount NUMERIC(18,2),
assistant_service_amount NUMERIC(18,2),
goods_amount NUMERIC(18,2),
group_amount NUMERIC(18,2),
total_coupon_deduction NUMERIC(18,2),
member_discount_amount NUMERIC(18,2),
manual_discount_amount NUMERIC(18,2),
order_original_amount NUMERIC(18,2),
order_final_amount NUMERIC(18,2),
stored_card_deduct NUMERIC(18,2),
external_paid_amount NUMERIC(18,2),
total_paid_amount NUMERIC(18,2),
book_table_flow NUMERIC(18,2),
book_assistant_flow NUMERIC(18,2),
book_goods_flow NUMERIC(18,2),
book_group_flow NUMERIC(18,2),
book_order_flow NUMERIC(18,2),
order_effective_consume_cash NUMERIC(18,2),
order_effective_recharge_cash NUMERIC(18,2),
order_effective_flow NUMERIC(18,2),
refund_amount NUMERIC(18,2),
net_income NUMERIC(18,2),
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, order_settle_id)
);
-- ---------- etl_admin (scheduler, cursor, run tracking) ----------
CREATE SCHEMA IF NOT EXISTS etl_admin;
-- Task registry: one row per (task_code, store_id). Windowing defaults
-- (window_minutes_default / overlap_seconds) and paging/retry knobs live
-- here; params holds free-form task-specific overrides as JSONB.
CREATE TABLE IF NOT EXISTS etl_admin.etl_task (
task_id BIGSERIAL PRIMARY KEY,
task_code TEXT NOT NULL,
store_id BIGINT NOT NULL,
enabled BOOLEAN DEFAULT TRUE,
cursor_field TEXT,
window_minutes_default INT DEFAULT 30,
overlap_seconds INT DEFAULT 120,
page_size INT DEFAULT 200,
retry_max INT DEFAULT 3,
params JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE (task_code, store_id)
);
-- Incremental-load cursor per task/store: where the last extraction
-- window ended (last_start/last_end) and the last processed id.
-- Rows cascade-delete with their task.
CREATE TABLE IF NOT EXISTS etl_admin.etl_cursor (
cursor_id BIGSERIAL PRIMARY KEY,
task_id BIGINT NOT NULL REFERENCES etl_admin.etl_task(task_id) ON DELETE CASCADE,
store_id BIGINT NOT NULL,
last_start TIMESTAMPTZ,
last_end TIMESTAMPTZ,
last_id BIGINT,
last_run_id BIGINT,
extra JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE (task_id, store_id)
);
-- Per-run audit log: window actually used, row counters
-- (fetched/loaded/updated/skipped/errors), export/log paths and the raw
-- request params + manifest for replay/debugging.
CREATE TABLE IF NOT EXISTS etl_admin.etl_run (
run_id BIGSERIAL PRIMARY KEY,
run_uuid TEXT NOT NULL,
task_id BIGINT NOT NULL REFERENCES etl_admin.etl_task(task_id) ON DELETE CASCADE,
store_id BIGINT NOT NULL,
status TEXT NOT NULL,
started_at TIMESTAMPTZ DEFAULT now(),
ended_at TIMESTAMPTZ,
window_start TIMESTAMPTZ,
window_end TIMESTAMPTZ,
window_minutes INT,
overlap_seconds INT,
fetched_count INT DEFAULT 0,
loaded_count INT DEFAULT 0,
updated_count INT DEFAULT 0,
skipped_count INT DEFAULT 0,
error_count INT DEFAULT 0,
unknown_fields INT DEFAULT 0,
export_dir TEXT,
log_path TEXT,
request_params JSONB DEFAULT '{}'::jsonb,
manifest JSONB DEFAULT '{}'::jsonb,
error_message TEXT,
extra JSONB DEFAULT '{}'::jsonb
);
-- Default task registry seed (idempotent)
-- Registers every known task code for the single default store.
-- ON CONFLICT (task_code, store_id) DO NOTHING makes re-running this
-- script safe and preserves any manual edits to existing rows.
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)
VALUES
('PRODUCTS', 2790685415443269, TRUE),
('TABLES', 2790685415443269, TRUE),
('MEMBERS', 2790685415443269, TRUE),
('ASSISTANTS', 2790685415443269, TRUE),
('PACKAGES_DEF', 2790685415443269, TRUE),
('ORDERS', 2790685415443269, TRUE),
('PAYMENTS', 2790685415443269, TRUE),
('REFUNDS', 2790685415443269, TRUE),
('COUPON_USAGE', 2790685415443269, TRUE),
('INVENTORY_CHANGE', 2790685415443269, TRUE),
('TOPUPS', 2790685415443269, TRUE),
('TABLE_DISCOUNT', 2790685415443269, TRUE),
('ASSISTANT_ABOLISH', 2790685415443269, TRUE),
('LEDGER', 2790685415443269, TRUE),
('TICKET_DWD', 2790685415443269, TRUE),
('PAYMENTS_DWD', 2790685415443269, TRUE),
('MEMBERS_DWD', 2790685415443269, TRUE),
('MANUAL_INGEST', 2790685415443269, TRUE),
('ODS_ORDER_SETTLE', 2790685415443269, TRUE),
('ODS_TABLE_USE', 2790685415443269, TRUE),
('ODS_ASSISTANT_LEDGER', 2790685415443269, TRUE),
('ODS_ASSISTANT_ABOLISH', 2790685415443269, TRUE),
('ODS_GOODS_LEDGER', 2790685415443269, TRUE),
('ODS_PAYMENT', 2790685415443269, TRUE),
('ODS_REFUND', 2790685415443269, TRUE),
('ODS_COUPON_VERIFY', 2790685415443269, TRUE),
('ODS_MEMBER', 2790685415443269, TRUE),
('ODS_MEMBER_CARD', 2790685415443269, TRUE),
('ODS_PACKAGE', 2790685415443269, TRUE),
('ODS_INVENTORY_STOCK', 2790685415443269, TRUE),
('ODS_INVENTORY_CHANGE', 2790685415443269, TRUE)
ON CONFLICT (task_code, store_id) DO NOTHING;

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.assistant_abolish import AssistantAbolishLoader
from models.parsers import TypeParser
@@ -14,54 +14,54 @@ class AssistantAbolishTask(BaseTask):
def get_task_code(self) -> str:
return "ASSISTANT_ABOLISH"
def execute(self) -> dict:
self.logger.info("开始执行 ASSISTANT_ABOLISH 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"startTime": TypeParser.format_timestamp(context.window_start, self.tz),
"endTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/AssistantPerformance/GetAbolitionAssistant",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="abolitionAssistants",
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_record(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Assistant/AbolishList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "abolitionAssistants"),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = AssistantAbolishLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_records(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_record(raw)
if mapped:
parsed.append(mapped)
loader = AssistantAbolishLoader(self.db)
inserted, updated, skipped = loader.upsert_records(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"ASSISTANT_ABOLISH 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("ASSISTANT_ABOLISH 失败", exc_info=True)
raise
def _parse_record(self, raw: dict) -> dict | None:
def _parse_record(self, raw: dict, store_id: int) -> dict | None:
abolish_id = TypeParser.parse_int(raw.get("id"))
if not abolish_id:
self.logger.warning("跳过缺少 id 的助教作废记录: %s", raw)
self.logger.warning("跳过缺少作废ID的记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"abolish_id": abolish_id,
@@ -72,9 +72,7 @@ class AssistantAbolishTask(BaseTask):
"assistant_no": raw.get("assistantOn"),
"assistant_name": raw.get("assistantName"),
"charge_minutes": TypeParser.parse_int(raw.get("pdChargeMinutes")),
"abolish_amount": TypeParser.parse_decimal(
raw.get("assistantAbolishAmount")
),
"abolish_amount": TypeParser.parse_decimal(raw.get("assistantAbolishAmount")),
"create_time": TypeParser.parse_timestamp(
raw.get("createTime") or raw.get("create_time"), self.tz
),

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.dimensions.assistant import AssistantLoader
from models.parsers import TypeParser
@@ -14,49 +14,48 @@ class AssistantsTask(BaseTask):
def get_task_code(self) -> str:
return "ASSISTANTS"
def execute(self) -> dict:
self.logger.info("开始执行 ASSISTANTS 任务")
params = {"storeId": self.config.get("app.store_id")}
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params({"siteId": context.store_id})
records, _ = self.api.get_paginated(
endpoint="/PersonnelManagement/SearchAssistantInfo",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="assistantInfos",
)
return {"records": records}
try:
records, _ = self.api.get_paginated(
endpoint="/Assistant/List",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "assistantInfos"),
)
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_assistant(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
parsed = []
for raw in records:
mapped = self._parse_assistant(raw)
if mapped:
parsed.append(mapped)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = AssistantLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_assistants(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
loader = AssistantLoader(self.db)
inserted, updated, skipped = loader.upsert_assistants(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"ASSISTANTS 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("ASSISTANTS 失败", exc_info=True)
raise
def _parse_assistant(self, raw: dict) -> dict | None:
def _parse_assistant(self, raw: dict, store_id: int) -> dict | None:
assistant_id = TypeParser.parse_int(raw.get("id"))
if not assistant_id:
self.logger.warning("跳过缺少 id 的助教数据: %s", raw)
self.logger.warning("跳过缺少助教ID的数据: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"assistant_id": assistant_id,

View File

@@ -1,62 +1,141 @@
# -*- coding: utf-8 -*-
"""ETL任务基类"""
"""ETL任务基类(引入 Extract/Transform/Load 模板方法)"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
@dataclass(frozen=True)
class TaskContext:
"""统一透传给 Extract/Transform/Load 的运行期信息。"""
store_id: int
window_start: datetime
window_end: datetime
window_minutes: int
cursor: dict | None = None
class BaseTask:
"""ETL任务基类"""
"""提供 E/T/L 模板的任务基类"""
def __init__(self, config, db_connection, api_client, logger):
self.config = config
self.db = db_connection
self.api = api_client
self.logger = logger
self.tz = ZoneInfo(config.get("app.timezone", "Asia/Taipei"))
# ------------------------------------------------------------------ 基本信息
def get_task_code(self) -> str:
"""获取任务代码"""
raise NotImplementedError("子类需实现 get_task_code 方法")
def execute(self) -> dict:
"""执行任务"""
raise NotImplementedError("子类需实现 execute 方法")
# ------------------------------------------------------------------ E/T/L 钩子
def extract(self, context: TaskContext):
"""提取数据"""
raise NotImplementedError("子类需实现 extract 方法")
def transform(self, extracted, context: TaskContext):
"""转换数据"""
return extracted
def load(self, transformed, context: TaskContext) -> dict:
"""加载数据并返回统计信息"""
raise NotImplementedError("子类需实现 load 方法")
# ------------------------------------------------------------------ 主流程
def execute(self, cursor_data: dict | None = None) -> dict:
"""统一 orchestrate Extract → Transform → Load"""
context = self._build_context(cursor_data)
task_code = self.get_task_code()
self.logger.info(
"%s: 开始执行,窗口[%s ~ %s]",
task_code,
context.window_start,
context.window_end,
)
try:
extracted = self.extract(context)
transformed = self.transform(extracted, context)
counts = self.load(transformed, context) or {}
self.db.commit()
except Exception:
self.db.rollback()
self.logger.error("%s: 执行失败", task_code, exc_info=True)
raise
result = self._build_result("SUCCESS", counts)
result["window"] = {
"start": context.window_start,
"end": context.window_end,
"minutes": context.window_minutes,
}
self.logger.info("%s: 完成,统计=%s", task_code, result["counts"])
return result
# ------------------------------------------------------------------ 辅助方法
def _build_context(self, cursor_data: dict | None) -> TaskContext:
window_start, window_end, window_minutes = self._get_time_window(cursor_data)
return TaskContext(
store_id=self.config.get("app.store_id"),
window_start=window_start,
window_end=window_end,
window_minutes=window_minutes,
cursor=cursor_data,
)
def _get_time_window(self, cursor_data: dict = None) -> tuple:
"""计算时间窗口"""
now = datetime.now(self.tz)
# 判断是否在闲时窗口
idle_start = self.config.get("run.idle_window.start", "04:00")
idle_end = self.config.get("run.idle_window.end", "16:00")
is_idle = self._is_in_idle_window(now, idle_start, idle_end)
# 获取窗口大小
if is_idle:
window_minutes = self.config.get("run.window_minutes.default_idle", 180)
else:
window_minutes = self.config.get("run.window_minutes.default_busy", 30)
# 计算窗口
overlap_seconds = self.config.get("run.overlap_seconds", 120)
if cursor_data and cursor_data.get("last_end"):
window_start = cursor_data["last_end"] - timedelta(seconds=overlap_seconds)
else:
window_start = now - timedelta(minutes=window_minutes)
window_end = now
return window_start, window_end, window_minutes
def _is_in_idle_window(self, dt: datetime, start_time: str, end_time: str) -> bool:
"""判断是否在闲时窗口"""
current_time = dt.strftime("%H:%M")
return start_time <= current_time <= end_time
def _merge_common_params(self, base: dict) -> dict:
"""
合并全局/任务级参数池便于在配置中统一覆<E4B880>?/追加过滤条件。
支持:
- api.params 下的通用键值;
- api.params.<task_code_lower> 下的任务级键值。
"""
merged: dict = {}
common = self.config.get("api.params", {}) or {}
if isinstance(common, dict):
merged.update(common)
task_key = f"api.params.{self.get_task_code().lower()}"
scoped = self.config.get(task_key, {}) or {}
if isinstance(scoped, dict):
merged.update(scoped)
merged.update(base)
return merged
def _build_result(self, status: str, counts: dict) -> dict:
"""构建结果字典"""
return {
"status": status,
"counts": counts
}
return {"status": status, "counts": counts}

View File

@@ -3,65 +3,66 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.coupon_usage import CouponUsageLoader
from models.parsers import TypeParser
class CouponUsageTask(BaseTask):
"""同步平台券验/核销记录"""
"""同步平台券验/核销记录"""
def get_task_code(self) -> str:
return "COUPON_USAGE"
def execute(self) -> dict:
self.logger.info("开始执行 COUPON_USAGE 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"startTime": TypeParser.format_timestamp(context.window_start, self.tz),
"endTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/Promotion/GetOfflineCouponConsumePageList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_usage(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Coupon/UsageList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=(),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = CouponUsageLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_coupon_usage(
transformed["records"]
)
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_usage(raw)
if mapped:
parsed.append(mapped)
loader = CouponUsageLoader(self.db)
inserted, updated, skipped = loader.upsert_coupon_usage(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"COUPON_USAGE 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("COUPON_USAGE 失败", exc_info=True)
raise
def _parse_usage(self, raw: dict) -> dict | None:
def _parse_usage(self, raw: dict, store_id: int) -> dict | None:
usage_id = TypeParser.parse_int(raw.get("id"))
if not usage_id:
self.logger.warning("跳过缺少 id 的券核销记录: %s", raw)
self.logger.warning("跳过缺少券核销ID的记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"usage_id": usage_id,

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.inventory_change import InventoryChangeLoader
from models.parsers import TypeParser
@@ -14,56 +14,56 @@ class InventoryChangeTask(BaseTask):
def get_task_code(self) -> str:
return "INVENTORY_CHANGE"
def execute(self) -> dict:
self.logger.info("开始执行 INVENTORY_CHANGE 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"startTime": TypeParser.format_timestamp(context.window_start, self.tz),
"endTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/GoodsStockManage/QueryGoodsOutboundReceipt",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="queryDeliveryRecordsList",
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_change(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Inventory/ChangeList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "queryDeliveryRecordsList"),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = InventoryChangeLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_changes(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_change(raw)
if mapped:
parsed.append(mapped)
loader = InventoryChangeLoader(self.db)
inserted, updated, skipped = loader.upsert_changes(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"INVENTORY_CHANGE 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("INVENTORY_CHANGE 失败", exc_info=True)
raise
def _parse_change(self, raw: dict) -> dict | None:
def _parse_change(self, raw: dict, store_id: int) -> dict | None:
change_id = TypeParser.parse_int(
raw.get("siteGoodsStockId") or raw.get("site_goods_stock_id")
)
if not change_id:
self.logger.warning("跳过缺少变动 id 的库存记录: %s", raw)
self.logger.warning("跳过缺少库存变动ID的记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"change_id": change_id,

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.assistant_ledger import AssistantLedgerLoader
from models.parsers import TypeParser
@@ -14,54 +14,54 @@ class LedgerTask(BaseTask):
def get_task_code(self) -> str:
return "LEDGER"
def execute(self) -> dict:
self.logger.info("开始执行 LEDGER 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"startTime": TypeParser.format_timestamp(context.window_start, self.tz),
"endTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/AssistantPerformance/GetOrderAssistantDetails",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="orderAssistantDetails",
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_ledger(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Assistant/LedgerList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "orderAssistantDetails"),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = AssistantLedgerLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_ledgers(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_ledger(raw)
if mapped:
parsed.append(mapped)
loader = AssistantLedgerLoader(self.db)
inserted, updated, skipped = loader.upsert_ledgers(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"LEDGER 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("LEDGER 失败", exc_info=True)
raise
def _parse_ledger(self, raw: dict) -> dict | None:
def _parse_ledger(self, raw: dict, store_id: int) -> dict | None:
ledger_id = TypeParser.parse_int(raw.get("id"))
if not ledger_id:
self.logger.warning("跳过缺少 id 的助教流水: %s", raw)
self.logger.warning("跳过缺少助教流水ID的记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"ledger_id": ledger_id,
@@ -100,12 +100,8 @@ class LedgerTask(BaseTask):
"ledger_end_time": TypeParser.parse_timestamp(
raw.get("ledger_end_time"), self.tz
),
"start_use_time": TypeParser.parse_timestamp(
raw.get("start_use_time"), self.tz
),
"last_use_time": TypeParser.parse_timestamp(
raw.get("last_use_time"), self.tz
),
"start_use_time": TypeParser.parse_timestamp(raw.get("start_use_time"), self.tz),
"last_use_time": TypeParser.parse_timestamp(raw.get("last_use_time"), self.tz),
"income_seconds": TypeParser.parse_int(raw.get("income_seconds")),
"real_use_seconds": TypeParser.parse_int(raw.get("real_use_seconds")),
"is_trash": raw.get("is_trash"),

View File

@@ -1,176 +1,719 @@
# -*- coding: utf-8 -*-
import os
"""Manual ingestion task that replays archived JSON into ODS tables."""
from __future__ import annotations
import json
import os
from datetime import datetime
from typing import Iterable, Iterator
from .base_task import BaseTask
from loaders.ods.generic import GenericODSLoader
class ManualIngestTask(BaseTask):
"""
Task to ingest manually fetched JSON files from a directory into ODS tables.
Load archived API responses (tests/source-data-doc) into billiards_ods.* tables.
Used when upstream API is unavailable and we need to replay captured payloads.
"""
FILE_MAPPING = {
"小票详情": "billiards_ods.ods_ticket_detail",
"结账记录": "billiards_ods.ods_order_settle",
"支付记录": "billiards_ods.ods_payment",
"助教流水": "billiards_ods.ods_assistant_ledger",
"助教废除": "billiards_ods.ods_assistant_abolish",
"商品档案": "billiards_ods.ods_goods_ledger", # Note: This might be dim_product source, but mapping to ledger for now if it's sales
"库存变化": "billiards_ods.ods_inventory_change",
"会员档案": "billiards_ods.ods_member",
"充值记录": "billiards_ods.ods_member_card", # Approx
"团购套餐": "billiards_ods.ods_package_coupon",
"库存汇总": "billiards_ods.ods_inventory_stock"
}
FILE_MAPPING: list[tuple[tuple[str, ...], str]] = [
(("会员档案",), "billiards_ods.ods_member_profile"),
(("储值卡列表", "储值卡"), "billiards_ods.ods_member_card"),
(("充值记录",), "billiards_ods.ods_recharge_record"),
(("余额变动",), "billiards_ods.ods_balance_change"),
(("助教账号",), "billiards_ods.ods_assistant_account"),
(("助教流水",), "billiards_ods.ods_assistant_service_log"),
(("助教废除", "助教作废"), "billiards_ods.ods_assistant_cancel_log"),
(("台桌列表",), "billiards_ods.ods_table_info"),
(("台费流水",), "billiards_ods.ods_table_use_log"),
(("台费打折",), "billiards_ods.ods_table_fee_adjust"),
(("商品档案",), "billiards_ods.ods_store_product"),
(("门店商品销售", "销售记录"), "billiards_ods.ods_store_sale_item"),
(("团购套餐定义", "套餐定义"), "billiards_ods.ods_group_package"),
(("团购套餐使用", "套餐使用"), "billiards_ods.ods_group_package_log"),
(("平台验券", "验券记录"), "billiards_ods.ods_platform_coupon_log"),
(("库存汇总",), "billiards_ods.ods_inventory_stock"),
(("库存变化记录1",), "billiards_ods.ods_inventory_change"),
(("库存变化记录2", "分类配置"), "billiards_ods.ods_goods_category"),
(("结账记录",), "billiards_ods.ods_order_settle"),
(("小票详情", "小票明细", "票详"), "billiards_ods.ods_order_receipt_detail"),
(("支付记录",), "billiards_ods.ods_payment_record"),
(("退款记录",), "billiards_ods.ods_refund_record"),
]
WRAPPER_META_KEYS = {"code", "message", "msg", "success", "error", "status"}
def get_task_code(self) -> str:
return "MANUAL_INGEST"
def execute(self) -> dict:
    """Replay captured JSON payload files from disk into the ODS tables.

    For each ``*.json`` file in the configured directory: load it, unwrap
    API envelopes into record dicts, route it to a target table (by file
    name first, then by payload structure), build rows keyed on
    (site_id + business key) and bulk-upsert them. Per-file failures are
    counted and rolled back without aborting the whole run.

    Returns the standard task result built from the counters.
    """
    self.logger.info("Starting Manual Ingest Task")
    # Configurable directory; defaults to the test-capture folder.
    data_dir = self.config.get(
        "manual.data_dir",
        r"c:\dev\LLTQ\ETL\feiqiu-ETL\etl_billiards\tests\testdata_json",
    )
    if not os.path.exists(data_dir):
        self.logger.error("Data directory not found: %s", data_dir)
        return {"status": "error", "message": "Directory not found"}
    counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
    # Sorted for deterministic processing order across runs.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(data_dir, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as fh:
                raw_entries = json.load(fh)
        except Exception:
            counts["errors"] += 1
            self.logger.exception("Failed to read %s", filename)
            continue
        if not isinstance(raw_entries, list):
            raw_entries = [raw_entries]
        records = self._normalize_records(raw_entries)
        if not records:
            counts["skipped"] += 1
            continue
        # Route by filename keywords first, then by payload structure.
        target_table = self._match_by_filename(filename) or self._match_by_content(
            records, raw_entries
        )
        if not target_table:
            self.logger.warning("No mapping found for file: %s", filename)
            counts["skipped"] += 1
            continue
        self.logger.info("Ingesting %s into %s", filename, target_table)
        try:
            rows = []
            for record in records:
                site_id = self._extract_store_id(record) or self.config.get(
                    "app.store_id"
                )
                pk_value = self._extract_pk(record, target_table)
                pk_tuple = self._ensure_tuple(pk_value)
                # Skip records whose business key is incomplete.
                if not all(value not in (None, "") for value in pk_tuple):
                    continue
                row = {
                    "site_id": site_id,
                    "payload": json.dumps(record, ensure_ascii=False),
                    "source_file": filename,
                    "fetched_at": datetime.now(),
                }
                # Business-key columns must align 1:1 with the PK tuple.
                for column, value in zip(
                    self._get_conflict_columns(target_table), pk_tuple
                ):
                    row[column] = value
                self._enrich_row(row, record, target_table)
                rows.append(row)
            if rows:
                self._bulk_insert(target_table, rows)
                counts["inserted"] += len(rows)
            else:
                counts["skipped"] += 1
            counts["fetched"] += 1
        except Exception:
            counts["errors"] += 1
            self.logger.exception("Error processing %s", filename)
            # Roll back only this file's work; keep processing the rest.
            self.db.rollback()
    try:
        self.db.commit()
    except Exception:
        self.db.rollback()
        raise
    return self._build_result("SUCCESS", counts)
# ------------------------------------------------------------------ helpers
def _match_by_filename(self, filename: str) -> str | None:
for keywords, table in self.FILE_MAPPING:
if any(keyword and keyword in filename for keyword in keywords):
return table
return None
def _extract_pk(self, item, table):
# Helper to find PK based on table
def _match_by_content(
self, records: list[dict], raw_entries: list[dict]
) -> str | None:
"""
Map content to PRD ODS tables.
"""
sample_record = records[0] if records else None
wrapper = self._extract_sample(raw_entries)
data_node = wrapper.get("data") if isinstance(wrapper, dict) else None
data_keys = set(data_node.keys()) if isinstance(data_node, dict) else set()
record_keys = set(sample_record.keys()) if isinstance(sample_record, dict) else set()
# Data node based hints
if "tenantMemberInfos" in data_keys:
return "billiards_ods.ods_member_profile"
if "tenantMemberCards" in data_keys:
return "billiards_ods.ods_member_card"
if "queryDeliveryRecordsList" in data_keys:
return "billiards_ods.ods_inventory_change"
if "goodsStockA" in data_keys or "rangeStartStock" in data_keys:
return "billiards_ods.ods_inventory_stock"
if "goodsCategoryList" in data_keys:
return "billiards_ods.ods_goods_category"
if "orderAssistantDetails" in data_keys:
return "billiards_ods.ods_assistant_service_log"
if "abolitionAssistants" in data_keys:
return "billiards_ods.ods_assistant_cancel_log"
if "siteTableUseDetailsList" in data_keys:
return "billiards_ods.ods_table_use_log"
if "taiFeeAdjustInfos" in data_keys:
return "billiards_ods.ods_table_fee_adjust"
if "orderGoodsLedgers" in data_keys or "orderGoodsList" in data_keys:
return "billiards_ods.ods_store_sale_item"
if "tenantGoodsList" in data_keys:
return "billiards_ods.ods_store_product"
if "packageCouponList" in data_keys:
return "billiards_ods.ods_group_package"
if "settleList" in data_keys and "total" in data_keys:
return "billiards_ods.ods_order_settle"
# Record key based hints
if sample_record:
if {"pay_amount", "pay_status"} <= record_keys or {"payAmount", "payStatus"} <= record_keys:
return "billiards_ods.ods_payment_record"
if "refundAmount" in record_keys or "refund_amount" in record_keys:
return "billiards_ods.ods_refund_record"
if "orderSettleId" in record_keys or "order_settle_id" in record_keys:
return "billiards_ods.ods_order_receipt_detail"
if "coupon_channel" in record_keys or "groupPackageId" in record_keys:
return "billiards_ods.ods_platform_coupon_log"
if "packageId" in record_keys or "package_id" in record_keys:
return "billiards_ods.ods_group_package_log"
if "memberCardId" in record_keys or "cardId" in record_keys:
return "billiards_ods.ods_member_card"
if "memberId" in record_keys:
return "billiards_ods.ods_member_profile"
if "siteGoodsId" in record_keys and "currentStock" in record_keys:
return "billiards_ods.ods_inventory_stock"
if "goodsId" in record_keys:
return "billiards_ods.ods_product"
return None
def _extract_sample(self, payloads: Iterable[dict]) -> dict:
for item in payloads:
if isinstance(item, dict):
return item
return {}
def _normalize_records(self, payloads: list[dict]) -> list[dict]:
records: list[dict] = []
for payload in payloads:
records.extend(self._unwrap_payload(payload))
return records
def _unwrap_payload(self, payload) -> list[dict]:
if isinstance(payload, dict):
data_node = payload.get("data")
extra_keys = set(payload.keys()) - {"data"} - self.WRAPPER_META_KEYS
if isinstance(data_node, dict) and not extra_keys:
flattened: list[dict] = []
found_list = False
for value in data_node.values():
if isinstance(value, list):
flattened.extend(value)
found_list = True
if found_list:
return flattened
return [data_node]
return [payload]
if isinstance(payload, list):
flattened: list[dict] = []
for item in payload:
flattened.extend(self._unwrap_payload(item))
return flattened
return []
def _extract_store_id(self, item: dict):
"""Extract site_id from record/siteProfile wrappers."""
site_profile = item.get("siteProfile") or item.get("site_profile")
if isinstance(site_profile, dict) and site_profile.get("id"):
return site_profile["id"]
for key in ("site_id", "siteId", "register_site_id"):
if item.get(key):
return item[key]
data_node = item.get("data")
if isinstance(data_node, dict):
return data_node.get("siteId") or data_node.get("site_id")
return None
def _extract_pk(self, item: dict, table: str):
if "ods_order_receipt_detail" in table:
return item.get("orderSettleId") or item.get("order_settle_id") or item.get("id")
if "ods_order_settle" in table:
# Check for nested structure in some files
if "settleList" in item and "settleList" in item["settleList"]:
return item["settleList"]["settleList"].get("id")
settle = item.get("settleList") or item.get("settle") or item
if isinstance(settle, dict):
return settle.get("id") or settle.get("settleId") or item.get("id")
return item.get("id")
if "ods_payment_record" in table:
return item.get("payId") or item.get("id")
if "ods_refund_record" in table:
return item.get("refundId") or item.get("id")
if "ods_platform_coupon_log" in table:
return item.get("couponId") or item.get("id")
if "ods_assistant_service_log" in table or "ods_table_use_log" in table:
return item.get("ledgerId") or item.get("ledger_id") or item.get("id")
if "ods_assistant_cancel_log" in table:
return item.get("cancel_id") or item.get("cancelId") or item.get("abolishId") or item.get("id")
if "ods_store_sale_item" in table:
return (
item.get("sale_item_id")
or item.get("saleItemId")
or item.get("orderGoodsId")
or item.get("order_goods_id")
or item.get("id")
)
if "ods_inventory_change" in table:
return item.get("siteGoodsStockId") or item.get("id")
if "ods_inventory_stock" in table:
return (
item.get("siteGoodsId")
or item.get("id"),
item.get("snapshotKey") or item.get("snapshot_key") or "default",
)
if "ods_member_card" in table:
return item.get("cardId") or item.get("memberCardId") or item.get("id")
if "ods_member_profile" in table:
return item.get("memberId") or item.get("id")
if "ods_group_package_log" in table:
return item.get("usage_id") or item.get("usageId") or item.get("couponId") or item.get("id")
if "ods_group_package" in table:
return item.get("package_id") or item.get("packageId") or item.get("groupPackageId") or item.get("id")
if "ods_goods_category" in table:
return item.get("category_id") or item.get("categoryId") or item.get("id")
if "ods_table_fee_adjust" in table:
return item.get("adjust_id") or item.get("adjustId") or item.get("id")
if "ods_table_info" in table:
return item.get("table_id") or item.get("tableId") or item.get("id")
if "ods_assistant_account" in table:
return item.get("assistantId") or item.get("assistant_id") or item.get("id")
if "ods_store_product" in table:
return item.get("siteGoodsId") or item.get("site_goods_id") or item.get("id")
if "ods_product" in table:
return item.get("goodsId") or item.get("goods_id") or item.get("id")
if "ods_balance_change" in table:
return item.get("change_id") or item.get("changeId") or item.get("id")
if "ods_recharge_record" in table:
return item.get("recharge_id") or item.get("rechargeId") or item.get("id")
return item.get("id")
def _get_pk_column(self, table):
if "ods_ticket_detail" in table: return "order_settle_id"
if "ods_order_settle" in table: return "order_settle_id"
if "ods_payment" in table: return "pay_id"
if "ods_member" in table: return "member_id"
if "ods_assistant_ledger" in table: return "ledger_id"
if "ods_goods_ledger" in table: return "order_goods_id"
if "ods_inventory_change" in table: return "change_id"
if "ods_assistant_abolish" in table: return "abolish_id"
if "ods_coupon_verify" in table: return "coupon_id"
if "ods_member_card" in table: return "card_id"
if "ods_package_coupon" in table: return "package_id"
return "id" # Fallback
def _get_conflict_columns(self, table: str) -> list[str]:
if "ods_order_receipt_detail" in table:
return ["order_settle_id"]
if "ods_payment_record" in table:
return ["pay_id"]
if "ods_refund_record" in table:
return ["refund_id"]
if "ods_platform_coupon_log" in table:
return ["coupon_id"]
if "ods_assistant_service_log" in table or "ods_table_use_log" in table:
return ["ledger_id"]
if "ods_assistant_cancel_log" in table:
return ["cancel_id"]
if "ods_store_sale_item" in table:
return ["sale_item_id"]
if "ods_order_settle" in table:
return ["order_settle_id"]
if "ods_inventory_change" in table:
return ["change_id"]
if "ods_inventory_stock" in table:
return ["site_goods_id", "snapshot_key"]
if "ods_member_card" in table:
return ["card_id"]
if "ods_member_profile" in table:
return ["member_id"]
if "ods_group_package_log" in table:
return ["usage_id"]
if "ods_group_package" in table:
return ["package_id"]
if "ods_goods_category" in table:
return ["category_id"]
if "ods_table_info" in table:
return ["table_id"]
if "ods_table_fee_adjust" in table:
return ["adjust_id"]
if "ods_assistant_account" in table:
return ["assistant_id"]
if "ods_store_product" in table:
return ["site_goods_id"]
if "ods_product" in table:
return ["goods_id"]
if "ods_balance_change" in table:
return ["change_id"]
if "ods_recharge_record" in table:
return ["recharge_id"]
return ["id"]
def _enrich_row(self, row: dict, record: dict, table: str):
"""Best-effort populate important columns from payload for PRD ODS schema."""
def pick(obj, *keys):
for k in keys:
if isinstance(obj, dict) and obj.get(k) not in (None, ""):
return obj.get(k)
return None
if "ods_member_profile" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["member_name"] = pick(record, "name", "memberName")
row["nickname"] = record.get("nickname")
row["mobile"] = record.get("mobile")
row["gender"] = record.get("sex")
row["birthday"] = record.get("birthday")
row["register_time"] = record.get("register_time") or record.get("registerTime")
row["member_type_id"] = pick(record, "cardTypeId", "member_type_id")
row["member_type_name"] = record.get("cardTypeName")
row["status"] = pick(record, "status", "state")
row["balance"] = record.get("balance")
row["points"] = record.get("points") or record.get("point")
row["last_visit_time"] = record.get("lastVisitTime")
row["wechat_id"] = record.get("wechatId")
row["alipay_id"] = record.get("alipayId")
row["member_card_no"] = record.get("cardNo")
row["remarks"] = record.get("remark")
if "ods_member_card" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["member_id"] = pick(record, "memberId", "member_id")
row["card_type_id"] = record.get("cardTypeId")
row["card_type_name"] = record.get("cardTypeName")
row["card_balance"] = record.get("balance")
row["discount_rate"] = record.get("discount") or record.get("discount_rate")
row["valid_start_date"] = record.get("validStart")
row["valid_end_date"] = record.get("validEnd")
row["last_consume_time"] = record.get("lastConsumeTime")
row["status"] = record.get("status")
row["activate_time"] = record.get("activateTime")
row["deactivate_time"] = record.get("cancelTime")
row["issuer_id"] = record.get("issuerId")
row["issuer_name"] = record.get("issuerName")
if "ods_recharge_record" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["member_id"] = pick(record, "memberId", "member_id")
row["recharge_amount"] = record.get("amount") or record.get("rechargeAmount")
row["gift_amount"] = record.get("giftAmount")
row["pay_method"] = record.get("payType") or record.get("pay_method")
row["pay_trade_no"] = record.get("payTradeNo")
row["order_trade_no"] = record.get("orderTradeNo")
row["recharge_time"] = record.get("createTime") or record.get("rechargeTime")
row["status"] = record.get("status")
row["operator_id"] = record.get("operatorId")
row["operator_name"] = record.get("operatorName")
if "ods_balance_change" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["site_id"] = row.get("site_id") or pick(record, "siteId", "site_id")
row["member_id"] = pick(record, "memberId", "member_id")
row["change_amount"] = record.get("change_amount")
row["balance_before"] = record.get("before_balance")
row["balance_after"] = record.get("after_balance")
row["change_type"] = record.get("from_type") or record.get("type")
row["relate_id"] = record.get("relate_id")
row["pay_method"] = record.get("pay_type")
row["remark"] = record.get("remark")
row["operator_id"] = record.get("operatorId")
row["operator_name"] = record.get("operatorName")
row["change_time"] = record.get("create_time") or record.get("changeTime")
row["is_deleted"] = record.get("is_delete") or record.get("is_deleted")
row["source_file"] = row.get("source_file")
row["fetched_at"] = row.get("fetched_at")
if "ods_assistant_account" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["assistant_name"] = record.get("assistantName") or record.get("name")
row["mobile"] = record.get("mobile")
row["team_id"] = record.get("teamId")
row["team_name"] = record.get("teamName")
row["status"] = record.get("status")
row["hired_date"] = record.get("hireDate")
row["left_date"] = record.get("leaveDate")
if "ods_assistant_service_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["assistant_id"] = record.get("assistantId")
row["service_type"] = record.get("serviceType")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["start_time"] = record.get("startTime")
row["end_time"] = record.get("endTime")
row["duration_minutes"] = record.get("duration")
row["original_fee"] = record.get("originFee") or record.get("original_fee")
row["discount_amount"] = record.get("discountAmount")
row["final_fee"] = record.get("finalFee") or record.get("final_fee")
row["member_id"] = record.get("memberId")
row["status"] = record.get("status")
if "ods_assistant_cancel_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["ledger_id"] = record.get("ledgerId")
row["assistant_id"] = record.get("assistantId")
row["order_trade_no"] = record.get("orderTradeNo")
row["reason"] = record.get("reason")
row["cancel_time"] = record.get("cancel_time") or record.get("cancelTime")
row["operator_id"] = record.get("operatorId")
row["operator_name"] = record.get("operatorName")
if "ods_table_info" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["table_code"] = record.get("tableCode")
row["table_name"] = record.get("tableName")
row["table_type"] = record.get("tableType")
row["area_name"] = record.get("areaName")
row["status"] = record.get("status")
row["created_time"] = record.get("createTime")
row["updated_time"] = record.get("updateTime")
if "ods_table_use_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["table_id"] = record.get("tableId")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["start_time"] = record.get("startTime")
row["end_time"] = record.get("endTime")
row["duration_minutes"] = record.get("duration")
row["original_table_fee"] = record.get("originFee") or record.get("original_table_fee")
row["discount_amount"] = record.get("discountAmount")
row["final_table_fee"] = record.get("finalFee") or record.get("final_table_fee")
row["member_id"] = record.get("memberId")
row["status"] = record.get("status")
if "ods_table_fee_adjust" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["ledger_id"] = record.get("ledgerId")
row["order_trade_no"] = record.get("orderTradeNo")
row["discount_amount"] = record.get("discountAmount")
row["reason"] = record.get("reason")
row["operator_id"] = record.get("operatorId")
row["operator_name"] = record.get("operatorName")
row["created_at"] = record.get("created_at") or record.get("createTime")
if "ods_store_product" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["goods_id"] = record.get("goodsId")
row["goods_name"] = record.get("goodsName")
row["category_id"] = record.get("categoryId")
row["category_name"] = record.get("categoryName")
row["sale_price"] = record.get("salePrice")
row["cost_price"] = record.get("costPrice")
row["status"] = record.get("status")
if "ods_store_sale_item" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["goods_id"] = record.get("goodsId")
row["goods_name"] = record.get("goodsName")
row["category_id"] = record.get("categoryId")
row["quantity"] = record.get("quantity")
row["original_amount"] = record.get("originalAmount")
row["discount_amount"] = record.get("discountAmount")
row["final_amount"] = record.get("finalAmount")
row["is_gift"] = record.get("isGift")
row["sale_time"] = record.get("saleTime")
if "ods_group_package_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["package_id"] = record.get("packageId")
row["coupon_id"] = record.get("couponId")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["member_id"] = record.get("memberId")
row["status"] = record.get("status")
row["used_time"] = record.get("usedTime")
row["deduct_amount"] = record.get("deductAmount")
row["settle_price"] = record.get("settlePrice")
if "ods_group_package" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["package_name"] = record.get("packageName")
row["platform_code"] = record.get("platformCode")
row["status"] = record.get("status")
row["face_price"] = record.get("facePrice")
row["settle_price"] = record.get("settlePrice")
row["valid_from"] = record.get("validFrom")
row["valid_to"] = record.get("validTo")
if "ods_platform_coupon_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["platform_code"] = record.get("platformCode")
row["verify_code"] = record.get("verifyCode")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["member_id"] = record.get("memberId")
row["status"] = record.get("status")
row["used_time"] = record.get("usedTime")
row["deduct_amount"] = record.get("deductAmount")
row["settle_price"] = record.get("settlePrice")
if "ods_payment_record" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["member_id"] = record.get("memberId")
row["pay_method_code"] = record.get("payMethodCode") or record.get("pay_type")
row["pay_method_name"] = record.get("payMethodName")
row["pay_amount"] = record.get("payAmount")
row["pay_time"] = record.get("payTime")
row["relate_type"] = record.get("relateType")
row["relate_id"] = record.get("relateId")
if "ods_refund_record" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["order_trade_no"] = record.get("orderTradeNo")
row["order_settle_id"] = record.get("orderSettleId")
row["member_id"] = record.get("memberId")
row["pay_method_code"] = record.get("payMethodCode")
row["refund_amount"] = record.get("refundAmount")
row["refund_time"] = record.get("refundTime")
row["status"] = record.get("status")
if "ods_inventory_change" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["site_goods_id"] = record.get("siteGoodsId")
row["goods_id"] = record.get("goodsId")
row["change_amount"] = record.get("changeAmount")
row["before_stock"] = record.get("beforeStock")
row["after_stock"] = record.get("afterStock")
row["change_type"] = record.get("changeType")
row["relate_id"] = record.get("relateId")
row["remark"] = record.get("remark")
row["operator_id"] = record.get("operatorId")
row["operator_name"] = record.get("operatorName")
row["change_time"] = record.get("changeTime")
if "ods_inventory_stock" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["goods_id"] = record.get("goodsId")
row["current_stock"] = record.get("currentStock")
row["cost_price"] = record.get("costPrice")
if "ods_goods_category" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["category_name"] = record.get("categoryName")
row["parent_id"] = record.get("parentId")
row["level_no"] = record.get("levelNo")
row["status"] = record.get("status")
row["remark"] = record.get("remark")
if "ods_order_receipt_detail" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["order_trade_no"] = record.get("orderTradeNo")
row["receipt_no"] = record.get("receiptNo")
row["receipt_time"] = record.get("receiptTime")
row["total_amount"] = record.get("totalAmount")
row["discount_amount"] = record.get("discountAmount")
row["final_amount"] = record.get("finalAmount")
row["member_id"] = record.get("memberId")
row["snapshot_raw"] = record.get("siteProfile") or record.get("site_profile")
if "ods_order_settle" in table:
settle = record.get("settleList") if isinstance(record.get("settleList"), dict) else record
if isinstance(settle, dict):
row["tenant_id"] = pick(settle, "tenantId", "tenant_id")
row["settle_relate_id"] = settle.get("settleRelateId")
row["settle_name"] = settle.get("settleName")
row["settle_type"] = settle.get("settleType")
row["settle_status"] = settle.get("settleStatus")
row["member_id"] = settle.get("memberId")
row["member_phone"] = settle.get("memberPhone")
row["table_id"] = settle.get("tableId")
row["consume_money"] = settle.get("consumeMoney")
row["table_charge_money"] = settle.get("tableChargeMoney")
row["goods_money"] = settle.get("goodsMoney")
row["service_money"] = settle.get("serviceMoney")
row["assistant_pd_money"] = settle.get("assistantPdMoney")
row["assistant_cx_money"] = settle.get("assistantCxMoney")
row["pay_amount"] = settle.get("payAmount")
row["coupon_amount"] = settle.get("couponAmount")
row["card_amount"] = settle.get("cardAmount")
row["balance_amount"] = settle.get("balanceAmount")
row["refund_amount"] = settle.get("refundAmount")
row["prepay_money"] = settle.get("prepayMoney")
row["adjust_amount"] = settle.get("adjustAmount")
row["rounding_amount"] = settle.get("roundingAmount")
row["payment_method"] = settle.get("paymentMethod")
row["create_time"] = settle.get("createTime")
row["pay_time"] = settle.get("payTime")
row["operator_id"] = settle.get("operatorId")
row["operator_name"] = settle.get("operatorName")
if "ods_product" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
row["goods_id"] = record.get("goodsId")
row["goods_name"] = record.get("goodsName")
row["goods_code"] = record.get("goodsCode")
row["category_id"] = record.get("categoryId")
row["category_name"] = record.get("categoryName")
row["unit"] = record.get("unit")
row["price"] = record.get("price")
row["status"] = record.get("status")
if "ods_platform_coupon_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
if "ods_table_use_log" in table:
row["tenant_id"] = pick(record, "tenantId", "tenant_id")
def _ensure_tuple(self, value):
if isinstance(value, tuple):
return value
return (value,)
def _bulk_insert(self, table: str, rows: list[dict]):
if not rows:
return
columns = list(rows[0].keys())
col_clause = ", ".join(columns)
val_clause = ", ".join(f"%({col})s" for col in columns)
conflict_cols = ["site_id"] + self._get_conflict_columns(table)
conflict_clause = ", ".join(conflict_cols)
def _bulk_insert(self, table, rows):
if not rows: return
keys = list(rows[0].keys())
cols = ", ".join(keys)
vals = ", ".join([f"%({k})s" for k in keys])
# Determine PK col for conflict
pk_col = self._get_pk_column(table)
sql = f"""
INSERT INTO {table} ({cols})
VALUES ({vals})
ON CONFLICT (store_id, {pk_col}) DO UPDATE SET
INSERT INTO {table} ({col_clause})
VALUES ({val_clause})
ON CONFLICT ({conflict_clause}) DO UPDATE SET
payload = EXCLUDED.payload,
fetched_at = EXCLUDED.fetched_at,
source_file = EXCLUDED.source_file;
source_file = EXCLUDED.source_file
"""
self.db.batch_execute(sql, rows)

View File

@@ -1,73 +1,72 @@
# -*- coding: utf-8 -*-
"""会员ETL任务"""
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.dimensions.member import MemberLoader
from models.parsers import TypeParser
class MembersTask(BaseTask):
"""会员ETL任务"""
def get_task_code(self) -> str:
    """Return the fixed task code identifying the members ETL job."""
    return "MEMBERS"
def execute(self) -> dict:
"""执行会员ETL"""
self.logger.info(f"开始执行 {self.get_task_code()} 任务")
params = {
"storeId": self.config.get("app.store_id"),
def extract(self, context: TaskContext) -> dict:
    """Fetch every member page from the tenant member-list endpoint."""
    request_params = self._merge_common_params({"siteId": context.store_id})
    records, _meta = self.api.get_paginated(
        endpoint="/MemberProfile/GetTenantMemberList",
        params=request_params,
        page_size=self.config.get("api.page_size", 200),
        data_path=("data",),
        list_key="tenantMemberInfos",
    )
    return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
    """Parse raw member payloads; records that fail parsing count as skipped."""
    raw_records = extracted.get("records", [])
    parsed = [
        row
        for row in (
            self._parse_member(raw, context.store_id) for raw in raw_records
        )
        if row
    ]
    return {
        "records": parsed,
        "fetched": len(raw_records),
        "skipped": len(raw_records) - len(parsed),
    }
try:
records, pages_meta = self.api.get_paginated(
endpoint="/MemberProfile/GetTenantMemberList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",)
)
parsed_records = []
for rec in records:
parsed = self._parse_member(rec)
if parsed:
parsed_records.append(parsed)
loader = MemberLoader(self.db)
store_id = self.config.get("app.store_id")
inserted, updated, skipped = loader.upsert_members(parsed_records, store_id)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0
}
self.logger.info(f"{self.get_task_code()} 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception as e:
self.db.rollback()
self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
raise
def _parse_member(self, raw: dict) -> dict:
def load(self, transformed: dict, context: TaskContext) -> dict:
    """Upsert parsed members and merge loader counters into the result."""
    member_loader = MemberLoader(self.db)
    inserted, updated, loader_skipped = member_loader.upsert_members(
        transformed["records"], context.store_id
    )
    counts = {
        "fetched": transformed["fetched"],
        "inserted": inserted,
        "updated": updated,
        # Skips from parsing plus skips reported by the loader itself.
        "skipped": transformed["skipped"] + loader_skipped,
        "errors": 0,
    }
    return counts
def _parse_member(self, raw: dict, store_id: int) -> dict | None:
    """Parse one raw member record into a loader row.

    Returns None (and logs a warning) when the record has no usable
    memberId or any field fails to parse.
    """
    try:
        member_id = TypeParser.parse_int(raw.get("memberId"))
        if not member_id:
            return None
        return {
            "store_id": store_id,
            "member_id": member_id,
            "member_name": raw.get("memberName"),
            "phone": raw.get("phone"),
            "balance": TypeParser.parse_decimal(raw.get("balance")),
            "status": raw.get("status"),
            "register_time": TypeParser.parse_timestamp(raw.get("registerTime"), self.tz),
            # Keep the full payload for lineage/debugging.
            "raw_data": json.dumps(raw, ensure_ascii=False),
        }
    except Exception as exc:
        self.logger.warning("解析会员记录失败: %s, 原始数据: %s", exc, raw)
        return None

View File

@@ -38,10 +38,12 @@ class OdsTaskSpec:
pk_columns: Tuple[ColumnSpec, ...] = ()
extra_columns: Tuple[ColumnSpec, ...] = ()
include_page_size: bool = False
include_page_no: bool = True
include_page_no: bool = False
include_source_file: bool = True
include_source_endpoint: bool = True
requires_window: bool = True
time_fields: Tuple[str, str] | None = ("startTime", "endTime")
include_site_id: bool = True
description: str = ""
extra_params: Dict[str, Any] = field(default_factory=dict)
@@ -65,7 +67,7 @@ class BaseOdsTask(BaseTask):
page_size = self.config.get("api.page_size", 200)
params = self._build_params(spec, store_id)
columns = self._resolve_columns(spec)
conflict_columns = ["store_id"] + [col.column for col in spec.pk_columns]
conflict_columns = ["site_id"] + [col.column for col in spec.pk_columns]
loader = GenericODSLoader(
self.db,
spec.table_name,
@@ -117,16 +119,21 @@ class BaseOdsTask(BaseTask):
raise
def _build_params(self, spec: OdsTaskSpec, store_id: int) -> dict:
params: dict[str, Any] = {"storeId": store_id}
params.update(spec.extra_params)
if spec.requires_window:
base: dict[str, Any] = {}
if spec.include_site_id:
base["siteId"] = store_id
if spec.requires_window and spec.time_fields:
window_start, window_end, _ = self._get_time_window()
params["startTime"] = TypeParser.format_timestamp(window_start, self.tz)
params["endTime"] = TypeParser.format_timestamp(window_end, self.tz)
start_key, end_key = spec.time_fields
base[start_key] = TypeParser.format_timestamp(window_start, self.tz)
base[end_key] = TypeParser.format_timestamp(window_end, self.tz)
params = self._merge_common_params(base)
params.update(spec.extra_params)
return params
def _resolve_columns(self, spec: OdsTaskSpec) -> List[str]:
columns: List[str] = ["store_id"]
columns: List[str] = ["site_id"]
seen = set(columns)
for col_spec in list(spec.pk_columns) + list(spec.extra_columns):
if col_spec.column not in seen:
@@ -166,7 +173,7 @@ class BaseOdsTask(BaseTask):
page_size_value: int | None,
source_file: str | None,
) -> dict | None:
row: dict[str, Any] = {"store_id": store_id}
row: dict[str, Any] = {"site_id": store_id}
for col_spec in spec.pk_columns + spec.extra_columns:
value = self._extract_value(record, col_spec)
@@ -238,19 +245,33 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
code="ODS_ORDER_SETTLE",
class_name="OdsOrderSettleTask",
table_name="billiards_ods.ods_order_settle",
endpoint="/order/list",
endpoint="/Site/GetAllOrderSettleList",
data_path=("data",),
pk_columns=(_int_col("order_settle_id", "orderSettleId", "order_settle_id", "id", required=True),),
extra_columns=(_int_col("order_trade_no", "orderTradeNo", "order_trade_no"),),
include_page_size=True,
list_key="settleList",
pk_columns=(
_int_col(
"order_settle_id",
"orderSettleId",
"order_settle_id",
"settleList.id",
"id",
required=True,
),
),
extra_columns=(
_int_col("order_trade_no", "orderTradeNo", "order_trade_no", "settleList.orderTradeNo"),
),
include_page_size=False,
time_fields=("rangeStartTime", "rangeEndTime"),
description="订单/结算 ODS 原始记录",
),
OdsTaskSpec(
code="ODS_TABLE_USE",
class_name="OdsTableUseTask",
table_name="billiards_ods.ods_table_use_detail",
endpoint="/Table/UseDetailList",
data_path=("data", "siteTableUseDetailsList"),
table_name="billiards_ods.ods_table_use_log",
endpoint="/Site/GetSiteTableOrderDetails",
data_path=("data",),
list_key="siteTableUseDetailsList",
pk_columns=(_int_col("ledger_id", "id", required=True),),
extra_columns=(
_int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
@@ -261,9 +282,10 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
OdsTaskSpec(
code="ODS_ASSISTANT_LEDGER",
class_name="OdsAssistantLedgerTask",
table_name="billiards_ods.ods_assistant_ledger",
endpoint="/Assistant/LedgerList",
data_path=("data", "orderAssistantDetails"),
table_name="billiards_ods.ods_assistant_service_log",
endpoint="/AssistantPerformance/GetOrderAssistantDetails",
data_path=("data",),
list_key="orderAssistantDetails",
pk_columns=(_int_col("ledger_id", "id", required=True),),
extra_columns=(
_int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
@@ -274,18 +296,20 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
OdsTaskSpec(
code="ODS_ASSISTANT_ABOLISH",
class_name="OdsAssistantAbolishTask",
table_name="billiards_ods.ods_assistant_abolish",
endpoint="/Assistant/AbolishList",
data_path=("data", "abolitionAssistants"),
table_name="billiards_ods.ods_assistant_cancel_log",
endpoint="/AssistantPerformance/GetAbolitionAssistant",
data_path=("data",),
list_key="abolitionAssistants",
pk_columns=(_int_col("abolish_id", "id", required=True),),
description="助教作废记录 ODS",
),
OdsTaskSpec(
code="ODS_GOODS_LEDGER",
class_name="OdsGoodsLedgerTask",
table_name="billiards_ods.ods_goods_ledger",
endpoint="/Order/GoodsLedgerList",
data_path=("data", "orderGoodsLedgers"),
table_name="billiards_ods.ods_store_sale_item",
endpoint="/TenantGoods/GetGoodsSalesList",
data_path=("data",),
list_key="orderGoodsLedgers",
pk_columns=(_int_col("order_goods_id", "orderGoodsId", "id", required=True),),
extra_columns=(
_int_col("order_trade_no", "order_trade_no", "orderTradeNo"),
@@ -296,8 +320,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
OdsTaskSpec(
code="ODS_PAYMENT",
class_name="OdsPaymentTask",
table_name="billiards_ods.ods_payment",
endpoint="/pay/records",
table_name="billiards_ods.ods_payment_record",
endpoint="/PayLog/GetPayLogListPage",
data_path=("data",),
pk_columns=(_int_col("pay_id", "payId", "id", required=True),),
extra_columns=(
@@ -305,14 +329,15 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
_int_col("relate_id", "relate_id", "relateId"),
),
include_page_size=False,
time_fields=("StartPayTime", "EndPayTime"),
description="支付流水 ODS",
),
OdsTaskSpec(
code="ODS_REFUND",
class_name="OdsRefundTask",
table_name="billiards_ods.ods_refund",
endpoint="/Pay/RefundList",
data_path=(),
table_name="billiards_ods.ods_refund_record",
endpoint="/Order/GetRefundPayLogList",
data_path=("data",),
pk_columns=(_int_col("refund_id", "id", required=True),),
extra_columns=(
ColumnSpec(column="relate_type", sources=("relate_type", "relateType")),
@@ -323,18 +348,19 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
OdsTaskSpec(
code="ODS_COUPON_VERIFY",
class_name="OdsCouponVerifyTask",
table_name="billiards_ods.ods_coupon_verify",
endpoint="/Coupon/UsageList",
data_path=(),
table_name="billiards_ods.ods_platform_coupon_log",
endpoint="/Promotion/GetOfflineCouponConsumePageList",
data_path=("data",),
pk_columns=(_int_col("coupon_id", "id", "couponId", required=True),),
description="平台验券/团购流水 ODS",
),
OdsTaskSpec(
code="ODS_MEMBER",
class_name="OdsMemberTask",
table_name="billiards_ods.ods_member",
table_name="billiards_ods.ods_member_profile",
endpoint="/MemberProfile/GetTenantMemberList",
data_path=("data",),
list_key="tenantMemberInfos",
pk_columns=(_int_col("member_id", "memberId", required=True),),
requires_window=False,
description="会员档案 ODS",
@@ -343,8 +369,9 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
code="ODS_MEMBER_CARD",
class_name="OdsMemberCardTask",
table_name="billiards_ods.ods_member_card",
endpoint="/MemberCard/List",
data_path=("data", "tenantMemberCards"),
endpoint="/MemberProfile/GetTenantMemberCardList",
data_path=("data",),
list_key="tenantMemberCards",
pk_columns=(_int_col("card_id", "tenantMemberCardId", "cardId", required=True),),
requires_window=False,
description="会员卡/储值卡 ODS",
@@ -352,9 +379,10 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
OdsTaskSpec(
code="ODS_PACKAGE",
class_name="OdsPackageTask",
table_name="billiards_ods.ods_package_coupon",
endpoint="/Package/List",
data_path=("data", "packageCouponList"),
table_name="billiards_ods.ods_group_package",
endpoint="/PackageCoupon/QueryPackageCouponList",
data_path=("data",),
list_key="packageCouponList",
pk_columns=(_int_col("package_id", "id", "packageId", required=True),),
requires_window=False,
description="团购/套餐定义 ODS",
@@ -363,8 +391,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
code="ODS_INVENTORY_STOCK",
class_name="OdsInventoryStockTask",
table_name="billiards_ods.ods_inventory_stock",
endpoint="/Inventory/StockSummary",
data_path=(),
endpoint="/TenantGoods/GetGoodsStockReport",
data_path=("data",),
pk_columns=(
_int_col("site_goods_id", "siteGoodsId", required=True),
ColumnSpec(column="snapshot_key", default="default", required=True),
@@ -376,8 +404,9 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
code="ODS_INVENTORY_CHANGE",
class_name="OdsInventoryChangeTask",
table_name="billiards_ods.ods_inventory_change",
endpoint="/Inventory/ChangeList",
data_path=("data", "queryDeliveryRecordsList"),
endpoint="/GoodsStockManage/QueryGoodsOutboundReceipt",
data_path=("data",),
list_key="queryDeliveryRecordsList",
pk_columns=(_int_col("change_id", "siteGoodsStockId", "id", required=True),),
description="库存变动 ODS",
),

View File

@@ -1,80 +1,77 @@
# -*- coding: utf-8 -*-
"""订单ETL任务"""
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.order import OrderLoader
from models.parsers import TypeParser
class OrdersTask(BaseTask):
"""订单数据ETL任务"""
def get_task_code(self) -> str:
return "ORDERS"
def execute(self) -> dict:
"""执行订单数据ETL"""
self.logger.info(f"开始执行 {self.get_task_code()} 任务")
# 1. 获取时间窗口
window_start, window_end, window_minutes = self._get_time_window()
# 2. 调用API获取数据
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
}
try:
records, pages_meta = self.api.get_paginated(
endpoint="/order/list",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",)
)
# 3. 解析并清洗数据
parsed_records = []
for rec in records:
parsed = self._parse_order(rec)
if parsed:
parsed_records.append(parsed)
# 4. 加载数据
loader = OrderLoader(self.db)
store_id = self.config.get("app.store_id")
inserted, updated, skipped = loader.upsert_orders(
parsed_records,
store_id
)
# 5. 提交事务
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0
# ------------------------------------------------------------------ E/T/L hooks
def extract(self, context: TaskContext) -> dict:
"""调用 API 拉取订单记录"""
params = self._merge_common_params(
{
"siteId": context.store_id,
"rangeStartTime": TypeParser.format_timestamp(context.window_start, self.tz),
"rangeEndTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
self.logger.info(
f"{self.get_task_code()} 完成: {counts}"
)
return self._build_result("SUCCESS", counts)
except Exception as e:
self.db.rollback()
self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
raise
def _parse_order(self, raw: dict) -> dict:
)
records, pages_meta = self.api.get_paginated(
endpoint="/Site/GetAllOrderSettleList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="settleList",
)
return {"records": records, "meta": pages_meta}
def transform(self, extracted: dict, context: TaskContext) -> dict:
"""解析原始订单 JSON"""
parsed_records = []
skipped = 0
for rec in extracted.get("records", []):
parsed = self._parse_order(rec, context.store_id)
if parsed:
parsed_records.append(parsed)
else:
skipped += 1
return {
"records": parsed_records,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
def load(self, transformed: dict, context: TaskContext) -> dict:
"""写入 fact_order"""
loader = OrderLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_orders(
transformed["records"], context.store_id
)
counts = {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
return counts
# ------------------------------------------------------------------ helpers
def _parse_order(self, raw: dict, store_id: int) -> dict | None:
"""解析单条订单记录"""
try:
return {
"store_id": self.config.get("app.store_id"),
"store_id": store_id,
"order_id": TypeParser.parse_int(raw.get("orderId")),
"order_no": raw.get("orderNo"),
"member_id": TypeParser.parse_int(raw.get("memberId")),
@@ -87,8 +84,8 @@ class OrdersTask(BaseTask):
"pay_status": raw.get("payStatus"),
"order_status": raw.get("orderStatus"),
"remark": raw.get("remark"),
"raw_data": json.dumps(raw, ensure_ascii=False)
"raw_data": json.dumps(raw, ensure_ascii=False),
}
except Exception as e:
self.logger.warning(f"解析订单失败: {e}, 原始数据: {raw}")
except Exception as exc:
self.logger.warning("解析订单失败: %s, 原始数据: %s", exc, raw)
return None

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.dimensions.package import PackageDefinitionLoader
from models.parsers import TypeParser
@@ -14,49 +14,48 @@ class PackagesDefTask(BaseTask):
def get_task_code(self) -> str:
return "PACKAGES_DEF"
def execute(self) -> dict:
self.logger.info("开始执行 PACKAGES_DEF 任务")
params = {"storeId": self.config.get("app.store_id")}
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params({"siteId": context.store_id})
records, _ = self.api.get_paginated(
endpoint="/PackageCoupon/QueryPackageCouponList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="packageCouponList",
)
return {"records": records}
try:
records, _ = self.api.get_paginated(
endpoint="/Package/List",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "packageCouponList"),
)
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_package(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
parsed = []
for raw in records:
mapped = self._parse_package(raw)
if mapped:
parsed.append(mapped)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = PackageDefinitionLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_packages(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
loader = PackageDefinitionLoader(self.db)
inserted, updated, skipped = loader.upsert_packages(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"PACKAGES_DEF 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("PACKAGES_DEF 失败", exc_info=True)
raise
def _parse_package(self, raw: dict) -> dict | None:
def _parse_package(self, raw: dict, store_id: int) -> dict | None:
package_id = TypeParser.parse_int(raw.get("id"))
if not package_id:
self.logger.warning("跳过缺少 id 的套餐数据: %s", raw)
self.logger.warning("跳过缺少 package id 的套餐记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"package_id": package_id,

View File

@@ -1,68 +1,70 @@
# -*- coding: utf-8 -*-
"""支付记录ETL任务"""
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.payment import PaymentLoader
from models.parsers import TypeParser
class PaymentsTask(BaseTask):
"""支付记录ETL任务"""
"""支付记录 E/T/L 任务"""
def get_task_code(self) -> str:
return "PAYMENTS"
def execute(self) -> dict:
"""执行支付记录ETL"""
self.logger.info(f"开始执行 {self.get_task_code()} 任务")
window_start, window_end, window_minutes = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
}
try:
records, pages_meta = self.api.get_paginated(
endpoint="/pay/records",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",)
)
parsed_records = []
for rec in records:
parsed = self._parse_payment(rec)
if parsed:
parsed_records.append(parsed)
loader = PaymentLoader(self.db)
store_id = self.config.get("app.store_id")
inserted, updated, skipped = loader.upsert_payments(parsed_records, store_id)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0
# ------------------------------------------------------------------ E/T/L hooks
def extract(self, context: TaskContext) -> dict:
"""调用 API 抓取支付记录"""
params = self._merge_common_params(
{
"siteId": context.store_id,
"StartPayTime": TypeParser.format_timestamp(context.window_start, self.tz),
"EndPayTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
self.logger.info(f"{self.get_task_code()} 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception as e:
self.db.rollback()
self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
raise
def _parse_payment(self, raw: dict) -> dict:
)
records, pages_meta = self.api.get_paginated(
endpoint="/PayLog/GetPayLogListPage",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
)
return {"records": records, "meta": pages_meta}
def transform(self, extracted: dict, context: TaskContext) -> dict:
"""解析支付 JSON"""
parsed, skipped = [], 0
for rec in extracted.get("records", []):
cleaned = self._parse_payment(rec, context.store_id)
if cleaned:
parsed.append(cleaned)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
def load(self, transformed: dict, context: TaskContext) -> dict:
"""写入 fact_payment"""
loader = PaymentLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_payments(
transformed["records"], context.store_id
)
counts = {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
return counts
# ------------------------------------------------------------------ helpers
def _parse_payment(self, raw: dict, store_id: int) -> dict | None:
"""解析支付记录"""
try:
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"pay_id": TypeParser.parse_int(raw.get("payId") or raw.get("id")),
@@ -75,7 +77,9 @@ class PaymentsTask(BaseTask):
),
"relate_type": raw.get("relateType") or raw.get("relate_type"),
"relate_id": TypeParser.parse_int(raw.get("relateId") or raw.get("relate_id")),
"site_id": TypeParser.parse_int(raw.get("siteId") or raw.get("site_id") or store_id),
"site_id": TypeParser.parse_int(
raw.get("siteId") or raw.get("site_id") or store_id
),
"tenant_id": TypeParser.parse_int(raw.get("tenantId") or raw.get("tenant_id")),
"pay_time": TypeParser.parse_timestamp(raw.get("payTime"), self.tz),
"create_time": TypeParser.parse_timestamp(
@@ -89,16 +93,19 @@ class PaymentsTask(BaseTask):
or raw.get("fee_amount")
),
"discount_amount": TypeParser.parse_decimal(
raw.get("discountAmount") or raw.get("couponAmount") or raw.get("discount_amount")
raw.get("discountAmount")
or raw.get("couponAmount")
or raw.get("discount_amount")
),
"pay_type": raw.get("payType"),
"payment_method": raw.get("paymentMethod") or raw.get("payment_method"),
"online_pay_channel": raw.get("onlinePayChannel") or raw.get("online_pay_channel"),
"online_pay_channel": raw.get("onlinePayChannel")
or raw.get("online_pay_channel"),
"pay_status": raw.get("payStatus"),
"pay_terminal": raw.get("payTerminal") or raw.get("pay_terminal"),
"remark": raw.get("remark"),
"raw_data": json.dumps(raw, ensure_ascii=False),
}
except Exception as e:
self.logger.warning(f"解析支付记录失败: {e}, 原始数据: {raw}")
except Exception as exc:
self.logger.warning("解析支付记录失败: %s, 原始数据: %s", exc, raw)
return None

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.dimensions.product import ProductLoader
from models.parsers import TypeParser
@@ -12,95 +12,56 @@ class ProductsTask(BaseTask):
"""商品维度 ETL 任务"""
def get_task_code(self) -> str:
"""任务代码,应与 etl_admin.etl_task.task_code 一致"""
return "PRODUCTS"
def execute(self) -> dict:
"""
执行商品档案 ETL
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params({"siteId": context.store_id})
records, _ = self.api.get_paginated(
endpoint="/TenantGoods/QueryTenantGoods",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="tenantGoodsList",
)
return {"records": records}
流程:
1. 调用上游 /TenantGoods/QueryTenantGoods 分页拉取商品列表
2. 解析/清洗字段
3. 通过 ProductLoader 写入 dim_product 和 dim_product_price_scd
"""
self.logger.info(f"开始执行 {self.get_task_code()} 任务")
params = {
"storeId": self.config.get("app.store_id"),
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
parsed_row = self._parse_product(raw, context.store_id)
if parsed_row:
parsed.append(parsed_row)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = ProductLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_products(
transformed["records"], context.store_id
)
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
def _parse_product(self, raw: dict, store_id: int) -> dict | None:
try:
# 1. 分页拉取数据
records, pages_meta = self.api.get_paginated(
endpoint="/TenantGoods/QueryTenantGoods",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
)
# 2. 解析/清洗
parsed_records = []
for raw in records:
parsed = self._parse_product(raw)
if parsed:
parsed_records.append(parsed)
# 3. 加载入库(维度主表 + 价格SCD2
loader = ProductLoader(self.db)
store_id = self.config.get("app.store_id")
inserted, updated, skipped = loader.upsert_products(
parsed_records, store_id
)
# 4. 提交事务
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"{self.get_task_code()} 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
# 明确回滚,避免部分成功
self.db.rollback()
self.logger.error(f"{self.get_task_code()} 失败", exc_info=True)
raise
def _parse_product(self, raw: dict) -> dict | None:
"""
解析单条商品记录,字段映射参考旧版 upsert_dim_product_and_price_scd
上游字段示例:
- siteGoodsId / tenantGoodsId / productId
- goodsName / productName
- tenantGoodsCategoryId / goodsCategoryId / categoryName / goodsCategorySecondId
- goodsUnit
- costPrice / goodsPrice / salePrice
- goodsState / status
- supplierId / barcode / isCombo
- createTime / updateTime
"""
try:
product_id = (
TypeParser.parse_int(
raw.get("siteGoodsId")
or raw.get("tenantGoodsId")
or raw.get("productId")
)
product_id = TypeParser.parse_int(
raw.get("siteGoodsId") or raw.get("tenantGoodsId") or raw.get("productId")
)
if not product_id:
# 主键缺失,直接跳过
return None
return {
"store_id": self.config.get("app.store_id"),
"store_id": store_id,
"product_id": product_id,
"site_product_id": TypeParser.parse_int(raw.get("siteGoodsId")),
"product_name": raw.get("goodsName") or raw.get("productName"),
@@ -108,15 +69,12 @@ class ProductsTask(BaseTask):
raw.get("tenantGoodsCategoryId") or raw.get("goodsCategoryId")
),
"category_name": raw.get("categoryName"),
"second_category_id": TypeParser.parse_int(
raw.get("goodsCategorySecondId")
),
"second_category_id": TypeParser.parse_int(raw.get("goodsCategorySecondId")),
"unit": raw.get("goodsUnit"),
"cost_price": TypeParser.parse_decimal(raw.get("costPrice")),
"sale_price": TypeParser.parse_decimal(
raw.get("goodsPrice") or raw.get("salePrice")
),
# 旧版这里就是 None如后面有明确字段可以再补
"allow_discount": None,
"status": raw.get("goodsState") or raw.get("status"),
"supplier_id": TypeParser.parse_int(raw.get("supplierId"))
@@ -126,14 +84,10 @@ class ProductsTask(BaseTask):
"is_combo": bool(raw.get("isCombo"))
if raw.get("isCombo") is not None
else None,
"created_time": TypeParser.parse_timestamp(
raw.get("createTime"), self.tz
),
"updated_time": TypeParser.parse_timestamp(
raw.get("updateTime"), self.tz
),
"created_time": TypeParser.parse_timestamp(raw.get("createTime"), self.tz),
"updated_time": TypeParser.parse_timestamp(raw.get("updateTime"), self.tz),
"raw_data": json.dumps(raw, ensure_ascii=False),
}
except Exception as e:
self.logger.warning(f"解析商品记录失败: {e}, 原始数据: {raw}")
return None
except Exception as exc:
self.logger.warning("解析商品记录失败: %s, 原始数据: %s", exc, raw)
return None

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.refund import RefundLoader
from models.parsers import TypeParser
@@ -14,54 +14,53 @@ class RefundsTask(BaseTask):
def get_task_code(self) -> str:
return "REFUNDS"
def execute(self) -> dict:
self.logger.info("开始执行 REFUNDS 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"startTime": TypeParser.format_timestamp(context.window_start, self.tz),
"endTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/Order/GetRefundPayLogList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_refund(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Pay/RefundList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=(),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = RefundLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_refunds(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_refund(raw)
if mapped:
parsed.append(mapped)
loader = RefundLoader(self.db)
inserted, updated, skipped = loader.upsert_refunds(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"REFUNDS 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("REFUNDS 失败", exc_info=True)
raise
def _parse_refund(self, raw: dict) -> dict | None:
def _parse_refund(self, raw: dict, store_id: int) -> dict | None:
refund_id = TypeParser.parse_int(raw.get("id"))
if not refund_id:
self.logger.warning("跳过缺少 id 的退款记录: %s", raw)
self.logger.warning("跳过缺少退款ID的数据: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"refund_id": refund_id,

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.table_discount import TableDiscountLoader
from models.parsers import TypeParser
@@ -14,55 +14,55 @@ class TableDiscountTask(BaseTask):
def get_task_code(self) -> str:
return "TABLE_DISCOUNT"
def execute(self) -> dict:
self.logger.info("开始执行 TABLE_DISCOUNT 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"startTime": TypeParser.format_timestamp(context.window_start, self.tz),
"endTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/Site/GetTaiFeeAdjustList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="taiFeeAdjustInfos",
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_discount(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Table/AdjustList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "taiFeeAdjustInfos"),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = TableDiscountLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_discounts(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_discount(raw)
if mapped:
parsed.append(mapped)
loader = TableDiscountLoader(self.db)
inserted, updated, skipped = loader.upsert_discounts(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"TABLE_DISCOUNT 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("TABLE_DISCOUNT 失败", exc_info=True)
raise
def _parse_discount(self, raw: dict) -> dict | None:
def _parse_discount(self, raw: dict, store_id: int) -> dict | None:
discount_id = TypeParser.parse_int(raw.get("id"))
if not discount_id:
self.logger.warning("跳过缺少 id 的台费折扣记录: %s", raw)
self.logger.warning("跳过缺少折扣ID的记录: %s", raw)
return None
table_profile = raw.get("tableProfile") or {}
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"discount_id": discount_id,

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.dimensions.table import TableLoader
from models.parsers import TypeParser
@@ -14,49 +14,48 @@ class TablesTask(BaseTask):
def get_task_code(self) -> str:
return "TABLES"
def execute(self) -> dict:
self.logger.info("开始执行 TABLES 任务")
params = {"storeId": self.config.get("app.store_id")}
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params({"siteId": context.store_id})
records, _ = self.api.get_paginated(
endpoint="/Table/GetSiteTables",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="siteTables",
)
return {"records": records}
try:
records, _ = self.api.get_paginated(
endpoint="/Table/GetSiteTables",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "siteTables"),
)
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_table(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
parsed = []
for raw in records:
mapped = self._parse_table(raw)
if mapped:
parsed.append(mapped)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = TableLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_tables(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
loader = TableLoader(self.db)
inserted, updated, skipped = loader.upsert_tables(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"TABLES 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("TABLES 失败", exc_info=True)
raise
def _parse_table(self, raw: dict) -> dict | None:
def _parse_table(self, raw: dict, store_id: int) -> dict | None:
table_id = TypeParser.parse_int(raw.get("id"))
if not table_id:
self.logger.warning("跳过缺少 table_id 的台桌记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"table_id": table_id,

View File

@@ -3,7 +3,7 @@
import json
from .base_task import BaseTask
from .base_task import BaseTask, TaskContext
from loaders.facts.topup import TopupLoader
from models.parsers import TypeParser
@@ -14,55 +14,55 @@ class TopupsTask(BaseTask):
def get_task_code(self) -> str:
return "TOPUPS"
def execute(self) -> dict:
self.logger.info("开始执行 TOPUPS 任务")
window_start, window_end, _ = self._get_time_window()
params = {
"storeId": self.config.get("app.store_id"),
"startTime": TypeParser.format_timestamp(window_start, self.tz),
"endTime": TypeParser.format_timestamp(window_end, self.tz),
def extract(self, context: TaskContext) -> dict:
params = self._merge_common_params(
{
"siteId": context.store_id,
"rangeStartTime": TypeParser.format_timestamp(context.window_start, self.tz),
"rangeEndTime": TypeParser.format_timestamp(context.window_end, self.tz),
}
)
records, _ = self.api.get_paginated(
endpoint="/Site/GetRechargeSettleList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
list_key="settleList",
)
return {"records": records}
def transform(self, extracted: dict, context: TaskContext) -> dict:
parsed, skipped = [], 0
for raw in extracted.get("records", []):
mapped = self._parse_topup(raw, context.store_id)
if mapped:
parsed.append(mapped)
else:
skipped += 1
return {
"records": parsed,
"fetched": len(extracted.get("records", [])),
"skipped": skipped,
}
try:
records, _ = self.api.get_paginated(
endpoint="/Topup/SettleList",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data", "settleList"),
)
def load(self, transformed: dict, context: TaskContext) -> dict:
loader = TopupLoader(self.db)
inserted, updated, loader_skipped = loader.upsert_topups(transformed["records"])
return {
"fetched": transformed["fetched"],
"inserted": inserted,
"updated": updated,
"skipped": transformed["skipped"] + loader_skipped,
"errors": 0,
}
parsed = []
for raw in records:
mapped = self._parse_topup(raw)
if mapped:
parsed.append(mapped)
loader = TopupLoader(self.db)
inserted, updated, skipped = loader.upsert_topups(parsed)
self.db.commit()
counts = {
"fetched": len(records),
"inserted": inserted,
"updated": updated,
"skipped": skipped,
"errors": 0,
}
self.logger.info(f"TOPUPS 完成: {counts}")
return self._build_result("SUCCESS", counts)
except Exception:
self.db.rollback()
self.logger.error("TOPUPS 失败", exc_info=True)
raise
def _parse_topup(self, raw: dict) -> dict | None:
def _parse_topup(self, raw: dict, store_id: int) -> dict | None:
node = raw.get("settleList") if isinstance(raw.get("settleList"), dict) else raw
topup_id = TypeParser.parse_int(node.get("id"))
if not topup_id:
self.logger.warning("跳过缺少 id 的充值结算: %s", raw)
self.logger.warning("跳过缺少充值ID的记录: %s", raw)
return None
store_id = self.config.get("app.store_id")
return {
"store_id": store_id,
"topup_id": topup_id,

File diff suppressed because it is too large Load Diff

View File

@@ -68,6 +68,7 @@ def create_test_config(mode: str, archive_dir: Path, temp_dir: Path) -> AppConfi
archive_dir.mkdir(parents=True, exist_ok=True)
temp_dir.mkdir(parents=True, exist_ok=True)
flow = "FULL" if str(mode or "").upper() == "ONLINE" else "INGEST_ONLY"
overrides = {
"app": {"store_id": DEFAULT_STORE_ID, "timezone": "Asia/Taipei"},
"db": {"dsn": "postgresql://user:pass@localhost:5432/etl_billiards_test"},
@@ -77,10 +78,10 @@ def create_test_config(mode: str, archive_dir: Path, temp_dir: Path) -> AppConfi
"timeout_sec": 3,
"page_size": 50,
},
"testing": {
"mode": mode,
"json_archive_dir": str(archive_dir),
"temp_json_dir": str(temp_dir),
"pipeline": {
"flow": flow,
"fetch_root": str(temp_dir / "json_fetch"),
"ingest_source_dir": str(archive_dir),
},
"io": {
"export_root": str(temp_dir / "export"),
@@ -191,8 +192,8 @@ class FakeAPIClient:
endpoint: str,
params=None,
page_size: int = 200,
page_field: str = "pageIndex",
size_field: str = "pageSize",
page_field: str = "page",
size_field: str = "limit",
data_path: Tuple[str, ...] = (),
list_key: str | None = None,
):
@@ -228,8 +229,8 @@ class OfflineAPIClient:
endpoint: str,
params=None,
page_size: int = 200,
page_field: str = "pageIndex",
size_field: str = "pageSize",
page_field: str = "page",
size_field: str = "limit",
data_path: Tuple[str, ...] = (),
list_key: str | None = None,
):
@@ -328,7 +329,7 @@ TASK_SPECS: List[TaskSpec] = [
code="PRODUCTS",
task_cls=ProductsTask,
endpoint="/TenantGoods/QueryTenantGoods",
data_path=("data",),
data_path=("data", "tenantGoodsList"),
sample_records=[
{
"siteGoodsId": 101,
@@ -379,7 +380,7 @@ TASK_SPECS: List[TaskSpec] = [
code="MEMBERS",
task_cls=MembersTask,
endpoint="/MemberProfile/GetTenantMemberList",
data_path=("data",),
data_path=("data", "tenantMemberInfos"),
sample_records=[
{
"memberId": 401,
@@ -394,7 +395,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="ASSISTANTS",
task_cls=AssistantsTask,
endpoint="/Assistant/List",
endpoint="/PersonnelManagement/SearchAssistantInfo",
data_path=("data", "assistantInfos"),
sample_records=[
{
@@ -432,7 +433,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="PACKAGES_DEF",
task_cls=PackagesDefTask,
endpoint="/Package/List",
endpoint="/PackageCoupon/QueryPackageCouponList",
data_path=("data", "packageCouponList"),
sample_records=[
{
@@ -462,8 +463,8 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="ORDERS",
task_cls=OrdersTask,
endpoint="/order/list",
data_path=("data",),
endpoint="/Site/GetAllOrderSettleList",
data_path=("data", "settleList"),
sample_records=[
{
"orderId": 701,
@@ -484,7 +485,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="PAYMENTS",
task_cls=PaymentsTask,
endpoint="/pay/records",
endpoint="/PayLog/GetPayLogListPage",
data_path=("data",),
sample_records=[
{
@@ -501,8 +502,8 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="REFUNDS",
task_cls=RefundsTask,
endpoint="/Pay/RefundList",
data_path=(),
endpoint="/Order/GetRefundPayLogList",
data_path=("data",),
sample_records=[
{
"id": 901,
@@ -530,8 +531,8 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="COUPON_USAGE",
task_cls=CouponUsageTask,
endpoint="/Coupon/UsageList",
data_path=(),
endpoint="/Promotion/GetOfflineCouponConsumePageList",
data_path=("data",),
sample_records=[
{
"id": 1001,
@@ -560,7 +561,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="INVENTORY_CHANGE",
task_cls=InventoryChangeTask,
endpoint="/Inventory/ChangeList",
endpoint="/GoodsStockManage/QueryGoodsOutboundReceipt",
data_path=("data", "queryDeliveryRecordsList"),
sample_records=[
{
@@ -584,7 +585,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="TOPUPS",
task_cls=TopupsTask,
endpoint="/Topup/SettleList",
endpoint="/Site/GetRechargeSettleList",
data_path=("data", "settleList"),
sample_records=[
{
@@ -623,7 +624,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="TABLE_DISCOUNT",
task_cls=TableDiscountTask,
endpoint="/Table/AdjustList",
endpoint="/Site/GetTaiFeeAdjustList",
data_path=("data", "taiFeeAdjustInfos"),
sample_records=[
{
@@ -653,7 +654,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="ASSISTANT_ABOLISH",
task_cls=AssistantAbolishTask,
endpoint="/Assistant/AbolishList",
endpoint="/AssistantPerformance/GetAbolitionAssistant",
data_path=("data", "abolitionAssistants"),
sample_records=[
{
@@ -674,7 +675,7 @@ TASK_SPECS: List[TaskSpec] = [
TaskSpec(
code="LEDGER",
task_cls=LedgerTask,
endpoint="/Assistant/LedgerList",
endpoint="/AssistantPerformance/GetOrderAssistantDetails",
data_path=("data", "orderAssistantDetails"),
sample_records=[
{

View File

@@ -29,7 +29,7 @@ def test_ods_order_settle_ingest(tmp_path):
"anyField": "value",
}
]
api = FakeAPIClient({"/order/list": sample})
api = FakeAPIClient({"/Site/GetAllOrderSettleList": sample})
task_cls = ODS_TASK_CLASSES["ODS_ORDER_SETTLE"]
with get_db_operations() as db_ops:
@@ -42,7 +42,7 @@ def test_ods_order_settle_ingest(tmp_path):
row = db_ops.upserts[0]["rows"][0]
assert row["order_settle_id"] == 701
assert row["order_trade_no"] == 8001
assert row["source_endpoint"] == "/order/list"
assert row["source_endpoint"] == "/Site/GetAllOrderSettleList"
assert '"orderSettleId": 701' in row["payload"]
@@ -57,7 +57,7 @@ def test_ods_payment_ingest(tmp_path):
"payAmount": "100.00",
}
]
api = FakeAPIClient({"/pay/records": sample})
api = FakeAPIClient({"/PayLog/GetPayLogListPage": sample})
task_cls = ODS_TASK_CLASSES["ODS_PAYMENT"]
with get_db_operations() as db_ops: