更新20260201-1

This commit is contained in:
Neo
2026-02-01 22:04:15 +08:00
parent 076f5755ca
commit 9b2c2c5c78
20 changed files with 32463 additions and 408 deletions

View File

@@ -32,13 +32,13 @@ SCHEMA_ETL=etl_admin
# API 配置
# ------------------------------------------------------------------------------
API_BASE=https://pc.ficoo.vip/apiprod/admin/v1/
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6Ik1oKzFpTitjclRHMTY3cUp5SzFXYllteVBaaUhjdDI2ZTZDZkJvd1pxSVk9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvNyDkuIvljYg5OjU2OjE4IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDQ3MjU3OCwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.rY03o82SKznD7NOktXKzTOI1btl2FHsklMCChOlZUeY
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IktlbTVsdHRqZ2tSUExOcVA2ajhNakdQYnFrNW5mRzBQNzRvMHE0b295VVE9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvOCDkuIvljYg2OjU3OjA1IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDU0ODIyNSwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.wJlm7pTqUzp769nUGdxx0e1bVMy4x9Prp9U_UMWQvlk
# API 请求超时(秒)
API_TIMEOUT=20
# 分页大小
API_PAGE_SIZE=200
API_PAGE_SIZE=200
# 最大重试次数
API_RETRY_MAX=3

View File

@@ -4,7 +4,9 @@ from __future__ import annotations
from datetime import datetime
from pathlib import Path
import time
from typing import Any, Iterable, Tuple
from zoneinfo import ZoneInfo
from api.client import APIClient
from api.endpoint_routing import plan_calls
@@ -128,3 +130,56 @@ class RecordingAPIClient:
"pages": len(pages),
"records": total_records,
}
def _cfg_get(cfg, key: str, default=None):
if isinstance(cfg, dict):
cur = cfg
for part in key.split("."):
if not isinstance(cur, dict) or part not in cur:
return default
cur = cur[part]
return cur
getter = getattr(cfg, "get", None)
if callable(getter):
return getter(key, default)
return default
def build_recording_client(
    cfg,
    *,
    task_code: str,
    output_dir: Path | str | None = None,
    run_id: int | None = None,
    write_pretty: bool | None = None,
):
    """Create a ``RecordingAPIClient`` from an AppConfig-like object or a dict.

    Args:
        cfg: Configuration source; every setting is read through ``_cfg_get``.
        task_code: Task identifier; upper-cased for the default directory name.
        output_dir: Directory handed to the recording client.  When omitted it
            is ``<fetch_root>/<TASK_CODE>-<run_id>-<timestamp>``, where
            ``fetch_root`` is ``pipeline.fetch_root``, else ``io.export_root``,
            else ``"export/JSON"``.
        run_id: Run identifier; defaults to the current Unix time in seconds.
        write_pretty: Passed to the recording client as ``write_pretty``;
            defaults to the ``io.write_pretty_json`` setting.

    Returns:
        A ``RecordingAPIClient`` wrapping a freshly built ``APIClient``.
    """
    # API settings; empty/zero values fall back to the same defaults as missing ones.
    base_url = _cfg_get(cfg, "api.base_url") or ""
    token = _cfg_get(cfg, "api.token")
    timeout_sec = int(_cfg_get(cfg, "api.timeout_sec", 20) or 20)
    max_attempts = int(_cfg_get(cfg, "api.retries.max_attempts", 3) or 3)
    extra_headers = _cfg_get(cfg, "api.headers_extra") or {}
    http_client = APIClient(
        base_url=base_url,
        token=token,
        timeout=timeout_sec,
        retry_max=max_attempts,
        headers_extra=extra_headers,
    )

    if write_pretty is None:
        write_pretty = bool(_cfg_get(cfg, "io.write_pretty_json", False))

    if run_id is None:
        run_id = int(time.time())

    if output_dir is None:
        zone_name = _cfg_get(cfg, "app.timezone", "Asia/Taipei") or "Asia/Taipei"
        stamp = datetime.now(ZoneInfo(zone_name)).strftime("%Y%m%d-%H%M%S")
        root = (
            _cfg_get(cfg, "pipeline.fetch_root")
            or _cfg_get(cfg, "io.export_root")
            or "export/JSON"
        )
        folder_name = f"{str(task_code).upper()}-{run_id}-{stamp}"
        output_dir = Path(root) / folder_name

    return RecordingAPIClient(
        base_client=http_client,
        output_dir=output_dir,
        task_code=str(task_code),
        run_id=int(run_id),
        write_pretty=bool(write_pretty),
    )

View File

@@ -36,8 +36,8 @@
| 17 | person_org_id | BIGINT | YES | | 人事组织 ID |
| 18 | assistant_level | INTEGER | YES | | 助教等级。**枚举值**: 8=助教管理, 10=初级, 20=中级, 30=高级, 40=星级 |
| 19 | level_name | VARCHAR | YES | | 等级名称。**枚举值**: "助教管理", "初级", "中级", "高级", "星级" |
| 20 | skill_id | BIGINT | YES | | 技能 ID |
| 21 | skill_name | VARCHAR | YES | | 技能名称。**枚举值**: "基础课", "附加课/激励课", "包厢课" |
| 20 | skill_id | BIGINT | YES | | 技能 ID。**枚举值**: 2790683529513797=基础课, 2790683529513798=附加课/激励课, 3039912271463941=包厢课 |
| 21 | skill_name | VARCHAR | YES | | 技能名称。**枚举值**: "基础课", "附加课", "包厢课" |
| 22 | ledger_unit_price | NUMERIC(10,2) | YES | | 单价(元/小时),**样本值**: 98.00/108.00/190.00 等 |
| 23 | ledger_amount | NUMERIC(10,2) | YES | | 计费金额 |
| 24 | projected_income | NUMERIC(10,2) | YES | | 预估收入 |

View File

@@ -26,7 +26,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from api.client import APIClient
from api.recording_client import build_recording_client
from api.endpoint_routing import derive_former_endpoint as derive_former_endpoint_shared
from config.settings import AppConfig
from models.parsers import TypeParser
@@ -265,13 +265,7 @@ def main() -> int:
if not cfg["api"].get("token"):
raise SystemExit("缺少 api.token请在 .env 配置 API_TOKEN 或 FICOO_TOKEN")
client = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
client = build_recording_client(cfg, task_code="FETCH_TEST_COMPARE")
common_params = cfg["api"].get("params", {}) or {}
if not isinstance(common_params, dict):

View File

@@ -5,6 +5,11 @@ from .task_model import TaskItem, TaskStatus, TaskHistory, TaskConfig, QueuedTas
from .schedule_model import (
ScheduledTask, ScheduleConfig, ScheduleType, IntervalUnit, ScheduleStore
)
from .task_registry import (
TaskRegistry, TaskDefinition, BusinessDomain, DOMAIN_LABELS,
task_registry, get_ods_task_codes, get_fact_ods_task_codes,
get_dimension_ods_task_codes, get_all_task_tuples
)
__all__ = [
"TaskItem",
@@ -17,4 +22,14 @@ __all__ = [
"ScheduleType",
"IntervalUnit",
"ScheduleStore",
# 任务注册表
"TaskRegistry",
"TaskDefinition",
"BusinessDomain",
"DOMAIN_LABELS",
"task_registry",
"get_ods_task_codes",
"get_fact_ods_task_codes",
"get_dimension_ods_task_codes",
"get_all_task_tuples",
]

View File

@@ -0,0 +1,353 @@
# -*- coding: utf-8 -*-
"""任务注册表:定义所有可用任务及其业务域分组。
从后端 ods_tasks 动态获取任务定义,并按业务域分组,供 UI 使用。
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple
# Try to import the ODS task definitions from the backend package.
try:
    from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
    _HAS_BACKEND = True
except ImportError:
    # Backend not importable: record that and provide empty placeholders so
    # the names below still exist.
    _HAS_BACKEND = False
    ENABLED_ODS_CODES = set()
    ODS_TASK_SPECS = ()
class BusinessDomain(Enum):
    """Business domains used to group tasks."""
    MEMBER = "member"  # members
    SETTLEMENT = "settlement"  # settlement / payment
    ASSISTANT = "assistant"  # assistants (coaches)
    GOODS = "goods"  # goods / sales
    TABLE = "table"  # tables
    PROMOTION = "promotion"  # group buying / coupons
    INVENTORY = "inventory"  # inventory
    SCHEMA = "schema"  # schema initialisation
    DWD = "dwd"  # DWD loading
    QUALITY = "quality"  # quality checks
    OTHER = "other"  # everything else
# Display label for each business domain (one entry per BusinessDomain member).
DOMAIN_LABELS: Dict[BusinessDomain, str] = {
    BusinessDomain.MEMBER: "会员",
    BusinessDomain.SETTLEMENT: "结算/支付",
    BusinessDomain.ASSISTANT: "助教",
    BusinessDomain.GOODS: "商品/销售",
    BusinessDomain.TABLE: "台桌",
    BusinessDomain.PROMOTION: "团购/优惠券",
    BusinessDomain.INVENTORY: "库存",
    BusinessDomain.SCHEMA: "Schema 初始化",
    BusinessDomain.DWD: "DWD 装载",
    BusinessDomain.QUALITY: "质量检查",
    BusinessDomain.OTHER: "其他",
}
@dataclass
class TaskDefinition:
    """Description of a single runnable task."""
    code: str  # task code
    name: str  # display name
    description: str  # description text
    domain: BusinessDomain  # business domain the task is grouped under
    requires_window: bool = True  # whether the task needs a time window
    is_ods: bool = False  # whether this is an ODS task
    is_dimension: bool = False  # whether this is a dimension-type task (treated separately during validation)
    default_enabled: bool = True  # whether the task is selected by default
# Mapping from ODS task code to its business domain.
ODS_DOMAIN_MAP: Dict[str, BusinessDomain] = {
    # Members
    "ODS_MEMBER": BusinessDomain.MEMBER,
    "ODS_MEMBER_CARD": BusinessDomain.MEMBER,
    "ODS_MEMBER_BALANCE": BusinessDomain.MEMBER,
    # Settlement / payment
    "ODS_PAYMENT": BusinessDomain.SETTLEMENT,
    "ODS_REFUND": BusinessDomain.SETTLEMENT,
    "ODS_SETTLEMENT_RECORDS": BusinessDomain.SETTLEMENT,
    "ODS_RECHARGE_SETTLE": BusinessDomain.SETTLEMENT,
    "ODS_SETTLEMENT_TICKET": BusinessDomain.SETTLEMENT,
    # Assistants
    "ODS_ASSISTANT_ACCOUNT": BusinessDomain.ASSISTANT,
    "ODS_ASSISTANT_LEDGER": BusinessDomain.ASSISTANT,
    "ODS_ASSISTANT_ABOLISH": BusinessDomain.ASSISTANT,
    # Goods / sales
    "ODS_TENANT_GOODS": BusinessDomain.GOODS,
    "ODS_STORE_GOODS": BusinessDomain.GOODS,
    "ODS_STORE_GOODS_SALES": BusinessDomain.GOODS,
    "ODS_GOODS_CATEGORY": BusinessDomain.GOODS,
    # Tables
    "ODS_TABLES": BusinessDomain.TABLE,
    "ODS_TABLE_USE": BusinessDomain.TABLE,
    "ODS_TABLE_FEE_DISCOUNT": BusinessDomain.TABLE,
    # Group buying / coupons
    "ODS_GROUP_PACKAGE": BusinessDomain.PROMOTION,
    "ODS_GROUP_BUY_REDEMPTION": BusinessDomain.PROMOTION,
    "ODS_PLATFORM_COUPON": BusinessDomain.PROMOTION,
    # Inventory
    "ODS_INVENTORY_STOCK": BusinessDomain.INVENTORY,
    "ODS_INVENTORY_CHANGE": BusinessDomain.INVENTORY,
}
# Display name (Chinese) for each ODS task code.
ODS_DISPLAY_NAMES: Dict[str, str] = {
    "ODS_MEMBER": "会员档案",
    "ODS_MEMBER_CARD": "会员储值卡",
    "ODS_MEMBER_BALANCE": "会员余额变动",
    "ODS_PAYMENT": "支付流水",
    "ODS_REFUND": "退款流水",
    "ODS_SETTLEMENT_RECORDS": "结账记录",
    "ODS_RECHARGE_SETTLE": "充值结算",
    "ODS_SETTLEMENT_TICKET": "结账小票",
    "ODS_ASSISTANT_ACCOUNT": "助教账号",
    "ODS_ASSISTANT_LEDGER": "助教流水",
    "ODS_ASSISTANT_ABOLISH": "助教作废",
    "ODS_TENANT_GOODS": "租户商品",
    "ODS_STORE_GOODS": "门店商品",
    "ODS_STORE_GOODS_SALES": "商品销售流水",
    "ODS_GOODS_CATEGORY": "商品分类",
    "ODS_TABLES": "台桌维表",
    "ODS_TABLE_USE": "台费计费流水",
    "ODS_TABLE_FEE_DISCOUNT": "台费折扣调账",
    "ODS_GROUP_PACKAGE": "团购套餐",
    "ODS_GROUP_BUY_REDEMPTION": "团购核销",
    "ODS_PLATFORM_COUPON": "平台券核销",
    "ODS_INVENTORY_STOCK": "库存汇总",
    "ODS_INVENTORY_CHANGE": "库存变化",
}
# Dimension-type ODS tasks (usually handled separately during validation).
# Membership in this set is what _build_ods_task_definition uses to set
# is_dimension=True and requires_window=False.
DIMENSION_ODS_CODES = {
    "ODS_MEMBER",
    "ODS_MEMBER_CARD",
    "ODS_ASSISTANT_ACCOUNT",
    "ODS_TENANT_GOODS",
    "ODS_STORE_GOODS",
    "ODS_GOODS_CATEGORY",
    "ODS_TABLES",
    "ODS_GROUP_PACKAGE",
}
# Fact-type ODS tasks (these need a time window).
# NOTE(review): nothing else in this module reads this set; fact/dimension
# classification is derived from DIMENSION_ODS_CODES only.
# NOTE(review): "ODS_INVENTORY_STOCK" appears in ODS_DOMAIN_MAP but in neither
# this set nor DIMENSION_ODS_CODES, so it is currently treated as a fact task
# by default -- confirm that is intended.
FACT_ODS_CODES = {
    "ODS_MEMBER_BALANCE",
    "ODS_PAYMENT",
    "ODS_REFUND",
    "ODS_SETTLEMENT_RECORDS",
    "ODS_RECHARGE_SETTLE",
    "ODS_SETTLEMENT_TICKET",
    "ODS_ASSISTANT_LEDGER",
    "ODS_ASSISTANT_ABOLISH",
    "ODS_STORE_GOODS_SALES",
    "ODS_TABLE_USE",
    "ODS_TABLE_FEE_DISCOUNT",
    "ODS_GROUP_BUY_REDEMPTION",
    "ODS_PLATFORM_COUPON",
    "ODS_INVENTORY_CHANGE",
}
# Definitions of the tasks that are not ODS fetch tasks.
NON_ODS_TASKS: List[TaskDefinition] = [
    # DWD loading
    TaskDefinition(
        code="DWD_LOAD_FROM_ODS",
        name="ODS→DWD 装载",
        description="从 ODS 增量装载到 DWD",
        domain=BusinessDomain.DWD,
        requires_window=True,
    ),
    TaskDefinition(
        code="DWD_QUALITY_CHECK",
        name="DWD 质量检查",
        description="执行 DWD 数据质量检查",
        domain=BusinessDomain.QUALITY,
        requires_window=False,
    ),
    # NOTE(review): a DWS task filed under the DWD domain -- there is no
    # separate DWS member in BusinessDomain.
    TaskDefinition(
        code="DWS_BUILD_ORDER_SUMMARY",
        name="构建订单汇总",
        description="重算 DWS 订单汇总表",
        domain=BusinessDomain.DWD,
        requires_window=False,
    ),
    # Schema initialisation (not selected by default)
    TaskDefinition(
        code="INIT_ODS_SCHEMA",
        name="初始化 ODS Schema",
        description="创建/重建 ODS 表结构",
        domain=BusinessDomain.SCHEMA,
        requires_window=False,
        default_enabled=False,
    ),
    TaskDefinition(
        code="INIT_DWD_SCHEMA",
        name="初始化 DWD Schema",
        description="创建/重建 DWD 表结构",
        domain=BusinessDomain.SCHEMA,
        requires_window=False,
        default_enabled=False,
    ),
    TaskDefinition(
        code="INIT_DWS_SCHEMA",
        name="初始化 DWS Schema",
        description="创建/重建 DWS 表结构",
        domain=BusinessDomain.SCHEMA,
        requires_window=False,
        default_enabled=False,
    ),
    # Other
    TaskDefinition(
        code="MANUAL_INGEST",
        name="手工数据灌入",
        description="从本地 JSON 回放入库",
        domain=BusinessDomain.OTHER,
        requires_window=False,
        default_enabled=False,
    ),
    TaskDefinition(
        code="CHECK_CUTOFF",
        name="检查 Cutoff",
        description="查看各表数据截止时间",
        domain=BusinessDomain.QUALITY,
        requires_window=False,
    ),
    TaskDefinition(
        code="DATA_INTEGRITY_CHECK",
        name="数据完整性检查",
        description="检查 ODS/DWD 数据完整性",
        domain=BusinessDomain.QUALITY,
        requires_window=True,
    ),
]
def _build_ods_task_definition(code: str) -> TaskDefinition:
    """Build the ``TaskDefinition`` for one ODS task code.

    The domain and display name come from ``ODS_DOMAIN_MAP`` and
    ``ODS_DISPLAY_NAMES``; codes missing from those tables fall back to
    ``BusinessDomain.OTHER`` and to the code itself.  Codes listed in
    ``DIMENSION_ODS_CODES`` are flagged as dimension tasks and do not require
    a time window; every other code does.

    The description is always generated from the display name.  The
    descriptions carried by ``ODS_TASK_SPECS`` are not consulted: the earlier
    lookup over the specs only ever re-assigned this same generated text, so
    it had no effect on the result and was dropped.

    Args:
        code: ODS task code, e.g. ``"ODS_PAYMENT"``.

    Returns:
        A ``TaskDefinition`` with ``is_ods=True``.
    """
    name = ODS_DISPLAY_NAMES.get(code, code)
    is_dimension = code in DIMENSION_ODS_CODES
    return TaskDefinition(
        code=code,
        name=name,
        description=f"抓取{name}到 ODS",
        domain=ODS_DOMAIN_MAP.get(code, BusinessDomain.OTHER),
        requires_window=not is_dimension,
        is_ods=True,
        is_dimension=is_dimension,
    )
class TaskRegistry:
    """Registry of every available task, shared as a single instance.

    ``__new__`` caches the first instance on the class and returns it for all
    later constructions; ``__init__`` loads the task table only once.
    """

    _instance: Optional["TaskRegistry"] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Flag checked by __init__, which runs on every TaskRegistry() call.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._initialized = True
        self._tasks: Dict[str, TaskDefinition] = {}
        self._load_tasks()

    def _load_tasks(self):
        """Populate the registry with ODS tasks followed by non-ODS tasks.

        ODS codes come from the backend's ``ENABLED_ODS_CODES`` when the
        backend was importable, otherwise from the keys of ``ODS_DOMAIN_MAP``.
        They are registered in a deterministic order -- position in
        ``ODS_DOMAIN_MAP`` first, then alphabetically for codes unknown to the
        map -- because iterating a ``set`` of strings directly yields an order
        that can change from run to run, which would reshuffle every list and
        UI derived from the registry.
        """
        ods_codes = ENABLED_ODS_CODES if _HAS_BACKEND else ODS_DOMAIN_MAP.keys()
        rank = {code: position for position, code in enumerate(ODS_DOMAIN_MAP)}
        unknown_rank = len(rank)
        ordered_codes = sorted(
            ods_codes, key=lambda code: (rank.get(code, unknown_rank), code)
        )
        for code in ordered_codes:
            self._tasks[code] = _build_ods_task_definition(code)

        for task_def in NON_ODS_TASKS:
            self._tasks[task_def.code] = task_def

    def get_task(self, code: str) -> Optional[TaskDefinition]:
        """Return the definition registered under ``code``, or ``None``."""
        return self._tasks.get(code)

    def get_all_tasks(self) -> List[TaskDefinition]:
        """Return every registered task in registration order."""
        return list(self._tasks.values())

    def get_ods_tasks(self) -> List[TaskDefinition]:
        """Return all ODS tasks."""
        return [task for task in self._tasks.values() if task.is_ods]

    def get_fact_ods_tasks(self) -> List[TaskDefinition]:
        """Return the ODS tasks that are not flagged as dimension tasks."""
        return [
            task
            for task in self._tasks.values()
            if task.is_ods and not task.is_dimension
        ]

    def get_dimension_ods_tasks(self) -> List[TaskDefinition]:
        """Return the ODS tasks flagged as dimension tasks."""
        return [
            task for task in self._tasks.values() if task.is_ods and task.is_dimension
        ]

    def get_tasks_by_domain(self, domain: BusinessDomain) -> List[TaskDefinition]:
        """Return every task (ODS or not) belonging to ``domain``."""
        return [task for task in self._tasks.values() if task.domain == domain]

    def get_ods_tasks_grouped(self) -> Dict[BusinessDomain, List[TaskDefinition]]:
        """Return the ODS tasks grouped by business domain.

        Only domains that contain at least one ODS task appear as keys.
        """
        grouped: Dict[BusinessDomain, List[TaskDefinition]] = {}
        for task in self.get_ods_tasks():
            grouped.setdefault(task.domain, []).append(task)
        return grouped

    def get_non_ods_tasks(self) -> List[TaskDefinition]:
        """Return all tasks that are not ODS tasks."""
        return [task for task in self._tasks.values() if not task.is_ods]
# Module-level registry instance, created (and its tasks loaded) at import time.
task_registry = TaskRegistry()
# Convenience helpers
def get_ods_task_codes() -> List[str]:
    """Return the code of every ODS task in the global registry."""
    ods_tasks = task_registry.get_ods_tasks()
    return [task.code for task in ods_tasks]
def get_fact_ods_task_codes() -> List[str]:
    """Return the codes of the fact-type (non-dimension) ODS tasks."""
    fact_tasks = task_registry.get_fact_ods_tasks()
    return [task.code for task in fact_tasks]
def get_dimension_ods_task_codes() -> List[str]:
    """Return the codes of the dimension-type ODS tasks."""
    dimension_tasks = task_registry.get_dimension_ods_tasks()
    return [task.code for task in dimension_tasks]
def get_all_task_tuples() -> List[Tuple[str, str, str]]:
    """Return ``(code, name, description)`` for every registered task."""
    rows: List[Tuple[str, str, str]] = []
    for task in task_registry.get_all_tasks():
        rows.append((task.code, task.name, task.description))
    return rows
def get_ods_tasks_for_ui() -> List[Tuple[str, str, BusinessDomain]]:
    """Return ``(code, display_name, domain)`` for every ODS task, for UI use."""
    rows: List[Tuple[str, str, BusinessDomain]] = []
    for task in task_registry.get_ods_tasks():
        rows.append((task.code, task.name, task.domain))
    return rows

View File

@@ -7,6 +7,7 @@ from .log_viewer import LogViewer
from .db_viewer import DBViewer
from .status_panel import StatusPanel
from .task_manager import TaskManager
from .task_selector import TaskSelectorWidget, CompactTaskSelector
__all__ = [
"TaskPanel",
@@ -15,4 +16,6 @@ __all__ = [
"DBViewer",
"StatusPanel",
"TaskManager",
"TaskSelectorWidget",
"CompactTaskSelector",
]

View File

@@ -26,28 +26,45 @@ from ..utils.app_settings import app_settings
from ..workers.task_worker import TaskWorker
# 可调度的任务列表(包含所有 ODS 任务 + DWD/质量检查任务)
SCHEDULABLE_TASKS = [
# ODS 数据抓取任务(与 task_panel.AUTO_UPDATE_TASKS 保持一致)
("ODS_PAYMENT", "支付流水"),
("ODS_MEMBER", "会员档案"),
("ODS_MEMBER_CARD", "会员储值卡"),
("ODS_MEMBER_BALANCE", "会员余额变动"),
("ODS_SETTLEMENT_RECORDS", "结账记录"),
("ODS_TABLE_USE", "台费计费流水"),
("ODS_ASSISTANT_ACCOUNT", "助教账号"),
("ODS_ASSISTANT_LEDGER", "助教流水"),
("ODS_ASSISTANT_ABOLISH", "助教作废"),
("ODS_REFUND", "退款流水"),
("ODS_PLATFORM_COUPON", "平台券核销"),
("ODS_RECHARGE_SETTLE", "充值结算"),
("ODS_SETTLEMENT_TICKET", "结账小票"),
# DWD 和质量检查任务
("DWD_LOAD_FROM_ODS", "ODS→DWD 装载"),
("DWD_QUALITY_CHECK", "DWD 质量检查"),
("DATA_INTEGRITY_CHECK", "数据完整性检查"),
("CHECK_CUTOFF", "检查 Cutoff"),
]
# 动态获取可调度的任务列表
def _get_schedulable_tasks():
"""从任务注册表动态获取可调度任务列表"""
try:
from ..models.task_registry import task_registry
tasks = []
# 添加所有 ODS 任务
for task_def in task_registry.get_ods_tasks():
tasks.append((task_def.code, task_def.name))
# 添加非 ODS 任务(排除 Schema 初始化和手工灌入)
exclude_codes = {"INIT_ODS_SCHEMA", "INIT_DWD_SCHEMA", "INIT_DWS_SCHEMA", "MANUAL_INGEST"}
for task_def in task_registry.get_non_ods_tasks():
if task_def.code not in exclude_codes:
tasks.append((task_def.code, task_def.name))
return tasks
except ImportError:
# 回退到静态列表
return [
("ODS_PAYMENT", "支付流水"),
("ODS_MEMBER", "会员档案"),
("ODS_MEMBER_CARD", "会员储值卡"),
("ODS_MEMBER_BALANCE", "会员余额变动"),
("ODS_SETTLEMENT_RECORDS", "结账记录"),
("ODS_TABLE_USE", "台费计费流水"),
("ODS_ASSISTANT_ACCOUNT", "助教账号"),
("ODS_ASSISTANT_LEDGER", "助教流水"),
("ODS_ASSISTANT_ABOLISH", "助教作废"),
("ODS_REFUND", "退款流水"),
("ODS_PLATFORM_COUPON", "平台券核销"),
("ODS_RECHARGE_SETTLE", "充值结算"),
("ODS_SETTLEMENT_TICKET", "结账小票"),
("DWD_LOAD_FROM_ODS", "ODS→DWD 装载"),
("DWD_QUALITY_CHECK", "DWD 质量检查"),
("DATA_INTEGRITY_CHECK", "数据完整性检查"),
("CHECK_CUTOFF", "检查 Cutoff"),
]
SCHEDULABLE_TASKS = _get_schedulable_tasks()
class TaskLogDialog(QDialog):
@@ -1584,6 +1601,7 @@ class TaskManager(QWidget):
# 统计关键数据
total_inserted = 0
total_updated = 0
total_missing = 0
total_records = 0
@@ -1596,11 +1614,30 @@ class TaskManager(QWidget):
import json
stats_str = match.group(1).replace("'", '"')
stats = json.loads(stats_str)
if 'tables' in stats:
for tbl in stats['tables']:
inserted = tbl.get('inserted', 0)
processed = tbl.get('processed', 0)
total_inserted += inserted + processed
inserted = int(tbl.get('inserted', 0) or 0)
updated = int(tbl.get('updated', 0) or 0)
processed = int(tbl.get('processed', 0) or 0)
has_new_counts = ('inserted' in tbl) or ('updated' in tbl)
if has_new_counts:
total_inserted += inserted
total_updated += updated
else:
total_inserted += inserted + processed
except Exception:
pass
@@ -1622,8 +1659,11 @@ class TaskManager(QWidget):
total_records += int(match.group(1))
# 构建摘要
if total_inserted > 0:
summary_parts.append(f"处理 {total_inserted}")
if total_inserted > 0 or total_updated > 0:
if total_updated > 0:
summary_parts.append(f"?? {total_inserted} ?, ?? {total_updated} ?")
else:
summary_parts.append(f"?? {total_inserted} ?")
if total_records > 0:
if total_missing > 0:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,398 @@
# -*- coding: utf-8 -*-
"""可复用的 ODS 任务选择组件:按业务域分组显示,支持全选/反选。"""
from typing import Dict, List, Optional, Set
from PySide6.QtWidgets import (
QWidget, QVBoxLayout, QHBoxLayout, QGroupBox,
QCheckBox, QPushButton, QScrollArea, QFrame,
QLabel, QSizePolicy
)
from PySide6.QtCore import Signal, Qt
from ..models.task_registry import (
TaskRegistry, TaskDefinition, BusinessDomain, DOMAIN_LABELS,
task_registry, get_fact_ods_task_codes, get_dimension_ods_task_codes
)
class TaskSelectorWidget(QWidget):
    """ODS task picker showing one checkbox per task, grouped by business domain.

    A toolbar offers "select all", "select none" and "facts only" buttons
    plus a counter of selected tasks.  ``selection_changed`` is emitted with
    the list of selected task codes whenever the selection changes.
    """

    # Emitted with the list of currently selected task codes.
    selection_changed = Signal(list)

    # Preferred top-to-bottom order of the domain groups.  Domains that hold
    # ODS tasks but are not listed here (for example OTHER, which the registry
    # assigns to codes it has no mapping for) are appended after these so
    # that no task is silently hidden from the user.
    _DOMAIN_ORDER = (
        BusinessDomain.MEMBER,
        BusinessDomain.SETTLEMENT,
        BusinessDomain.ASSISTANT,
        BusinessDomain.GOODS,
        BusinessDomain.TABLE,
        BusinessDomain.PROMOTION,
        BusinessDomain.INVENTORY,
    )

    def __init__(
        self,
        parent: Optional[QWidget] = None,
        show_dimensions: bool = True,
        show_facts: bool = True,
        default_select_facts: bool = True,
        default_select_dimensions: bool = False,
        compact: bool = False,
        max_height: int = 0,
    ):
        """Create the selector and apply the default selection.

        Args:
            parent: Parent widget.
            show_dimensions: Whether dimension-type tasks are listed.
            show_facts: Whether fact-type tasks are listed.
            default_select_facts: Initial checked state of fact-type tasks.
            default_select_dimensions: Initial checked state of
                dimension-type tasks.
            compact: Compact mode; uses a layout spacing of 4 instead of 8.
            max_height: Maximum height applied to the scroll area
                (0 means no limit).
        """
        super().__init__(parent)
        self.show_dimensions = show_dimensions
        self.show_facts = show_facts
        self.default_select_facts = default_select_facts
        self.default_select_dimensions = default_select_dimensions
        self.compact = compact
        self.max_height = max_height

        # Task checkboxes: code -> QCheckBox
        self._checkboxes: Dict[str, QCheckBox] = {}
        # Domain group boxes: domain -> QGroupBox
        self._domain_groups: Dict[BusinessDomain, QGroupBox] = {}

        self._init_ui()
        self._apply_default_selection()

    def _init_ui(self):
        """Build the toolbar and the scrollable list of domain groups."""
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        spacing = 4 if self.compact else 8
        layout.setSpacing(spacing)

        # Top toolbar
        toolbar = QHBoxLayout()
        toolbar.setSpacing(8)

        self.select_all_btn = QPushButton("全选")
        self.select_all_btn.setProperty("secondary", True)
        self.select_all_btn.setFixedWidth(60)
        self.select_all_btn.clicked.connect(self._select_all)
        toolbar.addWidget(self.select_all_btn)

        self.deselect_all_btn = QPushButton("全不选")
        self.deselect_all_btn.setProperty("secondary", True)
        self.deselect_all_btn.setFixedWidth(60)
        self.deselect_all_btn.clicked.connect(self._deselect_all)
        toolbar.addWidget(self.deselect_all_btn)

        self.select_facts_btn = QPushButton("选事实表")
        self.select_facts_btn.setProperty("secondary", True)
        self.select_facts_btn.setFixedWidth(70)
        self.select_facts_btn.setToolTip("选中所有事实类任务(需要时间窗口的任务)")
        self.select_facts_btn.clicked.connect(self._select_facts_only)
        toolbar.addWidget(self.select_facts_btn)

        toolbar.addStretch()

        self.selected_count_label = QLabel("已选: 0")
        self.selected_count_label.setProperty("subheading", True)
        toolbar.addWidget(self.selected_count_label)

        layout.addLayout(toolbar)

        # Scroll area
        scroll_area = QScrollArea()
        scroll_area.setWidgetResizable(True)
        scroll_area.setFrameShape(QFrame.NoFrame)
        if self.max_height > 0:
            scroll_area.setMaximumHeight(self.max_height)

        # Content container
        content_widget = QWidget()
        content_layout = QVBoxLayout(content_widget)
        content_layout.setContentsMargins(0, 0, 0, 0)
        content_layout.setSpacing(spacing)

        # One group box per domain: preferred order first, then any other
        # domain the registry reports.
        grouped_tasks = task_registry.get_ods_tasks_grouped()
        domains = [d for d in self._DOMAIN_ORDER if d in grouped_tasks]
        domains.extend(d for d in grouped_tasks if d not in self._DOMAIN_ORDER)

        for domain in domains:
            visible_tasks = [
                task
                for task in grouped_tasks[domain]
                if (self.show_dimensions if task.is_dimension else self.show_facts)
            ]
            if not visible_tasks:
                continue

            group_box = self._create_domain_group(domain, visible_tasks)
            self._domain_groups[domain] = group_box
            content_layout.addWidget(group_box)

        content_layout.addStretch()
        scroll_area.setWidget(content_widget)
        layout.addWidget(scroll_area, 1)

    def _create_domain_group(
        self, domain: BusinessDomain, tasks: List[TaskDefinition]
    ) -> QGroupBox:
        """Create the group box for ``domain`` with one checkbox per task.

        Each checkbox is registered in ``self._checkboxes`` under its task
        code and carries ``task_code`` / ``is_dimension`` as Qt properties.
        """
        group_box = QGroupBox(DOMAIN_LABELS.get(domain, str(domain.value)))
        group_layout = QVBoxLayout(group_box)
        group_layout.setContentsMargins(8, 4, 8, 4)
        group_layout.setSpacing(2)

        for task in tasks:
            checkbox = QCheckBox(f"{task.name}")
            checkbox.setToolTip(f"{task.code}: {task.description}")
            checkbox.setProperty("task_code", task.code)
            checkbox.setProperty("is_dimension", task.is_dimension)
            checkbox.stateChanged.connect(self._on_selection_changed)

            self._checkboxes[task.code] = checkbox
            group_layout.addWidget(checkbox)

        return group_box

    def _apply_default_selection(self):
        """Check each box according to the default for its task type."""
        for checkbox in self._checkboxes.values():
            if checkbox.property("is_dimension"):
                checkbox.setChecked(self.default_select_dimensions)
            else:
                checkbox.setChecked(self.default_select_facts)
        self._update_count_label()

    def _on_selection_changed(self):
        """Refresh the counter and emit ``selection_changed``."""
        self._update_count_label()
        self.selection_changed.emit(self.get_selected_codes())

    def _update_count_label(self):
        """Show "selected/total" in the counter label."""
        count = len(self.get_selected_codes())
        total = len(self._checkboxes)
        self.selected_count_label.setText(f"已选: {count}/{total}")

    def _set_checked_silently(self, checkbox: QCheckBox, checked: bool):
        """Set ``checkbox`` without firing its signals.

        Bulk operations use this so that one notification is sent at the end
        rather than one per checkbox.
        """
        checkbox.blockSignals(True)
        checkbox.setChecked(checked)
        checkbox.blockSignals(False)

    def _select_all(self):
        """Check every task."""
        for checkbox in self._checkboxes.values():
            self._set_checked_silently(checkbox, True)
        self._on_selection_changed()

    def _deselect_all(self):
        """Uncheck every task."""
        for checkbox in self._checkboxes.values():
            self._set_checked_silently(checkbox, False)
        self._on_selection_changed()

    def _select_facts_only(self):
        """Check the fact-type tasks and uncheck the dimension-type ones."""
        for checkbox in self._checkboxes.values():
            is_dimension = checkbox.property("is_dimension")
            self._set_checked_silently(checkbox, not is_dimension)
        self._on_selection_changed()

    def get_selected_codes(self) -> List[str]:
        """Return the codes of the checked tasks, in display order."""
        return [
            code for code, checkbox in self._checkboxes.items() if checkbox.isChecked()
        ]

    def set_selected_codes(self, codes: List[str]):
        """Check exactly the tasks whose code is in ``codes``.

        Codes that have no checkbox are ignored.  A single
        ``selection_changed`` is emitted afterwards.
        """
        wanted = set(codes)
        for code, checkbox in self._checkboxes.items():
            self._set_checked_silently(checkbox, code in wanted)
        self._on_selection_changed()

    def get_all_codes(self) -> List[str]:
        """Return the codes of all listed tasks."""
        return list(self._checkboxes.keys())

    def is_any_selected(self) -> bool:
        """Return whether at least one task is checked."""
        return len(self.get_selected_codes()) > 0
class CompactTaskSelector(QWidget):
    """Compact task picker with one checkbox per business domain in a row.

    Checking a domain selects every listed task of that domain; the tasks
    contained in a domain are shown in the checkbox tooltip.
    ``selection_changed`` is emitted with the resulting list of task codes.
    """

    # Emitted with the list of currently selected task codes.
    selection_changed = Signal(list)

    # Preferred left-to-right order of the domain checkboxes.  Domains that
    # hold ODS tasks but are not listed here (for example OTHER, which the
    # registry assigns to codes it has no mapping for) are appended after
    # these so that their tasks remain selectable.
    _DOMAIN_ORDER = (
        BusinessDomain.MEMBER,
        BusinessDomain.SETTLEMENT,
        BusinessDomain.ASSISTANT,
        BusinessDomain.GOODS,
        BusinessDomain.TABLE,
        BusinessDomain.PROMOTION,
        BusinessDomain.INVENTORY,
    )

    def __init__(
        self,
        parent: Optional[QWidget] = None,
        show_dimensions: bool = True,
        show_facts: bool = True,
        default_select_facts: bool = True,
        default_select_dimensions: bool = False,
    ):
        """Create the selector with every domain initially checked.

        Args:
            parent: Parent widget.
            show_dimensions: Whether dimension-type tasks are included.
            show_facts: Whether fact-type tasks are included.
            default_select_facts: Stored on the instance; the initial
                selection does not consult it (all domains start checked).
            default_select_dimensions: Stored on the instance; likewise not
                consulted by the initial selection.
        """
        super().__init__(parent)
        self.show_dimensions = show_dimensions
        self.show_facts = show_facts
        self.default_select_facts = default_select_facts
        self.default_select_dimensions = default_select_dimensions

        # Domain checkboxes: domain -> QCheckBox
        self._domain_checkboxes: Dict[BusinessDomain, QCheckBox] = {}
        # Task codes listed under each domain
        self._domain_tasks: Dict[BusinessDomain, List[str]] = {}

        self._init_ui()
        self._apply_default_selection()

    def _init_ui(self):
        """Build the toolbar and the row of domain checkboxes."""
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        layout.setSpacing(4)

        # Toolbar
        toolbar = QHBoxLayout()
        toolbar.setSpacing(8)

        self.select_all_btn = QPushButton("全选")
        self.select_all_btn.setProperty("secondary", True)
        self.select_all_btn.setFixedWidth(50)
        self.select_all_btn.clicked.connect(self._select_all)
        toolbar.addWidget(self.select_all_btn)

        self.deselect_all_btn = QPushButton("清空")
        self.deselect_all_btn.setProperty("secondary", True)
        self.deselect_all_btn.setFixedWidth(50)
        self.deselect_all_btn.clicked.connect(self._deselect_all)
        toolbar.addWidget(self.deselect_all_btn)

        toolbar.addStretch()

        self.count_label = QLabel("已选: 0")
        self.count_label.setProperty("subheading", True)
        toolbar.addWidget(self.count_label)

        layout.addLayout(toolbar)

        # Domain checkboxes, laid out horizontally
        domains_layout = QHBoxLayout()
        domains_layout.setSpacing(12)

        # Preferred order first, then any other domain the registry reports.
        grouped_tasks = task_registry.get_ods_tasks_grouped()
        domains = [d for d in self._DOMAIN_ORDER if d in grouped_tasks]
        domains.extend(d for d in grouped_tasks if d not in self._DOMAIN_ORDER)

        for domain in domains:
            task_codes = [
                task.code
                for task in grouped_tasks[domain]
                if (self.show_dimensions if task.is_dimension else self.show_facts)
            ]
            if not task_codes:
                continue

            self._domain_tasks[domain] = task_codes

            checkbox = QCheckBox(DOMAIN_LABELS.get(domain, str(domain.value)))
            checkbox.setToolTip(f"包含: {', '.join(task_codes)}")
            checkbox.stateChanged.connect(self._on_selection_changed)

            self._domain_checkboxes[domain] = checkbox
            domains_layout.addWidget(checkbox)

        domains_layout.addStretch()
        layout.addLayout(domains_layout)

    def _apply_default_selection(self):
        """Check every domain by default."""
        for checkbox in self._domain_checkboxes.values():
            checkbox.setChecked(True)
        self._update_count_label()

    def _on_selection_changed(self):
        """Refresh the counter and emit ``selection_changed``."""
        self._update_count_label()
        self.selection_changed.emit(self.get_selected_codes())

    def _update_count_label(self):
        """Show the number of selected tasks in the counter label."""
        count = len(self.get_selected_codes())
        self.count_label.setText(f"已选: {count} 个任务")

    def _set_all_domains(self, checked: bool):
        """Set every domain checkbox silently, then notify once."""
        for checkbox in self._domain_checkboxes.values():
            checkbox.blockSignals(True)
            checkbox.setChecked(checked)
            checkbox.blockSignals(False)
        self._on_selection_changed()

    def _select_all(self):
        """Check every domain."""
        self._set_all_domains(True)

    def _deselect_all(self):
        """Uncheck every domain."""
        self._set_all_domains(False)

    def get_selected_codes(self) -> List[str]:
        """Return the task codes of all checked domains, in display order."""
        selected: List[str] = []
        for domain, checkbox in self._domain_checkboxes.items():
            if checkbox.isChecked():
                selected.extend(self._domain_tasks.get(domain, []))
        return selected

    def set_selected_domains(self, domains: List[BusinessDomain]):
        """Check exactly the domains in ``domains``.

        Domains without a checkbox are ignored.  A single
        ``selection_changed`` is emitted afterwards.
        """
        wanted = set(domains)
        for domain, checkbox in self._domain_checkboxes.items():
            checkbox.blockSignals(True)
            checkbox.setChecked(domain in wanted)
            checkbox.blockSignals(False)
        self._on_selection_changed()

    def is_any_selected(self) -> bool:
        """Return whether at least one task is selected."""
        return len(self.get_selected_codes()) > 0

View File

@@ -189,28 +189,123 @@ class TaskWorker(QThread):
# 解析 DWD 装载统计
if 'tables' in stats:
total_processed = 0
total_inserted = 0
tables_with_data = []
total_dim_inserted = 0
total_dim_updated = 0
total_fact_inserted = 0
total_fact_updated = 0
dim_tables = [] # ?????
fact_tables = [] # ?????
for tbl in stats['tables']:
table_name = tbl.get('table', '').replace('billiards_dwd.', '')
processed = tbl.get('processed', 0)
inserted = tbl.get('inserted', 0)
if processed > 0:
total_processed += processed
tables_with_data.append(f"{table_name}({processed})")
elif inserted > 0:
total_inserted += inserted
tables_with_data.append(f"{table_name}(+{inserted})")
mode = tbl.get('mode', '')
processed = int(tbl.get('processed', 0) or 0)
inserted = int(tbl.get('inserted', 0) or 0)
updated = int(tbl.get('updated', 0) or 0)
has_new_counts = ('inserted' in tbl) or ('updated' in tbl)
if total_processed > 0 or total_inserted > 0:
dwd_stats.append(f"处理维度: {total_processed}条, 新增事实: {total_inserted}")
if len(tables_with_data) <= 5:
dwd_stats.append(f"涉及表: {', '.join(tables_with_data)}")
# ?? _ex ?????????
if table_name.endswith('_ex'):
continue
is_dim = table_name.startswith('dim_') or mode == 'SCD2'
if is_dim:
if has_new_counts:
total_dim_inserted += inserted
total_dim_updated += updated
if inserted or updated:
dim_tables.append(f"{table_name}: +{inserted}, ~{updated}")
elif processed > 0:
total_dim_updated += processed
dim_tables.append(f"{table_name}: {processed}")
else:
dwd_stats.append(f"涉及 {len(tables_with_data)} 张表")
if has_new_counts:
total_fact_inserted += inserted
total_fact_updated += updated
if inserted or updated:
fact_tables.append(f"{table_name}: +{inserted}, ~{updated}")
elif processed > 0 or inserted > 0:
total_fact_inserted += inserted
if inserted > 0:
fact_tables.append(f"{table_name}: +{inserted}")
if (total_dim_inserted or total_dim_updated or total_fact_inserted or total_fact_updated):
dwd_stats.append(
f"????: {total_dim_inserted}?, ????: {total_dim_updated}?, "
f"????: {total_fact_inserted}?, ????: {total_fact_updated}?"
)
# ???????
if dim_tables:
dwd_stats.append(" ???: " + ", ".join(dim_tables))
# ???????
if fact_tables:
dwd_stats.append(" ???: " + ", ".join(fact_tables))
# 解析错误信息
if 'errors' in stats and stats['errors']:
for err in stats['errors']:
err_table = err.get('table', '').replace('billiards_dwd.', '')
err_msg = err.get('error', '')
errors.append(f"{err_table}: {err_msg}")
except Exception:
pass
continue
@@ -263,7 +358,9 @@ class TaskWorker(QThread):
summary_parts[-1] += f"{len(ods_stats)}"
if dwd_stats:
summary_parts.append("【DWD 装载】" + "; ".join(dwd_stats))
summary_parts.append("【DWD 装载】" + dwd_stats[0]) # 第一行是汇总
for detail in dwd_stats[1:]: # 后面是详情
summary_parts.append(detail)
if integrity_stats:
total_missing = integrity_stats.get('final_missing', integrity_stats.get('total_missing', 0))

View File

@@ -357,8 +357,20 @@ class ETLScheduler:
try:
# 创建任务实例(不需要 API client使用 None
api_client = None
if task_code == "ODS_JSON_ARCHIVE":
run_id = int(datetime.now(self.tz).timestamp())
fetch_dir = self._build_fetch_dir(task_code, run_id)
api_client = RecordingAPIClient(
base_client=self.api_client,
output_dir=fetch_dir,
task_code=task_code,
run_id=run_id,
write_pretty=self.write_pretty_json,
)
task = self.task_registry.create_task(
task_code, self.config, self.db_ops, None, self.logger
task_code, self.config, self.db_ops, api_client, self.logger
)
# 执行任务(工具类任务通常不需要 cursor_data

File diff suppressed because it is too large Load Diff

View File

@@ -28,7 +28,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from api.client import APIClient
from api.recording_client import build_recording_client
from config.settings import AppConfig
from database.connection import DatabaseConnection
from models.parsers import TypeParser
@@ -211,13 +211,7 @@ class MissingDataBackfiller:
self.store_id = int(cfg.get("app.store_id") or 0)
# API 客户端
self.api = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
self.api = build_recording_client(cfg, task_code="BACKFILL_MISSING_DATA")
# 数据库连接(DatabaseConnection 构造时已设置 autocommit=False)
self.db = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))

View File

@@ -29,7 +29,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from api.client import APIClient
from api.recording_client import build_recording_client
from config.settings import AppConfig
from database.connection import DatabaseConnection
from models.parsers import TypeParser
@@ -800,13 +800,8 @@ def run_gap_check(
if cutoff:
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), cutoff_overlap_hours)
client = APIClient(
base_url=cfg["api"]["base_url"],
token=cfg["api"]["token"],
timeout=int(cfg["api"].get("timeout_sec") or 20),
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
headers_extra=cfg["api"].get("headers_extra") or {},
)
tag_suffix = f"_{args.tag}" if args.tag else ""
client = build_recording_client(cfg, task_code=f"ODS_GAP_CHECK{tag_suffix}")
db_state = _init_db_state(cfg)
try:

View File

@@ -522,9 +522,9 @@ class DwdLoadTask(BaseTask):
continue
if self._table_base(dwd_table).startswith("dim_"):
processed = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
dim_counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
self.db.conn.commit()
summary.append({"table": dwd_table, "mode": "SCD2", "processed": processed})
summary.append({"table": dwd_table, "mode": "SCD2", **dim_counts})
else:
dwd_types = self._get_column_types(cur, dwd_table, "billiards_dwd")
ods_types = self._get_column_types(cur, ods_table, "billiards_ods")
@@ -532,7 +532,7 @@ class DwdLoadTask(BaseTask):
self.config.get("run.window_override.start")
and self.config.get("run.window_override.end")
)
inserted = self._merge_fact_increment(
fact_counts = self._merge_fact_increment(
cur,
dwd_table,
ods_table,
@@ -544,7 +544,7 @@ class DwdLoadTask(BaseTask):
window_end=context.window_end if use_window else None,
)
self.db.conn.commit()
summary.append({"table": dwd_table, "mode": "INCREMENT", "inserted": inserted})
summary.append({"table": dwd_table, "mode": "INCREMENT", **fact_counts})
elapsed = time.monotonic() - started
self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
@@ -675,7 +675,7 @@ class DwdLoadTask(BaseTask):
dwd_cols: Sequence[str],
ods_cols: Sequence[str],
now: datetime,
) -> int:
) -> Dict[str, int]:
"""
维表合并策略:
- 若主键包含 scd2 列(如 scd2_start_time/scd2_version执行真正的 SCD2关闭旧版+插入新版)。
@@ -699,8 +699,8 @@ class DwdLoadTask(BaseTask):
ods_cols: Sequence[str],
pk_cols: Sequence[str],
now: datetime,
) -> int:
"""维表 Type1 Upsert主键冲突则更新兼容带 scd2 字段但主键不支持多版本的表"""
) -> Dict[str, int]:
"""维表 Type1 Upsert主键冲突则更新返回真实新增/更新计数"""
mapping = self._build_column_mapping(dwd_table, pk_cols, ods_cols)
ods_set = {c.lower() for c in ods_cols}
ods_table_sql = self._format_table(ods_table, "billiards_ods")
@@ -731,7 +731,7 @@ class DwdLoadTask(BaseTask):
added.add(lc)
if not select_exprs:
return 0
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
order_col = self._pick_snapshot_order_column(ods_cols)
business_keys = self._strip_scd2_keys(pk_cols)
@@ -768,7 +768,7 @@ class DwdLoadTask(BaseTask):
src_rows.append(row)
if not src_rows:
return 0
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
sorted_cols = [c.lower() for c in sorted(dwd_cols)]
@@ -802,12 +802,19 @@ class DwdLoadTask(BaseTask):
else:
set_exprs.append(f'\"{c}\" = EXCLUDED.\"{c}\"')
compare_cols = [c for c in sorted_cols if c not in pk_lower_set and c not in self.SCD_COLS]
diff_exprs = [f'{dwd_table_sql}."{c}" IS DISTINCT FROM EXCLUDED."{c}"' for c in compare_cols]
where_clause = f" WHERE {' OR '.join(diff_exprs)}" if diff_exprs else ""
upsert_sql = (
f"INSERT INTO {dwd_table_sql} ({insert_cols_sql}) VALUES %s "
f"ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}"
f"ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}{where_clause} "
f"RETURNING (xmax = 0) AS inserted"
)
execute_values(cur, upsert_sql, [build_row(r) for r in src_rows], page_size=500)
return len(src_rows)
rows = execute_values(cur, upsert_sql, [build_row(r) for r in src_rows], page_size=500, fetch=True)
inserted, updated = self._count_returning_flags(rows or [])
processed = len(src_rows)
skipped = max(0, processed - inserted - updated)
return {"processed": processed, "inserted": inserted, "updated": updated, "skipped": skipped}
def _merge_dim_scd2(
self,
@@ -817,7 +824,7 @@ class DwdLoadTask(BaseTask):
dwd_cols: Sequence[str],
ods_cols: Sequence[str],
now: datetime,
) -> int:
) -> Dict[str, int]:
"""对维表执行 SCD2 合并:对比变更关闭旧版并插入新版。"""
pk_cols = self._get_primary_keys(cur, dwd_table)
if not pk_cols:
@@ -860,7 +867,7 @@ class DwdLoadTask(BaseTask):
added.add(lc)
if not select_exprs:
return 0
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
order_col = self._pick_snapshot_order_column(ods_cols)
key_exprs: list[str] = []
@@ -906,7 +913,7 @@ class DwdLoadTask(BaseTask):
src_rows_by_pk[pk_key] = mapped_row
if not src_rows_by_pk:
return 0
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
# 预加载当前版本scd2_is_current=1避免逐行 SELECT 造成大量 round-trip
table_sql_dwd = self._format_table(dwd_table, "billiards_dwd")
@@ -941,7 +948,11 @@ class DwdLoadTask(BaseTask):
if to_insert:
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now)
return len(src_rows_by_pk)
processed = len(src_rows_by_pk)
updated = len(to_close)
inserted = max(0, len(to_insert) - updated)
skipped = max(0, processed - inserted - updated)
return {"processed": processed, "inserted": inserted, "updated": updated, "skipped": skipped}
def _close_current_dim_bulk(
self,
@@ -1129,9 +1140,13 @@ class DwdLoadTask(BaseTask):
value = datetime.combine(value, datetime.min.time())
if not isinstance(value, datetime):
return value
if value.tzinfo is None:
return value.replace(tzinfo=self.tz)
return value.astimezone(self.tz)
try:
if value.tzinfo is None:
return value.replace(tzinfo=self.tz)
return value.astimezone(self.tz)
except (OverflowError, OSError):
# 极端日期值(如 9999-12-31无法转换时区直接返回原值
return value
def _looks_numeric(self, value: Any) -> bool:
if isinstance(value, (int, float, Decimal)) and not isinstance(value, bool):
@@ -1184,6 +1199,22 @@ class DwdLoadTask(BaseTask):
return False
return None
@staticmethod
def _count_returning_flags(rows: Iterable[Any]) -> tuple[int, int]:
    """Count inserted vs. updated rows from a ``RETURNING (xmax = 0) AS inserted`` result.

    Each row carries a single boolean flag: truthy means the statement
    inserted the row, falsy means an existing row was updated.

    Args:
        rows: Rows fetched from the cursor. Mapping-style rows are read via
            the ``"inserted"`` key; sequence-style rows via position 0.

    Returns:
        A ``(inserted, updated)`` tuple of counts.
    """
    inserted = 0
    updated = 0
    for row in rows:
        if isinstance(row, dict):
            flag = row.get("inserted")
        else:
            flag = row[0] if row else None
        if flag is None:
            # An empty/None row (or a missing key) carries no information;
            # counting it as an update would inflate the "updated" total.
            continue
        if flag:
            inserted += 1
        else:
            updated += 1
    return inserted, updated
def _merge_fact_increment(
self,
cur,
@@ -1195,8 +1226,8 @@ class DwdLoadTask(BaseTask):
ods_types: Dict[str, str],
window_start: datetime | None = None,
window_end: datetime | None = None,
) -> int:
"""事实表按时间增量插入,默认按列名交集写入"""
) -> Dict[str, int]:
"""事实表按时间增量插入,返回真实新增/更新计数"""
mapping_entries = self.FACT_MAPPINGS.get(dwd_table) or []
mapping: Dict[str, tuple[str, str | None]] = {
dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
@@ -1306,18 +1337,31 @@ class DwdLoadTask(BaseTask):
set_exprs = [f'"{c}" = EXCLUDED."{c}"' for c in insert_cols if c.lower() not in pk_lower]
if snapshot_mode or fact_upsert:
if set_exprs:
sql += f" ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}"
compare_cols = [c for c in insert_cols if c.lower() not in pk_lower]
diff_exprs = [f'{dwd_table_sql}."{c}" IS DISTINCT FROM EXCLUDED."{c}"' for c in compare_cols]
where_clause = f" WHERE {' OR '.join(diff_exprs)}" if diff_exprs else ""
sql += f" ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}{where_clause}"
else:
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
else:
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
sql += " RETURNING (xmax = 0) AS inserted"
cur.execute(sql, params)
inserted = cur.rowcount
inserted = 0
updated = 0
while True:
rows = cur.fetchmany(10000)
if not rows:
break
ins, upd = self._count_returning_flags(rows)
inserted += ins
updated += upd
# 回补缺失主键记录处理历史回补导致的“create_time 水位”遗漏)
if dwd_table.lower() in self.FACT_MISSING_FILL_TABLES:
inserted += self._insert_missing_by_pk(
missing_inserted = self._insert_missing_by_pk(
cur,
dwd_table,
ods_table,
@@ -1328,8 +1372,9 @@ class DwdLoadTask(BaseTask):
dwd_types,
ods_types,
)
inserted += missing_inserted
return inserted
return {"inserted": inserted, "updated": updated, "processed": inserted + updated}
def _pick_order_column(self, dwd_table: str, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
"""Pick an incremental order column that exists in both DWD and ODS."""
lower_cols = {c.lower() for c in dwd_cols} & {c.lower() for c in ods_cols}

View File

@@ -177,11 +177,12 @@ class BaseOdsTask(BaseTask):
def _resolve_window(self, cursor_data: dict | None) -> tuple[datetime, datetime, int]:
base_start, base_end, base_minutes = self._get_time_window(cursor_data)
if self.config.get("run.force_window_override"):
override_start = self.config.get("run.window_override.start")
override_end = self.config.get("run.window_override.end")
if override_start and override_end:
return base_start, base_end, base_minutes
# 如果用户显式指定了窗口(window_override.start/end),则直接使用,不走 MAX(fetched_at) 兜底
override_start = self.config.get("run.window_override.start")
override_end = self.config.get("run.window_override.end")
if override_start and override_end:
# 用户明确指定了窗口,尊重用户选择
return base_start, base_end, base_minutes
# 以 ODS 表 MAX(fetched_at) 兜底:避免“窗口游标推进但未实际入库”导致漏数。
last_fetched = self._get_max_fetched_at(self.SPEC.table_name)

1184
tmp/py_inventory.md Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
"""Print a Markdown table of (skill_id, skill_name) combinations and their row counts.

Reads from billiards_dwd.dwd_assistant_service_log and writes the table to stdout.
"""
import contextlib
import os
import sys

import psycopg2

# NOTE(review): this DSN embeds a plaintext credential that is now committed to
# the repository. It is kept only as a fallback so the script keeps working
# unchanged; rotate the password and drop the literal once callers set the
# environment variable instead.
DEFAULT_DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

# Environment variable that overrides DEFAULT_DSN when set.
# NOTE(review): the name is a suggestion - align with the project's .env keys.
DSN_ENV_VAR = "PG_DSN"

# Mapping between skill_id and skill_name, with the number of rows per pair.
SKILL_QUERY = """
SELECT
skill_id,
skill_name,
COUNT(*) as count
FROM billiards_dwd.dwd_assistant_service_log
GROUP BY skill_id, skill_name
ORDER BY skill_id, count DESC
"""


def main() -> None:
    """Run the aggregation query and print the result as a Markdown table."""
    sys.stdout.reconfigure(encoding='utf-8')

    dsn = os.environ.get(DSN_ENV_VAR) or DEFAULT_DSN

    # closing() guarantees the connection is released even when the query
    # fails; psycopg2's own `with conn:` only scopes a transaction.
    with contextlib.closing(psycopg2.connect(dsn)) as conn:
        with conn.cursor() as cur:
            cur.execute(SKILL_QUERY)
            results = cur.fetchall()

    print("| skill_id | skill_name | 记录数 |")
    print("|----------|------------|--------|")
    for skill_id, skill_name, count in results:
        name = skill_name if skill_name else "(NULL)"
        print(f"| {skill_id} | {name} | {count} |")
    print(f"\n{len(results)} 种组合")


if __name__ == "__main__":
    main()

85
tmp/task_inventory.md Normal file
View File

@@ -0,0 +1,85 @@
# Task Inventory
## Registered Tasks (from task_registry.py)
| Task Code | Class |
|---|---|
| PRODUCTS | ProductsTask |
| TABLES | TablesTask |
| MEMBERS | MembersTask |
| ASSISTANTS | AssistantsTask |
| PACKAGES_DEF | PackagesDefTask |
| ORDERS | OrdersTask |
| PAYMENTS | PaymentsTask |
| REFUNDS | RefundsTask |
| COUPON_USAGE | CouponUsageTask |
| INVENTORY_CHANGE | InventoryChangeTask |
| TOPUPS | TopupsTask |
| TABLE_DISCOUNT | TableDiscountTask |
| ASSISTANT_ABOLISH | AssistantAbolishTask |
| LEDGER | LedgerTask |
| TICKET_DWD | TicketDwdTask |
| MANUAL_INGEST | ManualIngestTask |
| PAYMENTS_DWD | PaymentsDwdTask |
| MEMBERS_DWD | MembersDwdTask |
| INIT_ODS_SCHEMA | InitOdsSchemaTask |
| INIT_DWD_SCHEMA | InitDwdSchemaTask |
| DWD_LOAD_FROM_ODS | DwdLoadTask |
| DWD_QUALITY_CHECK | DwdQualityTask |
| ODS_JSON_ARCHIVE | OdsJsonArchiveTask |
| CHECK_CUTOFF | CheckCutoffTask |
| DATA_INTEGRITY_CHECK | DataIntegrityTask |
| INIT_DWS_SCHEMA | InitDwsSchemaTask |
| DWS_BUILD_ORDER_SUMMARY | DwsBuildOrderSummaryTask |
## Task Class Details
| Task Code | Class | File | Flow | API Fetch | Key Calls |
|---|---|---|---|---|---|
| DWD_LOAD_FROM_ODS | DwdLoadTask | etl_billiards/tasks/dwd_load_task.py | BaseTask.execute (extract/transform/load) | No | isinstance, c.lower, ', '.join, cur.execute, self._format_table, select_exprs.append, values.append, self._cast_expr |
| DWD_QUALITY_CHECK | DwdQualityTask | etl_billiards/tasks/dwd_quality_task.py | BaseTask.execute (extract/transform/load) | No | cur.execute, self._split_table_name, cur.fetchone, self._get_numeric_amount_columns, Path, self.REPORT_PATH.parent.mkdir, self.REPORT_PATH.write_text, self.logger.info |
| INIT_DWD_SCHEMA | InitDwdSchemaTask | etl_billiards/tasks/init_dwd_schema_task.py | BaseTask.execute (extract/transform/load) | No | Path, self.config.get, self.logger.info, cur.execute, dwd_path.exists, FileNotFoundError, dwd_path.read_text, self.db.conn.cursor |
| INIT_ODS_SCHEMA | InitOdsSchemaTask | etl_billiards/tasks/init_schema_task.py | BaseTask.execute (extract/transform/load) | No | Path, self.config.get, self.logger.info, FileNotFoundError, cur.execute, ods_sql_raw.find, ods_sql_raw.splitlines, '\n'.join |
| MANUAL_INGEST | ManualIngestTask | etl_billiards/tasks/manual_ingest_task.py | custom execute | No | isinstance, cur.execute, row_vals.append, site_profile.get, merged_rec.get, self.config.get, any, ', '.join |
| ASSISTANTS | AssistantsTask | etl_billiards/tasks/assistants_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_timestamp, extracted.get, TypeParser.parse_int, TypeParser.parse_decimal, self._merge_common_params, self.api.get_paginated, AssistantLoader |
| ASSISTANT_ABOLISH | AssistantAbolishTask | etl_billiards/tasks/assistant_abolish_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, extracted.get, TypeParser.format_timestamp, self._merge_common_params, self.api.get_paginated, AssistantAbolishLoader, loader.upsert_records |
| CHECK_CUTOFF | CheckCutoffTask | etl_billiards/tasks/check_cutoff_task.py | custom execute | No | r.get, _ts, self.logger.info, self.db.query, row.get, sorted, self.config.get, min |
| COUPON_USAGE | CouponUsageTask | etl_billiards/tasks/coupon_usage_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, extracted.get, TypeParser.parse_decimal, TypeParser.parse_timestamp, TypeParser.format_timestamp, self._merge_common_params, self.api.get_paginated |
| DWS_BUILD_ORDER_SUMMARY | DwsBuildOrderSummaryTask | etl_billiards/tasks/dws_build_order_summary_task.py | custom execute | No | self.config.get, self.logger.info, cur.execute, load_result.get, delete_args.append, bool, _jsonable_date, _as_date |
| INIT_DWS_SCHEMA | InitDwsSchemaTask | etl_billiards/tasks/init_dws_schema_task.py | BaseTask.execute (extract/transform/load) | No | Path, self.config.get, self.logger.info, cur.execute, bool, dws_path.exists, FileNotFoundError, dws_path.read_text |
| INVENTORY_CHANGE | InventoryChangeTask | etl_billiards/tasks/inventory_change_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, extracted.get, TypeParser.format_timestamp, self._merge_common_params, self.api.get_paginated, InventoryChangeLoader, loader.upsert_changes |
| LEDGER | LedgerTask | etl_billiards/tasks/ledger_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, TypeParser.parse_decimal, TypeParser.parse_timestamp, extracted.get, TypeParser.format_timestamp, self._merge_common_params, self.api.get_paginated |
| MEMBERS | MembersTask | etl_billiards/tasks/members_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, extracted.get, self._merge_common_params, self.api.get_paginated, MemberLoader, loader.upsert_members, self._parse_member, TypeParser.parse_int |
| ODS_ASSISTANT_ABOLISH | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_ASSISTANT_ACCOUNT | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_ASSISTANT_LEDGER | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_GOODS_CATEGORY | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_GROUP_BUY_REDEMPTION | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_GROUP_PACKAGE | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_INVENTORY_CHANGE | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_INVENTORY_STOCK | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_JSON_ARCHIVE | OdsJsonArchiveTask | etl_billiards/tasks/ods_json_archive_task.py | BaseTask.execute (extract/transform/load) | No | EndpointSpec, Path, TypeParser.format_timestamp, self.logger.info, dump_json, self.config.get, endpoint_to_filename, (rec or {}).get |
| ODS_MEMBER | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_MEMBER_BALANCE | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_MEMBER_CARD | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_PAYMENT | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_PLATFORM_COUPON | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_RECHARGE_SETTLE | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_REFUND | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_SETTLEMENT_RECORDS | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_SETTLEMENT_TICKET | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_STORE_GOODS | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_STORE_GOODS_SALES | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_TABLES | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_TABLE_FEE_DISCOUNT | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_TABLE_USE | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ODS_TENANT_GOODS | (dynamic) | etl_billiards/tasks/ods_tasks.py | BaseOdsTask.execute | Yes | self.api.iter_paginated, _insert_records_schema_aware |
| ORDERS | OrdersTask | etl_billiards/tasks/orders_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, TypeParser.parse_decimal, extracted.get, TypeParser.format_timestamp, TypeParser.parse_timestamp, self._merge_common_params, self.api.get_paginated |
| PACKAGES_DEF | PackagesDefTask | etl_billiards/tasks/packages_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, extracted.get, TypeParser.parse_decimal, TypeParser.parse_timestamp, self._merge_common_params, self.api.get_paginated, PackageDefinitionLoader |
| PAYMENTS | PaymentsTask | etl_billiards/tasks/payments_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, TypeParser.parse_decimal, extracted.get, TypeParser.format_timestamp, TypeParser.parse_timestamp, self._merge_common_params, self.api.get_paginated |
| PRODUCTS | ProductsTask | etl_billiards/tasks/products_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, extracted.get, TypeParser.parse_decimal, TypeParser.parse_timestamp, self._merge_common_params, self.api.get_paginated, ProductLoader |
| REFUNDS | RefundsTask | etl_billiards/tasks/refunds_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, TypeParser.parse_decimal, extracted.get, TypeParser.parse_timestamp, TypeParser.format_timestamp, self._merge_common_params, self.api.get_paginated |
| TABLES | TablesTask | etl_billiards/tasks/tables_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, extracted.get, self._merge_common_params, self.api.get_paginated, TableLoader, loader.upsert_tables, self._parse_table |
| TABLE_DISCOUNT | TableDiscountTask | etl_billiards/tasks/table_discount_task.py | BaseTask.execute (extract/transform/load) | Yes | raw.get, TypeParser.parse_int, table_profile.get, extracted.get, TypeParser.format_timestamp, self._merge_common_params, self.api.get_paginated, TableDiscountLoader |
| TICKET_DWD | TicketDwdTask | etl_billiards/tasks/ticket_dwd_task.py | custom execute | No | self.logger.info, self.get_task_code, self._get_time_window, build_window_segments, TicketLoader, self.config.get, enumerate, self.iter_ods_rows |
| TOPUPS | TopupsTask | etl_billiards/tasks/topups_task.py | BaseTask.execute (extract/transform/load) | Yes | node.get, TypeParser.parse_decimal, TypeParser.parse_int, extracted.get, raw.get, TypeParser.parse_timestamp, TypeParser.format_timestamp, self._merge_common_params |