更新20260201-1
This commit is contained in:
@@ -32,13 +32,13 @@ SCHEMA_ETL=etl_admin
|
||||
# API 配置
|
||||
# ------------------------------------------------------------------------------
|
||||
API_BASE=https://pc.ficoo.vip/apiprod/admin/v1/
|
||||
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6Ik1oKzFpTitjclRHMTY3cUp5SzFXYllteVBaaUhjdDI2ZTZDZkJvd1pxSVk9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvNyDkuIvljYg5OjU2OjE4IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDQ3MjU3OCwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.rY03o82SKznD7NOktXKzTOI1btl2FHsklMCChOlZUeY
|
||||
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IktlbTVsdHRqZ2tSUExOcVA2ajhNakdQYnFrNW5mRzBQNzRvMHE0b295VVE9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvOCDkuIvljYg2OjU3OjA1IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDU0ODIyNSwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.wJlm7pTqUzp769nUGdxx0e1bVMy4x9Prp9U_UMWQvlk
|
||||
|
||||
# API 请求超时(秒)
|
||||
API_TIMEOUT=20
|
||||
|
||||
# 分页大小
|
||||
API_PAGE_SIZE=200
|
||||
API_PAGE_SIZE=200
|
||||
|
||||
# 最大重试次数
|
||||
API_RETRY_MAX=3
|
||||
|
||||
@@ -4,7 +4,9 @@ from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import time
|
||||
from typing import Any, Iterable, Tuple
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from api.client import APIClient
|
||||
from api.endpoint_routing import plan_calls
|
||||
@@ -128,3 +130,56 @@ class RecordingAPIClient:
|
||||
"pages": len(pages),
|
||||
"records": total_records,
|
||||
}
|
||||
|
||||
|
||||
def _cfg_get(cfg, key: str, default=None):
    """Look up a dotted ``key`` in ``cfg`` and return ``default`` when absent.

    Args:
        cfg: Either a plain ``dict`` (walked one dotted segment at a time)
            or any object exposing a callable ``get(key, default)``.
        key: Dotted path such as ``"api.retries.max_attempts"``.
        default: Value returned when the path cannot be resolved.

    Returns:
        The value found at ``key``, or ``default``.
    """
    if not isinstance(cfg, dict):
        # Non-dict configs resolve the whole dotted key themselves.
        lookup = getattr(cfg, "get", None)
        return lookup(key, default) if callable(lookup) else default

    node = cfg
    for segment in key.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
        else:
            # Either the path ran into a non-dict value or the segment is missing.
            return default
    return node
|
||||
|
||||
|
||||
def build_recording_client(
    cfg,
    *,
    task_code: str,
    output_dir: Path | str | None = None,
    run_id: int | None = None,
    write_pretty: bool | None = None,
):
    """Create a RecordingAPIClient from an AppConfig-like object or a dict.

    Args:
        cfg: Configuration source, read through ``_cfg_get`` with dotted keys.
        task_code: Task code; upper-cased when it becomes part of the
            generated output directory name.
        output_dir: Target directory. When ``None`` a directory named
            ``<TASK_CODE>-<run_id>-<timestamp>`` is derived under
            ``pipeline.fetch_root``, then ``io.export_root``, then
            ``export/JSON``.
        run_id: Run identifier. When ``None`` the current Unix time in
            whole seconds is used.
        write_pretty: Pretty-print flag. When ``None`` it is read from
            ``io.write_pretty_json``.

    Returns:
        A ``RecordingAPIClient`` wrapping a freshly built ``APIClient``.
    """

    def setting(path, fallback=None):
        return _cfg_get(cfg, path, fallback)

    # "or <default>" also replaces falsy configured values (None, 0, "").
    api_settings = {
        "base_url": setting("api.base_url") or "",
        "token": setting("api.token"),
        "timeout": int(setting("api.timeout_sec", 20) or 20),
        "retry_max": int(setting("api.retries.max_attempts", 3) or 3),
        "headers_extra": setting("api.headers_extra") or {},
    }
    transport = APIClient(**api_settings)

    pretty = write_pretty
    if pretty is None:
        pretty = bool(setting("io.write_pretty_json", False))

    effective_run_id = int(time.time()) if run_id is None else run_id

    target_dir = output_dir
    if target_dir is None:
        zone = ZoneInfo(setting("app.timezone", "Asia/Taipei") or "Asia/Taipei")
        stamp = datetime.now(zone).strftime("%Y%m%d-%H%M%S")
        root = (
            setting("pipeline.fetch_root")
            or setting("io.export_root")
            or "export/JSON"
        )
        target_dir = Path(root) / f"{str(task_code).upper()}-{effective_run_id}-{stamp}"

    return RecordingAPIClient(
        base_client=transport,
        output_dir=target_dir,
        task_code=str(task_code),
        run_id=int(effective_run_id),
        write_pretty=bool(pretty),
    )
|
||||
|
||||
@@ -36,8 +36,8 @@
|
||||
| 17 | person_org_id | BIGINT | YES | | 人事组织 ID |
|
||||
| 18 | assistant_level | INTEGER | YES | | 助教等级。**枚举值**: 8=助教管理, 10=初级, 20=中级, 30=高级, 40=星级 |
|
||||
| 19 | level_name | VARCHAR | YES | | 等级名称。**枚举值**: "助教管理", "初级", "中级", "高级", "星级" |
|
||||
| 20 | skill_id | BIGINT | YES | | 技能 ID |
|
||||
| 21 | skill_name | VARCHAR | YES | | 技能名称。**枚举值**: "基础课", "附加课/激励课", "包厢课" |
|
||||
| 20 | skill_id | BIGINT | YES | | 技能 ID。**枚举值**: 2790683529513797=基础课, 2790683529513798=附加课/激励课, 3039912271463941=包厢课 |
|
||||
| 21 | skill_name | VARCHAR | YES | | 技能名称。**枚举值**: "基础课", "附加课", "包厢课" |
|
||||
| 22 | ledger_unit_price | NUMERIC(10,2) | YES | | 单价(元/小时),**样本值**: 98.00/108.00/190.00 等 |
|
||||
| 23 | ledger_amount | NUMERIC(10,2) | YES | | 计费金额 |
|
||||
| 24 | projected_income | NUMERIC(10,2) | YES | | 预估收入 |
|
||||
|
||||
@@ -26,7 +26,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from api.client import APIClient
|
||||
from api.recording_client import build_recording_client
|
||||
from api.endpoint_routing import derive_former_endpoint as derive_former_endpoint_shared
|
||||
from config.settings import AppConfig
|
||||
from models.parsers import TypeParser
|
||||
@@ -265,13 +265,7 @@ def main() -> int:
|
||||
if not cfg["api"].get("token"):
|
||||
raise SystemExit("缺少 api.token(请在 .env 配置 API_TOKEN 或 FICOO_TOKEN)")
|
||||
|
||||
client = APIClient(
|
||||
base_url=cfg["api"]["base_url"],
|
||||
token=cfg["api"]["token"],
|
||||
timeout=int(cfg["api"].get("timeout_sec") or 20),
|
||||
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
|
||||
headers_extra=cfg["api"].get("headers_extra") or {},
|
||||
)
|
||||
client = build_recording_client(cfg, task_code="FETCH_TEST_COMPARE")
|
||||
|
||||
common_params = cfg["api"].get("params", {}) or {}
|
||||
if not isinstance(common_params, dict):
|
||||
|
||||
@@ -5,6 +5,11 @@ from .task_model import TaskItem, TaskStatus, TaskHistory, TaskConfig, QueuedTas
|
||||
from .schedule_model import (
|
||||
ScheduledTask, ScheduleConfig, ScheduleType, IntervalUnit, ScheduleStore
|
||||
)
|
||||
from .task_registry import (
|
||||
TaskRegistry, TaskDefinition, BusinessDomain, DOMAIN_LABELS,
|
||||
task_registry, get_ods_task_codes, get_fact_ods_task_codes,
|
||||
get_dimension_ods_task_codes, get_all_task_tuples
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"TaskItem",
|
||||
@@ -17,4 +22,14 @@ __all__ = [
|
||||
"ScheduleType",
|
||||
"IntervalUnit",
|
||||
"ScheduleStore",
|
||||
# 任务注册表
|
||||
"TaskRegistry",
|
||||
"TaskDefinition",
|
||||
"BusinessDomain",
|
||||
"DOMAIN_LABELS",
|
||||
"task_registry",
|
||||
"get_ods_task_codes",
|
||||
"get_fact_ods_task_codes",
|
||||
"get_dimension_ods_task_codes",
|
||||
"get_all_task_tuples",
|
||||
]
|
||||
|
||||
353
etl_billiards/gui/models/task_registry.py
Normal file
353
etl_billiards/gui/models/task_registry.py
Normal file
@@ -0,0 +1,353 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""任务注册表:定义所有可用任务及其业务域分组。
|
||||
|
||||
从后端 ods_tasks 动态获取任务定义,并按业务域分组,供 UI 使用。
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# 尝试从后端导入 ODS 任务定义
|
||||
try:
|
||||
from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
|
||||
_HAS_BACKEND = True
|
||||
except ImportError:
|
||||
_HAS_BACKEND = False
|
||||
ENABLED_ODS_CODES = set()
|
||||
ODS_TASK_SPECS = ()
|
||||
|
||||
|
||||
class BusinessDomain(Enum):
    """Business domains used to group tasks in the UI."""
    MEMBER = "member"           # Members
    SETTLEMENT = "settlement"   # Settlement / payment
    ASSISTANT = "assistant"     # Assistants (coaches)
    GOODS = "goods"             # Goods / sales
    TABLE = "table"             # Tables
    PROMOTION = "promotion"     # Group buying / coupons
    INVENTORY = "inventory"     # Inventory
    SCHEMA = "schema"           # Schema initialisation
    DWD = "dwd"                 # DWD loading
    QUALITY = "quality"         # Quality checks
    OTHER = "other"             # Anything else
|
||||
|
||||
|
||||
# 业务域显示名称
|
||||
DOMAIN_LABELS: Dict[BusinessDomain, str] = {
|
||||
BusinessDomain.MEMBER: "会员",
|
||||
BusinessDomain.SETTLEMENT: "结算/支付",
|
||||
BusinessDomain.ASSISTANT: "助教",
|
||||
BusinessDomain.GOODS: "商品/销售",
|
||||
BusinessDomain.TABLE: "台桌",
|
||||
BusinessDomain.PROMOTION: "团购/优惠券",
|
||||
BusinessDomain.INVENTORY: "库存",
|
||||
BusinessDomain.SCHEMA: "Schema 初始化",
|
||||
BusinessDomain.DWD: "DWD 装载",
|
||||
BusinessDomain.QUALITY: "质量检查",
|
||||
BusinessDomain.OTHER: "其他",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class TaskDefinition:
    """Definition of one task: identity, display text and classification flags."""
    code: str                       # Task code
    name: str                       # Display name
    description: str                # Description
    domain: BusinessDomain          # Business domain
    requires_window: bool = True    # Whether the task needs a time window
    is_ods: bool = False            # Whether this is an ODS task
    is_dimension: bool = False      # Whether this is a dimension-type task (treated separately during validation)
    default_enabled: bool = True    # Whether the task is selected by default
|
||||
|
||||
|
||||
# ODS 任务到业务域的映射
|
||||
ODS_DOMAIN_MAP: Dict[str, BusinessDomain] = {
|
||||
# 会员相关
|
||||
"ODS_MEMBER": BusinessDomain.MEMBER,
|
||||
"ODS_MEMBER_CARD": BusinessDomain.MEMBER,
|
||||
"ODS_MEMBER_BALANCE": BusinessDomain.MEMBER,
|
||||
# 结算/支付相关
|
||||
"ODS_PAYMENT": BusinessDomain.SETTLEMENT,
|
||||
"ODS_REFUND": BusinessDomain.SETTLEMENT,
|
||||
"ODS_SETTLEMENT_RECORDS": BusinessDomain.SETTLEMENT,
|
||||
"ODS_RECHARGE_SETTLE": BusinessDomain.SETTLEMENT,
|
||||
"ODS_SETTLEMENT_TICKET": BusinessDomain.SETTLEMENT,
|
||||
# 助教相关
|
||||
"ODS_ASSISTANT_ACCOUNT": BusinessDomain.ASSISTANT,
|
||||
"ODS_ASSISTANT_LEDGER": BusinessDomain.ASSISTANT,
|
||||
"ODS_ASSISTANT_ABOLISH": BusinessDomain.ASSISTANT,
|
||||
# 商品/销售相关
|
||||
"ODS_TENANT_GOODS": BusinessDomain.GOODS,
|
||||
"ODS_STORE_GOODS": BusinessDomain.GOODS,
|
||||
"ODS_STORE_GOODS_SALES": BusinessDomain.GOODS,
|
||||
"ODS_GOODS_CATEGORY": BusinessDomain.GOODS,
|
||||
# 台桌相关
|
||||
"ODS_TABLES": BusinessDomain.TABLE,
|
||||
"ODS_TABLE_USE": BusinessDomain.TABLE,
|
||||
"ODS_TABLE_FEE_DISCOUNT": BusinessDomain.TABLE,
|
||||
# 团购/优惠券相关
|
||||
"ODS_GROUP_PACKAGE": BusinessDomain.PROMOTION,
|
||||
"ODS_GROUP_BUY_REDEMPTION": BusinessDomain.PROMOTION,
|
||||
"ODS_PLATFORM_COUPON": BusinessDomain.PROMOTION,
|
||||
# 库存相关
|
||||
"ODS_INVENTORY_STOCK": BusinessDomain.INVENTORY,
|
||||
"ODS_INVENTORY_CHANGE": BusinessDomain.INVENTORY,
|
||||
}
|
||||
|
||||
# ODS 任务显示名称(中文)
|
||||
ODS_DISPLAY_NAMES: Dict[str, str] = {
|
||||
"ODS_MEMBER": "会员档案",
|
||||
"ODS_MEMBER_CARD": "会员储值卡",
|
||||
"ODS_MEMBER_BALANCE": "会员余额变动",
|
||||
"ODS_PAYMENT": "支付流水",
|
||||
"ODS_REFUND": "退款流水",
|
||||
"ODS_SETTLEMENT_RECORDS": "结账记录",
|
||||
"ODS_RECHARGE_SETTLE": "充值结算",
|
||||
"ODS_SETTLEMENT_TICKET": "结账小票",
|
||||
"ODS_ASSISTANT_ACCOUNT": "助教账号",
|
||||
"ODS_ASSISTANT_LEDGER": "助教流水",
|
||||
"ODS_ASSISTANT_ABOLISH": "助教作废",
|
||||
"ODS_TENANT_GOODS": "租户商品",
|
||||
"ODS_STORE_GOODS": "门店商品",
|
||||
"ODS_STORE_GOODS_SALES": "商品销售流水",
|
||||
"ODS_GOODS_CATEGORY": "商品分类",
|
||||
"ODS_TABLES": "台桌维表",
|
||||
"ODS_TABLE_USE": "台费计费流水",
|
||||
"ODS_TABLE_FEE_DISCOUNT": "台费折扣调账",
|
||||
"ODS_GROUP_PACKAGE": "团购套餐",
|
||||
"ODS_GROUP_BUY_REDEMPTION": "团购核销",
|
||||
"ODS_PLATFORM_COUPON": "平台券核销",
|
||||
"ODS_INVENTORY_STOCK": "库存汇总",
|
||||
"ODS_INVENTORY_CHANGE": "库存变化",
|
||||
}
|
||||
|
||||
# 维度类 ODS 任务(校验时通常单独处理)
|
||||
DIMENSION_ODS_CODES = {
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_TABLES",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
}
|
||||
|
||||
# 事实类 ODS 任务(需要时间窗口)
|
||||
FACT_ODS_CODES = {
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
}
|
||||
|
||||
# 非 ODS 任务定义
|
||||
NON_ODS_TASKS: List[TaskDefinition] = [
|
||||
# DWD 装载
|
||||
TaskDefinition(
|
||||
code="DWD_LOAD_FROM_ODS",
|
||||
name="ODS→DWD 装载",
|
||||
description="从 ODS 增量装载到 DWD",
|
||||
domain=BusinessDomain.DWD,
|
||||
requires_window=True,
|
||||
),
|
||||
TaskDefinition(
|
||||
code="DWD_QUALITY_CHECK",
|
||||
name="DWD 质量检查",
|
||||
description="执行 DWD 数据质量检查",
|
||||
domain=BusinessDomain.QUALITY,
|
||||
requires_window=False,
|
||||
),
|
||||
TaskDefinition(
|
||||
code="DWS_BUILD_ORDER_SUMMARY",
|
||||
name="构建订单汇总",
|
||||
description="重算 DWS 订单汇总表",
|
||||
domain=BusinessDomain.DWD,
|
||||
requires_window=False,
|
||||
),
|
||||
# Schema 初始化
|
||||
TaskDefinition(
|
||||
code="INIT_ODS_SCHEMA",
|
||||
name="初始化 ODS Schema",
|
||||
description="创建/重建 ODS 表结构",
|
||||
domain=BusinessDomain.SCHEMA,
|
||||
requires_window=False,
|
||||
default_enabled=False,
|
||||
),
|
||||
TaskDefinition(
|
||||
code="INIT_DWD_SCHEMA",
|
||||
name="初始化 DWD Schema",
|
||||
description="创建/重建 DWD 表结构",
|
||||
domain=BusinessDomain.SCHEMA,
|
||||
requires_window=False,
|
||||
default_enabled=False,
|
||||
),
|
||||
TaskDefinition(
|
||||
code="INIT_DWS_SCHEMA",
|
||||
name="初始化 DWS Schema",
|
||||
description="创建/重建 DWS 表结构",
|
||||
domain=BusinessDomain.SCHEMA,
|
||||
requires_window=False,
|
||||
default_enabled=False,
|
||||
),
|
||||
# 其他
|
||||
TaskDefinition(
|
||||
code="MANUAL_INGEST",
|
||||
name="手工数据灌入",
|
||||
description="从本地 JSON 回放入库",
|
||||
domain=BusinessDomain.OTHER,
|
||||
requires_window=False,
|
||||
default_enabled=False,
|
||||
),
|
||||
TaskDefinition(
|
||||
code="CHECK_CUTOFF",
|
||||
name="检查 Cutoff",
|
||||
description="查看各表数据截止时间",
|
||||
domain=BusinessDomain.QUALITY,
|
||||
requires_window=False,
|
||||
),
|
||||
TaskDefinition(
|
||||
code="DATA_INTEGRITY_CHECK",
|
||||
name="数据完整性检查",
|
||||
description="检查 ODS/DWD 数据完整性",
|
||||
domain=BusinessDomain.QUALITY,
|
||||
requires_window=True,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def _build_ods_task_definition(code: str) -> TaskDefinition:
    """Build the task definition for one ODS task code.

    The display name comes from ``ODS_DISPLAY_NAMES`` (falling back to the
    code itself) and the domain from ``ODS_DOMAIN_MAP`` (falling back to
    ``BusinessDomain.OTHER``). Codes listed in ``DIMENSION_ODS_CODES`` are
    flagged as dimension tasks and do not require a time window.

    Args:
        code: ODS task code, e.g. ``"ODS_PAYMENT"``.

    Returns:
        A ``TaskDefinition`` with ``is_ods=True``.
    """
    name = ODS_DISPLAY_NAMES.get(code, code)
    is_dimension = code in DIMENSION_ODS_CODES

    # The description is always generated from the display name. An earlier
    # scan of the backend ODS_TASK_SPECS was removed because every path
    # through it produced this same string, so it only cost a linear search
    # per task.
    # NOTE(review): backend descriptions were treated as possibly garbled;
    # confirm their encoding before surfacing them here.
    description = f"抓取{name}到 ODS"

    return TaskDefinition(
        code=code,
        name=name,
        description=description,
        domain=ODS_DOMAIN_MAP.get(code, BusinessDomain.OTHER),
        requires_window=not is_dimension,
        is_ods=True,
        is_dimension=is_dimension,
    )
|
||||
|
||||
|
||||
class TaskRegistry:
    """Registry of every task the GUI can offer.

    The class is a singleton: each ``TaskRegistry()`` call returns the same
    object, and the task table is populated only on the first construction.
    Tasks are kept in insertion order: ODS tasks first, then the entries of
    ``NON_ODS_TASKS``.
    """

    _instance: Optional["TaskRegistry"] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Read by __init__ so the task table is loaded exactly once.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._initialized = True
        self._tasks: Dict[str, "TaskDefinition"] = {}
        self._load_tasks()

    def _load_tasks(self):
        """Populate the task table from the ODS code set and ``NON_ODS_TASKS``."""
        enabled = set(ENABLED_ODS_CODES) if _HAS_BACKEND else set(ODS_DOMAIN_MAP)

        # Iterating the set directly would give a different task order on
        # every run (string hashing is randomised), which reshuffles the UI.
        # Use the declaration order of ODS_DOMAIN_MAP, then any codes that
        # are enabled but unmapped, sorted alphabetically.
        ordered_codes = [code for code in ODS_DOMAIN_MAP if code in enabled]
        ordered_codes.extend(sorted(enabled.difference(ODS_DOMAIN_MAP)))
        for code in ordered_codes:
            self._tasks[code] = _build_ods_task_definition(code)

        for task_def in NON_ODS_TASKS:
            self._tasks[task_def.code] = task_def

    def get_task(self, code: str) -> Optional["TaskDefinition"]:
        """Return the definition registered under ``code``, or ``None``."""
        return self._tasks.get(code)

    def get_all_tasks(self) -> List["TaskDefinition"]:
        """Return every registered task."""
        return list(self._tasks.values())

    def get_ods_tasks(self) -> List["TaskDefinition"]:
        """Return all tasks flagged ``is_ods``."""
        return [t for t in self._tasks.values() if t.is_ods]

    def get_fact_ods_tasks(self) -> List["TaskDefinition"]:
        """Return ODS tasks that are not dimension tasks."""
        return [t for t in self._tasks.values() if t.is_ods and not t.is_dimension]

    def get_dimension_ods_tasks(self) -> List["TaskDefinition"]:
        """Return ODS tasks flagged ``is_dimension``."""
        return [t for t in self._tasks.values() if t.is_ods and t.is_dimension]

    def get_tasks_by_domain(self, domain: "BusinessDomain") -> List["TaskDefinition"]:
        """Return every task (ODS or not) whose domain equals ``domain``."""
        return [t for t in self._tasks.values() if t.domain == domain]

    def get_ods_tasks_grouped(self) -> Dict["BusinessDomain", List["TaskDefinition"]]:
        """Return ODS tasks grouped by domain, keeping registration order."""
        grouped: Dict["BusinessDomain", List["TaskDefinition"]] = {}
        for task in self.get_ods_tasks():
            grouped.setdefault(task.domain, []).append(task)
        return grouped

    def get_non_ods_tasks(self) -> List["TaskDefinition"]:
        """Return all tasks not flagged ``is_ods``."""
        return [t for t in self._tasks.values() if not t.is_ods]
|
||||
|
||||
|
||||
# 全局注册表实例
|
||||
task_registry = TaskRegistry()
|
||||
|
||||
|
||||
# 便捷函数
|
||||
def get_ods_task_codes() -> List[str]:
    """Return the codes of all ODS tasks in the global registry."""
    ods_tasks = task_registry.get_ods_tasks()
    return [task.code for task in ods_tasks]
|
||||
|
||||
|
||||
def get_fact_ods_task_codes() -> List[str]:
    """Return the codes of the fact-type ODS tasks in the global registry."""
    fact_tasks = task_registry.get_fact_ods_tasks()
    return [task.code for task in fact_tasks]
|
||||
|
||||
|
||||
def get_dimension_ods_task_codes() -> List[str]:
    """Return the codes of the dimension-type ODS tasks in the global registry."""
    dimension_tasks = task_registry.get_dimension_ods_tasks()
    return [task.code for task in dimension_tasks]
|
||||
|
||||
|
||||
def get_all_task_tuples() -> List[Tuple[str, str, str]]:
    """Return ``(code, name, description)`` for every registered task."""
    every_task = task_registry.get_all_tasks()
    return [(task.code, task.name, task.description) for task in every_task]
|
||||
|
||||
|
||||
def get_ods_tasks_for_ui() -> List[Tuple[str, str, BusinessDomain]]:
    """Return ``(code, display_name, domain)`` for every ODS task, for UI use."""
    ods_tasks = task_registry.get_ods_tasks()
    return [(task.code, task.name, task.domain) for task in ods_tasks]
|
||||
@@ -7,6 +7,7 @@ from .log_viewer import LogViewer
|
||||
from .db_viewer import DBViewer
|
||||
from .status_panel import StatusPanel
|
||||
from .task_manager import TaskManager
|
||||
from .task_selector import TaskSelectorWidget, CompactTaskSelector
|
||||
|
||||
__all__ = [
|
||||
"TaskPanel",
|
||||
@@ -15,4 +16,6 @@ __all__ = [
|
||||
"DBViewer",
|
||||
"StatusPanel",
|
||||
"TaskManager",
|
||||
"TaskSelectorWidget",
|
||||
"CompactTaskSelector",
|
||||
]
|
||||
|
||||
@@ -26,28 +26,45 @@ from ..utils.app_settings import app_settings
|
||||
from ..workers.task_worker import TaskWorker
|
||||
|
||||
|
||||
# 可调度的任务列表(包含所有 ODS 任务 + DWD/质量检查任务)
|
||||
SCHEDULABLE_TASKS = [
|
||||
# ODS 数据抓取任务(与 task_panel.AUTO_UPDATE_TASKS 保持一致)
|
||||
("ODS_PAYMENT", "支付流水"),
|
||||
("ODS_MEMBER", "会员档案"),
|
||||
("ODS_MEMBER_CARD", "会员储值卡"),
|
||||
("ODS_MEMBER_BALANCE", "会员余额变动"),
|
||||
("ODS_SETTLEMENT_RECORDS", "结账记录"),
|
||||
("ODS_TABLE_USE", "台费计费流水"),
|
||||
("ODS_ASSISTANT_ACCOUNT", "助教账号"),
|
||||
("ODS_ASSISTANT_LEDGER", "助教流水"),
|
||||
("ODS_ASSISTANT_ABOLISH", "助教作废"),
|
||||
("ODS_REFUND", "退款流水"),
|
||||
("ODS_PLATFORM_COUPON", "平台券核销"),
|
||||
("ODS_RECHARGE_SETTLE", "充值结算"),
|
||||
("ODS_SETTLEMENT_TICKET", "结账小票"),
|
||||
# DWD 和质量检查任务
|
||||
("DWD_LOAD_FROM_ODS", "ODS→DWD 装载"),
|
||||
("DWD_QUALITY_CHECK", "DWD 质量检查"),
|
||||
("DATA_INTEGRITY_CHECK", "数据完整性检查"),
|
||||
("CHECK_CUTOFF", "检查 Cutoff"),
|
||||
]
|
||||
# 动态获取可调度的任务列表
|
||||
def _get_schedulable_tasks():
    """Return the schedulable tasks as a list of ``(code, display_name)`` pairs.

    The list is built from the task registry: every ODS task, followed by
    the non-ODS tasks except the schema-initialisation and manual-ingest
    ones. If an ``ImportError`` is raised while doing so, a static list is
    returned instead.
    """
    try:
        from ..models.task_registry import task_registry

        ods_entries = [
            (definition.code, definition.name)
            for definition in task_registry.get_ods_tasks()
        ]
        # Schema initialisation and manual ingest are never offered for scheduling.
        not_schedulable = {
            "INIT_ODS_SCHEMA",
            "INIT_DWD_SCHEMA",
            "INIT_DWS_SCHEMA",
            "MANUAL_INGEST",
        }
        other_entries = [
            (definition.code, definition.name)
            for definition in task_registry.get_non_ods_tasks()
            if definition.code not in not_schedulable
        ]
        return ods_entries + other_entries
    except ImportError:
        # Fall back to the static list.
        return [
            ("ODS_PAYMENT", "支付流水"),
            ("ODS_MEMBER", "会员档案"),
            ("ODS_MEMBER_CARD", "会员储值卡"),
            ("ODS_MEMBER_BALANCE", "会员余额变动"),
            ("ODS_SETTLEMENT_RECORDS", "结账记录"),
            ("ODS_TABLE_USE", "台费计费流水"),
            ("ODS_ASSISTANT_ACCOUNT", "助教账号"),
            ("ODS_ASSISTANT_LEDGER", "助教流水"),
            ("ODS_ASSISTANT_ABOLISH", "助教作废"),
            ("ODS_REFUND", "退款流水"),
            ("ODS_PLATFORM_COUPON", "平台券核销"),
            ("ODS_RECHARGE_SETTLE", "充值结算"),
            ("ODS_SETTLEMENT_TICKET", "结账小票"),
            ("DWD_LOAD_FROM_ODS", "ODS→DWD 装载"),
            ("DWD_QUALITY_CHECK", "DWD 质量检查"),
            ("DATA_INTEGRITY_CHECK", "数据完整性检查"),
            ("CHECK_CUTOFF", "检查 Cutoff"),
        ]
|
||||
|
||||
|
||||
SCHEDULABLE_TASKS = _get_schedulable_tasks()
|
||||
|
||||
|
||||
class TaskLogDialog(QDialog):
|
||||
@@ -1584,6 +1601,7 @@ class TaskManager(QWidget):
|
||||
|
||||
# 统计关键数据
|
||||
total_inserted = 0
|
||||
total_updated = 0
|
||||
total_missing = 0
|
||||
total_records = 0
|
||||
|
||||
@@ -1596,11 +1614,30 @@ class TaskManager(QWidget):
|
||||
import json
|
||||
stats_str = match.group(1).replace("'", '"')
|
||||
stats = json.loads(stats_str)
|
||||
|
||||
|
||||
if 'tables' in stats:
|
||||
|
||||
for tbl in stats['tables']:
|
||||
inserted = tbl.get('inserted', 0)
|
||||
processed = tbl.get('processed', 0)
|
||||
total_inserted += inserted + processed
|
||||
|
||||
inserted = int(tbl.get('inserted', 0) or 0)
|
||||
|
||||
updated = int(tbl.get('updated', 0) or 0)
|
||||
|
||||
processed = int(tbl.get('processed', 0) or 0)
|
||||
|
||||
has_new_counts = ('inserted' in tbl) or ('updated' in tbl)
|
||||
|
||||
if has_new_counts:
|
||||
|
||||
total_inserted += inserted
|
||||
|
||||
total_updated += updated
|
||||
|
||||
else:
|
||||
|
||||
total_inserted += inserted + processed
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1622,8 +1659,11 @@ class TaskManager(QWidget):
|
||||
total_records += int(match.group(1))
|
||||
|
||||
# 构建摘要
|
||||
if total_inserted > 0:
|
||||
summary_parts.append(f"处理 {total_inserted} 条")
|
||||
if total_inserted > 0 or total_updated > 0:
|
||||
if total_updated > 0:
|
||||
summary_parts.append(f"新增 {total_inserted} 条, 更新 {total_updated} 条")
|
||||
else:
|
||||
summary_parts.append(f"新增 {total_inserted} 条")
|
||||
|
||||
if total_records > 0:
|
||||
if total_missing > 0:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
398
etl_billiards/gui/widgets/task_selector.py
Normal file
398
etl_billiards/gui/widgets/task_selector.py
Normal file
@@ -0,0 +1,398 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""可复用的 ODS 任务选择组件:按业务域分组显示,支持全选/反选。"""
|
||||
|
||||
from typing import Dict, List, Optional, Set
|
||||
|
||||
from PySide6.QtWidgets import (
|
||||
QWidget, QVBoxLayout, QHBoxLayout, QGroupBox,
|
||||
QCheckBox, QPushButton, QScrollArea, QFrame,
|
||||
QLabel, QSizePolicy
|
||||
)
|
||||
from PySide6.QtCore import Signal, Qt
|
||||
|
||||
from ..models.task_registry import (
|
||||
TaskRegistry, TaskDefinition, BusinessDomain, DOMAIN_LABELS,
|
||||
task_registry, get_fact_ods_task_codes, get_dimension_ods_task_codes
|
||||
)
|
||||
|
||||
|
||||
class TaskSelectorWidget(QWidget):
    """ODS task picker showing one checkbox per task, grouped by business domain.

    A toolbar offers "select all", "select none" and "select fact tasks"
    buttons plus a counter label. ``selection_changed`` is emitted with the
    list of checked task codes whenever a checkbox is toggled or a bulk
    operation finishes.
    """

    # Emitted with the list of currently selected task codes.
    selection_changed = Signal(list)

    def __init__(
        self,
        parent: Optional[QWidget] = None,
        show_dimensions: bool = True,
        show_facts: bool = True,
        default_select_facts: bool = True,
        default_select_dimensions: bool = False,
        compact: bool = False,
        max_height: int = 0,
    ):
        """Initialise the selector.

        Args:
            parent: Parent widget.
            show_dimensions: Whether dimension-type tasks are shown.
            show_facts: Whether fact-type tasks are shown.
            default_select_facts: Initial checked state of fact-type tasks.
            default_select_dimensions: Initial checked state of dimension-type tasks.
            compact: Compact mode (smaller spacing).
            max_height: Maximum height of the scroll area (0 means unlimited).
        """
        super().__init__(parent)
        self.show_dimensions = show_dimensions
        self.show_facts = show_facts
        self.default_select_facts = default_select_facts
        self.default_select_dimensions = default_select_dimensions
        self.compact = compact
        self.max_height = max_height

        # Task checkbox map: code -> QCheckBox
        self._checkboxes: Dict[str, QCheckBox] = {}
        # Domain group-box map: domain -> QGroupBox
        self._domain_groups: Dict[BusinessDomain, QGroupBox] = {}

        self._init_ui()
        self._apply_default_selection()

    def _init_ui(self):
        """Build the toolbar and the scrollable, domain-grouped checkbox list."""
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        spacing = 4 if self.compact else 8
        layout.setSpacing(spacing)

        # Top toolbar
        toolbar = QHBoxLayout()
        toolbar.setSpacing(8)

        self.select_all_btn = self._make_toolbar_button("全选", 60, self._select_all)
        toolbar.addWidget(self.select_all_btn)

        self.deselect_all_btn = self._make_toolbar_button("全不选", 60, self._deselect_all)
        toolbar.addWidget(self.deselect_all_btn)

        self.select_facts_btn = self._make_toolbar_button(
            "选事实表",
            70,
            self._select_facts_only,
            tooltip="选中所有事实类任务(需要时间窗口的任务)",
        )
        toolbar.addWidget(self.select_facts_btn)

        toolbar.addStretch()

        self.selected_count_label = QLabel("已选: 0")
        self.selected_count_label.setProperty("subheading", True)
        toolbar.addWidget(self.selected_count_label)

        layout.addLayout(toolbar)

        # Scroll area
        scroll_area = QScrollArea()
        scroll_area.setWidgetResizable(True)
        scroll_area.setFrameShape(QFrame.NoFrame)
        if self.max_height > 0:
            scroll_area.setMaximumHeight(self.max_height)

        # Content container
        content_widget = QWidget()
        content_layout = QVBoxLayout(content_widget)
        content_layout.setContentsMargins(0, 0, 0, 0)
        content_layout.setSpacing(spacing)

        grouped_tasks = task_registry.get_ods_tasks_grouped()

        # Preferred display order of the business domains.
        preferred_order = [
            BusinessDomain.MEMBER,
            BusinessDomain.SETTLEMENT,
            BusinessDomain.ASSISTANT,
            BusinessDomain.GOODS,
            BusinessDomain.TABLE,
            BusinessDomain.PROMOTION,
            BusinessDomain.INVENTORY,
        ]
        # Domains the registry reports but the preferred order does not list
        # are appended rather than dropped, so every ODS task stays
        # selectable (presumably this covers tasks without an explicit
        # domain mapping, i.e. OTHER).
        trailing = [d for d in grouped_tasks if d not in preferred_order]

        for domain in preferred_order + trailing:
            visible_tasks = [
                task
                for task in grouped_tasks.get(domain, [])
                if (self.show_dimensions if task.is_dimension else self.show_facts)
            ]
            if not visible_tasks:
                continue

            group_box = self._create_domain_group(domain, visible_tasks)
            self._domain_groups[domain] = group_box
            content_layout.addWidget(group_box)

        content_layout.addStretch()
        scroll_area.setWidget(content_widget)
        layout.addWidget(scroll_area, 1)

    def _make_toolbar_button(self, text, width, slot, tooltip=None):
        """Create a fixed-width "secondary" toolbar button wired to ``slot``."""
        button = QPushButton(text)
        button.setProperty("secondary", True)
        button.setFixedWidth(width)
        if tooltip is not None:
            button.setToolTip(tooltip)
        button.clicked.connect(slot)
        return button

    def _create_domain_group(self, domain: BusinessDomain, tasks: List[TaskDefinition]) -> QGroupBox:
        """Create the group box for ``domain`` with one checkbox per task."""
        group_box = QGroupBox(DOMAIN_LABELS.get(domain, str(domain.value)))
        group_layout = QVBoxLayout(group_box)
        group_layout.setContentsMargins(8, 4, 8, 4)
        group_layout.setSpacing(2)

        for task in tasks:
            checkbox = QCheckBox(f"{task.name}")
            checkbox.setToolTip(f"{task.code}: {task.description}")
            # Stored as dynamic properties so bulk operations can classify
            # the checkbox without going back to the registry.
            checkbox.setProperty("task_code", task.code)
            checkbox.setProperty("is_dimension", task.is_dimension)
            checkbox.stateChanged.connect(self._on_selection_changed)

            self._checkboxes[task.code] = checkbox
            group_layout.addWidget(checkbox)

        return group_box

    def _set_checked_where(self, should_check, notify=True):
        """Set every checkbox to ``should_check(code, checkbox)`` in one batch.

        Per-checkbox signals are blocked while toggling so listeners are
        notified at most once, after all boxes have their final state.

        Args:
            should_check: Callable returning the desired checked state.
            notify: Emit ``selection_changed`` afterwards when true;
                otherwise only refresh the counter label.
        """
        for code, checkbox in self._checkboxes.items():
            checkbox.blockSignals(True)
            checkbox.setChecked(bool(should_check(code, checkbox)))
            checkbox.blockSignals(False)
        if notify:
            self._on_selection_changed()
        else:
            self._update_count_label()

    def _apply_default_selection(self):
        """Apply the configured default checked state to every checkbox."""

        def default_state(_code, checkbox):
            if checkbox.property("is_dimension"):
                return self.default_select_dimensions
            return self.default_select_facts

        # Runs during construction: only the counter label is refreshed,
        # selection_changed is not emitted.
        self._set_checked_where(default_state, notify=False)

    def _on_selection_changed(self):
        """Refresh the counter and emit ``selection_changed``."""
        self._update_count_label()
        self.selection_changed.emit(self.get_selected_codes())

    def _update_count_label(self):
        """Update the "selected / total" counter label."""
        count = len(self.get_selected_codes())
        total = len(self._checkboxes)
        self.selected_count_label.setText(f"已选: {count}/{total}")

    def _select_all(self):
        """Check every task."""
        self._set_checked_where(lambda _code, _checkbox: True)

    def _deselect_all(self):
        """Uncheck every task."""
        self._set_checked_where(lambda _code, _checkbox: False)

    def _select_facts_only(self):
        """Check the fact-type tasks and uncheck the dimension-type ones."""
        self._set_checked_where(
            lambda _code, checkbox: not checkbox.property("is_dimension")
        )

    def get_selected_codes(self) -> List[str]:
        """Return the codes of all checked tasks, in display order."""
        return [code for code, checkbox in self._checkboxes.items() if checkbox.isChecked()]

    def set_selected_codes(self, codes: List[str]):
        """Check exactly the tasks whose code is in ``codes``."""
        wanted = set(codes)
        self._set_checked_where(lambda code, _checkbox: code in wanted)

    def get_all_codes(self) -> List[str]:
        """Return the codes of all tasks shown by this widget."""
        return list(self._checkboxes.keys())

    def is_any_selected(self) -> bool:
        """Return whether at least one task is checked."""
        return any(checkbox.isChecked() for checkbox in self._checkboxes.values())
|
||||
|
||||
|
||||
class CompactTaskSelector(QWidget):
    """Compact task selector showing one row of business-domain checkboxes.

    Each checkbox represents a whole business domain; checking it selects every
    task code collected for that domain. ``selection_changed`` carries the list
    of selected task codes each time the selection is recomputed.
    """

    selection_changed = Signal(list)

    def __init__(
        self,
        parent: Optional[QWidget] = None,
        show_dimensions: bool = True,
        show_facts: bool = True,
        default_select_facts: bool = True,
        default_select_dimensions: bool = False,
    ):
        """Create the widget, build its UI and apply the default selection.

        Args:
            parent: Optional parent widget.
            show_dimensions: Include dimension tasks when collecting codes.
            show_facts: Include fact (non-dimension) tasks when collecting codes.
            default_select_facts: Stored on the instance. NOTE(review): nothing
                in this class reads it; confirm whether it should influence the
                default selection.
            default_select_dimensions: Stored on the instance; same caveat as
                ``default_select_facts``.
        """
        super().__init__(parent)
        self.show_dimensions = show_dimensions
        self.show_facts = show_facts
        self.default_select_facts = default_select_facts
        self.default_select_dimensions = default_select_dimensions

        # One checkbox per business domain that has at least one visible task.
        self._domain_checkboxes: Dict[BusinessDomain, QCheckBox] = {}
        # Task codes grouped under each of those business domains.
        self._domain_tasks: Dict[BusinessDomain, List[str]] = {}

        self._init_ui()
        self._apply_default_selection()

    def _init_ui(self):
        """Assemble the toolbar row and the row of per-domain checkboxes."""
        root = QVBoxLayout(self)
        root.setContentsMargins(0, 0, 0, 0)
        root.setSpacing(4)
        root.addLayout(self._build_toolbar())
        root.addLayout(self._build_domain_row())

    def _build_toolbar(self):
        """Create the select-all / clear buttons and the selection counter."""
        bar = QHBoxLayout()
        bar.setSpacing(8)

        self.select_all_btn = self._make_toolbar_button("全选", self._select_all)
        bar.addWidget(self.select_all_btn)

        self.deselect_all_btn = self._make_toolbar_button("清空", self._deselect_all)
        bar.addWidget(self.deselect_all_btn)

        bar.addStretch()

        self.count_label = QLabel("已选: 0")
        self.count_label.setProperty("subheading", True)
        bar.addWidget(self.count_label)
        return bar

    @staticmethod
    def _make_toolbar_button(text, slot):
        """Return a fixed-width "secondary" push button wired to ``slot``."""
        button = QPushButton(text)
        button.setProperty("secondary", True)
        button.setFixedWidth(50)
        button.clicked.connect(slot)
        return button

    def _build_domain_row(self):
        """Create one checkbox per business domain, laid out horizontally.

        Domains are visited in a fixed display order. A domain is skipped when
        the task registry has no entry for it or when none of its tasks pass
        the ``show_dimensions`` / ``show_facts`` filters.
        """
        row = QHBoxLayout()
        row.setSpacing(12)

        tasks_by_domain = task_registry.get_ods_tasks_grouped()
        display_order = (
            BusinessDomain.MEMBER,
            BusinessDomain.SETTLEMENT,
            BusinessDomain.ASSISTANT,
            BusinessDomain.GOODS,
            BusinessDomain.TABLE,
            BusinessDomain.PROMOTION,
            BusinessDomain.INVENTORY,
        )

        for domain in display_order:
            if domain not in tasks_by_domain:
                continue

            # Dimension tasks are kept only when show_dimensions is set,
            # all other tasks only when show_facts is set.
            visible_codes = [
                task.code
                for task in tasks_by_domain[domain]
                if (self.show_dimensions if task.is_dimension else self.show_facts)
            ]
            if not visible_codes:
                continue

            self._domain_tasks[domain] = visible_codes

            box = QCheckBox(DOMAIN_LABELS.get(domain, str(domain.value)))
            box.setToolTip(f"包含: {', '.join(visible_codes)}")
            box.stateChanged.connect(self._on_selection_changed)
            self._domain_checkboxes[domain] = box
            row.addWidget(box)

        row.addStretch()
        return row

    def _apply_default_selection(self):
        """Check every business domain, then refresh the counter label.

        Signals are not blocked here, so a state change on a checkbox reaches
        ``_on_selection_changed`` through its ``stateChanged`` connection.
        """
        for box in self._domain_checkboxes.values():
            box.setChecked(True)
        self._update_count_label()

    def _on_selection_changed(self):
        """Refresh the counter and emit ``selection_changed`` with the codes."""
        self._update_count_label()
        current = self.get_selected_codes()
        self.selection_changed.emit(current)

    def _update_count_label(self):
        """Show the number of currently selected task codes in the counter."""
        self.count_label.setText(f"已选: {len(self.get_selected_codes())} 个任务")

    def _set_domains_checked(self, decide):
        """Set each domain checkbox to ``decide(domain)`` and notify once.

        Per-checkbox signals are blocked during the update so that
        ``_on_selection_changed`` runs a single time at the end.
        """
        for domain, box in self._domain_checkboxes.items():
            box.blockSignals(True)
            box.setChecked(decide(domain))
            box.blockSignals(False)
        self._on_selection_changed()

    def _select_all(self):
        """Check every business domain."""
        self._set_domains_checked(lambda _domain: True)

    def _deselect_all(self):
        """Uncheck every business domain."""
        self._set_domains_checked(lambda _domain: False)

    def get_selected_codes(self) -> List[str]:
        """Return the task codes of all checked domains, in checkbox order."""
        codes = []
        for domain, box in self._domain_checkboxes.items():
            if not box.isChecked():
                continue
            codes.extend(self._domain_tasks.get(domain, []))
        return codes

    def set_selected_domains(self, domains: List[BusinessDomain]):
        """Check exactly the given business domains; uncheck all others."""
        wanted = set(domains)
        self._set_domains_checked(lambda domain: domain in wanted)

    def is_any_selected(self) -> bool:
        """Return True when at least one task code is selected."""
        return bool(self.get_selected_codes())
|
||||
@@ -189,28 +189,123 @@ class TaskWorker(QThread):
|
||||
|
||||
# 解析 DWD 装载统计
|
||||
if 'tables' in stats:
|
||||
total_processed = 0
|
||||
total_inserted = 0
|
||||
tables_with_data = []
|
||||
|
||||
|
||||
|
||||
total_dim_inserted = 0
|
||||
|
||||
total_dim_updated = 0
|
||||
|
||||
total_fact_inserted = 0
|
||||
|
||||
total_fact_updated = 0
|
||||
|
||||
dim_tables = [] # ?????
|
||||
|
||||
fact_tables = [] # ?????
|
||||
|
||||
|
||||
|
||||
for tbl in stats['tables']:
|
||||
|
||||
table_name = tbl.get('table', '').replace('billiards_dwd.', '')
|
||||
processed = tbl.get('processed', 0)
|
||||
inserted = tbl.get('inserted', 0)
|
||||
|
||||
if processed > 0:
|
||||
total_processed += processed
|
||||
tables_with_data.append(f"{table_name}({processed})")
|
||||
elif inserted > 0:
|
||||
total_inserted += inserted
|
||||
tables_with_data.append(f"{table_name}(+{inserted})")
|
||||
|
||||
mode = tbl.get('mode', '')
|
||||
|
||||
processed = int(tbl.get('processed', 0) or 0)
|
||||
|
||||
inserted = int(tbl.get('inserted', 0) or 0)
|
||||
|
||||
updated = int(tbl.get('updated', 0) or 0)
|
||||
|
||||
has_new_counts = ('inserted' in tbl) or ('updated' in tbl)
|
||||
|
||||
|
||||
if total_processed > 0 or total_inserted > 0:
|
||||
dwd_stats.append(f"处理维度: {total_processed}条, 新增事实: {total_inserted}条")
|
||||
if len(tables_with_data) <= 5:
|
||||
dwd_stats.append(f"涉及表: {', '.join(tables_with_data)}")
|
||||
|
||||
# Skip tables whose name ends with _ex
|
||||
|
||||
if table_name.endswith('_ex'):
|
||||
|
||||
continue
|
||||
|
||||
|
||||
|
||||
is_dim = table_name.startswith('dim_') or mode == 'SCD2'
|
||||
|
||||
if is_dim:
|
||||
|
||||
if has_new_counts:
|
||||
|
||||
total_dim_inserted += inserted
|
||||
|
||||
total_dim_updated += updated
|
||||
|
||||
if inserted or updated:
|
||||
|
||||
dim_tables.append(f"{table_name}: +{inserted}, ~{updated}")
|
||||
|
||||
elif processed > 0:
|
||||
|
||||
total_dim_updated += processed
|
||||
|
||||
dim_tables.append(f"{table_name}: {processed}")
|
||||
|
||||
else:
|
||||
dwd_stats.append(f"涉及 {len(tables_with_data)} 张表")
|
||||
|
||||
if has_new_counts:
|
||||
|
||||
total_fact_inserted += inserted
|
||||
|
||||
total_fact_updated += updated
|
||||
|
||||
if inserted or updated:
|
||||
|
||||
fact_tables.append(f"{table_name}: +{inserted}, ~{updated}")
|
||||
|
||||
elif processed > 0 or inserted > 0:
|
||||
|
||||
total_fact_inserted += inserted
|
||||
|
||||
if inserted > 0:
|
||||
|
||||
fact_tables.append(f"{table_name}: +{inserted}")
|
||||
|
||||
|
||||
|
||||
if (total_dim_inserted or total_dim_updated or total_fact_inserted or total_fact_updated):
|
||||
|
||||
dwd_stats.append(
|
||||
|
||||
f"????: {total_dim_inserted}?, ????: {total_dim_updated}?, "
|
||||
|
||||
f"????: {total_fact_inserted}?, ????: {total_fact_updated}?"
|
||||
|
||||
)
|
||||
|
||||
|
||||
|
||||
# Dimension table details
|
||||
|
||||
if dim_tables:
|
||||
|
||||
dwd_stats.append(" ???: " + ", ".join(dim_tables))
|
||||
|
||||
|
||||
|
||||
# Fact table details
|
||||
|
||||
if fact_tables:
|
||||
|
||||
dwd_stats.append(" ???: " + ", ".join(fact_tables))
|
||||
|
||||
|
||||
|
||||
# 解析错误信息
|
||||
if 'errors' in stats and stats['errors']:
|
||||
for err in stats['errors']:
|
||||
err_table = err.get('table', '').replace('billiards_dwd.', '')
|
||||
err_msg = err.get('error', '')
|
||||
errors.append(f"{err_table}: {err_msg}")
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
@@ -263,7 +358,9 @@ class TaskWorker(QThread):
|
||||
summary_parts[-1] += f" 等{len(ods_stats)}项"
|
||||
|
||||
if dwd_stats:
|
||||
summary_parts.append("【DWD 装载】" + "; ".join(dwd_stats))
|
||||
summary_parts.append("【DWD 装载】" + dwd_stats[0]) # 第一行是汇总
|
||||
for detail in dwd_stats[1:]: # 后面是详情
|
||||
summary_parts.append(detail)
|
||||
|
||||
if integrity_stats:
|
||||
total_missing = integrity_stats.get('final_missing', integrity_stats.get('total_missing', 0))
|
||||
|
||||
@@ -357,8 +357,20 @@ class ETLScheduler:
|
||||
|
||||
try:
|
||||
# 创建任务实例(不需要 API client,使用 None)
|
||||
api_client = None
|
||||
if task_code == "ODS_JSON_ARCHIVE":
|
||||
run_id = int(datetime.now(self.tz).timestamp())
|
||||
fetch_dir = self._build_fetch_dir(task_code, run_id)
|
||||
api_client = RecordingAPIClient(
|
||||
base_client=self.api_client,
|
||||
output_dir=fetch_dir,
|
||||
task_code=task_code,
|
||||
run_id=run_id,
|
||||
write_pretty=self.write_pretty_json,
|
||||
)
|
||||
|
||||
task = self.task_registry.create_task(
|
||||
task_code, self.config, self.db_ops, None, self.logger
|
||||
task_code, self.config, self.db_ops, api_client, self.logger
|
||||
)
|
||||
|
||||
# 执行任务(工具类任务通常不需要 cursor_data)
|
||||
|
||||
29540
etl_billiards/reports/data_integrity_window_20260201_175048.json
Normal file
29540
etl_billiards/reports/data_integrity_window_20260201_175048.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -28,7 +28,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from api.client import APIClient
|
||||
from api.recording_client import build_recording_client
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from models.parsers import TypeParser
|
||||
@@ -211,13 +211,7 @@ class MissingDataBackfiller:
|
||||
self.store_id = int(cfg.get("app.store_id") or 0)
|
||||
|
||||
# API 客户端
|
||||
self.api = APIClient(
|
||||
base_url=cfg["api"]["base_url"],
|
||||
token=cfg["api"]["token"],
|
||||
timeout=int(cfg["api"].get("timeout_sec") or 20),
|
||||
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
|
||||
headers_extra=cfg["api"].get("headers_extra") or {},
|
||||
)
|
||||
self.api = build_recording_client(cfg, task_code="BACKFILL_MISSING_DATA")
|
||||
|
||||
# 数据库连接(DatabaseConnection 构造时已设置 autocommit=False)
|
||||
self.db = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
|
||||
|
||||
@@ -29,7 +29,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from api.client import APIClient
|
||||
from api.recording_client import build_recording_client
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from models.parsers import TypeParser
|
||||
@@ -800,13 +800,8 @@ def run_gap_check(
|
||||
if cutoff:
|
||||
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), cutoff_overlap_hours)
|
||||
|
||||
client = APIClient(
|
||||
base_url=cfg["api"]["base_url"],
|
||||
token=cfg["api"]["token"],
|
||||
timeout=int(cfg["api"].get("timeout_sec") or 20),
|
||||
retry_max=int(cfg["api"].get("retries", {}).get("max_attempts") or 3),
|
||||
headers_extra=cfg["api"].get("headers_extra") or {},
|
||||
)
|
||||
tag_suffix = f"_{args.tag}" if args.tag else ""
|
||||
client = build_recording_client(cfg, task_code=f"ODS_GAP_CHECK{tag_suffix}")
|
||||
|
||||
db_state = _init_db_state(cfg)
|
||||
try:
|
||||
|
||||
@@ -522,9 +522,9 @@ class DwdLoadTask(BaseTask):
|
||||
continue
|
||||
|
||||
if self._table_base(dwd_table).startswith("dim_"):
|
||||
processed = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
|
||||
dim_counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
|
||||
self.db.conn.commit()
|
||||
summary.append({"table": dwd_table, "mode": "SCD2", "processed": processed})
|
||||
summary.append({"table": dwd_table, "mode": "SCD2", **dim_counts})
|
||||
else:
|
||||
dwd_types = self._get_column_types(cur, dwd_table, "billiards_dwd")
|
||||
ods_types = self._get_column_types(cur, ods_table, "billiards_ods")
|
||||
@@ -532,7 +532,7 @@ class DwdLoadTask(BaseTask):
|
||||
self.config.get("run.window_override.start")
|
||||
and self.config.get("run.window_override.end")
|
||||
)
|
||||
inserted = self._merge_fact_increment(
|
||||
fact_counts = self._merge_fact_increment(
|
||||
cur,
|
||||
dwd_table,
|
||||
ods_table,
|
||||
@@ -544,7 +544,7 @@ class DwdLoadTask(BaseTask):
|
||||
window_end=context.window_end if use_window else None,
|
||||
)
|
||||
self.db.conn.commit()
|
||||
summary.append({"table": dwd_table, "mode": "INCREMENT", "inserted": inserted})
|
||||
summary.append({"table": dwd_table, "mode": "INCREMENT", **fact_counts})
|
||||
|
||||
elapsed = time.monotonic() - started
|
||||
self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
|
||||
@@ -675,7 +675,7 @@ class DwdLoadTask(BaseTask):
|
||||
dwd_cols: Sequence[str],
|
||||
ods_cols: Sequence[str],
|
||||
now: datetime,
|
||||
) -> int:
|
||||
) -> Dict[str, int]:
|
||||
"""
|
||||
维表合并策略:
|
||||
- 若主键包含 scd2 列(如 scd2_start_time/scd2_version),执行真正的 SCD2(关闭旧版+插入新版)。
|
||||
@@ -699,8 +699,8 @@ class DwdLoadTask(BaseTask):
|
||||
ods_cols: Sequence[str],
|
||||
pk_cols: Sequence[str],
|
||||
now: datetime,
|
||||
) -> int:
|
||||
"""维表 Type1 Upsert(主键冲突则更新),兼容带 scd2 字段但主键不支持多版本的表。"""
|
||||
) -> Dict[str, int]:
|
||||
"""维表 Type1 Upsert(主键冲突则更新),返回真实新增/更新计数。"""
|
||||
mapping = self._build_column_mapping(dwd_table, pk_cols, ods_cols)
|
||||
ods_set = {c.lower() for c in ods_cols}
|
||||
ods_table_sql = self._format_table(ods_table, "billiards_ods")
|
||||
@@ -731,7 +731,7 @@ class DwdLoadTask(BaseTask):
|
||||
added.add(lc)
|
||||
|
||||
if not select_exprs:
|
||||
return 0
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
|
||||
order_col = self._pick_snapshot_order_column(ods_cols)
|
||||
business_keys = self._strip_scd2_keys(pk_cols)
|
||||
@@ -768,7 +768,7 @@ class DwdLoadTask(BaseTask):
|
||||
src_rows.append(row)
|
||||
|
||||
if not src_rows:
|
||||
return 0
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
|
||||
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||||
sorted_cols = [c.lower() for c in sorted(dwd_cols)]
|
||||
@@ -802,12 +802,19 @@ class DwdLoadTask(BaseTask):
|
||||
else:
|
||||
set_exprs.append(f'\"{c}\" = EXCLUDED.\"{c}\"')
|
||||
|
||||
compare_cols = [c for c in sorted_cols if c not in pk_lower_set and c not in self.SCD_COLS]
|
||||
diff_exprs = [f'{dwd_table_sql}."{c}" IS DISTINCT FROM EXCLUDED."{c}"' for c in compare_cols]
|
||||
where_clause = f" WHERE {' OR '.join(diff_exprs)}" if diff_exprs else ""
|
||||
upsert_sql = (
|
||||
f"INSERT INTO {dwd_table_sql} ({insert_cols_sql}) VALUES %s "
|
||||
f"ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}"
|
||||
f"ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}{where_clause} "
|
||||
f"RETURNING (xmax = 0) AS inserted"
|
||||
)
|
||||
execute_values(cur, upsert_sql, [build_row(r) for r in src_rows], page_size=500)
|
||||
return len(src_rows)
|
||||
rows = execute_values(cur, upsert_sql, [build_row(r) for r in src_rows], page_size=500, fetch=True)
|
||||
inserted, updated = self._count_returning_flags(rows or [])
|
||||
processed = len(src_rows)
|
||||
skipped = max(0, processed - inserted - updated)
|
||||
return {"processed": processed, "inserted": inserted, "updated": updated, "skipped": skipped}
|
||||
|
||||
def _merge_dim_scd2(
|
||||
self,
|
||||
@@ -817,7 +824,7 @@ class DwdLoadTask(BaseTask):
|
||||
dwd_cols: Sequence[str],
|
||||
ods_cols: Sequence[str],
|
||||
now: datetime,
|
||||
) -> int:
|
||||
) -> Dict[str, int]:
|
||||
"""对维表执行 SCD2 合并:对比变更关闭旧版并插入新版。"""
|
||||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||||
if not pk_cols:
|
||||
@@ -860,7 +867,7 @@ class DwdLoadTask(BaseTask):
|
||||
added.add(lc)
|
||||
|
||||
if not select_exprs:
|
||||
return 0
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
|
||||
order_col = self._pick_snapshot_order_column(ods_cols)
|
||||
key_exprs: list[str] = []
|
||||
@@ -906,7 +913,7 @@ class DwdLoadTask(BaseTask):
|
||||
src_rows_by_pk[pk_key] = mapped_row
|
||||
|
||||
if not src_rows_by_pk:
|
||||
return 0
|
||||
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
|
||||
|
||||
# 预加载当前版本(scd2_is_current=1),避免逐行 SELECT 造成大量 round-trip
|
||||
table_sql_dwd = self._format_table(dwd_table, "billiards_dwd")
|
||||
@@ -941,7 +948,11 @@ class DwdLoadTask(BaseTask):
|
||||
if to_insert:
|
||||
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now)
|
||||
|
||||
return len(src_rows_by_pk)
|
||||
processed = len(src_rows_by_pk)
|
||||
updated = len(to_close)
|
||||
inserted = max(0, len(to_insert) - updated)
|
||||
skipped = max(0, processed - inserted - updated)
|
||||
return {"processed": processed, "inserted": inserted, "updated": updated, "skipped": skipped}
|
||||
|
||||
def _close_current_dim_bulk(
|
||||
self,
|
||||
@@ -1129,9 +1140,13 @@ class DwdLoadTask(BaseTask):
|
||||
value = datetime.combine(value, datetime.min.time())
|
||||
if not isinstance(value, datetime):
|
||||
return value
|
||||
if value.tzinfo is None:
|
||||
return value.replace(tzinfo=self.tz)
|
||||
return value.astimezone(self.tz)
|
||||
try:
|
||||
if value.tzinfo is None:
|
||||
return value.replace(tzinfo=self.tz)
|
||||
return value.astimezone(self.tz)
|
||||
except (OverflowError, OSError):
|
||||
# 极端日期值(如 9999-12-31)无法转换时区,直接返回原值
|
||||
return value
|
||||
|
||||
def _looks_numeric(self, value: Any) -> bool:
|
||||
if isinstance(value, (int, float, Decimal)) and not isinstance(value, bool):
|
||||
@@ -1184,6 +1199,22 @@ class DwdLoadTask(BaseTask):
|
||||
return False
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _count_returning_flags(rows: Iterable[Any]) -> tuple[int, int]:
|
||||
"""Count inserted vs updated from RETURNING (xmax = 0) rows."""
|
||||
inserted = 0
|
||||
updated = 0
|
||||
for row in rows:
|
||||
if isinstance(row, dict):
|
||||
flag = row.get("inserted")
|
||||
else:
|
||||
flag = row[0] if row else None
|
||||
if flag:
|
||||
inserted += 1
|
||||
else:
|
||||
updated += 1
|
||||
return inserted, updated
|
||||
|
||||
def _merge_fact_increment(
|
||||
self,
|
||||
cur,
|
||||
@@ -1195,8 +1226,8 @@ class DwdLoadTask(BaseTask):
|
||||
ods_types: Dict[str, str],
|
||||
window_start: datetime | None = None,
|
||||
window_end: datetime | None = None,
|
||||
) -> int:
|
||||
"""事实表按时间增量插入,默认按列名交集写入。"""
|
||||
) -> Dict[str, int]:
|
||||
"""事实表按时间增量插入,返回真实新增/更新计数。"""
|
||||
mapping_entries = self.FACT_MAPPINGS.get(dwd_table) or []
|
||||
mapping: Dict[str, tuple[str, str | None]] = {
|
||||
dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
|
||||
@@ -1306,18 +1337,31 @@ class DwdLoadTask(BaseTask):
|
||||
set_exprs = [f'"{c}" = EXCLUDED."{c}"' for c in insert_cols if c.lower() not in pk_lower]
|
||||
if snapshot_mode or fact_upsert:
|
||||
if set_exprs:
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}"
|
||||
compare_cols = [c for c in insert_cols if c.lower() not in pk_lower]
|
||||
diff_exprs = [f'{dwd_table_sql}."{c}" IS DISTINCT FROM EXCLUDED."{c}"' for c in compare_cols]
|
||||
where_clause = f" WHERE {' OR '.join(diff_exprs)}" if diff_exprs else ""
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}{where_clause}"
|
||||
else:
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
|
||||
else:
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
|
||||
|
||||
sql += " RETURNING (xmax = 0) AS inserted"
|
||||
cur.execute(sql, params)
|
||||
inserted = cur.rowcount
|
||||
|
||||
inserted = 0
|
||||
updated = 0
|
||||
while True:
|
||||
rows = cur.fetchmany(10000)
|
||||
if not rows:
|
||||
break
|
||||
ins, upd = self._count_returning_flags(rows)
|
||||
inserted += ins
|
||||
updated += upd
|
||||
|
||||
# 回补缺失主键记录(处理历史回补导致的“create_time 水位”遗漏)
|
||||
if dwd_table.lower() in self.FACT_MISSING_FILL_TABLES:
|
||||
inserted += self._insert_missing_by_pk(
|
||||
missing_inserted = self._insert_missing_by_pk(
|
||||
cur,
|
||||
dwd_table,
|
||||
ods_table,
|
||||
@@ -1328,8 +1372,9 @@ class DwdLoadTask(BaseTask):
|
||||
dwd_types,
|
||||
ods_types,
|
||||
)
|
||||
inserted += missing_inserted
|
||||
|
||||
return inserted
|
||||
return {"inserted": inserted, "updated": updated, "processed": inserted + updated}
|
||||
def _pick_order_column(self, dwd_table: str, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
|
||||
"""Pick an incremental order column that exists in both DWD and ODS."""
|
||||
lower_cols = {c.lower() for c in dwd_cols} & {c.lower() for c in ods_cols}
|
||||
|
||||
@@ -177,11 +177,12 @@ class BaseOdsTask(BaseTask):
|
||||
def _resolve_window(self, cursor_data: dict | None) -> tuple[datetime, datetime, int]:
|
||||
base_start, base_end, base_minutes = self._get_time_window(cursor_data)
|
||||
|
||||
if self.config.get("run.force_window_override"):
|
||||
override_start = self.config.get("run.window_override.start")
|
||||
override_end = self.config.get("run.window_override.end")
|
||||
if override_start and override_end:
|
||||
return base_start, base_end, base_minutes
|
||||
# 如果用户显式指定了窗口(window_override.start/end),则直接使用,不走 MAX(fetched_at) 兜底
|
||||
override_start = self.config.get("run.window_override.start")
|
||||
override_end = self.config.get("run.window_override.end")
|
||||
if override_start and override_end:
|
||||
# 用户明确指定了窗口,尊重用户选择
|
||||
return base_start, base_end, base_minutes
|
||||
|
||||
# 以 ODS 表 MAX(fetched_at) 兜底:避免“窗口游标推进但未实际入库”导致漏数。
|
||||
last_fetched = self._get_max_fetched_at(self.SPEC.table_name)
|
||||
|
||||
Reference in New Issue
Block a user