init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

35
gui/models/__init__.py Normal file
View File

@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""数据模型模块"""
from .task_model import TaskItem, TaskStatus, TaskHistory, TaskConfig, QueuedTask
from .schedule_model import (
ScheduledTask, ScheduleConfig, ScheduleType, IntervalUnit, ScheduleStore
)
from .task_registry import (
TaskRegistry, TaskDefinition, BusinessDomain, DOMAIN_LABELS,
task_registry, get_ods_task_codes, get_fact_ods_task_codes,
get_dimension_ods_task_codes, get_all_task_tuples
)
# Names re-exported as the public API of this package.
__all__ = [
"TaskItem",
"TaskStatus",
"TaskHistory",
"TaskConfig",
"QueuedTask",
"ScheduledTask",
"ScheduleConfig",
"ScheduleType",
"IntervalUnit",
"ScheduleStore",
# Task registry
"TaskRegistry",
"TaskDefinition",
"BusinessDomain",
"DOMAIN_LABELS",
"task_registry",
"get_ods_task_codes",
"get_fact_ods_task_codes",
"get_dimension_ods_task_codes",
"get_all_task_tuples",
]

View File

@@ -0,0 +1,391 @@
# -*- coding: utf-8 -*-
"""调度任务数据模型"""
import json
from dataclasses import dataclass, field, asdict
from datetime import datetime, timedelta
from enum import Enum
from typing import Optional, List, Dict, Any
from pathlib import Path
class ScheduleType(Enum):
    """Kinds of schedule a task can run on."""

    ONCE = "once"  # run a single time
    INTERVAL = "interval"  # repeat at a fixed interval
    DAILY = "daily"  # every day at a fixed time
    WEEKLY = "weekly"  # on selected weekdays at a fixed time
    CRON = "cron"  # cron expression


class IntervalUnit(Enum):
    """Units in which an interval schedule is expressed."""

    MINUTES = "minutes"
    HOURS = "hours"
    DAYS = "days"


@dataclass
class ScheduleConfig:
    """Describe when a scheduled task should run.

    Only the fields that belong to the selected ``schedule_type`` are read
    when computing the next run time; the remaining fields keep their
    defaults and are still serialized by :meth:`to_dict`.
    """

    schedule_type: ScheduleType = ScheduleType.ONCE
    # Interval scheduling
    interval_value: int = 1
    interval_unit: IntervalUnit = IntervalUnit.HOURS
    # Daily scheduling
    daily_time: str = "04:00"  # HH:MM
    # Weekly scheduling; ISO weekdays 1-7, 1 = Monday
    weekly_days: List[int] = field(default_factory=lambda: [1])
    weekly_time: str = "04:00"
    # Cron expression
    cron_expression: str = "0 4 * * *"
    # Common settings
    enabled: bool = True
    start_date: Optional[str] = None  # YYYY-MM-DD
    end_date: Optional[str] = None  # YYYY-MM-DD

    # Delay (seconds) applied to the first run. Deliberately left without an
    # annotation so it stays a class constant and not a dataclass field.
    FIRST_RUN_DELAY_SECONDS = 60

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict (enums become their values)."""
        return {
            "schedule_type": self.schedule_type.value,
            "interval_value": self.interval_value,
            "interval_unit": self.interval_unit.value,
            "daily_time": self.daily_time,
            "weekly_days": self.weekly_days,
            "weekly_time": self.weekly_time,
            "cron_expression": self.cron_expression,
            "enabled": self.enabled,
            "start_date": self.start_date,
            "end_date": self.end_date,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ScheduleConfig":
        """Build a config from a dict produced by :meth:`to_dict`.

        Missing keys fall back to the dataclass defaults.

        Raises:
            ValueError: if ``schedule_type`` or ``interval_unit`` holds a
                value that is not a member of the corresponding enum.
        """
        return cls(
            schedule_type=ScheduleType(data.get("schedule_type", "once")),
            interval_value=data.get("interval_value", 1),
            interval_unit=IntervalUnit(data.get("interval_unit", "hours")),
            daily_time=data.get("daily_time", "04:00"),
            weekly_days=data.get("weekly_days", [1]),
            weekly_time=data.get("weekly_time", "04:00"),
            cron_expression=data.get("cron_expression", "0 4 * * *"),
            enabled=data.get("enabled", True),
            start_date=data.get("start_date"),
            end_date=data.get("end_date"),
        )

    def get_description(self) -> str:
        """Return a short human-readable description of the schedule.

        The unit and weekday labels were empty strings before, which made
        interval and weekly descriptions unreadable; they are filled in
        here. Weekday numbers outside 1-7 are shown as the raw number
        instead of raising ``KeyError``.
        """
        kind = self.schedule_type
        if kind == ScheduleType.ONCE:
            return "一次性执行"
        if kind == ScheduleType.INTERVAL:
            unit_names = {"minutes": "分钟", "hours": "小时", "days": "天"}
            return f"{self.interval_value} {unit_names[self.interval_unit.value]}"
        if kind == ScheduleType.DAILY:
            return f"每天 {self.daily_time}"
        if kind == ScheduleType.WEEKLY:
            day_names = {1: "一", 2: "二", 3: "三", 4: "四", 5: "五", 6: "六", 7: "日"}
            days = "".join(day_names.get(d, str(d)) for d in sorted(self.weekly_days))
            return f"每周 {days} {self.weekly_time}"
        if kind == ScheduleType.CRON:
            return f"Cron: {self.cron_expression}"
        return "未知"

    def get_next_run_time(self, last_run: Optional[datetime] = None) -> Optional[datetime]:
        """Compute the next time this schedule should fire.

        Args:
            last_run: When the task last ran, or ``None`` if it never ran.

        Returns:
            The next run time, or ``None`` when there is none: the schedule
            is past ``end_date``, a one-off task already ran, no weekday
            matches, or the cron expression is not supported.

        Notes:
            * For ONCE and INTERVAL schedules the very first run is delayed
              by ``FIRST_RUN_DELAY_SECONDS`` so a newly created task does
              not fire immediately.
            * If ``start_date`` lies in the future, it is used as the
              reference time instead of the current time.
            * A candidate that falls after the end of ``end_date`` is
              rejected; previously only the current time was checked, so a
              run could be scheduled beyond the configured range.
        """
        now = datetime.now()
        end: Optional[datetime] = None
        if self.start_date:
            start = datetime.strptime(self.start_date, "%Y-%m-%d")
            if now < start:
                now = start
        if self.end_date:
            # end_date is inclusive, so the range closes at the next midnight.
            end = datetime.strptime(self.end_date, "%Y-%m-%d") + timedelta(days=1)
            if now >= end:
                return None

        candidate = self._compute_next_run(now, last_run)
        if candidate is not None and end is not None and candidate >= end:
            return None
        return candidate

    def _compute_next_run(self, now: datetime, last_run: Optional[datetime]) -> Optional[datetime]:
        """Return the next run for the schedule type, ignoring ``end_date``."""
        first_run_time = now + timedelta(seconds=self.FIRST_RUN_DELAY_SECONDS)
        kind = self.schedule_type

        if kind == ScheduleType.ONCE:
            return None if last_run else first_run_time

        if kind == ScheduleType.INTERVAL:
            if not last_run:
                return first_run_time
            if self.interval_unit == IntervalUnit.MINUTES:
                delta = timedelta(minutes=self.interval_value)
            elif self.interval_unit == IntervalUnit.HOURS:
                delta = timedelta(hours=self.interval_value)
            else:
                delta = timedelta(days=self.interval_value)
            return last_run + delta

        if kind == ScheduleType.DAILY:
            hour, minute = map(int, self.daily_time.split(":"))
            next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
            if next_run <= now:
                next_run += timedelta(days=1)
            return next_run

        if kind == ScheduleType.WEEKLY:
            hour, minute = map(int, self.weekly_time.split(":"))
            # Look at today plus the next 7 days so that "same weekday, but
            # the time already passed" rolls over to next week.
            for offset in range(8):
                check_date = now + timedelta(days=offset)
                if check_date.isoweekday() in self.weekly_days:
                    next_run = check_date.replace(
                        hour=hour, minute=minute, second=0, microsecond=0
                    )
                    if next_run > now:
                        return next_run
            return None

        if kind == ScheduleType.CRON:
            # Best effort: an unparsable or out-of-range expression yields
            # no next run instead of an error.
            try:
                return self._parse_simple_cron(now)
            except (ValueError, TypeError, AttributeError):
                return None

        return None

    def _parse_simple_cron(self, now: datetime) -> Optional[datetime]:
        """Resolve a minimal subset of cron: fixed minute and hour only.

        The expression must have five fields. Only purely numeric minute and
        hour fields are honoured; the day, month and weekday fields are
        ignored, so the result is always "next occurrence of HH:MM".

        Raises:
            ValueError: if the numeric hour or minute is out of range.
        """
        parts = self.cron_expression.split()
        if len(parts) != 5:
            return None
        minute, hour = parts[0], parts[1]
        if not (minute.isdigit() and hour.isdigit()):
            return None
        next_run = now.replace(hour=int(hour), minute=int(minute), second=0, microsecond=0)
        if next_run <= now:
            next_run += timedelta(days=1)
        return next_run
@dataclass
class ScheduleExecutionRecord:
    """One execution of a scheduled task, as kept in the task's history."""

    task_id: str  # ID of the associated QueuedTask
    executed_at: datetime  # when the execution happened
    status: str = ""  # success, failed, pending
    exit_code: Optional[int] = None  # process exit code
    duration_seconds: float = 0.0  # elapsed time in seconds
    summary: str = ""  # short execution summary
    output: str = ""  # full execution log
    error: str = ""  # error message
    # Maximum number of output characters written by to_dict().
    # NOTE(review): this is annotated, so it is a regular dataclass field
    # (and an __init__ parameter), not a class-level constant.
    MAX_OUTPUT_LENGTH: int = 100000  # 100KB

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict.

        ``output`` is cut to ``MAX_OUTPUT_LENGTH`` characters and ``error``
        to 5000 characters; in both cases the leading part is kept.
        """
        clipped_output = self.output[:self.MAX_OUTPUT_LENGTH] if self.output else ""
        clipped_error = self.error[:5000] if self.error else ""
        payload = {}
        payload["task_id"] = self.task_id
        payload["executed_at"] = self.executed_at.isoformat()
        payload["status"] = self.status
        payload["exit_code"] = self.exit_code
        payload["duration_seconds"] = self.duration_seconds
        payload["summary"] = self.summary
        payload["output"] = clipped_output
        payload["error"] = clipped_error
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "ScheduleExecutionRecord":
        """Rebuild a record from a dict produced by :meth:`to_dict`.

        A missing or empty ``executed_at`` falls back to the current time.
        """
        raw_time = data.get("executed_at")
        if raw_time:
            when = datetime.fromisoformat(raw_time)
        else:
            when = datetime.now()
        return cls(
            task_id=data.get("task_id", ""),
            executed_at=when,
            status=data.get("status", ""),
            exit_code=data.get("exit_code"),
            duration_seconds=data.get("duration_seconds", 0.0),
            summary=data.get("summary", ""),
            output=data.get("output", ""),
            error=data.get("error", ""),
        )
@dataclass
class ScheduledTask:
    """A named set of task codes bound to a schedule, plus its run state."""

    id: str
    name: str
    task_codes: List[str]
    schedule: ScheduleConfig
    task_config: Dict[str, Any] = field(default_factory=dict)
    # Run state
    enabled: bool = True
    last_run: Optional[datetime] = None
    next_run: Optional[datetime] = None
    run_count: int = 0
    last_status: str = ""
    # Execution history, newest first, capped at MAX_HISTORY_SIZE entries
    execution_history: List[ScheduleExecutionRecord] = field(default_factory=list)
    MAX_HISTORY_SIZE: int = field(default=50, repr=False)  # keep the 50 most recent records
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    def add_execution_record(self, record: ScheduleExecutionRecord):
        """Put ``record`` at the front of the history and drop the overflow."""
        self.execution_history.insert(0, record)
        limit = self.MAX_HISTORY_SIZE
        # Trim only when the cap is exceeded; the oldest entries are discarded.
        if len(self.execution_history) > limit:
            self.execution_history = self.execution_history[:limit]

    def update_execution_record(self, task_id: str, status: str, exit_code: int, duration: float,
                                summary: str, output: str = "", error: str = ""):
        """Update the first history record whose ``task_id`` matches.

        Does nothing when no record matches.
        """
        target = next((rec for rec in self.execution_history if rec.task_id == task_id), None)
        if target is None:
            return
        target.status = status
        target.exit_code = exit_code
        target.duration_seconds = duration
        target.summary = summary
        target.output = output
        target.error = error

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict (datetimes as ISO strings)."""
        last_run_text = self.last_run.isoformat() if self.last_run else None
        next_run_text = self.next_run.isoformat() if self.next_run else None
        history = [entry.to_dict() for entry in self.execution_history]
        return {
            "id": self.id,
            "name": self.name,
            "task_codes": self.task_codes,
            "schedule": self.schedule.to_dict(),
            "task_config": self.task_config,
            "enabled": self.enabled,
            "last_run": last_run_text,
            "next_run": next_run_text,
            "run_count": self.run_count,
            "last_status": self.last_status,
            "execution_history": history,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ScheduledTask":
        """Rebuild a task from a dict produced by :meth:`to_dict`.

        ``id``, ``name`` and ``task_codes`` are required (``KeyError`` when
        absent); every other key is optional. Missing creation/update
        timestamps default to the current time.
        """

        def parse_time(key: str) -> Optional[datetime]:
            # A missing or empty value means "no timestamp".
            text = data.get(key)
            return datetime.fromisoformat(text) if text else None

        records = [
            ScheduleExecutionRecord.from_dict(item)
            for item in data.get("execution_history", [])
        ]
        return cls(
            id=data["id"],
            name=data["name"],
            task_codes=data["task_codes"],
            schedule=ScheduleConfig.from_dict(data.get("schedule", {})),
            task_config=data.get("task_config", {}),
            enabled=data.get("enabled", True),
            last_run=parse_time("last_run"),
            next_run=parse_time("next_run"),
            run_count=data.get("run_count", 0),
            last_status=data.get("last_status", ""),
            execution_history=records,
            created_at=parse_time("created_at") or datetime.now(),
            updated_at=parse_time("updated_at") or datetime.now(),
        )

    def update_next_run(self):
        """Recompute ``next_run`` and refresh ``updated_at``.

        ``next_run`` is cleared when either the task or its schedule is
        disabled.
        """
        is_active = self.enabled and self.schedule.enabled
        self.next_run = self.schedule.get_next_run_time(self.last_run) if is_active else None
        self.updated_at = datetime.now()
class ScheduleStore:
    """JSON-file backed store of scheduled tasks, keyed by task id.

    The whole task set is loaded on construction and rewritten to disk on
    every mutation.
    """

    def __init__(self, storage_path: Optional[Path] = None):
        """Open the store and load any existing tasks.

        Args:
            storage_path: Location of the JSON file (``Path`` or ``str``).
                Defaults to ``config/scheduled_tasks.json`` two directory
                levels above this module.
        """
        if storage_path is None:
            storage_path = Path(__file__).resolve().parents[2] / "config" / "scheduled_tasks.json"
        # Accept plain strings as well as Path objects.
        self.storage_path = Path(storage_path)
        self.tasks: Dict[str, ScheduledTask] = {}
        self.load()

    def load(self):
        """Load tasks from disk.

        A missing file leaves the current tasks untouched. An unreadable or
        malformed file results in an empty task set; the failure is logged
        instead of being swallowed silently, because the next ``save()``
        will overwrite the file.
        """
        if not self.storage_path.exists():
            return
        try:
            data = json.loads(self.storage_path.read_text(encoding="utf-8"))
            self.tasks = {
                task_id: ScheduledTask.from_dict(task_data)
                for task_id, task_data in data.get("tasks", {}).items()
            }
        except Exception:
            # Best effort by design: the application must still start.
            import logging

            logging.getLogger(__name__).exception(
                "Failed to load scheduled tasks from %s", self.storage_path
            )
            self.tasks = {}

    def save(self):
        """Write all tasks to disk.

        The parent directory is created when missing, and the file is
        written to a temporary sibling first and then moved into place so
        an interrupted write cannot leave a truncated file behind.
        """
        data = {
            "tasks": {
                task_id: task.to_dict()
                for task_id, task in self.tasks.items()
            }
        }
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self.storage_path.with_name(self.storage_path.name + ".tmp")
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(self.storage_path)

    def add_task(self, task: ScheduledTask):
        """Register ``task`` (after computing its next run) and persist."""
        task.update_next_run()
        self.tasks[task.id] = task
        self.save()

    def remove_task(self, task_id: str):
        """Delete the task with ``task_id`` if present and persist."""
        if task_id in self.tasks:
            del self.tasks[task_id]
            self.save()

    def update_task(self, task: ScheduledTask):
        """Replace the stored task, refreshing its next run and timestamp."""
        task.update_next_run()
        task.updated_at = datetime.now()
        self.tasks[task.id] = task
        self.save()

    def get_task(self, task_id: str) -> Optional[ScheduledTask]:
        """Return the task with ``task_id`` or ``None``."""
        return self.tasks.get(task_id)

    def get_all_tasks(self) -> List[ScheduledTask]:
        """Return all stored tasks as a new list."""
        return list(self.tasks.values())

    def get_due_tasks(self) -> List[ScheduledTask]:
        """Return enabled tasks whose ``next_run`` is not in the future."""
        now = datetime.now()
        return [
            task
            for task in self.tasks.values()
            if task.enabled and task.next_run and task.next_run <= now
        ]

208
gui/models/task_model.py Normal file
View File

@@ -0,0 +1,208 @@
# -*- coding: utf-8 -*-
# AI_CHANGELOG [2026-02-13] 移除 DWS_RECALL_INDEX/DWS_INTIMACY_INDEX 任务分类映射
"""任务数据模型"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional, List, Dict, Any
class TaskStatus(Enum):
    """Lifecycle states of a task."""

    PENDING = "pending"  # waiting to run
    RUNNING = "running"  # currently executing
    SUCCESS = "success"  # finished successfully
    FAILED = "failed"  # finished with an error
    CANCELLED = "cancelled"  # cancelled
class TaskCategory(Enum):
    """Categories a task can belong to."""

    ODS = "ODS"  # ODS data-fetch tasks
    DWD = "DWD"  # DWD load tasks
    DWS = "DWS"  # DWS aggregation tasks
    SCHEMA = "Schema"  # schema initialization tasks
    QUALITY = "Quality"  # quality-check tasks
    OTHER = "Other"  # everything else
# Mapping from task code (upper case) to its category.
TASK_CATEGORIES: Dict[str, TaskCategory] = {
# ODS tasks
"ODS_PAYMENT": TaskCategory.ODS,
"ODS_MEMBER": TaskCategory.ODS,
"ODS_MEMBER_CARD": TaskCategory.ODS,
"ODS_MEMBER_BALANCE": TaskCategory.ODS,
"ODS_SETTLEMENT_RECORDS": TaskCategory.ODS,
"ODS_TABLE_USE": TaskCategory.ODS,
"ODS_ASSISTANT_ACCOUNT": TaskCategory.ODS,
"ODS_ASSISTANT_LEDGER": TaskCategory.ODS,
"ODS_ASSISTANT_ABOLISH": TaskCategory.ODS,
"ODS_REFUND": TaskCategory.ODS,
"ODS_PLATFORM_COUPON": TaskCategory.ODS,
"ODS_RECHARGE_SETTLE": TaskCategory.ODS,
"ODS_GROUP_PACKAGE": TaskCategory.ODS,
"ODS_GROUP_BUY_REDEMPTION": TaskCategory.ODS,
"ODS_INVENTORY_STOCK": TaskCategory.ODS,
"ODS_INVENTORY_CHANGE": TaskCategory.ODS,
"ODS_TABLES": TaskCategory.ODS,
"ODS_GOODS_CATEGORY": TaskCategory.ODS,
"ODS_STORE_GOODS": TaskCategory.ODS,
"ODS_STORE_GOODS_SALES": TaskCategory.ODS,
"ODS_TABLE_FEE_DISCOUNT": TaskCategory.ODS,
"ODS_TENANT_GOODS": TaskCategory.ODS,
"ODS_SETTLEMENT_TICKET": TaskCategory.ODS,
# DWD tasks
"DWD_LOAD_FROM_ODS": TaskCategory.DWD,
"DWD_QUALITY_CHECK": TaskCategory.QUALITY,
"PAYMENTS_DWD": TaskCategory.DWD,
"MEMBERS_DWD": TaskCategory.DWD,
"TICKET_DWD": TaskCategory.DWD,
# DWS tasks
"INIT_DWS_SCHEMA": TaskCategory.SCHEMA,
"SEED_DWS_CONFIG": TaskCategory.SCHEMA,
"DWS_BUILD_ORDER_SUMMARY": TaskCategory.DWS,
"DWS_WINBACK_INDEX": TaskCategory.DWS,
"DWS_NEWCONV_INDEX": TaskCategory.DWS,
"DWS_RELATION_INDEX": TaskCategory.DWS,
"DWS_ML_MANUAL_IMPORT": TaskCategory.DWS,
"DWS_ASSISTANT_DAILY": TaskCategory.DWS,
"DWS_ASSISTANT_MONTHLY": TaskCategory.DWS,
"DWS_ASSISTANT_CUSTOMER": TaskCategory.DWS,
"DWS_ASSISTANT_SALARY": TaskCategory.DWS,
"DWS_ASSISTANT_FINANCE": TaskCategory.DWS,
"DWS_MEMBER_CONSUMPTION": TaskCategory.DWS,
"DWS_MEMBER_VISIT": TaskCategory.DWS,
"DWS_FINANCE_DAILY": TaskCategory.DWS,
"DWS_FINANCE_RECHARGE": TaskCategory.DWS,
"DWS_FINANCE_INCOME_STRUCTURE": TaskCategory.DWS,
"DWS_FINANCE_DISCOUNT_DETAIL": TaskCategory.DWS,
"DWS_RETENTION_CLEANUP": TaskCategory.DWS,
"DWS_MV_REFRESH_FINANCE_DAILY": TaskCategory.DWS,
"DWS_MV_REFRESH_ASSISTANT_DAILY": TaskCategory.DWS,
# Schema tasks
"INIT_ODS_SCHEMA": TaskCategory.SCHEMA,
"INIT_DWD_SCHEMA": TaskCategory.SCHEMA,
# Other tasks
"MANUAL_INGEST": TaskCategory.OTHER,
"CHECK_CUTOFF": TaskCategory.OTHER,
"DATA_INTEGRITY_CHECK": TaskCategory.QUALITY,
"ODS_JSON_ARCHIVE": TaskCategory.OTHER,
# Legacy task codes (kept for compatibility)
"PRODUCTS": TaskCategory.ODS,
"TABLES": TaskCategory.ODS,
"MEMBERS": TaskCategory.ODS,
"ASSISTANTS": TaskCategory.ODS,
"PACKAGES_DEF": TaskCategory.ODS,
"ORDERS": TaskCategory.ODS,
"PAYMENTS": TaskCategory.ODS,
"REFUNDS": TaskCategory.ODS,
"COUPON_USAGE": TaskCategory.ODS,
"INVENTORY_CHANGE": TaskCategory.ODS,
"TOPUPS": TaskCategory.ODS,
"TABLE_DISCOUNT": TaskCategory.ODS,
"ASSISTANT_ABOLISH": TaskCategory.ODS,
"LEDGER": TaskCategory.ODS,
}
def get_task_category(task_code: str) -> TaskCategory:
    """Look up the category of ``task_code`` (case-insensitive).

    Codes that are not listed in ``TASK_CATEGORIES`` map to
    ``TaskCategory.OTHER``.
    """
    normalized_code = task_code.upper()
    category = TASK_CATEGORIES.get(normalized_code, TaskCategory.OTHER)
    return category
@dataclass
class TaskItem:
    """A single task entry identified by its task code."""

    task_code: str
    name: str = ""
    description: str = ""
    category: TaskCategory = TaskCategory.OTHER
    enabled: bool = True

    def __post_init__(self):
        """Fill in the display name and category when they were not given."""
        # An empty name falls back to the task code.
        self.name = self.name or self.task_code
        # A missing or OTHER category is resolved from the task code.
        needs_lookup = (not self.category) or self.category == TaskCategory.OTHER
        if needs_lookup:
            self.category = get_task_category(self.task_code)
@dataclass
class TaskConfig:
    """Settings for one task execution."""

    tasks: List[str] = field(default_factory=list)
    pipeline_flow: str = "FULL"  # FULL, FETCH_ONLY, INGEST_ONLY
    dry_run: bool = False
    window_start: Optional[str] = None
    window_end: Optional[str] = None
    window_split: Optional[str] = None  # none, day, week, month
    window_split_days: Optional[int] = None  # days per split when splitting by day (1/10/30)
    window_compensation: int = 0  # compensation, in hours
    ingest_source: Optional[str] = None
    store_id: Optional[int] = None
    pg_dsn: Optional[str] = None
    api_token: Optional[str] = None
    extra_args: Dict[str, Any] = field(default_factory=dict)
    env_vars: Dict[str, str] = field(default_factory=dict)  # extra environment variables

    # Pipeline settings (added later)
    pipeline: str = "api_ods_dwd"  # pipeline type
    processing_mode: str = "increment_only"  # increment_only / verify_only / increment_verify
    fetch_before_verify: bool = False  # fetch from the API before verifying (verify_only mode only)
    window_mode: str = "lookback"  # lookback / custom
    lookback_hours: int = 24  # look-back window, in hours
    overlap_seconds: int = 600  # overlap margin, in seconds
@dataclass
class TaskHistory:
    """Record of one task execution."""

    id: str
    task_codes: List[str]
    status: TaskStatus
    start_time: datetime
    end_time: Optional[datetime] = None
    exit_code: Optional[int] = None
    command: str = ""
    output_log: str = ""
    error_message: str = ""
    summary: Dict[str, Any] = field(default_factory=dict)

    @property
    def duration_seconds(self) -> Optional[float]:
        """Elapsed seconds between start and end, or ``None`` if unfinished."""
        if self.end_time and self.start_time:
            return (self.end_time - self.start_time).total_seconds()
        return None

    @property
    def duration_str(self) -> str:
        """Human-readable duration, or ``"-"`` when it is unknown.

        Formats: ``"S.S秒"`` below one minute, ``"M分S秒"`` below one hour,
        ``"H时M分"`` otherwise.

        The unit suffixes were missing before, so two minutes five seconds
        was rendered as the ambiguous ``"25"``. The value is also rounded to
        whole seconds before it is split, so the seconds part can no longer
        be displayed as ``60``.
        """
        secs = self.duration_seconds
        if secs is None:
            return "-"
        # Compare the rounded value so 59.96 s is not shown as "60.0秒".
        if round(secs, 1) < 60:
            return f"{secs:.1f}秒"
        total = int(round(secs))
        if total < 3600:
            mins, rest = divmod(total, 60)
            return f"{mins}分{rest}秒"
        hours, rest = divmod(total, 3600)
        return f"{hours}时{rest // 60}分"
@dataclass
class QueuedTask:
    """A task waiting in, or processed by, the execution queue."""

    id: str
    config: TaskConfig
    status: TaskStatus = TaskStatus.PENDING
    # Timestamps: creation is filled in automatically, the others are set
    # by whoever runs the task.
    created_at: datetime = field(default_factory=datetime.now)
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None
    # Execution results
    output: str = ""
    error: str = ""
    exit_code: Optional[int] = None

669
gui/models/task_registry.py Normal file
View File

@@ -0,0 +1,669 @@
# -*- coding: utf-8 -*-
# AI_CHANGELOG [2026-02-13] 移除 DWS_RECALL_INDEX/DWS_INTIMACY_INDEX 任务定义
"""任务注册表:定义所有可用任务及其业务域分组。
从后端 ods_tasks 动态获取任务定义,并按业务域分组,供 UI 使用。
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Sequence, Tuple
# 尝试从后端导入 ODS 任务定义
try:
from tasks.ods.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
_HAS_BACKEND = True
except ImportError:
_HAS_BACKEND = False
ENABLED_ODS_CODES = set()
ODS_TASK_SPECS = ()
class BusinessDomain(Enum):
    """Business domains used to group tasks."""

    MEMBER = "member"  # members
    SETTLEMENT = "settlement"  # settlement / payment
    ASSISTANT = "assistant"  # assistants
    GOODS = "goods"  # goods / sales
    TABLE = "table"  # tables
    PROMOTION = "promotion"  # group buying / coupons
    INVENTORY = "inventory"  # inventory
    SCHEMA = "schema"  # schema initialization
    DWD = "dwd"  # DWD loading
    DWS = "dws"  # DWS aggregation
    INDEX = "index"  # index calculation
    QUALITY = "quality"  # quality checks
    OTHER = "other"  # everything else
# Display label (Chinese) for each business domain.
DOMAIN_LABELS: Dict[BusinessDomain, str] = {
BusinessDomain.MEMBER: "会员",
BusinessDomain.SETTLEMENT: "结算/支付",
BusinessDomain.ASSISTANT: "助教",
BusinessDomain.GOODS: "商品/销售",
BusinessDomain.TABLE: "台桌",
BusinessDomain.PROMOTION: "团购/优惠券",
BusinessDomain.INVENTORY: "库存",
BusinessDomain.SCHEMA: "Schema 初始化",
BusinessDomain.DWD: "DWD 装载",
BusinessDomain.DWS: "DWS 汇总",
BusinessDomain.INDEX: "指数计算",
BusinessDomain.QUALITY: "质量检查",
BusinessDomain.OTHER: "其他",
}
@dataclass
class TaskDefinition:
    """Static description of one task that can be selected and run."""

    # Identity and display
    code: str  # task code
    name: str  # display name
    description: str  # description
    domain: BusinessDomain  # business domain

    # Behaviour flags
    requires_window: bool = True  # whether a time window is needed
    is_ods: bool = False  # whether this is an ODS task
    is_dimension: bool = False  # dimension-type task (treated separately when verifying)
    default_enabled: bool = True  # whether selected by default
# Mapping from ODS task code to its business domain.
ODS_DOMAIN_MAP: Dict[str, BusinessDomain] = {
# Member
"ODS_MEMBER": BusinessDomain.MEMBER,
"ODS_MEMBER_CARD": BusinessDomain.MEMBER,
"ODS_MEMBER_BALANCE": BusinessDomain.MEMBER,
# Settlement / payment
"ODS_PAYMENT": BusinessDomain.SETTLEMENT,
"ODS_REFUND": BusinessDomain.SETTLEMENT,
"ODS_SETTLEMENT_RECORDS": BusinessDomain.SETTLEMENT,
"ODS_RECHARGE_SETTLE": BusinessDomain.SETTLEMENT,
"ODS_SETTLEMENT_TICKET": BusinessDomain.SETTLEMENT,
# Assistant
"ODS_ASSISTANT_ACCOUNT": BusinessDomain.ASSISTANT,
"ODS_ASSISTANT_LEDGER": BusinessDomain.ASSISTANT,
"ODS_ASSISTANT_ABOLISH": BusinessDomain.ASSISTANT,
# Goods / sales
"ODS_TENANT_GOODS": BusinessDomain.GOODS,
"ODS_STORE_GOODS": BusinessDomain.GOODS,
"ODS_STORE_GOODS_SALES": BusinessDomain.GOODS,
"ODS_GOODS_CATEGORY": BusinessDomain.GOODS,
# Table
"ODS_TABLES": BusinessDomain.TABLE,
"ODS_TABLE_USE": BusinessDomain.TABLE,
"ODS_TABLE_FEE_DISCOUNT": BusinessDomain.TABLE,
# Group buying / coupons
"ODS_GROUP_PACKAGE": BusinessDomain.PROMOTION,
"ODS_GROUP_BUY_REDEMPTION": BusinessDomain.PROMOTION,
"ODS_PLATFORM_COUPON": BusinessDomain.PROMOTION,
# Inventory
"ODS_INVENTORY_STOCK": BusinessDomain.INVENTORY,
"ODS_INVENTORY_CHANGE": BusinessDomain.INVENTORY,
}
# Display name (Chinese) for each ODS task code.
ODS_DISPLAY_NAMES: Dict[str, str] = {
"ODS_MEMBER": "会员档案",
"ODS_MEMBER_CARD": "会员储值卡",
"ODS_MEMBER_BALANCE": "会员余额变动",
"ODS_PAYMENT": "支付流水",
"ODS_REFUND": "退款流水",
"ODS_SETTLEMENT_RECORDS": "结账记录",
"ODS_RECHARGE_SETTLE": "充值结算",
"ODS_SETTLEMENT_TICKET": "结账小票",
"ODS_ASSISTANT_ACCOUNT": "助教账号",
"ODS_ASSISTANT_LEDGER": "助教流水",
"ODS_ASSISTANT_ABOLISH": "助教作废",
"ODS_TENANT_GOODS": "租户商品",
"ODS_STORE_GOODS": "门店商品",
"ODS_STORE_GOODS_SALES": "商品销售流水",
"ODS_GOODS_CATEGORY": "商品分类",
"ODS_TABLES": "台桌维表",
"ODS_TABLE_USE": "台费计费流水",
"ODS_TABLE_FEE_DISCOUNT": "台费折扣调账",
"ODS_GROUP_PACKAGE": "团购套餐",
"ODS_GROUP_BUY_REDEMPTION": "团购核销",
"ODS_PLATFORM_COUPON": "平台券核销",
"ODS_INVENTORY_STOCK": "库存汇总",
"ODS_INVENTORY_CHANGE": "库存变化",
}
# Dimension-type ODS tasks (usually handled separately during verification).
DIMENSION_ODS_CODES = {
"ODS_MEMBER",
"ODS_MEMBER_CARD",
"ODS_ASSISTANT_ACCOUNT",
"ODS_TENANT_GOODS",
"ODS_STORE_GOODS",
"ODS_GOODS_CATEGORY",
"ODS_TABLES",
"ODS_GROUP_PACKAGE",
}
# Fact-type ODS tasks (these need a time window).
FACT_ODS_CODES = {
"ODS_MEMBER_BALANCE",
"ODS_PAYMENT",
"ODS_REFUND",
"ODS_SETTLEMENT_RECORDS",
"ODS_RECHARGE_SETTLE",
"ODS_SETTLEMENT_TICKET",
"ODS_ASSISTANT_LEDGER",
"ODS_ASSISTANT_ABOLISH",
"ODS_STORE_GOODS_SALES",
"ODS_TABLE_USE",
"ODS_TABLE_FEE_DISCOUNT",
"ODS_GROUP_BUY_REDEMPTION",
"ODS_PLATFORM_COUPON",
"ODS_INVENTORY_CHANGE",
}
# ======================== DWD table definitions ========================
@dataclass
class DwdTableDefinition:
    """Description of one DWD table entry, used for table-level selection in the GUI."""

    code: str  # table code without the schema prefix, e.g. dim_member
    name: str  # Chinese display name
    description: str  # description
    domain: BusinessDomain  # business domain
    is_dimension: bool = False  # True for dimension tables
    # Fully qualified table names covered by this entry (including any _ex table)
    tables: List[str] = field(default_factory=list)
# DWD table definitions, grouped by business domain.
# Positional arguments are: code, name, description, domain, is_dimension, tables.
DWD_TABLE_DEFINITIONS: List[DwdTableDefinition] = [
# ---- Member ----
DwdTableDefinition(
"dim_member", "会员维度", "会员基本信息维度表",
BusinessDomain.MEMBER, True,
["billiards_dwd.dim_member", "billiards_dwd.dim_member_ex"],
),
DwdTableDefinition(
"dim_member_card_account", "会员储值卡", "会员储值卡账户维度表",
BusinessDomain.MEMBER, True,
["billiards_dwd.dim_member_card_account", "billiards_dwd.dim_member_card_account_ex"],
),
DwdTableDefinition(
"dwd_member_balance_change", "余额变动", "会员余额变动事实表",
BusinessDomain.MEMBER, False,
["billiards_dwd.dwd_member_balance_change", "billiards_dwd.dwd_member_balance_change_ex"],
),
# ---- Settlement / payment ----
DwdTableDefinition(
"dwd_settlement_head", "结账记录", "结账/结算事实表",
BusinessDomain.SETTLEMENT, False,
["billiards_dwd.dwd_settlement_head", "billiards_dwd.dwd_settlement_head_ex"],
),
DwdTableDefinition(
"dwd_payment", "支付流水", "支付明细事实表",
BusinessDomain.SETTLEMENT, False,
["billiards_dwd.dwd_payment"],
),
DwdTableDefinition(
"dwd_refund", "退款流水", "退款明细事实表",
BusinessDomain.SETTLEMENT, False,
["billiards_dwd.dwd_refund", "billiards_dwd.dwd_refund_ex"],
),
DwdTableDefinition(
"dwd_recharge_order", "充值订单", "充值结算事实表",
BusinessDomain.SETTLEMENT, False,
["billiards_dwd.dwd_recharge_order", "billiards_dwd.dwd_recharge_order_ex"],
),
# ---- Assistant ----
DwdTableDefinition(
"dim_assistant", "助教维度", "助教基本信息维度表",
BusinessDomain.ASSISTANT, True,
["billiards_dwd.dim_assistant", "billiards_dwd.dim_assistant_ex"],
),
DwdTableDefinition(
"dwd_assistant_service_log", "助教服务流水", "助教服务计费事实表",
BusinessDomain.ASSISTANT, False,
["billiards_dwd.dwd_assistant_service_log", "billiards_dwd.dwd_assistant_service_log_ex"],
),
DwdTableDefinition(
"dwd_assistant_trash_event", "助教作废", "助教作废事件事实表",
BusinessDomain.ASSISTANT, False,
["billiards_dwd.dwd_assistant_trash_event", "billiards_dwd.dwd_assistant_trash_event_ex"],
),
# ---- Goods / sales ----
DwdTableDefinition(
"dim_tenant_goods", "租户商品", "租户商品维度表",
BusinessDomain.GOODS, True,
["billiards_dwd.dim_tenant_goods", "billiards_dwd.dim_tenant_goods_ex"],
),
DwdTableDefinition(
"dim_store_goods", "门店商品", "门店商品维度表",
BusinessDomain.GOODS, True,
["billiards_dwd.dim_store_goods", "billiards_dwd.dim_store_goods_ex"],
),
DwdTableDefinition(
"dim_goods_category", "商品分类", "商品分类维度表",
BusinessDomain.GOODS, True,
["billiards_dwd.dim_goods_category"],
),
DwdTableDefinition(
"dwd_store_goods_sale", "商品销售", "商品销售事实表",
BusinessDomain.GOODS, False,
["billiards_dwd.dwd_store_goods_sale", "billiards_dwd.dwd_store_goods_sale_ex"],
),
# ---- Table ----
DwdTableDefinition(
"dim_site", "门店维度", "门店基本信息维度表",
BusinessDomain.TABLE, True,
["billiards_dwd.dim_site", "billiards_dwd.dim_site_ex"],
),
DwdTableDefinition(
"dim_table", "台桌维度", "台桌基本信息维度表",
BusinessDomain.TABLE, True,
["billiards_dwd.dim_table", "billiards_dwd.dim_table_ex"],
),
DwdTableDefinition(
"dwd_table_fee_log", "台费流水", "台费计费事实表",
BusinessDomain.TABLE, False,
["billiards_dwd.dwd_table_fee_log", "billiards_dwd.dwd_table_fee_log_ex"],
),
DwdTableDefinition(
"dwd_table_fee_adjust", "台费折扣调账", "台费折扣调账事实表",
BusinessDomain.TABLE, False,
["billiards_dwd.dwd_table_fee_adjust", "billiards_dwd.dwd_table_fee_adjust_ex"],
),
# ---- Group buying / coupons ----
DwdTableDefinition(
"dim_groupbuy_package", "团购套餐", "团购套餐维度表",
BusinessDomain.PROMOTION, True,
["billiards_dwd.dim_groupbuy_package", "billiards_dwd.dim_groupbuy_package_ex"],
),
DwdTableDefinition(
"dwd_groupbuy_redemption", "团购核销", "团购核销事实表",
BusinessDomain.PROMOTION, False,
["billiards_dwd.dwd_groupbuy_redemption", "billiards_dwd.dwd_groupbuy_redemption_ex"],
),
DwdTableDefinition(
"dwd_platform_coupon_redemption", "平台券核销", "平台券核销事实表",
BusinessDomain.PROMOTION, False,
["billiards_dwd.dwd_platform_coupon_redemption", "billiards_dwd.dwd_platform_coupon_redemption_ex"],
),
]
# Order in which business domains are shown for DWD tables.
DWD_TABLE_DOMAIN_ORDER: List[BusinessDomain] = [
BusinessDomain.MEMBER,
BusinessDomain.SETTLEMENT,
BusinessDomain.ASSISTANT,
BusinessDomain.GOODS,
BusinessDomain.TABLE,
BusinessDomain.PROMOTION,
]
def get_dwd_tables_grouped() -> Dict[BusinessDomain, List[DwdTableDefinition]]:
    """Return the DWD table definitions bucketed by business domain.

    Domains appear in the order they are first met in
    ``DWD_TABLE_DEFINITIONS``; each bucket keeps definition order.
    """
    by_domain: Dict[BusinessDomain, List[DwdTableDefinition]] = {}
    for definition in DWD_TABLE_DEFINITIONS:
        if definition.domain not in by_domain:
            by_domain[definition.domain] = []
        by_domain[definition.domain].append(definition)
    return by_domain
def get_all_dwd_table_codes() -> List[str]:
    """Return the code of every DWD table definition, in definition order."""
    codes: List[str] = []
    for definition in DWD_TABLE_DEFINITIONS:
        codes.append(definition.code)
    return codes
def resolve_dwd_table_names(codes: Sequence[str]) -> List[str]:
    """Expand DWD table codes into fully qualified table names.

    Matching is case-insensitive. The result follows the order of
    ``DWD_TABLE_DEFINITIONS`` (not the order of ``codes``) and contains
    every name listed in each matching definition's ``tables``.
    """
    wanted = {code.lower() for code in codes}
    return [
        table_name
        for definition in DWD_TABLE_DEFINITIONS
        if definition.code.lower() in wanted
        for table_name in definition.tables
    ]
# Definitions of all non-ODS tasks.
NON_ODS_TASKS: List[TaskDefinition] = [
# DWD load (kept as a single scheduled task; table-level selection is
# controlled through the DWD_ONLY_TABLES environment variable)
TaskDefinition(
code="DWD_LOAD_FROM_ODS",
name="ODS→DWD 装载",
description="从 ODS 增量装载到 DWD",
domain=BusinessDomain.DWD,
requires_window=True,
),
TaskDefinition(
code="DWD_QUALITY_CHECK",
name="DWD 质量检查",
description="执行 DWD 数据质量检查",
domain=BusinessDomain.QUALITY,
requires_window=False,
),
TaskDefinition(
code="DWS_BUILD_ORDER_SUMMARY",
name="构建订单汇总",
description="重算 DWS 订单汇总表",
domain=BusinessDomain.DWS,
requires_window=False,
),
# DWS aggregation tasks
TaskDefinition(
code="DWS_ASSISTANT_DAILY",
name="助教日度明细",
description="汇总助教日度服务、时长与收入指标",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_ASSISTANT_MONTHLY",
name="助教月度汇总",
description="汇总助教月度绩效与服务指标",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_ASSISTANT_CUSTOMER",
name="助教客户统计",
description="统计助教与客户的服务关系与滚动窗口指标",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_ASSISTANT_SALARY",
name="助教工资计算",
description="计算助教月度工资与奖金明细",
domain=BusinessDomain.DWS,
requires_window=True,
default_enabled=False,
),
TaskDefinition(
code="DWS_ASSISTANT_FINANCE",
name="助教财务分析",
description="汇总助教日度财务分析指标",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_MEMBER_CONSUMPTION",
name="会员消费汇总",
description="汇总会员消费行为与滚动窗口指标",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_MEMBER_VISIT",
name="会员来店明细",
description="记录会员来店消费明细与服务列表",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_FINANCE_DAILY",
name="财务日度汇总",
description="汇总当日财务发生额、优惠与现金流",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_FINANCE_RECHARGE",
name="财务充值统计",
description="统计充值笔数、金额与卡余额",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_FINANCE_INCOME_STRUCTURE",
name="财务收入结构",
description="统计收入结构分布",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_FINANCE_DISCOUNT_DETAIL",
name="优惠明细分析",
description="拆分优惠构成与占比",
domain=BusinessDomain.DWS,
requires_window=True,
),
TaskDefinition(
code="DWS_MV_REFRESH_FINANCE_DAILY",
name="物化刷新-财务日汇总",
description="刷新财务日汇总物化视图L1-L4",
domain=BusinessDomain.DWS,
requires_window=False,
default_enabled=False,
),
TaskDefinition(
code="DWS_MV_REFRESH_ASSISTANT_DAILY",
name="物化刷新-助教日明细",
description="刷新助教日明细物化视图L1-L4",
domain=BusinessDomain.DWS,
requires_window=False,
default_enabled=False,
),
TaskDefinition(
code="DWS_RETENTION_CLEANUP",
name="时间分层清理",
description="按配置清理历史 DWS 数据",
domain=BusinessDomain.DWS,
requires_window=True,
default_enabled=False,
),
# DWS index calculation
TaskDefinition(
code="DWS_WINBACK_INDEX",
name="老客挽回指数WBI",
description="计算老客挽回优先级,基于个人周期超期、降频、价值与充值压力",
domain=BusinessDomain.INDEX,
requires_window=False,
),
TaskDefinition(
code="DWS_NEWCONV_INDEX",
name="新客转化指数NCI",
description="计算新客二访/三访转化紧迫度与价值",
domain=BusinessDomain.INDEX,
requires_window=False,
),
TaskDefinition(
code="DWS_RELATION_INDEX",
name="关系指数RS/OS/MS/ML",
description="单任务计算关系强度、归属份额、升温动量、付费关联",
domain=BusinessDomain.INDEX,
requires_window=False,
),
TaskDefinition(
code="DWS_ML_MANUAL_IMPORT",
name="ML人工台账导入",
description="导入人工台账并按日/30天批次覆盖写入 ML 归因明细",
domain=BusinessDomain.INDEX,
requires_window=False,
default_enabled=False,
),
# Schema initialization
TaskDefinition(
code="INIT_ODS_SCHEMA",
name="初始化 ODS Schema",
description="创建/重建 ODS 表结构",
domain=BusinessDomain.SCHEMA,
requires_window=False,
default_enabled=False,
),
TaskDefinition(
code="INIT_DWD_SCHEMA",
name="初始化 DWD Schema",
description="创建/重建 DWD 表结构",
domain=BusinessDomain.SCHEMA,
requires_window=False,
default_enabled=False,
),
TaskDefinition(
code="INIT_DWS_SCHEMA",
name="初始化 DWS Schema",
description="创建/重建 DWS 表结构",
domain=BusinessDomain.SCHEMA,
requires_window=False,
default_enabled=False,
),
TaskDefinition(
code="SEED_DWS_CONFIG",
name="初始化 DWS 配置",
description="写入 DWS 配置表基础数据",
domain=BusinessDomain.SCHEMA,
requires_window=False,
default_enabled=False,
),
# Other
TaskDefinition(
code="MANUAL_INGEST",
name="手工数据灌入",
description="从本地 JSON 回放入库",
domain=BusinessDomain.OTHER,
requires_window=False,
default_enabled=False,
),
TaskDefinition(
code="ODS_JSON_ARCHIVE",
name="ODS JSON 归档",
description="在线抓取 ODS 接口数据并落盘 JSON",
domain=BusinessDomain.OTHER,
requires_window=True,
default_enabled=False,
),
TaskDefinition(
code="CHECK_CUTOFF",
name="检查 Cutoff",
description="查看各表数据截止时间",
domain=BusinessDomain.QUALITY,
requires_window=False,
),
TaskDefinition(
code="DATA_INTEGRITY_CHECK",
name="数据完整性检查",
description="检查 ODS/DWD 数据完整性",
domain=BusinessDomain.QUALITY,
requires_window=True,
),
]
def _build_ods_task_definition(code: str) -> TaskDefinition:
    """Build the task definition for an ODS task code.

    The domain and display name come from ``ODS_DOMAIN_MAP`` and
    ``ODS_DISPLAY_NAMES`` (falling back to ``BusinessDomain.OTHER`` and the
    code itself). Dimension-type codes do not require a time window.

    The description defaults to a generated text. When the backend task
    specs are available and the matching spec carries a description that
    contains CJK characters within its first 10 characters, that
    description is used instead. Previously the backend lookup had no
    effect: every branch left the generated text in place.

    NOTE(review): a description without CJK characters is treated as
    garbled, following the original heuristic — confirm this also suits
    backend descriptions written in plain ASCII.
    """
    name = ODS_DISPLAY_NAMES.get(code, code)
    is_dimension = code in DIMENSION_ODS_CODES

    # Generated fallback, used when the backend has nothing usable.
    description = f"抓取{name}到 ODS"
    if _HAS_BACKEND:
        spec = next((item for item in ODS_TASK_SPECS if item.code == code), None)
        backend_text = getattr(spec, "description", None) if spec is not None else None
        if backend_text and any("\u4e00" <= ch <= "\u9fff" for ch in backend_text[:10]):
            description = backend_text

    return TaskDefinition(
        code=code,
        name=name,
        description=description,
        domain=ODS_DOMAIN_MAP.get(code, BusinessDomain.OTHER),
        requires_window=not is_dimension,
        is_ods=True,
        is_dimension=is_dimension,
    )
class TaskRegistry:
    """Process-wide registry of every available task definition.

    The class is a singleton: each call to ``TaskRegistry()`` returns the
    same instance, and the task table is loaded only once.
    """

    _instance: Optional["TaskRegistry"] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on every TaskRegistry() call; this flag lets it
            # skip the reload.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._initialized = True
        self._tasks: Dict[str, TaskDefinition] = {}
        self._load_tasks()

    @staticmethod
    def _ordered_ods_codes(enabled, preferred_order) -> List[str]:
        """Return the ``enabled`` codes in a deterministic order.

        Codes that appear in ``preferred_order`` come first, in that order;
        any remaining enabled codes follow, sorted alphabetically.
        """
        enabled_set = set(enabled)
        ordered = [code for code in preferred_order if code in enabled_set]
        ordered.extend(sorted(enabled_set.difference(ordered)))
        return ordered

    def _load_tasks(self):
        """Populate the registry with ODS tasks followed by non-ODS tasks.

        ODS codes come from the backend when it is importable, otherwise
        from ``ODS_DOMAIN_MAP``. They used to be inserted by iterating a
        ``set``, so the registry order (and every list derived from it)
        could differ between runs; the order is now fixed.
        """
        enabled = ENABLED_ODS_CODES if _HAS_BACKEND else ODS_DOMAIN_MAP.keys()
        for code in self._ordered_ods_codes(enabled, ODS_DOMAIN_MAP):
            self._tasks[code] = _build_ods_task_definition(code)
        for task_def in NON_ODS_TASKS:
            self._tasks[task_def.code] = task_def

    def get_task(self, code: str) -> Optional[TaskDefinition]:
        """Return the definition for ``code`` (exact match) or ``None``."""
        return self._tasks.get(code)

    def get_all_tasks(self) -> List[TaskDefinition]:
        """Return every registered task, in registration order."""
        return list(self._tasks.values())

    def get_ods_tasks(self) -> List[TaskDefinition]:
        """Return all ODS tasks."""
        return [t for t in self._tasks.values() if t.is_ods]

    def get_fact_ods_tasks(self) -> List[TaskDefinition]:
        """Return the ODS tasks that are not dimension-type."""
        return [t for t in self._tasks.values() if t.is_ods and not t.is_dimension]

    def get_dimension_ods_tasks(self) -> List[TaskDefinition]:
        """Return the dimension-type ODS tasks."""
        return [t for t in self._tasks.values() if t.is_ods and t.is_dimension]

    def get_tasks_by_domain(self, domain: BusinessDomain) -> List[TaskDefinition]:
        """Return every task that belongs to ``domain``."""
        return [t for t in self._tasks.values() if t.domain == domain]

    def get_ods_tasks_grouped(self) -> Dict[BusinessDomain, List[TaskDefinition]]:
        """Return the ODS tasks bucketed by business domain."""
        grouped: Dict[BusinessDomain, List[TaskDefinition]] = {}
        for task in self.get_ods_tasks():
            grouped.setdefault(task.domain, []).append(task)
        return grouped

    def get_non_ods_tasks(self) -> List[TaskDefinition]:
        """Return every task that is not an ODS task."""
        return [t for t in self._tasks.values() if not t.is_ods]
# Global registry instance, created when this module is imported.
task_registry = TaskRegistry()
# Convenience functions
def get_ods_task_codes() -> List[str]:
    """Return the code of every ODS task in the global registry."""
    codes: List[str] = []
    for task in task_registry.get_ods_tasks():
        codes.append(task.code)
    return codes
def get_fact_ods_task_codes() -> List[str]:
    """Return the codes of the fact-type ODS tasks in the global registry."""
    codes: List[str] = []
    for task in task_registry.get_fact_ods_tasks():
        codes.append(task.code)
    return codes
def get_dimension_ods_task_codes() -> List[str]:
    """Return the codes of the dimension-type ODS tasks in the global registry."""
    codes: List[str] = []
    for task in task_registry.get_dimension_ods_tasks():
        codes.append(task.code)
    return codes
def get_all_task_tuples() -> List[Tuple[str, str, str]]:
    """Return ``(code, name, description)`` for every registered task."""
    rows: List[Tuple[str, str, str]] = []
    for task in task_registry.get_all_tasks():
        rows.append((task.code, task.name, task.description))
    return rows
def get_ods_tasks_for_ui() -> List[Tuple[str, str, BusinessDomain]]:
    """Return ``(code, display_name, domain)`` for every ODS task, for the UI."""
    rows: List[Tuple[str, str, BusinessDomain]] = []
    for task in task_registry.get_ods_tasks():
        rows.append((task.code, task.name, task.domain))
    return rows