This commit is contained in:
Neo
2026-01-27 22:47:05 +08:00
parent a6ad343092
commit f5f9a7eb66
476 changed files with 381543 additions and 5819 deletions

View File

@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
"""Utility package: re-exports the CLI builder, config helper and app settings."""
from .cli_builder import CLIBuilder
from .config_helper import ConfigHelper
from .app_settings import app_settings, AppSettings
__all__ = ["CLIBuilder", "ConfigHelper", "app_settings", "AppSettings"]

View File

@@ -0,0 +1,434 @@
# -*- coding: utf-8 -*-
"""应用程序设置管理"""
import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, Optional
def _section_property(section: str, key: str, default: Any, doc: str) -> property:
    """Build a read/write property backed by ``_settings[section][key]``.

    The getter returns ``default`` when the section or key is missing; the
    setter creates the section on demand and persists the settings file
    immediately. Only use this with immutable defaults, because the same
    ``default`` object is returned on every miss.

    Args:
        section: Top-level key in the settings dictionary.
        key: Key inside that section.
        default: Value returned when nothing is stored.
        doc: Docstring attached to the generated property.

    Returns:
        A ``property`` object to assign as a class attribute.
    """

    def _read(self):
        return self._settings.get(section, {}).get(key, default)

    def _write(self, value):
        self._settings.setdefault(section, {})[key] = value
        self._save()

    return property(_read, _write, doc=doc)


class AppSettings:
    """Singleton holding the GUI settings, persisted as JSON.

    Settings live in ``settings.json`` inside a per-user directory (see
    ``_get_settings_path``). Every setter writes the file immediately.
    Persistence is best-effort: read/write failures never raise.
    """

    _instance: Optional["AppSettings"] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._settings_file = self._get_settings_path()
        self._settings = self._default_settings()
        self._load()
        # Nothing configured yet: try to locate the ETL project automatically.
        if not self._settings["etl_project_path"]:
            self._auto_detect_paths()
        # Marked last so a failed initialisation is retried on the next call
        # instead of leaving a half-built singleton behind.
        self._initialized = True

    @staticmethod
    def _default_settings() -> Dict[str, Any]:
        """Return a fresh copy of the built-in default settings."""
        return {
            "etl_project_path": "",  # ETL project path
            "env_file_path": "",  # .env file path
            # Auto-update section
            "auto_update": {
                "hours": 24,
                "overlap_seconds": 3600,
                "include_dwd": True,
                "auto_verify": False,
                "selected_tasks": [],
            },
            # Integrity-check section
            "integrity_check": {
                "mode": "history",
                "history_start": "",
                "history_end": "",
                "lookback_hours": 24,
                "include_dimensions": False,
                "auto_backfill": False,
                "ods_tasks": "",
            },
            # Advanced section
            "advanced": {
                "pipeline_flow": "FULL",
                "dry_run": False,
                "window_start": "",
                "window_end": "",
                "window_split": "none",
                "window_compensation": 0,
                "ingest_source": "",
                "store_id": "",
                "pg_dsn": "",
                "api_token": "",
            },
        }

    def _get_settings_path(self) -> Path:
        """Return the settings file path, creating its directory if possible.

        On Windows with ``APPDATA`` set the directory is
        ``%APPDATA%/ETL管理系统``; otherwise it is ``~/.etl_gui``.
        """
        app_data = os.environ.get("APPDATA", "") if sys.platform == "win32" else ""
        if app_data:
            settings_dir = Path(app_data) / "ETL管理系统"
        else:
            settings_dir = Path.home() / ".etl_gui"
        try:
            settings_dir.mkdir(parents=True, exist_ok=True)
        except OSError:
            # An unwritable profile must not prevent start-up; later saves
            # simply fail in their own best-effort handlers.
            pass
        return settings_dir / "settings.json"

    def _adopt_project_path(self, project: Path) -> None:
        """Store ``project`` (and its ``.env`` file when present) and persist."""
        self._settings["etl_project_path"] = str(project)
        env_file = project / ".env"
        if env_file.exists():
            self._settings["env_file_path"] = str(env_file)
        self._save()

    def _auto_detect_paths(self):
        """Try to locate the ETL project (a directory containing ``cli/main.py``)."""
        # Strategy 1: running from the source tree, two levels above this file.
        try:
            source_dir = Path(__file__).resolve().parents[2]
            if (source_dir / "cli" / "main.py").exists():
                # Stored relative to the current working directory.
                self._adopt_project_path(
                    Path(os.path.relpath(source_dir, Path.cwd()))
                )
                return
        except Exception:
            # Detection is best-effort (e.g. relpath raises across Windows
            # drives); fall through to the next strategy.
            pass
        # Strategy 2: well-known locations relative to the working directory.
        for candidate in (Path("etl_billiards"), Path(".")):
            if candidate.exists() and (candidate / "cli" / "main.py").exists():
                self._adopt_project_path(candidate)
                return

    def _load(self):
        """Merge the settings file into the defaults, ignoring unreadable data.

        Sections that are dictionaries in the defaults are merged key by key,
        so keys missing from an older file keep their default values. A
        section whose stored value is not a dictionary is ignored.
        """
        if not self._settings_file.exists():
            return
        try:
            loaded = json.loads(self._settings_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return
        if not isinstance(loaded, dict):
            return
        for name, stored in loaded.items():
            current = self._settings.get(name)
            if isinstance(current, dict):
                if isinstance(stored, dict):
                    current.update(stored)
            else:
                self._settings[name] = stored

    def _save(self):
        """Write the settings file; failures are ignored (best-effort)."""
        try:
            self._settings_file.write_text(
                json.dumps(self._settings, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
        except (OSError, TypeError, ValueError):
            pass

    @property
    def etl_project_path(self) -> str:
        """Configured ETL project path ("" when unset)."""
        return self._settings.get("etl_project_path", "")

    @etl_project_path.setter
    def etl_project_path(self, value: str):
        self._settings["etl_project_path"] = value
        # Keep the .env path in sync when the project ships one.
        if value:
            env_path = Path(value) / ".env"
            if env_path.exists():
                self._settings["env_file_path"] = str(env_path)
        self._save()

    @property
    def env_file_path(self) -> str:
        """Configured .env path, falling back to ``<project>/.env``."""
        path = self._settings.get("env_file_path", "")
        if not path and self.etl_project_path:
            path = str(Path(self.etl_project_path) / ".env")
        return path

    @env_file_path.setter
    def env_file_path(self, value: str):
        self._settings["env_file_path"] = value
        self._save()

    def is_configured(self) -> bool:
        """Return True when the project path exists and contains ``cli/main.py``."""
        return self.validate()[0]

    def validate(self) -> tuple[bool, str]:
        """Check the configured project path.

        Returns:
            ``(ok, message)`` where ``message`` explains the first failure or
            confirms the configuration is valid.
        """
        path = self.etl_project_path
        if not path:
            return False, "未配置 ETL 项目路径"
        project_path = Path(path)
        if not project_path.exists():
            return False, f"ETL 项目路径不存在: {path}"
        cli_main = project_path / "cli" / "main.py"
        if not cli_main.exists():
            return False, f"找不到 CLI 入口: {cli_main}"
        return True, "配置有效"

    # ==================== Auto-update settings ====================
    auto_update_hours = _section_property(
        "auto_update", "hours", 24, "auto_update.hours (default 24)."
    )
    auto_update_overlap_seconds = _section_property(
        "auto_update", "overlap_seconds", 3600,
        "auto_update.overlap_seconds (default 3600).",
    )
    auto_update_include_dwd = _section_property(
        "auto_update", "include_dwd", True,
        "auto_update.include_dwd (default True).",
    )
    auto_update_auto_verify = _section_property(
        "auto_update", "auto_verify", False,
        "auto_update.auto_verify (default False).",
    )

    # Written out by hand: the default must be a fresh list on every miss.
    @property
    def auto_update_selected_tasks(self) -> list:
        """auto_update.selected_tasks (default: a new empty list)."""
        return self._settings.get("auto_update", {}).get("selected_tasks", [])

    @auto_update_selected_tasks.setter
    def auto_update_selected_tasks(self, value: list):
        self._settings.setdefault("auto_update", {})["selected_tasks"] = value
        self._save()

    # ==================== Integrity-check settings ====================
    integrity_mode = _section_property(
        "integrity_check", "mode", "history",
        'integrity_check.mode (default "history").',
    )
    integrity_history_start = _section_property(
        "integrity_check", "history_start", "",
        'integrity_check.history_start (default "").',
    )
    integrity_history_end = _section_property(
        "integrity_check", "history_end", "",
        'integrity_check.history_end (default "").',
    )
    integrity_lookback_hours = _section_property(
        "integrity_check", "lookback_hours", 24,
        "integrity_check.lookback_hours (default 24).",
    )
    integrity_include_dimensions = _section_property(
        "integrity_check", "include_dimensions", False,
        "integrity_check.include_dimensions (default False).",
    )
    integrity_auto_backfill = _section_property(
        "integrity_check", "auto_backfill", False,
        "integrity_check.auto_backfill (default False).",
    )
    integrity_ods_tasks = _section_property(
        "integrity_check", "ods_tasks", "",
        'integrity_check.ods_tasks (default "").',
    )

    # ==================== Advanced settings ====================
    advanced_pipeline_flow = _section_property(
        "advanced", "pipeline_flow", "FULL",
        'advanced.pipeline_flow (default "FULL").',
    )
    advanced_dry_run = _section_property(
        "advanced", "dry_run", False, "advanced.dry_run (default False)."
    )
    advanced_window_start = _section_property(
        "advanced", "window_start", "", 'advanced.window_start (default "").'
    )
    advanced_window_end = _section_property(
        "advanced", "window_end", "", 'advanced.window_end (default "").'
    )
    advanced_ingest_source = _section_property(
        "advanced", "ingest_source", "", 'advanced.ingest_source (default "").'
    )
    advanced_window_split = _section_property(
        "advanced", "window_split", "none",
        'advanced.window_split (default "none").',
    )
    advanced_window_compensation = _section_property(
        "advanced", "window_compensation", 0,
        "advanced.window_compensation (default 0).",
    )

    def get_all_settings(self) -> Dict[str, Any]:
        """Return an independent snapshot of all settings (for debugging)."""
        import copy

        # Deep copy so callers cannot mutate the live nested sections.
        return copy.deepcopy(self._settings)

    def save_all(self):
        """Force the current settings to be written to disk."""
        self._save()

    # ==================== Task history storage ====================
    def _get_history_path(self) -> Path:
        """Return the task-history file path (next to the settings file)."""
        return self._settings_file.parent / "task_history.json"

    @staticmethod
    def _serialize_task(task) -> Dict[str, Any]:
        """Convert one task object into a JSON-serialisable dictionary."""
        has_config = hasattr(task, "config")
        status = task.status
        return {
            "id": task.id,
            "tasks": task.config.tasks if has_config else [],
            "status": status.value if hasattr(status, "value") else str(status),
            "created_at": task.created_at.isoformat() if task.created_at else None,
            "started_at": task.started_at.isoformat() if task.started_at else None,
            "finished_at": task.finished_at.isoformat() if task.finished_at else None,
            "exit_code": task.exit_code,
            # Long text is truncated to keep the history file small.
            "error": task.error[:500] if task.error else "",
            "output_preview": task.output[:1000] if task.output else "",
            # Snapshot of the run configuration
            "pipeline_flow": task.config.pipeline_flow if has_config else "FULL",
            "window_start": task.config.window_start if has_config else None,
            "window_end": task.config.window_end if has_config else None,
        }

    def save_task_history(self, history_list: list):
        """Persist at most the first 100 tasks of ``history_list``.

        Tasks that cannot be serialised are skipped; any other failure is
        reported on stdout and otherwise ignored.
        """
        try:
            serialized = []
            for task in history_list[:100]:
                try:
                    serialized.append(self._serialize_task(task))
                except Exception:
                    # One malformed record must not lose the whole history.
                    continue
            self._get_history_path().write_text(
                json.dumps(serialized, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
        except Exception as e:
            print(f"保存任务历史失败: {e}")

    def load_task_history(self) -> list:
        """Load the persisted task history.

        Returns:
            The stored list of task dictionaries, or ``[]`` when the file is
            missing, unreadable or does not contain a JSON list.
        """
        try:
            history_path = self._get_history_path()
            if not history_path.exists():
                return []
            data = json.loads(history_path.read_text(encoding="utf-8"))
        except Exception as e:
            print(f"加载任务历史失败: {e}")
            return []
        return data if isinstance(data, list) else []
# Global singleton instance, created when this module is imported
app_settings = AppSettings()

View File

@@ -0,0 +1,131 @@
# -*- coding: utf-8 -*-
"""CLI 命令构建器"""
from typing import List, Dict, Any, Optional
from ..models.task_model import TaskConfig
# Option names the CLI accepts (per the original note: taken from cli/main.py).
# Only ``extra_args`` keys listed here are forwarded to the command line.
CLI_SUPPORTED_ARGS = {
    # Options that take a value
    "store_id", "tasks", "pg_dsn", "pg_host", "pg_port", "pg_name",
    "pg_user", "pg_password", "api_base", "api_token", "api_timeout",
    "api_page_size", "api_retry_max", "window_start", "window_end",
    "export_root", "log_root", "pipeline_flow", "fetch_root",
    "ingest_source", "idle_start", "idle_end",
    # Boolean flags
    "dry_run", "force_window_override", "write_pretty_json", "allow_empty_advance",
}


class CLIBuilder:
    """Translate a task configuration into a ``python -m cli.main`` command."""

    def __init__(self, python_executable: str = "python"):
        """Remember which interpreter the generated commands should start with."""
        self.python_executable = python_executable

    # The annotation is quoted so it is not evaluated when the method is defined.
    def build_command(self, config: "TaskConfig") -> List[str]:
        """Build the argument list for one task configuration.

        Args:
            config: Task configuration; falsy fields are omitted.

        Returns:
            The argument vector, starting with the interpreter and
            ``-m cli.main``.
        """
        cmd = [self.python_executable, "-m", "cli.main"]
        # Task list, comma separated
        if config.tasks:
            cmd.extend(["--tasks", ",".join(config.tasks)])
        # Pipeline flow
        if config.pipeline_flow:
            cmd.extend(["--pipeline-flow", config.pipeline_flow])
        # Dry-run mode
        if config.dry_run:
            cmd.append("--dry-run")
        # Time window
        if config.window_start:
            cmd.extend(["--window-start", config.window_start])
        if config.window_end:
            cmd.extend(["--window-end", config.window_end])
        # Ingest source directory
        if config.ingest_source:
            cmd.extend(["--ingest-source", config.ingest_source])
        # Store ID: 0 is a real value, but None and "" both mean "not set".
        if config.store_id is not None and config.store_id != "":
            cmd.extend(["--store-id", str(config.store_id)])
        # Database DSN
        if config.pg_dsn:
            cmd.extend(["--pg-dsn", config.pg_dsn])
        # API token
        if config.api_token:
            cmd.extend(["--api-token", config.api_token])
        # Extra arguments: forward only those the CLI is known to accept.
        for key, value in (config.extra_args or {}).items():
            if value is None or key not in CLI_SUPPORTED_ARGS:
                continue
            flag = f"--{key.replace('_', '-')}"
            if isinstance(value, bool):
                # Boolean options are bare flags, emitted only when True.
                if value:
                    cmd.append(flag)
            else:
                cmd.extend([flag, str(value)])
        return cmd

    @staticmethod
    def _quote_for_display(arg: str) -> str:
        """Wrap ``arg`` in double quotes when it would otherwise be ambiguous."""
        if arg and not any(ch.isspace() or ch == '"' for ch in arg):
            return arg
        # Escape embedded quotes so the displayed argument stays one token.
        return '"' + arg.replace('"', '\\"') + '"'

    def build_command_string(self, config: "TaskConfig") -> str:
        """Build a single-line command string for display.

        Args:
            config: Task configuration.

        Returns:
            The arguments joined by spaces. Empty arguments and arguments
            containing whitespace or double quotes are quoted.
        """
        return " ".join(
            self._quote_for_display(arg) for arg in self.build_command(config)
        )

    def build_from_dict(self, params: Dict[str, Any]) -> List[str]:
        """Build the argument list from a plain parameter dictionary.

        Args:
            params: Values keyed by ``TaskConfig`` field name; missing keys
                use the defaults given below.

        Returns:
            The argument vector produced by ``build_command``.
        """
        config = TaskConfig(
            tasks=params.get("tasks", []),
            pipeline_flow=params.get("pipeline_flow", "FULL"),
            dry_run=params.get("dry_run", False),
            window_start=params.get("window_start"),
            window_end=params.get("window_end"),
            ingest_source=params.get("ingest_source"),
            store_id=params.get("store_id"),
            pg_dsn=params.get("pg_dsn"),
            api_token=params.get("api_token"),
            extra_args=params.get("extra_args", {}),
        )
        return self.build_command(config)
# Module-level shared instance using the default "python" executable
cli_builder = CLIBuilder()

View File

@@ -0,0 +1,309 @@
# -*- coding: utf-8 -*-
"""配置辅助工具"""
import os
import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
# Environment-variable groups. Each group has a display title, the ordered
# keys it contains, and the subset of keys flagged as sensitive.
ENV_GROUPS = {
    "database": {
        "title": "数据库配置",
        "keys": ["PG_DSN", "PG_HOST", "PG_PORT", "PG_NAME", "PG_USER", "PG_PASSWORD", "PG_CONNECT_TIMEOUT"],
        "sensitive": ["PG_PASSWORD"],
    },
    "api": {
        "title": "API 配置",
        "keys": ["API_BASE", "API_TOKEN", "FICOO_TOKEN", "API_TIMEOUT", "API_PAGE_SIZE", "API_RETRY_MAX"],
        "sensitive": ["API_TOKEN", "FICOO_TOKEN"],
    },
    "store": {
        "title": "门店配置",
        "keys": ["STORE_ID", "TIMEZONE", "SCHEMA_OLTP", "SCHEMA_ETL"],
        "sensitive": [],
    },
    "paths": {
        "title": "路径配置",
        "keys": ["EXPORT_ROOT", "LOG_ROOT", "FETCH_ROOT", "INGEST_SOURCE_DIR", "JSON_FETCH_ROOT", "JSON_SOURCE_DIR"],
        "sensitive": [],
    },
    "pipeline": {
        "title": "流水线配置",
        "keys": ["PIPELINE_FLOW", "RUN_TASKS", "OVERLAP_SECONDS"],
        "sensitive": [],
    },
    "window": {
        "title": "时间窗口配置",
        "keys": ["WINDOW_START", "WINDOW_END", "WINDOW_BUSY_MIN", "WINDOW_IDLE_MIN", "IDLE_START", "IDLE_END"],
        "sensitive": [],
    },
    "integrity": {
        "title": "数据完整性配置",
        "keys": ["INTEGRITY_MODE", "INTEGRITY_HISTORY_START", "INTEGRITY_HISTORY_END",
                 "INTEGRITY_INCLUDE_DIMENSIONS", "INTEGRITY_AUTO_CHECK", "INTEGRITY_ODS_TASK_CODES"],
        "sensitive": [],
    },
}


class ConfigHelper:
    """Read, validate and write the project's ``.env`` file."""

    def __init__(self, env_path: Optional[Path] = None):
        """Bind the helper to a ``.env`` file.

        Args:
            env_path: Path of the ``.env`` file. When omitted, the path from
                ``app_settings.env_file_path`` is used; if that is empty, the
                file two directory levels above this module is used.
        """
        if env_path is not None:
            self.env_path = Path(env_path)
            return
        # Imported lazily, only when the settings singleton is needed.
        from .app_settings import app_settings

        settings_path = app_settings.env_file_path
        if settings_path:
            self.env_path = Path(settings_path)
        else:
            # Fall back to the source directory.
            self.env_path = Path(__file__).resolve().parents[2] / ".env"

    def load_env(self) -> Dict[str, str]:
        """Parse the ``.env`` file.

        Returns:
            Mapping of variable name to value; empty when the file is missing
            or unreadable. Undecodable bytes are dropped.
        """
        if not self.env_path.exists():
            return {}
        try:
            # utf-8-sig drops a leading BOM, which would otherwise become
            # part of the first variable name.
            content = self.env_path.read_text(encoding="utf-8-sig", errors="ignore")
        except OSError:
            return {}
        env_vars = {}
        for line in content.splitlines():
            parsed = self._parse_line(line)
            if parsed:
                key, value = parsed
                env_vars[key] = value
        return env_vars

    def save_env(self, env_vars: Dict[str, str]) -> bool:
        """Rewrite the ``.env`` file from ``env_vars``.

        Known keys are written under their group heading in ``ENV_GROUPS``
        order; remaining keys go under a trailing "other" heading. The file
        is regenerated from scratch.

        Args:
            env_vars: Variables to write.

        Returns:
            True when the file was written, False on a write error.
        """
        lines = []
        written_keys = set()
        for group_info in ENV_GROUPS.values():
            group_lines = []
            for key in group_info["keys"]:
                if key in env_vars:
                    group_lines.append(self._format_line(key, env_vars[key]))
                    written_keys.add(key)
            if group_lines:
                lines.append(f"\n# {group_info['title']}")
                lines.extend(group_lines)
        # Variables that belong to no group
        other_lines = [
            self._format_line(key, value)
            for key, value in env_vars.items()
            if key not in written_keys
        ]
        if other_lines:
            lines.append("\n# 其他配置")
            lines.extend(other_lines)
        content = "\n".join(lines).strip() + "\n"
        try:
            self.env_path.write_text(content, encoding="utf-8")
        except (OSError, ValueError):
            return False
        return True

    def get_grouped_env(self) -> Dict[str, List[Tuple[str, str, bool]]]:
        """Return the file's variables arranged by group.

        Returns:
            ``{group_id: [(key, value, is_sensitive), ...]}``. Every key of
            every group is listed (value "" when absent). Variables outside
            all groups are collected under ``"other"``, never as sensitive.
        """
        env_vars = self.load_env()
        result = {}
        used_keys = set()
        for group_id, group_info in ENV_GROUPS.items():
            sensitive = group_info.get("sensitive", [])
            result[group_id] = [
                (key, env_vars.get(key, ""), key in sensitive)
                for key in group_info["keys"]
            ]
            used_keys.update(key for key in group_info["keys"] if key in env_vars)
        other_items = [
            (key, value, False)
            for key, value in env_vars.items()
            if key not in used_keys
        ]
        if other_items:
            result["other"] = other_items
        return result

    def validate_env(self, env_vars: Dict[str, str]) -> List[str]:
        """Validate a set of variables.

        Args:
            env_vars: Variables to check.

        Returns:
            Human-readable error messages; empty when nothing is wrong.
        """
        errors = []
        # DSN scheme: libpq accepts both postgresql:// and postgres://.
        pg_dsn = env_vars.get("PG_DSN", "")
        if pg_dsn and not pg_dsn.startswith(("postgresql://", "postgres://")):
            errors.append("PG_DSN 应以 'postgresql://' 开头")
        # Port number
        pg_port = env_vars.get("PG_PORT", "")
        if pg_port:
            try:
                port = int(pg_port)
            except ValueError:
                errors.append("PG_PORT 应为数字")
            else:
                if port < 1 or port > 65535:
                    errors.append("PG_PORT 应在 1-65535 范围内")
        # Store ID must be an integer
        store_id = env_vars.get("STORE_ID", "")
        if store_id:
            try:
                int(store_id)
            except ValueError:
                errors.append("STORE_ID 应为数字")
        # Paths are only checked for being absolute, not for existence.
        for key in ["EXPORT_ROOT", "LOG_ROOT", "FETCH_ROOT"]:
            path = env_vars.get(key, "")
            if path and not os.path.isabs(path):
                errors.append(f"{key} 建议使用绝对路径")
        return errors

    def mask_sensitive(self, value: str, visible_chars: int = 4) -> str:
        """Mask a sensitive value for display.

        Args:
            value: The original value.
            visible_chars: Leading characters left readable; negative values
                are treated as 0.

        Returns:
            The value with everything after the visible prefix replaced by
            ``*``. Values no longer than the prefix are masked entirely.
        """
        if not value:
            return ""
        # A negative count would slice from the end and expose the value.
        visible = max(visible_chars, 0)
        if len(value) <= visible:
            return "*" * len(value)
        return value[:visible] + "*" * (len(value) - visible)

    def _parse_line(self, line: str) -> Optional[Tuple[str, str]]:
        """Parse one ``.env`` line into ``(key, value)``.

        Returns None for blank lines, comments and lines without ``=``.
        """
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            return None
        if stripped.startswith("export "):
            stripped = stripped[7:].strip()
        if "=" not in stripped:
            return None
        key, value = stripped.split("=", 1)
        return key.strip(), self._unquote_value(value)

    def _unquote_value(self, value: str) -> str:
        """Strip the inline comment, a trailing comma and surrounding quotes.

        Double-quoted values also have ``\\"`` and ``\\\\`` unescaped, which
        reverses what ``_format_line`` writes, so values survive a save/load
        round trip. Other backslash sequences are left untouched.
        """
        value = self._strip_inline_comment(value)
        value = value.rstrip(",").strip()
        if not value:
            return value
        quotes = ("'", '"')
        if len(value) >= 2 and value[0] in quotes and value[-1] == value[0]:
            inner = value[1:-1]
            if value[0] == '"':
                inner = re.sub(r'\\(["\\])', r"\1", inner)
            return inner
        # Python-style raw string prefix: r'...' or R"..."
        if (
            len(value) >= 3
            and value[0] in ("r", "R")
            and value[1] in quotes
            and value[-1] == value[1]
        ):
            return value[2:-1]
        return value

    def _strip_inline_comment(self, value: str) -> str:
        """Cut ``value`` at the first ``#`` that is outside quotes.

        A backslash keeps the character after it verbatim, so an escaped
        quote does not open or close a quoted region.
        """
        result = []
        in_quote = False
        quote_char = ""
        escape = False
        for ch in value:
            if escape:
                result.append(ch)
                escape = False
                continue
            if ch == "\\":
                escape = True
                result.append(ch)
                continue
            if ch in ("'", '"'):
                if not in_quote:
                    in_quote = True
                    quote_char = ch
                elif quote_char == ch:
                    in_quote = False
                    quote_char = ""
                result.append(ch)
                continue
            if ch == "#" and not in_quote:
                break
            result.append(ch)
        return "".join(result).rstrip()

    def _format_line(self, key: str, value: str) -> str:
        """Format ``key``/``value`` as one ``.env`` line.

        The value is double-quoted, with ``\\`` and ``"`` escaped, whenever
        writing it bare would not read back unchanged.
        """
        text = str(value)
        needs_quotes = (
            any(c in text for c in [' ', '"', "'", '#', '\n', '\r'])
            # The parser strips edge whitespace and trailing commas.
            or text != text.strip()
            or text.endswith(",")
        )
        if needs_quotes:
            escaped = text.replace('\\', '\\\\').replace('"', '\\"')
            return f'{key}="{escaped}"'
        return f"{key}={text}"

    @staticmethod
    def get_group_title(group_id: str) -> str:
        """Return the display title of a group, or the "other" title if unknown."""
        if group_id in ENV_GROUPS:
            return ENV_GROUPS[group_id]["title"]
        return "其他配置"
# Module-level shared instance; resolves its .env path from AppSettings
config_helper = ConfigHelper()