# -*- coding: utf-8 -*- """任务:初始化运行环境,执行 ODS 与 etl_admin 的 DDL,并准备日志/导出目录。""" from __future__ import annotations from pathlib import Path from typing import Any from .base_task import BaseTask, TaskContext class InitOdsSchemaTask(BaseTask): """通过调度执行初始化:创建必要目录,执行 ODS 与 etl_admin 的 DDL。""" def get_task_code(self) -> str: """返回任务编码。""" return "INIT_ODS_SCHEMA" def extract(self, context: TaskContext) -> dict[str, Any]: """读取 SQL 文件路径,收集需创建的目录。""" base_dir = Path(__file__).resolve().parents[1] / "database" ods_path = Path(self.config.get("schema.ods_file", base_dir / "schema_ODS_doc.sql")) admin_path = Path(self.config.get("schema.etl_admin_file", base_dir / "schema_etl_admin.sql")) if not ods_path.exists(): raise FileNotFoundError(f"找不到 ODS schema 文件: {ods_path}") if not admin_path.exists(): raise FileNotFoundError(f"找不到 etl_admin schema 文件: {admin_path}") log_root = Path(self.config.get("io.log_root") or self.config["io"]["log_root"]) export_root = Path(self.config.get("io.export_root") or self.config["io"]["export_root"]) fetch_root = Path(self.config.get("pipeline.fetch_root") or self.config["pipeline"]["fetch_root"]) ingest_dir = Path(self.config.get("pipeline.ingest_source_dir") or fetch_root) return { "ods_sql": ods_path.read_text(encoding="utf-8"), "admin_sql": admin_path.read_text(encoding="utf-8"), "ods_file": str(ods_path), "admin_file": str(admin_path), "dirs": [log_root, export_root, fetch_root, ingest_dir], } def load(self, extracted: dict[str, Any], context: TaskContext) -> dict: """执行 DDL 并创建必要目录。 安全提示: ODS DDL 文件可能携带头部说明或异常注释,为避免因非 SQL 文本导致执行失败,这里会做一次轻量清洗后再执行。 """ for d in extracted["dirs"]: Path(d).mkdir(parents=True, exist_ok=True) self.logger.info("已确保目录存在: %s", d) # 处理 ODS SQL:去掉头部说明行,以及易出错的 COMMENT ON 行(如 CamelCase 未加引号) ods_sql_raw: str = extracted["ods_sql"] drop_idx = ods_sql_raw.find("DROP SCHEMA") if drop_idx > 0: ods_sql_raw = ods_sql_raw[drop_idx:] cleaned_lines: list[str] = [] for line in ods_sql_raw.splitlines(): if line.strip().upper().startswith("COMMENT ON "): continue cleaned_lines.append(line) ods_sql = "\n".join(cleaned_lines) with self.db.conn.cursor() as cur: self.logger.info("执行 etl_admin schema 文件: %s", extracted["admin_file"]) cur.execute(extracted["admin_sql"]) self.logger.info("执行 ODS schema 文件: %s", extracted["ods_file"]) cur.execute(ods_sql) return { "executed": 2, "files": [extracted["admin_file"], extracted["ods_file"]], "dirs_prepared": [str(p) for p in extracted["dirs"]], }