74 lines
3.3 KiB
Python
74 lines
3.3 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""任务:初始化运行环境,执行 ODS 与 etl_admin 的 DDL,并准备日志/导出目录。"""
|
||
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
from .base_task import BaseTask, TaskContext
|
||
|
||
|
||
class InitOdsSchemaTask(BaseTask):
|
||
"""通过调度执行初始化:创建必要目录,执行 ODS 与 etl_admin 的 DDL。"""
|
||
|
||
def get_task_code(self) -> str:
|
||
"""返回任务编码。"""
|
||
return "INIT_ODS_SCHEMA"
|
||
|
||
def extract(self, context: TaskContext) -> dict[str, Any]:
|
||
"""读取 SQL 文件路径,收集需创建的目录。"""
|
||
base_dir = Path(__file__).resolve().parents[1] / "database"
|
||
ods_path = Path(self.config.get("schema.ods_file", base_dir / "schema_ODS_doc.sql"))
|
||
admin_path = Path(self.config.get("schema.etl_admin_file", base_dir / "schema_etl_admin.sql"))
|
||
if not ods_path.exists():
|
||
raise FileNotFoundError(f"找不到 ODS schema 文件: {ods_path}")
|
||
if not admin_path.exists():
|
||
raise FileNotFoundError(f"找不到 etl_admin schema 文件: {admin_path}")
|
||
|
||
log_root = Path(self.config.get("io.log_root") or self.config["io"]["log_root"])
|
||
export_root = Path(self.config.get("io.export_root") or self.config["io"]["export_root"])
|
||
fetch_root = Path(self.config.get("pipeline.fetch_root") or self.config["pipeline"]["fetch_root"])
|
||
ingest_dir = Path(self.config.get("pipeline.ingest_source_dir") or fetch_root)
|
||
|
||
return {
|
||
"ods_sql": ods_path.read_text(encoding="utf-8"),
|
||
"admin_sql": admin_path.read_text(encoding="utf-8"),
|
||
"ods_file": str(ods_path),
|
||
"admin_file": str(admin_path),
|
||
"dirs": [log_root, export_root, fetch_root, ingest_dir],
|
||
}
|
||
|
||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict:
|
||
"""执行 DDL 并创建必要目录。
|
||
|
||
安全提示:
|
||
ODS DDL 文件可能携带头部说明或异常注释,为避免因非 SQL 文本导致执行失败,这里会做一次轻量清洗后再执行。
|
||
"""
|
||
for d in extracted["dirs"]:
|
||
Path(d).mkdir(parents=True, exist_ok=True)
|
||
self.logger.info("已确保目录存在: %s", d)
|
||
|
||
# 处理 ODS SQL:去掉头部说明行,以及易出错的 COMMENT ON 行(如 CamelCase 未加引号)
|
||
ods_sql_raw: str = extracted["ods_sql"]
|
||
drop_idx = ods_sql_raw.find("DROP SCHEMA")
|
||
if drop_idx > 0:
|
||
ods_sql_raw = ods_sql_raw[drop_idx:]
|
||
cleaned_lines: list[str] = []
|
||
for line in ods_sql_raw.splitlines():
|
||
if line.strip().upper().startswith("COMMENT ON "):
|
||
continue
|
||
cleaned_lines.append(line)
|
||
ods_sql = "\n".join(cleaned_lines)
|
||
|
||
with self.db.conn.cursor() as cur:
|
||
self.logger.info("执行 etl_admin schema 文件: %s", extracted["admin_file"])
|
||
cur.execute(extracted["admin_sql"])
|
||
self.logger.info("执行 ODS schema 文件: %s", extracted["ods_file"])
|
||
cur.execute(ods_sql)
|
||
|
||
return {
|
||
"executed": 2,
|
||
"files": [extracted["admin_file"], extracted["ods_file"]],
|
||
"dirs_prepared": [str(p) for p in extracted["dirs"]],
|
||
}
|