Files
feiqiu-ETL/etl_billiards/tasks/init_schema_task.py
2025-12-09 04:57:05 +08:00

74 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""任务:初始化运行环境,执行 ODS 与 etl_admin 的 DDL并准备日志/导出目录。"""
from __future__ import annotations
from pathlib import Path
from typing import Any
from .base_task import BaseTask, TaskContext
class InitOdsSchemaTask(BaseTask):
"""通过调度执行初始化:创建必要目录,执行 ODS 与 etl_admin 的 DDL。"""
def get_task_code(self) -> str:
"""返回任务编码。"""
return "INIT_ODS_SCHEMA"
def extract(self, context: TaskContext) -> dict[str, Any]:
"""读取 SQL 文件路径,收集需创建的目录。"""
base_dir = Path(__file__).resolve().parents[1] / "database"
ods_path = Path(self.config.get("schema.ods_file", base_dir / "schema_ODS_doc.sql"))
admin_path = Path(self.config.get("schema.etl_admin_file", base_dir / "schema_etl_admin.sql"))
if not ods_path.exists():
raise FileNotFoundError(f"找不到 ODS schema 文件: {ods_path}")
if not admin_path.exists():
raise FileNotFoundError(f"找不到 etl_admin schema 文件: {admin_path}")
log_root = Path(self.config.get("io.log_root") or self.config["io"]["log_root"])
export_root = Path(self.config.get("io.export_root") or self.config["io"]["export_root"])
fetch_root = Path(self.config.get("pipeline.fetch_root") or self.config["pipeline"]["fetch_root"])
ingest_dir = Path(self.config.get("pipeline.ingest_source_dir") or fetch_root)
return {
"ods_sql": ods_path.read_text(encoding="utf-8"),
"admin_sql": admin_path.read_text(encoding="utf-8"),
"ods_file": str(ods_path),
"admin_file": str(admin_path),
"dirs": [log_root, export_root, fetch_root, ingest_dir],
}
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict:
"""执行 DDL 并创建必要目录。
安全提示:
ODS DDL 文件可能携带头部说明或异常注释,为避免因非 SQL 文本导致执行失败,这里会做一次轻量清洗后再执行。
"""
for d in extracted["dirs"]:
Path(d).mkdir(parents=True, exist_ok=True)
self.logger.info("已确保目录存在: %s", d)
# 处理 ODS SQL去掉头部说明行以及易出错的 COMMENT ON 行(如 CamelCase 未加引号)
ods_sql_raw: str = extracted["ods_sql"]
drop_idx = ods_sql_raw.find("DROP SCHEMA")
if drop_idx > 0:
ods_sql_raw = ods_sql_raw[drop_idx:]
cleaned_lines: list[str] = []
for line in ods_sql_raw.splitlines():
if line.strip().upper().startswith("COMMENT ON "):
continue
cleaned_lines.append(line)
ods_sql = "\n".join(cleaned_lines)
with self.db.conn.cursor() as cur:
self.logger.info("执行 etl_admin schema 文件: %s", extracted["admin_file"])
cur.execute(extracted["admin_sql"])
self.logger.info("执行 ODS schema 文件: %s", extracted["ods_file"])
cur.execute(ods_sql)
return {
"executed": 2,
"files": [extracted["admin_file"], extracted["ods_file"]],
"dirs_prepared": [str(p) for p in extracted["dirs"]],
}