初始提交:飞球 ETL 系统全量代码
This commit is contained in:
213
config/env_parser.py
Normal file
213
config/env_parser.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""环境变量解析"""
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from copy import deepcopy
|
||||
|
||||
# Maps an environment variable name to a tuple of dotted configuration paths
# that receive its value. Several names point at the same path and therefore
# act as aliases (e.g. API_TOKEN / FICOO_TOKEN, IDLE_START / IDLE_WINDOW_START).
ENV_MAP = {
    # Application settings
    "TIMEZONE": ("app.timezone",),
    "STORE_ID": ("app.store_id",),
    "SCHEMA_OLTP": ("app.schema_oltp",),
    "SCHEMA_ETL": ("app.schema_etl",),
    # Database connection settings
    "PG_DSN": ("db.dsn",),
    "PG_HOST": ("db.host",),
    "PG_PORT": ("db.port",),
    "PG_NAME": ("db.name",),
    "PG_USER": ("db.user",),
    "PG_PASSWORD": ("db.password",),
    "PG_CONNECT_TIMEOUT": ("db.connect_timeout_sec",),
    # API client settings
    "API_BASE": ("api.base_url",),
    "API_TOKEN": ("api.token",),
    "FICOO_TOKEN": ("api.token",),
    "API_TIMEOUT": ("api.timeout_sec",),
    "API_PAGE_SIZE": ("api.page_size",),
    "API_RETRY_MAX": ("api.retries.max_attempts",),
    "API_RETRY_BACKOFF": ("api.retries.backoff_sec",),
    "API_PARAMS": ("api.params",),
    # Input/output locations and file names
    "EXPORT_ROOT": ("io.export_root",),
    "LOG_ROOT": ("io.log_root",),
    "MANIFEST_NAME": ("io.manifest_name",),
    "INGEST_REPORT_NAME": ("io.ingest_report_name",),
    "WRITE_PRETTY_JSON": ("io.write_pretty_json",),
    # Run settings (task lists, windows, snapshot handling)
    "RUN_TASKS": ("run.tasks",),
    "RUN_DWS_TASKS": ("run.dws_tasks",),
    "RUN_INDEX_TASKS": ("run.index_tasks",),
    "INDEX_LOOKBACK_DAYS": ("run.index_lookback_days",),
    "OVERLAP_SECONDS": ("run.overlap_seconds",),
    "WINDOW_BUSY_MIN": ("run.window_minutes.default_busy",),
    "WINDOW_IDLE_MIN": ("run.window_minutes.default_idle",),
    "IDLE_START": ("run.idle_window.start",),
    "IDLE_END": ("run.idle_window.end",),
    "IDLE_WINDOW_START": ("run.idle_window.start",),
    "IDLE_WINDOW_END": ("run.idle_window.end",),
    "ALLOW_EMPTY_RESULT_ADVANCE": ("run.allow_empty_result_advance",),
    "ALLOW_EMPTY_ADVANCE": ("run.allow_empty_result_advance",),
    "SNAPSHOT_MISSING_DELETE": ("run.snapshot_missing_delete",),
    "SNAPSHOT_ALLOW_EMPTY_DELETE": ("run.snapshot_allow_empty_delete",),
    "WINDOW_START": ("run.window_override.start",),
    "WINDOW_END": ("run.window_override.end",),
    "WINDOW_SPLIT_UNIT": ("run.window_split.unit",),
    "WINDOW_SPLIT_DAYS": ("run.window_split.days",),
    "WINDOW_COMPENSATION_HOURS": ("run.window_split.compensation_hours",),
    # Pipeline settings
    "PIPELINE_FLOW": ("pipeline.flow",),
    "JSON_FETCH_ROOT": ("pipeline.fetch_root",),
    "JSON_SOURCE_DIR": ("pipeline.ingest_source_dir",),
    "FETCH_ROOT": ("pipeline.fetch_root",),
    "INGEST_SOURCE_DIR": ("pipeline.ingest_source_dir",),
    # Integrity check settings
    "INTEGRITY_MODE": ("integrity.mode",),
    "INTEGRITY_HISTORY_START": ("integrity.history_start",),
    "INTEGRITY_HISTORY_END": ("integrity.history_end",),
    "INTEGRITY_INCLUDE_DIMENSIONS": ("integrity.include_dimensions",),
    "INTEGRITY_AUTO_CHECK": ("integrity.auto_check",),
    "INTEGRITY_AUTO_BACKFILL": ("integrity.auto_backfill",),
    "INTEGRITY_COMPARE_CONTENT": ("integrity.compare_content",),
    "INTEGRITY_CONTENT_SAMPLE_LIMIT": ("integrity.content_sample_limit",),
    "INTEGRITY_BACKFILL_MISMATCH": ("integrity.backfill_mismatch",),
    "INTEGRITY_RECHECK_AFTER_BACKFILL": ("integrity.recheck_after_backfill",),
    "INTEGRITY_ODS_TASK_CODES": ("integrity.ods_task_codes",),
    # Verification settings
    "VERIFY_SKIP_ODS_ON_FETCH": ("verification.skip_ods_when_fetch_before_verify",),
    "VERIFY_ODS_LOCAL_JSON": ("verification.ods_use_local_json",),
    # DWD settings
    "DWD_FACT_UPSERT": ("dwd.fact_upsert",),
    # DWS monthly / salary settings
    "DWS_MONTHLY_ALLOW_HISTORY": ("dws.monthly.allow_history",),
    "DWS_MONTHLY_PREV_GRACE_DAYS": ("dws.monthly.prev_month_grace_days",),
    "DWS_MONTHLY_HISTORY_MONTHS": ("dws.monthly.history_months",),
    "DWS_MONTHLY_NEW_HIRE_CAP_EFFECTIVE_FROM": ("dws.monthly.new_hire_cap_effective_from",),
    "DWS_MONTHLY_NEW_HIRE_CAP_DAY": ("dws.monthly.new_hire_cap_day",),
    "DWS_MONTHLY_NEW_HIRE_MAX_TIER_LEVEL": ("dws.monthly.new_hire_max_tier_level",),
    "DWS_SALARY_RUN_DAYS": ("dws.salary.run_days",),
    "DWS_SALARY_ALLOW_OUT_OF_CYCLE": ("dws.salary.allow_out_of_cycle",),
    "DWS_SALARY_ROOM_COURSE_PRICE": ("dws.salary.room_course_price",),
    # ODS offline replay settings
    "ODS_JSON_DOC_DIR": ("ods.json_doc_dir",),
    "ODS_INCLUDE_FILES": ("ods.include_files",),
    "ODS_DROP_SCHEMA_FIRST": ("ods.drop_schema_first",),
}
|
||||
|
||||
|
||||
def _deep_set(d, dotted_keys, value):
|
||||
cur = d
|
||||
for k in dotted_keys[:-1]:
|
||||
cur = cur.setdefault(k, {})
|
||||
cur[dotted_keys[-1]] = value
|
||||
|
||||
|
||||
def _coerce_env(v: str):
|
||||
if v is None:
|
||||
return None
|
||||
s = v.strip()
|
||||
if s.lower() in ("true", "false"):
|
||||
return s.lower() == "true"
|
||||
try:
|
||||
if s.isdigit() or (s.startswith("-") and s[1:].isdigit()):
|
||||
return int(s)
|
||||
except Exception:
|
||||
pass
|
||||
if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
|
||||
try:
|
||||
return json.loads(s)
|
||||
except Exception:
|
||||
return s
|
||||
return s
|
||||
|
||||
|
||||
def _strip_inline_comment(value: str) -> str:
|
||||
"""去掉未被引号包裹的内联注释"""
|
||||
result = []
|
||||
in_quote = False
|
||||
quote_char = ""
|
||||
escape = False
|
||||
for ch in value:
|
||||
if escape:
|
||||
result.append(ch)
|
||||
escape = False
|
||||
continue
|
||||
if ch == "\\":
|
||||
escape = True
|
||||
result.append(ch)
|
||||
continue
|
||||
if ch in ("'", '"'):
|
||||
if not in_quote:
|
||||
in_quote = True
|
||||
quote_char = ch
|
||||
elif quote_char == ch:
|
||||
in_quote = False
|
||||
quote_char = ""
|
||||
result.append(ch)
|
||||
continue
|
||||
if ch == "#" and not in_quote:
|
||||
break
|
||||
result.append(ch)
|
||||
return "".join(result).rstrip()
|
||||
|
||||
|
||||
def _unquote_value(value: str) -> str:
    """Normalise a raw value: drop comments, trailing commas and quotes.

    After trimming whitespace, removing an unquoted inline comment and
    stripping trailing commas, a value wrapped in matching single or double
    quotes loses the quotes, and a raw-string form such as ``r"..."`` or
    ``R'...'`` loses both the prefix and the quotes. Anything else is
    returned as the trimmed text.
    """
    text = _strip_inline_comment(value.strip())
    text = text.rstrip(",").rstrip()
    if not text:
        return text

    quotes = ("'", '"')
    first, last = text[0], text[-1]

    # Plain quoted form: 'value' or "value".
    if len(text) >= 2 and first in quotes and last == first:
        return text[1:-1]

    # Raw-string form: r'value' or R"value".
    if len(text) >= 3 and first in ("r", "R") and text[1] in quotes and last == text[1]:
        return text[2:-1]

    return text
|
||||
|
||||
|
||||
def _parse_dotenv_line(line: str) -> tuple[str, str] | None:
    """Parse one line of a ``.env`` file into a ``(key, value)`` pair.

    Returns ``None`` for blank lines, lines starting with ``#`` and lines
    without an ``=`` sign. A leading ``export `` prefix is accepted and
    dropped. The value is normalised with ``_unquote_value``.
    """
    text = line.strip()
    if not text or text.startswith("#"):
        return None

    # Accept shell-style "export KEY=value" lines.
    text = text.removeprefix("export ").strip()

    name, separator, raw_value = text.partition("=")
    if not separator:
        return None

    return name.strip(), _unquote_value(raw_value)
|
||||
|
||||
|
||||
def _load_dotenv_values() -> dict:
    """Read key/value pairs from the project's ``.env`` file.

    The file is looked up in the parent of the directory that contains this
    module. An empty dict is returned when the ``ETL_SKIP_DOTENV``
    environment variable is one of ``1`` / ``true`` / ``TRUE`` / ``True``,
    or when no regular ``.env`` file exists at that location.

    Returns:
        Mapping of variable name to its unquoted string value. When a key
        appears more than once, the last occurrence wins.
    """
    if os.environ.get("ETL_SKIP_DOTENV") in ("1", "true", "TRUE", "True"):
        return {}

    dotenv_path = Path(__file__).resolve().parents[1] / ".env"
    # is_file() also rejects a directory named ".env", which would otherwise
    # make read_text() raise.
    if not dotenv_path.is_file():
        return {}

    # "utf-8-sig" drops a leading byte-order mark if one is present; with
    # plain "utf-8" the BOM stays attached to the first key, which then never
    # matches its expected name. Undecodable bytes are still skipped.
    content = dotenv_path.read_text(encoding="utf-8-sig", errors="ignore")

    values: dict[str, str] = {}
    for line in content.splitlines():
        parsed = _parse_dotenv_line(line)
        if parsed:
            key, value = parsed
            values[key] = value
    return values
|
||||
|
||||
|
||||
def _apply_env_values(cfg: dict, source: dict):
    """Copy recognised variables from ``source`` into ``cfg`` in place.

    For each name in ``ENV_MAP`` that is present in ``source``, the raw
    value is coerced with ``_coerce_env`` and written to every dotted path
    registered for that name. For the three task-list paths, a value that
    is still a string after coercion is split on commas into a list of
    non-empty, trimmed items.
    """
    list_paths = ("run.tasks", "run.dws_tasks", "run.index_tasks")

    for env_name, targets in ENV_MAP.items():
        raw = source.get(env_name)
        if raw is None:
            continue

        coerced = _coerce_env(raw)
        for target in targets:
            if isinstance(coerced, str) and target in list_paths:
                coerced = [part.strip() for part in coerced.split(",") if part.strip()]
            _deep_set(cfg, target.split("."), coerced)
|
||||
|
||||
|
||||
def load_env_overrides(defaults: dict) -> dict:
    """Return a deep copy of ``defaults`` with environment overrides applied.

    Values from the ``.env`` file are applied first and the real process
    environment second, so a variable set in the process environment wins
    over the same variable in ``.env``. ``defaults`` itself is not modified.
    """
    merged = deepcopy(defaults)
    # Order matters: later sources overwrite earlier ones.
    # NOTE(review): the original comment says this keeps CLI options at the
    # highest priority; CLI handling is not visible here, confirm in the caller.
    for source in (_load_dotenv_values(), os.environ):
        _apply_env_values(merged, source)
    return merged
|
||||
Reference in New Issue
Block a user