ODS 完成

This commit is contained in:
Neo
2025-11-30 07:18:55 +08:00
parent cbd16a39ba
commit b9b050bb5d
28 changed files with 41867 additions and 977 deletions

View File

@@ -2,6 +2,7 @@
"""环境变量解析"""
import os
import json
from pathlib import Path
from copy import deepcopy
ENV_MAP = {
@@ -15,18 +16,22 @@ ENV_MAP = {
"PG_NAME": ("db.name",),
"PG_USER": ("db.user",),
"PG_PASSWORD": ("db.password",),
"PG_CONNECT_TIMEOUT": ("db.connect_timeout_sec",),
"API_BASE": ("api.base_url",),
"API_TOKEN": ("api.token",),
"FICOO_TOKEN": ("api.token",),
"API_TIMEOUT": ("api.timeout_sec",),
"API_PAGE_SIZE": ("api.page_size",),
"API_PARAMS": ("api.params",),
"EXPORT_ROOT": ("io.export_root",),
"LOG_ROOT": ("io.log_root",),
"RUN_TASKS": ("run.tasks",),
"OVERLAP_SECONDS": ("run.overlap_seconds",),
"WINDOW_BUSY_MIN": ("run.window_minutes.default_busy",),
"WINDOW_IDLE_MIN": ("run.window_minutes.default_idle",),
"TEST_MODE": ("testing.mode",),
"TEST_JSON_ARCHIVE_DIR": ("testing.json_archive_dir",),
"TEST_JSON_TEMP_DIR": ("testing.temp_json_dir",),
"PIPELINE_FLOW": ("pipeline.flow",),
"JSON_FETCH_ROOT": ("pipeline.fetch_root",),
"JSON_SOURCE_DIR": ("pipeline.ingest_source_dir",),
}
def _deep_set(d, dotted_keys, value):
@@ -53,13 +58,97 @@ def _coerce_env(v: str):
return s
return s
def load_env_overrides(defaults: dict) -> dict:
cfg = deepcopy(defaults)
def _strip_inline_comment(value: str) -> str:
    """Remove a trailing inline comment that is not inside quotes.

    A ``#`` starts a comment only when it is outside single/double quotes AND
    sits at the very start of the value or directly after whitespace (the
    shell / dotenv convention). A ``#`` glued to preceding text, such as in
    ``p#ss`` or ``http://host/#frag``, is kept as part of the value instead of
    silently truncating it.

    Args:
        value: Raw right-hand side of a ``KEY=value`` line.

    Returns:
        The value without the comment and without trailing whitespace.
        Quote characters and backslashes are left in place.
    """
    result = []
    quote_char = ""  # currently open quote character, "" when outside quotes
    escape = False  # True when the previous character was a backslash
    for ch in value:
        if escape:
            # A backslash-escaped character is always copied verbatim.
            result.append(ch)
            escape = False
        elif ch == "\\":
            escape = True
            result.append(ch)
        elif ch in ("'", '"'):
            if not quote_char:
                quote_char = ch
            elif quote_char == ch:
                quote_char = ""
            result.append(ch)
        elif (
            ch == "#"
            and not quote_char
            and (not result or result[-1].isspace())
        ):
            # Comment marker at a word boundary: drop the rest of the line.
            break
        else:
            result.append(ch)
    return "".join(result).rstrip()
def _unquote_value(value: str) -> str:
    """Normalize a raw .env value: comments, trailing commas, quotes.

    The value is trimmed, passed through ``_strip_inline_comment``, and any
    trailing commas are removed. If what remains is wrapped in matching
    single or double quotes, or written as a raw-string literal
    (``r'...'`` / ``R"..."``), the wrapper is removed and the inner text is
    returned; otherwise the cleaned text is returned unchanged.
    """
    quotes = ("'", '"')
    text = _strip_inline_comment(value.strip()).rstrip(",").rstrip()
    size = len(text)

    # Plain quoted form: '...' or "..."
    if size >= 2 and text[0] in quotes and text.endswith(text[0]):
        return text[1:-1]

    # Raw-string form: r'...' or R"..."
    is_raw_literal = (
        size >= 3
        and text[0] in ("r", "R")
        and text[1] in quotes
        and text.endswith(text[1])
    )
    if is_raw_literal:
        return text[2:-1]

    return text
def _parse_dotenv_line(line: str) -> tuple[str, str] | None:
"""解析 .env 文件中的单行"""
stripped = line.strip()
if not stripped or stripped.startswith("#"):
return None
if stripped.startswith("export "):
stripped = stripped[len("export ") :].strip()
if "=" not in stripped:
return None
key, value = stripped.split("=", 1)
key = key.strip()
value = _unquote_value(value)
return key, value
def _load_dotenv_values() -> dict:
    """Read key/value pairs from the ``.env`` file next to the package.

    The file is looked up one directory above the directory containing this
    module (``Path(__file__).resolve().parents[1] / ".env"``).

    Returns:
        A dict mapping each parsed key to its cleaned string value. Empty
        when the path is not a regular file. Later duplicates of a key
        overwrite earlier ones.
    """
    dotenv_path = Path(__file__).resolve().parents[1] / ".env"
    # is_file() rather than exists(): a directory named ".env" must not reach
    # read_text(), which would raise instead of being treated as "no file".
    if not dotenv_path.is_file():
        return {}
    values: dict[str, str] = {}
    # utf-8-sig decodes plain UTF-8 identically but also drops a leading BOM,
    # which would otherwise be glued onto the first key and make it unmatched.
    for line in dotenv_path.read_text(encoding="utf-8-sig").splitlines():
        parsed = _parse_dotenv_line(line)
        if parsed:
            key, value = parsed
            values[key] = value
    return values
def _apply_env_values(cfg: dict, source: dict):
    """Overlay values from *source* onto *cfg* as directed by ``ENV_MAP``.

    For every key of ``ENV_MAP`` whose value in *source* is not ``None``, the
    raw value is converted with ``_coerce_env`` and written into *cfg* (in
    place) at each dotted path listed for that key via ``_deep_set``.

    Args:
        cfg: Nested configuration dict; mutated in place.
        source: Mapping of variable names to raw values (a parsed ``.env``
            dict or ``os.environ``).
    """
    for env_key, dotted in ENV_MAP.items():
        # Read only from the supplied mapping; the caller decides whether
        # that is the .env contents or the real process environment.
        val = source.get(env_key)
        if val is None:
            continue
        v2 = _coerce_env(val)
        for path in dotted:
            # A task list that is still a plain string is split on commas,
            # dropping empty items.
            if path == "run.tasks" and isinstance(v2, str):
                v2 = [item.strip() for item in v2.split(",") if item.strip()]
            _deep_set(cfg, path.split("."), v2)
def load_env_overrides(defaults: dict) -> dict:
    """Return a deep copy of *defaults* overlaid with environment settings.

    Two layers are applied in order, later ones winning: values parsed from
    the ``.env`` file, then the real process environment. *defaults* itself
    is not modified.
    """
    merged = deepcopy(defaults)
    # .env first, real environment variables second, so the process
    # environment overrides the file. NOTE(review): the original comment says
    # this keeps the CLI at highest priority - presumably CLI options are
    # applied by the caller afterwards; confirm.
    for layer in (_load_dotenv_values(), os.environ):
        _apply_env_values(merged, layer)
    return merged