在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -24,7 +24,8 @@ class AnalyzerConfig:
date_to: date | None = None
limit: int = 200
tables: list[str] | None = None
output_dir: Path = field(default_factory=lambda: Path("docs/reports"))
# 调用方必须显式传入(从 SYSTEM_ANALYZE_ROOT 环境变量读取)
output_dir: Path = field(default_factory=lambda: Path(""))
pg_dsn: str = ""
api_base: str = ""
api_token: str = ""
@@ -420,12 +421,41 @@ def dump_collection_results(
返回 {类别: 目录路径} 的字典。
"""
json_trees_dir = output_dir / "json_trees"
db_schemas_dir = output_dir / "db_schemas"
field_mappings_dir = output_dir / "field_mappings"
json_trees_dir.mkdir(parents=True, exist_ok=True)
db_schemas_dir.mkdir(parents=True, exist_ok=True)
field_mappings_dir.mkdir(parents=True, exist_ok=True)
# CHANGE 2026-02-21 | 清理旧子目录后重建,避免 Windows 文件锁导致写入失败
import shutil as _shutil, time as _time
_sub_dirs = ["json_trees", "db_schemas", "field_mappings"]
for _name in _sub_dirs:
_d = output_dir / _name
if _d.exists():
try:
_shutil.rmtree(_d)
except (PermissionError, OSError):
# Windows 文件锁:无法删除也无法遍历,跳过(后面用备选名)
pass
# Windows rmtree 后句柄可能未释放,等待后再 mkdir
_time.sleep(1)
def _ensure_writable_dir(
    base: Path,
    name: str,
    *,
    attempts: int = 3,
    delay: float = 1.0,
) -> Path:
    """Return a writable directory under ``base``, preferring ``base / name``.

    Each attempt creates ``base / name`` (including parents) and proves it is
    writable by writing and then deleting a small ``.write_test`` probe file.
    If every attempt fails (the surrounding code attributes this to a locked
    directory), ``base / f"{name}_new"`` is created and returned instead, and
    a warning is printed.

    Args:
        base: Parent directory that should contain the sub-directory.
        name: Preferred sub-directory name.
        attempts: Number of tries for the preferred directory. Defaults to 3,
            the previously hard-coded value; a value <= 0 goes straight to
            the fallback directory.
        delay: Seconds to wait between two failed attempts. Defaults to 1.0,
            the previously hard-coded value.

    Returns:
        The preferred directory when it is writable, otherwise the
        ``<name>_new`` fallback directory.

    Raises:
        OSError: If the fallback directory cannot be created either.
    """
    d = base / name
    for attempt in range(attempts):
        try:
            d.mkdir(parents=True, exist_ok=True)
            probe = d / ".write_test"
            probe.write_text("ok", encoding="utf-8")
            probe.unlink()
        except OSError:
            # FileNotFoundError and PermissionError are OSError subclasses,
            # so a single OSError clause covers everything caught before.
            # Only wait when another attempt follows; sleeping after the last
            # failure would just postpone the fallback.
            if attempt + 1 < attempts:
                # _time is the alias bound by the enclosing function's
                # "import shutil as _shutil, time as _time".
                _time.sleep(delay)
        else:
            return d
    # The preferred directory is unusable: switch to a suffixed sibling.
    d = base / f"{name}_new"
    d.mkdir(parents=True, exist_ok=True)
    print(f" [警告] {name}/ 被锁定,使用备选目录 {d.name}/")
    return d
json_trees_dir = _ensure_writable_dir(output_dir, "json_trees")
db_schemas_dir = _ensure_writable_dir(output_dir, "db_schemas")
field_mappings_dir = _ensure_writable_dir(output_dir, "field_mappings")
# 解析 TABLE_MAP / FACT_MAPPINGS(用于构建字段映射)
table_map = parse_table_map()
@@ -508,10 +538,18 @@ def dump_collection_results(
def _write_json(path: Path, data: Any) -> None:
    """Serialize ``data`` as JSON and write it to ``path`` in UTF-8.

    The JSON text is produced with ``ensure_ascii=False`` and ``indent=2``;
    values the encoder cannot handle natively are converted with ``str``
    (``default=str``).

    The data is serialized once and written once. If that write raises
    ``PermissionError`` (the 2026-02-21 change targets Windows file locks),
    the function waits one second, tries to delete the existing file, and
    writes a second time.

    Args:
        path: Destination file.
        data: Object to serialize.

    Raises:
        PermissionError: If the second write attempt is rejected as well.
    """
    content = json.dumps(data, ensure_ascii=False, indent=2, default=str)
    try:
        path.write_text(content, encoding="utf-8")
    except PermissionError:
        # CHANGE 2026-02-21 | Windows file-lock retry: delete first, then write.
        import time

        time.sleep(1)
        try:
            path.unlink(missing_ok=True)
        except PermissionError:
            # Deleting is best-effort; the write below decides the outcome.
            pass
        path.write_text(content, encoding="utf-8")
# ══════════════════════════════════════════════════════════════════
@@ -571,18 +609,6 @@ ODS_SPECS: list[dict] = [
"extra_params": {},
"description": "助教服务流水",
},
{
"code": "ODS_ASSISTANT_ABOLISH",
"table": "assistant_cancellation_records",
"dwd_table": "dwd_assistant_trash_event",
"endpoint": "/AssistantPerformance/GetAbolitionAssistant",
"data_path": ("data",),
"list_key": "abolitionAssistants",
"time_fields": ("startTime", "endTime"),
"requires_window": True,
"extra_params": {},
"description": "助教废除记录",
},
{
"code": "ODS_STORE_GOODS_SALES",
"table": "store_goods_sales_records",
@@ -788,18 +814,6 @@ ODS_SPECS: list[dict] = [
"extra_params": {},
"description": "租户商品档案",
},
{
"code": "ODS_SETTLEMENT_TICKET",
"table": "settlement_ticket_details",
"dwd_table": None,
"endpoint": "/Order/GetOrderSettleTicketNew",
"data_path": (),
"list_key": None,
"time_fields": None,
"requires_window": False,
"extra_params": {},
"description": "结账小票详情(按 orderSettleId 逐条获取,不走常规分页)",
},
]
# 默认 list_key 候选(与 APIClient 一致)
@@ -896,7 +910,6 @@ def fetch_records(spec: dict, config: AnalyzerConfig) -> list[dict]:
- 有时间字段的表:从今天往回 10 天一批,不够则继续扩展,最多 10 次重试
- 无时间字段的表:单次请求
- 特殊表(settlement_ticket_details):跳过
参数:
spec: ODS_SPECS 中的单项配置
@@ -912,10 +925,6 @@ def fetch_records(spec: dict, config: AnalyzerConfig) -> list[dict]:
extra_params = _resolve_extra_params(spec.get("extra_params", {}), config)
target_count = config.limit
# 结账小票是逐条获取的,跳过
if spec["table"] == "settlement_ticket_details":
return []
tz = ZoneInfo("Asia/Shanghai")
all_records: list[dict] = []
@@ -973,8 +982,10 @@ def fetch_records(spec: dict, config: AnalyzerConfig) -> list[dict]:
import re
# Default path of the DWD load task source file (relative form, resolved against the current working directory).
_DWD_TASK_PY = Path("apps/etl/connectors/feiqiu/tasks/dwd/dwd_load_task.py")
# Default path of the DWD load task source file (absolute path, so it is still found when cwd is not the project root).
# CHANGE 2026-02-21 | relative path -> absolute path, same origin as _env_paths
# parents[2] is the directory two levels above this file's own directory;
# presumably that is the project root -- confirm against the repository layout.
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
_DWD_TASK_PY = _PROJECT_ROOT / "apps" / "etl" / "connectors" / "feiqiu" / "tasks" / "dwd" / "dwd_load_task.py"
def parse_table_map(py_path: Path | None = None) -> dict[str, str]:
@@ -1059,8 +1070,9 @@ def parse_fact_mappings(py_path: Path | None = None) -> dict[str, list[tuple]]:
# BD_manual 文档解析:提取字段级业务描述
# ══════════════════════════════════════════════════════════════════
# Root directory of the BD_manual documents (relative form, resolved against the current working directory).
_BD_DOCS_ROOT = Path("apps/etl/connectors/feiqiu/docs/database")
# Root directory of the BD_manual documents (absolute path, built from the same _PROJECT_ROOT as _DWD_TASK_PY).
# CHANGE 2026-02-21 | relative path -> absolute path, so it is still found when cwd is not the project root
_BD_DOCS_ROOT = _PROJECT_ROOT / "apps" / "etl" / "connectors" / "feiqiu" / "docs" / "database"
def parse_bd_manual_fields(doc_path: Path) -> dict[str, str]: