# -*- coding: utf-8 -*-
"""Per-task debug script for the INDEX layer.

Connects to the real database and runs the 4 INDEX-layer index tasks
(WBI/NCI/RS/ML) one at a time, validating that the computed index values
are sane (non-empty, within the expected range).

Usage:
    cd apps/etl/connectors/feiqiu
    python -m scripts.debug.debug_index [--hours 720] [--tasks DWS_WINBACK_INDEX,DWS_NEWCONV_INDEX]
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
import time
import traceback
from dataclasses import asdict, dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo
# ── Ensure the project root is on sys.path ──
# The absolute imports below (config, database, api, orchestration) only
# resolve when the connector root is importable; parents[2] climbs from
# scripts/debug/<this file> up to that root — assumes the documented
# scripts/debug/ layout, TODO confirm.
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
if str(_FEIQIU_ROOT) not in sys.path:
    sys.path.insert(0, str(_FEIQIU_ROOT))
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
from api.client import APIClient
from orchestration.task_registry import default_registry
from orchestration.cursor_manager import CursorManager
from orchestration.run_tracker import RunTracker
from orchestration.task_executor import TaskExecutor
@dataclass
class DebugResult:
    """Debug outcome for a single INDEX-layer task."""
    layer: str = "INDEX"
    task_code: str = ""
    status: str = ""  # PASS / FAIL / WARN / ERROR
    message: str = ""
    counts: dict = field(default_factory=dict)  # counters returned by the executor
    target_table: str = ""  # fully qualified target table (schema.table)
    pre_row_count: int | None = None  # row count before the task ran (None if unknown)
    post_row_count: int | None = None  # row count after the task ran
    range_check: dict | None = None  # output of _check_index_range(), if performed
    duration_sec: float = 0.0  # wall-clock task duration, seconds
    error_detail: str | None = None  # traceback text when status == "ERROR"
    fix_applied: str | None = None  # never assigned in this script; reserved
# ── INDEX task → target table + score-column mapping ──
# Drives the post-run range check: score columns are expected to fall
# inside "display_range" (here [0, 100]); entries with no score columns
# only get a row-count check.
_INDEX_TABLE_META: dict[str, dict] = {
    "DWS_WINBACK_INDEX": {
        "target_table": "dws.dws_member_winback_index",
        "score_columns": ["display_score", "raw_score"],
        "display_range": (0, 100),
        "description": "老客挽回指数(WBI)",
    },
    "DWS_NEWCONV_INDEX": {
        "target_table": "dws.dws_member_newconv_index",
        "score_columns": ["display_score", "raw_score"],
        "display_range": (0, 100),
        "description": "新客转化指数(NCI)",
    },
    "DWS_RELATION_INDEX": {
        "target_table": "dws.dws_member_assistant_relation_index",
        "score_columns": ["rs_display", "os_display", "ms_display", "ml_display"],
        "display_range": (0, 100),
        "description": "关系指数(RS/OS/MS/ML)",
    },
    "DWS_ML_MANUAL_IMPORT": {
        "target_table": "dws.dws_ml_manual_order_source",
        "score_columns": [],  # ML import has no score columns; only row counts are checked
        "display_range": None,
        "description": "ML 人工台账导入",
    },
}
# ── 工具函数 ──────────────────────────────────────────────────
|
||
|
||
def _setup_logging() -> logging.Logger:
|
||
logger = logging.getLogger("debug_index")
|
||
logger.setLevel(logging.INFO)
|
||
if not logger.handlers:
|
||
handler = logging.StreamHandler(sys.stdout)
|
||
handler.setFormatter(logging.Formatter(
|
||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||
))
|
||
logger.addHandler(handler)
|
||
return logger
|
||
|
||
|
||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Wire up the DB / API / TaskExecutor components from *config*.

    Returns:
        Tuple of (db_conn, api_client, db_ops, executor).
    """
    db_cfg = config["db"]
    api_cfg = config["api"]

    conn = DatabaseConnection(
        dsn=db_cfg["dsn"],
        session=db_cfg.get("session"),
        connect_timeout=db_cfg.get("connect_timeout_sec"),
    )
    client = APIClient(
        base_url=api_cfg["base_url"],
        token=api_cfg["token"],
        timeout=api_cfg.get("timeout_sec", 20),
        retry_max=api_cfg.get("retries", {}).get("max_attempts", 3),
        headers_extra=api_cfg.get("headers_extra"),
    )
    ops = DatabaseOperations(conn)
    cursor_mgr = CursorManager(conn)
    tracker = RunTracker(conn)

    runner = TaskExecutor(
        config, ops, client,
        cursor_mgr, tracker, default_registry, logger,
    )
    return conn, client, ops, runner
def _query_count(db_conn: DatabaseConnection, table: str) -> int:
|
||
"""查询表的总行数。"""
|
||
rows = db_conn.query(f"SELECT COUNT(*) AS cnt FROM {table}")
|
||
return int(rows[0]["cnt"]) if rows else 0
|
||
|
||
|
||
def _table_exists(db_conn: DatabaseConnection, table: str) -> bool:
|
||
"""检查表/视图是否存在。"""
|
||
rows = db_conn.query("SELECT to_regclass(%s) AS reg", (table,))
|
||
return bool(rows and rows[0].get("reg"))
|
||
|
||
|
||
def _has_column(db_conn: DatabaseConnection, table: str, column: str) -> bool:
|
||
"""检查表是否包含指定列。"""
|
||
sql = """
|
||
SELECT 1 FROM information_schema.columns
|
||
WHERE table_schema || '.' || table_name = %s
|
||
AND column_name = %s
|
||
LIMIT 1
|
||
"""
|
||
rows = db_conn.query(sql, (table, column))
|
||
return bool(rows)
|
||
|
||
|
||
# ── Score range checks ────────────────────────────────────────

def _check_index_range(
    db_conn: DatabaseConnection,
    table: str,
    score_columns: list[str],
    display_range: tuple[float, float] | None,
    logger: logging.Logger,
) -> dict:
    """Validate score columns against the expected range and gather stats.

    NOTE: *logger* is currently unused; findings are returned, not logged.

    Returns:
        {
            "columns_checked": [...],
            "issues": [...],
            "stats": {col: {min, max, avg, null_count, out_of_range_count, total}}
        }
    """
    report: dict = {"columns_checked": [], "issues": [], "stats": {}}

    if not score_columns:
        report["issues"].append("ℹ 该任务无指数列,跳过范围检查")
        return report

    for col in score_columns:
        if not _has_column(db_conn, table, col):
            report["issues"].append(f"⚠ 列 {col} 不存在于 {table}")
            continue

        report["columns_checked"].append(col)

        # Basic distribution statistics for this column.
        stats_sql = f"""
        SELECT
            COUNT(*) AS total,
            COUNT("{col}") AS non_null,
            COUNT(*) - COUNT("{col}") AS null_count,
            ROUND(MIN("{col}")::numeric, 4) AS min_val,
            ROUND(MAX("{col}")::numeric, 4) AS max_val,
            ROUND(AVG("{col}")::numeric, 4) AS avg_val
        FROM {table}
        """
        try:
            stat_rows = db_conn.query(stats_sql)
            if not stat_rows:
                report["issues"].append(f"⚠ {col}: 查询统计失败(无返回行)")
                continue

            first = stat_rows[0]
            total = int(first["total"])
            non_null = int(first["non_null"])

            def _num(v):
                # NULL aggregates arrive as None and must stay None.
                return float(v) if v is not None else None

            col_stats = {
                "total": total,
                "non_null": non_null,
                "null_count": int(first["null_count"]),
                "min": _num(first["min_val"]),
                "max": _num(first["max_val"]),
                "avg": _num(first["avg_val"]),
            }

            # Out-of-range count against the expected display range.
            if display_range and non_null > 0:
                lo, hi = display_range
                oor_sql = f"""
                SELECT COUNT(*) AS cnt FROM {table}
                WHERE "{col}" IS NOT NULL
                  AND ("{col}" < {lo} OR "{col}" > {hi})
                """
                oor_rows = db_conn.query(oor_sql)
                oor_count = int(oor_rows[0]["cnt"]) if oor_rows else 0
                col_stats["out_of_range_count"] = oor_count

                if oor_count > 0:
                    report["issues"].append(
                        f"⚠ {col}: {oor_count}/{non_null} 条记录超出 [{lo}, {hi}] 范围"
                    )

            # Degenerate case: column exists but every value is NULL.
            if total > 0 and non_null == 0:
                report["issues"].append(f"⚠ {col}: 全部为 NULL({total} 行)")

            report["stats"][col] = col_stats

        except Exception as exc:
            report["issues"].append(f"✗ {col}: 统计查询异常: {exc}")

    return report
# ── 核心调试逻辑 ──────────────────────────────────────────────
|
||
|
||
def debug_single_index_task(
|
||
task_code: str,
|
||
executor: TaskExecutor,
|
||
db_conn: DatabaseConnection,
|
||
config: AppConfig,
|
||
api_client,
|
||
logger: logging.Logger,
|
||
window_start: datetime,
|
||
window_end: datetime,
|
||
) -> DebugResult:
|
||
"""执行单个 INDEX 任务并验证结果。"""
|
||
result = DebugResult(task_code=task_code)
|
||
|
||
meta = _INDEX_TABLE_META.get(task_code, {})
|
||
target_table = meta.get("target_table", "")
|
||
score_columns = meta.get("score_columns", [])
|
||
display_range = meta.get("display_range")
|
||
description = meta.get("description", task_code)
|
||
result.target_table = target_table
|
||
|
||
store_id = int(config.get("app.store_id"))
|
||
run_uuid = f"debug-index-{task_code.lower()}-{int(time.time())}"
|
||
|
||
logger.info("━" * 60)
|
||
logger.info("▶ 开始调试: %s (%s, 表: %s)", task_code, description, target_table or "未知")
|
||
|
||
# 执行前查询表行数
|
||
if target_table and _table_exists(db_conn, target_table):
|
||
try:
|
||
result.pre_row_count = _query_count(db_conn, target_table)
|
||
logger.info(" 执行前表行数: %d", result.pre_row_count)
|
||
except Exception as exc:
|
||
logger.warning(" 查询执行前行数失败: %s", exc)
|
||
elif target_table:
|
||
logger.warning(" 目标表不存在: %s", target_table)
|
||
|
||
# 执行任务
|
||
t0 = time.monotonic()
|
||
try:
|
||
task_result = executor.run_single_task(
|
||
task_code=task_code,
|
||
run_uuid=run_uuid,
|
||
store_id=store_id,
|
||
data_source="online",
|
||
)
|
||
result.duration_sec = round(time.monotonic() - t0, 2)
|
||
except Exception as exc:
|
||
result.duration_sec = round(time.monotonic() - t0, 2)
|
||
result.status = "ERROR"
|
||
result.message = f"任务执行异常: {exc}"
|
||
result.error_detail = traceback.format_exc()
|
||
logger.error(" ✗ 执行异常: %s", exc)
|
||
return result
|
||
|
||
# 解析返回结果
|
||
task_status = (task_result.get("status") or "").upper()
|
||
counts = task_result.get("counts") or {}
|
||
result.counts = counts
|
||
|
||
logger.info(" 返回状态: %s", task_status)
|
||
logger.info(" counts: %s", counts)
|
||
|
||
# 执行后查询表行数
|
||
if target_table and _table_exists(db_conn, target_table):
|
||
try:
|
||
result.post_row_count = _query_count(db_conn, target_table)
|
||
logger.info(" 执行后表行数: %d", result.post_row_count)
|
||
|
||
if result.pre_row_count is not None:
|
||
delta = result.post_row_count - result.pre_row_count
|
||
logger.info(" 行数变化: %+d", delta)
|
||
except Exception as exc:
|
||
logger.warning(" 查询执行后行数失败: %s", exc)
|
||
|
||
# 指数范围检查
|
||
if target_table and _table_exists(db_conn, target_table) and score_columns:
|
||
try:
|
||
range_check = _check_index_range(
|
||
db_conn, target_table, score_columns, display_range, logger,
|
||
)
|
||
result.range_check = range_check
|
||
|
||
for col, stats in range_check.get("stats", {}).items():
|
||
logger.info(
|
||
" %s: min=%.2f, max=%.2f, avg=%.2f, null=%d/%d",
|
||
col,
|
||
stats.get("min") or 0,
|
||
stats.get("max") or 0,
|
||
stats.get("avg") or 0,
|
||
stats.get("null_count", 0),
|
||
stats.get("total", 0),
|
||
)
|
||
for issue in range_check.get("issues", []):
|
||
logger.info(" 范围检查: %s", issue)
|
||
except Exception as exc:
|
||
logger.warning(" ⚠ 范围检查异常: %s", exc)
|
||
|
||
# 最终状态判定
|
||
issues = []
|
||
errors_count = counts.get("errors", 0)
|
||
if errors_count:
|
||
issues.append(f"执行有 {errors_count} 个错误")
|
||
|
||
if result.post_row_count is not None and result.post_row_count == 0:
|
||
issues.append("执行后表为空")
|
||
|
||
if result.range_check:
|
||
oor_total = sum(
|
||
s.get("out_of_range_count", 0)
|
||
for s in result.range_check.get("stats", {}).values()
|
||
)
|
||
if oor_total > 0:
|
||
issues.append(f"指数范围检查: {oor_total} 条超出范围")
|
||
|
||
all_null = all(
|
||
s.get("non_null", 0) == 0
|
||
for s in result.range_check.get("stats", {}).values()
|
||
) if result.range_check.get("stats") else False
|
||
if all_null:
|
||
issues.append("所有指数列均为 NULL")
|
||
|
||
if issues:
|
||
result.status = "WARN"
|
||
result.message = "; ".join(issues)
|
||
elif task_status in ("SUCCESS", "PARTIAL", "COMPLETE"):
|
||
result.status = "PASS"
|
||
result.message = f"执行成功, counts={counts}"
|
||
elif task_status == "SKIP":
|
||
result.status = "WARN"
|
||
result.message = "任务被跳过(未启用或不存在)"
|
||
else:
|
||
result.status = "WARN"
|
||
result.message = f"未知状态: {task_status}"
|
||
|
||
icon = {"PASS": "✓", "WARN": "⚠", "ERROR": "✗", "FAIL": "✗"}.get(result.status, "?")
|
||
logger.info(" %s 结果: %s - %s (耗时 %.1fs)", icon, result.status, result.message, result.duration_sec)
|
||
return result
|
||
|
||
|
||
# ── Main flow ─────────────────────────────────────────────────

def run_index_debug(
    hours: float = 720.0,
    task_filter: list[str] | None = None,
) -> list[DebugResult]:
    """Run the full INDEX-layer debug pass against the real database.

    Args:
        hours: Look-back window in hours (default 720 = 30 days; index
            computation usually needs a long history).
        task_filter: Only debug the listed task codes; None means all.

    Returns:
        One DebugResult per executed task.
    """
    logger = _setup_logging()
    logger.info("=" * 60)
    logger.info("INDEX 层调试开始")
    logger.info("=" * 60)

    # Load configuration (from .env).
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))
    window_end = datetime.now(tz)
    window_start = window_end - timedelta(hours=hours)

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("API: %s", config.get("api.base_url", ""))
    logger.info("时间窗口: %s ~ %s (%.1f 小时)", window_start, window_end, hours)

    # Install window_override so every task uses the same unified window.
    config.config.setdefault("run", {}).setdefault("window_override", {})
    config.config["run"]["window_override"]["start"] = window_start
    config.config["run"]["window_override"]["end"] = window_end

    # Build DB / API / executor components.
    db_conn, api_client, db_ops, executor = _build_components(config, logger)

    # Collect all registered INDEX-layer tasks, optionally filtered.
    all_index_codes = sorted(default_registry.get_tasks_by_layer("INDEX"))
    if task_filter:
        filter_set = {t.upper() for t in task_filter}
        index_codes = [c for c in all_index_codes if c in filter_set]
        skipped = filter_set - set(index_codes)
        if skipped:
            logger.warning("以下任务不在 INDEX 层注册表中,已跳过: %s", skipped)
    else:
        index_codes = all_index_codes

    logger.info("待调试 INDEX 任务: %d 个", len(index_codes))
    logger.info("任务列表: %s", ", ".join(index_codes))
    logger.info("")

    # Execute one task at a time; a crash in one task never aborts the pass.
    results: list[DebugResult] = []
    for idx, task_code in enumerate(index_codes, start=1):
        logger.info("[%d/%d] %s", idx, len(index_codes), task_code)
        try:
            r = debug_single_index_task(
                task_code=task_code,
                executor=executor,
                db_conn=db_conn,
                config=config,
                api_client=api_client,
                logger=logger,
                window_start=window_start,
                window_end=window_end,
            )
        except Exception as exc:
            r = DebugResult(
                task_code=task_code,
                status="ERROR",
                message=f"未捕获异常: {exc}",
                error_detail=traceback.format_exc(),
            )
            logger.error(" ✗ 未捕获异常: %s", exc)
        results.append(r)

        # Make sure the connection is still usable for the next task.
        # NOTE(review): placement reconstructed — source indentation was
        # lost; confirm whether ensure_open() belongs inside the loop or
        # right after it.
        db_conn.ensure_open()

    # Summary.
    _print_summary(results, logger)

    # Persist JSON results next to the script.
    output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
    output_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
    output_file = output_dir / f"debug_index_{ts}.json"
    _save_results(results, output_file)
    logger.info("结果已保存: %s", output_file)

    # Clean up.
    db_conn.close()
    return results
# ── 汇总与输出 ────────────────────────────────────────────────
|
||
|
||
def _print_summary(results: list[DebugResult], logger: logging.Logger):
|
||
"""打印调试汇总。"""
|
||
logger.info("")
|
||
logger.info("=" * 60)
|
||
logger.info("INDEX 层调试汇总")
|
||
logger.info("=" * 60)
|
||
|
||
pass_count = sum(1 for r in results if r.status == "PASS")
|
||
warn_count = sum(1 for r in results if r.status == "WARN")
|
||
error_count = sum(1 for r in results if r.status in ("ERROR", "FAIL"))
|
||
total_duration = sum(r.duration_sec for r in results)
|
||
|
||
logger.info("总计: %d 个任务", len(results))
|
||
logger.info(" ✓ PASS: %d", pass_count)
|
||
logger.info(" ⚠ WARN: %d", warn_count)
|
||
logger.info(" ✗ ERROR: %d", error_count)
|
||
logger.info(" 总耗时: %.1f 秒", total_duration)
|
||
logger.info("")
|
||
|
||
# 按任务分类统计
|
||
score_tasks = [r for r in results if r.task_code != "DWS_ML_MANUAL_IMPORT"]
|
||
ml_tasks = [r for r in results if r.task_code == "DWS_ML_MANUAL_IMPORT"]
|
||
|
||
if score_tasks:
|
||
logger.info("指数计算任务: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
|
||
len(score_tasks),
|
||
sum(1 for r in score_tasks if r.status == "PASS"),
|
||
sum(1 for r in score_tasks if r.status == "WARN"),
|
||
sum(1 for r in score_tasks if r.status in ("ERROR", "FAIL")))
|
||
if ml_tasks:
|
||
logger.info("ML 导入任务: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
|
||
len(ml_tasks),
|
||
sum(1 for r in ml_tasks if r.status == "PASS"),
|
||
sum(1 for r in ml_tasks if r.status == "WARN"),
|
||
sum(1 for r in ml_tasks if r.status in ("ERROR", "FAIL")))
|
||
|
||
# 列出非 PASS 的任务
|
||
non_pass = [r for r in results if r.status != "PASS"]
|
||
if non_pass:
|
||
logger.info("")
|
||
logger.info("需关注的任务:")
|
||
for r in non_pass:
|
||
logger.info(" [%s] %s: %s", r.status, r.task_code, r.message)
|
||
else:
|
||
logger.info("")
|
||
logger.info("所有任务均通过 ✓")
|
||
|
||
|
||
def _save_results(results: list[DebugResult], path: Path):
    """Serialize *results* to pretty-printed UTF-8 JSON at *path*."""
    payload = [_sanitize_for_json(asdict(item)) for item in results]
    text = json.dumps(payload, ensure_ascii=False, indent=2, default=str)
    path.write_text(text, encoding="utf-8")
def _sanitize_for_json(obj):
|
||
"""递归处理不可序列化的值。"""
|
||
if isinstance(obj, dict):
|
||
return {k: _sanitize_for_json(v) for k, v in obj.items()}
|
||
if isinstance(obj, (list, tuple)):
|
||
return [_sanitize_for_json(v) for v in obj]
|
||
if isinstance(obj, datetime):
|
||
return obj.isoformat()
|
||
return obj
|
||
|
||
|
||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||
|
||
def parse_args():
|
||
parser = argparse.ArgumentParser(description="INDEX 层逐任务调试")
|
||
parser.add_argument("--hours", type=float, default=720.0,
|
||
help="回溯窗口小时数(默认 720 = 30 天,指数计算需要较长历史)")
|
||
parser.add_argument("--tasks", type=str, default=None,
|
||
help="仅调试指定任务,逗号分隔(如 DWS_WINBACK_INDEX,DWS_NEWCONV_INDEX)")
|
||
return parser.parse_args()
|
||
|
||
|
||
def main():
    """CLI entry: parse args, run the INDEX debug pass, set the exit code."""
    args = parse_args()
    selected = None
    if args.tasks:
        selected = [t.strip().upper() for t in args.tasks.split(",") if t.strip()]

    results = run_index_debug(hours=args.hours, task_filter=selected)

    # Exit non-zero when any task errored, so CI / shell callers can fail.
    failed = any(r.status in ("ERROR", "FAIL") for r in results)
    sys.exit(1 if failed else 0)
# Allow running directly or via `python -m scripts.debug.debug_index`.
if __name__ == "__main__":
    main()