微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -57,6 +57,7 @@ SAMPLE_LIMIT = 5
|
||||
FIELD_STATS_DISTINCT_THRESHOLD = 3000
|
||||
|
||||
# ── ODS 任务名 → ODS 表名映射(与 blackbox_test_report.py 保持一致) ──
|
||||
# CHANGE 2026-03-04 | 补充 ODS_STAFF_INFO,此前遗漏导致员工表不参与一致性检查
|
||||
ODS_TASK_TO_TABLE = {
|
||||
"ODS_ASSISTANT_ACCOUNT": "assistant_accounts_master",
|
||||
"ODS_ASSISTANT_LEDGER": "assistant_service_records",
|
||||
@@ -79,9 +80,13 @@ ODS_TASK_TO_TABLE = {
|
||||
"ODS_STORE_GOODS": "store_goods_master",
|
||||
"ODS_STORE_GOODS_SALES": "store_goods_sales_records",
|
||||
"ODS_TENANT_GOODS": "tenant_goods_master",
|
||||
"ODS_STAFF_INFO": "staff_info_master",
|
||||
}
|
||||
|
||||
# DWD 表 → ODS 表映射
|
||||
# CHANGE 2026-03-04 | 补充 dim_staff、dim_site、
|
||||
# dwd_goods_stock_movement、dwd_goods_stock_summary,此前遗漏
|
||||
# CHANGE 2026-03-04 | 移除 dwd_assistant_trash_event(表已于 2026-02-22 DROP,禁止复活)
|
||||
DWD_TO_ODS = {
|
||||
"dwd.dim_assistant": "ods.assistant_accounts_master",
|
||||
"dwd.dim_member": "ods.member_profiles",
|
||||
@@ -91,6 +96,8 @@ DWD_TO_ODS = {
|
||||
"dwd.dim_store_goods": "ods.store_goods_master",
|
||||
"dwd.dim_tenant_goods": "ods.tenant_goods_master",
|
||||
"dwd.dim_goods_category": "ods.stock_goods_category_tree",
|
||||
"dwd.dim_staff": "ods.staff_info_master",
|
||||
"dwd.dim_site": "ods.site_tables_master",
|
||||
"dwd.dwd_assistant_service_log": "ods.assistant_service_records",
|
||||
"dwd.dwd_member_balance_change": "ods.member_balance_changes",
|
||||
"dwd.dwd_recharge_order": "ods.recharge_settlements",
|
||||
@@ -102,6 +109,8 @@ DWD_TO_ODS = {
|
||||
"dwd.dwd_platform_coupon_redemption": "ods.platform_coupon_redemption_records",
|
||||
"dwd.dwd_groupbuy_redemption": "ods.group_buy_redemption_records",
|
||||
"dwd.dwd_store_goods_sale": "ods.store_goods_sales_records",
|
||||
"dwd.dwd_goods_stock_movement": "ods.goods_stock_movements",
|
||||
"dwd.dwd_goods_stock_summary": "ods.goods_stock_summary",
|
||||
}
|
||||
|
||||
# ETL 元数据列(不参与值比对)
|
||||
@@ -400,6 +409,33 @@ def get_field_stats(conn, schema: str, table: str) -> list[dict]:
|
||||
return results
|
||||
|
||||
|
||||
# ── 数据截止日期查询 ──
|
||||
# 每张 ODS 表的截止日期字段映射:大部分用 create_time/createtime,两张维表用 fetched_at
|
||||
_CUTOFF_DATE_COLUMN: dict[str, str] = {
|
||||
"goods_stock_summary": "fetched_at",
|
||||
"stock_goods_category_tree": "fetched_at",
|
||||
# 以下表使用 createtime(无下划线)
|
||||
"goods_stock_movements": "createtime",
|
||||
"settlement_records": "createtime",
|
||||
"recharge_settlements": "createtime",
|
||||
}
|
||||
|
||||
|
||||
def get_data_cutoff_date(conn, schema: str, table: str) -> str | None:
|
||||
"""查询表中数据的最后截止日期(MAX of 时间字段)"""
|
||||
col = _CUTOFF_DATE_COLUMN.get(table, "create_time")
|
||||
with conn.cursor() as cur:
|
||||
try:
|
||||
cur.execute(
|
||||
f'SELECT MAX("{col}")::date::text AS cutoff FROM {schema}.{table}'
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row["cutoff"] if row and row["cutoff"] else None
|
||||
except Exception:
|
||||
conn.rollback()
|
||||
return None
|
||||
|
||||
|
||||
# ── API vs ODS 字段级比对 ──
|
||||
def check_api_vs_ods(conn, task_name: str, ods_table: str) -> dict:
|
||||
"""比对 API JSON 字段与 ODS 表列,并采样值比对"""
|
||||
@@ -501,6 +537,9 @@ def check_api_vs_ods(conn, task_name: str, ods_table: str) -> dict:
|
||||
# 字段级统计(ODS 表)
|
||||
result["ods_field_stats"] = get_field_stats(conn, "ods", ods_table)
|
||||
|
||||
# 数据截止日期
|
||||
result["data_cutoff"] = get_data_cutoff_date(conn, "ods", ods_table)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -720,6 +759,9 @@ def check_ods_vs_dwd(conn, dwd_full: str, ods_full: str) -> dict:
|
||||
# 字段级统计(DWD 主表)
|
||||
result["dwd_field_stats"] = get_field_stats(conn, dwd_s, dwd_t)
|
||||
|
||||
# 数据截止日期(从 ODS 源表查询)
|
||||
result["data_cutoff"] = get_data_cutoff_date(conn, ods_s, ods_t)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -879,13 +921,15 @@ def generate_report(
|
||||
w()
|
||||
w("### 2.1 汇总")
|
||||
w()
|
||||
w("| ODS 表 | API 记录数 | ODS 行数 | ODS 去重ID | 字段匹配 | API独有 | ODS独有 | 值差异 | 白名单 | 状态 |")
|
||||
w("|--------|-----------|---------|-----------|---------|--------|--------|-------|--------|------|")
|
||||
w("| ODS 表 | API 记录数 | ODS 行数 | ODS 去重ID | 数据截止 | 字段匹配 | API独有 | ODS独有 | 值差异 | 白名单 | 状态 |")
|
||||
w("|--------|-----------|---------|-----------|---------|---------|--------|--------|-------|--------|------|")
|
||||
for c in api_ods_checks:
|
||||
fc = c["field_check"]
|
||||
real_diffs = [d for d in c["value_diffs"] if not d.get("whitelist")]
|
||||
wl_diffs = [d for d in c["value_diffs"] if d.get("whitelist")]
|
||||
cutoff = c.get("data_cutoff") or "—"
|
||||
w(f"| `{c['ods_table']}` | {c['api_records']} | {c['ods_rows']} | {c['ods_distinct_ids']} "
|
||||
f"| {cutoff} "
|
||||
f"| {fc['matched']}/{fc['api_fields']} | {len(fc['api_only'])} | {len(fc['ods_only'])} "
|
||||
f"| {len(real_diffs)} | {len(wl_diffs)} | {c['status']} |")
|
||||
w()
|
||||
@@ -943,14 +987,16 @@ def generate_report(
|
||||
w()
|
||||
w("### 3.1 汇总")
|
||||
w()
|
||||
w("| DWD 表 | EX 表 | ODS 表 | 类型 | ODS 行 | ODS 去重ID | DWD 行 | 共同列 | DWD独有 | 值差异 | 白名单 | 状态 |")
|
||||
w("|--------|-------|--------|------|-------|-----------|-------|-------|--------|-------|--------|------|")
|
||||
w("| DWD 表 | EX 表 | ODS 表 | 类型 | ODS 行 | ODS 去重ID | DWD 行 | 数据截止 | 共同列 | DWD独有 | 值差异 | 白名单 | 状态 |")
|
||||
w("|--------|-------|--------|------|-------|-----------|-------|---------|-------|--------|-------|--------|------|")
|
||||
for c in ods_dwd_checks:
|
||||
real_diffs = [d for d in c["value_diffs"] if not d.get("whitelist")]
|
||||
wl_diffs = [d for d in c["value_diffs"] if d.get("whitelist")]
|
||||
ex_label = c.get("ex_table", "—") or "—"
|
||||
cutoff = c.get("data_cutoff") or "—"
|
||||
w(f"| `{c['dwd_table']}` | `{ex_label}` | `{c['ods_table']}` | {c['type']} "
|
||||
f"| {c['ods_rows']} | {c['ods_distinct_ids']} | {c['dwd_rows']} "
|
||||
f"| {cutoff} "
|
||||
f"| {len(c['common_cols'])} | {len(c['dwd_only_cols'])} "
|
||||
f"| {len(real_diffs)} | {len(wl_diffs)} | {c['status']} |")
|
||||
w()
|
||||
|
||||
Reference in New Issue
Block a user