微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -57,6 +57,7 @@ SAMPLE_LIMIT = 5
FIELD_STATS_DISTINCT_THRESHOLD = 3000
# ── ODS 任务名 → ODS 表名映射(与 blackbox_test_report.py 保持一致) ──
# CHANGE 2026-03-04 | 补充 ODS_STAFF_INFO(此前遗漏,导致员工表不参与一致性检查)
ODS_TASK_TO_TABLE = {
"ODS_ASSISTANT_ACCOUNT": "assistant_accounts_master",
"ODS_ASSISTANT_LEDGER": "assistant_service_records",
@@ -79,9 +80,13 @@ ODS_TASK_TO_TABLE = {
"ODS_STORE_GOODS": "store_goods_master",
"ODS_STORE_GOODS_SALES": "store_goods_sales_records",
"ODS_TENANT_GOODS": "tenant_goods_master",
"ODS_STAFF_INFO": "staff_info_master",
}
# DWD 表 → ODS 表映射
# CHANGE 2026-03-04 | 补充 dim_staff、dim_site、
# dwd_goods_stock_movement、dwd_goods_stock_summary(此前遗漏)
# CHANGE 2026-03-04 | 移除 dwd_assistant_trash_event(表已于 2026-02-22 DROP,禁止复活)
DWD_TO_ODS = {
"dwd.dim_assistant": "ods.assistant_accounts_master",
"dwd.dim_member": "ods.member_profiles",
@@ -91,6 +96,8 @@ DWD_TO_ODS = {
"dwd.dim_store_goods": "ods.store_goods_master",
"dwd.dim_tenant_goods": "ods.tenant_goods_master",
"dwd.dim_goods_category": "ods.stock_goods_category_tree",
"dwd.dim_staff": "ods.staff_info_master",
"dwd.dim_site": "ods.site_tables_master",
"dwd.dwd_assistant_service_log": "ods.assistant_service_records",
"dwd.dwd_member_balance_change": "ods.member_balance_changes",
"dwd.dwd_recharge_order": "ods.recharge_settlements",
@@ -102,6 +109,8 @@ DWD_TO_ODS = {
"dwd.dwd_platform_coupon_redemption": "ods.platform_coupon_redemption_records",
"dwd.dwd_groupbuy_redemption": "ods.group_buy_redemption_records",
"dwd.dwd_store_goods_sale": "ods.store_goods_sales_records",
"dwd.dwd_goods_stock_movement": "ods.goods_stock_movements",
"dwd.dwd_goods_stock_summary": "ods.goods_stock_summary",
}
# ETL 元数据列(不参与值比对)
@@ -400,6 +409,33 @@ def get_field_stats(conn, schema: str, table: str) -> list[dict]:
return results
# ── 数据截止日期查询 ──
# 每张 ODS 表的截止日期字段映射:大部分用 create_time/createtime,两张维表用 fetched_at
_CUTOFF_DATE_COLUMN: dict[str, str] = {
"goods_stock_summary": "fetched_at",
"stock_goods_category_tree": "fetched_at",
# 以下表使用 createtime无下划线
"goods_stock_movements": "createtime",
"settlement_records": "createtime",
"recharge_settlements": "createtime",
}
def get_data_cutoff_date(conn, schema: str, table: str) -> str | None:
"""查询表中数据的最后截止日期MAX of 时间字段)"""
col = _CUTOFF_DATE_COLUMN.get(table, "create_time")
with conn.cursor() as cur:
try:
cur.execute(
f'SELECT MAX("{col}")::date::text AS cutoff FROM {schema}.{table}'
)
row = cur.fetchone()
return row["cutoff"] if row and row["cutoff"] else None
except Exception:
conn.rollback()
return None
# ── API vs ODS 字段级比对 ──
def check_api_vs_ods(conn, task_name: str, ods_table: str) -> dict:
"""比对 API JSON 字段与 ODS 表列,并采样值比对"""
@@ -501,6 +537,9 @@ def check_api_vs_ods(conn, task_name: str, ods_table: str) -> dict:
# 字段级统计ODS 表)
result["ods_field_stats"] = get_field_stats(conn, "ods", ods_table)
# 数据截止日期
result["data_cutoff"] = get_data_cutoff_date(conn, "ods", ods_table)
return result
@@ -720,6 +759,9 @@ def check_ods_vs_dwd(conn, dwd_full: str, ods_full: str) -> dict:
# 字段级统计DWD 主表)
result["dwd_field_stats"] = get_field_stats(conn, dwd_s, dwd_t)
# 数据截止日期(从 ODS 源表查询)
result["data_cutoff"] = get_data_cutoff_date(conn, ods_s, ods_t)
return result
@@ -879,13 +921,15 @@ def generate_report(
w()
w("### 2.1 汇总")
w()
w("| ODS 表 | API 记录数 | ODS 行数 | ODS 去重ID | 字段匹配 | API独有 | ODS独有 | 值差异 | 白名单 | 状态 |")
w("|--------|-----------|---------|-----------|---------|--------|--------|-------|--------|------|")
w("| ODS 表 | API 记录数 | ODS 行数 | ODS 去重ID | 数据截止 | 字段匹配 | API独有 | ODS独有 | 值差异 | 白名单 | 状态 |")
w("|--------|-----------|---------|-----------|---------|---------|--------|--------|-------|--------|------|")
for c in api_ods_checks:
fc = c["field_check"]
real_diffs = [d for d in c["value_diffs"] if not d.get("whitelist")]
wl_diffs = [d for d in c["value_diffs"] if d.get("whitelist")]
cutoff = c.get("data_cutoff") or ""
w(f"| `{c['ods_table']}` | {c['api_records']} | {c['ods_rows']} | {c['ods_distinct_ids']} "
f"| {cutoff} "
f"| {fc['matched']}/{fc['api_fields']} | {len(fc['api_only'])} | {len(fc['ods_only'])} "
f"| {len(real_diffs)} | {len(wl_diffs)} | {c['status']} |")
w()
@@ -943,14 +987,16 @@ def generate_report(
w()
w("### 3.1 汇总")
w()
w("| DWD 表 | EX 表 | ODS 表 | 类型 | ODS 行 | ODS 去重ID | DWD 行 | 共同列 | DWD独有 | 值差异 | 白名单 | 状态 |")
w("|--------|-------|--------|------|-------|-----------|-------|-------|--------|-------|--------|------|")
w("| DWD 表 | EX 表 | ODS 表 | 类型 | ODS 行 | ODS 去重ID | DWD 行 | 数据截止 | 共同列 | DWD独有 | 值差异 | 白名单 | 状态 |")
w("|--------|-------|--------|------|-------|-----------|-------|---------|-------|--------|-------|--------|------|")
for c in ods_dwd_checks:
real_diffs = [d for d in c["value_diffs"] if not d.get("whitelist")]
wl_diffs = [d for d in c["value_diffs"] if d.get("whitelist")]
ex_label = c.get("ex_table", "") or ""
cutoff = c.get("data_cutoff") or ""
w(f"| `{c['dwd_table']}` | `{ex_label}` | `{c['ods_table']}` | {c['type']} "
f"| {c['ods_rows']} | {c['ods_distinct_ids']} | {c['dwd_rows']} "
f"| {cutoff} "
f"| {len(c['common_cols'])} | {len(c['dwd_only_cols'])} "
f"| {len(real_diffs)} | {len(wl_diffs)} | {c['status']} |")
w()