微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -71,7 +71,6 @@ class ConsistencyReport:
ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
"assistant_accounts_master": "assistant_accounts_master.json",
"assistant_service_records": "assistant_service_records.json",
"assistant_cancellation_records": "assistant_cancellation_records.json",
"member_profiles": "member_profiles.json",
"member_stored_value_cards": "member_stored_value_cards.json",
"member_balance_changes": "member_balance_changes.json",
@@ -93,6 +92,35 @@ ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
"stock_goods_category_tree": "stock_goods_category_tree.json",
}
# CHANGE 2026-02-26 | Maps each ODS table name to its task_code so the paged
# JSON for that table can be located under FETCH_ROOT.
# FETCH_ROOT layout:
#   {task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
ODS_TABLE_TO_TASK_CODE: Dict[str, str] = {
    "assistant_accounts_master": "ODS_ASSISTANT_ACCOUNT",
    "assistant_service_records": "ODS_ASSISTANT_LEDGER",
    "member_profiles": "ODS_MEMBER",
    "member_stored_value_cards": "ODS_MEMBER_CARD",
    "member_balance_changes": "ODS_MEMBER_BALANCE",
    "recharge_settlements": "ODS_RECHARGE_SETTLE",
    "settlement_records": "ODS_SETTLEMENT_RECORDS",
    "table_fee_transactions": "ODS_TABLE_USE",
    "table_fee_discount_records": "ODS_TABLE_FEE_DISCOUNT",
    "store_goods_sales_records": "ODS_STORE_GOODS_SALES",
    "store_goods_master": "ODS_STORE_GOODS",
    "tenant_goods_master": "ODS_TENANT_GOODS",
    "site_tables_master": "ODS_TABLES",
    "group_buy_packages": "ODS_GROUP_PACKAGE",
    "group_buy_redemption_records": "ODS_GROUP_BUY_REDEMPTION",
    "platform_coupon_redemption_records": "ODS_PLATFORM_COUPON",
    "payment_transactions": "ODS_PAYMENT",
    "refund_transactions": "ODS_REFUND",
    "goods_stock_summary": "ODS_INVENTORY_STOCK",
    "goods_stock_movements": "ODS_INVENTORY_CHANGE",
    "stock_goods_category_tree": "ODS_GOODS_CATEGORY",
    "staff_info_master": "ODS_STAFF_INFO",
    "settlement_ticket_records": "ODS_SETTLEMENT_TICKET",
    "json_archive_records": "ODS_JSON_ARCHIVE",
}
# ODS 元数据列——不来自 API,由 ETL 框架自动填充
ODS_META_COLUMNS = frozenset({
"payload", "source_file", "source_endpoint",
@@ -145,6 +173,86 @@ def _extract_records(data: Any) -> list[dict]:
return []
def extract_api_fields_from_fetch_root(
    fetch_root: Path,
    ods_table: str,
) -> set[str] | None:
    """Collect the raw API field names for *ods_table* from FETCH_ROOT.

    CHANGE 2026-02-26 | Replaces the API_SAMPLE_CACHE_ROOT dependency of
    extract_api_fields_from_json: the paged JSON actually fetched by the ETL
    is read directly, so no separately generated cache is needed.

    Expected directory layout:
        FETCH_ROOT/{task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
    Expected paged JSON layout:
        { "pages": [{ "response": { "data": { "{listKey}": [...] } } }] }

    Args:
        fetch_root: Root directory holding one sub-directory per task_code.
        ods_table: ODS table name; must be a key of ODS_TABLE_TO_TASK_CODE.

    Returns:
        The union of the keys of the first 10 extracted records, or ``None``
        when the table has no task_code mapping, the task directory or the
        JSON file is missing, the file cannot be read or decoded, or no
        records could be extracted. ``None`` lets the caller fall back to
        another source.
    """
    task_code = ODS_TABLE_TO_TASK_CODE.get(ods_table)
    if not task_code:
        return None

    task_dir = fetch_root / task_code
    if not task_dir.is_dir():
        return None

    # The newest run is taken to be the directory whose name sorts last
    # (run directory names embed a timestamp).
    latest_run = max(
        (entry for entry in task_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
        default=None,
    )
    if latest_run is None:
        return None

    # Only the newest run is consulted; older runs are deliberately ignored.
    json_file = latest_run / f"{ods_table}.json"
    if not json_file.exists():
        return None

    try:
        with json_file.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError (malformed JSON) as well as
        # UnicodeDecodeError (file is not valid UTF-8). Both mean "no usable
        # data here", not a fatal error for the whole consistency check.
        return None

    records = _extract_records_from_paged_json(data)
    if not records:
        return None

    # Sample only the leading records; that is enough to discover field names
    # without walking the whole payload.
    all_fields: set[str] = set()
    for rec in records[:10]:
        if isinstance(rec, dict):
            all_fields.update(rec.keys())
    return all_fields
def _extract_records_from_paged_json(data: Any) -> list[dict]:
"""从 ETL 分页 JSON 中提取业务记录。
分页 JSON 格式:
{ "pages": [{ "response": { "data": { "{listKey}": [record, ...] } } }] }
也兼容 gen_full_dataflow_doc 的扁平缓存格式(直接列表 / {"data": [...]})。
"""
if not isinstance(data, dict):
return _extract_records(data)
pages = data.get("pages")
if not isinstance(pages, list) or not pages:
# 回退到扁平格式
return _extract_records(data)
# 从第一个有数据的 page 中提取记录
for page in pages:
if not isinstance(page, dict):
continue
response = page.get("response")
if not isinstance(response, dict):
continue
records = _extract_records(response)
if records:
return records
return []
def check_api_vs_ods_fields(
api_fields: set[str],
ods_columns: set[str],
@@ -494,6 +602,7 @@ def run_consistency_check(
db_conn,
*,
api_sample_dir: Path | None = None,
fetch_root: Path | None = None,
include_api_vs_ods: bool = True,
include_ods_vs_dwd: bool = True,
sample_limit: int = 5,
@@ -504,7 +613,8 @@ def run_consistency_check(
参数:
db_conn: 数据库连接对象(需有 .conn 属性返回 psycopg2 connection
api_sample_dir: API JSON 缓存目录(用于 API vs ODS 检查
api_sample_dir: API JSON 缓存目录(旧方式,兼容保留
fetch_root: FETCH_ROOT 目录(优先使用,从 ETL 实际抓取的分页 JSON 提取字段)
include_api_vs_ods: 是否执行 API vs ODS 检查
include_ods_vs_dwd: 是否执行 ODS vs DWD 检查
sample_limit: 值不一致时的采样行数
@@ -519,16 +629,28 @@ def run_consistency_check(
with db_conn.conn.cursor() as cur:
# --- 1. API vs ODS 字段完整性检查 ---
if include_api_vs_ods and api_sample_dir:
# CHANGE 2026-02-26 | 优先从 FETCH_ROOT 读取实际抓取数据,回退到 api_sample_dir 缓存
if include_api_vs_ods and (fetch_root or api_sample_dir):
for ods_table, json_file in sorted(ODS_TABLE_TO_JSON_FILE.items()):
json_path = api_sample_dir / json_file
api_fields = extract_api_fields_from_json(json_path)
# 优先尝试 FETCH_ROOTETL 实际抓取的分页 JSON
api_fields = None
source_hint = ""
if fetch_root:
api_fields = extract_api_fields_from_fetch_root(fetch_root, ods_table)
source_hint = "FETCH_ROOT"
# 回退到 api_sample_dirgen_full_dataflow_doc 缓存)
if api_fields is None and api_sample_dir:
json_path = api_sample_dir / json_file
api_fields = extract_api_fields_from_json(json_path)
source_hint = "API_SAMPLE_CACHE"
if api_fields is None:
result = TableCheckResult(
table_name=f"ods.{ods_table}",
check_type="api_vs_ods",
passed=True, # 无 JSON 缓存时跳过,不算失败
error=f"API JSON 缓存不存在: {json_file}",
passed=True, # 无 JSON 数据时跳过,不算失败
error=f"无可用 JSON 数据FETCH_ROOT 和 API 缓存均未找到)",
)
report.api_vs_ods_results.append(result)
continue