微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -71,7 +71,6 @@ class ConsistencyReport:
|
||||
ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
|
||||
"assistant_accounts_master": "assistant_accounts_master.json",
|
||||
"assistant_service_records": "assistant_service_records.json",
|
||||
"assistant_cancellation_records": "assistant_cancellation_records.json",
|
||||
"member_profiles": "member_profiles.json",
|
||||
"member_stored_value_cards": "member_stored_value_cards.json",
|
||||
"member_balance_changes": "member_balance_changes.json",
|
||||
@@ -93,6 +92,35 @@ ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
|
||||
"stock_goods_category_tree": "stock_goods_category_tree.json",
|
||||
}
|
||||
|
||||
# CHANGE 2026-02-26 | Maps each ODS table name to the ETL task_code that
# fetches it; used to locate the paged JSON files under FETCH_ROOT.
# FETCH_ROOT directory layout:
#   {task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
# A table that has no entry here cannot be resolved under FETCH_ROOT:
# extract_api_fields_from_fetch_root() returns None for it.
ODS_TABLE_TO_TASK_CODE: Dict[str, str] = {
    "assistant_accounts_master": "ODS_ASSISTANT_ACCOUNT",
    "assistant_service_records": "ODS_ASSISTANT_LEDGER",
    "member_profiles": "ODS_MEMBER",
    "member_stored_value_cards": "ODS_MEMBER_CARD",
    "member_balance_changes": "ODS_MEMBER_BALANCE",
    "recharge_settlements": "ODS_RECHARGE_SETTLE",
    "settlement_records": "ODS_SETTLEMENT_RECORDS",
    "table_fee_transactions": "ODS_TABLE_USE",
    "table_fee_discount_records": "ODS_TABLE_FEE_DISCOUNT",
    "store_goods_sales_records": "ODS_STORE_GOODS_SALES",
    "store_goods_master": "ODS_STORE_GOODS",
    "tenant_goods_master": "ODS_TENANT_GOODS",
    "site_tables_master": "ODS_TABLES",
    "group_buy_packages": "ODS_GROUP_PACKAGE",
    "group_buy_redemption_records": "ODS_GROUP_BUY_REDEMPTION",
    "platform_coupon_redemption_records": "ODS_PLATFORM_COUPON",
    "payment_transactions": "ODS_PAYMENT",
    "refund_transactions": "ODS_REFUND",
    "goods_stock_summary": "ODS_INVENTORY_STOCK",
    "goods_stock_movements": "ODS_INVENTORY_CHANGE",
    "stock_goods_category_tree": "ODS_GOODS_CATEGORY",
    "staff_info_master": "ODS_STAFF_INFO",
    "settlement_ticket_records": "ODS_SETTLEMENT_TICKET",
    "json_archive_records": "ODS_JSON_ARCHIVE",
}
|
||||
|
||||
# ODS 元数据列——不来自 API,由 ETL 框架自动填充
|
||||
ODS_META_COLUMNS = frozenset({
|
||||
"payload", "source_file", "source_endpoint",
|
||||
@@ -145,6 +173,86 @@ def _extract_records(data: Any) -> list[dict]:
|
||||
return []
|
||||
|
||||
|
||||
def extract_api_fields_from_fetch_root(
    fetch_root: Path,
    ods_table: str,
) -> set[str] | None:
    """Collect the raw API field names for *ods_table* from FETCH_ROOT.

    CHANGE 2026-02-26 | Replaces the API_SAMPLE_CACHE_ROOT dependency of
    extract_api_fields_from_json: the paged JSON actually fetched by the ETL
    is read directly, so no cache has to be generated by hand.

    Directory layout:
        FETCH_ROOT/{task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
    Paged JSON layout:
        { "pages": [{ "response": { "data": { "{listKey}": [...] } } }] }

    Args:
        fetch_root: Root directory holding one sub-directory per task_code.
        ods_table: ODS table name; must be a key of ODS_TABLE_TO_TASK_CODE.

    Returns:
        The union of the keys of the first 10 extracted records, or None
        when no usable data is available: the table has no task_code, the
        task directory is missing/unreadable or has no run directory, the
        JSON file in the latest run is missing, unreadable, not valid UTF-8
        or not valid JSON, or it yields no records.
    """
    task_code = ODS_TABLE_TO_TASK_CODE.get(ods_table)
    if not task_code:
        return None

    task_dir = fetch_root / task_code
    try:
        run_dirs = [d for d in task_dir.iterdir() if d.is_dir()]
    except OSError:
        # Covers a missing path, a path that is not a directory, and
        # permission errors; all of them mean "nothing to sample".
        return None
    if not run_dirs:
        return None

    # Latest run = lexicographically greatest directory name.
    # NOTE(review): names look like {task_code}-{run_id}-{date}-{time}, so
    # this is chronological only if run_id sorts in step with time -- confirm.
    latest_run = max(run_dirs, key=lambda d: d.name)
    json_file = latest_run / f"{ods_table}.json"

    try:
        with json_file.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (bytes that are not valid UTF-8).
        return None

    records = _extract_records_from_paged_json(data)
    if not records:
        return None

    # Only the leading records are sampled to build the field set.
    all_fields: set[str] = set()
    for rec in records[:10]:
        if isinstance(rec, dict):
            all_fields.update(rec.keys())
    return all_fields
|
||||
|
||||
|
||||
def _extract_records_from_paged_json(data: Any) -> list[dict]:
|
||||
"""从 ETL 分页 JSON 中提取业务记录。
|
||||
|
||||
分页 JSON 格式:
|
||||
{ "pages": [{ "response": { "data": { "{listKey}": [record, ...] } } }] }
|
||||
也兼容 gen_full_dataflow_doc 的扁平缓存格式(直接列表 / {"data": [...]})。
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return _extract_records(data)
|
||||
|
||||
pages = data.get("pages")
|
||||
if not isinstance(pages, list) or not pages:
|
||||
# 回退到扁平格式
|
||||
return _extract_records(data)
|
||||
|
||||
# 从第一个有数据的 page 中提取记录
|
||||
for page in pages:
|
||||
if not isinstance(page, dict):
|
||||
continue
|
||||
response = page.get("response")
|
||||
if not isinstance(response, dict):
|
||||
continue
|
||||
records = _extract_records(response)
|
||||
if records:
|
||||
return records
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def check_api_vs_ods_fields(
|
||||
api_fields: set[str],
|
||||
ods_columns: set[str],
|
||||
@@ -494,6 +602,7 @@ def run_consistency_check(
|
||||
db_conn,
|
||||
*,
|
||||
api_sample_dir: Path | None = None,
|
||||
fetch_root: Path | None = None,
|
||||
include_api_vs_ods: bool = True,
|
||||
include_ods_vs_dwd: bool = True,
|
||||
sample_limit: int = 5,
|
||||
@@ -504,7 +613,8 @@ def run_consistency_check(
|
||||
|
||||
参数:
|
||||
db_conn: 数据库连接对象(需有 .conn 属性返回 psycopg2 connection)
|
||||
api_sample_dir: API JSON 缓存目录(用于 API vs ODS 检查)
|
||||
api_sample_dir: API JSON 缓存目录(旧方式,兼容保留)
|
||||
fetch_root: FETCH_ROOT 目录(优先使用,从 ETL 实际抓取的分页 JSON 提取字段)
|
||||
include_api_vs_ods: 是否执行 API vs ODS 检查
|
||||
include_ods_vs_dwd: 是否执行 ODS vs DWD 检查
|
||||
sample_limit: 值不一致时的采样行数
|
||||
@@ -519,16 +629,28 @@ def run_consistency_check(
|
||||
|
||||
with db_conn.conn.cursor() as cur:
|
||||
# --- 1. API vs ODS 字段完整性检查 ---
|
||||
if include_api_vs_ods and api_sample_dir:
|
||||
# CHANGE 2026-02-26 | 优先从 FETCH_ROOT 读取实际抓取数据,回退到 api_sample_dir 缓存
|
||||
if include_api_vs_ods and (fetch_root or api_sample_dir):
|
||||
for ods_table, json_file in sorted(ODS_TABLE_TO_JSON_FILE.items()):
|
||||
json_path = api_sample_dir / json_file
|
||||
api_fields = extract_api_fields_from_json(json_path)
|
||||
# 优先尝试 FETCH_ROOT(ETL 实际抓取的分页 JSON)
|
||||
api_fields = None
|
||||
source_hint = ""
|
||||
if fetch_root:
|
||||
api_fields = extract_api_fields_from_fetch_root(fetch_root, ods_table)
|
||||
source_hint = "FETCH_ROOT"
|
||||
|
||||
# 回退到 api_sample_dir(gen_full_dataflow_doc 缓存)
|
||||
if api_fields is None and api_sample_dir:
|
||||
json_path = api_sample_dir / json_file
|
||||
api_fields = extract_api_fields_from_json(json_path)
|
||||
source_hint = "API_SAMPLE_CACHE"
|
||||
|
||||
if api_fields is None:
|
||||
result = TableCheckResult(
|
||||
table_name=f"ods.{ods_table}",
|
||||
check_type="api_vs_ods",
|
||||
passed=True, # 无 JSON 缓存时跳过,不算失败
|
||||
error=f"API JSON 缓存不存在: {json_file}",
|
||||
passed=True, # 无 JSON 数据时跳过,不算失败
|
||||
error=f"无可用 JSON 数据(FETCH_ROOT 和 API 缓存均未找到)",
|
||||
)
|
||||
report.api_vs_ods_results.append(result)
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user