Updata2
This commit is contained in:
237
tmp/full_reload_validation.py
Normal file
237
tmp/full_reload_validation.py
Normal file
@@ -0,0 +1,237 @@
|
||||
# -*- coding: utf-8 -*-
"""
Validation script for the full data reload.

The reload under validation re-fetches API data from 2025-07-01 up to now and
loads it into the database. The code in this file does not fetch or load
anything itself: it measures how well the newly added ODS and DWD columns are
populated and writes the result to a JSON report next to this script.
"""
import json
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

# Put the etl_billiards project on sys.path; this must happen before the
# project-local import below.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv

# Load the project's .env file; main() later reads PG_DSN from the environment.
load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection
def check_ods_field_coverage(db: "DatabaseConnection"):
    """Report how well the newly added ODS columns are populated.

    For each listed table the total row count is read once. Then, for every
    listed column that ``information_schema.columns`` reports as existing, the
    number of non-NULL rows is counted. Rows equal to 0 are counted only for
    numeric columns, because comparing a non-numeric column with ``0`` makes
    the query fail and would discard the coverage figure for that column.

    Args:
        db: Database handle. Only ``db.query(sql)`` and
            ``db.query(sql, params)`` are used, and the result is indexed as
            ``rows[0]["name"]``, so a sequence of name-keyed rows is expected.

    Returns:
        A list with one dict per measured column, holding ``table``,
        ``column``, ``total``, ``non_null``, ``coverage`` (a percentage, 0 for
        an empty table) and ``zero_count`` (``None`` when the column is not
        numeric). Tables whose row count cannot be read, missing columns and
        columns whose queries raise are reported on stdout and left out.
    """
    # Newly added columns to inspect, grouped by ODS table. The names are
    # hard-coded constants, which is why they are interpolated into SQL.
    fields_to_check = [
        ("billiards_ods.table_fee_transactions", ["activity_discount_amount", "real_service_money", "order_consumption_type"]),
        ("billiards_ods.assistant_service_records", ["real_service_money", "assistantteamname"]),
        ("billiards_ods.assistant_cancellation_records", ["tenant_id"]),
        ("billiards_ods.store_goods_sales_records", ["coupon_share_money"]),
        ("billiards_ods.payment_transactions", ["tenant_id"]),
        ("billiards_ods.member_profiles", ["pay_money_sum", "person_tenant_org_id", "recharge_money_sum", "register_source"]),
        ("billiards_ods.member_stored_value_cards", ["principal_balance", "member_grade", "rechargefreezebalance"]),
        ("billiards_ods.member_balance_changes", ["principal_after", "principal_before", "principal_data"]),
        ("billiards_ods.settlement_records", ["tenant_id"]),
        ("billiards_ods.recharge_settlements", ["tenant_id"]),
        ("billiards_ods.group_buy_packages", ["sort", "is_first_limit", "tenantcouponsaleorderitemid"]),
        ("billiards_ods.group_buy_redemption_records", ["coupon_sale_id", "member_discount_money"]),
        ("billiards_ods.site_tables_master", ["order_id"]),
        ("billiards_ods.store_goods_master", ["commodity_code", "not_sale"]),
        ("billiards_ods.table_fee_discount_records", ["table_name", "table_price", "charge_free"]),
        ("billiards_ods.tenant_goods_master", ["not_sale"]),
    ]

    # data_type values for which a "= 0" comparison is valid.
    # NOTE(review): these are PostgreSQL's information_schema type names; the
    # PG_DSN setting suggests PostgreSQL, but confirm the backend.
    numeric_types = {"smallint", "integer", "bigint", "numeric", "real", "double precision"}

    print("\n" + "=" * 80)
    print("ODS 新增字段数据覆盖检查")
    print("=" * 80)

    results = []

    for table, columns in fields_to_check:
        print(f"\n检查表: {table}")

        # Total row count; a table that cannot be read is skipped entirely.
        try:
            schema, name = table.split(".", 1)
            total_rows = db.query(f"SELECT COUNT(*) as cnt FROM {table}")[0]["cnt"]
        except Exception as e:
            print(f" [错误] 无法获取记录数: {e}")
            continue

        for col in columns:
            # One normalized name for both the catalog lookup and the data
            # queries, so the two can never disagree on identifier case.
            column_name = col.lower()
            # Best effort per column: a failure is reported and the loop
            # moves on to the next column.
            try:
                # Existence check that also yields the column's type.
                type_rows = db.query(
                    """
                    SELECT data_type FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s AND column_name = %s
                    """,
                    (schema, name, column_name),
                )

                if not type_rows:
                    print(f" 列 {col}: [不存在]")
                    continue

                # Number of rows holding a non-NULL value.
                non_null_rows = db.query(
                    f'SELECT COUNT(*) as cnt FROM {table} WHERE "{column_name}" IS NOT NULL'
                )[0]["cnt"]

                # Zero count only where the comparison is type-safe.
                if type_rows[0]["data_type"] in numeric_types:
                    zero_rows = db.query(
                        f'SELECT COUNT(*) as cnt FROM {table} WHERE "{column_name}" = 0'
                    )[0]["cnt"]
                else:
                    zero_rows = None

                coverage = (non_null_rows / total_rows * 100) if total_rows > 0 else 0
                zero_display = "N/A" if zero_rows is None else zero_rows

                print(f" 列 {col}:")
                print(f" - 总记录: {total_rows}, 非空: {non_null_rows} ({coverage:.1f}%), 值为0: {zero_display}")

                results.append({
                    "table": table,
                    "column": col,
                    "total": total_rows,
                    "non_null": non_null_rows,
                    "coverage": coverage,
                    "zero_count": zero_rows,
                })

            except Exception as e:
                print(f" 列 {col}: [错误] {e}")

    return results
def check_dwd_field_coverage(db: "DatabaseConnection"):
    """Report how well the newly added DWD columns are populated.

    For each listed table the total row count is read once. Then, for every
    listed column that ``information_schema.columns`` reports as existing, the
    number of non-NULL rows is counted and turned into a percentage.

    Args:
        db: Database handle. Only ``db.query(sql)`` and
            ``db.query(sql, params)`` are used, and the result is indexed as
            ``rows[0]["cnt"]``, so a sequence of name-keyed rows is expected.

    Returns:
        A list with one dict per measured column, holding ``table``,
        ``column``, ``total``, ``non_null`` and ``coverage`` (a percentage, 0
        for an empty table). Tables whose row count cannot be read, missing
        columns and columns whose queries raise are reported on stdout and
        left out.
    """
    # Newly added columns to inspect, grouped by DWD table. The names are
    # hard-coded constants, which is why they are interpolated into SQL.
    fields_to_check = [
        ("billiards_dwd.dwd_table_fee_log", ["activity_discount_amount", "real_service_money"]),
        ("billiards_dwd.dwd_assistant_service_log", ["real_service_money"]),
        ("billiards_dwd.dwd_assistant_trash_event", ["tenant_id"]),
        ("billiards_dwd.dwd_store_goods_sale", ["coupon_share_money"]),
        ("billiards_dwd.dwd_payment", ["tenant_id"]),
        ("billiards_dwd.dim_member", ["pay_money_sum", "recharge_money_sum"]),
        ("billiards_dwd.dim_member_ex", ["person_tenant_org_id", "register_source"]),
        ("billiards_dwd.dim_member_card_account", ["principal_balance", "member_grade"]),
        ("billiards_dwd.dwd_member_balance_change", ["principal_after", "principal_before", "principal_change_amount"]),
        ("billiards_dwd.dwd_settlement_head", ["tenant_id"]),
        ("billiards_dwd.dwd_recharge_order", ["tenant_id"]),
        ("billiards_dwd.dim_groupbuy_package", ["sort", "is_first_limit"]),
        ("billiards_dwd.dwd_groupbuy_redemption", ["coupon_sale_id", "member_discount_money"]),
        ("billiards_dwd.dim_table", ["order_id"]),
        ("billiards_dwd.dim_store_goods", ["commodity_code", "not_sale"]),
        ("billiards_dwd.dwd_table_fee_adjust", ["table_name", "table_price", "charge_free"]),
        ("billiards_dwd.dim_tenant_goods", ["not_sale"]),
    ]

    print("\n" + "=" * 80)
    print("DWD 新增字段数据覆盖检查")
    print("=" * 80)

    results = []

    for table, columns in fields_to_check:
        print(f"\n检查表: {table}")

        # Total row count; a table that cannot be read is skipped entirely.
        try:
            schema, name = table.split(".", 1)
            total_rows = db.query(f"SELECT COUNT(*) as cnt FROM {table}")[0]["cnt"]
        except Exception as e:
            print(f" [错误] 无法获取记录数: {e}")
            continue

        for col in columns:
            # One normalized name for both the catalog lookup and the data
            # query, so the two can never disagree on identifier case.
            column_name = col.lower()
            # Best effort per column: a failure is reported and the loop
            # moves on to the next column.
            try:
                # Does the column exist at all?
                col_check = db.query(
                    """
                    SELECT COUNT(*) as cnt FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s AND column_name = %s
                    """,
                    (schema, name, column_name),
                )

                if col_check[0]["cnt"] == 0:
                    print(f" 列 {col}: [不存在]")
                    continue

                # Number of rows holding a non-NULL value.
                non_null_rows = db.query(
                    f'SELECT COUNT(*) as cnt FROM {table} WHERE "{column_name}" IS NOT NULL'
                )[0]["cnt"]

                coverage = (non_null_rows / total_rows * 100) if total_rows > 0 else 0

                print(f" 列 {col}: 总记录: {total_rows}, 非空: {non_null_rows} ({coverage:.1f}%)")

                results.append({
                    "table": table,
                    "column": col,
                    "total": total_rows,
                    "non_null": non_null_rows,
                    "coverage": coverage,
                })

            except Exception as e:
                print(f" 列 {col}: [错误] {e}")

    return results
def _coverage_status(coverage):
    """Return the summary tag for a coverage percentage (<50, <80, otherwise)."""
    if coverage < 50:
        return "[需关注]"
    if coverage < 80:
        return "[一般]"
    return "[良好]"


def _print_coverage_summary(title, results):
    """Print the title followed by one coverage line per measured column."""
    print(title)
    for r in results:
        status = _coverage_status(r["coverage"])
        print(f" {r['table']}.{r['column']}: {r['coverage']:.1f}% {status}")


def main():
    """Run the ODS and DWD coverage checks and write the JSON report.

    The connection string is read from the ``PG_DSN`` environment variable.
    Both coverage checks run on one connection, a per-column summary is
    printed, and the combined results are saved as
    ``field_coverage_report.json`` in the directory of this script.

    Returns:
        ``False`` when ``PG_DSN`` is not set (nothing else is done in that
        case), ``True`` once the report has been written.
    """
    print("=" * 80)
    print("全量数据回写验证")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)

    # Connect to the database.
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        return False

    db = DatabaseConnection(dsn)
    try:
        ods_results = check_ods_field_coverage(db)
        dwd_results = check_dwd_field_coverage(db)
    finally:
        # Release the connection even if a check is interrupted.
        db.close()

    # Summary.
    print("\n" + "=" * 80)
    print("汇总")
    print("=" * 80)

    _print_coverage_summary("\nODS 新增字段覆盖率统计:", ods_results)
    _print_coverage_summary("\nDWD 新增字段覆盖率统计:", dwd_results)

    # Save the report next to this script.
    report = {
        "generated_at": datetime.now().isoformat(),
        "ods_coverage": ods_results,
        "dwd_coverage": dwd_results,
    }

    report_file = Path(__file__).parent / "field_coverage_report.json"
    with open(report_file, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n报告已保存到: {report_file}")

    return True
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    sys.exit(0 if main() else 1)
Reference in New Issue
Block a user