This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

View File

@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
"""检查新添加字段的数据完整性"""
import os

import psycopg2
# Connection string for the PostgreSQL database that is inspected.  It can be
# overridden through the LLZQ_DSN environment variable so that credentials do
# not have to live in source control.
# NOTE(review): the fallback below embeds a plaintext password; it is kept only
# so existing invocations keep working -- rotate it and drop the default.
DSN = os.environ.get(
    'LLZQ_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
# Newly added columns to verify, keyed by schema-qualified table name.
# Insertion order is the order in which the report prints the tables.
NEW_FIELDS = {
    # DWD main tables
    "billiards_dwd.dwd_settlement_head": [
        "electricity_money",
        "real_electricity_money",
        "electricity_adjust_money",
        "pl_coupon_sale_amount",
        "mervou_sales_amount",
    ],
    "billiards_dwd.dwd_table_fee_log": [
        "activity_discount_amount",
        "real_service_money",
    ],
    "billiards_dwd.dwd_table_fee_adjust": [
        "table_name",
        "table_price",
        "charge_free",
    ],
    "billiards_dwd.dim_member": [
        "pay_money_sum",
        "recharge_money_sum",
    ],
    "billiards_dwd.dim_member_card_account": [
        "principal_balance",
        "member_grade",
    ],
    "billiards_dwd.dim_store_goods": [
        "commodity_code",
        "not_sale",
    ],
    "billiards_dwd.dim_table": ["order_id"],
    "billiards_dwd.dim_tenant_goods": ["not_sale"],
    "billiards_dwd.dim_groupbuy_package": [
        "sort",
        "is_first_limit",
    ],
    "billiards_dwd.dwd_assistant_service_log": ["real_service_money"],
    "billiards_dwd.dwd_assistant_trash_event": ["tenant_id"],
    "billiards_dwd.dwd_groupbuy_redemption": [
        "member_discount_money",
        "coupon_sale_id",
    ],
    "billiards_dwd.dwd_member_balance_change": [
        "principal_before",
        "principal_after",
    ],
    "billiards_dwd.dwd_payment": ["tenant_id"],
    "billiards_dwd.dwd_store_goods_sale": ["coupon_share_money"],
}
def check_field_data(conn, schema_table, fields):
    """Collect fill statistics for the given columns of one table.

    Args:
        conn: Open database connection (psycopg2 in this script).  Only
            ``cursor()`` and ``rollback()`` are called on it.
        schema_table: Table name exactly as it should appear in the SQL
            text, e.g. ``'billiards_dwd.dim_member'``.
        fields: Iterable of column names to inspect.

    Returns:
        A list with one dict per field, in input order.  On success the dict
        has the keys ``field``, ``total``, ``non_null``, ``non_zero`` and
        ``fill_rate`` (a percentage string, or ``"N/A"`` when the table has
        no rows).  When querying a field fails the dict has ``field`` and
        ``error`` (the first 50 characters of the exception message).

    Raises:
        Exception: Whatever the driver raises when the initial row count of
            ``schema_table`` fails; that error is not caught here.

    Note:
        Table and column names are interpolated directly into the SQL text,
        so both arguments must come from trusted code, never from user input.
    """
    results = []
    # The context manager closes the cursor even when a query raises.
    with conn.cursor() as cur:
        # Total number of rows in the table.
        cur.execute(f"SELECT COUNT(*) FROM {schema_table}")
        total_rows = cur.fetchone()[0]

        for field in fields:
            try:
                # Rows where the column is populated at all.
                cur.execute(
                    f"SELECT COUNT(*) FROM {schema_table} "
                    f"WHERE {field} IS NOT NULL"
                )
                non_null_count = cur.fetchone()[0]

                # Rows where the column is populated with something other
                # than a zero / empty value (meaningful for numeric columns).
                cur.execute(f"""
                    SELECT COUNT(*) FROM {schema_table}
                    WHERE {field} IS NOT NULL
                      AND CAST({field} AS TEXT) NOT IN ('0', '0.00', '0.0', '')
                """)
                non_zero_count = cur.fetchone()[0]
            except Exception as e:
                # PostgreSQL aborts the whole transaction after a failed
                # statement; without a rollback every later query on this
                # connection would fail as well.
                conn.rollback()
                results.append({
                    'field': field,
                    'error': str(e)[:50],
                })
            else:
                results.append({
                    'field': field,
                    'total': total_rows,
                    'non_null': non_null_count,
                    'non_zero': non_zero_count,
                    'fill_rate': (
                        f"{non_null_count/total_rows*100:.1f}%"
                        if total_rows > 0 else "N/A"
                    ),
                })
    return results
def main():
    """Print a data-completeness report for every table in NEW_FIELDS.

    Opens one connection using DSN, runs check_field_data for each table and
    prints the per-field counts as a fixed-width table.  The connection is
    closed even if one of the checks raises.
    """
    conn = psycopg2.connect(DSN)
    try:
        print("=" * 90)
        print("New Fields Data Completeness Check")
        print("=" * 90)
        for table, fields in NEW_FIELDS.items():
            print(f"\n### {table} ###\n")
            results = check_field_data(conn, table, fields)
            print(f"{'Field':<30} {'Total':>8} {'Non-Null':>10} {'Non-Zero':>10} {'Fill Rate':>10}")
            print("-" * 70)
            for r in results:
                if 'error' in r:
                    print(f"{r['field']:<30} ERROR: {r['error']}")
                else:
                    print(f"{r['field']:<30} {r['total']:>8} {r['non_null']:>10} {r['non_zero']:>10} {r['fill_rate']:>10}")
    finally:
        # Explicit close: psycopg2's ``with conn:`` only ends the
        # transaction, it does not close the connection.
        conn.close()
    print("\n" + "=" * 90)
# Run the report only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()