# -*- coding: utf-8 -*-
"""Check the data completeness of newly added fields.

For every table/column pair listed in ``NEW_FIELDS`` the script counts the
total rows, the rows where the column is not NULL, and the rows where the
column is neither NULL nor a zero/empty value, then prints one report
section per table.
"""
import psycopg2

# NOTE(review): the connection string embeds credentials in source control;
# consider supplying it through configuration or the environment instead.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

# Newly added fields, keyed by schema-qualified table name.
NEW_FIELDS = {
    # DWD main tables
    'billiards_dwd.dwd_settlement_head': [
        'electricity_money',
        'real_electricity_money',
        'electricity_adjust_money',
        'pl_coupon_sale_amount',
        'mervou_sales_amount',
    ],
    'billiards_dwd.dwd_table_fee_log': ['activity_discount_amount', 'real_service_money'],
    'billiards_dwd.dwd_table_fee_adjust': ['table_name', 'table_price', 'charge_free'],
    'billiards_dwd.dim_member': ['pay_money_sum', 'recharge_money_sum'],
    'billiards_dwd.dim_member_card_account': ['principal_balance', 'member_grade'],
    'billiards_dwd.dim_store_goods': ['commodity_code', 'not_sale'],
    'billiards_dwd.dim_table': ['order_id'],
    'billiards_dwd.dim_tenant_goods': ['not_sale'],
    'billiards_dwd.dim_groupbuy_package': ['sort', 'is_first_limit'],
    'billiards_dwd.dwd_assistant_service_log': ['real_service_money'],
    'billiards_dwd.dwd_assistant_trash_event': ['tenant_id'],
    'billiards_dwd.dwd_groupbuy_redemption': ['member_discount_money', 'coupon_sale_id'],
    'billiards_dwd.dwd_member_balance_change': ['principal_before', 'principal_after'],
    'billiards_dwd.dwd_payment': ['tenant_id'],
    'billiards_dwd.dwd_store_goods_sale': ['coupon_share_money'],
}


def check_field_data(conn, schema_table, fields):
    """Collect fill statistics for the given columns of one table.

    Args:
        conn: An open DB-API connection (psycopg2 in this script).
        schema_table: Table name, interpolated verbatim into the SQL text.
            It must come from a trusted source such as ``NEW_FIELDS``.
        fields: Iterable of column names, also interpolated verbatim.

    Returns:
        A list with one dict per field. On success the dict has the keys
        ``field``, ``total``, ``non_null``, ``non_zero`` and ``fill_rate``
        (a percentage string, or ``"N/A"`` for an empty table). On failure
        it has ``field`` and ``error`` (message truncated to 50 characters).

    Side effects:
        Calls ``conn.rollback()`` whenever a query fails, so that the
        failed statement does not leave the transaction in an aborted
        state that would make every following query fail as well.
    """
    results = []

    # The context manager closes the cursor even if something unexpected
    # escapes from the loop below.
    with conn.cursor() as cur:
        # Total row count. A failure here (e.g. a missing table) is reported
        # once per requested field instead of aborting the whole report.
        try:
            cur.execute(f"SELECT COUNT(*) FROM {schema_table}")
            total_rows = cur.fetchone()[0]
        except Exception as e:
            conn.rollback()
            return [{'field': field, 'error': str(e)[:50]} for field in fields]

        for field in fields:
            try:
                # Rows where the column is not NULL.
                cur.execute(
                    f"SELECT COUNT(*) FROM {schema_table} WHERE {field} IS NOT NULL"
                )
                non_null_count = cur.fetchone()[0]

                # Rows where the column is neither NULL nor zero/empty
                # (aimed at numeric columns).
                # NOTE(review): the comparison is textual, so zero values
                # rendered differently (e.g. '0.000') still count as
                # non-zero - confirm against the actual column types.
                cur.execute(f"""
                    SELECT COUNT(*) FROM {schema_table}
                    WHERE {field} IS NOT NULL
                    AND CAST({field} AS TEXT) NOT IN ('0', '0.00', '0.0', '')
                """)
                non_zero_count = cur.fetchone()[0]

                results.append({
                    'field': field,
                    'total': total_rows,
                    'non_null': non_null_count,
                    'non_zero': non_zero_count,
                    'fill_rate': (
                        f"{non_null_count/total_rows*100:.1f}%"
                        if total_rows > 0 else "N/A"
                    ),
                })
            except Exception as e:
                # Reset the transaction so the remaining fields (and later
                # tables) can still be queried on this connection.
                conn.rollback()
                results.append({
                    'field': field,
                    'error': str(e)[:50],
                })

    return results


def main():
    """Connect to the database and print a completeness report per table."""
    conn = psycopg2.connect(DSN)
    try:
        print("=" * 90)
        print("New Fields Data Completeness Check")
        print("=" * 90)

        for table, fields in NEW_FIELDS.items():
            print(f"\n### {table} ###\n")
            results = check_field_data(conn, table, fields)

            print(f"{'Field':<30} {'Total':>8} {'Non-Null':>10} {'Non-Zero':>10} {'Fill Rate':>10}")
            print("-" * 70)

            for r in results:
                if 'error' in r:
                    print(f"{r['field']:<30} ERROR: {r['error']}")
                else:
                    print(f"{r['field']:<30} {r['total']:>8} {r['non_null']:>10} {r['non_zero']:>10} {r['fill_rate']:>10}")
    finally:
        # Always release the connection, even when the report fails midway.
        conn.close()

    print("\n" + "=" * 90)


if __name__ == '__main__':
    main()