Updata2
This commit is contained in:
95
tmp/check_new_fields_data.py
Normal file
95
tmp/check_new_fields_data.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""检查新添加字段的数据完整性"""
|
||||
import os

import psycopg2
|
||||
|
||||
# Connection string for the database to inspect. The PG_DSN environment
# variable takes precedence so credentials do not have to live in the source.
# NOTE(review): the fallback below embeds a real-looking password that is now
# in version control history -- rotate it and drop the fallback once every
# caller sets PG_DSN.
DSN = os.environ.get(
    'PG_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
|
||||
|
||||
# Newly added columns to verify, keyed by "schema.table".
# Insertion order is the order in which tables are reported.
NEW_FIELDS = {
    # DWD main tables
    "billiards_dwd.dwd_settlement_head": [
        "electricity_money",
        "real_electricity_money",
        "electricity_adjust_money",
        "pl_coupon_sale_amount",
        "mervou_sales_amount",
    ],
    "billiards_dwd.dwd_table_fee_log": [
        "activity_discount_amount",
        "real_service_money",
    ],
    "billiards_dwd.dwd_table_fee_adjust": [
        "table_name",
        "table_price",
        "charge_free",
    ],
    "billiards_dwd.dim_member": [
        "pay_money_sum",
        "recharge_money_sum",
    ],
    "billiards_dwd.dim_member_card_account": [
        "principal_balance",
        "member_grade",
    ],
    "billiards_dwd.dim_store_goods": [
        "commodity_code",
        "not_sale",
    ],
    "billiards_dwd.dim_table": ["order_id"],
    "billiards_dwd.dim_tenant_goods": ["not_sale"],
    "billiards_dwd.dim_groupbuy_package": [
        "sort",
        "is_first_limit",
    ],
    "billiards_dwd.dwd_assistant_service_log": ["real_service_money"],
    "billiards_dwd.dwd_assistant_trash_event": ["tenant_id"],
    "billiards_dwd.dwd_groupbuy_redemption": [
        "member_discount_money",
        "coupon_sale_id",
    ],
    "billiards_dwd.dwd_member_balance_change": [
        "principal_before",
        "principal_after",
    ],
    "billiards_dwd.dwd_payment": ["tenant_id"],
    "billiards_dwd.dwd_store_goods_sale": ["coupon_share_money"],
}
|
||||
|
||||
def check_field_data(conn, schema_table, fields):
    """Collect fill statistics for a set of columns in one table.

    Args:
        conn: Open DB-API connection (psycopg2 in this script). It must
            provide ``cursor()`` and ``rollback()``.
        schema_table: Table name, normally ``schema.table``. It is
            interpolated into the SQL text verbatim, so it must come from a
            trusted source (here: the hard-coded ``NEW_FIELDS`` mapping).
        fields: Iterable of column names to inspect; also interpolated
            verbatim.

    Returns:
        A list with one dict per field, in input order. On success the dict
        has the keys ``field``, ``total``, ``non_null``, ``non_zero`` and
        ``fill_rate`` (a percentage string, or ``"N/A"`` for an empty table).
        If querying a field fails, the dict has ``field`` and ``error``
        (the first 50 characters of the exception message) instead.

    Raises:
        Any exception raised by the driver while counting the table's total
        rows (e.g. the table does not exist); per-field failures are
        reported in the result instead of being raised.
    """
    results = []
    cur = conn.cursor()
    try:
        # Total row count; shared by every field of this table.
        cur.execute(f"SELECT COUNT(*) FROM {schema_table}")
        total_rows = cur.fetchone()[0]

        for field in fields:
            try:
                # Rows where the column has any value.
                cur.execute(
                    f"SELECT COUNT(*) FROM {schema_table} "
                    f"WHERE {field} IS NOT NULL"
                )
                non_null_count = cur.fetchone()[0]

                # Rows where the column is neither NULL nor a textual
                # zero/empty value. The comparison is on the text form, so
                # only the listed spellings of zero are recognised.
                cur.execute(f"""
                    SELECT COUNT(*) FROM {schema_table}
                    WHERE {field} IS NOT NULL
                    AND CAST({field} AS TEXT) NOT IN ('0', '0.00', '0.0', '')
                """)
                non_zero_count = cur.fetchone()[0]
            except Exception as e:
                # A failed statement leaves the PostgreSQL transaction in an
                # aborted state where every later command is rejected. Roll
                # back so the remaining fields (and tables) can still be
                # checked on this connection.
                conn.rollback()
                results.append({
                    'field': field,
                    'error': str(e)[:50],
                })
            else:
                results.append({
                    'field': field,
                    'total': total_rows,
                    'non_null': non_null_count,
                    'non_zero': non_zero_count,
                    'fill_rate': (
                        f"{non_null_count/total_rows*100:.1f}%"
                        if total_rows > 0 else "N/A"
                    ),
                })
    finally:
        # Release the cursor even when the initial count query raises.
        cur.close()

    return results
|
||||
|
||||
def main():
    """Print a fill-rate report for every table/column in ``NEW_FIELDS``.

    Connects using ``DSN``, runs ``check_field_data`` for each table and
    prints one aligned row per field (or an ``ERROR:`` row when the field
    could not be queried). The connection is always closed, including when
    a table check raises.
    """
    conn = psycopg2.connect(DSN)
    try:
        print("=" * 90)
        print("New Fields Data Completeness Check")
        print("=" * 90)

        for table, fields in NEW_FIELDS.items():
            print(f"\n### {table} ###\n")
            results = check_field_data(conn, table, fields)

            print(f"{'Field':<30} {'Total':>8} {'Non-Null':>10} {'Non-Zero':>10} {'Fill Rate':>10}")
            print("-" * 70)

            for r in results:
                if 'error' in r:
                    print(f"{r['field']:<30} ERROR: {r['error']}")
                else:
                    print(f"{r['field']:<30} {r['total']:>8} {r['non_null']:>10} {r['non_zero']:>10} {r['fill_rate']:>10}")
    finally:
        # Do not leak the connection if any table check fails.
        conn.close()

    print("\n" + "=" * 90)
|
||||
|
||||
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user