# -*- coding: utf-8 -*-
"""Report the current state of the ``ods`` schema to the console.

The script prints four sections:

1. every table in the ``ods`` schema with its approximate live row count,
2. every index defined in the schema,
3. a per-table matrix showing whether the columns ``id``, ``fetched_at``,
   ``is_delete`` and ``content_hash`` exist,
4. the primary-key columns of each table.

The connection string is read from the ``PG_DSN`` environment variable,
which is loaded from the ETL pipeline's ``.env`` file. The script exits
with status 1 when ``PG_DSN`` is not configured.
"""
import os
import sys
from collections import defaultdict
from pathlib import Path

from dotenv import load_dotenv
import psycopg2

# Load the ETL .env
env_path = Path(__file__).resolve().parents[2] / "apps" / "etl" / "pipelines" / "feiqiu" / ".env"
load_dotenv(env_path)

dsn = os.environ.get("PG_DSN")
if not dsn:
    print("ERROR: PG_DSN 未配置")
    sys.exit(1)


def _print_header(title, *, leading_blank=True):
    """Print a section banner: a rule, the title, and another rule."""
    print(("\n" if leading_blank else "") + "=" * 80)
    print(title)
    print("=" * 80)


conn = psycopg2.connect(dsn, connect_timeout=10)
try:
    # The cursor context manager closes the cursor on exit; the surrounding
    # try/finally guarantees the connection is closed even if a query fails.
    with conn.cursor() as cur:
        _print_header("1. ODS 所有表及行数", leading_blank=False)
        # n_live_tup comes from the statistics view, so the count is approximate.
        cur.execute("""
            SELECT t.tablename,
                   pg_stat_user_tables.n_live_tup AS approx_rows
            FROM pg_tables t
            LEFT JOIN pg_stat_user_tables
                ON pg_stat_user_tables.schemaname = t.schemaname
                AND pg_stat_user_tables.relname = t.tablename
            WHERE t.schemaname = 'ods'
            ORDER BY t.tablename
        """)
        tables = cur.fetchall()
        for tbl, rows in tables:
            # The LEFT JOIN may yield NULL when no statistics row exists.
            print(f" {tbl:50s} ~{rows or 0} rows")
        print(f"\n 共 {len(tables)} 张表")

        _print_header("2. ODS 所有索引")
        cur.execute("""
            SELECT tablename, indexname, indexdef
            FROM pg_indexes
            WHERE schemaname = 'ods'
            ORDER BY tablename, indexname
        """)
        indexes = cur.fetchall()
        for tbl, idx_name, idx_def in indexes:
            print(f" [{tbl}] {idx_name}")
            print(f" {idx_def}")
        print(f"\n 共 {len(indexes)} 个索引")

        _print_header("3. 各表是否有 id / fetched_at / is_delete / content_hash 列")
        cur.execute("""
            SELECT table_name, column_name
            FROM information_schema.columns
            WHERE table_schema = 'ods'
              AND column_name IN ('id', 'fetched_at', 'is_delete', 'content_hash')
            ORDER BY table_name, column_name
        """)
        col_rows = cur.fetchall()

        # Group the matching column names by table.
        col_map = defaultdict(set)
        for tbl, col in col_rows:
            col_map[tbl].add(col)

        check_cols = ['id', 'fetched_at', 'is_delete', 'content_hash']
        print(f" {'表名':50s} {'id':5s} {'fetched_at':12s} {'is_delete':10s} {'content_hash':13s}")
        print(f" {'-'*50} {'-'*5} {'-'*12} {'-'*10} {'-'*13}")
        # Iterate over the table list from section 1 so tables that have none
        # of the checked columns still get a row.
        for tbl, _ in tables:
            cols = col_map.get(tbl, set())
            flags = [('✓' if c in cols else '✗') for c in check_cols]
            print(f" {tbl:50s} {flags[0]:5s} {flags[1]:12s} {flags[2]:10s} {flags[3]:13s}")

        _print_header("4. 各表主键定义")
        # Constraint names are not guaranteed unique within a schema in
        # PostgreSQL, so the join also matches on table_name to avoid mixing
        # in columns of a same-named constraint on another table.
        cur.execute("""
            SELECT tc.table_name,
                   string_agg(kcu.column_name, ', ' ORDER BY kcu.ordinal_position) AS pk_cols
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
                ON tc.constraint_name = kcu.constraint_name
                AND tc.table_schema = kcu.table_schema
                AND tc.table_name = kcu.table_name
            WHERE tc.table_schema = 'ods'
              AND tc.constraint_type = 'PRIMARY KEY'
            GROUP BY tc.table_name
            ORDER BY tc.table_name
        """)
        pk_rows = cur.fetchall()
        for tbl, pk_cols in pk_rows:
            print(f" {tbl:50s} PK: ({pk_cols})")
finally:
    conn.close()

print("\n完成。")