103 lines
3.0 KiB
Python
103 lines
3.0 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""查询 ODS schema 的表、索引、关键列现状,输出到控制台。"""
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
import psycopg2
|
|
|
|
# Load the ETL pipeline's .env file, resolved relative to this script's location.
env_path = Path(__file__).resolve().parents[2].joinpath(
    "apps", "etl", "pipelines", "feiqiu", ".env"
)
load_dotenv(env_path)

# The connection string is read from the environment; abort with exit
# status 1 when it is missing or empty.
dsn = os.getenv("PG_DSN")
if not dsn:
    print("ERROR: PG_DSN 未配置")
    sys.exit(1)

# Open the database connection (10-second connect timeout) and a cursor.
conn = psycopg2.connect(dsn, connect_timeout=10)
cur = conn.cursor()
|
|
|
|
# Section 1: every table in the "ods" schema with its approximate live-row
# count taken from pg_stat_user_tables.
rule = "=" * 80
print(rule)
print("1. ODS 所有表及行数")
print(rule)

cur.execute(
    """
    SELECT t.tablename,
           pg_stat_user_tables.n_live_tup AS approx_rows
    FROM pg_tables t
    LEFT JOIN pg_stat_user_tables
      ON pg_stat_user_tables.schemaname = t.schemaname
     AND pg_stat_user_tables.relname = t.tablename
    WHERE t.schemaname = 'ods'
    ORDER BY t.tablename
    """
)
tables = cur.fetchall()

for table_name, live_rows in tables:
    # The LEFT JOIN may yield no statistics row; fall back to 0 in that case.
    approx = live_rows or 0
    print(f" {table_name:50s} ~{approx} rows")

print(f"\n 共 {len(tables)} 张表")
|
|
|
|
# Section 2: every index defined in the "ods" schema, with its definition.
rule = "=" * 80
print("\n" + rule)
print("2. ODS 所有索引")
print(rule)

cur.execute(
    """
    SELECT tablename, indexname, indexdef
    FROM pg_indexes
    WHERE schemaname = 'ods'
    ORDER BY tablename, indexname
    """
)
indexes = cur.fetchall()

for entry in indexes:
    owner, index_name, definition = entry
    # Two lines per index: "[table] name", then the index definition.
    print(f" [{owner}] {index_name}")
    print(f" {definition}")

print(f"\n 共 {len(indexes)} 个索引")
|
|
|
|
from collections import defaultdict

# Section 3: for each table found in section 1, show which of the key
# columns (id / fetched_at / is_delete / content_hash) it has.
rule = "=" * 80
print("\n" + rule)
print("3. 各表是否有 id / fetched_at / is_delete / content_hash 列")
print(rule)

cur.execute(
    """
    SELECT table_name, column_name
    FROM information_schema.columns
    WHERE table_schema = 'ods'
      AND column_name IN ('id', 'fetched_at', 'is_delete', 'content_hash')
    ORDER BY table_name, column_name
    """
)
col_rows = cur.fetchall()

# Group the key columns that were found by their owning table.
col_map = defaultdict(set)
for owner, column in col_rows:
    col_map[owner].add(column)

check_cols = ['id', 'fetched_at', 'is_delete', 'content_hash']
# Display width of each flag column, in the same order as check_cols.
widths = [5, 12, 10, 13]

header_cells = " ".join(f"{name:{width}s}" for name, width in zip(check_cols, widths))
print(f" {'表名':50s} {header_cells}")
dash_cells = " ".join("-" * width for width in widths)
print(f" {'-'*50} {dash_cells}")

for table_name, _ in tables:
    # .get() avoids inserting an empty entry for tables with no key columns.
    present = col_map.get(table_name, set())
    cells = []
    for name, width in zip(check_cols, widths):
        mark = '✓' if name in present else '✗'
        cells.append(f"{mark:{width}s}")
    print(f" {table_name:50s} {' '.join(cells)}")
|
|
|
|
# Section 4: primary-key column list for every table in the "ods" schema
# that has a primary key (tables without one do not appear).
print("\n" + "=" * 80)
print("4. 各表主键定义")
print("=" * 80)

# key_column_usage is matched on table_name in addition to constraint name
# and schema: PostgreSQL does not enforce schema-wide uniqueness of constraint
# names, so without the table_name condition a same-named constraint on a
# different table could contribute its columns to this table's PK list.
cur.execute(
    """
    SELECT tc.table_name,
           string_agg(kcu.column_name, ', ' ORDER BY kcu.ordinal_position) AS pk_cols
    FROM information_schema.table_constraints tc
    JOIN information_schema.key_column_usage kcu
      ON tc.constraint_name = kcu.constraint_name
     AND tc.table_schema = kcu.table_schema
     AND tc.table_name = kcu.table_name
    WHERE tc.table_schema = 'ods'
      AND tc.constraint_type = 'PRIMARY KEY'
    GROUP BY tc.table_name
    ORDER BY tc.table_name
    """
)
pk_rows = cur.fetchall()

# One line per table; pk_cols is already a comma-separated string ordered by
# the columns' position within the key.
for tbl, pk_cols in pk_rows:
    print(f" {tbl:50s} PK: ({pk_cols})")
|
|
|
|
# Release the cursor first, then the connection, and report completion.
for resource in (cur, conn):
    resource.close()

print("\n完成。")
|