在准备环境前,先提交一次全部更改。

This commit is contained in:
Neo
2026-02-19 08:35:13 +08:00
parent ded6dfb9d8
commit 4eac07da47
1387 changed files with 6107191 additions and 33002 deletions

View File

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
"""Query the current state of tables, indexes and key columns in the ODS schema and print it to the console."""
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
# Load the ETL pipeline's .env file. The path is resolved relative to
# parents[2] of this script, i.e. two levels above the script's directory
# (presumably the repository root -- confirm against the project layout).
base_dir = Path(__file__).resolve().parents[2]
env_path = base_dir.joinpath("apps", "etl", "pipelines", "feiqiu", ".env")
load_dotenv(env_path)

# The PostgreSQL connection string is read from the environment; without it
# there is nothing to inspect, so report the problem and exit with status 1.
dsn = os.getenv("PG_DSN")
if not dsn:
    print("ERROR: PG_DSN 未配置")
    sys.exit(1)

# connect_timeout=10 is passed through to the driver so the connection
# attempt does not wait indefinitely.
conn = psycopg2.connect(dsn, connect_timeout=10)
cur = conn.cursor()
# Section 1: list every table in the ods schema with its approximate row
# count. The count comes from pg_stat_user_tables.n_live_tup; the LEFT JOIN
# keeps tables that have no statistics row, which are shown as 0.
section_rule = "=" * 80
print(section_rule)
print("1. ODS 所有表及行数")
print(section_rule)

# SQL text is kept verbatim.
tables_sql = """
SELECT t.tablename,
pg_stat_user_tables.n_live_tup AS approx_rows
FROM pg_tables t
LEFT JOIN pg_stat_user_tables
ON pg_stat_user_tables.schemaname = t.schemaname
AND pg_stat_user_tables.relname = t.tablename
WHERE t.schemaname = 'ods'
ORDER BY t.tablename
"""
cur.execute(tables_sql)

# `tables` is reused further down the script (column-presence section),
# so the name and the (tablename, approx_rows) row shape must stay as is.
tables = cur.fetchall()
for table_name, approx_rows in tables:
    shown_rows = approx_rows if approx_rows else 0
    print(f" {table_name:50s} ~{shown_rows} rows")
print(f"\n{len(tables)} 张表")
# Section 2: list every index defined in the ods schema together with its
# full definition as reported by pg_indexes, ordered by table then index.
print(f"\n{'=' * 80}")
print("2. ODS 所有索引")
print("=" * 80)

# SQL text is kept verbatim.
indexes_sql = """
SELECT tablename, indexname, indexdef
FROM pg_indexes
WHERE schemaname = 'ods'
ORDER BY tablename, indexname
"""
cur.execute(indexes_sql)
indexes = cur.fetchall()

# Two output lines per index: "[table] index_name", then the definition.
for owner_table, index_name, index_def in indexes:
    print(f" [{owner_table}] {index_name}\n {index_def}")
print(f"\n{len(indexes)} 个索引")
# Section 3: for every ods table, show whether it has each of the columns
# id / fetched_at / is_delete / content_hash.
from collections import defaultdict

print("\n" + "=" * 80)
print("3. 各表是否有 id / fetched_at / is_delete / content_hash 列")
print("=" * 80)

# Fetch only the (table, column) pairs for the columns of interest.
# SQL text is kept verbatim.
cur.execute("""
SELECT table_name, column_name
FROM information_schema.columns
WHERE table_schema = 'ods'
AND column_name IN ('id', 'fetched_at', 'is_delete', 'content_hash')
ORDER BY table_name, column_name
""")
col_rows = cur.fetchall()

# Group the columns found by table name.
col_map = defaultdict(set)
for tbl, col in col_rows:
    col_map[tbl].add(col)

check_cols = ['id', 'fetched_at', 'is_delete', 'content_hash']
print(f" {'表名':50s} {'id':5s} {'fetched_at':12s} {'is_delete':10s} {'content_hash':13s}")
print(f" {'-'*50} {'-'*5} {'-'*12} {'-'*10} {'-'*13}")

# Iterate over `tables` (fetched earlier in the script) rather than col_map
# so that tables having none of the checked columns still get a row.
for tbl, _ in tables:
    cols = col_map.get(tbl, set())
    # Fix: the present/absent markers were both the empty string, so every
    # cell printed blank and the matrix carried no information. Single-width
    # ASCII markers keep the fixed-width columns aligned.
    flags = ['Y' if c in cols else 'N' for c in check_cols]
    print(f" {tbl:50s} {flags[0]:5s} {flags[1]:12s} {flags[2]:10s} {flags[3]:13s}")
# Section 4: print the primary-key column list of each ods table.
# Key columns are concatenated in ordinal_position order. Because of the
# inner join and the PRIMARY KEY filter, tables without a primary-key
# constraint do not appear in this listing.
print(f"\n{'=' * 80}")
print("4. 各表主键定义")
print("=" * 80)

# SQL text is kept verbatim.
pk_sql = """
SELECT tc.table_name,
string_agg(kcu.column_name, ', ' ORDER BY kcu.ordinal_position) AS pk_cols
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
WHERE tc.table_schema = 'ods'
AND tc.constraint_type = 'PRIMARY KEY'
GROUP BY tc.table_name
ORDER BY tc.table_name
"""
cur.execute(pk_sql)
pk_rows = cur.fetchall()
for table_name, key_columns in pk_rows:
    print(f" {table_name:50s} PK: ({key_columns})")
# Release database resources: the cursor first, then its connection.
for db_handle in (cur, conn):
    db_handle.close()

print("\n完成。")