在准备环境前提交全部更改。
This commit is contained in:
334
scripts/ops/clone_to_test_db.py
Normal file
334
scripts/ops/clone_to_test_db.py
Normal file
@@ -0,0 +1,334 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从正式库完整镜像到测试库:
|
||||
etl_feiqiu → test_etl_feiqiu(六层 schema + 数据 + 索引 + 物化视图)
|
||||
zqyy_app → test_zqyy_app(全部表 + 数据 + 索引)
|
||||
|
||||
策略:先用 init_databases.py 的 DDL 建表,再用 COPY 协议迁移数据,
|
||||
最后迁移自定义索引和物化视图。
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import psycopg2
|
||||
|
||||
# Force UTF-8 console output on Windows so the Chinese log text prints
# without UnicodeEncodeError on legacy code pages.
if sys.platform == "win32":
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")

# Shared connection settings for every database touched by this script.
# NOTE(review): credentials are hardcoded in source — consider moving them
# to environment variables or a .pgpass file.
DB_HOST = "100.64.0.4"
DB_PORT = 5432
DB_USER = "local-Python"
DB_PASS = "Neo-local-1991125"
# Force UTF-8 on the server session as well.
DB_OPTS = "-c client_encoding=UTF8"

# (source database, test mirror) pairs to clone.
CLONE_PAIRS = [
    ("etl_feiqiu", "test_etl_feiqiu"),
    ("zqyy_app", "test_zqyy_app"),
]

# Base directory for the relative SQL file paths used below: two levels
# above this file. NOTE(review): assumes the db/ tree lives there — confirm.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
def conn_to(dbname):
    """Open a psycopg2 connection to *dbname* with the shared credentials."""
    params = dict(
        host=DB_HOST,
        port=DB_PORT,
        dbname=dbname,
        user=DB_USER,
        password=DB_PASS,
        options=DB_OPTS,
    )
    return psycopg2.connect(**params)
|
||||
|
||||
|
||||
def execute_sql_file(conn, filepath, label=""):
    """Execute the SQL file at *filepath* (relative to BASE_DIR) on *conn*.

    Returns True when the script ran and committed; False when the file is
    missing or empty, or when execution failed (transaction rolled back).
    """
    path = os.path.join(BASE_DIR, filepath)
    if not os.path.exists(path):
        print(f" [SKIP] 不存在: {filepath}")
        return False
    with open(path, "r", encoding="utf-8") as fh:
        script = fh.read()
    if not script.strip():
        return False
    try:
        with conn.cursor() as cur:
            cur.execute(script)
        conn.commit()
    except Exception as e:
        conn.rollback()
        print(f" [FAIL] {label or filepath}: {e}")
        return False
    else:
        print(f" [OK] {label or filepath}")
        return True
|
||||
|
||||
|
||||
def get_schemas(conn):
    """Return the user-defined schema names (system schemas excluded)."""
    query = """
        SELECT nspname FROM pg_namespace
        WHERE nspname NOT LIKE 'pg_%' AND nspname != 'information_schema'
        ORDER BY nspname
    """
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    return [name for (name,) in rows]
|
||||
|
||||
|
||||
def get_tables(conn, schema):
    """Return table names of *schema*, alphabetically ordered."""
    query = "SELECT tablename FROM pg_tables WHERE schemaname = %s ORDER BY tablename"
    with conn.cursor() as cur:
        cur.execute(query, (schema,))
        rows = cur.fetchall()
    return [name for (name,) in rows]
|
||||
|
||||
|
||||
def get_columns(conn, schema, table):
    """Return column names of schema.table in ordinal order ([] if absent)."""
    query = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        rows = cur.fetchall()
    return [name for (name,) in rows]
|
||||
|
||||
|
||||
def count_rows(conn, schema, table):
    """Return the exact row count of schema.table (full count scan)."""
    # NOTE(review): identifiers come from the pg catalogs; an embedded double
    # quote would break this quoting — acceptable for an internal ops script.
    stmt = f'SELECT COUNT(*) FROM "{schema}"."{table}"'
    with conn.cursor() as cur:
        cur.execute(stmt)
        (total,) = cur.fetchone()
    return total
|
||||
|
||||
|
||||
def copy_table(src, dst, schema, table):
    """Stream one table's rows from *src* to *dst* via the binary COPY protocol.

    Only columns present on both sides are transferred. Returns the
    destination table's row count after the copy, or 0 when nothing moved.
    """
    source_cols = get_columns(src, schema, table)
    target_cols = get_columns(dst, schema, table)
    if not source_cols or not target_cols:
        return 0
    shared = [c for c in target_cols if c in source_cols]
    if not shared:
        return 0
    col_list = ", ".join(f'"{c}"' for c in shared)

    payload = io.BytesIO()
    export_sql = f'COPY (SELECT {col_list} FROM "{schema}"."{table}") TO STDOUT WITH (FORMAT binary)'
    with src.cursor() as cur:
        cur.copy_expert(export_sql, payload)
    payload.seek(0)
    # The binary-COPY signature alone is 11 bytes; a payload that small
    # cannot carry any row data, so skip the import entirely.
    if payload.getbuffer().nbytes <= 11:
        return 0
    import_sql = f'COPY "{schema}"."{table}" ({col_list}) FROM STDIN WITH (FORMAT binary)'
    with dst.cursor() as cur:
        cur.copy_expert(import_sql, payload)
    dst.commit()
    return count_rows(dst, schema, table)
|
||||
|
||||
|
||||
def migrate_indexes(src, dst, schema):
    """Recreate *schema*'s user-defined indexes on *dst*.

    Constraint-backed indexes (PK/UNIQUE constraints) are excluded — those
    arrive with the table DDL. Returns (created_or_existing, total_found).
    """
    catalog_sql = """
        SELECT indexname, indexdef FROM pg_indexes
        WHERE schemaname = %s
        AND indexname NOT IN (
            SELECT conname FROM pg_constraint
            WHERE connamespace = (SELECT oid FROM pg_namespace WHERE nspname = %s))
        ORDER BY indexname
    """
    with src.cursor() as cur:
        cur.execute(catalog_sql, (schema, schema))
        entries = cur.fetchall()
    created = 0
    for idx_name, idx_def in entries:
        # Make the DDL idempotent; only one of the two replacements can match.
        stmt = idx_def.replace("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1)
        stmt = stmt.replace("CREATE UNIQUE INDEX", "CREATE UNIQUE INDEX IF NOT EXISTS", 1)
        try:
            with dst.cursor() as cur:
                cur.execute(stmt)
            dst.commit()
        except Exception as e:
            dst.rollback()
            # Matview indexes can fail while the view is still missing; those
            # are expected and handled later, so only report other errors.
            message = str(e)
            if "不存在" not in message and "does not exist" not in message:
                print(f" 索引 {idx_name}: {e}")
        else:
            created += 1
    return created, len(entries)
|
||||
|
||||
|
||||
def migrate_matviews(src, dst, schema):
    """Recreate *schema*'s materialized views on *dst* and populate them.

    Definitions are read from the source's pg_matviews; views that already
    exist on the destination count as successes. Returns (ok, total).
    """
    with src.cursor() as cur:
        cur.execute(
            "SELECT matviewname, definition FROM pg_matviews WHERE schemaname = %s ORDER BY matviewname",
            (schema,),
        )
        views = cur.fetchall()
    if not views:
        return 0, 0
    done = 0
    for view_name, view_def in views:
        # Already present on the destination? Nothing to do.
        with dst.cursor() as cur:
            cur.execute(
                "SELECT 1 FROM pg_matviews WHERE schemaname = %s AND matviewname = %s",
                (schema, view_name),
            )
            exists = cur.fetchone() is not None
        if exists:
            done += 1
            continue
        # pg_matviews.definition may end with ';' — strip it before
        # appending the WITH DATA clause.
        body = view_def.rstrip().rstrip(";").rstrip()
        try:
            with dst.cursor() as cur:
                cur.execute(f'CREATE MATERIALIZED VIEW "{schema}"."{view_name}" AS {body} WITH DATA')
            dst.commit()
            done += 1
        except Exception as e:
            dst.rollback()
            print(f" 物化视图 {view_name}: {e}")
    return done, len(views)
|
||||
|
||||
|
||||
def init_test_etl_feiqiu(conn):
    """Initialize the six-layer schemas of test_etl_feiqiu from DDL files."""
    print(" 初始化 DDL...")
    # Layer order matters: downstream layers may reference upstream ones.
    for layer in ("meta", "ods", "dwd", "core", "dws", "app"):
        execute_sql_file(conn, f"db/etl_feiqiu/schemas/{layer}.sql", layer)
    # Seed data is intentionally not loaded — full data is COPY'd from the
    # production database afterwards.
|
||||
|
||||
|
||||
def init_test_zqyy_app(conn):
    """Initialize test_zqyy_app from its DDL and migration files."""
    print(" 初始化 DDL...")
    ddl_files = (
        ("db/zqyy_app/schemas/init.sql", "zqyy_app schema"),
        ("db/zqyy_app/migrations/20250715_create_admin_web_tables.sql", "admin_web 迁移"),
    )
    for path, label in ddl_files:
        execute_sql_file(conn, path, label)
|
||||
|
||||
|
||||
def clone_database(src_name, dst_name):
    """Fully mirror one database: DDL, data, materialized views, indexes.

    Runs six ordered steps (DDL init, data COPY, matviews, indexes,
    ANALYZE, verification). Returns True when every non-empty source
    table ends up with a matching row count in the destination.
    """
    print(f"\n{'='*60}")
    print(f"镜像: {src_name} → {dst_name}")
    print(f"{'='*60}")

    src = conn_to(src_name)
    dst = conn_to(dst_name)

    # Step 1: initialize DDL — each test database has its own file set.
    if dst_name == "test_etl_feiqiu":
        init_test_etl_feiqiu(dst)
    elif dst_name == "test_zqyy_app":
        init_test_zqyy_app(dst)

    # Step 2: migrate data.
    print("\n 迁移数据...")
    schemas = get_schemas(src)
    # Only schemas that actually hold tables in the source are migrated.
    total_rows = 0
    total_tables = 0
    for schema in schemas:
        tables = get_tables(src, schema)
        if not tables:
            continue
        # Make sure the destination has this schema before copying into it.
        with dst.cursor() as cur:
            cur.execute(f'CREATE SCHEMA IF NOT EXISTS "{schema}"')
        dst.commit()

        for t in tables:
            s_cnt = count_rows(src, schema, t)
            if s_cnt == 0:
                continue
            # Destination table must exist (the DDL step may not cover it).
            dst_cols = get_columns(dst, schema, t)
            if not dst_cols:
                continue
            # Idempotent reruns: skip tables already holding >= source rows.
            d_cnt = count_rows(dst, schema, t)
            if d_cnt >= s_cnt:
                total_rows += d_cnt
                total_tables += 1
                continue
            # Partial data from a failed earlier run — wipe and re-copy.
            if d_cnt > 0:
                with dst.cursor() as cur:
                    cur.execute(f'TRUNCATE "{schema}"."{t}" CASCADE')
                dst.commit()
            try:
                migrated = copy_table(src, dst, schema, t)
                total_rows += migrated
                total_tables += 1
                if migrated != s_cnt:
                    print(f" ⚠ {schema}.{t}: src={s_cnt} dst={migrated}")
            except Exception as e:
                dst.rollback()
                print(f" ✗ {schema}.{t}: {e}")

    print(f" 数据: {total_tables} 表, {total_rows} 行")

    # Step 3: materialized views (before indexes, so that matview indexes
    # created in step 4 have their relations available).
    print("\n 迁移物化视图...")
    for schema in schemas:
        ok, total = migrate_matviews(src, dst, schema)
        if total > 0:
            print(f" {schema}: {ok}/{total}")

    # Step 4: indexes.
    print("\n 迁移索引...")
    total_idx = 0
    for schema in schemas:
        ok, total = migrate_indexes(src, dst, schema)
        total_idx += ok
        if total > 0:
            print(f" {schema}: {ok}/{total}")
    print(f" 索引: {total_idx} 个")

    # Step 5: ANALYZE tables and matviews so the planner has fresh stats.
    # autocommit makes every ANALYZE run as its own statement.
    print("\n ANALYZE...")
    dst.autocommit = True
    with dst.cursor() as cur:
        for schema in schemas:
            cur.execute(f"""
                SELECT tablename FROM pg_tables WHERE schemaname = '{schema}'
                UNION ALL
                SELECT matviewname FROM pg_matviews WHERE schemaname = '{schema}'
            """)
            for (obj,) in cur.fetchall():
                cur.execute(f'ANALYZE "{schema}"."{obj}"')
    dst.autocommit = False
    print(" ANALYZE 完成")

    # Step 6: verify — every non-empty source table must exist in the
    # destination with the same row count.
    print("\n 验证...")
    all_ok = True
    for schema in schemas:
        tables = get_tables(src, schema)
        for t in tables:
            s = count_rows(src, schema, t)
            if s == 0:
                continue
            dst_cols = get_columns(dst, schema, t)
            if not dst_cols:
                print(f" MISS {schema}.{t}")
                all_ok = False
                continue
            d = count_rows(dst, schema, t)
            if d != s:
                print(f" FAIL {schema}.{t}: src={s} dst={d}")
                all_ok = False

    if all_ok:
        print(" ✓ 全部一致")
    else:
        print(" ✗ 存在不一致")

    src.close()
    dst.close()
    return all_ok
|
||||
|
||||
|
||||
def main():
    """Clone every configured source → test pair and print a summary."""
    outcomes = {dst: clone_database(src, dst) for src, dst in CLONE_PAIRS}

    print(f"\n{'='*60}")
    for db, ok in outcomes.items():
        status = "OK" if ok else "FAIL"
        print(f" {db}: {status}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user