在准备环境前提交全部更改。
This commit is contained in:
334
scripts/ops/clone_to_test_db.py
Normal file
334
scripts/ops/clone_to_test_db.py
Normal file
@@ -0,0 +1,334 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从正式库完整镜像到测试库:
|
||||
etl_feiqiu → test_etl_feiqiu(六层 schema + 数据 + 索引 + 物化视图)
|
||||
zqyy_app → test_zqyy_app(全部表 + 数据 + 索引)
|
||||
|
||||
策略:先用 init_databases.py 的 DDL 建表,再用 COPY 协议迁移数据,
|
||||
最后迁移自定义索引和物化视图。
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import psycopg2
|
||||
|
||||
# Force UTF-8 console output on Windows so the Chinese log text prints
# without UnicodeEncodeError on legacy code pages.
if sys.platform == "win32":
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")

# Shared connection settings for every database touched by this script.
# NOTE(review): credentials are hardcoded in source — consider moving them
# to environment variables or a .pgpass file.
DB_HOST = "100.64.0.4"
DB_PORT = 5432
DB_USER = "local-Python"
DB_PASS = "Neo-local-1991125"
# Force UTF-8 on the server session as well.
DB_OPTS = "-c client_encoding=UTF8"

# (source database, test mirror) pairs to clone.
CLONE_PAIRS = [
    ("etl_feiqiu", "test_etl_feiqiu"),
    ("zqyy_app", "test_zqyy_app"),
]

# Base directory for the relative SQL file paths used below: two levels
# above this file. NOTE(review): assumes the db/ tree lives there — confirm.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
def conn_to(dbname):
    """Open a psycopg2 connection to *dbname* with the shared credentials."""
    params = dict(
        host=DB_HOST,
        port=DB_PORT,
        dbname=dbname,
        user=DB_USER,
        password=DB_PASS,
        options=DB_OPTS,
    )
    return psycopg2.connect(**params)
|
||||
|
||||
|
||||
def execute_sql_file(conn, filepath, label=""):
    """Execute the SQL file at *filepath* (relative to BASE_DIR) on *conn*.

    Returns True when the script ran and committed; False when the file is
    missing or empty, or when execution failed (transaction rolled back).
    """
    path = os.path.join(BASE_DIR, filepath)
    if not os.path.exists(path):
        print(f" [SKIP] 不存在: {filepath}")
        return False
    with open(path, "r", encoding="utf-8") as fh:
        script = fh.read()
    if not script.strip():
        return False
    try:
        with conn.cursor() as cur:
            cur.execute(script)
        conn.commit()
    except Exception as e:
        conn.rollback()
        print(f" [FAIL] {label or filepath}: {e}")
        return False
    else:
        print(f" [OK] {label or filepath}")
        return True
|
||||
|
||||
|
||||
def get_schemas(conn):
    """Return the user-defined schema names (system schemas excluded)."""
    query = """
        SELECT nspname FROM pg_namespace
        WHERE nspname NOT LIKE 'pg_%' AND nspname != 'information_schema'
        ORDER BY nspname
    """
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    return [name for (name,) in rows]
|
||||
|
||||
|
||||
def get_tables(conn, schema):
    """Return table names of *schema*, alphabetically ordered."""
    query = "SELECT tablename FROM pg_tables WHERE schemaname = %s ORDER BY tablename"
    with conn.cursor() as cur:
        cur.execute(query, (schema,))
        rows = cur.fetchall()
    return [name for (name,) in rows]
|
||||
|
||||
|
||||
def get_columns(conn, schema, table):
    """Return column names of schema.table in ordinal order ([] if absent)."""
    query = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        rows = cur.fetchall()
    return [name for (name,) in rows]
|
||||
|
||||
|
||||
def count_rows(conn, schema, table):
    """Return the exact row count of schema.table (full count scan)."""
    # NOTE(review): identifiers come from the pg catalogs; an embedded double
    # quote would break this quoting — acceptable for an internal ops script.
    stmt = f'SELECT COUNT(*) FROM "{schema}"."{table}"'
    with conn.cursor() as cur:
        cur.execute(stmt)
        (total,) = cur.fetchone()
    return total
|
||||
|
||||
|
||||
def copy_table(src, dst, schema, table):
    """Stream one table's rows from *src* to *dst* via the binary COPY protocol.

    Only columns present on both sides are transferred. Returns the
    destination table's row count after the copy, or 0 when nothing moved.
    """
    source_cols = get_columns(src, schema, table)
    target_cols = get_columns(dst, schema, table)
    if not source_cols or not target_cols:
        return 0
    shared = [c for c in target_cols if c in source_cols]
    if not shared:
        return 0
    col_list = ", ".join(f'"{c}"' for c in shared)

    payload = io.BytesIO()
    export_sql = f'COPY (SELECT {col_list} FROM "{schema}"."{table}") TO STDOUT WITH (FORMAT binary)'
    with src.cursor() as cur:
        cur.copy_expert(export_sql, payload)
    payload.seek(0)
    # The binary-COPY signature alone is 11 bytes; a payload that small
    # cannot carry any row data, so skip the import entirely.
    if payload.getbuffer().nbytes <= 11:
        return 0
    import_sql = f'COPY "{schema}"."{table}" ({col_list}) FROM STDIN WITH (FORMAT binary)'
    with dst.cursor() as cur:
        cur.copy_expert(import_sql, payload)
    dst.commit()
    return count_rows(dst, schema, table)
|
||||
|
||||
|
||||
def migrate_indexes(src, dst, schema):
    """Recreate *schema*'s user-defined indexes on *dst*.

    Constraint-backed indexes (PK/UNIQUE constraints) are excluded — those
    arrive with the table DDL. Returns (created_or_existing, total_found).
    """
    catalog_sql = """
        SELECT indexname, indexdef FROM pg_indexes
        WHERE schemaname = %s
        AND indexname NOT IN (
            SELECT conname FROM pg_constraint
            WHERE connamespace = (SELECT oid FROM pg_namespace WHERE nspname = %s))
        ORDER BY indexname
    """
    with src.cursor() as cur:
        cur.execute(catalog_sql, (schema, schema))
        entries = cur.fetchall()
    created = 0
    for idx_name, idx_def in entries:
        # Make the DDL idempotent; only one of the two replacements can match.
        stmt = idx_def.replace("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1)
        stmt = stmt.replace("CREATE UNIQUE INDEX", "CREATE UNIQUE INDEX IF NOT EXISTS", 1)
        try:
            with dst.cursor() as cur:
                cur.execute(stmt)
            dst.commit()
        except Exception as e:
            dst.rollback()
            # Matview indexes can fail while the view is still missing; those
            # are expected and handled later, so only report other errors.
            message = str(e)
            if "不存在" not in message and "does not exist" not in message:
                print(f" 索引 {idx_name}: {e}")
        else:
            created += 1
    return created, len(entries)
|
||||
|
||||
|
||||
def migrate_matviews(src, dst, schema):
    """Recreate *schema*'s materialized views on *dst* and populate them.

    Definitions are read from the source's pg_matviews; views that already
    exist on the destination count as successes. Returns (ok, total).
    """
    with src.cursor() as cur:
        cur.execute(
            "SELECT matviewname, definition FROM pg_matviews WHERE schemaname = %s ORDER BY matviewname",
            (schema,),
        )
        views = cur.fetchall()
    if not views:
        return 0, 0
    done = 0
    for view_name, view_def in views:
        # Already present on the destination? Nothing to do.
        with dst.cursor() as cur:
            cur.execute(
                "SELECT 1 FROM pg_matviews WHERE schemaname = %s AND matviewname = %s",
                (schema, view_name),
            )
            exists = cur.fetchone() is not None
        if exists:
            done += 1
            continue
        # pg_matviews.definition may end with ';' — strip it before
        # appending the WITH DATA clause.
        body = view_def.rstrip().rstrip(";").rstrip()
        try:
            with dst.cursor() as cur:
                cur.execute(f'CREATE MATERIALIZED VIEW "{schema}"."{view_name}" AS {body} WITH DATA')
            dst.commit()
            done += 1
        except Exception as e:
            dst.rollback()
            print(f" 物化视图 {view_name}: {e}")
    return done, len(views)
|
||||
|
||||
|
||||
def init_test_etl_feiqiu(conn):
    """Initialize the six-layer schemas of test_etl_feiqiu from DDL files."""
    print(" 初始化 DDL...")
    # Layer order matters: downstream layers may reference upstream ones.
    for layer in ("meta", "ods", "dwd", "core", "dws", "app"):
        execute_sql_file(conn, f"db/etl_feiqiu/schemas/{layer}.sql", layer)
    # Seed data is intentionally not loaded — full data is COPY'd from the
    # production database afterwards.
|
||||
|
||||
|
||||
def init_test_zqyy_app(conn):
    """Initialize test_zqyy_app from its DDL and migration files."""
    print(" 初始化 DDL...")
    ddl_files = (
        ("db/zqyy_app/schemas/init.sql", "zqyy_app schema"),
        ("db/zqyy_app/migrations/20250715_create_admin_web_tables.sql", "admin_web 迁移"),
    )
    for path, label in ddl_files:
        execute_sql_file(conn, path, label)
|
||||
|
||||
|
||||
def clone_database(src_name, dst_name):
    """Fully mirror one database: DDL, data, materialized views, indexes.

    Runs six ordered steps (DDL init, data COPY, matviews, indexes,
    ANALYZE, verification). Returns True when every non-empty source
    table ends up with a matching row count in the destination.
    """
    print(f"\n{'='*60}")
    print(f"镜像: {src_name} → {dst_name}")
    print(f"{'='*60}")

    src = conn_to(src_name)
    dst = conn_to(dst_name)

    # Step 1: initialize DDL — each test database has its own file set.
    if dst_name == "test_etl_feiqiu":
        init_test_etl_feiqiu(dst)
    elif dst_name == "test_zqyy_app":
        init_test_zqyy_app(dst)

    # Step 2: migrate data.
    print("\n 迁移数据...")
    schemas = get_schemas(src)
    # Only schemas that actually hold tables in the source are migrated.
    total_rows = 0
    total_tables = 0
    for schema in schemas:
        tables = get_tables(src, schema)
        if not tables:
            continue
        # Make sure the destination has this schema before copying into it.
        with dst.cursor() as cur:
            cur.execute(f'CREATE SCHEMA IF NOT EXISTS "{schema}"')
        dst.commit()

        for t in tables:
            s_cnt = count_rows(src, schema, t)
            if s_cnt == 0:
                continue
            # Destination table must exist (the DDL step may not cover it).
            dst_cols = get_columns(dst, schema, t)
            if not dst_cols:
                continue
            # Idempotent reruns: skip tables already holding >= source rows.
            d_cnt = count_rows(dst, schema, t)
            if d_cnt >= s_cnt:
                total_rows += d_cnt
                total_tables += 1
                continue
            # Partial data from a failed earlier run — wipe and re-copy.
            if d_cnt > 0:
                with dst.cursor() as cur:
                    cur.execute(f'TRUNCATE "{schema}"."{t}" CASCADE')
                dst.commit()
            try:
                migrated = copy_table(src, dst, schema, t)
                total_rows += migrated
                total_tables += 1
                if migrated != s_cnt:
                    print(f" ⚠ {schema}.{t}: src={s_cnt} dst={migrated}")
            except Exception as e:
                dst.rollback()
                print(f" ✗ {schema}.{t}: {e}")

    print(f" 数据: {total_tables} 表, {total_rows} 行")

    # Step 3: materialized views (before indexes, so that matview indexes
    # created in step 4 have their relations available).
    print("\n 迁移物化视图...")
    for schema in schemas:
        ok, total = migrate_matviews(src, dst, schema)
        if total > 0:
            print(f" {schema}: {ok}/{total}")

    # Step 4: indexes.
    print("\n 迁移索引...")
    total_idx = 0
    for schema in schemas:
        ok, total = migrate_indexes(src, dst, schema)
        total_idx += ok
        if total > 0:
            print(f" {schema}: {ok}/{total}")
    print(f" 索引: {total_idx} 个")

    # Step 5: ANALYZE tables and matviews so the planner has fresh stats.
    # autocommit makes every ANALYZE run as its own statement.
    print("\n ANALYZE...")
    dst.autocommit = True
    with dst.cursor() as cur:
        for schema in schemas:
            cur.execute(f"""
                SELECT tablename FROM pg_tables WHERE schemaname = '{schema}'
                UNION ALL
                SELECT matviewname FROM pg_matviews WHERE schemaname = '{schema}'
            """)
            for (obj,) in cur.fetchall():
                cur.execute(f'ANALYZE "{schema}"."{obj}"')
    dst.autocommit = False
    print(" ANALYZE 完成")

    # Step 6: verify — every non-empty source table must exist in the
    # destination with the same row count.
    print("\n 验证...")
    all_ok = True
    for schema in schemas:
        tables = get_tables(src, schema)
        for t in tables:
            s = count_rows(src, schema, t)
            if s == 0:
                continue
            dst_cols = get_columns(dst, schema, t)
            if not dst_cols:
                print(f" MISS {schema}.{t}")
                all_ok = False
                continue
            d = count_rows(dst, schema, t)
            if d != s:
                print(f" FAIL {schema}.{t}: src={s} dst={d}")
                all_ok = False

    if all_ok:
        print(" ✓ 全部一致")
    else:
        print(" ✗ 存在不一致")

    src.close()
    dst.close()
    return all_ok
|
||||
|
||||
|
||||
def main():
    """Clone every configured source → test pair and print a summary."""
    outcomes = {dst: clone_database(src, dst) for src, dst in CLONE_PAIRS}

    print(f"\n{'='*60}")
    for db, ok in outcomes.items():
        status = "OK" if ok else "FAIL"
        print(f" {db}: {status}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user