Neo-ZQYY/scripts/ops/fix_test_db.py

# -*- coding: utf-8 -*-
"""
修复 test_etl_feiqiu：补齐 meta 数据 + 创建物化视图 + 索引 + ANALYZE
"""
import sys
import io
import psycopg2

if sys.platform == "win32":
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")

DB = dict(host="100.64.0.4", port=5432, user="local-Python",
          password="Neo-local-1991125", options="-c client_encoding=UTF8")


def conn(dbname):
    return psycopg2.connect(dbname=dbname, **DB)


def count(c, schema, table):
    with c.cursor() as cur:
        cur.execute(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
        return cur.fetchone()[0]


def get_columns(c, schema, table):
    with c.cursor() as cur:
        cur.execute("""SELECT column_name FROM information_schema.columns
                       WHERE table_schema=%s AND table_name=%s
                       ORDER BY ordinal_position""", (schema, table))
        return [r[0] for r in cur.fetchall()]


def copy_table(src, dst, schema, table):
    src_cols = get_columns(src, schema, table)
    dst_cols = get_columns(dst, schema, table)
    common = [c for c in dst_cols if c in src_cols]
    if not common:
        return 0
    cols = ", ".join(f'"{c}"' for c in common)
    # TRUNCATE 先清空
    with dst.cursor() as cur:
        cur.execute(f'TRUNCATE "{schema}"."{table}" CASCADE')
    dst.commit()
    # COPY
    buf = io.BytesIO()
    with src.cursor() as cur:
        cur.copy_expert(f'COPY (SELECT {cols} FROM "{schema}"."{table}") TO STDOUT WITH (FORMAT binary)', buf)
    buf.seek(0)
    if buf.getbuffer().nbytes <= 11:
        return 0
    with dst.cursor() as cur:
        cur.copy_expert(f'COPY "{schema}"."{table}" ({cols}) FROM STDIN WITH (FORMAT binary)', buf)
    dst.commit()
    return count(dst, schema, table)


def main():
    src = conn("etl_feiqiu")
    dst = conn("test_etl_feiqiu")

    # ── 1. 补齐 meta 数据 ──
    print("=== 补齐 meta 数据 ===")
    for t in ["etl_cursor", "etl_run", "etl_task"]:
        s = count(src, "meta", t)
        d = count(dst, "meta", t)
        if d >= s and s > 0:
            print(f"  {t}: 已一致 ({d} 行)")
            continue
        if s == 0:
            print(f"  {t}: 源为空，跳过")
            continue
        rows = copy_table(src, dst, "meta", t)
        print(f"  {t}: {s} → {rows}")

    # ── 2. 创建物化视图 ──
    print("\n=== 创建物化视图 ===")
    with src.cursor() as cur:
        cur.execute("SELECT matviewname, definition FROM pg_matviews WHERE schemaname='dws' ORDER BY 1")
        mvs = cur.fetchall()

    for name, defn in mvs:
        with dst.cursor() as cur:
            cur.execute("SELECT 1 FROM pg_matviews WHERE schemaname='dws' AND matviewname=%s", (name,))
            if cur.fetchone():
                print(f"  {name}: 已存在")
                continue
        # 去掉末尾分号
        clean = defn.rstrip().rstrip(";").rstrip()
        try:
            with dst.cursor() as cur:
                cur.execute(f'CREATE MATERIALIZED VIEW dws."{name}" AS {clean} WITH DATA')
            dst.commit()
            rows = count(dst, "dws", name)
            print(f"  {name}: 创建成功 ({rows} 行)")
        except Exception as e:
            dst.rollback()
            print(f"  {name}: 失败 - {e}")

    # ── 3. 物化视图索引 ──
    print("\n=== 物化视图索引 ===")
    mv_indexes = [
        "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l1 ON dws.mv_dws_assistant_daily_detail_l1 (site_id, stat_date, assistant_id)",
        "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l2 ON dws.mv_dws_assistant_daily_detail_l2 (site_id, stat_date, assistant_id)",
        "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l3 ON dws.mv_dws_assistant_daily_detail_l3 (site_id, stat_date, assistant_id)",
        "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l4 ON dws.mv_dws_assistant_daily_detail_l4 (site_id, stat_date, assistant_id)",
        "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l1 ON dws.mv_dws_finance_daily_summary_l1 (site_id, stat_date)",
        "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l2 ON dws.mv_dws_finance_daily_summary_l2 (site_id, stat_date)",
        "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l3 ON dws.mv_dws_finance_daily_summary_l3 (site_id, stat_date)",
        "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l4 ON dws.mv_dws_finance_daily_summary_l4 (site_id, stat_date)",
    ]
    for sql in mv_indexes:
        idx = sql.split("EXISTS ")[1].split(" ON ")[0]
        try:
            with dst.cursor() as cur:
                cur.execute(sql)
            dst.commit()
            print(f"  {idx}: OK")
        except Exception as e:
            dst.rollback()
            print(f"  {idx}: {e}")

    # ── 4. ANALYZE ──
    print("\n=== ANALYZE ===")
    dst.autocommit = True
    with dst.cursor() as cur:
        for schema in ["ods", "dwd", "dws", "meta", "core", "app"]:
            cur.execute(f"""
                SELECT tablename FROM pg_tables WHERE schemaname='{schema}'
                UNION ALL
                SELECT matviewname FROM pg_matviews WHERE schemaname='{schema}'
            """)
            objs = [r[0] for r in cur.fetchall()]
            for o in objs:
                cur.execute(f'ANALYZE "{schema}"."{o}"')
            print(f"  {schema}: {len(objs)} 个对象")
    dst.autocommit = False

    # ── 5. 最终验证 ──
    print("\n=== 最终验证 ===")
    ok = True
    for schema in ["ods", "dwd", "dws", "meta"]:
        with src.cursor() as cur:
            cur.execute("SELECT tablename FROM pg_tables WHERE schemaname=%s ORDER BY 1", (schema,))
            tables = [r[0] for r in cur.fetchall()]
        for t in tables:
            s = count(src, schema, t)
            if s == 0:
                continue
            d = count(dst, schema, t)
            tag = "OK" if d == s else "FAIL"
            if tag == "FAIL":
                ok = False
            print(f"  {tag:4s} {schema}.{t}: src={s} dst={d}")

    # 物化视图
    with dst.cursor() as cur:
        cur.execute("SELECT matviewname FROM pg_matviews WHERE schemaname='dws' ORDER BY 1")
        mv_names = [r[0] for r in cur.fetchall()]
    print(f"\n  物化视图: {len(mv_names)} 个")
    for n in mv_names:
        r = count(dst, "dws", n)
        print(f"    {n}: {r} 行")

    print(f"\n{'='*50}")
    print("全部通过" if ok else "存在不一致")

    src.close()
    dst.close()


if __name__ == "__main__":
    main()