在准备环境前提交全部更改。
This commit is contained in:
118
scripts/migrate/batch_schema_rename.py
Normal file
118
scripts/migrate/batch_schema_rename.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# -*- coding: utf-8 -*-
"""Batch-replace legacy schema references with their new names.

Replacement rules:
    billiards_ods → ods
    billiards_dwd → dwd
    billiards_dws → dws
    etl_admin     → meta  (only in SQL schema contexts)

Note: the etl_admin replacement needs finer-grained control so that
file-name references are not rewritten by accident.
"""
import pathlib
import sys

# NOTE(review): parents[1] is the parent of this script's directory.  If this
# file lives at scripts/migrate/, that resolves to "scripts/", while the target
# paths below (apps/..., gui/...) look repo-root relative — confirm whether
# parents[2] was intended.
ROOT = pathlib.Path(__file__).resolve().parents[1]

# ── plain full-text replacements (billiards_xxx → xxx) ──────────────
SIMPLE_REPLACEMENTS = {
    "billiards_ods.": "ods.",
    "billiards_dwd.": "dwd.",
    "billiards_dws.": "dws.",
    "'billiards_ods'": "'ods'",
    "'billiards_dwd'": "'dwd'",
    "'billiards_dws'": "'dws'",
    '"billiards_ods"': '"ods"',
    '"billiards_dwd"': '"dwd"',
    '"billiards_dws"': '"dws"',
}

# ── etl_admin SQL schema-reference replacements ─────────────────────
ETL_ADMIN_SQL_REPLACEMENTS = {
    "etl_admin.etl_task": "meta.etl_task",
    "etl_admin.etl_cursor": "meta.etl_cursor",
    "etl_admin.etl_run": "meta.etl_run",
    "etl_admin.run_tracker": "meta.etl_run",
    "etl_admin.run_status_enum": "meta.run_status_enum",
    "'etl_admin'": "'meta'",
    '"etl_admin"': '"meta"',
}

# ── files to process ────────────────────────────────────────────────
FILES_SIMPLE = [
    # ETL non-test code
    "apps/etl/connectors/feiqiu/tasks/verification/ods_verifier.py",
    "apps/etl/connectors/feiqiu/tasks/verification/index_verifier.py",
    "apps/etl/connectors/feiqiu/tasks/utility/manual_ingest_task.py",
    "apps/etl/connectors/feiqiu/tasks/utility/seed_dws_config_task.py",
    # ETL scripts
    "apps/etl/connectors/feiqiu/scripts/rebuild/rebuild_db_and_run_ods_to_dwd.py",
    # ETL integration tests
    "apps/etl/connectors/feiqiu/tests/integration/test_index_tasks.py",
    # GUI
    "gui/workers/db_worker.py",
    "gui/workers/task_worker.py",
    "gui/widgets/status_panel.py",
    "gui/widgets/db_viewer.py",
    "gui/models/task_registry.py",
    # configuration
    ".env.template",
]

FILES_ETL_ADMIN = [
    "apps/etl/connectors/feiqiu/orchestration/cursor_manager.py",
    "apps/etl/connectors/feiqiu/orchestration/run_tracker.py",
    "apps/etl/connectors/feiqiu/orchestration/task_executor.py",
    "apps/etl/connectors/feiqiu/tasks/utility/check_cutoff_task.py",
    "apps/etl/connectors/feiqiu/scripts/rebuild/rebuild_db_and_run_ods_to_dwd.py",
]
|
||||
|
||||
|
||||
def replace_in_file(filepath: pathlib.Path, replacements: dict) -> int:
    """Apply every mapping in *replacements* to *filepath*.

    Returns the total number of occurrences replaced, or -1 when the file
    does not exist.  The file is rewritten only when at least one
    replacement actually occurred.
    """
    if not filepath.exists():
        return -1
    updated = filepath.read_text(encoding="utf-8")
    hits = 0
    for needle, substitute in replacements.items():
        # Count against the progressively rewritten text, exactly like a
        # sequential str.replace pass would see it.
        hits += updated.count(needle)
        updated = updated.replace(needle, substitute)
    if hits > 0:
        filepath.write_text(updated, encoding="utf-8")
    return hits
|
||||
|
||||
|
||||
def main():
    """Run both replacement passes and report per-file results."""
    print("=== 批量 schema 重命名 ===\n")

    # Each pass: (section header, file list, replacement table).
    passes = [
        ("── billiards_xxx → xxx ──", FILES_SIMPLE, SIMPLE_REPLACEMENTS),
        ("\n── etl_admin → meta ──", FILES_ETL_ADMIN, ETL_ADMIN_SQL_REPLACEMENTS),
    ]
    for header, file_list, table in passes:
        print(header)
        for rel in file_list:
            hits = replace_in_file(ROOT / rel, table)
            if hits == -1:
                print(f" SKIP (不存在): {rel}")
            elif hits == 0:
                print(f" SKIP (无匹配): {rel}")
            else:
                print(f" OK: {rel} ({hits} 处)")

    print("\n完成。")


if __name__ == "__main__":
    main()
|
||||
73
scripts/migrate/fix_remaining.py
Normal file
73
scripts/migrate/fix_remaining.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
修复并执行之前失败的 DDL/种子:
|
||||
1. etl_feiqiu: app.sql(视图已修复)
|
||||
2. etl_feiqiu: 种子数据(schema 引用已修复)
|
||||
3. zqyy_app: init.sql(BOM 已移除)
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import psycopg2
|
||||
|
||||
DB_HOST = "100.64.0.4"
|
||||
DB_PORT = 5432
|
||||
DB_USER = "local-Python"
|
||||
DB_PASSWORD = "Neo-local-1991125"
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
def execute_sql_file(conn, filepath, label=""):
    """Execute one SQL file against *conn* as a single transaction.

    *filepath* is resolved relative to BASE_DIR.  Returns True on success;
    False when the file is missing or empty, or when execution failed
    (in which case the transaction is rolled back).
    """
    full_path = os.path.join(BASE_DIR, filepath)
    if not os.path.exists(full_path):
        print(f" [SKIP] 文件不存在: {filepath}")
        return False
    # utf-8-sig transparently strips a UTF-8 BOM if one is present.
    with open(full_path, "r", encoding="utf-8-sig") as f:
        sql = f.read()
    if not sql.strip():
        print(f" [SKIP] 文件为空: {filepath}")
        return False
    try:
        # BUGFIX: the cursor was previously created without ever being
        # closed; a context-managed cursor guarantees cleanup (and matches
        # the style used by the other migration scripts).
        with conn.cursor() as cur:
            cur.execute(sql)
        conn.commit()
        print(f" [OK] {label or filepath}")
        return True
    except Exception as e:
        conn.rollback()
        print(f" [FAIL] {label or filepath}: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Apply the remaining fixes to etl_feiqiu, then to zqyy_app."""
    print("=== 修复 etl_feiqiu 剩余项 ===")
    conn_etl = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, user=DB_USER,
        password=DB_PASSWORD, dbname="etl_feiqiu"
    )
    conn_etl.autocommit = False

    # (relative path, display label) — executed in order.
    etl_jobs = [
        ("db/etl_feiqiu/schemas/app.sql", "app schema(视图已修复)"),
        ("db/etl_feiqiu/seeds/seed_ods_tasks.sql", "种子:ODS 任务"),
        ("db/etl_feiqiu/seeds/seed_scheduler_tasks.sql", "种子:调度任务"),
        # seed_dws_config.sql is entirely commented out upstream — skipped.
        ("db/etl_feiqiu/seeds/seed_index_parameters.sql", "种子:指数参数"),
    ]
    for path, label in etl_jobs:
        execute_sql_file(conn_etl, path, label)
    conn_etl.close()

    print("\n=== 修复 zqyy_app 剩余项 ===")
    conn_app = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, user=DB_USER,
        password=DB_PASSWORD, dbname="zqyy_app"
    )
    conn_app.autocommit = False

    # init.sql (BOM removed)
    execute_sql_file(conn_app, "db/zqyy_app/schemas/init.sql", "zqyy_app schema(BOM 已修复)")
    conn_app.close()

    print("\n完成。")


if __name__ == "__main__":
    main()
|
||||
74
scripts/migrate/fix_schema_refs.py
Normal file
74
scripts/migrate/fix_schema_refs.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
"""Batch-replace leftover legacy schema references in runtime code."""
# IDIOM FIX: "import glob" previously sat in the middle of the module;
# all imports are now grouped at the top per PEP 8.
import glob
import os

# NOTE(review): hard-coded workstation path — this script only works on the
# machine where the repo is checked out at C:\NeoZQYY.
ROOT = r"C:\NeoZQYY"

# Auto-scan all runtime Python files (test directories excluded).
SCAN_DIRS = [
    "apps/etl/connectors/feiqiu",
    "apps/backend/app",
    "gui",
]
TARGETS = []
for d in SCAN_DIRS:
    full = os.path.join(ROOT, d)
    for py in glob.glob(os.path.join(full, "**", "*.py"), recursive=True):
        rel = os.path.relpath(py, ROOT).replace("\\", "/")
        # Skip test directories and the hypothesis cache.
        if "/tests/" not in rel and "/.hypothesis/" not in rel:
            TARGETS.append(rel)

REPLACEMENTS = {
    "billiards_ods.": "ods.",
    "billiards_dwd.": "dwd.",
    "billiards_dws.": "dws.",
    "'billiards_ods'": "'ods'",
    "'billiards_dwd'": "'dwd'",
    "'billiards_dws'": "'dws'",
    '"billiards_ods"': '"ods"',
    '"billiards_dwd"': '"dwd"',
    '"billiards_dws"': '"dws"',
    # References without a trailing dot in comments/docs/CLI text
    # (terminated by a space or end of line).
    "billiards_ods ": "ods ",
    "billiards_dwd ": "dwd ",
    "billiards_dws ": "dws ",
    "billiards_ods\n": "ods\n",
    "billiards_dwd\n": "dwd\n",
    "billiards_dws\n": "dws\n",
    # Parenthesised references.
    "(billiards_ods)": "(ods)",
    "(billiards_dwd)": "(dwd)",
    "(billiards_dws)": "(dws)",
    # Backtick-quoted references.
    "`billiards_ods`": "`ods`",
    "`billiards_dwd`": "`dwd`",
    "`billiards_dws`": "`dws`",
}

total = 0
for rel in TARGETS:
    fp = os.path.join(ROOT, rel)
    if not os.path.exists(fp):
        print(f"SKIP (not found): {rel}")
        continue
    with open(fp, "r", encoding="utf-8") as f:
        content = f.read()
    new_content = content
    count = 0
    for old, new in REPLACEMENTS.items():
        c = new_content.count(old)
        if c > 0:
            new_content = new_content.replace(old, new)
            count += c
    if count > 0:
        with open(fp, "w", encoding="utf-8") as f:
            f.write(new_content)
        print(f"FIXED ({count} replacements): {rel}")
        total += count
    else:
        print(f"CLEAN: {rel}")

print(f"\nTotal replacements: {total}")
|
||||
222
scripts/migrate/migrate_data.py
Normal file
222
scripts/migrate/migrate_data.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# -*- coding: utf-8 -*-
"""Cross-database data migration: LLZQ-test → etl_feiqiu.

Moves data from the legacy schemas (billiards_ods/billiards_dwd/
billiards_dws/etl_admin) into the new ones (ods/dwd/dws/meta).

Strategy: table-by-table SELECT → INSERT, accelerated with the COPY
protocol for large tables.
"""
import io
import os
import sys

import psycopg2
import psycopg2.extras

# Force UTF-8 console output on Windows.
if sys.platform == "win32":
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")

DB_HOST = "100.64.0.4"
DB_PORT = 5432
DB_USER = "local-Python"
DB_PASS = "Neo-local-1991125"

OLD_DB = "LLZQ-test"
NEW_DB = "etl_feiqiu"

# old schema → new schema
SCHEMA_MAP = {
    "billiards_ods": "ods",
    "billiards_dwd": "dwd",
    "billiards_dws": "dws",
    "etl_admin": "meta",
}
|
||||
|
||||
def get_tables(conn, schema):
    """Return every user table in *schema* (materialized views excluded)."""
    query = """
        SELECT tablename FROM pg_tables
        WHERE schemaname = %s
        ORDER BY tablename
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema,))
        return [row[0] for row in cur.fetchall()]
|
||||
|
||||
def get_columns(conn, schema, table):
    """Return the table's column names, ordered by ordinal_position."""
    query = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        return [row[0] for row in cur.fetchall()]
|
||||
|
||||
def get_row_count(conn, schema, table):
    """Exact row count via COUNT(*) — deliberately not a pg_class estimate."""
    with conn.cursor() as cur:
        cur.execute(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
        (count,) = cur.fetchone()
    return count
|
||||
|
||||
def migrate_table(src_conn, dst_conn, old_schema, new_schema, table, dst_columns):
    """Copy one table's rows from the old schema to the new one via COPY.

    Returns the destination row count after the copy (0 when nothing was
    transferred).
    """
    # Use only the columns present in BOTH tables, in destination order —
    # the old and new definitions may not match exactly.
    src_columns = get_columns(src_conn, old_schema, table)
    shared = [col for col in dst_columns if col in src_columns]
    if not shared:
        print(f" ⚠ 无公共列,跳过")
        return 0

    column_list = ", ".join(f'"{col}"' for col in shared)

    # Stream COPY OUT into an in-memory buffer, then COPY IN from it.
    payload = io.BytesIO()
    with src_conn.cursor() as src_cur:
        src_cur.copy_expert(
            f'COPY (SELECT {column_list} FROM "{old_schema}"."{table}") TO STDOUT WITH (FORMAT binary)',
            payload,
        )

    payload.seek(0)
    # An empty binary COPY stream is just header + trailer (≈11 bytes).
    if payload.getbuffer().nbytes <= 11:
        return 0

    with dst_conn.cursor() as dst_cur:
        dst_cur.copy_expert(
            f'COPY "{new_schema}"."{table}" ({column_list}) FROM STDIN WITH (FORMAT binary)',
            payload,
        )
    dst_conn.commit()

    return get_row_count(dst_conn, new_schema, table)
|
||||
|
||||
|
||||
def migrate_indexes(src_conn, dst_conn, old_schema, new_schema):
    """Recreate user-defined indexes in the new schema.

    Indexes that back primary-key/unique constraints are excluded (they come
    with the table DDL).  Returns (created, total_found).
    """
    query = """
        SELECT indexname, indexdef
        FROM pg_indexes
        WHERE schemaname = %s
        AND indexname NOT IN (
            SELECT conname FROM pg_constraint
            WHERE connamespace = (SELECT oid FROM pg_namespace WHERE nspname = %s)
        )
        ORDER BY indexname
    """
    with src_conn.cursor() as cur:
        cur.execute(query, (old_schema, old_schema))
        index_rows = cur.fetchall()

    created = 0
    for idx_name, idx_def in index_rows:
        # Point the definition at the new schema …
        ddl = idx_def.replace(f'"{old_schema}"', f'"{new_schema}"')
        ddl = ddl.replace(f'{old_schema}.', f'{new_schema}.')
        # … and make it idempotent.
        ddl = ddl.replace("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1)
        ddl = ddl.replace("CREATE UNIQUE INDEX", "CREATE UNIQUE INDEX IF NOT EXISTS", 1)

        try:
            with dst_conn.cursor() as dst_cur:
                dst_cur.execute(ddl)
            dst_conn.commit()
            created += 1
        except Exception as e:
            dst_conn.rollback()
            print(f" ⚠ 索引 {idx_name} 创建失败: {e}")

    return created, len(index_rows)
|
||||
|
||||
|
||||
def main():
    """Drive the full migration: per-schema table copy, index recreation,
    then ANALYZE on the destination."""
    print("=" * 60)
    print("数据迁移: LLZQ-test → etl_feiqiu")
    print("=" * 60)

    src_conn = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, dbname=OLD_DB,
        user=DB_USER, password=DB_PASS,
        options="-c client_encoding=UTF8"
    )
    dst_conn = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, dbname=NEW_DB,
        user=DB_USER, password=DB_PASS,
        options="-c client_encoding=UTF8"
    )
    src_conn.autocommit = False
    dst_conn.autocommit = False

    total_rows = 0
    total_tables = 0
    total_indexes_created = 0

    for old_schema, new_schema in SCHEMA_MAP.items():
        print(f"\n{'─' * 50}")
        print(f"Schema: {old_schema} → {new_schema}")
        print(f"{'─' * 50}")

        tables = get_tables(src_conn, old_schema)
        print(f"源表数量: {len(tables)}")

        for table in tables:
            src_count = get_row_count(src_conn, old_schema, table)
            if src_count == 0:
                print(f" {table}: 0 行,跳过")
                continue

            # Skip tables that do not exist in the destination schema.
            dst_columns = get_columns(dst_conn, new_schema, table)
            if not dst_columns:
                print(f" ⚠ {table}: 目标表不存在,跳过")
                continue

            # Already fully (or over-) populated → leave as is.
            dst_count = get_row_count(dst_conn, new_schema, table)
            if dst_count > 0 and dst_count >= src_count:
                print(f" {table}: 目标已有 {dst_count} 行 (源 {src_count}),跳过")
                total_rows += dst_count
                total_tables += 1
                continue
            elif dst_count > 0 and dst_count < src_count:
                # Partial import: wipe and re-copy to avoid duplicate keys.
                print(f" {table}: 目标有 {dst_count} 行 < 源 {src_count} 行,清空后重导...")
                with dst_conn.cursor() as dst_cur:
                    dst_cur.execute(f'TRUNCATE "{new_schema}"."{table}" CASCADE')
                dst_conn.commit()

            try:
                migrated = migrate_table(src_conn, dst_conn, old_schema, new_schema, table, dst_columns)
                print(f" {table}: {src_count} → {migrated} 行 ✓")
                total_rows += migrated
                total_tables += 1
            except Exception as e:
                dst_conn.rollback()
                print(f" ✗ {table}: 迁移失败 - {e}")

        # Recreate the schema's user indexes.
        print(f"\n 迁移索引 {old_schema} → {new_schema} ...")
        created, total_idx = migrate_indexes(src_conn, dst_conn, old_schema, new_schema)
        total_indexes_created += created
        print(f" 索引: {created}/{total_idx} 创建成功")

    # Refresh planner statistics on the destination.
    print(f"\n{'─' * 50}")
    print("执行 ANALYZE ...")
    dst_conn.autocommit = True
    with dst_conn.cursor() as cur:
        for new_schema in SCHEMA_MAP.values():
            # BUGFIX: the previous code ran `ANALYZE <schema>`, which is not
            # valid PostgreSQL — ANALYZE takes table names, not schemas.
            # Analyze each table in the schema instead (same approach as
            # migrate_finalize.step3_analyze).
            cur.execute(
                "SELECT tablename FROM pg_tables WHERE schemaname = %s",
                (new_schema,),
            )
            for (tbl,) in cur.fetchall():
                cur.execute(f'ANALYZE "{new_schema}"."{tbl}"')
    print("ANALYZE 完成")

    print(f"\n{'=' * 60}")
    print(f"迁移完成: {total_tables} 表, {total_rows} 行, {total_indexes_created} 索引")
    print(f"{'=' * 60}")

    src_conn.close()
    dst_conn.close()


if __name__ == "__main__":
    main()
|
||||
277
scripts/migrate/migrate_finalize.py
Normal file
277
scripts/migrate/migrate_finalize.py
Normal file
@@ -0,0 +1,277 @@
|
||||
# -*- coding: utf-8 -*-
"""Migration finalization: materialized views + indexes + ANALYZE + final check.

Runs the last steps of the LLZQ-test → etl_feiqiu migration on the new
database.
"""
import sys

import psycopg2

# Force UTF-8 console output on Windows.
if sys.platform == "win32":
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")

DB_HOST = "100.64.0.4"
DB_PORT = 5432
DB_USER = "local-Python"
DB_PASS = "Neo-local-1991125"
OLD_DB = "LLZQ-test"
NEW_DB = "etl_feiqiu"

SCHEMA_MAP = {
    "billiards_ods": "ods",
    "billiards_dwd": "dwd",
    "billiards_dws": "dws",
    "etl_admin": "meta",
}

# Materialized-view DDL (extracted from the old DB; schema already renamed
# to dws).  Each entry is (view name, CREATE statement).
MATVIEWS = [
    ("mv_dws_assistant_daily_detail_l1",
     """CREATE MATERIALIZED VIEW dws.mv_dws_assistant_daily_detail_l1 AS
SELECT * FROM dws.dws_assistant_daily_detail
WHERE stat_date >= (CURRENT_DATE - '1 day'::interval)
WITH DATA"""),
    ("mv_dws_assistant_daily_detail_l2",
     """CREATE MATERIALIZED VIEW dws.mv_dws_assistant_daily_detail_l2 AS
SELECT * FROM dws.dws_assistant_daily_detail
WHERE stat_date >= (CURRENT_DATE - '30 days'::interval)
WITH DATA"""),
    ("mv_dws_assistant_daily_detail_l3",
     """CREATE MATERIALIZED VIEW dws.mv_dws_assistant_daily_detail_l3 AS
SELECT * FROM dws.dws_assistant_daily_detail
WHERE stat_date >= (CURRENT_DATE - '90 days'::interval)
WITH DATA"""),
    ("mv_dws_assistant_daily_detail_l4",
     """CREATE MATERIALIZED VIEW dws.mv_dws_assistant_daily_detail_l4 AS
SELECT * FROM dws.dws_assistant_daily_detail
WHERE stat_date >= (date_trunc('month', CURRENT_DATE::timestamp with time zone) - '6 mons'::interval)
AND stat_date < date_trunc('month', CURRENT_DATE::timestamp with time zone)
WITH DATA"""),
    ("mv_dws_finance_daily_summary_l1",
     """CREATE MATERIALIZED VIEW dws.mv_dws_finance_daily_summary_l1 AS
SELECT * FROM dws.dws_finance_daily_summary
WHERE stat_date >= (CURRENT_DATE - '1 day'::interval)
WITH DATA"""),
    ("mv_dws_finance_daily_summary_l2",
     """CREATE MATERIALIZED VIEW dws.mv_dws_finance_daily_summary_l2 AS
SELECT * FROM dws.dws_finance_daily_summary
WHERE stat_date >= (CURRENT_DATE - '30 days'::interval)
WITH DATA"""),
    ("mv_dws_finance_daily_summary_l3",
     """CREATE MATERIALIZED VIEW dws.mv_dws_finance_daily_summary_l3 AS
SELECT * FROM dws.dws_finance_daily_summary
WHERE stat_date >= (CURRENT_DATE - '90 days'::interval)
WITH DATA"""),
    ("mv_dws_finance_daily_summary_l4",
     """CREATE MATERIALIZED VIEW dws.mv_dws_finance_daily_summary_l4 AS
SELECT * FROM dws.dws_finance_daily_summary
WHERE stat_date >= (date_trunc('month', CURRENT_DATE::timestamp with time zone) - '6 mons'::interval)
AND stat_date < date_trunc('month', CURRENT_DATE::timestamp with time zone)
WITH DATA"""),
]

# Indexes on the materialized views.
MV_INDEXES = [
    "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l1 ON dws.mv_dws_assistant_daily_detail_l1 USING btree (site_id, stat_date, assistant_id)",
    "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l2 ON dws.mv_dws_assistant_daily_detail_l2 USING btree (site_id, stat_date, assistant_id)",
    "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l3 ON dws.mv_dws_assistant_daily_detail_l3 USING btree (site_id, stat_date, assistant_id)",
    "CREATE INDEX IF NOT EXISTS idx_mv_assistant_daily_l4 ON dws.mv_dws_assistant_daily_detail_l4 USING btree (site_id, stat_date, assistant_id)",
    "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l1 ON dws.mv_dws_finance_daily_summary_l1 USING btree (site_id, stat_date)",
    "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l2 ON dws.mv_dws_finance_daily_summary_l2 USING btree (site_id, stat_date)",
    "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l3 ON dws.mv_dws_finance_daily_summary_l3 USING btree (site_id, stat_date)",
    "CREATE INDEX IF NOT EXISTS idx_mv_finance_daily_l4 ON dws.mv_dws_finance_daily_summary_l4 USING btree (site_id, stat_date)",
]
|
||||
|
||||
|
||||
def count_rows(conn, schema, table):
    """Return the exact COUNT(*) for schema.table."""
    with conn.cursor() as cur:
        cur.execute(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
        (count,) = cur.fetchone()
    return count
|
||||
|
||||
|
||||
def step1_create_matviews(conn):
    """Create the 8 materialized views, skipping any that already exist.

    Returns the number of views that ended up present (created or
    pre-existing).
    """
    print("=" * 60)
    print("步骤 1: 创建物化视图")
    print("=" * 60)
    ok = 0
    for name, ddl in MATVIEWS:
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT 1 FROM pg_matviews
                    WHERE schemaname = 'dws' AND matviewname = %s
                    """,
                    (name,),
                )
                already_there = cur.fetchone() is not None
            if already_there:
                print(f" {name}: 已存在,跳过")
                ok += 1
                continue
            with conn.cursor() as cur:
                cur.execute(ddl)
            conn.commit()
            rows = count_rows(conn, "dws", name)
            print(f" {name}: 创建成功 ({rows} 行)")
            ok += 1
        except Exception as e:
            conn.rollback()
            print(f" {name}: 创建失败 - {e}")
    print(f"物化视图: {ok}/{len(MATVIEWS)} 成功\n")
    return ok
|
||||
|
||||
|
||||
def step2_create_mv_indexes(conn):
    """Create the btree indexes over the materialized views.

    Returns the number of index statements that succeeded.
    """
    print("=" * 60)
    print("步骤 2: 创建物化视图索引")
    print("=" * 60)
    ok = 0
    for idx_sql in MV_INDEXES:
        # Pull the index name back out of the DDL for reporting.
        idx_name = idx_sql.split("IF NOT EXISTS ")[1].split(" ON ")[0]
        try:
            with conn.cursor() as cur:
                cur.execute(idx_sql)
            conn.commit()
            print(f" {idx_name}: OK")
            ok += 1
        except Exception as e:
            conn.rollback()
            print(f" {idx_name}: 失败 - {e}")
    print(f"索引: {ok}/{len(MV_INDEXES)} 成功\n")
    return ok
|
||||
|
||||
|
||||
def step3_analyze(conn):
    """ANALYZE every table and materialized view in the known schemas."""
    print("=" * 60)
    print("步骤 3: ANALYZE")
    print("=" * 60)
    # Original note: this must run in autocommit mode; restore the caller's
    # setting afterwards.
    saved_autocommit = conn.autocommit
    conn.autocommit = True
    try:
        with conn.cursor() as cur:
            for schema in ["ods", "dwd", "dws", "meta", "core", "app"]:
                # Enumerate all tables + matviews in this schema.
                cur.execute(
                    """
                    SELECT tablename FROM pg_tables WHERE schemaname = %s
                    UNION ALL
                    SELECT matviewname FROM pg_matviews WHERE schemaname = %s
                    """,
                    (schema, schema),
                )
                names = [row[0] for row in cur.fetchall()]
                for name in names:
                    cur.execute(f'ANALYZE "{schema}"."{name}"')
                print(f" {schema}: {len(names)} 个对象已 ANALYZE")
        print("ANALYZE 完成\n")
    finally:
        conn.autocommit = saved_autocommit
|
||||
|
||||
|
||||
def step4_verify(src_conn, dst_conn):
    """Final check: compare row counts for every non-empty source table,
    confirm the materialized views exist, and report index counts.

    Returns True when everything matched.
    """
    print("=" * 60)
    print("步骤 4: 最终验证")
    print("=" * 60)
    all_ok = True
    total_tables = 0
    total_rows = 0

    for old_s, new_s in SCHEMA_MAP.items():
        with src_conn.cursor() as cur:
            cur.execute(
                "SELECT tablename FROM pg_tables WHERE schemaname = %s ORDER BY tablename",
                (old_s,))
            tables = [row[0] for row in cur.fetchall()]

        for t in tables:
            s_cnt = count_rows(src_conn, old_s, t)
            if s_cnt == 0:
                continue

            # Does the table exist on the destination side at all?
            with dst_conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT 1 FROM information_schema.tables
                    WHERE table_schema = %s AND table_name = %s
                    """,
                    (new_s, t))
                present = cur.fetchone() is not None
            if not present:
                print(f" MISS {new_s}.{t}: 目标表不存在")
                all_ok = False
                continue

            d_cnt = count_rows(dst_conn, new_s, t)
            total_tables += 1
            total_rows += d_cnt

            if d_cnt == s_cnt:
                print(f" OK {new_s}.{t}: {s_cnt} 行")
            elif new_s == "meta" and t == "etl_task" and d_cnt > s_cnt:
                # The new DB seeds a few extra etl_task rows — expected.
                print(f" OK* {new_s}.{t}: 源={s_cnt} 目标={d_cnt} (种子数据)")
            else:
                print(f" FAIL {new_s}.{t}: 源={s_cnt} 目标={d_cnt}")
                all_ok = False

    # Confirm all materialized views were created.
    print(f"\n 物化视图检查:")
    with dst_conn.cursor() as cur:
        cur.execute("SELECT matviewname FROM pg_matviews WHERE schemaname = 'dws' ORDER BY matviewname")
        existing_mvs = [row[0] for row in cur.fetchall()]
    for mv_name, _ in MATVIEWS:
        if mv_name in existing_mvs:
            rows = count_rows(dst_conn, "dws", mv_name)
            print(f" OK dws.{mv_name}: {rows} 行")
        else:
            print(f" MISS dws.{mv_name}")
            all_ok = False

    # Per-schema index counts (informational).
    print(f"\n 索引统计:")
    with dst_conn.cursor() as cur:
        for schema in ["ods", "dwd", "dws", "meta"]:
            cur.execute(
                "SELECT COUNT(*) FROM pg_indexes WHERE schemaname = %s",
                (schema,))
            idx_cnt = cur.fetchone()[0]
            print(f" {schema}: {idx_cnt} 个索引")

    print(f"\n{'=' * 60}")
    if all_ok:
        print(f"验证通过: {total_tables} 表, {total_rows} 行全部一致")
    else:
        print("验证发现不一致,请检查上方 FAIL/MISS 项")
    print(f"{'=' * 60}")
    return all_ok
|
||||
|
||||
|
||||
def main():
    """Run all four finalization steps against the new database."""
    dst = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, dbname=NEW_DB,
        user=DB_USER, password=DB_PASS,
        options="-c client_encoding=UTF8"
    )

    step1_create_matviews(dst)    # materialized views
    step2_create_mv_indexes(dst)  # their indexes
    step3_analyze(dst)            # refresh planner statistics

    # Step 4 compares against the legacy database.
    src = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, dbname=OLD_DB,
        user=DB_USER, password=DB_PASS,
        options="-c client_encoding=UTF8"
    )
    step4_verify(src, dst)

    src.close()
    dst.close()


if __name__ == "__main__":
    main()
|
||||
163
scripts/migrate/migrate_fix_remaining.py
Normal file
163
scripts/migrate/migrate_fix_remaining.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# -*- coding: utf-8 -*-
"""Repair duplicate-key fallout from partial imports: TRUNCATE, then re-COPY."""
import io
import sys

import psycopg2

# Force UTF-8 console output on Windows.
if sys.platform == "win32":
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")

DB_HOST = "100.64.0.4"
DB_PORT = 5432
DB_USER = "local-Python"
DB_PASS = "Neo-local-1991125"
OLD_DB = "LLZQ-test"
NEW_DB = "etl_feiqiu"

# old schema → new schema
SCHEMA_MAP = {
    "billiards_ods": "ods",
    "billiards_dwd": "dwd",
    "billiards_dws": "dws",
    "etl_admin": "meta",
}
|
||||
|
||||
def get_columns(conn, schema, table):
    """Return the table's column names, ordered by ordinal_position."""
    query = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        return [row[0] for row in cur.fetchall()]
|
||||
|
||||
def count_rows(conn, schema, table):
    """Return the exact COUNT(*) for schema.table."""
    with conn.cursor() as cur:
        cur.execute(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
        (count,) = cur.fetchone()
    return count
|
||||
|
||||
def main():
    """Find tables whose row counts diverge between the old and new DB,
    re-copy them (TRUNCATE + binary COPY), then recreate indexes, ANALYZE
    and run a final verification pass."""
    src = psycopg2.connect(host=DB_HOST, port=DB_PORT, dbname=OLD_DB, user=DB_USER, password=DB_PASS,
                           options="-c client_encoding=UTF8")
    dst = psycopg2.connect(host=DB_HOST, port=DB_PORT, dbname=NEW_DB, user=DB_USER, password=DB_PASS,
                           options="-c client_encoding=UTF8")

    # 1) Detect tables whose destination row count differs from the source.
    mismatched = []
    for old_s, new_s in SCHEMA_MAP.items():
        with src.cursor() as cur:
            cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = %s ORDER BY tablename", (old_s,))
            tables = [r[0] for r in cur.fetchall()]
        for t in tables:
            s_cnt = count_rows(src, old_s, t)
            dst_cols = get_columns(dst, new_s, t)
            if not dst_cols:
                # Table absent in destination — surfaced by the final check.
                continue
            d_cnt = count_rows(dst, new_s, t)
            if s_cnt > 0 and d_cnt != s_cnt:
                mismatched.append((old_s, new_s, t, s_cnt, d_cnt))

    if not mismatched:
        print("所有表行数一致,无需修复。")
        # Still fall through to index migration and ANALYZE below.
    else:
        print(f"发现 {len(mismatched)} 个不一致表:")
        for old_s, new_s, t, s_cnt, d_cnt in mismatched:
            print(f" {old_s}.{t}: 源={s_cnt} 目标={d_cnt}")

    # 2) Repair each mismatched table: TRUNCATE, then re-import via COPY.
    for old_s, new_s, t, s_cnt, d_cnt in mismatched:
        print(f"\n修复 {new_s}.{t} ...")
        src_cols = get_columns(src, old_s, t)
        dst_cols = get_columns(dst, new_s, t)
        common = [c for c in dst_cols if c in src_cols]
        cols_sql = ", ".join(f'"{c}"' for c in common)

        with dst.cursor() as cur:
            cur.execute(f'TRUNCATE "{new_s}"."{t}" CASCADE')
        dst.commit()
        print(f" TRUNCATE 完成")

        buf = io.BytesIO()
        with src.cursor() as cur:
            cur.copy_expert(
                f'COPY (SELECT {cols_sql} FROM "{old_s}"."{t}") TO STDOUT WITH (FORMAT binary)', buf)
        buf.seek(0)
        with dst.cursor() as cur:
            cur.copy_expert(
                f'COPY "{new_s}"."{t}" ({cols_sql}) FROM STDIN WITH (FORMAT binary)', buf)
        dst.commit()

        final = count_rows(dst, new_s, t)
        status = "OK" if final == s_cnt else "MISMATCH"
        print(f" 导入完成: {final} 行 ({status})")

    # 3) Recreate user-defined indexes (constraint-backed ones excluded).
    print("\n迁移索引...")
    idx_total = 0
    for old_s, new_s in SCHEMA_MAP.items():
        with src.cursor() as cur:
            cur.execute("""
                SELECT indexname, indexdef FROM pg_indexes
                WHERE schemaname = %s
                AND indexname NOT IN (
                    SELECT conname FROM pg_constraint
                    WHERE connamespace = (SELECT oid FROM pg_namespace WHERE nspname = %s))
                ORDER BY indexname
            """, (old_s, old_s))
            indexes = cur.fetchall()

        created = 0
        for idx_name, idx_def in indexes:
            # Point at the new schema and make the DDL idempotent.
            new_def = idx_def.replace(f'"{old_s}"', f'"{new_s}"').replace(f'{old_s}.', f'{new_s}.')
            new_def = new_def.replace("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1)
            new_def = new_def.replace("CREATE UNIQUE INDEX", "CREATE UNIQUE INDEX IF NOT EXISTS", 1)
            try:
                with dst.cursor() as cur:
                    cur.execute(new_def)
                dst.commit()
                created += 1
            except Exception as e:
                dst.rollback()
                print(f" 索引失败 {idx_name}: {e}")
        idx_total += created
        print(f" {old_s} -> {new_s}: {created}/{len(indexes)} 索引")

    # 4) Refresh planner statistics.
    # BUGFIX: the previous code looped over SCHEMA_MAP, made a dummy
    # get_columns(dst, new_s, "") call, and issued a database-wide
    # "ANALYZE" once per schema (4x redundant work).  A single
    # database-wide ANALYZE covers every schema.
    print("\n执行 ANALYZE...")
    dst.autocommit = True
    with dst.cursor() as cur:
        cur.execute("ANALYZE")
    print("ANALYZE 完成")

    # 5) Final verification: every non-empty source table must match.
    print("\n最终验证:")
    all_ok = True
    for old_s, new_s in SCHEMA_MAP.items():
        with src.cursor() as cur:
            cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = %s ORDER BY tablename", (old_s,))
            tables = [r[0] for r in cur.fetchall()]
        for t in tables:
            s_cnt = count_rows(src, old_s, t)
            if s_cnt == 0:
                continue
            dst_cols = get_columns(dst, new_s, t)
            if not dst_cols:
                print(f" MISS {new_s}.{t}: 目标表不存在")
                all_ok = False
                continue
            d_cnt = count_rows(dst, new_s, t)
            if d_cnt != s_cnt:
                print(f" FAIL {new_s}.{t}: 源={s_cnt} 目标={d_cnt}")
                all_ok = False

    if all_ok:
        print(" 全部一致 OK")

    src.close()
    dst.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user