在前后端开发联调前 的提交20260223
This commit is contained in:
86
scripts/ops/fix_bc_dates.py
Normal file
86
scripts/ops/fix_bc_dates.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# -*- coding: utf-8 -*-
"""BUG 12 backfill fix: scan every timestamptz column of all DWD tables and set BC dates (< 0002-01-01) to NULL.

Root cause: the upstream API uses 0001-01-01T00:00:00 to mean "not set", and ODS
stores that value as a timestamp. When DWD implicitly casts it to timestamptz,
it becomes a BC date under the Asia/Shanghai time zone; psycopg2 cannot parse
such a value, so fetchall() crashes.
"""
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from the ".env" file located at parents[2] of this
# script's resolved path, then require a PostgreSQL DSN before doing anything else.
load_dotenv(Path(__file__).resolve().parents[2].joinpath(".env"))

PG_DSN = os.getenv("PG_DSN")
if not PG_DSN:
    # Missing or empty DSN: report on stderr and stop with exit status 1.
    print("ERROR: PG_DSN 未配置", file=sys.stderr)
    raise SystemExit(1)
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# Date threshold mentioned in the module docstring ("< 0002-01-01").
# NOTE(review): this constant is not referenced by any code visible in this
# script; the UPDATE statements filter on the year / text form instead — confirm
# whether it should drive the WHERE clause or be removed.
SENTINEL = "0002-01-01"
# Name of the database schema whose tables are scanned.
SCHEMA = "dwd"
|
||||
|
||||
def main():
    """Set BC-era values in every timestamptz column of ``SCHEMA`` to NULL.

    Connects using ``PG_DSN``, lists the ``timestamp with time zone`` columns of
    all base tables in ``SCHEMA`` and issues one UPDATE per column. All updates
    run in a single transaction that is committed once every column has been
    processed.

    Each column is wrapped in its own SAVEPOINT. If the primary UPDATE fails,
    only that column's work is undone before the text-based fallback is tried.
    (A plain ``conn.rollback()`` here would silently discard the updates already
    applied to earlier columns while they were still reported as fixed.)

    Raises:
        Exception: any error not recovered by the per-column fallback; the
            whole transaction is rolled back before re-raising.
    """
    conn = psycopg2.connect(PG_DSN)
    conn.autocommit = False
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Find every timestamptz column of the base tables in the schema.
            cur.execute("""
                SELECT t.table_name, c.column_name
                FROM information_schema.tables t
                JOIN information_schema.columns c
                  ON t.table_schema = c.table_schema AND t.table_name = c.table_name
                WHERE t.table_schema = %s
                  AND t.table_type = 'BASE TABLE'
                  AND c.data_type = 'timestamp with time zone'
                ORDER BY t.table_name, c.ordinal_position
            """, (SCHEMA,))
            cols = cur.fetchall()

            total_fixed = 0
            for row in cols:
                tbl = row["table_name"]
                col = row["column_name"]
                update_head = f'UPDATE "{SCHEMA}"."{tbl}" SET "{col}" = NULL '

                # The UPDATE returns no rows, so the client never has to parse
                # a BC timestamp; the filtering happens entirely on the server.
                # NOTE(review): the module docstring speaks of "< 0002-01-01"
                # while this predicate matches year < 1, and EXTRACT on a
                # timestamptz presumably depends on the session time zone —
                # confirm the intended threshold.
                cur.execute("SAVEPOINT fix_bc_col")
                try:
                    cur.execute(
                        update_head + f'WHERE EXTRACT(year FROM "{col}") < 1'
                    )
                    cnt = cur.rowcount
                    suffix = ""
                except psycopg2.Error as e:
                    # Undo only this column's failed statement; updates made
                    # for earlier columns stay pending in the transaction.
                    cur.execute("ROLLBACK TO SAVEPOINT fix_bc_col")
                    print(f" WARN: {SCHEMA}.{tbl}.{col} — EXTRACT 失败({e}),用 text 方式重试")
                    # Retry by matching the textual rendering of the value.
                    cur.execute(
                        update_head + f"WHERE \"{col}\"::text LIKE '%BC%'"
                    )
                    cnt = cur.rowcount
                    suffix = " (text 方式)"
                cur.execute("RELEASE SAVEPOINT fix_bc_col")

                if cnt > 0:
                    print(f" FIXED: {SCHEMA}.{tbl}.{col} — {cnt} 行{suffix}")
                    total_fixed += cnt

        conn.commit()
        print(f"\n完成:共修复 {total_fixed} 行")

    except Exception:
        # Nothing is persisted unless every column was processed successfully.
        conn.rollback()
        raise
    finally:
        conn.close()
|
||||
|
||||
# Run the repair only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user