Code migration
etl_billiards/database/__init__.py (new file, empty)

etl_billiards/database/base.py (new file, 112 lines)
@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
"""
Database operations (batch execution, RETURNING support).
"""
import re
from typing import List, Dict, Tuple

import psycopg2.extras

from .connection import DatabaseConnection


class DatabaseOperations(DatabaseConnection):
    """Extended database operations (batch upsert with RETURNING support)."""

    def batch_execute(self, sql: str, rows: List[Dict], page_size: int = 1000):
        """Execute SQL in batches (no RETURNING)."""
        if not rows:
            return
        with self.conn.cursor() as c:
            psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)

    def batch_upsert_with_returning(self, sql: str, rows: List[Dict], page_size: int = 1000) -> Tuple[int, int]:
        """
        Batch UPSERT and count inserted/updated rows.

        Args:
            sql: SQL statement containing a RETURNING clause
            rows: list of data rows
            page_size: batch size

        Returns:
            (inserted_count, updated_count) tuple
        """
        if not rows:
            return (0, 0)

        use_returning = "RETURNING" in sql.upper()

        with self.conn.cursor() as c:
            if not use_returning:
                psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
                return (0, 0)

            # Prefer vectorized execution
            try:
                inserted, updated = self._execute_with_returning_vectorized(c, sql, rows, page_size)
                return (inserted, updated)
            except Exception:
                # Fall back to row-by-row execution
                return self._execute_with_returning_row_by_row(c, sql, rows)

    def _execute_with_returning_vectorized(self, cursor, sql: str, rows: List[Dict], page_size: int) -> Tuple[int, int]:
        """Vectorized execution (using execute_values)."""
        # Parse the VALUES clause
        m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
        if not m:
            raise ValueError("Cannot parse VALUES clause")

        tpl = "(" + m.group(1) + ")"
        base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]

        ret = psycopg2.extras.execute_values(
            cursor, base_sql, rows, template=tpl, page_size=page_size, fetch=True
        )

        if not ret:
            return (0, 0)

        inserted = 0
        for rec in ret:
            flag = self._extract_inserted_flag(rec)
            if flag:
                inserted += 1

        return (inserted, len(ret) - inserted)

    def _execute_with_returning_row_by_row(self, cursor, sql: str, rows: List[Dict]) -> Tuple[int, int]:
        """Row-by-row execution (fallback path)."""
        inserted = 0
        updated = 0

        for r in rows:
            cursor.execute(sql, r)
            try:
                rec = cursor.fetchone()
            except Exception:
                rec = None

            flag = self._extract_inserted_flag(rec) if rec else None

            if flag:
                inserted += 1
            else:
                updated += 1

        return (inserted, updated)

    @staticmethod
    def _extract_inserted_flag(rec) -> bool:
        """Extract the 'inserted' flag from a RETURNING record."""
        if isinstance(rec, tuple):
            return bool(rec[0])
        elif isinstance(rec, dict):
            return bool(rec.get("inserted"))
        else:
            try:
                return bool(rec["inserted"])
            except Exception:
                return False


# Backward-compatible alias
Pg = DatabaseOperations
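The counters above rely on the RETURNING record exposing an `inserted` flag, either as the first column of a tuple or under the key `inserted`. A minimal usage sketch, assuming a hypothetical fact_orders table, an illustrative DSN, and the common PostgreSQL `(xmax = 0)` heuristic for telling inserts from updates; the real upsert statements are not part of this diff:

# Sketch only: table, columns and DSN are illustrative assumptions.
UPSERT_SQL = """
INSERT INTO fact_orders (order_id, amount)
VALUES (%(order_id)s, %(amount)s)
ON CONFLICT (order_id) DO UPDATE SET amount = EXCLUDED.amount
RETURNING (xmax = 0) AS inserted  -- true for inserted rows, false for updated ones
"""

db = DatabaseOperations("postgresql://etl:secret@localhost:5432/billiards")
rows = [{"order_id": 1, "amount": 99.0}, {"order_id": 2, "amount": 45.5}]
inserted, updated = db.batch_upsert_with_returning(UPSERT_SQL, rows)
db.commit()
db.close()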
etl_billiards/database/connection.py (new file, 50 lines)
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""Database connection management."""
import psycopg2
import psycopg2.extras


class DatabaseConnection:
    """Database connection manager."""

    def __init__(self, dsn: str, session: dict = None, connect_timeout: int = None):
        self.conn = psycopg2.connect(dsn, connect_timeout=(connect_timeout or 5))
        self.conn.autocommit = False

        # Apply session-level parameters
        if session:
            with self.conn.cursor() as c:
                if session.get("timezone"):
                    c.execute("SET TIME ZONE %s", (session["timezone"],))
                if session.get("statement_timeout_ms") is not None:
                    c.execute("SET statement_timeout = %s", (int(session["statement_timeout_ms"]),))
                if session.get("lock_timeout_ms") is not None:
                    c.execute("SET lock_timeout = %s", (int(session["lock_timeout_ms"]),))
                if session.get("idle_in_tx_timeout_ms") is not None:
                    c.execute("SET idle_in_transaction_session_timeout = %s",
                              (int(session["idle_in_tx_timeout_ms"]),))

    def query(self, sql: str, args=None):
        """Run a query and return the results."""
        with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as c:
            c.execute(sql, args)
            return c.fetchall()

    def execute(self, sql: str, args=None):
        """Execute a SQL statement."""
        with self.conn.cursor() as c:
            c.execute(sql, args)

    def commit(self):
        """Commit the current transaction."""
        self.conn.commit()

    def rollback(self):
        """Roll back the current transaction."""
        self.conn.rollback()

    def close(self):
        """Close the connection."""
        try:
            self.conn.close()
        except Exception:
            pass
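A minimal connection sketch, assuming a local DSN and the session keys handled in __init__ above (all values are illustrative):

# Sketch only: DSN and session values are assumptions, not from this diff.
db = DatabaseConnection(
    "postgresql://etl:secret@localhost:5432/billiards",
    session={"timezone": "Asia/Shanghai", "statement_timeout_ms": 60000},
    connect_timeout=10,
)
try:
    rows = db.query("SELECT 1 AS ok")  # RealDictCursor -> [{'ok': 1}]
    db.commit()
finally:
    db.close()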
etl_billiards/database/operations.py (new file, 77 lines)
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
"""Database batch operations."""
import re

import psycopg2.extras


class DatabaseOperations:
    """Wrapper around batch database operations."""

    def __init__(self, connection):
        self.conn = connection.conn

    def batch_execute(self, sql: str, rows: list, page_size: int = 1000):
        """Execute SQL in batches."""
        if not rows:
            return
        with self.conn.cursor() as c:
            psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)

    def batch_upsert_with_returning(self, sql: str, rows: list,
                                    page_size: int = 1000) -> tuple:
        """Batch UPSERT and return (inserted, updated) counts."""
        if not rows:
            return (0, 0)

        use_returning = "RETURNING" in sql.upper()

        with self.conn.cursor() as c:
            if not use_returning:
                psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
                return (0, 0)

            # Try vectorized execution first
            try:
                m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
                if m:
                    tpl = "(" + m.group(1) + ")"
                    base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]

                    ret = psycopg2.extras.execute_values(
                        c, base_sql, rows, template=tpl, page_size=page_size, fetch=True
                    )

                    if not ret:
                        return (0, 0)

                    inserted = sum(1 for rec in ret if self._is_inserted(rec))
                    return (inserted, len(ret) - inserted)
            except Exception:
                pass

            # Fallback: execute row by row
            inserted = 0
            updated = 0
            for r in rows:
                c.execute(sql, r)
                try:
                    rec = c.fetchone()
                except Exception:
                    rec = None

                if self._is_inserted(rec):
                    inserted += 1
                else:
                    updated += 1

            return (inserted, updated)

    @staticmethod
    def _is_inserted(rec) -> bool:
        """Return True if the RETURNING record marks an inserted row."""
        if rec is None:
            return False
        if isinstance(rec, tuple):
            return bool(rec[0])
        if isinstance(rec, dict):
            return bool(rec.get("inserted"))
        return False
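Unlike the DatabaseOperations in base.py, which inherits from DatabaseConnection, this variant wraps an existing connection object. A minimal composition sketch, assuming etl_billiards is importable as a package (DSN, staging table and columns are illustrative):

# Sketch only: DSN, table and columns are assumptions, not from this diff.
from etl_billiards.database.connection import DatabaseConnection
from etl_billiards.database.operations import DatabaseOperations

conn = DatabaseConnection("postgresql://etl:secret@localhost:5432/billiards")
ops = DatabaseOperations(conn)

ops.batch_execute(
    "INSERT INTO staging_payments (payment_id, amount) VALUES (%(payment_id)s, %(amount)s)",
    [{"payment_id": "P001", "amount": 120.0}, {"payment_id": "P002", "amount": 80.0}],
)
conn.commit()
conn.close()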