Update2
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
"""ODS ingestion tasks."""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -305,7 +305,9 @@ class BaseOdsTask(BaseTask):
|
||||
source_endpoint: str | None,
|
||||
) -> tuple[int, int]:
|
||||
"""
|
||||
按 DB 表结构动态写入 ODS(只插新数据:ON CONFLICT DO NOTHING)。
|
||||
按 DB 表结构动态写入 ODS。
|
||||
- 新记录:插入
|
||||
- 已存在的记录:回填 NULL 列(用新值填充数据库中为 NULL 的字段)
|
||||
返回 (inserted, skipped)。
|
||||
"""
|
||||
if not records:
|
||||
@@ -324,9 +326,54 @@ class BaseOdsTask(BaseTask):
|
||||
col_names = [c[0] for c in cols_info]
|
||||
quoted_cols = ", ".join(f'\"{c}\"' for c in col_names)
|
||||
sql = f"INSERT INTO {table} ({quoted_cols}) VALUES %s"
|
||||
|
||||
# 冲突处理模式:
|
||||
# "nothing" - 跳过已存在记录 (DO NOTHING)
|
||||
# "backfill" - 只回填 NULL 列 (COALESCE)
|
||||
# "update" - 全字段对比更新 (覆盖所有变化的字段)
|
||||
conflict_mode = str(self.config.get("run.ods_conflict_mode", "update")).lower()
|
||||
|
||||
# 兼容旧配置
|
||||
if self.config.get("run.ods_backfill_null_columns") is False:
|
||||
conflict_mode = "nothing"
|
||||
|
||||
if pk_cols:
|
||||
pk_clause = ", ".join(f'\"{c}\"' for c in pk_cols)
|
||||
sql += f" ON CONFLICT ({pk_clause}) DO NOTHING"
|
||||
|
||||
if conflict_mode in ("backfill", "update"):
|
||||
# 排除主键列和元数据列
|
||||
meta_cols = {"payload", "source_file", "source_endpoint", "fetched_at", "content_hash"}
|
||||
pk_cols_lower = {c.lower() for c in pk_cols}
|
||||
update_cols = [
|
||||
c for c in col_names
|
||||
if c.lower() not in pk_cols_lower and c.lower() not in meta_cols
|
||||
]
|
||||
|
||||
if update_cols:
|
||||
if conflict_mode == "backfill":
|
||||
# 回填模式:只填充 NULL 列
|
||||
set_clause = ", ".join(
|
||||
f'"{c}" = COALESCE({table}."{c}", EXCLUDED."{c}")'
|
||||
for c in update_cols
|
||||
)
|
||||
where_clause = " OR ".join(f'{table}."{c}" IS NULL' for c in update_cols)
|
||||
sql += f" ON CONFLICT ({pk_clause}) DO UPDATE SET {set_clause} WHERE {where_clause}"
|
||||
else:
|
||||
# update 模式:全字段对比更新
|
||||
set_clause = ", ".join(
|
||||
f'"{c}" = EXCLUDED."{c}"'
|
||||
for c in update_cols
|
||||
)
|
||||
# 只在有字段变化时才更新
|
||||
where_clause = " OR ".join(
|
||||
f'{table}."{c}" IS DISTINCT FROM EXCLUDED."{c}"'
|
||||
for c in update_cols
|
||||
)
|
||||
sql += f" ON CONFLICT ({pk_clause}) DO UPDATE SET {set_clause} WHERE {where_clause}"
|
||||
else:
|
||||
sql += f" ON CONFLICT ({pk_clause}) DO NOTHING"
|
||||
else:
|
||||
sql += f" ON CONFLICT ({pk_clause}) DO NOTHING"
|
||||
|
||||
now = datetime.now(self.tz)
|
||||
json_dump = lambda v: json.dumps(v, ensure_ascii=False) # noqa: E731
|
||||
@@ -499,6 +546,14 @@ class BaseOdsTask(BaseTask):
|
||||
if value is None:
|
||||
return None
|
||||
dt = (data_type or "").lower()
|
||||
if dt == "boolean":
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
if isinstance(value, str):
|
||||
return value.lower() in ("true", "1", "yes", "t")
|
||||
return bool(value)
|
||||
if dt in ("integer", "bigint", "smallint"):
|
||||
if isinstance(value, bool):
|
||||
return int(value)
|
||||
|
||||
Reference in New Issue
Block a user