改 相对路径 完成客户端
This commit is contained in:
@@ -72,6 +72,10 @@ class DwdLoadTask(BaseTask):
|
||||
"start_use_time",
|
||||
"fetched_at",
|
||||
]
|
||||
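# For fact tables that can receive back-filled (late-arriving) old records, additionally fill in rows whose primary key is missing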
|
||||
FACT_MISSING_FILL_TABLES = {
|
||||
"billiards_dwd.dwd_assistant_service_log",
|
||||
}
|
||||
|
||||
# Special column mappings: DWD column name -> source column expression (optional CAST)
|
||||
FACT_MAPPINGS: dict[str, list[tuple[str, str, str | None]]] = {
|
||||
@@ -697,7 +701,14 @@ class DwdLoadTask(BaseTask):
|
||||
if not select_exprs:
|
||||
return 0
|
||||
|
||||
cur.execute(f"SELECT {', '.join(select_exprs)} FROM {ods_table_sql}")
|
||||
# For dim_site and dim_site_ex, use DISTINCT ON to optimize the query
|
||||
# Avoid a full scan of the large table_fee_transactions table; fetch only the latest record for each site_id
|
||||
if dwd_table in ("billiards_dwd.dim_site", "billiards_dwd.dim_site_ex"):
|
||||
sql = f"SELECT DISTINCT ON (site_id) {', '.join(select_exprs)} FROM {ods_table_sql} ORDER BY site_id, fetched_at DESC NULLS LAST"
|
||||
else:
|
||||
sql = f"SELECT {', '.join(select_exprs)} FROM {ods_table_sql}"
|
||||
|
||||
cur.execute(sql)
|
||||
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
|
||||
|
||||
if dwd_table == "billiards_dwd.dim_goods_category":
|
||||
@@ -1081,7 +1092,7 @@ class DwdLoadTask(BaseTask):
|
||||
self.logger.warning("跳过 %s:未找到可插入的列", dwd_table)
|
||||
return 0
|
||||
|
||||
order_col = self._pick_order_column(dwd_cols, ods_cols)
|
||||
order_col = self._pick_order_column(dwd_table, dwd_cols, ods_cols)
|
||||
where_sql = ""
|
||||
params: List[Any] = []
|
||||
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||||
@@ -1090,9 +1101,7 @@ class DwdLoadTask(BaseTask):
|
||||
where_sql = f'WHERE "{order_col}" >= %s AND "{order_col}" < %s'
|
||||
params.extend([window_start, window_end])
|
||||
elif order_col:
|
||||
cur.execute(f'SELECT COALESCE(MAX("{order_col}"), %s) FROM {dwd_table_sql}', ("1970-01-01",))
|
||||
row = cur.fetchone() or {}
|
||||
watermark = list(row.values())[0] if row else "1970-01-01"
|
||||
watermark = self._get_fact_watermark(cur, dwd_table, ods_table, order_col, dwd_cols, ods_cols)
|
||||
where_sql = f'WHERE "{order_col}" > %s'
|
||||
params.append(watermark)
|
||||
|
||||
@@ -1121,16 +1130,149 @@ class DwdLoadTask(BaseTask):
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
|
||||
|
||||
cur.execute(sql, params)
|
||||
return cur.rowcount
|
||||
inserted = cur.rowcount
|
||||
|
||||
def _pick_order_column(self, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
|
||||
"""选择用于增量的时间列(需同时存在于 DWD 与 ODS)。"""
|
||||
# Backfill rows whose primary key is missing (covers rows skipped by the create_time watermark because of historical backfills)
|
||||
if dwd_table.lower() in self.FACT_MISSING_FILL_TABLES:
|
||||
inserted += self._insert_missing_by_pk(
|
||||
cur,
|
||||
dwd_table,
|
||||
ods_table,
|
||||
dwd_cols,
|
||||
ods_cols,
|
||||
mapping,
|
||||
insert_cols,
|
||||
dwd_types,
|
||||
ods_types,
|
||||
)
|
||||
|
||||
return inserted
|
||||
def _pick_order_column(self, dwd_table: str, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
|
||||
"""Pick an incremental order column that exists in both DWD and ODS."""
|
||||
lower_cols = {c.lower() for c in dwd_cols} & {c.lower() for c in ods_cols}
|
||||
for candidate in self.FACT_ORDER_CANDIDATES:
|
||||
if candidate.lower() in lower_cols:
|
||||
return candidate.lower()
|
||||
return None
|
||||
|
||||
def _get_fact_watermark(
|
||||
self,
|
||||
cur,
|
||||
dwd_table: str,
|
||||
ods_table: str,
|
||||
order_col: str,
|
||||
dwd_cols: Iterable[str],
|
||||
ods_cols: Iterable[str],
|
||||
) -> Any:
|
||||
"""Fetch incremental watermark; default from DWD, fallback from ODS join."""
|
||||
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||||
ods_table_sql = self._format_table(ods_table, "billiards_ods")
|
||||
dwd_set = {c.lower() for c in dwd_cols}
|
||||
ods_set = {c.lower() for c in ods_cols}
|
||||
if order_col.lower() in dwd_set:
|
||||
cur.execute(
|
||||
f'SELECT COALESCE(MAX("{order_col}"), %s) FROM {dwd_table_sql}', ("1970-01-01",)
|
||||
)
|
||||
row = cur.fetchone() or {}
|
||||
return list(row.values())[0] if row else "1970-01-01"
|
||||
|
||||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||||
if not pk_cols or order_col.lower() not in ods_set:
|
||||
return "1970-01-01"
|
||||
|
||||
join_cond = " AND ".join(f'd."{pk}" = o."{pk}"' for pk in pk_cols if pk.lower() in ods_set)
|
||||
if not join_cond:
|
||||
return "1970-01-01"
|
||||
|
||||
cur.execute(
|
||||
f'SELECT COALESCE(MAX(o."{order_col}"), %s) FROM {ods_table_sql} o JOIN {dwd_table_sql} d ON {join_cond}',
|
||||
("1970-01-01",),
|
||||
)
|
||||
row = cur.fetchone() or {}
|
||||
return list(row.values())[0] if row else "1970-01-01"
|
||||
|
||||
def _insert_missing_by_pk(
|
||||
self,
|
||||
cur,
|
||||
dwd_table: str,
|
||||
ods_table: str,
|
||||
dwd_cols: Sequence[str],
|
||||
ods_cols: Sequence[str],
|
||||
mapping: Dict[str, tuple[str, str | None]],
|
||||
insert_cols: Sequence[str],
|
||||
dwd_types: Dict[str, str],
|
||||
ods_types: Dict[str, str],
|
||||
) -> int:
|
||||
"""Backfill missing PK rows for facts that can receive late data."""
|
||||
pk_cols = self._get_primary_keys(cur, dwd_table)
|
||||
if not pk_cols:
|
||||
return 0
|
||||
|
||||
ods_set = {c.lower() for c in ods_cols}
|
||||
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
|
||||
ods_table_sql = self._format_table(ods_table, "billiards_ods")
|
||||
|
||||
join_pairs = []
|
||||
for pk in pk_cols:
|
||||
pk_lower = pk.lower()
|
||||
if pk_lower in mapping:
|
||||
src, _ = mapping[pk_lower]
|
||||
elif pk_lower in ods_set:
|
||||
src = pk
|
||||
elif "id" in ods_set:
|
||||
src = "id"
|
||||
else:
|
||||
src = None
|
||||
if not src:
|
||||
return 0
|
||||
join_pairs.append((pk, src))
|
||||
|
||||
join_cond = " AND ".join(
|
||||
f'd."{pk}" = o."{src}"' for pk, src in join_pairs
|
||||
)
|
||||
null_cond = " AND ".join(f'd."{pk}" IS NULL' for pk, _ in join_pairs)
|
||||
|
||||
# 类型转换需要的类型集合
|
||||
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
|
||||
text_types = {"text", "character varying", "varchar"}
|
||||
|
||||
select_exprs = []
|
||||
for col in insert_cols:
|
||||
key = col.lower()
|
||||
if key in mapping:
|
||||
src, cast_type = mapping[key]
|
||||
if src.isidentifier():
|
||||
expr = self._cast_expr(f'o."{src}"', cast_type)
|
||||
else:
|
||||
expr = self._cast_expr(src, cast_type)
|
||||
select_exprs.append(expr)
|
||||
elif key in ods_set:
|
||||
# 检查是否需要类型转换 (ODS text -> DWD numeric)
|
||||
d_type = dwd_types.get(col)
|
||||
o_type = ods_types.get(col)
|
||||
if d_type in numeric_types and o_type in text_types:
|
||||
select_exprs.append(f'CAST(NULLIF(CAST(o."{col}" AS text), \'\') AS {d_type})')
|
||||
else:
|
||||
select_exprs.append(f'o."{col}"')
|
||||
else:
|
||||
select_exprs.append("NULL")
|
||||
|
||||
select_cols_sql = ", ".join(select_exprs)
|
||||
insert_cols_sql = ", ".join(f'"{c}"' for c in insert_cols)
|
||||
sql = (
|
||||
f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) '
|
||||
f'SELECT {select_cols_sql} '
|
||||
f'FROM {ods_table_sql} o '
|
||||
f'LEFT JOIN {dwd_table_sql} d ON {join_cond} '
|
||||
f'WHERE {null_cond}'
|
||||
)
|
||||
|
||||
pk_sql = ", ".join(f'"{c}"' for c in pk_cols)
|
||||
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
|
||||
|
||||
cur.execute(sql)
|
||||
return cur.rowcount
|
||||
|
||||
def _build_fact_select_exprs(
|
||||
self,
|
||||
insert_cols: Sequence[str],
|
||||
|
||||
Reference in New Issue
Block a user