改 相对路径 完成客户端

This commit is contained in:
Neo
2026-01-27 22:14:01 +08:00
parent 04c064793a
commit 9f8976e75a
292 changed files with 307062 additions and 678 deletions

View File

@@ -72,6 +72,10 @@ class DwdLoadTask(BaseTask):
"start_use_time",
"fetched_at",
]
# 对于会出现“回补旧记录”的事实表,额外补齐缺失主键记录
FACT_MISSING_FILL_TABLES = {
"billiards_dwd.dwd_assistant_service_log",
}
# 特殊列映射:dwd 列名 -> 源列表达式(可选 CAST)
FACT_MAPPINGS: dict[str, list[tuple[str, str, str | None]]] = {
@@ -697,7 +701,14 @@ class DwdLoadTask(BaseTask):
if not select_exprs:
return 0
cur.execute(f"SELECT {', '.join(select_exprs)} FROM {ods_table_sql}")
# 对于 dim_site 和 dim_site_ex,使用 DISTINCT ON 优化查询
# 避免从大表 table_fee_transactions 全表扫描,只获取每个 site_id 的最新记录
if dwd_table in ("billiards_dwd.dim_site", "billiards_dwd.dim_site_ex"):
sql = f"SELECT DISTINCT ON (site_id) {', '.join(select_exprs)} FROM {ods_table_sql} ORDER BY site_id, fetched_at DESC NULLS LAST"
else:
sql = f"SELECT {', '.join(select_exprs)} FROM {ods_table_sql}"
cur.execute(sql)
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
if dwd_table == "billiards_dwd.dim_goods_category":
@@ -1081,7 +1092,7 @@ class DwdLoadTask(BaseTask):
self.logger.warning("跳过 %s:未找到可插入的列", dwd_table)
return 0
order_col = self._pick_order_column(dwd_cols, ods_cols)
order_col = self._pick_order_column(dwd_table, dwd_cols, ods_cols)
where_sql = ""
params: List[Any] = []
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
@@ -1090,9 +1101,7 @@ class DwdLoadTask(BaseTask):
where_sql = f'WHERE "{order_col}" >= %s AND "{order_col}" < %s'
params.extend([window_start, window_end])
elif order_col:
cur.execute(f'SELECT COALESCE(MAX("{order_col}"), %s) FROM {dwd_table_sql}', ("1970-01-01",))
row = cur.fetchone() or {}
watermark = list(row.values())[0] if row else "1970-01-01"
watermark = self._get_fact_watermark(cur, dwd_table, ods_table, order_col, dwd_cols, ods_cols)
where_sql = f'WHERE "{order_col}" > %s'
params.append(watermark)
@@ -1121,16 +1130,149 @@ class DwdLoadTask(BaseTask):
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
cur.execute(sql, params)
return cur.rowcount
inserted = cur.rowcount
def _pick_order_column(self, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
"""选择用于增量的时间列(需同时存在于 DWD 与 ODS"""
# 回补缺失主键记录(处理历史回补导致的“create_time 水位”遗漏)
if dwd_table.lower() in self.FACT_MISSING_FILL_TABLES:
inserted += self._insert_missing_by_pk(
cur,
dwd_table,
ods_table,
dwd_cols,
ods_cols,
mapping,
insert_cols,
dwd_types,
ods_types,
)
return inserted
def _pick_order_column(self, dwd_table: str, dwd_cols: Iterable[str], ods_cols: Iterable[str]) -> str | None:
"""Pick an incremental order column that exists in both DWD and ODS."""
lower_cols = {c.lower() for c in dwd_cols} & {c.lower() for c in ods_cols}
for candidate in self.FACT_ORDER_CANDIDATES:
if candidate.lower() in lower_cols:
return candidate.lower()
return None
def _get_fact_watermark(
self,
cur,
dwd_table: str,
ods_table: str,
order_col: str,
dwd_cols: Iterable[str],
ods_cols: Iterable[str],
) -> Any:
"""Fetch incremental watermark; default from DWD, fallback from ODS join."""
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
ods_table_sql = self._format_table(ods_table, "billiards_ods")
dwd_set = {c.lower() for c in dwd_cols}
ods_set = {c.lower() for c in ods_cols}
if order_col.lower() in dwd_set:
cur.execute(
f'SELECT COALESCE(MAX("{order_col}"), %s) FROM {dwd_table_sql}', ("1970-01-01",)
)
row = cur.fetchone() or {}
return list(row.values())[0] if row else "1970-01-01"
pk_cols = self._get_primary_keys(cur, dwd_table)
if not pk_cols or order_col.lower() not in ods_set:
return "1970-01-01"
join_cond = " AND ".join(f'd."{pk}" = o."{pk}"' for pk in pk_cols if pk.lower() in ods_set)
if not join_cond:
return "1970-01-01"
cur.execute(
f'SELECT COALESCE(MAX(o."{order_col}"), %s) FROM {ods_table_sql} o JOIN {dwd_table_sql} d ON {join_cond}',
("1970-01-01",),
)
row = cur.fetchone() or {}
return list(row.values())[0] if row else "1970-01-01"
def _insert_missing_by_pk(
self,
cur,
dwd_table: str,
ods_table: str,
dwd_cols: Sequence[str],
ods_cols: Sequence[str],
mapping: Dict[str, tuple[str, str | None]],
insert_cols: Sequence[str],
dwd_types: Dict[str, str],
ods_types: Dict[str, str],
) -> int:
"""Backfill missing PK rows for facts that can receive late data."""
pk_cols = self._get_primary_keys(cur, dwd_table)
if not pk_cols:
return 0
ods_set = {c.lower() for c in ods_cols}
dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
ods_table_sql = self._format_table(ods_table, "billiards_ods")
join_pairs = []
for pk in pk_cols:
pk_lower = pk.lower()
if pk_lower in mapping:
src, _ = mapping[pk_lower]
elif pk_lower in ods_set:
src = pk
elif "id" in ods_set:
src = "id"
else:
src = None
if not src:
return 0
join_pairs.append((pk, src))
join_cond = " AND ".join(
f'd."{pk}" = o."{src}"' for pk, src in join_pairs
)
null_cond = " AND ".join(f'd."{pk}" IS NULL' for pk, _ in join_pairs)
# 类型转换需要的类型集合
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
text_types = {"text", "character varying", "varchar"}
select_exprs = []
for col in insert_cols:
key = col.lower()
if key in mapping:
src, cast_type = mapping[key]
if src.isidentifier():
expr = self._cast_expr(f'o."{src}"', cast_type)
else:
expr = self._cast_expr(src, cast_type)
select_exprs.append(expr)
elif key in ods_set:
# 检查是否需要类型转换 (ODS text -> DWD numeric)
d_type = dwd_types.get(col)
o_type = ods_types.get(col)
if d_type in numeric_types and o_type in text_types:
select_exprs.append(f'CAST(NULLIF(CAST(o."{col}" AS text), \'\') AS {d_type})')
else:
select_exprs.append(f'o."{col}"')
else:
select_exprs.append("NULL")
select_cols_sql = ", ".join(select_exprs)
insert_cols_sql = ", ".join(f'"{c}"' for c in insert_cols)
sql = (
f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) '
f'SELECT {select_cols_sql} '
f'FROM {ods_table_sql} o '
f'LEFT JOIN {dwd_table_sql} d ON {join_cond} '
f'WHERE {null_cond}'
)
pk_sql = ", ".join(f'"{c}"' for c in pk_cols)
sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
cur.execute(sql)
return cur.rowcount
def _build_fact_select_exprs(
self,
insert_cols: Sequence[str],