在前后端开发联调前的提交 2026-02-23

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -306,6 +306,11 @@ class BaseOdsTask(BaseTask):
# 用户明确指定了窗口,尊重用户选择
return base_start, base_end, base_minutes
# full_window 模式:直接使用基础窗口,跳过 MAX(fetched_at) 兜底
# 该模式以 API 返回数据的实际时间范围为准,无游标偏移风险
if self.config.get("run.processing_mode") == "full_window":
return base_start, base_end, base_minutes
# 以 ODS 表 MAX(fetched_at) 兜底:避免“窗口游标推进但未实际入库”导致漏数。
last_fetched = self._get_max_fetched_at(self.SPEC.table_name)
if last_fetched:
@@ -652,6 +657,15 @@ class BaseOdsTask(BaseTask):
now = datetime.now(self.tz)
insert_rows: list[tuple] = []
# CHANGE [2026-02-20] intent: 识别所有 JSONB 列索引,防止 dict/list 值导致 psycopg2 适配错误
jsonb_col_indices: set[int] = set()
for ci in cols_info:
col_lower = ci[2] # udt_name
if col_lower in ("jsonb", "json"):
idx = col_index.get(ci[0].lower())
if idx is not None:
jsonb_col_indices.add(idx)
for row in latest_rows:
row = list(row)
@@ -675,11 +689,14 @@ class BaseOdsTask(BaseTask):
if fetched_at_idx is not None:
row[fetched_at_idx] = now
# 将 payload 包装为 Json 以便 psycopg2 正确序列化
row[payload_idx] = Json(
original_payload,
dumps=lambda v: json.dumps(v, ensure_ascii=False),
)
# 将所有 JSONB 列的 dict/list 值包装为 Json 以便 psycopg2 正确序列化
for ji in jsonb_col_indices:
val = row[ji]
if isinstance(val, (dict, list)):
row[ji] = Json(
val,
dumps=lambda v: json.dumps(v, ensure_ascii=False),
)
insert_rows.append(tuple(row))
@@ -1217,19 +1234,6 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
snapshot_time_column="create_time",
description="助教服务流水 ODSGetOrderAssistantDetails -> orderAssistantDetails 原始 JSON",
),
OdsTaskSpec(
code="ODS_ASSISTANT_ABOLISH",
class_name="OdsAssistantAbolishTask",
table_name="ods.assistant_cancellation_records",
endpoint="/AssistantPerformance/GetAbolitionAssistant",
data_path=("data",),
list_key="abolitionAssistants",
pk_columns=(_int_col("id", "id", required=True),),
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
description="助教废除记录 ODSGetAbolitionAssistant -> abolitionAssistants 原始 JSON",
),
OdsTaskSpec(
code="ODS_STORE_GOODS_SALES",
class_name="OdsGoodsLedgerTask",
@@ -1454,7 +1458,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
requires_window=True,
time_fields=("startTime", "endTime"),
description="库存汇总 ODSGetGoodsStockReport 原始 JSON",
),
OdsTaskSpec(
@@ -1545,284 +1550,32 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
description="租户商品档案 ODSQueryTenantGoods -> tenantGoodsList 原始 JSON",
),
OdsTaskSpec(
code="ODS_SETTLEMENT_TICKET",
class_name="OdsSettlementTicketTask",
table_name="ods.settlement_ticket_details",
endpoint="/Order/GetOrderSettleTicketNew",
data_path=(),
list_key=None,
pk_columns=(_int_col("ordersettleid", "orderSettleId", required=True),),
code="ODS_STAFF_INFO",
class_name="OdsStaffInfoTask",
table_name="ods.staff_info_master",
endpoint="/PersonnelManagement/SearchSystemStaffInfo",
data_path=("data",),
list_key="staffProfiles",
pk_columns=(_int_col("id", "id", required=True),),
extra_params={
"workStatusEnum": 0,
"dingTalkSynced": 0,
"staffIdentity": 0,
"rankId": 0,
"criticismStatus": 0,
"signStatus": -1,
},
include_source_endpoint=False,
include_fetched_at=True,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
include_site_id=False,
description="结账小票详情 ODSGetOrderSettleTicketNew 原始 JSON",
time_fields=None,
snapshot_mode=SnapshotMode.FULL_TABLE,
description="员工档案 ODSSearchSystemStaffInfo -> staffProfiles 原始 JSON",
),
)
def _get_spec(code: str) -> OdsTaskSpec:
    """Return the registered OdsTaskSpec whose ``code`` matches.

    Raises:
        KeyError: if no spec with that code exists in ODS_TASK_SPECS.
    """
    matches = (spec for spec in ODS_TASK_SPECS if spec.code == code)
    found = next(matches, None)
    if found is None:
        raise KeyError(f"Spec not found for code {code}")
    return found
# Resolved once at import time; a KeyError here means the registry entry was removed.
_SETTLEMENT_TICKET_SPEC = _get_spec("ODS_SETTLEMENT_TICKET")
class OdsSettlementTicketTask(BaseOdsTask):
    """Special handling: fetch ticket details per payment relate_id/orderSettleId.

    Unlike the generic ODS tasks (one paginated endpoint per window), this task
    first collects candidate settlement ids — from the local payment table, with
    the payment API as a fallback — then fetches one ticket payload per id.
    """

    # Spec resolved from the module-level registry.
    SPEC = _SETTLEMENT_TICKET_SPEC

    def extract(self, context) -> dict:
        """Fetch ticket payloads only (used by fetch-only flow)."""
        existing_ids = self._fetch_existing_ticket_ids()
        candidates = self._collect_settlement_ids(
            context.store_id or 0, existing_ids, context.window_start, context.window_end
        )
        # Drop falsy ids and anything already ingested (defensive re-filter).
        candidates = [cid for cid in candidates if cid and cid not in existing_ids]
        payloads, skipped = self._fetch_ticket_payloads(candidates)
        return {"records": payloads, "skipped": skipped, "fetched": len(candidates)}

    def execute(self, cursor_data: dict | None = None) -> dict:
        """Run the full fetch-and-insert flow, optionally split into window segments.

        Returns a result dict with aggregate counts, the overall window, and —
        when more than one segment was processed — per-segment results.
        Commits after each segment; rolls back and re-raises on any failure.
        """
        spec = self.SPEC
        base_context = self._build_context(cursor_data)
        # Segments come only from explicit overrides (override_only=True);
        # otherwise the base window is used as a single segment.
        segments = build_window_segments(
            self.config,
            base_context.window_start,
            base_context.window_end,
            tz=self.tz,
            override_only=True,
        )
        if not segments:
            segments = [(base_context.window_start, base_context.window_end)]
        total_segments = len(segments)
        if total_segments > 1:
            self.logger.info("%s: 窗口拆分为 %s", spec.code, total_segments)
        store_id = TypeParser.parse_int(self.config.get("app.store_id")) or 0
        counts_total = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
        segment_results: list[dict] = []
        source_file = self._resolve_source_file_hint(spec)
        try:
            # Fetched once up front and grown per segment, so later segments
            # skip ids already ingested by earlier ones.
            existing_ids = self._fetch_existing_ticket_ids()
            for idx, (seg_start, seg_end) in enumerate(segments, start=1):
                context = self._build_context_for_window(seg_start, seg_end, cursor_data)
                self.logger.info(
                    "%s: 开始执行(%s/%s),窗口[%s ~ %s]",
                    spec.code,
                    idx,
                    total_segments,
                    context.window_start,
                    context.window_end,
                )
                candidates = self._collect_settlement_ids(
                    store_id, existing_ids, context.window_start, context.window_end
                )
                candidates = [cid for cid in candidates if cid and cid not in existing_ids]
                segment_counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
                segment_counts["fetched"] = len(candidates)
                if not candidates:
                    self.logger.info(
                        "%s: 窗口[%s ~ %s] 未发现需要抓取的小票",
                        spec.code,
                        context.window_start,
                        context.window_end,
                    )
                    self._accumulate_counts(counts_total, segment_counts)
                    if total_segments > 1:
                        segment_results.append(
                            {
                                "window": {
                                    "start": context.window_start,
                                    "end": context.window_end,
                                    "minutes": context.window_minutes,
                                },
                                "counts": segment_counts,
                            }
                        )
                    continue
                payloads, skipped = self._fetch_ticket_payloads(candidates)
                segment_counts["skipped"] += skipped
                inserted, updated, skipped2 = self._insert_records_schema_aware(
                    table=spec.table_name,
                    records=payloads,
                    response_payload=None,
                    source_file=source_file,
                    source_endpoint=spec.endpoint,
                )
                segment_counts["inserted"] += inserted
                segment_counts["updated"] += updated
                segment_counts["skipped"] += skipped2
                # Commit per segment so a later failure does not lose earlier work.
                self.db.commit()
                existing_ids.update(candidates)
                self._accumulate_counts(counts_total, segment_counts)
                if total_segments > 1:
                    segment_results.append(
                        {
                            "window": {
                                "start": context.window_start,
                                "end": context.window_end,
                                "minutes": context.window_minutes,
                            },
                            "counts": segment_counts,
                        }
                    )
            self.logger.info(
                "%s: 小票抓取完成,抓取=%s 插入=%s 更新=%s 跳过=%s",
                spec.code,
                counts_total["fetched"],
                counts_total["inserted"],
                counts_total["updated"],
                counts_total["skipped"],
            )
            result = self._build_result("SUCCESS", counts_total)
            overall_start = segments[0][0]
            overall_end = segments[-1][1]
            result["window"] = {
                "start": overall_start,
                "end": overall_end,
                "minutes": calc_window_minutes(overall_start, overall_end),
            }
            if segment_results:
                result["segments"] = segment_results
            result["request_params"] = {"candidates": counts_total["fetched"]}
            return result
        except Exception:
            counts_total["errors"] += 1
            self.db.rollback()
            self.logger.error("%s: 小票抓取失败", spec.code, exc_info=True)
            raise

    def _fetch_existing_ticket_ids(self) -> set[int]:
        """Return orderSettleIds already present in the ticket table.

        Best-effort: a query failure is logged and treated as the empty set,
        so the task re-fetches rather than aborting.
        """
        sql = """
            SELECT DISTINCT
                CASE WHEN (payload ->> 'orderSettleId') ~ '^[0-9]+$'
                     THEN (payload ->> 'orderSettleId')::bigint
                END AS order_settle_id
            FROM ods.settlement_ticket_details
        """
        try:
            rows = self.db.query(sql)
        except Exception:
            self.logger.warning("查询已有小票失败,按空集处理", exc_info=True)
            return set()
        return {
            TypeParser.parse_int(row.get("order_settle_id"))
            for row in rows
            if row.get("order_settle_id") is not None
        }

    def _collect_settlement_ids(
        self, store_id: int, existing_ids: set[int], window_start, window_end
    ) -> list[int]:
        """Collect candidate settlement ids, preferring the local payment table.

        Falls back to the payment API only when the table yields nothing.
        Returns a sorted list excluding ids already in *existing_ids*.
        """
        ids = self._fetch_from_payment_table(store_id)
        if not ids:
            ids = self._fetch_from_payment_api(store_id, window_start, window_end)
        return sorted(i for i in ids if i is not None and i not in existing_ids)

    def _fetch_from_payment_table(self, store_id: int) -> set[int]:
        """Read settlement ids from ods.payment_transactions (best-effort).

        Uses orderSettleId when numeric, otherwise relateId. On query failure
        returns the empty set so the API fallback can take over.
        """
        sql = """
            SELECT DISTINCT COALESCE(
                CASE WHEN (payload ->> 'orderSettleId') ~ '^[0-9]+$'
                     THEN (payload ->> 'orderSettleId')::bigint END,
                CASE WHEN (payload ->> 'relateId') ~ '^[0-9]+$'
                     THEN (payload ->> 'relateId')::bigint END
            ) AS order_settle_id
            FROM ods.payment_transactions
            WHERE (payload ->> 'orderSettleId') ~ '^[0-9]+$'
               OR (payload ->> 'relateId') ~ '^[0-9]+$'
        """
        params = None
        if store_id:
            # NOTE(review): appending "AND ..." after an OR without parentheses
            # binds as `A OR (B AND C)` under SQL precedence — confirm the
            # site filter is intended to apply to both branches.
            sql += " AND COALESCE((payload ->> 'siteId')::bigint, %s) = %s"
            params = (store_id, store_id)
        try:
            rows = self.db.query(sql, params)
        except Exception:
            self.logger.warning("读取支付流水以获取结算单ID失败将尝试调用支付接口回退", exc_info=True)
            return set()
        return {
            TypeParser.parse_int(row.get("order_settle_id"))
            for row in rows
            if row.get("order_settle_id") is not None
        }

    def _fetch_from_payment_api(self, store_id: int, window_start, window_end) -> set[int]:
        """Fallback: page through the payment-log API and collect relate ids.

        Errors are logged and whatever was collected so far is returned.
        """
        params = self._merge_common_params(
            {
                "siteId": store_id,
                "StartPayTime": TypeParser.format_timestamp(window_start, self.tz),
                "EndPayTime": TypeParser.format_timestamp(window_end, self.tz),
            }
        )
        candidate_ids: set[int] = set()
        try:
            for _, records, _, _ in self.api.iter_paginated(
                endpoint="/PayLog/GetPayLogListPage",
                params=params,
                page_size=self.config.get("api.page_size", 200),
                data_path=("data",),
            ):
                for rec in records:
                    # Accept any of the three key spellings seen in payloads.
                    relate_id = TypeParser.parse_int(
                        (rec or {}).get("relateId")
                        or (rec or {}).get("orderSettleId")
                        or (rec or {}).get("order_settle_id")
                    )
                    if relate_id:
                        candidate_ids.add(relate_id)
        except Exception:
            self.logger.warning("调用支付接口获取结算单ID失败当前批次将跳过回退来源", exc_info=True)
        return candidate_ids

    def _fetch_ticket_payload(self, order_settle_id: int):
        """Fetch one ticket payload; returns None when the call fails.

        Keeps only the last response of the (normally single-page) iteration.
        """
        payload = None
        try:
            for _, _, _, response in self.api.iter_paginated(
                endpoint=self.SPEC.endpoint,
                params={"orderSettleId": order_settle_id},
                page_size=None,
                data_path=self.SPEC.data_path,
                list_key=self.SPEC.list_key,
            ):
                payload = response
        except Exception:
            self.logger.warning(
                "调用小票接口失败 orderSettleId=%s", order_settle_id, exc_info=True
            )
        if isinstance(payload, dict) and isinstance(payload.get("data"), list) and len(payload["data"]) == 1:
            # Local stub replay may wrap the response as a one-element list;
            # unwrap it to match the shape of a real response.
            payload = payload["data"][0]
        return payload

    def _fetch_ticket_payloads(self, candidates: list[int]) -> tuple[list, int]:
        """Fetch ticket payloads for a set of orderSettleIds; returns (payloads, skipped)."""
        payloads: list = []
        skipped = 0
        for order_settle_id in candidates:
            payload = self._fetch_ticket_payload(order_settle_id)
            if payload:
                payloads.append(payload)
            else:
                skipped += 1
        return payloads, skipped
def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
attrs = {
"SPEC": spec,
@@ -1835,7 +1588,6 @@ def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
ENABLED_ODS_CODES = {
"ODS_ASSISTANT_ACCOUNT",
"ODS_ASSISTANT_LEDGER",
"ODS_ASSISTANT_ABOLISH",
"ODS_INVENTORY_CHANGE",
"ODS_INVENTORY_STOCK",
"ODS_GROUP_PACKAGE",
@@ -1854,8 +1606,8 @@ ENABLED_ODS_CODES = {
"ODS_TABLE_FEE_DISCOUNT",
"ODS_STORE_GOODS_SALES",
"ODS_TENANT_GOODS",
"ODS_SETTLEMENT_TICKET",
"ODS_SETTLEMENT_RECORDS",
"ODS_STAFF_INFO",
}
ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
@@ -1863,7 +1615,5 @@ ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
for spec in ODS_TASK_SPECS
if spec.code in ENABLED_ODS_CODES
}
# Override the generic generated class with the dedicated settlement-ticket implementation.
ODS_TASK_CLASSES["ODS_SETTLEMENT_TICKET"] = OdsSettlementTicketTask
__all__ = ["ODS_TASK_CLASSES", "ODS_TASK_SPECS", "BaseOdsTask", "ENABLED_ODS_CODES"]