init: 项目初始提交 - NeoZQYY Monorepo 完整代码
This commit is contained in:
0
apps/etl/pipelines/feiqiu/utils/__init__.py
Normal file
0
apps/etl/pipelines/feiqiu/utils/__init__.py
Normal file
22
apps/etl/pipelines/feiqiu/utils/helpers.py
Normal file
22
apps/etl/pipelines/feiqiu/utils/helpers.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""通用工具函数"""
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
def ensure_dir(path: Path) -> None:
    """Ensure the directory exists, creating any missing parents."""
    path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def make_surrogate_key(*parts) -> int:
    """Build a surrogate key from arbitrary field values.

    The values are joined with ``|`` (``None`` becomes an empty string),
    hashed with SHA1, and the first 8 bytes of the digest are read as an
    unsigned 64-bit big-endian integer.
    """
    joined = "|".join(str(part) if part is not None else "" for part in parts)
    digest_head = hashlib.sha1(joined.encode("utf-8")).digest()[:8]
    return int.from_bytes(digest_head, "big")
|
||||
|
||||
def now_local(tz) -> datetime:
    """Return the current time in the given timezone (tz may be a tzinfo or None)."""
    return datetime.now(tz)
|
||||
78
apps/etl/pipelines/feiqiu/utils/json_store.py
Normal file
78
apps/etl/pipelines/feiqiu/utils/json_store.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""JSON 归档/读取的通用工具。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Maps a normalized (lowercased, base-prefix-stripped) API endpoint path to
# the canonical archive filename agreed in the API spec document.
ENDPOINT_FILENAME_MAP: dict[str, str] = {
    "/memberprofile/gettenantmemberlist": "member_profiles.json",
    "/memberprofile/getmembercardbalancechange": "member_balance_changes.json",
    "/memberprofile/gettenantmembercardlist": "member_stored_value_cards.json",
    "/site/getrechargesettlelist": "recharge_settlements.json",
    "/assistantperformance/getabolitionassistant": "assistant_cancellation_records.json",
    "/assistantperformance/getorderassistantdetails": "assistant_service_records.json",
    "/personnelmanagement/searchassistantinfo": "assistant_accounts_master.json",
    "/table/getsitetables": "site_tables_master.json",
    "/site/gettaifeeadjustlist": "table_fee_discount_records.json",
    "/site/getsitetableorderdetails": "table_fee_transactions.json",
    "/tenantgoods/querytenantgoods": "tenant_goods_master.json",
    "/packagecoupon/querypackagecouponlist": "group_buy_packages.json",
    "/site/getsitetableusedetails": "group_buy_redemption_records.json",
    "/order/getordersettleticketnew": "settlement_ticket_details.json",
    "/promotion/getofflinecouponconsumepagelist": "platform_coupon_redemption_records.json",
    "/goodsstockmanage/querygoodsoutboundreceipt": "goods_stock_movements.json",
    "/tenantgoodscategory/queryprimarysecondarycategory": "stock_goods_category_tree.json",
    "/tenantgoods/getgoodsstockreport": "goods_stock_summary.json",
    "/paylog/getpayloglistpage": "payment_transactions.json",
    "/site/getallordersettlelist": "settlement_records.json",
    "/order/getrefundpayloglist": "refund_transactions.json",
    "/tenantgoods/getgoodsinventorylist": "store_goods_master.json",
    "/tenantgoods/getgoodssaleslist": "store_goods_sales_records.json",
}
|
||||
|
||||
def endpoint_to_filename(endpoint: str) -> str:
    """Map an API endpoint to its canonical archive filename.

    Endpoints known to ENDPOINT_FILENAME_MAP get the name agreed in the
    API spec document; any other path falls back to "strip leading slash
    -> slashes to double underscores" (normalization already lowercases).
    """
    key = _normalize_endpoint(endpoint)
    try:
        return ENDPOINT_FILENAME_MAP[key]
    except KeyError:
        pass

    fallback = key.strip("/").replace("/", "__").replace(" ", "_")
    return f"{fallback or 'root'}.json"
|
||||
|
||||
|
||||
def dump_json(path: Path, payload: Any, pretty: bool = False):
    """Write *payload* to *path* as UTF-8 JSON.

    Output is compact by default; set ``pretty=True`` for a 2-space
    indented layout. Parent directories are created on demand.
    """
    indent = 2 if pretty else None
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=indent)
|
||||
|
||||
|
||||
def _normalize_endpoint(endpoint: str) -> str:
|
||||
"""标准化 endpoint,提取路径部分并统一小写、去除 base 前缀。"""
|
||||
raw = str(endpoint or "").strip()
|
||||
if not raw:
|
||||
return ""
|
||||
|
||||
parsed = urlparse(raw)
|
||||
path = parsed.path or raw
|
||||
if not path.startswith("/"):
|
||||
path = f"/{path}"
|
||||
|
||||
path = path.rstrip("/") or "/"
|
||||
lowered = path.lower()
|
||||
for prefix in ("/apiprod/admin/v1", "apiprod/admin/v1"):
|
||||
if lowered.startswith(prefix):
|
||||
path = path[len(prefix) :]
|
||||
if not path.startswith("/"):
|
||||
path = f"/{path}"
|
||||
path = path.rstrip("/") or "/"
|
||||
lowered = path.lower()
|
||||
break
|
||||
|
||||
return lowered
|
||||
142
apps/etl/pipelines/feiqiu/utils/logging_utils.py
Normal file
142
apps/etl/pipelines/feiqiu/utils/logging_utils.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""日志配置工具
|
||||
|
||||
提供统一的日志配置和格式化。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterator, TextIO
|
||||
|
||||
|
||||
# Unified log line format (CJK-friendly): timestamp, padded level, logger name, message.
UNIFIED_FORMAT = "[%(asctime)s] %(levelname)-5s | %(name)s | %(message)s"
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
|
||||
class TeeStream:
    """Write-only text stream that fans every write out to several
    underlying streams (e.g. console plus a log file)."""

    def __init__(self, *streams: TextIO) -> None:
        self._streams = streams

    def write(self, data: str) -> int:
        # Mirror the payload to every sink; report the logical write size.
        for sink in self._streams:
            sink.write(data)
        return len(data)

    def flush(self) -> None:
        for sink in self._streams:
            sink.flush()

    def isatty(self) -> bool:
        # Never a terminal, even if one of the sinks is.
        return False

    def fileno(self) -> int:
        # Delegate to the first sink for code that needs a real OS fd.
        return self._streams[0].fileno()
|
||||
|
||||
|
||||
def build_log_path(log_dir: Path, prefix: str, tag: str = "") -> Path:
    """Compose a timestamped log path: <dir>/<prefix>[_<tag>]_<YYYYmmdd_HHMMSS>.log."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    pieces = [prefix]
    if tag:
        pieces.append(tag)
    pieces.append(timestamp)
    return log_dir / ("_".join(pieces) + ".log")
|
||||
|
||||
|
||||
def get_unified_formatter() -> logging.Formatter:
    """Return a Formatter built from the module-wide UNIFIED_FORMAT / DATE_FORMAT."""
    return logging.Formatter(fmt=UNIFIED_FORMAT, datefmt=DATE_FORMAT)
|
||||
|
||||
|
||||
@contextmanager
def configure_logging(
    name: str,
    log_file: Path | None,
    *,
    level: str = "INFO",
    console: bool = True,
    tee_std: bool = True,
) -> Iterator[logging.Logger]:
    """
    Configure logging for a task run.

    Args:
        name: logger name
        log_file: log file path; None means no file output
        level: log level name (unknown names fall back to INFO)
        console: whether to also emit to the console
        tee_std: whether stdout/stderr are mirrored into the log file

    Yields:
        The configured logger. On exit, handlers are flushed, closed and
        removed, the log file is closed, and sys.stdout/sys.stderr are
        restored.
    """
    logger = logging.getLogger(name)
    logger.handlers.clear()
    logger.setLevel(getattr(logging, level.upper(), logging.INFO))
    logger.propagate = False

    formatter = get_unified_formatter()

    # Remember the real std streams so they can be restored in finally.
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    log_fp: TextIO | None = None

    try:
        if log_file:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            # Line-buffered append so external tailing sees output promptly.
            log_fp = open(log_file, "a", encoding="utf-8", buffering=1)
            if tee_std:
                if console:
                    # Mirror std streams to both console and file.
                    sys.stdout = TeeStream(original_stdout, log_fp)
                    sys.stderr = TeeStream(original_stderr, log_fp)
                else:
                    # Redirect std streams into the file only.
                    sys.stdout = log_fp
                    sys.stderr = log_fp
            file_handler = logging.StreamHandler(log_fp)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        if console:
            # Console handler writes to the *original* stdout so tee'd
            # writes are not duplicated through the replaced sys.stdout.
            console_handler = logging.StreamHandler(original_stdout)
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)

        yield logger
    finally:
        for handler in list(logger.handlers):
            handler.flush()
            handler.close()
            logger.removeHandler(handler)
        if log_fp:
            log_fp.flush()
            log_fp.close()
        sys.stdout = original_stdout
        sys.stderr = original_stderr
|
||||
|
||||
|
||||
def setup_root_logger(level: str = "INFO") -> logging.Logger:
    """Reset the root logger: clear existing handlers, apply *level*
    (unknown names fall back to INFO), attach one console handler with
    the unified format, and return the root logger."""
    root_logger = logging.getLogger()
    root_logger.setLevel(getattr(logging, level.upper(), logging.INFO))

    # Drop any previously attached handlers before adding ours.
    root_logger.handlers.clear()

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(get_unified_formatter())
    root_logger.addHandler(stream_handler)

    return root_logger
|
||||
55
apps/etl/pipelines/feiqiu/utils/ods_record_utils.py
Normal file
55
apps/etl/pipelines/feiqiu/utils/ods_record_utils.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Shared helpers for ODS/API record normalization."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
def merge_record_layers(record: dict) -> dict:
    """Flatten nested ``data``/``settleList`` layers into one dict.

    Outer keys win on conflicts. If there is nothing to flatten, the
    original dict object is returned unchanged (not copied).
    """
    flattened = record
    inner = flattened.get("data")
    while isinstance(inner, dict):
        # Spread the outer dict last so its values take precedence.
        flattened = {**inner, **flattened}
        inner = inner.get("data")

    settle = flattened.get("settleList")
    if isinstance(settle, dict):
        flattened = {**settle, **flattened}
    return flattened
|
||||
|
||||
|
||||
def get_value_case_insensitive(record: dict | None, col: str | None):
    """Look up *col* in *record*, falling back to a case-insensitive scan.

    Returns None when either argument is None or when no key matches.
    """
    if record is None or col is None:
        return None

    # Fast path: exact key match.
    if col in record:
        return record.get(col)

    # Slow path: first string key that matches ignoring case.
    wanted = col.lower()
    for key, value in record.items():
        if isinstance(key, str) and key.lower() == wanted:
            return value
    return None
|
||||
|
||||
|
||||
def normalize_pk_value(value):
    """Normalize a primary-key value: all-digit strings become ints.

    ``str.isdigit`` accepts some non-ASCII digit characters that
    ``int()`` rejects, hence the defensive try/except.
    """
    if value is None:
        return None
    if not (isinstance(value, str) and value.isdigit()):
        return value
    try:
        return int(value)
    except Exception:
        return value
|
||||
|
||||
|
||||
def pk_tuple_from_record(record: dict, pk_cols: Iterable[str]) -> tuple | None:
    """Build the primary-key tuple for *record*.

    Nested layers are flattened first; returns None as soon as any PK
    column resolves to None or the empty string.
    """
    flattened = merge_record_layers(record)
    parts = []
    for column in pk_cols:
        part = normalize_pk_value(get_value_case_insensitive(flattened, column))
        if part is None or part == "":
            return None
        parts.append(part)
    return tuple(parts)
|
||||
247
apps/etl/pipelines/feiqiu/utils/reporting.py
Normal file
247
apps/etl/pipelines/feiqiu/utils/reporting.py
Normal file
@@ -0,0 +1,247 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""任务结果汇总与格式化工具。
|
||||
|
||||
提供多种格式的任务报告输出:
|
||||
- 简单文本格式
|
||||
- 详细表格格式(ASCII)
|
||||
- 任务总结报告
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
|
||||
def summarize_counts(task_results: Iterable[dict]) -> dict:
    """Aggregate per-task ``counts`` dicts.

    task_results: sequence of dicts shaped like
        {"task_code": str, "counts": {...}}.
    Returns {"total": grand totals, "details": per-task rows}.
    """
    count_keys = ("fetched", "inserted", "updated", "skipped", "errors")
    totals = dict.fromkeys(count_keys, 0)
    details = []

    for result in task_results:
        counts = result.get("counts") or {}
        row = {"task_code": result.get("task_code") or result.get("code") or "UNKNOWN"}
        for key in count_keys:
            # Coerce missing / None / numeric-string values to int.
            amount = int(counts.get(key, 0) or 0)
            row[key] = amount
            totals[key] += amount
        details.append(row)

    return {"total": totals, "details": details}
|
||||
|
||||
|
||||
def format_report(summary: dict) -> str:
    """Render summarize_counts() output as readable text (simple format):
    one TOTAL line followed by one line per task."""

    def _counts_text(row: dict) -> str:
        # Shared tail shared by the TOTAL line and every detail line.
        return (
            f"fetched={row.get('fetched', 0)} "
            f"inserted={row.get('inserted', 0)} "
            f"updated={row.get('updated', 0)} "
            f"skipped={row.get('skipped', 0)} "
            f"errors={row.get('errors', 0)}"
        )

    lines = ["TOTAL " + _counts_text(summary.get("total", {}))]
    for row in summary.get("details", []):
        lines.append(f"{row.get('task_code', 'UNKNOWN')}: " + _counts_text(row))
    return "\n".join(lines)
|
||||
|
||||
|
||||
def format_task_summary(result: dict) -> str:
    """
    Build a formatted task summary report.

    Args:
        result: task execution result dict containing:
            - task_code: task code
            - status: run status
            - start_time: start time (str or datetime)
            - end_time: end time (str or datetime)
            - elapsed_seconds: elapsed seconds
            - counts: counter statistics
            - verification_result: verification result (optional)
            - error_message: error message (optional)

    Returns:
        The formatted summary string (box-drawing border).
    """
    task_code = result.get("task_code", "UNKNOWN")
    status = result.get("status", "未知")
    counts = result.get("counts", {})
    verification = result.get("verification_result")
    error_message = result.get("error_message")

    # Resolve display strings for the start/end times
    start_time = result.get("start_time")
    end_time = result.get("end_time")
    elapsed = result.get("elapsed_seconds", 0)

    if isinstance(start_time, str):
        start_str = start_time[:19]
    elif isinstance(start_time, datetime):
        start_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
    else:
        start_str = "-"

    if isinstance(end_time, str):
        # Keep only the HH:MM:SS portion of a full ISO-like string.
        end_str = end_time[11:19] if len(end_time) >= 19 else end_time
    elif isinstance(end_time, datetime):
        end_str = end_time.strftime("%H:%M:%S")
    else:
        end_str = "-"

    elapsed_str = _format_duration(elapsed)

    # Build the report
    lines = [
        "╔══════════════════════════════════════════════════════════════╗",
        "║ 任务执行总结 ║",
        "╠══════════════════════════════════════════════════════════════╣",
        f"║ 任务代码: {task_code:<50} ║",
        f"║ 执行状态: {status:<50} ║",
        f"║ 执行时间: {start_str} ~ {end_str} ({elapsed_str}){' '*(31-len(elapsed_str))} ║",
        "╠══════════════════════════════════════════════════════════════╣",
        "║ 数据统计 ║",
        f"║ - 获取记录: {counts.get('fetched', 0):>10,} ║",
        f"║ - 新增记录: {counts.get('inserted', 0):>10,} ║",
        f"║ - 更新记录: {counts.get('updated', 0):>10,} ║",
        f"║ - 跳过记录: {counts.get('skipped', 0):>10,} ║",
        f"║ - 错误记录: {counts.get('errors', 0):>10,} ║",
    ]

    # Verification section (only when a result was attached)
    if verification:
        backfilled_missing = verification.get("backfilled_missing_count", verification.get("backfilled_count", 0))
        backfilled_mismatch = verification.get("backfilled_mismatch_count", 0)
        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            "║ 校验结果 ║",
            f"║ - 源数据量: {verification.get('source_count', 0):>10,} ║",
            f"║ - 目标数据量: {verification.get('target_count', 0):>10,} ║",
            f"║ - 缺失补齐: {backfilled_missing:>10,} ║",
            f"║ - 不一致补齐: {backfilled_mismatch:>10,} ║",
        ])

    # Error section (truncated to fit the box)
    if error_message:
        error_str = str(error_message)[:48]
        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            f"║ 错误信息: {error_str:<50} ║",
        ])

    lines.append("╚══════════════════════════════════════════════════════════════╝")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def format_pipeline_summary(
    pipeline_name: str,
    task_results: List[dict],
    start_time: datetime,
    end_time: datetime,
    verification_summary: Optional[dict] = None,
) -> str:
    """
    Build a pipeline-level execution summary report.

    Args:
        pipeline_name: pipeline name
        task_results: per-task result dicts
        start_time: pipeline start time
        end_time: pipeline end time
        verification_summary: verification rollup (optional)

    Returns:
        The formatted pipeline summary string.
    """
    elapsed = (end_time - start_time).total_seconds()
    elapsed_str = _format_duration(elapsed)

    # Roll up counters across all tasks
    summary = summarize_counts(task_results)
    totals = summary.get("total", {})

    # Count successes/failures (status string "成功" marks success)
    success_count = sum(1 for r in task_results if r.get("status") == "成功")
    fail_count = len(task_results) - success_count

    lines = [
        "╔══════════════════════════════════════════════════════════════╗",
        "║ 管道执行总结 ║",
        "╠══════════════════════════════════════════════════════════════╣",
        f"║ 管道名称: {pipeline_name:<50} ║",
        f"║ 任务数量: {len(task_results)} (成功: {success_count}, 失败: {fail_count}){' '*(32-len(str(len(task_results)))-len(str(success_count))-len(str(fail_count)))} ║",
        f"║ 执行时间: {start_time.strftime('%Y-%m-%d %H:%M:%S')} ~ {end_time.strftime('%H:%M:%S')} ({elapsed_str}){' '*(31-len(elapsed_str))} ║",
        "╠══════════════════════════════════════════════════════════════╣",
        "║ 数据汇总 ║",
        f"║ - 总获取: {totals.get('fetched', 0):>12,} ║",
        f"║ - 总新增: {totals.get('inserted', 0):>12,} ║",
        f"║ - 总更新: {totals.get('updated', 0):>12,} ║",
        f"║ - 总跳过: {totals.get('skipped', 0):>12,} ║",
        f"║ - 总错误: {totals.get('errors', 0):>12,} ║",
    ]

    # Verification rollup section (only when provided)
    if verification_summary:
        total_backfilled_missing = verification_summary.get(
            "total_backfilled_missing",
            verification_summary.get("total_backfilled", 0),
        )
        total_backfilled_mismatch = verification_summary.get("total_backfilled_mismatch", 0)
        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            "║ 校验汇总 ║",
            f"║ - 校验表数: {verification_summary.get('total_tables', 0):>10,} ║",
            f"║ - 一致表数: {verification_summary.get('consistent_tables', 0):>10,} ║",
            f"║ - 总补齐数: {verification_summary.get('total_backfilled', 0):>10,} ║",
            f"║ - 缺失补齐: {total_backfilled_missing:>10,} ║",
            f"║ - 不一致补齐: {total_backfilled_mismatch:>8,} ║",
        ])

    # Per-task detail section
    lines.extend([
        "╠══════════════════════════════════════════════════════════════╣",
        "║ 任务明细 ║",
    ])

    for result in task_results[:10]:  # show at most 10 tasks
        task_code = result.get("task_code", "UNKNOWN")[:25]
        status = "✓" if result.get("status") == "成功" else "✗"
        counts = result.get("counts", {})
        fetched = counts.get("fetched", 0)
        lines.append(f"║ {status} {task_code:<25} 获取:{fetched:>6,} ║")

    if len(task_results) > 10:
        lines.append(f"║ ... 还有 {len(task_results) - 10} 个任务 ... ║")

    lines.append("╚══════════════════════════════════════════════════════════════╝")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_duration(seconds: float) -> str:
|
||||
"""格式化时长"""
|
||||
if seconds < 60:
|
||||
return f"{seconds:.1f}秒"
|
||||
elif seconds < 3600:
|
||||
mins = int(seconds // 60)
|
||||
secs = seconds % 60
|
||||
return f"{mins}分{secs:.0f}秒"
|
||||
else:
|
||||
hours = int(seconds // 3600)
|
||||
mins = int((seconds % 3600) // 60)
|
||||
return f"{hours}时{mins}分"
|
||||
292
apps/etl/pipelines/feiqiu/utils/task_logger.py
Normal file
292
apps/etl/pipelines/feiqiu/utils/task_logger.py
Normal file
@@ -0,0 +1,292 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""统一任务日志器
|
||||
|
||||
提供统一的日志输出格式,支持:
|
||||
- 任务开始/结束记录
|
||||
- 进度追踪
|
||||
- 统计计数
|
||||
- 格式化的任务总结
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
# Unified log line format: timestamp, padded level, logger name, message.
UNIFIED_LOG_FORMAT = "[%(asctime)s] %(levelname)-5s | %(name)s | %(message)s"
UNIFIED_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
||||
|
||||
|
||||
class TaskLogger:
    """Task logger that unifies print-style and logging output.

    Tracks run state (start/end/status), the standard ETL counters, any
    extra info reported during the run, and an optional verification
    result, and can render a boxed summary at the end.
    """

    def __init__(
        self,
        task_code: str,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the task logger.

        Args:
            task_code: task code used to prefix every message
            logger: underlying logger; a new "task.<task_code>" logger is
                created when not supplied
        """
        self.task_code = task_code
        self.logger = logger or logging.getLogger(f"task.{task_code}")

        # Run state
        self.start_time: Optional[datetime] = None
        self.end_time: Optional[datetime] = None
        self.status: str = "pending"

        # Counters
        self.counts: Dict[str, int] = {
            "fetched": 0,
            "inserted": 0,
            "updated": 0,
            "skipped": 0,
            "errors": 0,
        }

        # Extra info collected via progress() with unknown keys
        self.extra_info: Dict[str, Any] = {}

        # Verification result (if any)
        self.verification_result: Optional[dict] = None

    def start(self, message: str = "任务开始"):
        """
        Record the task start.

        Args:
            message: start message
        """
        self.start_time = datetime.now()
        self.status = "running"
        self.logger.info(
            "%s | %s | 开始时间: %s",
            self.task_code, message,
            self.start_time.strftime(UNIFIED_DATE_FORMAT)
        )

    def progress(self, message: str, **kwargs):
        """
        Record progress.

        Args:
            message: progress message
            **kwargs: extra statistics; known counter keys accumulate
                (int values are added, non-int values overwrite), unknown
                keys are stored in extra_info
        """
        # Update counters
        for key, value in kwargs.items():
            if key in self.counts:
                if isinstance(value, int):
                    self.counts[key] += value
                else:
                    self.counts[key] = value
            else:
                self.extra_info[key] = value

        # Build the progress string (only non-zero counters are shown)
        counts_str = ", ".join(f"{k}={v}" for k, v in self.counts.items() if v > 0)
        if counts_str:
            self.logger.info("%s | %s | %s", self.task_code, message, counts_str)
        else:
            self.logger.info("%s | %s", self.task_code, message)

    def info(self, message: str, *args):
        """Log an INFO message prefixed with the task code."""
        if args:
            self.logger.info(f"{self.task_code} | {message}", *args)
        else:
            self.logger.info(f"{self.task_code} | {message}")

    def warning(self, message: str, *args):
        """Log a WARNING message prefixed with the task code."""
        if args:
            self.logger.warning(f"{self.task_code} | {message}", *args)
        else:
            self.logger.warning(f"{self.task_code} | {message}")

    def error(self, message: str, *args, exc_info: bool = False):
        """Log an ERROR message prefixed with the task code.

        Also increments the "errors" counter.
        """
        self.counts["errors"] += 1
        if args:
            self.logger.error(f"{self.task_code} | {message}", *args, exc_info=exc_info)
        else:
            self.logger.error(f"{self.task_code} | {message}", exc_info=exc_info)

    def set_counts(self, **counts):
        """Overwrite known counters with the given values."""
        for key, value in counts.items():
            if key in self.counts:
                self.counts[key] = value

    def add_counts(self, **counts):
        """Add the given values to known counters."""
        for key, value in counts.items():
            if key in self.counts:
                self.counts[key] += value

    def set_verification_result(self, result: dict):
        """Attach a verification result for inclusion in the summary."""
        self.verification_result = result

    def end(self, status: str = "成功", error_message: Optional[str] = None) -> str:
        """
        Record the task end and return a formatted summary.

        Args:
            status: status ("成功" / "失败" / "取消")
            error_message: error message (when failed)

        Returns:
            The formatted task summary string.
        """
        self.end_time = datetime.now()
        self.status = status

        # Compute elapsed time (start() may never have been called)
        if self.start_time:
            elapsed = (self.end_time - self.start_time).total_seconds()
            elapsed_str = self._format_duration(elapsed)
        else:
            elapsed = 0
            elapsed_str = "-"

        # Build the summary
        summary = self._format_summary(status, elapsed_str, error_message)

        # Log at a level matching the outcome
        if status == "成功":
            self.logger.info("\n%s", summary)
        else:
            self.logger.error("\n%s", summary)

        return summary

    def _format_duration(self, seconds: float) -> str:
        """Render a duration in seconds as short human-readable text."""
        if seconds < 60:
            return f"{seconds:.1f}秒"
        elif seconds < 3600:
            mins = int(seconds // 60)
            secs = seconds % 60
            return f"{mins}分{secs:.0f}秒"
        else:
            hours = int(seconds // 3600)
            mins = int((seconds % 3600) // 60)
            return f"{hours}时{mins}分"

    def _format_summary(
        self,
        status: str,
        elapsed_str: str,
        error_message: Optional[str] = None,
    ) -> str:
        """Render the boxed task summary from the current state."""
        lines = [
            "╔══════════════════════════════════════════════════════════════╗",
            "║ 任务执行总结 ║",
            "╠══════════════════════════════════════════════════════════════╣",
            f"║ 任务代码: {self.task_code:<50} ║",
            f"║ 执行状态: {status:<50} ║",
        ]

        if self.start_time and self.end_time:
            time_range = f"{self.start_time.strftime('%Y-%m-%d %H:%M:%S')} ~ {self.end_time.strftime('%H:%M:%S')} ({elapsed_str})"
            lines.append(f"║ 执行时间: {time_range:<50} ║")

        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            "║ 数据统计 ║",
            f"║ - 获取记录: {self.counts['fetched']:>10,} ║",
            f"║ - 新增记录: {self.counts['inserted']:>10,} ║",
            f"║ - 更新记录: {self.counts['updated']:>10,} ║",
            f"║ - 跳过记录: {self.counts['skipped']:>10,} ║",
            f"║ - 错误记录: {self.counts['errors']:>10,} ║",
        ])

        # Verification section (only when a result was attached)
        if self.verification_result:
            backfilled_missing = self.verification_result.get(
                "backfilled_missing_count",
                self.verification_result.get("backfilled_count", 0),
            )
            backfilled_mismatch = self.verification_result.get("backfilled_mismatch_count", 0)
            lines.extend([
                "╠══════════════════════════════════════════════════════════════╣",
                "║ 校验结果 ║",
                f"║ - 源数据量: {self.verification_result.get('source_count', 0):>10,} ║",
                f"║ - 目标数据量: {self.verification_result.get('target_count', 0):>10,} ║",
                f"║ - 缺失补齐: {backfilled_missing:>10,} ║",
                f"║ - 不一致补齐: {backfilled_mismatch:>10,} ║",
            ])

        # Error section (truncated to fit the box)
        if error_message:
            lines.extend([
                "╠══════════════════════════════════════════════════════════════╣",
                f"║ 错误信息: {error_message[:50]:<50} ║",
            ])

        lines.append("╚══════════════════════════════════════════════════════════════╝")

        return "\n".join(lines)

    def get_result(self) -> dict:
        """Return the task result as a plain dict (JSON-friendly)."""
        elapsed = 0
        if self.start_time and self.end_time:
            elapsed = (self.end_time - self.start_time).total_seconds()

        return {
            "task_code": self.task_code,
            "status": self.status,
            "start_time": self.start_time.isoformat() if self.start_time else None,
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "elapsed_seconds": elapsed,
            "counts": self.counts.copy(),
            "extra_info": self.extra_info.copy(),
            "verification_result": self.verification_result,
        }
|
||||
|
||||
|
||||
def configure_task_logging(
    name: str = "fq_etl",
    level: str = "INFO",
) -> logging.Logger:
    """
    Build a console-only logger using the unified format.

    Args:
        name: logger name
        level: level name (unknown names fall back to INFO)

    Returns:
        The configured, non-propagating logger.
    """
    task_logger = logging.getLogger(name)
    task_logger.setLevel(getattr(logging, level.upper(), logging.INFO))

    # Start from a clean slate: drop any previously attached handlers.
    task_logger.handlers.clear()

    # Single console handler; the handler itself passes everything through
    # (DEBUG) so the logger level is the effective filter.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    stream_handler.setFormatter(
        logging.Formatter(
            UNIFIED_LOG_FORMAT,
            UNIFIED_DATE_FORMAT,
        )
    )

    task_logger.addHandler(stream_handler)
    task_logger.propagate = False

    return task_logger
|
||||
142
apps/etl/pipelines/feiqiu/utils/windowing.py
Normal file
142
apps/etl/pipelines/feiqiu/utils/windowing.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Time window helpers for ETL and validation tasks."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, time
|
||||
from typing import List, Tuple
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
|
||||
def _ensure_tz(dt: datetime, tz: ZoneInfo | None) -> datetime:
|
||||
if tz is None:
|
||||
return dt
|
||||
if dt.tzinfo is None:
|
||||
return dt.replace(tzinfo=tz)
|
||||
return dt.astimezone(tz)
|
||||
|
||||
|
||||
def _next_month_start(dt: datetime, tz: ZoneInfo | None) -> datetime:
|
||||
year = dt.year
|
||||
month = dt.month
|
||||
if month == 12:
|
||||
year += 1
|
||||
month = 1
|
||||
else:
|
||||
month += 1
|
||||
return datetime(year, month, 1, tzinfo=tz)
|
||||
|
||||
|
||||
def calc_window_minutes(start: datetime, end: datetime) -> int:
    """Whole minutes between start and end.

    Returns 0 for empty/inverted windows; any non-empty window counts as
    at least 1 minute.
    """
    span_seconds = (end - start).total_seconds()
    if span_seconds <= 0:
        return 0
    return max(1, int(span_seconds // 60))
|
||||
|
||||
|
||||
def calc_window_days(start: datetime, end: datetime) -> float:
    """Window length in (fractional) days; 0.0 when end <= start."""
    span = (end - start).total_seconds()
    return span / 86400 if span > 0 else 0.0
|
||||
|
||||
|
||||
def format_window_days(value: float) -> str:
    """Render a day count: near-integers without decimals, otherwise 2 dp.

    None renders as "0".
    """
    if value is None:
        return "0"
    nearest = round(value)
    if abs(value - nearest) < 1e-6:
        return str(int(nearest))
    return f"{value:.2f}"
|
||||
|
||||
|
||||
def split_window(
    start: datetime,
    end: datetime,
    *,
    tz: ZoneInfo | None,
    split_unit: str | None,
    compensation_hours: int | float | None,
    split_days: int | None = None,
) -> List[Tuple[datetime, datetime]]:
    """Split [start, end) into sub-windows.

    The window is first normalized to *tz* and widened by
    *compensation_hours* on both sides. *split_unit* selects the split:
    "day"/"daily" uses *split_days*-day steps (default 1), "week"/"weekly"
    uses fixed 7-day steps, "month"/"monthly" cuts at calendar month
    starts. ""/"none"/"off"/"false"/"0" — and any unrecognized unit —
    yield the whole window as a single segment. Returns [] when the
    (widened) window is empty or inverted.

    Fix vs. previous revision: the "day" and "week" branches were
    copy-paste duplicates differing only in the step size; they now share
    one helper (behavior unchanged).
    """
    start = _ensure_tz(start, tz)
    end = _ensure_tz(end, tz)

    comp = int(compensation_hours or 0)
    if comp:
        start -= timedelta(hours=comp)
        end += timedelta(hours=comp)

    if end <= start:
        return []

    unit = (split_unit or "").strip().lower()
    if unit in ("", "none", "off", "false", "0"):
        return [(start, end)]

    if unit in ("day", "daily", "week", "weekly"):
        # Weekly is just a fixed 7-day step; daily honors split_days.
        step = 7 if unit in ("week", "weekly") else max(1, int(split_days or 1))
        return _fixed_step_windows(start, end, step)

    if unit not in ("month", "monthly"):
        # Unknown unit: fall back to a single segment.
        return [(start, end)]

    # Monthly: cut at each calendar month boundary.
    windows: List[Tuple[datetime, datetime]] = []
    cur = start
    while cur < end:
        boundary = _next_month_start(cur, tz)
        nxt = boundary if boundary < end else end
        if nxt <= cur:
            break
        windows.append((cur, nxt))
        cur = nxt
    return windows


def _fixed_step_windows(
    start: datetime,
    end: datetime,
    step_days: int,
) -> List[Tuple[datetime, datetime]]:
    """Cut [start, end) into consecutive step_days-long segments (last one clamped to end)."""
    windows: List[Tuple[datetime, datetime]] = []
    cur = start
    while cur < end:
        nxt = min(cur + timedelta(days=step_days), end)
        if nxt <= cur:
            break
        windows.append((cur, nxt))
        cur = nxt
    return windows
|
||||
|
||||
|
||||
def build_window_segments(
    cfg,
    start: datetime,
    end: datetime,
    *,
    tz: ZoneInfo | None,
    override_only: bool,
) -> List[Tuple[datetime, datetime]]:
    """Resolve split settings from *cfg* and delegate to split_window().

    When *override_only* is set but no explicit window override
    (run.window_override.start/end) is configured, splitting and
    compensation are disabled so the caller's window is used verbatim.
    """
    split_unit = cfg.get("run.window_split.unit", "month")
    split_days = cfg.get("run.window_split.days", 1)
    compensation_hours = cfg.get("run.window_split.compensation_hours", 0)

    if override_only:
        has_override = bool(
            cfg.get("run.window_override.start") and cfg.get("run.window_override.end")
        )
        if not has_override:
            split_unit = "none"
            compensation_hours = 0

    return split_window(
        start,
        end,
        tz=tz,
        split_unit=split_unit,
        compensation_hours=compensation_hours,
        split_days=split_days,
    )
|
||||
Reference in New Issue
Block a user