改相对路径完成客户端

2026-01-27 22:14:01 +08:00
parent 04c064793a
commit 9f8976e75a
292 changed files with 307062 additions and 678 deletions
--- a/etl_billiards/gui/workers/__init__.py
+++ b/etl_billiards/gui/workers/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+"""后台工作线程模块"""
+
+from .task_worker import TaskWorker
+from .db_worker import DBWorker
+
+__all__ = ["TaskWorker", "DBWorker"]
--- a/etl_billiards/gui/workers/db_worker.py
+++ b/etl_billiards/gui/workers/db_worker.py
@@ -0,0 +1,192 @@
+# -*- coding: utf-8 -*-
+"""数据库查询工作线程"""
+
+import sys
+from pathlib import Path
+from typing import List, Dict, Any, Optional, Tuple
+
+from PySide6.QtCore import QThread, Signal
+
+# Make the project root importable: parents[2] of this file's resolved path
+# is inserted at the front of sys.path unless it is already listed there.
+PROJECT_ROOT = Path(__file__).resolve().parents[2]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+
+class DBWorker(QThread):
+    """数据库查询工作线程"""
+    
+    # 信号
+    query_finished = Signal(list, list)  # 查询完成 (columns, rows)
+    query_error = Signal(str)  # 查询错误
+    connection_status = Signal(bool, str)  # 连接状态 (connected, message)
+    tables_loaded = Signal(dict)  # 表列表加载完成 {schema: [(table, rows, updated_at), ...]}
+    
+    def __init__(self, parent=None):
+        super().__init__(parent)
+        self.conn = None
+        self._task = None
+        self._task_args = None
+    
+    def connect_db(self, dsn: str):
+        """连接数据库"""
+        self._task = "connect"
+        self._task_args = (dsn,)
+        self.start()
+    
+    def disconnect_db(self):
+        """断开数据库连接"""
+        self._task = "disconnect"
+        self._task_args = None
+        self.start()
+    
+    def execute_query(self, sql: str, params: Optional[tuple] = None):
+        """执行查询"""
+        self._task = "query"
+        self._task_args = (sql, params)
+        self.start()
+    
+    def load_tables(self, schemas: Optional[List[str]] = None):
+        """加载表列表"""
+        self._task = "load_tables"
+        self._task_args = (schemas,)
+        self.start()
+    
+    def run(self):
+        """执行任务"""
+        if self._task == "connect":
+            self._do_connect(*self._task_args)
+        elif self._task == "disconnect":
+            self._do_disconnect()
+        elif self._task == "query":
+            self._do_query(*self._task_args)
+        elif self._task == "load_tables":
+            self._do_load_tables(*self._task_args)
+    
+    def _do_connect(self, dsn: str):
+        """执行连接"""
+        try:
+            import psycopg2
+            from psycopg2.extras import RealDictCursor
+            
+            self.conn = psycopg2.connect(dsn, connect_timeout=10)
+            self.conn.set_session(autocommit=True)
+            
+            # 测试连接
+            with self.conn.cursor() as cur:
+                cur.execute("SELECT version()")
+                version = cur.fetchone()[0]
+            
+            self.connection_status.emit(True, f"已连接: {version[:50]}...")
+        except ImportError:
+            self.connection_status.emit(False, "缺少 psycopg2 模块，请安装: pip install psycopg2-binary")
+        except Exception as e:
+            self.conn = None
+            self.connection_status.emit(False, f"连接失败: {e}")
+    
+    def _do_disconnect(self):
+        """执行断开连接"""
+        if self.conn:
+            try:
+                self.conn.close()
+            except Exception:
+                pass
+            self.conn = None
+        self.connection_status.emit(False, "已断开连接")
+    
+    def _do_query(self, sql: str, params: Optional[tuple]):
+        """执行查询"""
+        if not self.conn:
+            self.query_error.emit("未连接到数据库")
+            return
+        
+        try:
+            from psycopg2.extras import RealDictCursor
+            
+            with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute(sql, params)
+                
+                # 检查是否有结果
+                if cur.description:
+                    columns = [desc[0] for desc in cur.description]
+                    rows = [dict(row) for row in cur.fetchall()]
+                    self.query_finished.emit(columns, rows)
+                else:
+                    self.query_finished.emit([], [])
+        except Exception as e:
+            self.query_error.emit(f"查询失败: {e}")
+    
+    def _do_load_tables(self, schemas: Optional[List[str]]):
+        """加载表列表"""
+        if not self.conn:
+            self.query_error.emit("未连接到数据库")
+            return
+        
+        try:
+            if schemas is None:
+                schemas = ["billiards_ods", "billiards_dwd", "billiards_dws", "etl_admin"]
+            
+            result = {}
+            
+            for schema in schemas:
+                tables = []
+                
+                # 获取表列表
+                sql = """
+                    SELECT 
+                        t.table_name,
+                        COALESCE(s.n_live_tup, 0) as row_count
+                    FROM information_schema.tables t
+                    LEFT JOIN pg_stat_user_tables s 
+                        ON t.table_name = s.relname 
+                        AND t.table_schema = s.schemaname
+                    WHERE t.table_schema = %s 
+                        AND t.table_type = 'BASE TABLE'
+                    ORDER BY t.table_name
+                """
+                
+                with self.conn.cursor() as cur:
+                    cur.execute(sql, (schema,))
+                    for row in cur.fetchall():
+                        table_name = row[0]
+                        row_count = row[1] or 0
+                        
+                        # 尝试获取最新更新时间
+                        updated_at = None
+                        try:
+                            # 尝试 fetched_at 字段
+                            cur.execute(f'SELECT MAX(fetched_at) FROM "{schema}"."{table_name}"')
+                            result_row = cur.fetchone()
+                            if result_row and result_row[0]:
+                                updated_at = str(result_row[0])[:19]
+                        except Exception:
+                            pass
+                        
+                        if not updated_at:
+                            try:
+                                # 尝试 updated_at 字段
+                                cur.execute(f'SELECT MAX(updated_at) FROM "{schema}"."{table_name}"')
+                                result_row = cur.fetchone()
+                                if result_row and result_row[0]:
+                                    updated_at = str(result_row[0])[:19]
+                            except Exception:
+                                pass
+                        
+                        tables.append((table_name, row_count, updated_at or "-"))
+                
+                result[schema] = tables
+            
+            self.tables_loaded.emit(result)
+        except Exception as e:
+            self.query_error.emit(f"加载表列表失败: {e}")
+    
+    def is_connected(self) -> bool:
+        """检查是否已连接"""
+        if not self.conn:
+            return False
+        try:
+            with self.conn.cursor() as cur:
+                cur.execute("SELECT 1")
+            return True
+        except Exception:
+            return False
--- a/etl_billiards/gui/workers/task_worker.py
+++ b/etl_billiards/gui/workers/task_worker.py
@@ -144,24 +144,166 @@ class TaskWorker(QThread):
        if not self._output_lines:
            return "无输出"
        
-        # 查找关键信息
+        return self._parse_detailed_summary()
+    
+    def _parse_detailed_summary(self) -> str:
+        """解析详细的执行摘要"""
+        import re
+        import json
+        
        summary_parts = []
        
-        for line in self._output_lines[-20:]:  # 只看最后 20 行
-            line_lower = line.lower()
-            if "success" in line_lower or "完成" in line or "成功" in line:
-                summary_parts.append(line)
-            elif "error" in line_lower or "失败" in line or "错误" in line:
-                summary_parts.append(line)
-            elif "inserted" in line_lower or "updated" in line_lower:
-                summary_parts.append(line)
-            elif "fetched" in line_lower or "抓取" in line:
-                summary_parts.append(line)
+        # 统计各类信息
+        ods_stats = []  # ODS 抓取统计
+        dwd_stats = []  # DWD 装载统计
+        integrity_stats = {}  # 数据校验统计
+        errors = []  # 错误信息
+        task_results = []  # 任务结果
+        
+        for line in self._output_lines:
+            # 1. 解析 ODS 抓取完成信息
+            # 格式: "xxx: 抓取完成，文件=xxx，记录数=123"
+            match = re.search(r'(\w+): 抓取完成.*记录数[=:]\s*(\d+)', line)
+            if match:
+                task_name = match.group(1)
+                record_count = int(match.group(2))
+                if record_count > 0:
+                    ods_stats.append(f"{task_name}: {record_count}条")
+                continue
+            
+            # 2. 解析 DWD 装载完成信息
+            # 格式: "DWD 装载完成：xxx，用时 1.02s"
+            match = re.search(r'DWD 装载完成[：:]\s*(\S+).*用时\s*([\d.]+)s', line)
+            if match:
+                table_name = match.group(1).replace('billiards_dwd.', '')
+                continue
+            
+            # 3. 解析任务完成统计 (JSON格式)
+            # 格式: "xxx: 完成，统计={'tables': [...]}"
+            if "完成，统计=" in line or "完成,统计=" in line:
+                try:
+                    match = re.search(r"统计=(\{.+\})", line)
+                    if match:
+                        stats_str = match.group(1).replace("'", '"')
+                        stats = json.loads(stats_str)
+                        
+                        # 解析 DWD 装载统计
+                        if 'tables' in stats:
+                            total_processed = 0
+                            total_inserted = 0
+                            tables_with_data = []
+                            
+                            for tbl in stats['tables']:
+                                table_name = tbl.get('table', '').replace('billiards_dwd.', '')
+                                processed = tbl.get('processed', 0)
+                                inserted = tbl.get('inserted', 0)
+                                
+                                if processed > 0:
+                                    total_processed += processed
+                                    tables_with_data.append(f"{table_name}({processed})")
+                                elif inserted > 0:
+                                    total_inserted += inserted
+                                    tables_with_data.append(f"{table_name}(+{inserted})")
+                            
+                            if total_processed > 0 or total_inserted > 0:
+                                dwd_stats.append(f"处理维度: {total_processed}条, 新增事实: {total_inserted}条")
+                                if len(tables_with_data) <= 5:
+                                    dwd_stats.append(f"涉及表: {', '.join(tables_with_data)}")
+                                else:
+                                    dwd_stats.append(f"涉及 {len(tables_with_data)} 张表")
+                except Exception:
+                    pass
+                continue
+            
+            # 4. 解析数据校验结果
+            # 格式: "CHECK_DONE task=xxx missing=1 records=136 errors=0"
+            match = re.search(r'CHECK_DONE task=(\w+) missing=(\d+) records=(\d+)', line)
+            if match:
+                task_name = match.group(1)
+                missing = int(match.group(2))
+                records = int(match.group(3))
+                if missing > 0:
+                    if 'missing_tasks' not in integrity_stats:
+                        integrity_stats['missing_tasks'] = []
+                    integrity_stats['missing_tasks'].append(f"{task_name}: 缺失{missing}/{records}")
+                integrity_stats['total_records'] = integrity_stats.get('total_records', 0) + records
+                integrity_stats['total_missing'] = integrity_stats.get('total_missing', 0) + missing
+                continue
+            
+            # 5. 解析数据校验最终结果
+            # 格式: "结果统计: {'missing': 463, 'errors': 0, 'backfilled': 0}"
+            if "结果统计:" in line or "结果统计：" in line:
+                try:
+                    match = re.search(r"\{.+\}", line)
+                    if match:
+                        stats_str = match.group(0).replace("'", '"')
+                        stats = json.loads(stats_str)
+                        integrity_stats['final_missing'] = stats.get('missing', 0)
+                        integrity_stats['final_errors'] = stats.get('errors', 0)
+                        integrity_stats['backfilled'] = stats.get('backfilled', 0)
+                except Exception:
+                    pass
+                continue
+            
+            # 6. 解析错误信息
+            if "[ERROR]" in line or "错误" in line.lower() or "error" in line.lower():
+                if "Traceback" not in line and "File " not in line:
+                    errors.append(line.strip()[:100])
+            
+            # 7. 解析任务完成信息
+            if "任务执行成功" in line or "ETL运行完成" in line:
+                task_results.append("✓ " + line.split("]")[-1].strip() if "]" in line else line.strip())
+            elif "任务执行失败" in line:
+                task_results.append("✗ " + line.split("]")[-1].strip() if "]" in line else line.strip())
+        
+        # 构建摘要
+        if ods_stats:
+            summary_parts.append("【ODS 抓取】" + ", ".join(ods_stats[:5]))
+            if len(ods_stats) > 5:
+                summary_parts[-1] += f" 等{len(ods_stats)}项"
+        
+        if dwd_stats:
+            summary_parts.append("【DWD 装载】" + "; ".join(dwd_stats))
+        
+        if integrity_stats:
+            total_missing = integrity_stats.get('final_missing', integrity_stats.get('total_missing', 0))
+            total_records = integrity_stats.get('total_records', 0)
+            backfilled = integrity_stats.get('backfilled', 0)
+            
+            int_summary = f"【数据校验】检查 {total_records} 条记录"
+            if total_missing > 0:
+                int_summary += f", 发现 {total_missing} 条缺失"
+                if backfilled > 0:
+                    int_summary += f", 已补全 {backfilled} 条"
+            else:
+                int_summary += ", 数据完整"
+            summary_parts.append(int_summary)
+            
+            # 显示缺失详情
+            if integrity_stats.get('missing_tasks'):
+                missing_detail = integrity_stats['missing_tasks'][:3]
+                summary_parts.append("  缺失: " + "; ".join(missing_detail))
+                if len(integrity_stats['missing_tasks']) > 3:
+                    summary_parts[-1] += f" 等{len(integrity_stats['missing_tasks'])}项"
+        
+        if errors:
+            summary_parts.append("【错误】" + "; ".join(errors[:3]))
+        
+        if task_results:
+            summary_parts.append("【结果】" + " | ".join(task_results))
        
        if summary_parts:
-            return "\n".join(summary_parts[-5:])  # 最多返回 5 行
+            return "\n".join(summary_parts)
+        
+        # 如果没有解析到任何信息，返回最后几行关键信息
+        key_lines = []
+        for line in self._output_lines[-10:]:
+            if "完成" in line or "成功" in line or "失败" in line:
+                key_lines.append(line.strip()[:80])
+        
+        if key_lines:
+            return "\n".join(key_lines[-3:])
        
-        # 如果没有找到关键信息，返回最后一行
        return self._output_lines[-1] if self._output_lines else "执行完成"
    
    @property