#!/usr/bin/env python # -*- coding: utf-8 -*- """DWS 层扩展验证脚本 — 影子跑数验证。 对照 Requirements 9.1–9.4 验证三个 DWS 表的结构完整性和数据合理性: 1. dws_assistant_order_contribution — 四项统计一致性 2. dws_member_consumption_summary — 充值窗口字段 3. dws_assistant_daily_detail — 定档折算惩罚字段 4. RLS 视图 + FDW 外部表存在性 用法: cd apps/etl/connectors/feiqiu python scripts/verify_dws_extensions.py """ from __future__ import annotations import os import sys from pathlib import Path from dotenv import load_dotenv # --------------------------------------------------------------------------- # 1. 加载根 .env(遵循 testing-env.md 规范) # --------------------------------------------------------------------------- _ROOT = Path(__file__).resolve().parents[5] # scripts/ → feiqiu/ → connectors/ → etl/ → apps/ → 根目录 load_dotenv(_ROOT / ".env") PG_DSN = os.environ.get("PG_DSN") if not PG_DSN: raise RuntimeError("PG_DSN 未设置,请检查 .env 配置") APP_DB_DSN = os.environ.get("APP_DB_DSN") if not APP_DB_DSN: raise RuntimeError("APP_DB_DSN 未设置,请检查 .env 配置") # --------------------------------------------------------------------------- # 2. 数据库连接 # --------------------------------------------------------------------------- try: import psycopg2 except ImportError: print("ERROR: psycopg2 未安装,请执行 uv pip install psycopg2-binary") sys.exit(1) # --------------------------------------------------------------------------- # 辅助函数 # --------------------------------------------------------------------------- class _Result: """单条验证结果。""" def __init__(self, name: str): self.name = name self.passed = True self.details: list[str] = [] def fail(self, msg: str) -> None: self.passed = False self.details.append(f" FAIL: {msg}") def ok(self, msg: str) -> None: self.details.append(f" OK: {msg}") def __str__(self) -> str: status = "PASS" if self.passed else "FAIL" header = f"[{status}] {self.name}" if self.details: return header + "\n" + "\n".join(self.details) return header def _query(conn, sql: str, params=None) -> list[tuple]: with conn.cursor() as cur: cur.execute(sql, params) return cur.fetchall() def _query_one(conn, sql: str, params=None): rows = _query(conn, sql, params) return rows[0] if rows else None def _column_exists(conn, schema: str, table: str, column: str) -> bool: row = _query_one( conn, """ SELECT 1 FROM information_schema.columns WHERE table_schema = %s AND table_name = %s AND column_name = %s """, (schema, table, column), ) return row is not None def _table_exists(conn, schema: str, table: str) -> bool: row = _query_one( conn, """ SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s """, (schema, table), ) return row is not None # --------------------------------------------------------------------------- # 验证 1:dws_assistant_order_contribution 四项统计(Req 9.1, 9.2) # --------------------------------------------------------------------------- def verify_contribution_table(conn) -> _Result: r = _Result("验证 1:dws_assistant_order_contribution 表结构与数据") # 1a. 表存在 if not _table_exists(conn, "dws", "dws_assistant_order_contribution"): r.fail("表 dws.dws_assistant_order_contribution 不存在") return r r.ok("表存在") # 1b. 关键字段存在 required_cols = [ "contribution_id", "site_id", "tenant_id", "assistant_id", "assistant_nickname", "stat_date", "order_gross_revenue", "order_net_revenue", "time_weighted_revenue", "time_weighted_net_revenue", "order_count", "total_service_seconds", "created_at", "updated_at", ] missing = [c for c in required_cols if not _column_exists(conn, "dws", "dws_assistant_order_contribution", c)] if missing: r.fail(f"缺少字段: {', '.join(missing)}") else: r.ok(f"全部 {len(required_cols)} 个字段存在") # 1c. 唯一索引存在 idx_row = _query_one( conn, """ SELECT indexname FROM pg_indexes WHERE schemaname = 'dws' AND tablename = 'dws_assistant_order_contribution' AND indexname = 'idx_aoc_site_assistant_date' """, ) if idx_row: r.ok("唯一索引 idx_aoc_site_assistant_date 存在") else: r.fail("唯一索引 idx_aoc_site_assistant_date 不存在") # 1d. 数据行数(信息性,不判 FAIL) row = _query_one(conn, "SELECT COUNT(*) FROM dws.dws_assistant_order_contribution") count = row[0] if row else 0 r.ok(f"当前数据行数: {count}") # 1e. 如果有数据,检查四项统计非负 if count > 0: neg_row = _query_one( conn, """ SELECT COUNT(*) FROM dws.dws_assistant_order_contribution WHERE order_gross_revenue < 0 OR order_net_revenue < 0 OR time_weighted_revenue < 0 OR time_weighted_net_revenue < 0 """, ) neg_count = neg_row[0] if neg_row else 0 if neg_count > 0: r.fail(f"存在 {neg_count} 条四项统计为负值的记录") else: r.ok("四项统计数值均非负") return r # --------------------------------------------------------------------------- # 验证 2:dws_member_consumption_summary 充值窗口字段(Req 9.3) # --------------------------------------------------------------------------- def verify_consumption_fields(conn) -> _Result: r = _Result("验证 2:dws_member_consumption_summary 充值窗口字段") if not _table_exists(conn, "dws", "dws_member_consumption_summary"): r.fail("表 dws.dws_member_consumption_summary 不存在") return r r.ok("表存在") new_cols = [ "recharge_count_30d", "recharge_count_60d", "recharge_count_90d", "recharge_amount_30d", "recharge_amount_60d", "recharge_amount_90d", "avg_ticket_amount", ] missing = [c for c in new_cols if not _column_exists(conn, "dws", "dws_member_consumption_summary", c)] if missing: r.fail(f"缺少新增字段: {', '.join(missing)}") else: r.ok(f"全部 {len(new_cols)} 个新增字段存在") # 如果有数据,检查充值金额和次均消费非负 row = _query_one(conn, "SELECT COUNT(*) FROM dws.dws_member_consumption_summary") count = row[0] if row else 0 r.ok(f"当前数据行数: {count}") if count > 0: neg_row = _query_one( conn, """ SELECT COUNT(*) FROM dws.dws_member_consumption_summary WHERE recharge_amount_30d < 0 OR recharge_amount_60d < 0 OR recharge_amount_90d < 0 OR avg_ticket_amount < 0 """, ) neg_count = neg_row[0] if neg_row else 0 if neg_count > 0: r.fail(f"存在 {neg_count} 条充值金额或次均消费为负值的记录") else: r.ok("充值金额和次均消费均非负") return r # --------------------------------------------------------------------------- # 验证 3:dws_assistant_daily_detail 惩罚字段(Req 9.4) # --------------------------------------------------------------------------- def verify_penalty_fields(conn) -> _Result: r = _Result("验证 3:dws_assistant_daily_detail 惩罚字段") if not _table_exists(conn, "dws", "dws_assistant_daily_detail"): r.fail("表 dws.dws_assistant_daily_detail 不存在") return r r.ok("表存在") new_cols = ["penalty_minutes", "penalty_reason", "is_exempt", "per_hour_contribution"] missing = [c for c in new_cols if not _column_exists(conn, "dws", "dws_assistant_daily_detail", c)] if missing: r.fail(f"缺少新增字段: {', '.join(missing)}") else: r.ok(f"全部 {len(new_cols)} 个惩罚字段存在") # 检查 is_exempt 字段类型为 boolean type_row = _query_one( conn, """ SELECT data_type FROM information_schema.columns WHERE table_schema = 'dws' AND table_name = 'dws_assistant_daily_detail' AND column_name = 'is_exempt' """, ) if type_row and type_row[0] == "boolean": r.ok("is_exempt 字段类型为 boolean") elif type_row: r.fail(f"is_exempt 字段类型为 {type_row[0]},预期 boolean") # 如果有数据,检查 penalty_minutes >= 0 row = _query_one(conn, "SELECT COUNT(*) FROM dws.dws_assistant_daily_detail") count = row[0] if row else 0 r.ok(f"当前数据行数: {count}") if count > 0: neg_row = _query_one( conn, """ SELECT COUNT(*) FROM dws.dws_assistant_daily_detail WHERE penalty_minutes < 0 """, ) neg_count = neg_row[0] if neg_row else 0 if neg_count > 0: r.fail(f"存在 {neg_count} 条 penalty_minutes 为负值的记录") else: r.ok("penalty_minutes 均非负") return r # --------------------------------------------------------------------------- # 验证 4:RLS 视图和 FDW 映射(Req 7, 8) # --------------------------------------------------------------------------- def verify_rls_views(conn_etl) -> _Result: r = _Result("验证 4a:RLS 视图存在性(ETL 库 app schema)") views = [ "v_dws_assistant_order_contribution", "v_dws_member_consumption_summary", "v_dws_assistant_daily_detail", ] for v in views: if _table_exists(conn_etl, "app", v): r.ok(f"视图 app.{v} 存在") else: r.fail(f"视图 app.{v} 不存在") return r def verify_fdw_tables(conn_app) -> _Result: r = _Result("验证 4b:FDW 外部表存在性(业务库 fdw_etl schema)") # FDW 外部表名与 RLS 视图名一致,带 v_ 前缀 tables = [ "v_dws_assistant_order_contribution", "v_dws_member_consumption_summary", "v_dws_assistant_daily_detail", ] for t in tables: row = _query_one( conn_app, """ SELECT 1 FROM information_schema.tables WHERE table_schema = 'fdw_etl' AND table_name = %s """, (t,), ) if row: r.ok(f"外部表 fdw_etl.{t} 存在") else: r.fail(f"外部表 fdw_etl.{t} 不存在") return r # --------------------------------------------------------------------------- # 主函数 # --------------------------------------------------------------------------- def main() -> int: results: list[_Result] = [] # 连接 ETL 测试库 try: conn_etl = psycopg2.connect(PG_DSN) conn_etl.autocommit = True except Exception as e: print(f"ERROR: 无法连接 ETL 库 ({PG_DSN[:40]}...): {e}") return 1 # 连接业务测试库 try: conn_app = psycopg2.connect(APP_DB_DSN) conn_app.autocommit = True except Exception as e: print(f"ERROR: 无法连接业务库 ({APP_DB_DSN[:40]}...): {e}") conn_etl.close() return 1 try: print("=" * 60) print("DWS 层扩展验证 — 影子跑数验证") print("=" * 60) print() # ETL 库验证 results.append(verify_contribution_table(conn_etl)) results.append(verify_consumption_fields(conn_etl)) results.append(verify_penalty_fields(conn_etl)) results.append(verify_rls_views(conn_etl)) # 业务库验证 results.append(verify_fdw_tables(conn_app)) # 输出结果 for r in results: print(r) print() # 汇总 total = len(results) passed = sum(1 for r in results if r.passed) failed = total - passed print("=" * 60) print(f"汇总: {passed}/{total} 通过, {failed} 失败") print("=" * 60) return 0 if failed == 0 else 1 finally: conn_etl.close() conn_app.close() if __name__ == "__main__": sys.exit(main())