Files
Neo-ZQYY/apps/etl/connectors/feiqiu/scripts/verify_dws_extensions.py

396 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""DWS 层扩展验证脚本 — 影子跑数验证。
对照 Requirements 9.19.4 验证三个 DWS 表的结构完整性和数据合理性:
1. dws_assistant_order_contribution — 四项统计一致性
2. dws_member_consumption_summary — 充值窗口字段
3. dws_assistant_daily_detail — 定档折算惩罚字段
4. RLS 视图 + FDW 外部表存在性
用法:
cd apps/etl/connectors/feiqiu
python scripts/verify_dws_extensions.py
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 1. 加载根 .env遵循 testing-env.md 规范)
# ---------------------------------------------------------------------------
_ROOT = Path(__file__).resolve().parents[5] # scripts/ → feiqiu/ → connectors/ → etl/ → apps/ → 根目录
load_dotenv(_ROOT / ".env")
PG_DSN = os.environ.get("PG_DSN")
if not PG_DSN:
raise RuntimeError("PG_DSN 未设置,请检查 .env 配置")
APP_DB_DSN = os.environ.get("APP_DB_DSN")
if not APP_DB_DSN:
raise RuntimeError("APP_DB_DSN 未设置,请检查 .env 配置")
# ---------------------------------------------------------------------------
# 2. 数据库连接
# ---------------------------------------------------------------------------
try:
import psycopg2
except ImportError:
print("ERROR: psycopg2 未安装,请执行 uv pip install psycopg2-binary")
sys.exit(1)
# ---------------------------------------------------------------------------
# 辅助函数
# ---------------------------------------------------------------------------
class _Result:
"""单条验证结果。"""
def __init__(self, name: str):
self.name = name
self.passed = True
self.details: list[str] = []
def fail(self, msg: str) -> None:
self.passed = False
self.details.append(f" FAIL: {msg}")
def ok(self, msg: str) -> None:
self.details.append(f" OK: {msg}")
def __str__(self) -> str:
status = "PASS" if self.passed else "FAIL"
header = f"[{status}] {self.name}"
if self.details:
return header + "\n" + "\n".join(self.details)
return header
def _query(conn, sql: str, params=None) -> list[tuple]:
with conn.cursor() as cur:
cur.execute(sql, params)
return cur.fetchall()
def _query_one(conn, sql: str, params=None):
rows = _query(conn, sql, params)
return rows[0] if rows else None
def _column_exists(conn, schema: str, table: str, column: str) -> bool:
row = _query_one(
conn,
"""
SELECT 1 FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s AND column_name = %s
""",
(schema, table, column),
)
return row is not None
def _table_exists(conn, schema: str, table: str) -> bool:
row = _query_one(
conn,
"""
SELECT 1 FROM information_schema.tables
WHERE table_schema = %s AND table_name = %s
""",
(schema, table),
)
return row is not None
# ---------------------------------------------------------------------------
# 验证 1dws_assistant_order_contribution 四项统计Req 9.1, 9.2
# ---------------------------------------------------------------------------
def verify_contribution_table(conn) -> _Result:
r = _Result("验证 1dws_assistant_order_contribution 表结构与数据")
# 1a. 表存在
if not _table_exists(conn, "dws", "dws_assistant_order_contribution"):
r.fail("表 dws.dws_assistant_order_contribution 不存在")
return r
r.ok("表存在")
# 1b. 关键字段存在
required_cols = [
"contribution_id", "site_id", "tenant_id", "assistant_id",
"assistant_nickname", "stat_date",
"order_gross_revenue", "order_net_revenue",
"time_weighted_revenue", "time_weighted_net_revenue",
"order_count", "total_service_seconds",
"created_at", "updated_at",
]
missing = [c for c in required_cols
if not _column_exists(conn, "dws", "dws_assistant_order_contribution", c)]
if missing:
r.fail(f"缺少字段: {', '.join(missing)}")
else:
r.ok(f"全部 {len(required_cols)} 个字段存在")
# 1c. 唯一索引存在
idx_row = _query_one(
conn,
"""
SELECT indexname FROM pg_indexes
WHERE schemaname = 'dws'
AND tablename = 'dws_assistant_order_contribution'
AND indexname = 'idx_aoc_site_assistant_date'
""",
)
if idx_row:
r.ok("唯一索引 idx_aoc_site_assistant_date 存在")
else:
r.fail("唯一索引 idx_aoc_site_assistant_date 不存在")
# 1d. 数据行数(信息性,不判 FAIL
row = _query_one(conn, "SELECT COUNT(*) FROM dws.dws_assistant_order_contribution")
count = row[0] if row else 0
r.ok(f"当前数据行数: {count}")
# 1e. 如果有数据,检查四项统计非负
if count > 0:
neg_row = _query_one(
conn,
"""
SELECT COUNT(*) FROM dws.dws_assistant_order_contribution
WHERE order_gross_revenue < 0
OR order_net_revenue < 0
OR time_weighted_revenue < 0
OR time_weighted_net_revenue < 0
""",
)
neg_count = neg_row[0] if neg_row else 0
if neg_count > 0:
r.fail(f"存在 {neg_count} 条四项统计为负值的记录")
else:
r.ok("四项统计数值均非负")
return r
# ---------------------------------------------------------------------------
# 验证 2dws_member_consumption_summary 充值窗口字段Req 9.3
# ---------------------------------------------------------------------------
def verify_consumption_fields(conn) -> _Result:
r = _Result("验证 2dws_member_consumption_summary 充值窗口字段")
if not _table_exists(conn, "dws", "dws_member_consumption_summary"):
r.fail("表 dws.dws_member_consumption_summary 不存在")
return r
r.ok("表存在")
new_cols = [
"recharge_count_30d", "recharge_count_60d", "recharge_count_90d",
"recharge_amount_30d", "recharge_amount_60d", "recharge_amount_90d",
"avg_ticket_amount",
]
missing = [c for c in new_cols
if not _column_exists(conn, "dws", "dws_member_consumption_summary", c)]
if missing:
r.fail(f"缺少新增字段: {', '.join(missing)}")
else:
r.ok(f"全部 {len(new_cols)} 个新增字段存在")
# 如果有数据,检查充值金额和次均消费非负
row = _query_one(conn, "SELECT COUNT(*) FROM dws.dws_member_consumption_summary")
count = row[0] if row else 0
r.ok(f"当前数据行数: {count}")
if count > 0:
neg_row = _query_one(
conn,
"""
SELECT COUNT(*) FROM dws.dws_member_consumption_summary
WHERE recharge_amount_30d < 0
OR recharge_amount_60d < 0
OR recharge_amount_90d < 0
OR avg_ticket_amount < 0
""",
)
neg_count = neg_row[0] if neg_row else 0
if neg_count > 0:
r.fail(f"存在 {neg_count} 条充值金额或次均消费为负值的记录")
else:
r.ok("充值金额和次均消费均非负")
return r
# ---------------------------------------------------------------------------
# 验证 3dws_assistant_daily_detail 惩罚字段Req 9.4
# ---------------------------------------------------------------------------
def verify_penalty_fields(conn) -> _Result:
r = _Result("验证 3dws_assistant_daily_detail 惩罚字段")
if not _table_exists(conn, "dws", "dws_assistant_daily_detail"):
r.fail("表 dws.dws_assistant_daily_detail 不存在")
return r
r.ok("表存在")
new_cols = ["penalty_minutes", "penalty_reason", "is_exempt", "per_hour_contribution"]
missing = [c for c in new_cols
if not _column_exists(conn, "dws", "dws_assistant_daily_detail", c)]
if missing:
r.fail(f"缺少新增字段: {', '.join(missing)}")
else:
r.ok(f"全部 {len(new_cols)} 个惩罚字段存在")
# 检查 is_exempt 字段类型为 boolean
type_row = _query_one(
conn,
"""
SELECT data_type FROM information_schema.columns
WHERE table_schema = 'dws'
AND table_name = 'dws_assistant_daily_detail'
AND column_name = 'is_exempt'
""",
)
if type_row and type_row[0] == "boolean":
r.ok("is_exempt 字段类型为 boolean")
elif type_row:
r.fail(f"is_exempt 字段类型为 {type_row[0]},预期 boolean")
# 如果有数据,检查 penalty_minutes >= 0
row = _query_one(conn, "SELECT COUNT(*) FROM dws.dws_assistant_daily_detail")
count = row[0] if row else 0
r.ok(f"当前数据行数: {count}")
if count > 0:
neg_row = _query_one(
conn,
"""
SELECT COUNT(*) FROM dws.dws_assistant_daily_detail
WHERE penalty_minutes < 0
""",
)
neg_count = neg_row[0] if neg_row else 0
if neg_count > 0:
r.fail(f"存在 {neg_count} 条 penalty_minutes 为负值的记录")
else:
r.ok("penalty_minutes 均非负")
return r
# ---------------------------------------------------------------------------
# 验证 4RLS 视图和 FDW 映射Req 7, 8
# ---------------------------------------------------------------------------
def verify_rls_views(conn_etl) -> _Result:
r = _Result("验证 4aRLS 视图存在性ETL 库 app schema")
views = [
"v_dws_assistant_order_contribution",
"v_dws_member_consumption_summary",
"v_dws_assistant_daily_detail",
]
for v in views:
if _table_exists(conn_etl, "app", v):
r.ok(f"视图 app.{v} 存在")
else:
r.fail(f"视图 app.{v} 不存在")
return r
def verify_fdw_tables(conn_app) -> _Result:
r = _Result("验证 4bFDW 外部表存在性(业务库 fdw_etl schema")
# FDW 外部表名与 RLS 视图名一致,带 v_ 前缀
tables = [
"v_dws_assistant_order_contribution",
"v_dws_member_consumption_summary",
"v_dws_assistant_daily_detail",
]
for t in tables:
row = _query_one(
conn_app,
"""
SELECT 1 FROM information_schema.tables
WHERE table_schema = 'fdw_etl' AND table_name = %s
""",
(t,),
)
if row:
r.ok(f"外部表 fdw_etl.{t} 存在")
else:
r.fail(f"外部表 fdw_etl.{t} 不存在")
return r
# ---------------------------------------------------------------------------
# 主函数
# ---------------------------------------------------------------------------
def main() -> int:
results: list[_Result] = []
# 连接 ETL 测试库
try:
conn_etl = psycopg2.connect(PG_DSN)
conn_etl.autocommit = True
except Exception as e:
print(f"ERROR: 无法连接 ETL 库 ({PG_DSN[:40]}...): {e}")
return 1
# 连接业务测试库
try:
conn_app = psycopg2.connect(APP_DB_DSN)
conn_app.autocommit = True
except Exception as e:
print(f"ERROR: 无法连接业务库 ({APP_DB_DSN[:40]}...): {e}")
conn_etl.close()
return 1
try:
print("=" * 60)
print("DWS 层扩展验证 — 影子跑数验证")
print("=" * 60)
print()
# ETL 库验证
results.append(verify_contribution_table(conn_etl))
results.append(verify_consumption_fields(conn_etl))
results.append(verify_penalty_fields(conn_etl))
results.append(verify_rls_views(conn_etl))
# 业务库验证
results.append(verify_fdw_tables(conn_app))
# 输出结果
for r in results:
print(r)
print()
# 汇总
total = len(results)
passed = sum(1 for r in results if r.passed)
failed = total - passed
print("=" * 60)
print(f"汇总: {passed}/{total} 通过, {failed} 失败")
print("=" * 60)
return 0 if failed == 0 else 1
finally:
conn_etl.close()
conn_app.close()
if __name__ == "__main__":
sys.exit(main())