Files
Neo-ZQYY/apps/backend/app/routers/db_viewer.py
Neo 17f045a89e fix(backend): Wave 1 Day 1 三个 P0 D Bug 修复
- W1-T3 修 4 处 fdw_etl.* 必坏残留 → app.* (P0-5 致命 1)
  · tenant_users.py L431/L456-457: v_dim_assistant + v_dim_staff(_ex)
  · tenant_excel.py L394/L411: v_dim_assistant + v_dim_staff
  · tenant_clues.py L119: v_dim_member
  · 修复后 tenant-admin 用户审核 / Excel 上传 / 维客线索恢复正常

- W1-T4 JWT aud sign 端写入 (P0-5 致命 2 最小止血)
  · jwt.py 全部 token 创建/解码函数加 audience 参数
  · auth.py admin 端加 audience="admin"
  · xcx_auth.py miniapp 端加 audience="miniapp" (8 处调用)
  · 18 router 切强制 aud 校验留 Wave 2

- W1-T5 DBViewer 白名单 + 黑名单双保险 (P0-8)
  · 白名单: SELECT/WITH/EXPLAIN/SHOW 开头
  · 黑名单: 17 关键词覆盖全 DML/DDL/DCL
  · 注释剥离避免误伤;15/15 单测 PASS

参考: docs/audit/changes/2026-05-04__wave1_day1_d_bug_triple_fix.md
2026-05-04 07:36:20 +08:00

264 lines
8.7 KiB
Python

# -*- coding: utf-8 -*-
"""数据库查看器 API
提供 4 个端点:
- GET /api/db/schemas — 返回 Schema 列表
- GET /api/db/schemas/{name}/tables — 返回表列表和行数
- GET /api/db/tables/{schema}/{table}/columns — 返回列定义
- POST /api/db/query — 只读 SQL 执行
所有端点需要 JWT 认证。
使用 get_etl_readonly_connection(site_id) 确保 RLS 隔离。
"""
from __future__ import annotations
import logging
import re
from fastapi import APIRouter, Depends, HTTPException, status
from psycopg2 import errors as pg_errors, OperationalError
from app.auth.dependencies import CurrentUser, get_current_user
from app.database import get_etl_readonly_connection
from app.schemas.db_viewer import (
ColumnInfo,
QueryRequest,
QueryResponse,
SchemaInfo,
TableInfo,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/db", tags=["数据库查看器"])
# P0-8 修复(2026-05-04 Wave 1):改为白名单 + 黑名单双保险
# 白名单:SQL 必须以这些关键词开头(去注释/空白后)
_ALLOWED_PREFIXES = ("SELECT", "WITH", "EXPLAIN", "SHOW")
# 黑名单:深度防御,即使开头通过,语句中包含这些关键词也拒绝
# 涵盖 DML / DDL / DCL,补全原仅 5 个关键词的漏洞
_DENY_KEYWORDS = re.compile(
r"\b(INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|GRANT|REVOKE|COPY|CALL|COMMENT|VACUUM|REINDEX|CLUSTER|REFRESH|LOCK)\b",
re.IGNORECASE,
)
def _strip_comments(sql: str) -> str:
"""剥离 SQL 中的 -- 行注释 与 /* */ 块注释,避免黑名单误匹配注释里的英文词。"""
# 块注释 /* ... */ (非贪婪)
sql = re.sub(r"/\*.*?\*/", " ", sql, flags=re.DOTALL)
# 行注释 -- 直到行尾
sql = re.sub(r"--[^\n]*", " ", sql)
return sql
def _extract_first_keyword(sql: str) -> str:
"""提取 SQL 第一个有效关键词(去掉前导注释 + 空白)。
支持 -- 行注释 和 /* */ 块注释。返回大写关键词,无关键词返回空字符串。
"""
s = _strip_comments(sql).strip()
m = re.match(r"\s*([A-Za-z_]+)", s)
return m.group(1).upper() if m else ""
# 查询结果行数上限
_MAX_ROWS = 1000
# 查询超时(秒)
_QUERY_TIMEOUT_SEC = 30
# ── GET /api/db/schemas ──────────────────────────────────────
@router.get("/schemas", response_model=list[SchemaInfo])
async def list_schemas(
user: CurrentUser = Depends(get_current_user),
) -> list[SchemaInfo]:
"""返回 ETL 数据库中的 Schema 列表。"""
conn = get_etl_readonly_connection(user.site_id)
try:
with conn.cursor() as cur:
cur.execute(
"""
SELECT schema_name
FROM information_schema.schemata
WHERE schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast')
ORDER BY schema_name
"""
)
rows = cur.fetchall()
return [SchemaInfo(name=row[0]) for row in rows]
finally:
conn.close()
# ── GET /api/db/schemas/{name}/tables ────────────────────────
@router.get("/schemas/{name}/tables", response_model=list[TableInfo])
async def list_tables(
name: str,
user: CurrentUser = Depends(get_current_user),
) -> list[TableInfo]:
"""返回指定 Schema 下所有表的名称和行数统计。"""
conn = get_etl_readonly_connection(user.site_id)
try:
with conn.cursor() as cur:
cur.execute(
"""
SELECT
t.table_name,
s.n_live_tup
FROM information_schema.tables t
LEFT JOIN pg_stat_user_tables s
ON s.schemaname = t.table_schema
AND s.relname = t.table_name
WHERE t.table_schema = %s
AND t.table_type = 'BASE TABLE'
ORDER BY t.table_name
""",
(name,),
)
rows = cur.fetchall()
return [
TableInfo(name=row[0], row_count=row[1])
for row in rows
]
finally:
conn.close()
# ── GET /api/db/tables/{schema}/{table}/columns ──────────────
@router.get(
"/tables/{schema}/{table}/columns",
response_model=list[ColumnInfo],
)
async def list_columns(
schema: str,
table: str,
user: CurrentUser = Depends(get_current_user),
) -> list[ColumnInfo]:
"""返回指定表的列定义。"""
conn = get_etl_readonly_connection(user.site_id)
try:
with conn.cursor() as cur:
cur.execute(
"""
SELECT
column_name,
data_type,
is_nullable,
column_default
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
ORDER BY ordinal_position
""",
(schema, table),
)
rows = cur.fetchall()
return [
ColumnInfo(
name=row[0],
data_type=row[1],
is_nullable=row[2] == "YES",
column_default=row[3],
)
for row in rows
]
finally:
conn.close()
# ── POST /api/db/query ───────────────────────────────────────
@router.post("/query", response_model=QueryResponse)
async def execute_query(
body: QueryRequest,
user: CurrentUser = Depends(get_current_user),
) -> QueryResponse:
"""只读 SQL 执行。
安全措施(P0-8 修复 2026-05-04):
1. 白名单:SQL 必须以 SELECT / WITH / EXPLAIN / SHOW 开头(去注释/空白后)
2. 黑名单:深度防御,语句含 DML/DDL/DCL 关键词一律拒绝(覆盖 INSERT/UPDATE/DELETE/DROP/
TRUNCATE/ALTER/CREATE/GRANT/REVOKE/COPY/CALL/COMMENT/VACUUM/REINDEX/CLUSTER/REFRESH/LOCK)
3. 限制返回行数上限 1000 行
4. 设置查询超时 30 秒
5. 数据库连接为只读账号(get_etl_readonly_connection,默认 read-only 事务)
"""
sql = body.sql.strip()
if not sql:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="SQL 语句不能为空",
)
# 白名单:首关键词校验
first_kw = _extract_first_keyword(sql)
if first_kw not in _ALLOWED_PREFIXES:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"只允许只读查询,SQL 必须以 {' / '.join(_ALLOWED_PREFIXES)} 开头",
)
# 黑名单:深度防御(剥离注释后再匹配,避免注释中"comment"等英文词误伤)
if _DENY_KEYWORDS.search(_strip_comments(sql)):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="检测到禁止的关键词(DML/DDL/DCL),只允许只读查询",
)
conn = get_etl_readonly_connection(user.site_id)
try:
with conn.cursor() as cur:
# 设置查询超时
cur.execute(
"SET LOCAL statement_timeout = %s",
(f"{_QUERY_TIMEOUT_SEC}s",),
)
try:
cur.execute(sql)
except pg_errors.QueryCanceled:
raise HTTPException(
status_code=status.HTTP_408_REQUEST_TIMEOUT,
detail=f"查询超时(超过 {_QUERY_TIMEOUT_SEC} 秒)",
)
except Exception as exc:
# SQL 语法错误或其他执行错误
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"SQL 执行错误: {exc}",
)
# 提取列名
columns = (
[desc[0] for desc in cur.description]
if cur.description
else []
)
# 限制返回行数
rows = cur.fetchmany(_MAX_ROWS)
# 将元组转为列表,便于 JSON 序列化
rows_list = [list(row) for row in rows]
return QueryResponse(
columns=columns,
rows=rows_list,
row_count=len(rows_list),
)
except HTTPException:
raise
except OperationalError as exc:
# 连接级错误
logger.error("数据库查看器连接错误: %s", exc)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="数据库连接错误",
)
finally:
conn.close()