init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

View File

@@ -0,0 +1,488 @@
#!/usr/bin/env python3
"""
BD_Manual 文档体系验证脚本。
# AI_CHANGELOG [2026-02-13] 新增:验证 Property 1/4/5/6/7/8/9/10;支持 --pg-dsn 参数
验证 docs/database/ 下的目录结构、文档覆盖率、格式完整性和命名规范。
需要连接 PostgreSQL 获取 billiards_ods schema 的表清单作为基准。
用法:
python scripts/validate_bd_manual.py --pg-dsn "postgresql://user:pass@host/db"
python scripts/validate_bd_manual.py # 从 PG_DSN 环境变量或 .env 读取
"""
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from dataclasses import dataclass, field
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------
# Root of the BD_Manual documentation tree; every other path hangs off it.
BD_MANUAL_ROOT = Path("docs/database")
ODS_MAIN_DIR = BD_MANUAL_ROOT.joinpath("ODS", "main")
ODS_MAPPINGS_DIR = BD_MANUAL_ROOT.joinpath("ODS", "mappings")
ODS_DICT_PATH = BD_MANUAL_ROOT / "overview" / "ods_tables_dictionary.md"

# The four data layers; each one is expected to have main/ and changes/.
DATA_LAYERS = ["ODS", "DWD", "DWS", "ETL_Admin"]

# Section headings every ODS table document must contain (Property 5).
ODS_DOC_REQUIRED_SECTIONS = ["表信息", "字段说明", "使用说明", "可回溯性"]

# Attribute keywords that must show up for the "表信息" table of an ODS doc.
ODS_DOC_TABLE_INFO_KEYS = ["Schema", "表名", "主键", "数据来源", "说明"]

# ETL metadata columns every ODS document must mention.
ODS_DOC_ETL_META_FIELDS = [
    "content_hash",
    "source_file",
    "source_endpoint",
    "fetched_at",
    "payload",
]

# Section headings every mapping document must contain (Property 8).
MAPPING_DOC_REQUIRED_SECTIONS = ["端点信息", "字段映射", "ETL 补充字段"]

# Attribute keywords that must show up for the "端点信息" table of a mapping doc.
MAPPING_DOC_ENDPOINT_KEYS = ["接口路径", "ODS 对应表", "JSON 数据路径"]
# ---------------------------------------------------------------------------
# 数据结构
# ---------------------------------------------------------------------------
@dataclass
class CheckResult:
    """Outcome of one validation check.

    ``details`` carries the human-readable failure messages; every instance
    gets its own fresh list when none is supplied.
    """

    # Identifier of the verified property, e.g. "Property 1".
    property_id: str
    # Short description of what the check verifies.
    description: str
    # True when the check found no problems.
    passed: bool
    # Failure messages explaining what went wrong.
    details: list[str] = field(default_factory=list)
# ---------------------------------------------------------------------------
# 数据库查询:获取 ODS 表清单
# ---------------------------------------------------------------------------
def fetch_ods_tables(pg_dsn: str) -> list[str]:
    """Return the names of all base tables in the ``billiards_ods`` schema.

    Args:
        pg_dsn: PostgreSQL connection string handed to ``psycopg2.connect``.

    Returns:
        The table names in the order produced by the query
        (``ORDER BY table_name``).

    Raises:
        ImportError: If ``psycopg2`` is not installed.
        Exception: Any error raised by psycopg2 while connecting or querying
            propagates to the caller.
    """
    # Imported inside the function, so psycopg2 is only required when the
    # database is actually queried.
    import psycopg2

    sql = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_ods'
AND table_type = 'BASE TABLE'
ORDER BY table_name;
"""
    conn = psycopg2.connect(pg_dsn)
    try:
        with conn.cursor() as cur:
            cur.execute(sql)
            return [row[0] for row in cur.fetchall()]
    finally:
        # psycopg2's ``with connection`` only ends the transaction and leaves
        # the connection open, so close it explicitly to avoid leaking it.
        conn.close()
# ---------------------------------------------------------------------------
# Property 1: 数据层目录结构一致性
# ---------------------------------------------------------------------------
def check_directory_structure() -> CheckResult:
    """Verify that every data layer has ``main/`` and ``changes/`` (Property 1).

    Returns:
        A ``CheckResult`` that passes when, for each entry of ``DATA_LAYERS``,
        both sub-directories exist under ``BD_MANUAL_ROOT``; otherwise
        ``details`` lists every missing directory.
    """
    expected_dirs = [
        BD_MANUAL_ROOT / layer / sub
        for layer in DATA_LAYERS
        for sub in ("main", "changes")
    ]
    problems = [f"缺失目录: {path}" for path in expected_dirs if not path.is_dir()]
    return CheckResult(
        property_id="Property 1",
        description="数据层目录结构一致性main/ + changes/",
        passed=not problems,
        details=problems,
    )
# ---------------------------------------------------------------------------
# Property 4: ODS 表级文档覆盖率
# ---------------------------------------------------------------------------
def check_ods_doc_coverage(ods_tables: list[str]) -> CheckResult:
    """Verify that each ODS table has a ``BD_manual_{table}.md`` file (Property 4).

    Args:
        ods_tables: Table names to look for under ``ODS_MAIN_DIR``.

    Returns:
        A ``CheckResult`` whose ``details`` name the document file expected for
        every table that has none.
    """
    undocumented = [
        tbl
        for tbl in ods_tables
        if not (ODS_MAIN_DIR / f"BD_manual_{tbl}.md").is_file()
    ]
    return CheckResult(
        property_id="Property 4",
        description="ODS 表级文档覆盖率",
        passed=not undocumented,
        details=[f"缺失文档: BD_manual_{tbl}.md" for tbl in undocumented],
    )
# ---------------------------------------------------------------------------
# Property 5: ODS 表级文档格式完整性
# ---------------------------------------------------------------------------
def _check_single_ods_doc(filepath: Path) -> list[str]:
    """Collect the format problems of one ODS table document.

    Args:
        filepath: Markdown file to inspect; it is read as UTF-8.

    Returns:
        One message per problem, each prefixed with the file name. A file that
        cannot be read yields a single "unreadable" message.
    """
    doc_name = filepath.name
    try:
        text = filepath.read_text(encoding="utf-8")
    except Exception as exc:
        return [f"{doc_name}: 无法读取 ({exc})"]

    # A required section counts as present when some "## ..." heading line
    # contains its title.
    problems = [
        f"{doc_name}: 缺少「{section}」章节"
        for section in ODS_DOC_REQUIRED_SECTIONS
        if re.search(rf"^##\s+.*{re.escape(section)}", text, re.MULTILINE) is None
    ]

    # NOTE: the keywords are searched in the whole document, not only inside
    # the table-info section.
    problems.extend(
        f"{doc_name}: 表信息缺少「{key}」属性"
        for key in ODS_DOC_TABLE_INFO_KEYS
        if key not in text
    )

    # All missing ETL metadata fields are reported together in one message.
    absent_meta = [name for name in ODS_DOC_ETL_META_FIELDS if name not in text]
    if absent_meta:
        problems.append(f"{doc_name}: 未提及 ETL 元数据字段: {', '.join(absent_meta)}")
    return problems
def check_ods_doc_format() -> CheckResult:
    """Run the per-document format check over all ODS documents (Property 5).

    Returns:
        A failing ``CheckResult`` when ``ODS_MAIN_DIR`` does not exist;
        otherwise one whose ``details`` concatenate the problems of every
        ``BD_manual_*.md`` file, visited in sorted order.
    """
    if not ODS_MAIN_DIR.is_dir():
        return CheckResult(
            property_id="Property 5",
            description="ODS 表级文档格式完整性",
            passed=False,
            details=["ODS/main/ 目录不存在"],
        )

    problems = [
        issue
        for doc in sorted(ODS_MAIN_DIR.glob("BD_manual_*.md"))
        for issue in _check_single_ods_doc(doc)
    ]
    return CheckResult(
        property_id="Property 5",
        description="ODS 表级文档格式完整性",
        passed=not problems,
        details=problems,
    )
# ---------------------------------------------------------------------------
# Property 6: ODS 表级文档命名规范
# ---------------------------------------------------------------------------
def check_ods_doc_naming() -> CheckResult:
    """Verify the file names under ``ODS_MAIN_DIR`` (Property 6).

    Every entry with the ``.md`` suffix must be named ``BD_manual_`` followed
    by a lower-case snake_case identifier.

    Returns:
        A failing ``CheckResult`` when the directory does not exist; otherwise
        one whose ``details`` list each offending file name in sorted order.
    """
    if not ODS_MAIN_DIR.is_dir():
        return CheckResult(
            property_id="Property 6",
            description="ODS 表级文档命名规范",
            passed=False,
            details=["ODS/main/ 目录不存在"],
        )

    valid_name = re.compile(r"^BD_manual_[a-z][a-z0-9_]*\.md$")
    offenders = [
        entry.name
        for entry in sorted(ODS_MAIN_DIR.iterdir())
        if entry.suffix == ".md" and valid_name.match(entry.name) is None
    ]
    return CheckResult(
        property_id="Property 6",
        description="ODS 表级文档命名规范BD_manual_{表名}.md",
        passed=not offenders,
        details=[f"命名不规范: {name}" for name in offenders],
    )
# ---------------------------------------------------------------------------
# Property 7: 映射文档覆盖率
# ---------------------------------------------------------------------------
def check_mapping_doc_coverage(ods_tables: list[str]) -> CheckResult:
    """Verify that every ODS table has at least one mapping document (Property 7).

    A mapping document is a file in ``ODS_MAPPINGS_DIR`` whose name starts with
    ``mapping_`` and ends with ``_{table}.md``. Each file is attributed to the
    longest table name in ``ods_tables`` that its name ends with. This stops a
    document for a table such as ``big_orders`` from also counting as coverage
    for a shorter table such as ``orders``, which a plain suffix test allowed.

    Args:
        ods_tables: Table names that each need a mapping document.

    Returns:
        A failing ``CheckResult`` when the mappings directory does not exist;
        otherwise one whose ``details`` list every table without a document,
        in the order given by ``ods_tables``.
    """
    if not ODS_MAPPINGS_DIR.is_dir():
        return CheckResult(
            property_id="Property 7",
            description="映射文档覆盖率",
            passed=False,
            details=["ODS/mappings/ 目录不存在"],
        )

    # Longest names first: the first suffix hit is then the most specific
    # table, so a shorter name that is merely a tail of it is not credited.
    tables_longest_first = sorted(set(ods_tables), key=len, reverse=True)
    covered: set[str] = set()
    for doc in ODS_MAPPINGS_DIR.glob("mapping_*.md"):
        doc_name = doc.name
        if not doc_name.startswith("mapping_"):
            continue
        owner = next(
            (tbl for tbl in tables_longest_first if doc_name.endswith(f"_{tbl}.md")),
            None,
        )
        if owner is not None:
            covered.add(owner)

    missing = [tbl for tbl in ods_tables if tbl not in covered]
    return CheckResult(
        property_id="Property 7",
        description="映射文档覆盖率(每张 ODS 表至少一份映射文档)",
        passed=not missing,
        details=[f"缺失映射文档: mapping_*_{tbl}.md" for tbl in missing],
    )
# ---------------------------------------------------------------------------
# Property 8: 映射文档内容完整性
# ---------------------------------------------------------------------------
def _check_single_mapping_doc(filepath: Path) -> list[str]:
    """Collect the content problems of one mapping document.

    Args:
        filepath: Markdown file to inspect; it is read as UTF-8.

    Returns:
        One message per problem, each prefixed with the file name. A file that
        cannot be read yields a single "unreadable" message.
    """
    doc_name = filepath.name
    try:
        text = filepath.read_text(encoding="utf-8")
    except Exception as exc:
        return [f"{doc_name}: 无法读取 ({exc})"]

    # A required section counts as present when some "## ..." heading line
    # contains its title.
    problems = [
        f"{doc_name}: 缺少「{section}」章节"
        for section in MAPPING_DOC_REQUIRED_SECTIONS
        if re.search(rf"^##\s+.*{re.escape(section)}", text, re.MULTILINE) is None
    ]

    # NOTE: the keywords are searched in the whole document, not only inside
    # the endpoint-info section.
    problems.extend(
        f"{doc_name}: 端点信息缺少「{key}」属性"
        for key in MAPPING_DOC_ENDPOINT_KEYS
        if key not in text
    )

    # The ETL supplement fields are the same list the ODS documents use.
    absent_fields = [name for name in ODS_DOC_ETL_META_FIELDS if name not in text]
    if absent_fields:
        problems.append(f"{doc_name}: 未提及 ETL 补充字段: {', '.join(absent_fields)}")
    return problems
def check_mapping_doc_content() -> CheckResult:
    """Run the per-document content check over all mapping documents (Property 8).

    Returns:
        A failing ``CheckResult`` when ``ODS_MAPPINGS_DIR`` does not exist;
        otherwise one whose ``details`` concatenate the problems of every
        ``mapping_*.md`` file, visited in sorted order.
    """
    if not ODS_MAPPINGS_DIR.is_dir():
        return CheckResult(
            property_id="Property 8",
            description="映射文档内容完整性",
            passed=False,
            details=["ODS/mappings/ 目录不存在"],
        )

    problems = [
        issue
        for doc in sorted(ODS_MAPPINGS_DIR.glob("mapping_*.md"))
        for issue in _check_single_mapping_doc(doc)
    ]
    return CheckResult(
        property_id="Property 8",
        description="映射文档内容完整性",
        passed=not problems,
        details=problems,
    )
# ---------------------------------------------------------------------------
# Property 9: 映射文档命名规范
# ---------------------------------------------------------------------------
def check_mapping_doc_naming() -> CheckResult:
    """Verify the names of the mapping documents (Property 9).

    Only ``.md`` entries whose name starts with ``mapping_`` are examined. Each
    must look like ``mapping_{Endpoint}_{table}.md``, where the endpoint part
    is alphanumeric and starts with an upper-case letter, and the table part
    is lower-case snake_case.

    Returns:
        A failing ``CheckResult`` when ``ODS_MAPPINGS_DIR`` does not exist;
        otherwise one whose ``details`` list each offending file name in
        sorted order.
    """
    if not ODS_MAPPINGS_DIR.is_dir():
        return CheckResult(
            property_id="Property 9",
            description="映射文档命名规范",
            passed=False,
            details=["ODS/mappings/ 目录不存在"],
        )

    valid_name = re.compile(r"^mapping_[A-Z][A-Za-z0-9]+_[a-z][a-z0-9_]*\.md$")
    offenders = [
        entry.name
        for entry in sorted(ODS_MAPPINGS_DIR.iterdir())
        if entry.suffix == ".md"
        and entry.name.startswith("mapping_")
        and valid_name.match(entry.name) is None
    ]
    return CheckResult(
        property_id="Property 9",
        description="映射文档命名规范mapping_{API端点名}_{ODS表名}.md",
        passed=not offenders,
        details=[f"命名不规范: {name}" for name in offenders],
    )
# ---------------------------------------------------------------------------
# Property 10: ODS 数据字典覆盖率
# ---------------------------------------------------------------------------
def check_ods_dictionary_coverage(ods_tables: list[str]) -> CheckResult:
    """Verify that the ODS data dictionary mentions every ODS table (Property 10).

    A table counts as mentioned only when its name appears as a whole
    identifier, i.e. not directly preceded or followed by a letter, digit or
    underscore. A plain substring test would accept ``orders`` whenever a
    longer name such as ``big_orders`` is present. Names wrapped in backticks
    or written bare both match.

    Args:
        ods_tables: Table names that must appear in ``ODS_DICT_PATH``.

    Returns:
        A failing ``CheckResult`` when the dictionary file is missing or
        unreadable; otherwise one whose ``details`` list every table without
        an entry, in the order given by ``ods_tables``.
    """
    if not ODS_DICT_PATH.is_file():
        return CheckResult(
            property_id="Property 10",
            description="ODS 数据字典覆盖率",
            passed=False,
            details=[f"数据字典文件不存在: {ODS_DICT_PATH}"],
        )
    try:
        content = ODS_DICT_PATH.read_text(encoding="utf-8")
    except Exception as e:
        return CheckResult(
            property_id="Property 10",
            description="ODS 数据字典覆盖率",
            passed=False,
            details=[f"无法读取数据字典: {e}"],
        )

    def _is_listed(table: str) -> bool:
        # Lookarounds reject hits that are only part of a longer identifier.
        whole_name = rf"(?<![A-Za-z0-9_]){re.escape(table)}(?![A-Za-z0-9_])"
        return re.search(whole_name, content) is not None

    missing = [tbl for tbl in ods_tables if not _is_listed(tbl)]
    return CheckResult(
        property_id="Property 10",
        description="ODS 数据字典覆盖率",
        passed=not missing,
        details=[f"数据字典缺失条目: {tbl}" for tbl in missing],
    )
# ---------------------------------------------------------------------------
# 报告输出
# ---------------------------------------------------------------------------
def print_report(results: list[CheckResult]) -> None:
    """Print the validation report to stdout.

    Each result gets a PASS/FAIL line. A failed result is followed by at most
    20 of its detail messages plus a count of the ones left out. The report
    ends with a passed/total summary.

    Args:
        results: Check results in the order they should be reported.
    """
    detail_limit = 20
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(heavy_rule)
    print("BD_Manual 文档体系验证报告")
    print(heavy_rule)

    for result in results:
        if result.passed:
            print(f"\n[✓ PASS] {result.property_id}: {result.description}")
            continue
        print(f"\n[✗ FAIL] {result.property_id}: {result.description}")
        for detail in result.details[:detail_limit]:
            print(f" - {detail}")
        hidden = len(result.details) - detail_limit
        if hidden > 0:
            print(f" ... 还有 {hidden} 条问题")

    total = len(results)
    passed_count = len([result for result in results if result.passed])
    print("\n" + light_rule)
    print(f"结果: {passed_count}/{total} 项通过")
    if passed_count == total:
        print("所有验证项均通过 ✓")
    else:
        print("存在未通过的验证项,请检查上述详情。")
    print(heavy_rule)
# ---------------------------------------------------------------------------
# 主入口
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: run every BD_Manual check and report.

    The connection string comes from ``--pg-dsn`` or, failing that, from the
    ``PG_DSN`` environment variable. When ``python-dotenv`` is importable,
    ``load_dotenv()`` is called before the environment is read.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        0 when every check passes. 1 when no DSN is available, the table list
        cannot be fetched or is empty, or any check fails.
    """
    cli = argparse.ArgumentParser(
        description="验证 BD_Manual 文档体系的覆盖率、格式和命名规范",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 从 .env 或 PG_DSN 环境变量读取连接字符串
python scripts/validate_bd_manual.py
# 指定连接字符串
python scripts/validate_bd_manual.py --pg-dsn "postgresql://user:pass@host/db"
""",
    )
    cli.add_argument(
        "--pg-dsn",
        help="PostgreSQL 连接字符串(默认从 PG_DSN 环境变量或 .env 读取)",
    )
    options = cli.parse_args(argv)

    # python-dotenv is optional; without it only the real environment is used.
    try:
        from dotenv import load_dotenv
        load_dotenv()
    except ImportError:
        pass

    pg_dsn = options.pg_dsn or os.environ.get("PG_DSN")
    if not pg_dsn:
        print(
            "✗ 未提供 PG_DSN请通过 --pg-dsn 参数或 PG_DSN 环境变量指定",
            file=sys.stderr,
        )
        return 1

    # The table list from the database is the baseline for the coverage checks.
    try:
        ods_tables = fetch_ods_tables(pg_dsn)
    except Exception as exc:
        print(f"✗ 连接数据库失败: {exc}", file=sys.stderr)
        return 1
    if not ods_tables:
        print("⚠ billiards_ods schema 中未找到任何表", file=sys.stderr)
        return 1
    print(f"从数据库获取到 {len(ods_tables)} 张 ODS 表\n")

    outcomes: list[CheckResult] = [
        check_directory_structure(),                # Property 1
        check_ods_doc_coverage(ods_tables),         # Property 4
        check_ods_doc_format(),                     # Property 5
        check_ods_doc_naming(),                     # Property 6
        check_mapping_doc_coverage(ods_tables),     # Property 7
        check_mapping_doc_content(),                # Property 8
        check_mapping_doc_naming(),                 # Property 9
        check_ods_dictionary_coverage(ods_tables),  # Property 10
    ]
    print_report(outcomes)

    # Any failed check gives a non-zero exit status.
    return 0 if all(outcome.passed for outcome in outcomes) else 1


if __name__ == "__main__":
    sys.exit(main())