在前后端开发联调前的提交 20260223
This commit is contained in:
1
scripts/ops/.monitor_token
Normal file
1
scripts/ops/.monitor_token
Normal file
@@ -0,0 +1 @@
|
||||
eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6ImFjY2VzcyIsImV4cCI6MTc3MTY4NzA5NX0.NrCVblH8z3g6cc1VIUw5ep7qmge5MclYk29Pb4hLdmQ
|
||||
90
scripts/ops/_archive_etl_db_docs.py
Normal file
90
scripts/ops/_archive_etl_db_docs.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
Tidy up outdated documents under apps/etl/connectors/feiqiu/docs/database/.

- Archive: change logs under changes/, BD_manual files for dropped tables,
  the stale DDL comparison report, and the stale overview data dictionary.
- Keep: the currently valid ODS/DWD/DWS/ETL_Admin BD_manual files
  (main/ and Ex/) and mappings/.

Usage: cd C:\\NeoZQYY && python scripts/ops/_archive_etl_db_docs.py
"""
import shutil
from pathlib import Path


# Repository root: this file lives at scripts/ops/, three levels below it.
ROOT = Path(__file__).resolve().parent.parent.parent
ETL_DB_DOCS = ROOT / "apps" / "etl" / "connectors" / "feiqiu" / "docs" / "database"
ARCHIVE = ETL_DB_DOCS / "_archived"


# ── Files to archive ────────────────────────────────────────


FILES_TO_ARCHIVE = []

# 1. Every .md under any changes/ directory (change logs, already absorbed
#    into the new DDL baseline).
for changes_dir in ETL_DB_DOCS.rglob("changes"):
    if changes_dir.is_dir():
        for f in changes_dir.glob("*.md"):
            FILES_TO_ARCHIVE.append(f)

# 2. The stale DDL comparison report.
ddl_compare = ETL_DB_DOCS / "ddl_compare_results.md"
if ddl_compare.exists():
    FILES_TO_ARCHIVE.append(ddl_compare)

# 3. Data dictionaries under overview/ (they reference old DDL paths; stale).
overview_dir = ETL_DB_DOCS / "overview"
if overview_dir.exists():
    for f in overview_dir.glob("*.md"):
        FILES_TO_ARCHIVE.append(f)

# 4. BD_manual files for tables that no longer exist after the
#    assistant_abolish cleanup.
DELETED_TABLE_DOCS = [
    "DWD/main/BD_manual_dwd_assistant_trash_event.md",
    "DWD/Ex/BD_manual_dwd_assistant_trash_event_ex.md",
    "ODS/main/BD_manual_assistant_cancellation_records.md",
    # Corresponding mapping document under ODS mappings/.
    "ODS/mappings/mapping_GetAbolitionAssistant_assistant_cancellation_records.md",
]
for rel in DELETED_TABLE_DOCS:
    p = ETL_DB_DOCS / rel
    if p.exists():
        FILES_TO_ARCHIVE.append(p)
|
||||
|
||||
|
||||
def _ensure_gitkeep(directory):
    """Create .gitkeep in *directory* when it holds nothing but (optionally) .gitkeep.

    Deduplicates logic the original repeated for changes/ and overview/ dirs.
    """
    remaining = [f for f in directory.iterdir() if f.name != ".gitkeep"]
    if not remaining:
        gk = directory / ".gitkeep"
        if not gk.exists():
            gk.touch()


def main():
    """Move every file in FILES_TO_ARCHIVE into ARCHIVE (preserving its path
    relative to ETL_DB_DOCS), keep emptied dirs in git via .gitkeep, and
    print a summary of what was moved.
    """
    if not FILES_TO_ARCHIVE:
        print("没有需要归档的文件。")
        return

    ARCHIVE.mkdir(parents=True, exist_ok=True)
    moved = []

    for src in FILES_TO_ARCHIVE:
        # Preserve the path structure relative to ETL_DB_DOCS in the archive.
        rel = src.relative_to(ETL_DB_DOCS)
        dest = ARCHIVE / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src), str(dest))
        moved.append(str(rel))

    # Keep now-empty changes/ and overview/ directories tracked by git.
    for d in ETL_DB_DOCS.rglob("changes"):
        if d.is_dir():
            _ensure_gitkeep(d)

    if overview_dir.exists():
        _ensure_gitkeep(overview_dir)

    print(f"归档目录:{ARCHIVE}")
    print(f"已归档 {len(moved)} 个文件:")
    for f in moved:
        print(f" ✅ {f}")
|
||||
73
scripts/ops/_archive_old_ddl.py
Normal file
73
scripts/ops/_archive_old_ddl.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""
One-off script: archive old DDL files already superseded by docs/database/ddl/
into db/_archived/. Migration scripts, seed data, FDW config and tool scripts
are left untouched.

Usage: cd C:\\NeoZQYY && python scripts/ops/_archive_old_ddl.py
"""
import shutil
from pathlib import Path
from datetime import date

# Repository root: scripts/ops/ is three levels below it.
ROOT = Path(__file__).resolve().parent.parent.parent

# Date-stamped archive directory so runs on different days do not collide.
ARCHIVE_DIR = ROOT / "db" / "_archived" / f"ddl_baseline_{date.today().isoformat()}"

# Files to archive: old baseline DDL (fully superseded by docs/database/ddl/).
FILES_TO_ARCHIVE = [
    # Current baseline under etl_feiqiu/schemas/
    "db/etl_feiqiu/schemas/meta.sql",
    "db/etl_feiqiu/schemas/ods.sql",
    "db/etl_feiqiu/schemas/dwd.sql",
    "db/etl_feiqiu/schemas/core.sql",
    "db/etl_feiqiu/schemas/dws.sql",
    "db/etl_feiqiu/schemas/app.sql",
    # Legacy leftovers under etl_feiqiu/schemas/ (use old schema names billiards_*/etl_admin)
    "db/etl_feiqiu/schemas/schema_dwd_doc.sql",
    "db/etl_feiqiu/schemas/schema_dws.sql",
    "db/etl_feiqiu/schemas/schema_etl_admin.sql",
    "db/etl_feiqiu/schemas/schema_ODS_doc.sql",
    "db/etl_feiqiu/schemas/schema_verify_perf_indexes.sql",
    # Baseline under zqyy_app/schemas/
    "db/zqyy_app/schemas/init.sql",
]
|
||||
|
||||
|
||||
def main():
    """Archive each file in FILES_TO_ARCHIVE under ARCHIVE_DIR (mirroring the
    original layout), report moved/skipped files, and keep the emptied
    schemas/ directories in git via .gitkeep.
    """
    ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)

    moved = []
    skipped = []

    for rel in FILES_TO_ARCHIVE:
        src = ROOT / rel
        if not src.exists():
            skipped.append(rel)
            continue

        # Mirror the original directory layout inside the archive.
        dest = ARCHIVE_DIR / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src), str(dest))
        moved.append(rel)

    print(f"归档目录:{ARCHIVE_DIR}")
    print(f"已移动 {len(moved)} 个文件:")
    for f in moved:
        print(f" ✅ {f}")
    if skipped:
        print(f"跳过 {len(skipped)} 个(不存在):")
        for f in skipped:
            print(f" ⏭️ {f}")

    # Keep the (possibly emptied) schemas/ directories tracked by git.
    for d in ["db/etl_feiqiu/schemas", "db/zqyy_app/schemas"]:
        gk = ROOT / d / ".gitkeep"
        # BUGFIX: gk.touch() raised FileNotFoundError when the directory
        # itself was missing; ensure it exists first.
        gk.parent.mkdir(parents=True, exist_ok=True)
        if not gk.exists():
            gk.touch()
            print(f" 📄 补充 {d}/.gitkeep")

    # Plain string: the original used an f-string with no placeholders.
    print("\n✅ 完成。旧 DDL 已归档,schemas/ 目录保留 .gitkeep")
|
||||
93
scripts/ops/_archive_phase2.py
Normal file
93
scripts/ops/_archive_phase2.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""
Phase-2 archival: migration scripts + outdated change-log documents.
Keep: seeds, fdw, create_test_db, and data-dictionary style BD_Manual files.

Usage: cd C:\\NeoZQYY && python scripts/ops/_archive_phase2.py
"""
import shutil
from pathlib import Path
from datetime import date

# Repository root: scripts/ops/ is three levels below it.
ROOT = Path(__file__).resolve().parent.parent.parent
ARCHIVE_BASE = ROOT / "db" / "_archived" / f"ddl_baseline_{date.today().isoformat()}"

# ── 1. Migration scripts under db/ ──────────────────────────────────────
MIGRATION_FILES = []
for d in ["db/etl_feiqiu/migrations", "db/zqyy_app/migrations"]:
    p = ROOT / d
    if p.exists():
        for f in sorted(p.glob("*.sql")):
            MIGRATION_FILES.append(str(f.relative_to(ROOT)))

# One-off data-migration script.
MIGRATION_FILES.append("db/scripts/migrate_test_data.sql")

# ── 2. Migration change records under docs/database/ (not data dictionaries) ──
# Each BD_Manual below documents a single ALTER/DROP/CREATE operation.
MIGRATION_DOCS = [
    "docs/database/BD_Manual_dim_member_add_birthday.md",  # C1 add column
    "docs/database/BD_Manual_drop_assistant_abolish_tables.md",  # drop tables
    "docs/database/BD_Manual_dws_assistant_monthly_uk_change.md",  # constraint change
    "docs/database/BD_Manual_dws_assistant_salary_uk_change.md",  # constraint change
    "docs/database/BD_Manual_fix_bc_sentinel_dates.md",  # data fix
    "docs/database/BD_Manual_fdw_reverse_member_birthday.md",  # FDW change
    "docs/database/BD_Manual_member_birthday_manual.md",  # new table
    "docs/database/etl_feiqiu_schema_migration.md",  # migration summary
    "docs/database/zqyy_app_admin_web_tables.md",  # new tables
]

# Docs are archived (flattened) into docs/database/_archived/.
DOCS_ARCHIVE = ROOT / "docs" / "database" / "_archived"
|
||||
|
||||
|
||||
def move_file(src_rel, dest_base):
    """Move ROOT/src_rel under *dest_base*, preserving the relative path.

    Returns src_rel on success, or None when the source file is absent.
    """
    source = ROOT / src_rel
    if not source.exists():
        return None
    target = dest_base / src_rel
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(source), str(target))
    return src_rel
|
||||
|
||||
|
||||
def main():
    """Run phase-2 archival: migration SQL first, then migration-change docs,
    finally re-seed .gitkeep files into emptied source directories.
    """
    moved_db = []
    moved_docs = []

    # Archive migration SQL into the date-stamped db/_archived/ tree.
    print("── 归档迁移脚本 → db/_archived/ ──")
    for rel in MIGRATION_FILES:
        result = move_file(rel, ARCHIVE_BASE)
        if result:
            moved_db.append(result)
            print(f" ✅ {result}")

    # Archive migration-change documents (flattened by file name).
    print("\n── 归档迁移变更文档 → docs/database/_archived/ ──")
    for rel in MIGRATION_DOCS:
        src = ROOT / rel
        if not src.exists():
            continue
        DOCS_ARCHIVE.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src), str(DOCS_ARCHIVE / src.name))
        moved_docs.append(rel)
        print(f" ✅ {src.name}")

    # Drop a .gitkeep into any now-empty source directory so git keeps it.
    for rel_dir in ["db/etl_feiqiu/migrations", "db/zqyy_app/migrations", "db/scripts"]:
        dir_path = ROOT / rel_dir
        keep_file = dir_path / ".gitkeep"
        if not dir_path.exists() or keep_file.exists():
            continue
        # Only when nothing but .gitkeep would remain.
        if any(entry.name != ".gitkeep" for entry in dir_path.iterdir()):
            continue
        keep_file.touch()
        print(f" 📄 补充 {rel_dir}/.gitkeep")

    print(f"\n✅ 完成:归档 {len(moved_db)} 个迁移 SQL + {len(moved_docs)} 个变更文档")
|
||||
12
scripts/ops/_check_token.py
Normal file
12
scripts/ops/_check_token.py
Normal file
@@ -0,0 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
"""Report whether the saved monitor JWT (.monitor_token) has expired."""
import json, base64, datetime
from pathlib import Path


def decode_jwt_payload(token):
    """Decode the (unverified) payload segment of a JWT and return it as a dict.

    JWT segments are base64url-encoded (RFC 7515) without padding.
    BUGFIX vs. the original inline code:
    - use urlsafe_b64decode: plain b64decode silently discards the '-'/'_'
      characters of the base64url alphabet, corrupting the JSON;
    - pad with `-len % 4` (0-3 '='); `4 - len % 4` appended 4 '=' when
      no padding was needed.
    """
    payload = token.split(".")[1]
    payload += "=" * (-len(payload) % 4)
    return json.loads(base64.urlsafe_b64decode(payload))


def main():
    """Read .monitor_token next to this script and print its expiry status."""
    token = Path(__file__).parent.joinpath(".monitor_token").read_text().strip()
    d = decode_jwt_payload(token)
    exp = datetime.datetime.fromtimestamp(d["exp"])
    now = datetime.datetime.now()
    print(f"exp={exp}, now={now}, expired={now > exp}")


if __name__ == "__main__":
    main()
|
||||
46
scripts/ops/_env_paths.py
Normal file
46
scripts/ops/_env_paths.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# -*- coding: utf-8 -*-
"""
Unified output-path resolution — shared by every scripts/ops/ script.

Usage:
    from _env_paths import get_output_path
    out_dir = get_output_path("SYSTEM_ANALYZE_ROOT")

Rules:
- load_dotenv(root .env) first, then read from os.environ
- raise KeyError when the variable is undefined, forcing .env configuration
"""
from __future__ import annotations

import os
from pathlib import Path

from dotenv import load_dotenv

# Load the root .env (first load wins; override=False keeps any value
# already present in the environment).
_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)

# CHANGE 2026-02-21 | Also load the connector-level .env to pick up
# API_BASE/API_TOKEN/STORE_ID etc.
# override=False keeps the root .env and real environment variables first.
_FEIQIU_ENV = _ROOT / "apps" / "etl" / "connectors" / "feiqiu" / ".env"
if _FEIQIU_ENV.exists():
    load_dotenv(_FEIQIU_ENV, override=False)
|
||||
|
||||
|
||||
def get_output_path(env_var: str) -> Path:
    """Resolve an output directory from environment variable *env_var*.

    The directory is created (including parents) when missing.

    Raises:
        KeyError: if the variable is undefined or empty in the environment /
            root .env — failing loudly instead of silently falling back to
            a wrong path.
    """
    value = os.environ.get(env_var)
    if not value:
        raise KeyError(
            f"环境变量 {env_var} 未定义。"
            f"请在根 .env 中配置,参考 .env.template 和 docs/deployment/EXPORT-PATHS.md"
        )
    target = Path(value)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
5
scripts/ops/_etl_log_temp.txt
Normal file
5
scripts/ops/_etl_log_temp.txt
Normal file
File diff suppressed because one or more lines are too long
23
scripts/ops/_fetch_logs.py
Normal file
23
scripts/ops/_fetch_logs.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
"""Fetch the complete logs of one execution_id from the local monitor API.

Usage: python scripts/ops/_fetch_logs.py [execution_id]
"""
import json
import sys
from pathlib import Path

import requests

# Bearer token persisted next to this script (see _login_test / _check_token).
TOKEN = Path(__file__).parent.joinpath(".monitor_token").read_text().strip()
BASE = "http://localhost:8000"
HEADERS = {"Authorization": f"Bearer {TOKEN}"}

# Generalized: take the execution id from argv when given, falling back to
# the originally hard-coded id so existing invocations behave the same.
execution_id = sys.argv[1] if len(sys.argv) > 1 else "e21e1935-5abf-434f-9984-69c492402db7"

resp = requests.get(f"{BASE}/api/execution/{execution_id}/logs", headers=HEADERS, timeout=30)
print(f"status_code={resp.status_code}")
data = resp.json()
print(f"output_log length: {len(data.get('output_log') or '')}")
print(f"error_log length: {len(data.get('error_log') or '')}")
print("--- output_log ---")
print(data.get("output_log") or "(empty)")
print("--- error_log ---")
print(data.get("error_log") or "(empty)")
|
||||
89
scripts/ops/_fix_bd_manual_refs.py
Normal file
89
scripts/ops/_fix_bd_manual_refs.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""
Batch-correct stale path references in BD_Manual docs under docs/database/.
- migration-script paths  → annotate as archived
- DDL locations           → update to the new docs/database/ddl/ paths
- old schema-file names   → update

Usage: cd C:\\NeoZQYY && python scripts/ops/_fix_bd_manual_refs.py
"""
import re
from pathlib import Path

# Repository root: scripts/ops/ is three levels below it.
ROOT = Path(__file__).resolve().parent.parent.parent

# Directories whose *.md files are rewritten.
DIRS = [
    ROOT / "docs" / "database",
    ROOT / "apps" / "etl" / "connectors" / "feiqiu" / "docs" / "database",
]

# Path-replacement rules: (regex pattern, replacement) applied via re.subn.
REPLACEMENTS = [
    # Migration-script paths → mark as archived
    (r'`db/etl_feiqiu/migrations/([^`]+)`',
     r'`db/_archived/ddl_baseline_2026-02-22/db/etl_feiqiu/migrations/\1`(已归档)'),
    (r'`db/zqyy_app/migrations/([^`]+)`',
     r'`db/_archived/ddl_baseline_2026-02-22/db/zqyy_app/migrations/\1`(已归档)'),

    # DDL-location references to old schema files
    (r'`db/etl_feiqiu/schemas/meta\.sql`', '`docs/database/ddl/etl_feiqiu__meta.sql`'),
    (r'`db/etl_feiqiu/schemas/ods\.sql`', '`docs/database/ddl/etl_feiqiu__ods.sql`'),
    (r'`db/etl_feiqiu/schemas/dwd\.sql`', '`docs/database/ddl/etl_feiqiu__dwd.sql`'),
    (r'`db/etl_feiqiu/schemas/core\.sql`', '`docs/database/ddl/etl_feiqiu__core.sql`'),
    (r'`db/etl_feiqiu/schemas/dws\.sql`', '`docs/database/ddl/etl_feiqiu__dws.sql`'),
    (r'`db/etl_feiqiu/schemas/app\.sql`', '`docs/database/ddl/etl_feiqiu__app.sql`'),
    (r'`db/zqyy_app/schemas/init\.sql`', '`docs/database/ddl/zqyy_app__public.sql`'),

    # Old schema file names (without a path prefix)
    (r'`database/schema_ODS_doc\.sql`', '`docs/database/ddl/etl_feiqiu__ods.sql`'),
    (r'`database/schema_dwd_doc\.sql`', '`docs/database/ddl/etl_feiqiu__dwd.sql`'),
    (r'`database/schema_dws\.sql`', '`docs/database/ddl/etl_feiqiu__dws.sql`'),
    (r'`database/schema_etl_admin\.sql`', '`docs/database/ddl/etl_feiqiu__meta.sql`'),

    # "DDL location" lines
    (r'DDL 位置:`db/etl_feiqiu/schemas/dws\.sql`',
     'DDL 位置:`docs/database/ddl/etl_feiqiu__dws.sql`'),
]
|
||||
|
||||
|
||||
def process_file(filepath):
    """Apply every REPLACEMENTS rule to *filepath* in place.

    Returns the total number of substitutions; the file is rewritten only
    when at least one substitution occurred.
    """
    text = filepath.read_text(encoding="utf-8")
    total = 0

    for pattern, replacement in REPLACEMENTS:
        text, count = re.subn(pattern, replacement, text)
        total += count

    if total > 0:
        filepath.write_text(text, encoding="utf-8")

    return total
|
||||
|
||||
|
||||
def main():
    """Rewrite stale path references in every BD_Manual markdown under DIRS
    and print a per-file and overall substitution summary.
    """
    touched_files = 0
    substitutions = 0

    for base_dir in DIRS:
        if not base_dir.exists():
            continue
        for md_file in sorted(base_dir.rglob("*.md")):
            # Archived copies must keep their historical references untouched.
            if "_archived" in str(md_file):
                continue
            count = process_file(md_file)
            if count > 0:
                rel = md_file.relative_to(ROOT)
                print(f" ✅ {rel} ({count} 处替换)")
                touched_files += 1
                substitutions += count

    print(f"\n✅ 完成:修改 {touched_files} 个文件,共 {substitutions} 处替换")
|
||||
15
scripts/ops/_login_test.py
Normal file
15
scripts/ops/_login_test.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""One-off script: exercise the backend login API and obtain a JWT token."""
import requests
import json
import sys

# Local dev backend; credentials are the seeded dev admin account.
url = "http://127.0.0.1:8000/api/auth/login"
payload = {"username": "admin", "password": "admin123"}

try:
    resp = requests.post(url, json=payload, timeout=10)
    print(f"Status: {resp.status_code}")
    print(f"Body: {json.dumps(resp.json(), indent=2, ensure_ascii=False)}")
except Exception as e:
    # Broad catch is deliberate: top-level debugging script — just report
    # the failure and exit non-zero.
    print(f"Error: {e}", file=sys.stderr)
    sys.exit(1)
|
||||
153
scripts/ops/_verify_bd_manual_fields.py
Normal file
153
scripts/ops/_verify_bd_manual_fields.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# -*- coding: utf-8 -*-
"""Compare the field lists in BD_Manual docs against the actual database
columns and print a difference report.

Usage: python scripts/ops/_verify_bd_manual_fields.py
Output: stdout (difference report)
"""
import os
import sys
import re

# Load the root .env so TEST_DB_DSN is available.
from dotenv import load_dotenv
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))

import psycopg2

# Connection string of the test database; required.
DSN = os.environ.get("TEST_DB_DSN")
if not DSN:
    print("ERROR: TEST_DB_DSN 未设置", file=sys.stderr)
    sys.exit(1)

# Tables to verify (grouped by the BD_Manual file that documents them).
TABLES_TO_CHECK = [
    # assistant_service_records
    "dwd.dwd_assistant_service_log",
    "dwd.dwd_assistant_service_log_ex",
    # recharge_settlements
    "dwd.dwd_recharge_order",
    "dwd.dwd_recharge_order_ex",
    # store_goods_master
    "dwd.dim_store_goods",
    "dwd.dim_store_goods_ex",
    # site_tables_master
    "dwd.dim_table",
    "dwd.dim_table_ex",
    # goods_stock_movements
    "dwd.dwd_goods_stock_movement",
    # goods_stock_summary
    "dwd.dwd_goods_stock_summary",
    # member_balance_changes
    "dwd.dwd_member_balance_change",
    "dwd.dwd_member_balance_change_ex",
    # store_goods_sales_records
    "dwd.dwd_store_goods_sale",
    "dwd.dwd_store_goods_sale_ex",
    # DWS
    "dws.dws_goods_stock_daily_summary",
    "dws.dws_goods_stock_monthly_summary",
]

# BD_Manual file → column names listed in the document.
# NOTE(review): this dict appears unused in this script (the parse results
# go into all_doc_cols below) — candidate for removal; confirm.
BD_MANUAL_COLS: dict[str, list[str]] = {}
|
||||
|
||||
def parse_md_table_cols(filepath: str) -> dict[str, list[str]]:
    """Extract, per table, the column names listed in a BD_Manual markdown file.

    A table section starts with a heading like "## 1. dwd_xxx(主表)"; its
    column rows follow a markdown table whose first header cell is one of
    the known header labels. Struck-through (~~name~~) and 'scd2_*'
    placeholder rows are skipped.
    """
    header_cells = ('DWD 列名', 'DWS 列名', 'ODS 字段', '日期')
    columns: dict[str, list[str]] = {}
    table_name = None
    reading_rows = False

    with open(filepath, 'r', encoding='utf-8') as fh:
        raw_lines = fh.readlines()

    for raw in raw_lines:
        stripped = raw.strip()

        # New table section, e.g. "## 1. dwd_assistant_service_log(主表)".
        heading = re.match(r'^##\s+\d+\.\s+(\w+)', stripped)
        if heading:
            table_name = heading.group(1)
            reading_rows = False
            continue

        # Only markdown-table rows inside a known section are interesting.
        if table_name is None or '|' not in stripped:
            continue

        cells = [c.strip() for c in stripped.split('|') if c.strip()]
        if len(cells) < 2:
            continue

        first = cells[0]
        # Header/body separator row.
        if first.startswith('---') or first.startswith(':---'):
            continue
        # Header row marks the start of the column rows.
        if first in header_cells:
            reading_rows = True
            continue
        if not reading_rows:
            continue

        # Column name cell, possibly wrapped in backticks.
        name = first.strip('`').strip()
        if name and not name.startswith('~~') and name != 'scd2_*':
            columns.setdefault(table_name, []).append(name)

    return columns
|
||||
|
||||
|
||||
# 解析所有 BD_Manual 文件
|
||||
BD_FILES = [
|
||||
"docs/database/BD_Manual_assistant_service_records.md",
|
||||
"docs/database/BD_Manual_recharge_settlements.md",
|
||||
"docs/database/BD_Manual_store_goods_master.md",
|
||||
"docs/database/BD_Manual_site_tables_master.md",
|
||||
"docs/database/BD_Manual_goods_stock_movements.md",
|
||||
"docs/database/BD_Manual_goods_stock_summary.md",
|
||||
"docs/database/BD_Manual_member_balance_changes.md",
|
||||
"docs/database/BD_Manual_store_goods_sales_records.md",
|
||||
"docs/database/BD_Manual_dws_goods_stock_summary.md",
|
||||
]
|
||||
|
||||
all_doc_cols: dict[str, list[str]] = {}
|
||||
for f in BD_FILES:
|
||||
parsed = parse_md_table_cols(f)
|
||||
for table, cols in parsed.items():
|
||||
all_doc_cols[table] = cols
|
||||
|
||||
# 查询数据库实际列
|
||||
SCD2_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
|
||||
|
||||
# Query the actual columns of each table and diff them against the docs.
conn = psycopg2.connect(DSN)
try:
    cur = conn.cursor()
    for full_table in TABLES_TO_CHECK:
        schema, table = full_table.split('.')
        cur.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            ORDER BY ordinal_position
        """, (schema, table))
        db_cols = [row[0] for row in cur.fetchall()]
        # SCD2 bookkeeping columns are infrastructure, not documented fields.
        db_cols_no_scd2 = [c for c in db_cols if c not in SCD2_COLS]

        # Docs are keyed by bare table name (without the schema prefix).
        doc_cols = all_doc_cols.get(table, [])

        if not doc_cols:
            print(f"\n⚠️ {full_table}: 文档中未找到列定义(表名 '{table}' 未匹配)")
            print(f" DB 列 ({len(db_cols)}): {db_cols}")
            continue

        doc_set = set(doc_cols)
        db_set = set(db_cols_no_scd2)

        in_doc_not_db = doc_set - db_set
        in_db_not_doc = db_set - doc_set

        status = "✅" if not in_doc_not_db and not in_db_not_doc else "❌"
        print(f"\n{status} {full_table}: 文档 {len(doc_cols)} 列, DB {len(db_cols_no_scd2)} 列 (不含 SCD2)")

        if in_doc_not_db:
            print(f" 📄 文档有但 DB 无: {sorted(in_doc_not_db)}")
        if in_db_not_doc:
            print(f" 🗄️ DB 有但文档无: {sorted(in_db_not_doc)}")
finally:
    # Closing the connection also releases the cursor.
    conn.close()
|
||||
@@ -32,13 +32,13 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
"--date-from",
|
||||
type=str,
|
||||
default=None,
|
||||
help="数据获取起始日期 (YYYY-MM-DD)",
|
||||
help="数据获取起始日期 (YYYY-MM-DD),默认 30 天前",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--date-to",
|
||||
type=str,
|
||||
default=None,
|
||||
help="数据获取截止日期 (YYYY-MM-DD)",
|
||||
help="数据获取截止日期 (YYYY-MM-DD),默认今天",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--limit",
|
||||
@@ -58,17 +58,11 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
def resolve_output_dir() -> Path:
|
||||
"""
|
||||
确定输出目录:
|
||||
1. 优先读取环境变量 SYSTEM_ANALYZE_ROOT
|
||||
2. 回退到 docs/reports/
|
||||
3. 确保目录存在(自动创建)
|
||||
1. 从 .env 读取 SYSTEM_ANALYZE_ROOT
|
||||
2. 确保目录存在(自动创建)
|
||||
"""
|
||||
env_root = os.environ.get("SYSTEM_ANALYZE_ROOT")
|
||||
if env_root:
|
||||
out = Path(env_root)
|
||||
else:
|
||||
out = Path("docs/reports")
|
||||
out.mkdir(parents=True, exist_ok=True)
|
||||
return out
|
||||
from _env_paths import get_output_path
|
||||
return get_output_path("SYSTEM_ANALYZE_ROOT")
|
||||
|
||||
|
||||
def generate_output_filename(dt: "datetime") -> str:
|
||||
@@ -86,48 +80,108 @@ def main() -> None:
|
||||
5. 调用 dump_collection_results() 落盘
|
||||
6. 输出采集摘要到 stdout
|
||||
"""
|
||||
from datetime import date as _date, datetime as _datetime
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from datetime import date as _date, datetime as _datetime, timedelta as _timedelta
|
||||
|
||||
# ── 1. 解析 CLI 参数 ──
|
||||
parser = build_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
# ── 2. 加载环境变量(分层叠加:根 .env < ETL .env < 环境变量) ──
|
||||
# override=False 保证后加载的不覆盖先加载的环境变量
|
||||
# 先加载根 .env(最低优先级)
|
||||
load_dotenv(Path(".env"), override=False)
|
||||
# 再加载 ETL 专属 .env(中优先级)
|
||||
load_dotenv(Path("apps/etl/connectors/feiqiu/.env"), override=False)
|
||||
# 真实环境变量(最高优先级)已自动存在于 os.environ
|
||||
# ── 2. 加载环境变量 ──
|
||||
# _env_paths 在 import 时已通过 Path(__file__).parents[2] / ".env" 绝对路径
|
||||
# 加载了根 .env,无需再用相对路径 load_dotenv(避免 cwd 不在项目根时失效)
|
||||
output_dir = resolve_output_dir() # 触发 _env_paths import → 加载根 .env
|
||||
|
||||
# ── 3. 构造 AnalyzerConfig ──
|
||||
date_from = _date.fromisoformat(args.date_from) if args.date_from else None
|
||||
date_to = _date.fromisoformat(args.date_to) if args.date_to else None
|
||||
tables = [t.strip() for t in args.tables.split(",")] if args.tables else None
|
||||
output_dir = resolve_output_dir()
|
||||
# ── 3. 构造基础参数 ──
|
||||
date_to = _date.fromisoformat(args.date_to) if args.date_to else _date.today()
|
||||
user_date_from = _date.fromisoformat(args.date_from) if args.date_from else None
|
||||
target_limit = args.limit
|
||||
tables_filter = [t.strip() for t in args.tables.split(",")] if args.tables else None
|
||||
|
||||
# CHANGE 2026-02-21 | 遵循 testing-env.md:优先使用测试库 TEST_DB_DSN
|
||||
pg_dsn = os.environ.get("TEST_DB_DSN") or os.environ.get("PG_DSN", "")
|
||||
if not pg_dsn:
|
||||
raise RuntimeError("TEST_DB_DSN 和 PG_DSN 均未定义,请检查根 .env 配置")
|
||||
|
||||
from dataflow_analyzer import AnalyzerConfig, ODS_SPECS, collect_all_tables, dump_collection_results
|
||||
|
||||
config = AnalyzerConfig(
|
||||
date_from=date_from,
|
||||
# CHANGE 2026-02-21 | API 凭证缺失时提前报错,避免静默产出空报告
|
||||
api_base = os.environ.get("API_BASE", "")
|
||||
api_token = os.environ.get("API_TOKEN", "")
|
||||
store_id = os.environ.get("STORE_ID", "")
|
||||
missing = [k for k, v in [("API_BASE", api_base), ("API_TOKEN", api_token), ("STORE_ID", store_id)] if not v]
|
||||
if missing:
|
||||
raise RuntimeError(
|
||||
f"API 凭证缺失:{', '.join(missing)}。"
|
||||
f"请在根 .env 中配置,参考 .env.template"
|
||||
)
|
||||
|
||||
base_kwargs = dict(
|
||||
date_to=date_to,
|
||||
limit=args.limit,
|
||||
tables=tables,
|
||||
limit=target_limit,
|
||||
output_dir=output_dir,
|
||||
pg_dsn=os.environ.get("DATABASE_URL") or os.environ.get("PG_DSN", ""),
|
||||
api_base=os.environ.get("API_BASE", ""),
|
||||
api_token=os.environ.get("API_TOKEN", ""),
|
||||
store_id=os.environ.get("STORE_ID", ""),
|
||||
pg_dsn=pg_dsn,
|
||||
api_base=api_base,
|
||||
api_token=api_token,
|
||||
store_id=store_id,
|
||||
)
|
||||
|
||||
# ── 4. 执行采集(使用本模块的 ODS_SPECS) ──
|
||||
# ── 4. 逐表自适应日期扩展采集 ──
|
||||
# CHANGE 2026-02-21 | 策略:10天 → 30天 → 90天,3 个档位
|
||||
expand_days = [10, 30, 90]
|
||||
if user_date_from:
|
||||
# 用户显式指定了 date_from,不做自适应扩展
|
||||
expand_days = []
|
||||
initial_date_from = user_date_from
|
||||
else:
|
||||
initial_date_from = date_to - _timedelta(days=expand_days[0])
|
||||
|
||||
# 首轮采集
|
||||
config = AnalyzerConfig(date_from=initial_date_from, tables=tables_filter, **base_kwargs)
|
||||
results = collect_all_tables(config, specs=ODS_SPECS)
|
||||
actual_date_from = initial_date_from
|
||||
|
||||
# 自适应扩展:对不满 target_limit 的表逐步扩大日期范围
|
||||
# CHANGE 2026-02-21 | 维表(time_fields=None)不参与时间扩展,其 API 不接受日期范围
|
||||
_dim_tables = {s["table"] for s in ODS_SPECS if s.get("time_fields") is None}
|
||||
if not user_date_from:
|
||||
for days in expand_days[1:]:
|
||||
short_tables = [r.table_name for r in results
|
||||
if r.error is None
|
||||
and r.record_count < target_limit
|
||||
and r.table_name not in _dim_tables]
|
||||
if not short_tables:
|
||||
break # 所有表都满足了
|
||||
|
||||
wider_from = date_to - _timedelta(days=days)
|
||||
print(f" [自适应扩展] {len(short_tables)} 张表不足 {target_limit} 条,扩展至 {wider_from} ~ {date_to}")
|
||||
|
||||
wider_config = AnalyzerConfig(
|
||||
date_from=wider_from, tables=short_tables, **base_kwargs)
|
||||
wider_results = collect_all_tables(wider_config, specs=ODS_SPECS)
|
||||
|
||||
# 用更宽范围的结果替换不满的表(仅当新结果记录数更多时)
|
||||
wider_map = {r.table_name: r for r in wider_results}
|
||||
for idx, r in enumerate(results):
|
||||
if r.table_name in wider_map:
|
||||
new_r = wider_map[r.table_name]
|
||||
if new_r.record_count > r.record_count:
|
||||
results[idx] = new_r
|
||||
actual_date_from = wider_from
|
||||
|
||||
# ── 5. 落盘 ──
|
||||
paths = dump_collection_results(results, output_dir)
|
||||
|
||||
# ── 5.1 将实际使用的 date_from/date_to 追加写入 manifest ──
|
||||
import json as _json
|
||||
manifest_path = output_dir / "collection_manifest.json"
|
||||
if manifest_path.exists():
|
||||
with open(manifest_path, "r", encoding="utf-8") as _f:
|
||||
manifest_data = _json.load(_f)
|
||||
manifest_data["date_from"] = str(actual_date_from)
|
||||
manifest_data["date_to"] = str(date_to)
|
||||
with open(manifest_path, "w", encoding="utf-8") as _f:
|
||||
_json.dump(manifest_data, _f, ensure_ascii=False, indent=2)
|
||||
|
||||
# ── 6. 输出采集摘要 ──
|
||||
now = _datetime.now()
|
||||
filename = generate_output_filename(now)
|
||||
|
||||
63
scripts/ops/analyze_v4.py
Normal file
63
scripts/ops/analyze_v4.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
"""Analyse the result of ETL run #4: classify each task as success/failure
and separate root-cause errors from InFailedSqlTransaction cascades."""
import json
import re
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

log_root = get_output_path("SYSTEM_LOG_ROOT")
raw = json.loads((log_root / "2026-02-21__etl_run_raw_v4.json").read_text(encoding="utf-8"))

error_log = raw.get("error_log", "")
lines = error_log.split("\n")

# Extract the task list from the run banner.
# BUGFIX: `tasks` was only assigned inside the `if`, causing a NameError
# below when the banner is absent — default to an empty list.
tasks = []
task_list_match = re.search(r"开始运行任务: \[([^\]]+)\]", error_log)
if task_list_match:
    tasks = [t.strip().strip("'") for t in task_list_match.group(1).split(",")]
    print(f"总任务数: {len(tasks)}")

# Classify each task by its log banners.
success_tasks = []
failed_tasks = []

for task in tasks:
    # BUGFIX: escape task names before interpolating into regex patterns so
    # a metacharacter in a name cannot break or widen the match.
    esc = re.escape(task)
    # A task counts as completed when any known completion banner appears.
    completed = re.search(rf"{esc}: 完成,统计=", error_log) or \
                re.search(rf"{esc}: 完成, 统计=", error_log) or \
                re.search(rf"{esc} ODS 任务完成:", error_log) or \
                re.search(rf"{esc}: 工具类任务执行成功", error_log)
    # BUGFIX: the original used `\\n` inside an rf-string — a literal
    # backslash + 'n' that never matches a real newline, so any failure not
    # on the very last log line was misclassified as "未知状态".
    failed = re.search(rf"任务 {esc} 失败: (.+?)(?:\n|$)", error_log)

    if completed and not failed:
        success_tasks.append(task)
    elif failed:
        err_msg = failed.group(1)[:120]
        failed_tasks.append((task, err_msg))
    else:
        failed_tasks.append((task, "未知状态"))

print(f"\n✅ 成功: {len(success_tasks)} 个")
for t in success_tasks:
    print(f" {t}")

print(f"\n❌ 失败: {len(failed_tasks)} 个")
# Root causes = failures that are not InFailedSqlTransaction cascades.
root_causes = []
cascade_count = 0
for t, err in failed_tasks:
    if "InFailedSqlTransaction" in err:
        cascade_count += 1
    else:
        root_causes.append((t, err))
        print(f" 🔴 {t}: {err}")

print(f"\n 级联失败 (InFailedSqlTransaction): {cascade_count} 个")

if root_causes:
    print("\n根因分析:")
    for t, err in root_causes:
        print(f" {t}: {err}")
34
scripts/ops/analyze_v6_root_cause.py
Normal file
34
scripts/ops/analyze_v6_root_cause.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# -*- coding: utf-8 -*-
"""Find the root-cause error in the v6 log (first error that is not an
InFailedSqlTransaction cascade)."""
import json
from pathlib import Path

from dotenv import load_dotenv

# Load the root .env so _env_paths can resolve SYSTEM_LOG_ROOT.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

from _env_paths import get_output_path

raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v6.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

error_log = data.get("error_log", "")
lines = error_log.strip().split("\n")

# All ERROR lines.
print("=== 所有 ERROR 行 ===\n")
for i, line in enumerate(lines):
    if "ERROR" in line:
        print(f"[L{i+1}] {line}")

# First errors that are NOT InFailedSqlTransaction cascades (the root causes).
print("\n\n=== 非级联错误(根因)===\n")
for i, line in enumerate(lines):
    if "ERROR" in line and "InFailedSqlTransaction" not in line:
        # Print surrounding context (2 lines before, up to 20 after).
        start = max(0, i - 2)
        end = min(len(lines), i + 20)
        for j in range(start, end):
            marker = ">>>" if j == i else "   "
            print(f"{marker} [L{j+1}] {lines[j]}")
        print("---")
||||
35
scripts/ops/analyze_v7_root_cause.py
Normal file
35
scripts/ops/analyze_v7_root_cause.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
"""Find the root-cause errors in the v7 log and list the successful tasks."""
import json
from pathlib import Path

from dotenv import load_dotenv

# Load the root .env so _env_paths can resolve SYSTEM_LOG_ROOT.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

from _env_paths import get_output_path

raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v7.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

error_log = data.get("error_log", "")
lines = error_log.strip().split("\n")

print(f"日志总行数: {len(lines)}")

# All non-cascade ERROR lines ("unsupported operand" is the already-known
# v6 issue, filtered out to surface anything new).
print("\n=== 非级联错误(根因)===\n")
for i, line in enumerate(lines):
    if "ERROR" in line and "InFailedSqlTransaction" not in line and "unsupported operand" not in line:
        # Print surrounding context (2 lines before, up to 15 after).
        start = max(0, i - 2)
        end = min(len(lines), i + 15)
        for j in range(start, end):
            marker = ">>>" if j == i else "   "
            print(f"{marker} [L{j+1}] {lines[j]}")
        print("---")

# Successful tasks, by their completion banners.
print("\n=== 成功任务 ===")
for line in lines:
    if "任务完成:" in line or "工具类任务执行成功" in line:
        print(f" {line.strip()}")
60
scripts/ops/analyze_v8.py
Normal file
60
scripts/ops/analyze_v8.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# -*- coding: utf-8 -*-
"""Analyze the v8 run: extract per-task success/failure status.

Fixes over the previous revision: removed the unused ``skip_pat``/``skips``
and ``first_error_pat`` locals, and de-duplicates successes with the
order-preserving ``dict.fromkeys`` idiom.
"""
import json
import re
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

# Combined stdout + stderr of the v8 run dump.
log_dir = get_output_path("SYSTEM_LOG_ROOT")
raw = json.loads((log_dir / "2026-02-21__etl_run_raw_v8.json").read_text("utf-8"))

error_log = raw.get("error_log", "")
output_log = raw.get("output_log", "")
full = output_log + "\n" + error_log

# Per-task markers emitted by the scheduler.
success_pat = re.compile(r"任务 (\S+) 执行成功")
fail_pat = re.compile(r"任务 (\S+) 失败[::]?\s*(.*)")

successes = success_pat.findall(full)
failures = [(m.group(1), m.group(2)[:120]) for m in fail_pat.finditer(full)]

# De-duplicate, keeping first-occurrence order.
unique_successes = list(dict.fromkeys(successes))

seen_f = set()
unique_failures = []
for task, reason in failures:
    if task not in seen_f:
        seen_f.add(task)
        unique_failures.append((task, reason))

print(f"=== v8 执行结果分析 ===")
print(f"成功: {len(unique_successes)} 个")
for s in unique_successes:
    print(f" ✅ {s}")
print(f"\n失败: {len(unique_failures)} 个")
for task, reason in unique_failures:
    short = reason.split("\n")[0][:100]
    print(f" ❌ {task}: {short}")

# Cascade count: InFailedSqlTransaction errors are knock-on failures
# caused by an earlier root-cause error in the same DB session.
in_failed_count = error_log.count("InFailedSqlTransaction")
print(f"\nInFailedSqlTransaction 出现次数: {in_failed_count}")

# Root causes = ERROR lines that are not part of the cascade.
error_lines = [l for l in error_log.split("\n") if "ERROR" in l and "InFailedSqlTransaction" not in l]
print(f"\n非级联 ERROR 行 ({len(error_lines)} 行):")
for line in error_lines[:20]:
    print(f" {line.strip()[:150]}")
|
||||
61
scripts/ops/analyze_v8_detail.py
Normal file
61
scripts/ops/analyze_v8_detail.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
"""Detailed v8 log analysis: enumerate all task states and DWD load detail.

Fix over the previous revision: removed the unused ``complete_pat`` regex.
"""
import json
import re
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

log_dir = get_output_path("SYSTEM_LOG_ROOT")
raw = json.loads((log_dir / "2026-02-21__etl_run_raw_v8.json").read_text("utf-8"))

error_log = raw.get("error_log", "")
output_log = raw.get("output_log", "")
full = output_log + "\n" + error_log

# Looser patterns than analyze_v8.py: allow arbitrary whitespace around
# the task name.
success_pat = re.compile(r"任务\s+(\S+)\s+执行成功")
fail_pat = re.compile(r"任务\s+(\S+)\s+失败")

# Every task name mentioned anywhere in the combined log.
task_pat = re.compile(r"(ODS_FETCH|DWD_LOAD_FROM_ODS|DWS_\w+)")
all_tasks = set(task_pat.findall(full))

print("=== 日志中出现的所有任务 ===")
for t in sorted(all_tasks):
    print(f" {t}")

print(f"\n=== 成功匹配 ===")
for m in success_pat.finditer(full):
    print(f" ✅ {m.group(1)}")

print(f"\n=== 失败匹配 ===")
for m in fail_pat.finditer(full):
    print(f" ❌ {m.group(1)}")

# Per-table DWD load lines: status, table, duration, optional error text.
dwd_pat = re.compile(r"DWD 装载(成功|失败):(\S+?),用时 ([\d.]+)s(?:,err=(.*))?")
print(f"\n=== DWD 装载详情 ===")
dwd_success = 0
dwd_fail = 0
for m in dwd_pat.finditer(full):
    status, table, dur, err = m.groups()
    icon = "✅" if status == "成功" else "❌"
    if status == "成功":
        dwd_success += 1
    else:
        dwd_fail += 1
    line = f" {icon} {table} ({dur}s)"
    if err:
        line += f" — {err[:80]}"
    print(line)
print(f" 合计: {dwd_success} 成功, {dwd_fail} 失败")

# Context lines for the "year -1 ... out of range" date bug seen in v8.
print(f"\n=== 'year -1' 相关行 ===")
for line in full.split("\n"):
    if "year" in line.lower() and ("-1" in line or "out of range" in line):
        print(f" {line.strip()[:200]}")
|
||||
20
scripts/ops/analyze_v8_grep.py
Normal file
20
scripts/ops/analyze_v8_grep.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
"""Grep the v8 log for lines marking a task as succeeded/completed."""
import json
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

log_dir = get_output_path("SYSTEM_LOG_ROOT")
raw = json.loads((log_dir / "2026-02-21__etl_run_raw_v8.json").read_text("utf-8"))

error_log = raw.get("error_log", "")
output_log = raw.get("output_log", "")
full = output_log + "\n" + error_log

# A line is interesting when it names a layer task AND carries a
# success/completion marker.
layer_tags = ("DWS_", "ODS_", "DWD_")
done_tags = ("成功", "完成", "SUCCESS")
for text in full.split("\n"):
    if any(tag in text for tag in layer_tags) and any(tag in text for tag in done_tags):
        print(text.strip()[:200])
|
||||
72
scripts/ops/analyze_v8_summary.py
Normal file
72
scripts/ops/analyze_v8_summary.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# -*- coding: utf-8 -*-
"""Extract the v8 DWD_LOAD_FROM_ODS completion stats and final task states.

Fix over the previous revision: the stats dict embedded in the log line is
now decoded with ``ast.literal_eval`` instead of ``eval`` — the payload
comes from a log file, so it must never be executed as arbitrary code.
"""
import ast
import json
import re
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

log_dir = get_output_path("SYSTEM_LOG_ROOT")
raw = json.loads((log_dir / "2026-02-21__etl_run_raw_v8.json").read_text("utf-8"))

error_log = raw.get("error_log", "")
output_log = raw.get("output_log", "")
full = output_log + "\n" + error_log

# Locate the "DWD_LOAD_FROM_ODS: 完成 ... 统计={...}" line and decode its
# trailing Python-literal stats dict.
for line in full.split("\n"):
    if "DWD_LOAD_FROM_ODS: 完成" in line:
        idx = line.find("统计=")
        if idx >= 0:
            stats_str = line[idx + len("统计="):]
            try:
                # Safe literal parsing — handles dict/list/str/num/bool/None,
                # which is all a repr()'d stats dict should contain.
                stats = ast.literal_eval(stats_str)
                print("=== DWD_LOAD_FROM_ODS 统计 ===")
                for t in stats.get("tables", []):
                    icon = "✅" if t.get("inserted", 0) + t.get("updated", 0) > 0 or t.get("processed", 0) > 0 else "⚪"
                    print(f" {icon} {t['table']} | mode={t.get('mode','?')} | processed={t.get('processed',0)} ins={t.get('inserted',0)} upd={t.get('updated',0)} skip={t.get('skipped',0)}")
                errors = stats.get("errors", [])
                if errors:
                    print(f"\n 错误 ({len(errors)} 个):")
                    for e in errors:
                        print(f" ❌ {e.get('table','?')}: {str(e.get('error',''))[:100]}")
            except Exception as ex:
                print(f"解析失败: {ex}")
                print(stats_str[:500])
        break

# Final status per task: success markers win; a failure only sticks if the
# task never later reported success.
print("\n=== 任务执行顺序与状态 ===")
task_status = {}
for line in full.split("\n"):
    m = re.search(r"任务\s+(\S+)\s+执行成功", line)
    if m:
        task_status[m.group(1)] = "✅ 成功"
    m = re.search(r"任务\s+(\S+)\s+失败", line)
    if m and m.group(1) not in task_status:
        task_status[m.group(1)] = "❌ 失败"

# Expected task list for the v8 pipeline.
expected = [
    "ODS_FETCH", "DWD_LOAD_FROM_ODS",
    "DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY",
    "DWS_ASSISTANT_CUSTOMER", "DWS_ASSISTANT_SALARY",
    "DWS_ASSISTANT_FINANCE",
    "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
    "DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
    "DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE",
    "DWS_FINANCE_INCOME_STRUCTURE", "DWS_FINANCE_DISCOUNT_DETAIL",
    "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX", "DWS_RELATION_INDEX",
]

for t in expected:
    status = task_status.get(t, "⚪ 未知")
    print(f" {status} — {t}")

s_count = sum(1 for v in task_status.values() if "成功" in v)
f_count = sum(1 for v in task_status.values() if "失败" in v)
print(f"\n合计: {s_count} 成功, {f_count} 失败, {len(expected) - s_count - f_count} 未知")
|
||||
591
scripts/ops/blackbox_test_report.py
Normal file
591
scripts/ops/blackbox_test_report.py
Normal file
@@ -0,0 +1,591 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
r"""
|
||||
黑盒集成测试报告 -- 从 API 输入侧与 DB 输出侧(ODS/DWD/DWS)进行全链路对比。
|
||||
|
||||
用法:
|
||||
cd C:\NeoZQYY
|
||||
uv run python scripts/ops/blackbox_test_report.py
|
||||
|
||||
输出: ETL_REPORT_ROOT / blackbox_report_<timestamp>.md
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
_ROOT = Path(__file__).resolve().parents[2]
|
||||
load_dotenv(_ROOT / ".env", override=False)
|
||||
|
||||
# ── 路径 ──────────────────────────────────────────────────────────────
|
||||
def _env(var: str) -> Path:
|
||||
val = os.environ.get(var)
|
||||
if not val:
|
||||
raise KeyError(f"环境变量 {var} 未定义")
|
||||
p = Path(val)
|
||||
p.mkdir(parents=True, exist_ok=True)
|
||||
return p
|
||||
|
||||
# Input/output roots resolved from the environment (.env loaded above).
REPORT_ROOT = _env("ETL_REPORT_ROOT")  # where the markdown report is written
JSON_ROOT = _env("FETCH_ROOT")         # API JSON landing directory
LOG_ROOT = _env("LOG_ROOT")            # ETL log directory

TZ = ZoneInfo("Asia/Shanghai")
NOW = datetime.now(TZ)                 # single report-wide "now" (tz-aware)
TS = NOW.strftime("%Y%m%d_%H%M%S")     # timestamp embedded in the report filename

import psycopg2
import psycopg2.extras

# Postgres connection string; raises KeyError at import time if PG_DSN is unset.
DSN = os.environ["PG_DSN"]
|
||||
|
||||
# ── 1. 解析 ETL 日志 ─────────────────────────────────────────────────
|
||||
def find_latest_log() -> Path | None:
    """Return the most recently modified ``*.log`` under LOG_ROOT, or None."""
    newest_first = sorted(
        LOG_ROOT.glob("*.log"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    if not newest_first:
        return None
    return newest_first[0]
|
||||
|
||||
def parse_etl_log(log_path: Path) -> dict:
    """Parse an ETL log file and extract per-task execution results.

    Returns a dict keyed by task name. Each value has at least ``status``
    ("SUCC" / "FAIL" / "SKIP") and ``layer``; ODS entries also carry
    ``start``/``end`` timestamps and a ``counts`` dict parsed from the log.

    Fix over the previous revision: the counts dict embedded in the log
    line is decoded with ``ast.literal_eval`` instead of ``eval`` — log
    content must never be executed as arbitrary code.
    """
    import ast  # local import: only needed to parse the counts dict literal

    results = {}
    current_task = None
    task_start_times = {}

    with open(log_path, "r", encoding="utf-8") as f:
        for line in f:
            # Task start marker — remember the start timestamp per task.
            m = re.match(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*开始执行(\w+) \(ODS\)", line)
            if m:
                current_task = m.group(2)
                task_start_times[current_task] = m.group(1)
                continue

            # ODS task completion, carrying a dict literal of row counts.
            m = re.match(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*?(\w+) ODS 任务完成: (\{.*\})", line)
            if m:
                task_name = m.group(2)
                end_time = m.group(1)
                try:
                    counts = ast.literal_eval(m.group(3))
                except Exception:
                    counts = {}
                start_time = task_start_times.get(task_name, "")
                results[task_name] = {
                    "status": "SUCC",
                    "layer": "ODS",
                    "start": start_time,
                    "end": end_time,
                    "counts": counts,
                }
                continue

            # DWD load completion.
            m = re.match(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*DWD_LOAD_FROM_ODS.*完成.*?(\d+).*表", line)
            if m:
                results["DWD_LOAD_FROM_ODS"] = {
                    "status": "SUCC",
                    "layer": "DWD",
                    "end": m.group(1),
                }
                continue

            # DWS/INDEX utility-task success.
            m = re.match(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*?(\w+): 工具类任务执行成功", line)
            if m:
                results[m.group(2)] = {
                    "status": "SUCC",
                    "layer": "DWS/INDEX",
                    "end": m.group(1),
                }
                continue

            # Utility-task failure (error text truncated to 120 chars).
            m = re.match(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*?(\w+): 工具类任务执行失败: (.*)", line)
            if m:
                results[m.group(2)] = {
                    "status": "FAIL",
                    "layer": "DWS/INDEX",
                    "end": m.group(1),
                    "error": m.group(3)[:120],
                }
                continue

            # Task skipped because it is disabled or not registered.
            m = re.match(r".*任务 (\w+) 未启用或不存在", line)
            if m:
                results[m.group(1)] = {
                    "status": "SKIP",
                    "layer": "DWS",
                    "error": "未注册",
                }

    return results
|
||||
|
||||
# ── 2. API 输入侧:统计 JSON 落地行数 ────────────────────────────────
|
||||
def count_api_json_records(task_name: str) -> int | None:
    """Count records in today's newest JSON landing for one ODS task.

    Returns None when the task directory does not exist or no landing from
    today is found. Landing sub-directories are named TASK-SITEID-DATE-TIME,
    so a reverse name sort puts the newest first.
    """
    task_dir = JSON_ROOT / task_name
    if not task_dir.exists():
        return None
    today_str = NOW.strftime("%Y%m%d")
    for sub in sorted(task_dir.iterdir(), key=lambda p: p.name, reverse=True):
        if today_str not in sub.name or not sub.is_dir():
            continue
        total = 0
        for jf in sub.glob("*.json"):
            try:
                payload = json.loads(jf.read_text(encoding="utf-8"))
                if isinstance(payload, list):
                    total += len(payload)
                elif isinstance(payload, dict):
                    # Payload may be wrapped as {"data": {"list": [...]}}.
                    inner = payload.get("data", {}).get("list", payload.get("data", []))
                    if isinstance(inner, list):
                        total += len(inner)
                    else:
                        total += 1
            except Exception:
                # Unreadable/odd file: best-effort count, skip it.
                pass
        return total
    return None
|
||||
|
||||
|
||||
# ── 3. DB output side: per-layer row counts ──────────────────────────
# ODS tables covered by the report (one per fetch task mapped below).
ODS_TABLES = [
    "assistant_accounts_master", "assistant_service_records",
    "settlement_records",
    "table_fee_transactions", "table_fee_discount_records",
    "site_tables_master", "payment_transactions", "refund_transactions",
    "platform_coupon_redemption_records", "member_profiles",
    "member_stored_value_cards", "member_balance_changes",
    "recharge_settlements", "group_buy_packages",
    "group_buy_redemption_records", "goods_stock_summary",
    "goods_stock_movements", "stock_goods_category_tree",
    "store_goods_master", "store_goods_sales_records", "tenant_goods_master",
]

# ODS task name → ODS table name mapping (used by compare_api_vs_ods).
ODS_TASK_TO_TABLE = {
    "ODS_ASSISTANT_ACCOUNT": "assistant_accounts_master",
    "ODS_ASSISTANT_LEDGER": "assistant_service_records",
    "ODS_SETTLEMENT_RECORDS": "settlement_records",
    "ODS_TABLE_USE": "table_fee_transactions",
    "ODS_TABLE_FEE_DISCOUNT": "table_fee_discount_records",
    "ODS_TABLES": "site_tables_master",
    "ODS_PAYMENT": "payment_transactions",
    "ODS_REFUND": "refund_transactions",
    "ODS_PLATFORM_COUPON": "platform_coupon_redemption_records",
    "ODS_MEMBER": "member_profiles",
    "ODS_MEMBER_CARD": "member_stored_value_cards",
    "ODS_MEMBER_BALANCE": "member_balance_changes",
    "ODS_RECHARGE_SETTLE": "recharge_settlements",
    "ODS_GROUP_PACKAGE": "group_buy_packages",
    "ODS_GROUP_BUY_REDEMPTION": "group_buy_redemption_records",
    "ODS_INVENTORY_STOCK": "goods_stock_summary",
    "ODS_INVENTORY_CHANGE": "goods_stock_movements",
    "ODS_GOODS_CATEGORY": "stock_goods_category_tree",
    "ODS_STORE_GOODS": "store_goods_master",
    "ODS_STORE_GOODS_SALES": "store_goods_sales_records",
    "ODS_TENANT_GOODS": "tenant_goods_master",
}
|
||||
|
||||
def query_row_counts(conn, schema: str, tables: list[str]) -> dict[str, int]:
    """Row count per table in *schema*; -1 marks tables that failed to count."""
    counts: dict[str, int] = {}
    with conn.cursor() as cur:
        for table in tables:
            try:
                cur.execute(f"SELECT COUNT(*) FROM {schema}.{table}")
                counts[table] = cur.fetchone()[0]
            except Exception:
                # A failed statement poisons the transaction; roll back so
                # the next count can still run, and flag this table.
                conn.rollback()
                counts[table] = -1
    return counts
|
||||
|
||||
def query_dwd_tables(conn) -> list[str]:
    """List every base table in the ``dwd`` schema, ordered by name."""
    sql = """
        SELECT table_name FROM information_schema.tables
        WHERE table_schema = 'dwd' AND table_type = 'BASE TABLE'
        ORDER BY table_name
    """
    with conn.cursor() as cur:
        cur.execute(sql)
        return [row[0] for row in cur.fetchall()]
|
||||
|
||||
def query_dws_tables(conn) -> list[str]:
    """List every base table in the ``dws`` schema, ordered by name."""
    sql = """
        SELECT table_name FROM information_schema.tables
        WHERE table_schema = 'dws' AND table_type = 'BASE TABLE'
        ORDER BY table_name
    """
    with conn.cursor() as cur:
        cur.execute(sql)
        return [row[0] for row in cur.fetchall()]
|
||||
|
||||
# ── 4. ODS vs DWD row-count comparison ───────────────────────────────
# Simplified copy of DwdLoadTask.TABLE_MAP (dwd_table → source ods_table).
# NOTE(review): kept in sync with DwdLoadTask by hand — verify on change.
DWD_TO_ODS = {
    # dimension tables dim_* (SCD2 — may hold multiple versions per id)
    "dwd.dim_assistant": "ods.assistant_accounts_master",
    "dwd.dim_member": "ods.member_profiles",
    "dwd.dim_member_card_account": "ods.member_stored_value_cards",
    "dwd.dim_table": "ods.site_tables_master",
    "dwd.dim_groupbuy_package": "ods.group_buy_packages",
    "dwd.dim_store_goods": "ods.store_goods_master",
    "dwd.dim_tenant_goods": "ods.tenant_goods_master",
    "dwd.dim_goods_category": "ods.stock_goods_category_tree",
    # fact tables dwd_*
    "dwd.dwd_assistant_service_log": "ods.assistant_service_records",
    "dwd.dwd_member_balance_change": "ods.member_balance_changes",
    "dwd.dwd_recharge_order": "ods.recharge_settlements",
    "dwd.dwd_settlement_head": "ods.settlement_records",
    "dwd.dwd_table_fee_log": "ods.table_fee_transactions",
    "dwd.dwd_table_fee_adjust": "ods.table_fee_discount_records",
    "dwd.dwd_payment": "ods.payment_transactions",
    "dwd.dwd_refund": "ods.refund_transactions",
    "dwd.dwd_platform_coupon_redemption": "ods.platform_coupon_redemption_records",
    "dwd.dwd_groupbuy_redemption": "ods.group_buy_redemption_records",
    "dwd.dwd_store_goods_sale": "ods.store_goods_sales_records",
}
|
||||
|
||||
def compare_ods_dwd(conn) -> list[dict]:
    """Compare ODS vs DWD row counts for every pair in DWD_TO_ODS.

    Returns one dict per pair with total/distinct ODS counts, the DWD
    count, a type label, and the DWD/ODS-distinct ratio. -1 marks a count
    that failed (e.g. missing table).
    """
    rows = []
    with conn.cursor() as cur:
        for dwd_full, ods_full in sorted(DWD_TO_ODS.items()):
            dwd_s, dwd_t = dwd_full.split(".")
            ods_s, ods_t = ods_full.split(".")
            try:
                # ODS: distinct-id count — the snapshot model keeps one row
                # per (id, content_hash), so ids repeat across hash changes.
                cur.execute(f"SELECT COUNT(DISTINCT id) FROM {ods_s}.{ods_t}")
                ods_distinct = cur.fetchone()[0]
                cur.execute(f"SELECT COUNT(*) FROM {ods_s}.{ods_t}")
                ods_total = cur.fetchone()[0]
            except Exception:
                # Roll back so the failed statement doesn't poison later ones.
                conn.rollback()
                ods_distinct = -1
                ods_total = -1
            try:
                cur.execute(f"SELECT COUNT(*) FROM {dwd_s}.{dwd_t}")
                dwd_count = cur.fetchone()[0]
            except Exception:
                conn.rollback()
                dwd_count = -1

            # dim_* tables use SCD2, so their row count may exceed the ODS
            # distinct-id count; fact tables should be roughly equal to it.
            is_dim = dwd_t.startswith("dim_")
            rows.append({
                "dwd_table": dwd_full,
                "ods_table": ods_full,
                "ods_total_rows": ods_total,
                "ods_distinct_ids": ods_distinct,
                "dwd_rows": dwd_count,
                "type": "维度(SCD2)" if is_dim else "事实",
                "ratio": round(dwd_count / ods_distinct, 2) if ods_distinct > 0 else "N/A",
            })
    return rows
|
||||
|
||||
|
||||
# ── 5. DWD 数据质量抽样 ──────────────────────────────────────────────
|
||||
def check_dwd_null_rates(conn, tables: list[str], sample_cols: int = 5) -> list[dict]:
    """Sample NULL rates for up to *sample_cols* leading columns of each DWD table.

    Housekeeping columns (created_at, content_hash, payload, ...) are
    excluded. Returns one dict per table with its row total and a summary
    of columns that have a non-zero NULL rate; total == -1 marks a failure.
    """
    results = []
    with conn.cursor() as cur:
        for t in tables:
            try:
                # Column names come from information_schema (trusted), so the
                # f-string interpolation below stays safe.
                cur.execute(f"""
                    SELECT column_name FROM information_schema.columns
                    WHERE table_schema = 'dwd' AND table_name = %s
                    AND column_name NOT IN ('created_at','updated_at','fetched_at','content_hash','record_index','source_file','source_endpoint','payload')
                    ORDER BY ordinal_position
                    LIMIT %s
                """, (t, sample_cols))
                cols = [r[0] for r in cur.fetchall()]
                if not cols:
                    continue

                cur.execute(f"SELECT COUNT(*) FROM dwd.{t}")
                total = cur.fetchone()[0]
                if total == 0:
                    results.append({"table": t, "total": 0, "null_cols": "空表"})
                    continue

                # One COUNT per sampled column; only report columns with NULLs.
                null_info = []
                for c in cols:
                    cur.execute(f"SELECT COUNT(*) FROM dwd.{t} WHERE {c} IS NULL")
                    null_count = cur.fetchone()[0]
                    rate = round(null_count / total * 100, 1)
                    if rate > 0:
                        null_info.append(f"{c}={rate}%")

                results.append({
                    "table": t,
                    "total": total,
                    "null_cols": ", ".join(null_info) if null_info else "无 NULL",
                })
            except Exception as e:
                # Keep the session usable for the remaining tables.
                conn.rollback()
                results.append({"table": t, "total": -1, "null_cols": str(e)[:80]})
    return results
|
||||
|
||||
|
||||
# ── 6. DWS 汇总合理性检查 ────────────────────────────────────────────
|
||||
def check_dws_sanity(conn) -> list[dict]:
    """Basic sanity checks on every DWS table: row count and newest date.

    The "latest date" uses the first matching candidate date column found
    in information_schema; tables without one report "N/A".
    """
    checks = []
    with conn.cursor() as cur:
        # Check row counts and the most recent data date per DWS table.
        dws_tables = query_dws_tables(conn)
        for t in dws_tables:
            try:
                cur.execute(f"SELECT COUNT(*) FROM dws.{t}")
                cnt = cur.fetchone()[0]

                # Probe for a known date-like column on this table.
                date_col = None
                for candidate in ["stat_date", "salary_month", "report_date", "calc_date", "snapshot_date", "stock_date"]:
                    cur.execute("""
                        SELECT 1 FROM information_schema.columns
                        WHERE table_schema='dws' AND table_name=%s AND column_name=%s
                    """, (t, candidate))
                    if cur.fetchone():
                        date_col = candidate
                        break

                latest = None
                if date_col and cnt > 0:
                    cur.execute(f"SELECT MAX({date_col}) FROM dws.{t}")
                    latest = cur.fetchone()[0]

                checks.append({
                    "table": f"dws.{t}",
                    "rows": cnt,
                    "latest_date": str(latest) if latest else "N/A",
                    "status": "✅" if cnt > 0 else "⚠️ 空表",
                })
            except Exception as e:
                # Roll back so one bad table doesn't poison the rest.
                conn.rollback()
                checks.append({
                    "table": f"dws.{t}",
                    "rows": -1,
                    "latest_date": "ERROR",
                    "status": f"❌ {str(e)[:60]}",
                })
    return checks
|
||||
|
||||
# ── 7. API JSON 输入侧 vs ODS 行数对比 ───────────────────────────────
|
||||
def compare_api_vs_ods(conn, log_results: dict) -> list[dict]:
    """Compare API-side landing counts with ODS table row counts.

    Covers only the tasks listed in ODS_TASK_TO_TABLE (i.e. the tasks this
    ETL run can touch). *log_results* is the dict from parse_etl_log().
    """
    rows = []
    with conn.cursor() as cur:
        for task_name, ods_table in sorted(ODS_TASK_TO_TABLE.items()):
            log_info = log_results.get(task_name, {})
            # "fetched" count reported by the ETL log, if the task ran.
            api_fetched = log_info.get("counts", {}).get("fetched", None)
            # Records counted directly from today's JSON landing files.
            api_json_count = count_api_json_records(task_name)

            try:
                cur.execute(f"SELECT COUNT(*) FROM ods.{ods_table}")
                ods_total = cur.fetchone()[0]
                cur.execute(f"SELECT COUNT(DISTINCT id) FROM ods.{ods_table}")
                ods_distinct = cur.fetchone()[0]
            except Exception:
                # Roll back and mark both counts as unavailable.
                conn.rollback()
                ods_total = -1
                ods_distinct = -1

            status = log_info.get("status", "N/A")
            # NOTE(review): "json_records" is collected here but generate_report's
            # section-2 table does not render it — confirm that is intentional.
            rows.append({
                "task": task_name,
                "ods_table": ods_table,
                "api_fetched": api_fetched if api_fetched is not None else "N/A",
                "json_records": api_json_count if api_json_count is not None else "N/A",
                "ods_total": ods_total,
                "ods_distinct": ods_distinct,
                "etl_status": status,
            })
    return rows
|
||||
|
||||
|
||||
# ── 8. 生成 Markdown 报告 ────────────────────────────────────────────
|
||||
def generate_report(
    log_path: Path | None,
    log_results: dict,
    api_vs_ods: list[dict],
    ods_dwd_compare: list[dict],
    dwd_quality: list[dict],
    dws_sanity: list[dict],
    dws_row_counts: dict[str, int],
) -> str:
    """Render the full black-box report as a Markdown string.

    Sections: 1) ETL run overview, 2) API vs ODS, 3) ODS vs DWD,
    4) DWD NULL-rate sampling, 5) DWS sanity, 6) conclusions.

    NOTE(review): *dws_row_counts* is accepted but never used in the body —
    confirm whether a section was planned for it or the parameter can go.
    """
    lines = []
    lines.append(f"# 黑盒集成测试报告")
    lines.append(f"")
    lines.append(f"生成时间: {NOW.strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append(f"ETL 日志: `{log_path.name if log_path else 'N/A'}`")
    # NOTE(review): the test window below is hard-coded — update per run.
    lines.append(f"测试窗口: 2025-11-01 ~ 2026-02-20 (full_window 模式)")
    lines.append(f"")

    # ── Section 1: run overview ──
    succ = sum(1 for v in log_results.values() if v.get("status") == "SUCC")
    fail = sum(1 for v in log_results.values() if v.get("status") == "FAIL")
    skip = sum(1 for v in log_results.values() if v.get("status") == "SKIP")
    lines.append(f"## 1. ETL 执行总览")
    lines.append(f"")
    lines.append(f"| 指标 | 值 |")
    lines.append(f"|------|-----|")
    lines.append(f"| 成功任务 | {succ} |")
    lines.append(f"| 失败任务 | {fail} |")
    lines.append(f"| 跳过任务 | {skip} |")
    lines.append(f"| 总计 | {len(log_results)} |")
    lines.append(f"")

    # Failure details, only when there are failures.
    if fail > 0:
        lines.append(f"### 失败任务详情")
        lines.append(f"")
        for k, v in log_results.items():
            if v.get("status") == "FAIL":
                lines.append(f"- **{k}**: {v.get('error', '未知错误')}")
        lines.append(f"")

    # Skipped (unregistered) tasks.
    if skip > 0:
        lines.append(f"### 跳过任务(未注册)")
        lines.append(f"")
        for k, v in log_results.items():
            if v.get("status") == "SKIP":
                lines.append(f"- {k}")
        lines.append(f"")

    # ── Section 2: API input side vs ODS output side ──
    lines.append(f"## 2. 输入侧(API)vs 输出侧(ODS)对比")
    lines.append(f"")
    lines.append(f"| 任务 | ODS 表 | API 抓取数 | ODS 总行数 | ODS 去重ID | ETL 状态 |")
    lines.append(f"|------|--------|-----------|-----------|-----------|---------|")
    for r in api_vs_ods:
        lines.append(
            f"| {r['task']} | {r['ods_table']} | {r['api_fetched']} "
            f"| {r['ods_total']} | {r['ods_distinct']} | {r['etl_status']} |"
        )
    lines.append(f"")
    lines.append(f"> 说明: ODS 采用快照模式 (id, content_hash) 为 PK,content_hash 变化产生新行,")
    lines.append(f"> 因此 ODS 总行数 ≥ ODS 去重 ID 数。API 抓取数 = 本次 ETL 从 API 获取的记录数。")
    lines.append(f"")

    # ── Section 3: ODS vs DWD row counts ──
    lines.append(f"## 3. ODS → DWD 行数对比")
    lines.append(f"")
    lines.append(f"| DWD 表 | ODS 表 | 类型 | ODS 总行 | ODS 去重ID | DWD 行数 | 比率 |")
    lines.append(f"|--------|--------|------|---------|-----------|---------|------|")
    for r in ods_dwd_compare:
        lines.append(
            f"| {r['dwd_table']} | {r['ods_table']} | {r['type']} "
            f"| {r['ods_total_rows']} | {r['ods_distinct_ids']} "
            f"| {r['dwd_rows']} | {r['ratio']} |"
        )
    lines.append(f"")
    lines.append(f"> 说明: 维度表(SCD2)的 DWD 行数可能 > ODS 去重 ID(历史版本保留)。")
    lines.append(f"> 事实表的 DWD 行数应 ≈ ODS 去重 ID 数。比率 = DWD行数 / ODS去重ID。")
    lines.append(f"")

    # ── Section 4: DWD NULL-rate sampling ──
    lines.append(f"## 4. DWD 数据质量(NULL 率抽样)")
    lines.append(f"")
    lines.append(f"| DWD 表 | 总行数 | NULL 列情况 |")
    lines.append(f"|--------|--------|------------|")
    for r in dwd_quality:
        lines.append(f"| dwd.{r['table']} | {r['total']} | {r['null_cols']} |")
    lines.append(f"")

    # ── Section 5: DWS sanity ──
    lines.append(f"## 5. DWS 汇总层检查")
    lines.append(f"")
    lines.append(f"| DWS 表 | 行数 | 最新日期 | 状态 |")
    lines.append(f"|--------|------|---------|------|")
    for r in dws_sanity:
        lines.append(f"| {r['table']} | {r['rows']} | {r['latest_date']} | {r['status']} |")
    lines.append(f"")

    # ── Section 6: conclusions ──
    lines.append(f"## 6. 结论")
    lines.append(f"")
    total_ods_ok = sum(1 for r in api_vs_ods if r["etl_status"] == "SUCC")
    total_dwd_ok = sum(1 for r in ods_dwd_compare if r["dwd_rows"] > 0)
    total_dws_ok = sum(1 for r in dws_sanity if r["rows"] > 0)
    total_dws_all = len(dws_sanity)

    lines.append(f"- ODS 层: {total_ods_ok}/{len(api_vs_ods)} 个任务成功入库")
    lines.append(f"- DWD 层: {total_dwd_ok}/{len(ods_dwd_compare)} 个表有数据")
    lines.append(f"- DWS 层: {total_dws_ok}/{total_dws_all} 个表有数据")
    lines.append(f"- 失败任务: {fail} 个(详见第 1 节)")
    lines.append(f"- 跳过任务: {skip} 个(未注册的 DWS 任务)")
    lines.append(f"")

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ── main ──────────────────────────────────────────────────────────────
|
||||
def main():
    """Drive the full report: parse log → query DB → write Markdown."""
    print("=== 黑盒集成测试报告生成 ===")

    # 1. Parse the newest ETL log (report still works without one).
    log_path = find_latest_log()
    if log_path:
        print(f"解析日志: {log_path.name}")
        log_results = parse_etl_log(log_path)
        print(f" 解析到 {len(log_results)} 个任务结果")
    else:
        print("未找到 ETL 日志")
        log_results = {}

    # 2. Connect. autocommit=True means each statement commits on its own;
    # NOTE(review): with autocommit the helpers' conn.rollback() calls
    # should be harmless no-ops — confirm against psycopg2 semantics.
    print("连接数据库...")
    conn = psycopg2.connect(DSN)
    conn.autocommit = True

    try:
        # 3. API input side vs ODS.
        print("对比 API 输入侧 vs ODS...")
        api_vs_ods = compare_api_vs_ods(conn, log_results)

        # 4. ODS vs DWD.
        print("对比 ODS vs DWD...")
        ods_dwd_compare = compare_ods_dwd(conn)

        # 5. DWD quality sampling.
        print("检查 DWD 数据质量...")
        dwd_tables = query_dwd_tables(conn)
        dwd_quality = check_dwd_null_rates(conn, dwd_tables)

        # 6. DWS sanity + row counts.
        print("检查 DWS 汇总层...")
        dws_sanity = check_dws_sanity(conn)
        dws_tables = query_dws_tables(conn)
        dws_row_counts = query_row_counts(conn, "dws", dws_tables)

        # 7. Render and write the Markdown report.
        print("生成报告...")
        report_md = generate_report(
            log_path, log_results,
            api_vs_ods, ods_dwd_compare,
            dwd_quality, dws_sanity, dws_row_counts,
        )

        out_path = REPORT_ROOT / f"blackbox_report_{TS}.md"
        out_path.write_text(report_md, encoding="utf-8")
        print(f"报告已生成: {out_path}")

    finally:
        # Always release the DB connection, even on failure.
        conn.close()


if __name__ == "__main__":
    main()
|
||||
59
scripts/ops/check_dws_cfg_rules.py
Normal file
59
scripts/ops/check_dws_cfg_rules.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Dump the contents of every dws.cfg_* configuration table in the test DB."""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
import psycopg2.extras

load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Test-database DSN must come from the environment.
dsn = os.environ.get("TEST_DB_DSN")
if not dsn:
    raise RuntimeError("TEST_DB_DSN 未配置")

conn = psycopg2.connect(dsn)

with conn.cursor() as cur:
    # 1. List all cfg_* tables.
    # NOTE(review): executed without parameters, so psycopg2 sends '%%'
    # through literally; LIKE then sees two wildcards, which matches the
    # same rows as a single '%'. Confirm, or drop the doubled percent.
    cur.execute("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'dws' AND table_name LIKE 'cfg_%%'
        ORDER BY table_name
    """)
    tables = [r[0] for r in cur.fetchall()]
    print(f"=== DWS cfg_* 配置表列表 ({len(tables)} 张) ===")
    for t in tables:
        print(f" - {t}")
    print()

    # 2. Dump each table's rows.
    for t in tables:
        cur.execute(f"SELECT count(*) FROM dws.{t}")
        cnt = cur.fetchone()[0]
        print(f"\n{'='*60}")
        print(f"表: dws.{t} (共 {cnt} 条记录)")
        print('='*60)

        if cnt == 0:
            print(" (空表)")
            continue

        cur.execute(f"SELECT * FROM dws.{t} ORDER BY 1")
        cols = [desc[0] for desc in cur.description]
        rows = cur.fetchall()

        # Header row, then a separator sized to the header width.
        print(" " + " | ".join(cols))
        print(" " + "-" * (len(" | ".join(cols)) + 10))

        for row in rows:
            vals = []
            for v in row:
                if v is None:
                    vals.append("NULL")
                else:
                    vals.append(str(v))
            print(" " + " | ".join(vals))

conn.close()
|
||||
@@ -23,7 +23,6 @@ expected = [
|
||||
"idx_ods_settlement_records_latest",
|
||||
"idx_ods_table_fee_transactions_latest",
|
||||
"idx_ods_assistant_service_records_latest",
|
||||
"idx_ods_assistant_cancellation_records_latest",
|
||||
"idx_ods_store_goods_sales_records_latest",
|
||||
"idx_ods_payment_transactions_latest",
|
||||
"idx_ods_refund_transactions_latest",
|
||||
@@ -41,7 +40,6 @@ expected = [
|
||||
"idx_ods_store_goods_master_latest",
|
||||
"idx_ods_table_fee_discount_records_latest",
|
||||
"idx_ods_tenant_goods_master_latest",
|
||||
"idx_ods_settlement_ticket_details_latest",
|
||||
]
|
||||
|
||||
cur.execute("""
|
||||
|
||||
62
scripts/ops/check_v4.py
Normal file
62
scripts/ops/check_v4.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# -*- coding: utf-8 -*-
"""Quick status check for the fourth ETL execution run."""
import json
import sys
from pathlib import Path

import requests
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")

TOKEN_FILE = Path(__file__).parent / ".monitor_token"
BASE = "http://localhost:8000"
EXECUTION_ID = "efd4f421-ee10-4244-833f-7b2d68c3c05b"
# SECURITY: this refresh token is a live credential committed to the repo
# (a matching access token also sits in scripts/ops/.monitor_token).
# Rotate it and load it from an environment variable / secret store instead.
REFRESH_TOKEN = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
    "eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
    "XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
)

# Refresh the access token and cache it for the other monitor scripts.
resp = requests.post(f"{BASE}/api/auth/refresh", json={"refresh_token": REFRESH_TOKEN}, timeout=10)
if resp.status_code != 200:
    print(f"刷新失败: {resp.status_code}")
    sys.exit(1)
token = resp.json()["access_token"]
TOKEN_FILE.write_text(token, encoding="utf-8")
headers = {"Authorization": f"Bearer {token}"}

# Query recent execution history and locate the target run.
r = requests.get(f"{BASE}/api/execution/history?limit=5", headers=headers, timeout=15)
if r.status_code != 200:
    print(f"查询失败: {r.status_code} {r.text[:200]}")
    sys.exit(1)

target = next((h for h in r.json() if h["id"] == EXECUTION_ID), None)
if not target:
    print("未找到执行记录")
    sys.exit(1)

status = target.get("status")
dur = target.get("duration_ms")
dur_s = f"{dur/1000:.1f}s" if dur else "—"
print(f"status={status}, duration={dur_s}, exit_code={target.get('exit_code')}")

if status in ("success", "failed", "cancelled"):
    # The run is finished — pull the full logs and archive the raw payload.
    lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=headers, timeout=30)
    if lr.status_code == 200:
        ld = lr.json()
        from _env_paths import get_output_path
        out = get_output_path("SYSTEM_LOG_ROOT")
        outfile = out / "2026-02-21__etl_run_raw_v4.json"
        outfile.write_text(json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"日志已保存: {outfile}")
        # Show the tail of error_log for quick failure triage.
        el = (ld.get("error_log") or "").strip().split("\n")
        print(f"--- error_log 末尾 50 行 (共 {len(el)} 行) ---")
        for line in el[-50:]:
            print(line)
    else:
        print(f"日志获取失败: {lr.status_code}")
|
||||
@@ -24,7 +24,8 @@ class AnalyzerConfig:
|
||||
date_to: date | None = None
|
||||
limit: int = 200
|
||||
tables: list[str] | None = None
|
||||
output_dir: Path = field(default_factory=lambda: Path("docs/reports"))
|
||||
# 调用方必须显式传入(从 SYSTEM_ANALYZE_ROOT 环境变量读取)
|
||||
output_dir: Path = field(default_factory=lambda: Path(""))
|
||||
pg_dsn: str = ""
|
||||
api_base: str = ""
|
||||
api_token: str = ""
|
||||
@@ -420,12 +421,41 @@ def dump_collection_results(
|
||||
|
||||
返回 {类别: 目录路径} 的字典。
|
||||
"""
|
||||
json_trees_dir = output_dir / "json_trees"
|
||||
db_schemas_dir = output_dir / "db_schemas"
|
||||
field_mappings_dir = output_dir / "field_mappings"
|
||||
json_trees_dir.mkdir(parents=True, exist_ok=True)
|
||||
db_schemas_dir.mkdir(parents=True, exist_ok=True)
|
||||
field_mappings_dir.mkdir(parents=True, exist_ok=True)
|
||||
# CHANGE 2026-02-21 | 清理旧子目录后重建,避免 Windows 文件锁导致写入失败
|
||||
import shutil as _shutil, time as _time
|
||||
_sub_dirs = ["json_trees", "db_schemas", "field_mappings"]
|
||||
for _name in _sub_dirs:
|
||||
_d = output_dir / _name
|
||||
if _d.exists():
|
||||
try:
|
||||
_shutil.rmtree(_d)
|
||||
except (PermissionError, OSError):
|
||||
# Windows 文件锁:无法删除也无法遍历,跳过(后面用备选名)
|
||||
pass
|
||||
# Windows rmtree 后句柄可能未释放,等待后再 mkdir
|
||||
_time.sleep(1)
|
||||
|
||||
def _ensure_writable_dir(base: Path, name: str) -> Path:
|
||||
"""确保目录可写,如果被锁则用带后缀的备选名"""
|
||||
d = base / name
|
||||
for _attempt in range(3):
|
||||
try:
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
_test = d / ".write_test"
|
||||
_test.write_text("ok", encoding="utf-8")
|
||||
_test.unlink()
|
||||
return d
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
_time.sleep(1)
|
||||
# 旧目录不可用,用带后缀的新目录
|
||||
d = base / f"{name}_new"
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
print(f" [警告] {name}/ 被锁定,使用备选目录 {d.name}/")
|
||||
return d
|
||||
|
||||
json_trees_dir = _ensure_writable_dir(output_dir, "json_trees")
|
||||
db_schemas_dir = _ensure_writable_dir(output_dir, "db_schemas")
|
||||
field_mappings_dir = _ensure_writable_dir(output_dir, "field_mappings")
|
||||
|
||||
# 解析 TABLE_MAP / FACT_MAPPINGS(用于构建字段映射)
|
||||
table_map = parse_table_map()
|
||||
@@ -508,10 +538,18 @@ def dump_collection_results(
|
||||
|
||||
def _write_json(path: Path, data: Any) -> None:
|
||||
"""UTF-8 编码写入 JSON 文件,ensure_ascii=False,indent=2。"""
|
||||
path.write_text(
|
||||
json.dumps(data, ensure_ascii=False, indent=2, default=str),
|
||||
encoding="utf-8",
|
||||
)
|
||||
content = json.dumps(data, ensure_ascii=False, indent=2, default=str)
|
||||
try:
|
||||
path.write_text(content, encoding="utf-8")
|
||||
except PermissionError:
|
||||
# CHANGE 2026-02-21 | Windows 文件锁重试:先删再写
|
||||
import time
|
||||
time.sleep(1)
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except PermissionError:
|
||||
pass
|
||||
path.write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
@@ -571,18 +609,6 @@ ODS_SPECS: list[dict] = [
|
||||
"extra_params": {},
|
||||
"description": "助教服务流水",
|
||||
},
|
||||
{
|
||||
"code": "ODS_ASSISTANT_ABOLISH",
|
||||
"table": "assistant_cancellation_records",
|
||||
"dwd_table": "dwd_assistant_trash_event",
|
||||
"endpoint": "/AssistantPerformance/GetAbolitionAssistant",
|
||||
"data_path": ("data",),
|
||||
"list_key": "abolitionAssistants",
|
||||
"time_fields": ("startTime", "endTime"),
|
||||
"requires_window": True,
|
||||
"extra_params": {},
|
||||
"description": "助教废除记录",
|
||||
},
|
||||
{
|
||||
"code": "ODS_STORE_GOODS_SALES",
|
||||
"table": "store_goods_sales_records",
|
||||
@@ -788,18 +814,6 @@ ODS_SPECS: list[dict] = [
|
||||
"extra_params": {},
|
||||
"description": "租户商品档案",
|
||||
},
|
||||
{
|
||||
"code": "ODS_SETTLEMENT_TICKET",
|
||||
"table": "settlement_ticket_details",
|
||||
"dwd_table": None,
|
||||
"endpoint": "/Order/GetOrderSettleTicketNew",
|
||||
"data_path": (),
|
||||
"list_key": None,
|
||||
"time_fields": None,
|
||||
"requires_window": False,
|
||||
"extra_params": {},
|
||||
"description": "结账小票详情(按 orderSettleId 逐条获取,不走常规分页)",
|
||||
},
|
||||
]
|
||||
|
||||
# 默认 list_key 候选(与 APIClient 一致)
|
||||
@@ -896,7 +910,6 @@ def fetch_records(spec: dict, config: AnalyzerConfig) -> list[dict]:
|
||||
|
||||
- 有时间字段的表:从今天往回 10 天一批,不够则继续扩展,最多 10 次重试
|
||||
- 无时间字段的表:单次请求
|
||||
- 特殊表(settlement_ticket_details):跳过
|
||||
|
||||
参数:
|
||||
spec: ODS_SPECS 中的单项配置
|
||||
@@ -912,10 +925,6 @@ def fetch_records(spec: dict, config: AnalyzerConfig) -> list[dict]:
|
||||
extra_params = _resolve_extra_params(spec.get("extra_params", {}), config)
|
||||
target_count = config.limit
|
||||
|
||||
# 结账小票是逐条获取的,跳过
|
||||
if spec["table"] == "settlement_ticket_details":
|
||||
return []
|
||||
|
||||
tz = ZoneInfo("Asia/Shanghai")
|
||||
all_records: list[dict] = []
|
||||
|
||||
@@ -973,8 +982,10 @@ def fetch_records(spec: dict, config: AnalyzerConfig) -> list[dict]:
|
||||
|
||||
import re
|
||||
|
||||
# DWD 加载任务源码的默认路径
|
||||
_DWD_TASK_PY = Path("apps/etl/connectors/feiqiu/tasks/dwd/dwd_load_task.py")
|
||||
# DWD 加载任务源码的默认路径(使用绝对路径,避免 cwd 不在项目根时找不到)
|
||||
# CHANGE 2026-02-21 | 相对路径 → 绝对路径,与 _env_paths 同源
|
||||
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||
_DWD_TASK_PY = _PROJECT_ROOT / "apps" / "etl" / "connectors" / "feiqiu" / "tasks" / "dwd" / "dwd_load_task.py"
|
||||
|
||||
|
||||
def parse_table_map(py_path: Path | None = None) -> dict[str, str]:
|
||||
@@ -1059,8 +1070,9 @@ def parse_fact_mappings(py_path: Path | None = None) -> dict[str, list[tuple]]:
|
||||
# BD_manual 文档解析:提取字段级业务描述
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# BD_manual 文档根目录
|
||||
_BD_DOCS_ROOT = Path("apps/etl/connectors/feiqiu/docs/database")
|
||||
# BD_manual 文档根目录(使用绝对路径,与 _DWD_TASK_PY 同源)
|
||||
# CHANGE 2026-02-21 | 相对路径 → 绝对路径,避免 cwd 不在项目根时找不到
|
||||
_BD_DOCS_ROOT = _PROJECT_ROOT / "apps" / "etl" / "connectors" / "feiqiu" / "docs" / "database"
|
||||
|
||||
|
||||
def parse_bd_manual_fields(doc_path: Path) -> dict[str, str]:
|
||||
|
||||
1122
scripts/ops/etl_consistency_check.py
Normal file
1122
scripts/ops/etl_consistency_check.py
Normal file
File diff suppressed because it is too large
Load Diff
207
scripts/ops/export_bug_report.py
Normal file
207
scripts/ops/export_bug_report.py
Normal file
@@ -0,0 +1,207 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
导出 DWS_ASSISTANT_DAILY BUG 修复报告到 SYSTEM_LOG_ROOT。
|
||||
|
||||
用法:python scripts/ops/export_bug_report.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
REPORT = r"""# DWS_ASSISTANT_DAILY BUG 修复报告
|
||||
|
||||
> 生成时间:{now}
|
||||
> 执行 run_uuid:4ba9d2d365ee4a858f1c4104b1942dc2
|
||||
> 执行开始:2026-02-21 15:29:20
|
||||
|
||||
---
|
||||
|
||||
## 1. BUG 概述
|
||||
|
||||
ETL 执行过程中 `DWS_ASSISTANT_DAILY` 任务失败,根因是 `assistant_daily_task.py` 中
|
||||
`_extract_trash_records` 方法的 SQL 引用了 `dwd.dwd_assistant_trash_event` 表中不存在的字段。
|
||||
|
||||
### 错误信息
|
||||
|
||||
```
|
||||
psycopg2.errors.UndefinedColumn: 错误: 字段 "assistant_service_id" 不存在
|
||||
LINE 3: assistant_service_id,
|
||||
^
|
||||
```
|
||||
|
||||
### 级联影响
|
||||
|
||||
`DWS_ASSISTANT_DAILY` 失败后,psycopg2 连接进入 `InFailedSqlTransaction` 状态,
|
||||
级联导致以下 8 个任务全部失败:
|
||||
|
||||
| # | 任务代码 | 失败原因 |
|
||||
|---|---------|---------|
|
||||
| 1 | DWS_ASSISTANT_DAILY | 根因:UndefinedColumn |
|
||||
| 2 | DWS_ASSISTANT_MONTHLY | InFailedSqlTransaction(级联) |
|
||||
| 3 | DWS_ASSISTANT_CUSTOMER | InFailedSqlTransaction(级联) |
|
||||
| 4 | DWS_ASSISTANT_SALARY | InFailedSqlTransaction(级联) |
|
||||
| 5 | DWS_ASSISTANT_FINANCE | InFailedSqlTransaction(级联) |
|
||||
| 6 | ODS_SETTLEMENT_RECORDS | InFailedSqlTransaction(级联) |
|
||||
| 7 | ODS_PAYMENT | InFailedSqlTransaction(级联) |
|
||||
| 8 | ODS_REFUND | InFailedSqlTransaction(级联) |
|
||||
| 9 | DWS_BUILD_ORDER_SUMMARY | InFailedSqlTransaction(级联) |
|
||||
|
||||
从 `ODS_TABLE_USE` 开始,task_executor 的连接恢复机制生效,后续任务恢复正常执行。
|
||||
|
||||
---
|
||||
|
||||
## 2. 根因分析
|
||||
|
||||
### 2.1 错误 SQL(修复前)
|
||||
|
||||
```sql
|
||||
SELECT assistant_service_id, trash_seconds, trash_reason, trash_time
|
||||
FROM dwd.dwd_assistant_trash_event
|
||||
WHERE site_id = %s AND DATE(trash_time) >= %s AND DATE(trash_time) <= %s
|
||||
```
|
||||
|
||||
### 2.2 `dwd_assistant_trash_event` 实际表结构
|
||||
|
||||
| 字段名 | 类型 | 说明 |
|
||||
|--------|------|------|
|
||||
| assistant_trash_event_id | BIGINT (PK) | 废除事件 ID |
|
||||
| site_id | BIGINT | 门店 ID |
|
||||
| table_id | BIGINT | 台桌 ID |
|
||||
| table_area_id | BIGINT | 区域 ID |
|
||||
| assistant_no | VARCHAR(32) | 助教编号 |
|
||||
| assistant_name | VARCHAR(64) | 助教姓名 |
|
||||
| charge_minutes_raw | INTEGER | 废除时长(分钟) |
|
||||
| abolish_amount | NUMERIC(18,2) | 废除金额 |
|
||||
| trash_reason | VARCHAR(255) | 废除原因 |
|
||||
| create_time | TIMESTAMPTZ | 废除时间 |
|
||||
| tenant_id | BIGINT | 租户 ID |
|
||||
|
||||
### 2.3 字段映射错误
|
||||
|
||||
| 错误引用 | 实际字段 | 说明 |
|
||||
|----------|---------|------|
|
||||
| `assistant_service_id` | `assistant_trash_event_id` | PK 名称不同 |
|
||||
| `trash_seconds` | `charge_minutes_raw` | 单位不同(分钟 vs 秒) |
|
||||
| `trash_time` | `create_time` | 字段名不同 |
|
||||
|
||||
### 2.4 深层设计缺陷
|
||||
|
||||
废除表 `dwd_assistant_trash_event` 没有 `assistant_service_id` 外键,
|
||||
无法与服务记录表 `dwd_assistant_service_log` 做 1:1 关联。
|
||||
|
||||
原代码的 `_build_trash_index` 用 `assistant_service_id` 做 key 构建索引,
|
||||
`_aggregate_by_assistant_date` 用 `service_id in trash_index` 判断服务是否被废除。
|
||||
即使 SQL 字段名修正后,这个匹配逻辑在设计上也是无效的——两个 ID 不同源。
|
||||
|
||||
---
|
||||
|
||||
## 3. 修复方案
|
||||
|
||||
### 3.1 文件
|
||||
|
||||
`apps/etl/connectors/feiqiu/tasks/dws/assistant_daily_task.py`
|
||||
|
||||
### 3.2 修改点(共 4 处)
|
||||
|
||||
#### (1) `_extract_trash_records` — SQL 字段名修正
|
||||
|
||||
```sql
|
||||
-- 修复后
|
||||
SELECT
|
||||
assistant_trash_event_id,
|
||||
charge_minutes_raw * 60 AS trash_seconds,
|
||||
trash_reason,
|
||||
create_time AS trash_time,
|
||||
table_id,
|
||||
assistant_name
|
||||
FROM dwd.dwd_assistant_trash_event
|
||||
WHERE site_id = %s
|
||||
AND DATE(create_time) >= %s
|
||||
AND DATE(create_time) <= %s
|
||||
```
|
||||
|
||||
#### (2) `_extract_service_records` — JOIN _ex 表取 is_trash
|
||||
|
||||
```sql
|
||||
-- 新增 LEFT JOIN 和 is_trash 字段
|
||||
SELECT
|
||||
asl.assistant_service_id,
|
||||
...
|
||||
DATE(asl.start_use_time) AS service_date,
|
||||
COALESCE(ex.is_trash, 0) AS is_trash
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
|
||||
ON asl.assistant_service_id = ex.assistant_service_id
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
AND asl.is_delete = 0
|
||||
```
|
||||
|
||||
#### (3) `_build_trash_index` — key 改为 assistant_trash_event_id
|
||||
|
||||
```python
|
||||
# 修复前
|
||||
service_id = record.get('assistant_service_id')
|
||||
|
||||
# 修复后
|
||||
event_id = record.get('assistant_trash_event_id')
|
||||
```
|
||||
|
||||
#### (4) `_aggregate_by_assistant_date` — 废除判断改用 is_trash
|
||||
|
||||
```python
|
||||
# 修复前
|
||||
is_trashed = service_id in trash_index
|
||||
|
||||
# 修复后
|
||||
is_trashed = bool(record.get('is_trash', 0))
|
||||
```
|
||||
|
||||
废除时长也从 `trash_index[service_id]` 改为直接用 `income_seconds`。
|
||||
|
||||
### 3.3 设计决策说明
|
||||
|
||||
`dwd_assistant_service_log_ex` 表的 `is_trash` 字段来自上游 SaaS 系统的
|
||||
`assistant_service_records` API,是服务记录级别的废除标记,比跨表匹配更可靠。
|
||||
|
||||
废除时长统计改用服务记录自身的 `income_seconds`(即该服务的计费时长),
|
||||
而非从废除表取 `charge_minutes_raw`(废除事件的计费分钟数),
|
||||
因为两者无法 1:1 关联。
|
||||
|
||||
---
|
||||
|
||||
## 4. 验证计划
|
||||
|
||||
修复将在下次 ETL 执行时生效。验证步骤:
|
||||
|
||||
1. 重新提交包含 `DWS_ASSISTANT_DAILY` 的执行
|
||||
2. 确认无 SQL 错误
|
||||
3. 检查 `dws.dws_assistant_daily` 表中 `trashed_count` / `trashed_seconds` 是否合理
|
||||
4. 对比 `dwd_assistant_service_log_ex.is_trash = 1` 的记录数与 DWS 汇总的 `trashed_count`
|
||||
|
||||
---
|
||||
|
||||
## 5. 回滚方案
|
||||
|
||||
如需回滚,恢复 `assistant_daily_task.py` 到修改前版本即可。
|
||||
DWS 表数据可通过重新执行 `DWS_ASSISTANT_DAILY` 任务覆盖。
|
||||
"""
|
||||
|
||||
|
||||
def main():
    """Render the DWS_ASSISTANT_DAILY bug-fix report and write it to SYSTEM_LOG_ROOT."""
    output_dir = get_output_path("SYSTEM_LOG_ROOT")
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # REPORT uses .replace (not .format) on purpose: the markdown body is full
    # of literal braces that str.format would choke on.
    rendered = REPORT.replace("{now}", timestamp)
    target = output_dir / "2026-02-21__dws_assistant_daily_bug_fix.md"
    target.write_text(rendered, encoding="utf-8")
    print(f"BUG 修复报告已导出: {target}")


if __name__ == "__main__":
    main()
|
||||
223
scripts/ops/export_dwd_field_review.py
Normal file
223
scripts/ops/export_dwd_field_review.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""
|
||||
导出 DWD 表字段清单(现有 + 拟新增),供人工审查。
|
||||
输出:$FIELD_AUDIT_ROOT/dwd_field_review.md(由 .env 配置)
|
||||
|
||||
对每张涉及的 DWD 表(main + ex),列出:
|
||||
- 现有字段:字段名、数据类型、说明(从 pg_catalog.col_description 获取)
|
||||
- 拟新增字段:字段名、建议类型、来源 ODS 列、说明
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
# 构建 DSN:使用 .env 中的 PG_DSN(指向 test_etl_feiqiu,schema 为 ods/dwd/dws/meta)
|
||||
DSN = os.getenv("PG_DSN")
|
||||
if not DSN:
|
||||
print("ERROR: PG_DSN 未配置"); sys.exit(1)
|
||||
|
||||
# ── 涉及的 DWD 表(schema.table) ──────────────────────────────────
|
||||
TABLES = [
|
||||
# A 类
|
||||
("dwd", "dim_assistant"),
|
||||
("dwd", "dim_assistant_ex"),
|
||||
("dwd", "dwd_assistant_service_log"),
|
||||
("dwd", "dwd_assistant_service_log_ex"),
|
||||
|
||||
("dwd", "dwd_store_goods_sale"),
|
||||
("dwd", "dwd_store_goods_sale_ex"),
|
||||
("dwd", "dwd_member_balance_change"),
|
||||
("dwd", "dwd_member_balance_change_ex"),
|
||||
("dwd", "dim_tenant_goods"),
|
||||
("dwd", "dim_tenant_goods_ex"),
|
||||
("dwd", "dim_table"),
|
||||
("dwd", "dim_table_ex"),
|
||||
# B 类
|
||||
("dwd", "dwd_recharge_order"),
|
||||
("dwd", "dim_store_goods"),
|
||||
("dwd", "dim_store_goods_ex"),
|
||||
]
|
||||
|
||||
# ── 拟新增字段(按 DWD 表分组) ────────────────────────────────────
|
||||
NEW_FIELDS = {
|
||||
"dim_assistant_ex": [
|
||||
("system_role_id", "bigint", "assistant_accounts_master.system_role_id", "系统角色 ID,关联角色权限"),
|
||||
("job_num", "text", "assistant_accounts_master.job_num", "备用工号(当前门店未启用,全 NULL)"),
|
||||
("cx_unit_price", "numeric(18,2)","assistant_accounts_master.cx_unit_price", "促销时段单价(当前值 0.00)"),
|
||||
("pd_unit_price", "numeric(18,2)","assistant_accounts_master.pd_unit_price", "普通时段单价(当前值 0.00)"),
|
||||
],
|
||||
"dwd_assistant_service_log_ex": [
|
||||
("operator_id", "bigint", "assistant_service_records.operator_id", "操作员 ID(如收银员)"),
|
||||
("operator_name", "text", "assistant_service_records.operator_name", "操作员名称快照"),
|
||||
],
|
||||
|
||||
"dwd_member_balance_change_ex": [
|
||||
("relate_id", "bigint", "member_balance_changes.relate_id", "关联充值/订单 ID(0=无关联)"),
|
||||
],
|
||||
"dim_table_ex": [
|
||||
("sitename", "text", "site_tables_master.sitename", "门店名称快照"),
|
||||
("applet_qr_code_url", "text", "site_tables_master.appletqrcodeurl", "小程序二维码 URL(当前全 NULL)"),
|
||||
("audit_status", "integer", "site_tables_master.audit_status", "审核状态枚举"),
|
||||
("charge_free", "integer", "site_tables_master.charge_free", "是否免费(0=否)"),
|
||||
("create_time", "timestamptz", "site_tables_master.create_time", "台桌创建时间"),
|
||||
("delay_lights_time", "integer", "site_tables_master.delay_lights_time", "延迟关灯时间(秒)"),
|
||||
("is_rest_area", "integer", "site_tables_master.is_rest_area", "是否休息区(0=否)"),
|
||||
("light_status", "integer", "site_tables_master.light_status", "灯控状态枚举"),
|
||||
("only_allow_groupon", "integer", "site_tables_master.only_allow_groupon", "是否仅允许团购"),
|
||||
("order_delay_time", "integer", "site_tables_master.order_delay_time", "订单延迟时间(秒)"),
|
||||
("self_table", "integer", "site_tables_master.self_table", "是否自助台桌"),
|
||||
("table_status_name", "text", "site_tables_master.tablestatusname", "台桌状态名称(空闲中/使用中/暂停中)"),
|
||||
("temporary_light_second","integer", "site_tables_master.temporary_light_second","临时灯光秒数"),
|
||||
("virtual_table", "integer", "site_tables_master.virtual_table", "是否虚拟台桌(0=否)"),
|
||||
],
|
||||
"dim_store_goods_ex": [
|
||||
("batch_stock_quantity", "numeric", "store_goods_master.batch_stock_quantity", "批次库存数量"),
|
||||
],
|
||||
}
|
||||
|
||||
# recharge_settlements 仅补映射,不新增列
|
||||
MAPPING_ONLY = {
|
||||
"dwd_recharge_order": [
|
||||
("pl_coupon_sale_amount", "plcouponsaleamount", "平台券销售额"),
|
||||
("mervou_sales_amount", "mervousalesamount", "美团券销售额"),
|
||||
("electricity_money", "electricitymoney", "电费金额"),
|
||||
("real_electricity_money", "realelectricitymoney", "实际电费金额"),
|
||||
("electricity_adjust_money","electricityadjustmoney","电费调整金额"),
|
||||
],
|
||||
}
|
||||
|
||||
# 跳过的字段
|
||||
SKIPPED = [
|
||||
("store_goods_sales_records", "discount_price", "DWD 列名已被 discount_money 占用"),
|
||||
("tenant_goods_master", "commoditycode", "冗余字段,DWD 已有 commodity_code + commodity_code_list"),
|
||||
("store_goods_master", "provisional_total_cost","DWD 列名已被 total_purchase_cost 占用"),
|
||||
("store_goods_master", "time_slot_sale", "ODS 列不存在,需确认 API"),
|
||||
]
|
||||
|
||||
# C 类新建表(仅列出 ODS 列名,后续设计时确定 DWD 列名)
|
||||
C_CLASS_TABLES = {
|
||||
"goods_stock_summary (→ 新建 dwd_goods_stock_summary)": [
|
||||
"sitegoodsid", "goodsname", "goodsunit", "goodscategoryid",
|
||||
"goodscategorysecondid", "categoryname", "rangestartstock",
|
||||
"rangeendstock", "rangein", "rangeout", "rangesale",
|
||||
"rangesalemoney", "rangeinventory", "currentstock",
|
||||
],
|
||||
"goods_stock_movements (→ 新建 dwd_goods_stock_movement)": [
|
||||
"sitegoodsstockid", "tenantid", "siteid", "sitegoodsid",
|
||||
"goodsname", "goodscategoryid", "goodssecondcategoryid",
|
||||
"unit", "price", "stocktype", "changenum", "startnum",
|
||||
"endnum", "changenuma", "startnuma", "endnuma",
|
||||
"remark", "operatorname", "createtime",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def get_table_columns(cur, schema, table):
    """Return (column_name, type, comment) tuples for schema.table.

    Queries pg_catalog directly (rather than information_schema) to avoid
    search_path surprises; system and dropped columns are excluded.
    """
    query = """
        SELECT a.attname AS column_name,
               pg_catalog.format_type(a.atttypid, a.atttypmod) AS col_type,
               COALESCE(d.description, '') AS col_comment
        FROM pg_catalog.pg_attribute a
        JOIN pg_catalog.pg_class c ON c.oid = a.attrelid
        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
        LEFT JOIN pg_catalog.pg_description d
               ON d.objoid = a.attrelid AND d.objsubid = a.attnum
        WHERE n.nspname = %s
          AND c.relname = %s
          AND a.attnum > 0
          AND NOT a.attisdropped
        ORDER BY a.attnum
    """
    cur.execute(query, (schema, table))
    return cur.fetchall()
|
||||
|
||||
|
||||
def main():
    """Export the DWD field review (existing + proposed columns) as Markdown."""
    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()

        from datetime import date  # local import: only needed for the header stamp

        lines = []
        lines.append("# DWD 表字段清单(现有 + 拟新增)\n")
        # FIX: the "export time" header was hardcoded to 2026-02-19; stamp the
        # actual run date instead so re-exports are not mislabeled.
        lines.append(f"> 导出时间:{date.today().isoformat()}")
        lines.append("> 用途:供人工审查排查结论,确认新增字段是否合理\n")
        lines.append("---\n")

        # ── Part 1: existing DWD tables (A/B class) + proposed additions ──
        lines.append("## 第一部分:A/B 类表(已有 DWD 表)\n")

        for schema, table in TABLES:
            cols = get_table_columns(cur, schema, table)
            if not cols:
                lines.append(f"### {schema}.{table}\n")
                lines.append("⚠️ 表不存在或无列\n")
                continue

            lines.append(f"### {schema}.{table}\n")

            # Existing columns
            lines.append("#### 现有字段\n")
            lines.append("| # | 字段名 | 数据类型 | 说明 |")
            lines.append("|---|--------|---------|------|")
            for i, (col_name, col_type, col_comment) in enumerate(cols, 1):
                lines.append(f"| {i} | `{col_name}` | {col_type} | {col_comment} |")
            lines.append("")

            # Proposed new columns
            new = NEW_FIELDS.get(table, [])
            if new:
                lines.append("#### 🆕 拟新增字段\n")
                lines.append("| # | 字段名 | 建议类型 | 来源 ODS 列 | 说明 |")
                lines.append("|---|--------|---------|------------|------|")
                for i, (fname, ftype, fsrc, fdesc) in enumerate(new, 1):
                    lines.append(f"| {i} | `{fname}` | {ftype} | {fsrc} | {fdesc} |")
                lines.append("")

            # Mapping-only additions (the DWD column already exists)
            mo = MAPPING_ONLY.get(table, [])
            if mo:
                lines.append("#### 🔗 仅补 FACT_MAPPINGS(DWD 列已存在)\n")
                lines.append("| # | DWD 列 | ODS 列 | 说明 |")
                lines.append("|---|--------|--------|------|")
                for i, (dwd_col, ods_col, desc) in enumerate(mo, 1):
                    lines.append(f"| {i} | `{dwd_col}` | `{ods_col}` | {desc} |")
                lines.append("")

            lines.append("---\n")

        # ── Part 2: skipped fields ──
        lines.append("## 第二部分:跳过的字段\n")
        lines.append("| # | ODS 表 | ODS 列 | 跳过原因 |")
        lines.append("|---|--------|--------|---------|")
        for i, (tbl, col, reason) in enumerate(SKIPPED, 1):
            lines.append(f"| {i} | {tbl} | `{col}` | {reason} |")
        lines.append("\n---\n")

        # ── Part 3: C-class tables (new DWD tables still to be designed) ──
        lines.append("## 第三部分:C 类表(需新建 DWD 表)\n")
        for title, ods_cols in C_CLASS_TABLES.items():
            lines.append(f"### {title}\n")
            lines.append("| # | ODS 列名 |")
            lines.append("|---|---------|")
            for i, col in enumerate(ods_cols, 1):
                lines.append(f"| {i} | `{col}` |")
            lines.append("")
            lines.append("---\n")

        cur.close()
    finally:
        # FIX: the connection previously leaked when any query raised.
        conn.close()

    # Write the report (output root comes from FIELD_AUDIT_ROOT in .env).
    from _env_paths import get_output_path
    out_dir = get_output_path("FIELD_AUDIT_ROOT")
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / "dwd_field_review.md"
    out_path.write_text("\n".join(lines), encoding="utf-8")
    print(f"✅ 已导出到 {out_path}")


if __name__ == "__main__":
    main()
|
||||
364
scripts/ops/export_etl_result.py
Normal file
364
scripts/ops/export_etl_result.py
Normal file
@@ -0,0 +1,364 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从后端 API 获取 ETL 执行日志,解析各任务结果,导出执行结果报告。
|
||||
|
||||
用法:python scripts/ops/export_etl_result.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
BACKEND_URL = "http://localhost:8000"
|
||||
EXECUTION_ID = "dbf0c29a-253a-4705-a1ef-35cd71243d48"
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
|
||||
|
||||
def get_token() -> str:
    """Return the cached monitor access token, or '' when no cache file exists."""
    if not TOKEN_FILE.exists():
        return ""
    return TOKEN_FILE.read_text(encoding="utf-8").strip()
||||
|
||||
|
||||
def fetch_history(token: str) -> dict:
    """Fetch recent execution history and return the EXECUTION_ID record.

    Falls back to the most recent record when EXECUTION_ID is not present,
    and to {} when the history is empty.
    """
    r = requests.get(
        f"{BACKEND_URL}/api/execution/history",
        headers={"Authorization": f"Bearer {token}"},
        params={"limit": 5},
        timeout=10,
    )
    r.raise_for_status()
    # FIX: the original parsed r.json() up to three times (once in the loop,
    # twice more on the fallback return line); decode the body once.
    history = r.json()
    for item in history:
        if item.get("id") == EXECUTION_ID:
            return item
    return history[0] if history else {}
||||
|
||||
|
||||
def fetch_logs(token: str) -> dict:
    """Download the full log payload for EXECUTION_ID from the backend API."""
    response = requests.get(
        f"{BACKEND_URL}/api/execution/{EXECUTION_ID}/logs",
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
||||
|
||||
|
||||
|
||||
def parse_log(error_log: str) -> list[dict]:
    """Parse per-task results and timings out of the ETL stderr log.

    Returns a list of dicts with keys: task, layer, status, start, end,
    windows, plus stats (success) or error (failure).

    NOTE(review): only ODS completions, task failures and the
    DWD_LOAD_FROM_ODS marker are recorded; successful DWS/INDEX tasks are
    not captured here — confirm whether that is intentional.
    """
    results: list[dict] = []
    lines = error_log.split("\n") if error_log else []

    # FIX: dropped four compiled-but-never-used patterns from the original
    # (dws_extract_re, dwd_done_re, util_fail_re, dws_done_re).
    # Timestamp prefix, e.g. "[2026-02-21 15:29:20]"
    ts_re = re.compile(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]")
    # Task start
    start_re = re.compile(r"开始执行(\S+)\s+\((\w+)\)")
    # ODS task completion with a stats dict
    ods_done_re = re.compile(r"(\S+)\s+ODS 任务完成:\s+(\{.*\})")
    # Task failure
    fail_re = re.compile(r"任务\s+(\S+)\s+失败:\s+(.*)")
    # DWS fetch-phase start
    dws_start_re = re.compile(r"(\S+):\s+抓取阶段开始")
    # Utility-task start
    util_start_re = re.compile(r"(\S+):\s+开始执行工具类任务")
    # Window split
    window_re = re.compile(r"(\S+):\s+窗口拆分为\s+(\d+)\s+段")

    task_starts: dict[str, str] = {}   # task_code -> start timestamp
    task_windows: dict[str, int] = {}  # task_code -> window count

    for line in lines:
        ts_match = ts_re.search(line)
        ts = ts_match.group(1) if ts_match else ""

        # Task start
        m = start_re.search(line)
        if m:
            task_starts[m.group(1)] = ts
            continue

        # DWS fetch-phase start (keep the earliest timestamp seen)
        m = dws_start_re.search(line)
        if m:
            task_starts.setdefault(m.group(1), ts)
            continue

        # Utility-task start (keep the earliest timestamp seen)
        m = util_start_re.search(line)
        if m:
            task_starts.setdefault(m.group(1), ts)
            continue

        # Window split count
        m = window_re.search(line)
        if m:
            task_windows[m.group(1)] = int(m.group(2))
            continue

        # ODS task completion
        m = ods_done_re.search(line)
        if m:
            task_code = m.group(1)
            results.append({
                "task": task_code,
                "layer": "ODS",
                "status": "success",
                "start": task_starts.get(task_code, ""),
                "end": ts,
                "windows": task_windows.get(task_code, 0),
                "stats": m.group(2),
            })
            continue

        # Task failure — record each task at most once, because cascading
        # (InFailedSqlTransaction) errors repeat the same task code.
        m = fail_re.search(line)
        if m:
            task_code = m.group(1)
            if not any(r["task"] == task_code for r in results):
                results.append({
                    "task": task_code,
                    "layer": guess_layer(task_code),
                    "status": "failed",
                    "start": task_starts.get(task_code, ""),
                    "end": ts,
                    "windows": task_windows.get(task_code, 0),
                    "error": m.group(2).strip()[:120],
                })
            continue

    # DWD_LOAD_FROM_ODS emits no stats dict, so look for its completion
    # marker in a separate pass.
    for line in lines:
        if "DWD_LOAD_FROM_ODS" in line and "完成" in line:
            ts_match = ts_re.search(line)
            ts = ts_match.group(1) if ts_match else ""
            if not any(r["task"] == "DWD_LOAD_FROM_ODS" for r in results):
                results.append({
                    "task": "DWD_LOAD_FROM_ODS",
                    "layer": "DWD",
                    "status": "success",
                    "start": task_starts.get("DWD_LOAD_FROM_ODS", ""),
                    "end": ts,
                    "windows": 0,
                    "stats": "",
                })
            break

    return results
|
||||
|
||||
|
||||
def guess_layer(task_code: str) -> str:
    """Infer the warehouse layer from a task-code prefix; 'OTHER' if none match."""
    prefix_map = (
        ("ODS_", "ODS"),
        ("DWD_", "DWD"),
        ("DWS_", "DWS"),
        ("INDEX_", "INDEX"),
    )
    for prefix, layer in prefix_map:
        if task_code.startswith(prefix):
            return layer
    return "OTHER"
|
||||
|
||||
|
||||
|
||||
def calc_duration(start: str, end: str) -> str:
    """Format the wall-clock delta between two 'YYYY-mm-dd HH:MM:SS' stamps.

    Returns seconds under a minute, minutes under an hour, hours otherwise;
    '—' when either stamp is missing or unparsable.
    """
    if not (start and end):
        return "—"
    fmt = "%Y-%m-%d %H:%M:%S"
    try:
        elapsed = (datetime.strptime(end, fmt) - datetime.strptime(start, fmt)).total_seconds()
    except Exception:
        return "—"
    if elapsed < 60:
        return f"{elapsed:.1f}s"
    if elapsed < 3600:
        return f"{elapsed / 60:.1f}m"
    return f"{elapsed / 3600:.1f}h"
||||
|
||||
|
||||
def generate_report(execution: dict, task_results: list[dict]) -> str:
    """Render the first ETL run's markdown report.

    Args:
        execution: execution record from the backend history endpoint
            (keys read: status, started_at, finished_at, duration_ms,
            exit_code, task_codes).
        task_results: parsed per-task dicts (keys read: task, layer, status,
            start, end, windows, and stats or error).

    Returns:
        The complete markdown document as a single string.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    status = execution.get("status", "unknown")
    started = execution.get("started_at", "—")
    finished = execution.get("finished_at", "—")
    duration_ms = execution.get("duration_ms", 0)
    exit_code = execution.get("exit_code", "—")

    # Show both seconds and minutes; fall back to the em-dash when unknown/zero.
    if duration_ms:
        dur_str = f"{duration_ms / 1000:.1f}s ({duration_ms / 60000:.1f}m)"
    else:
        dur_str = "—"

    success_count = sum(1 for r in task_results if r["status"] == "success")
    failed_count = sum(1 for r in task_results if r["status"] == "failed")

    # Report header + overview table.
    lines = [
        "# ETL 执行结果报告",
        "",
        f"> 生成时间:{now}",
        f"> execution_id:{EXECUTION_ID}",
        f"> run_uuid:4ba9d2d365ee4a858f1c4104b1942dc2",
        "",
        "---",
        "",
        "## 执行概览",
        "",
        "| 项目 | 值 |",
        "|------|-----|",
        f"| 状态 | {status} |",
        f"| 开始时间 | {started} |",
        f"| 结束时间 | {finished} |",
        f"| 总时长 | {dur_str} |",
        f"| 退出码 | {exit_code} |",
        f"| 任务总数 | {len(execution.get('task_codes', []))} |",
        f"| 成功 | {success_count} |",
        f"| 失败 | {failed_count} |",
        "",
        "---",
        "",
        "## 任务级结果",
        "",
        "| # | 任务 | 层 | 状态 | 开始 | 结束 | 耗时 | 窗口数 | 备注 |",
        "|---|------|-----|------|------|------|------|--------|------|",
    ]

    # One table row per task; note column prefers stats, falls back to error.
    for i, r in enumerate(task_results, 1):
        dur = calc_duration(r.get("start", ""), r.get("end", ""))
        note = r.get("stats", r.get("error", ""))
        if len(note) > 60:
            note = note[:57] + "..."
        win = r.get("windows", 0)
        win_str = str(win) if win else "—"
        # Shorten full timestamps to the time-of-day part for table width.
        start_short = r.get("start", "—")
        if start_short and len(start_short) > 8:
            start_short = start_short.split(" ")[-1] if " " in start_short else start_short
        end_short = r.get("end", "—")
        if end_short and len(end_short) > 8:
            end_short = end_short.split(" ")[-1] if " " in end_short else end_short

        status_emoji = "✅" if r["status"] == "success" else "❌"
        lines.append(
            f"| {i} | {r['task']} | {r['layer']} | {status_emoji} {r['status']} "
            f"| {start_short} | {end_short} | {dur} | {win_str} | {note} |"
        )

    lines.extend([
        "",
        "---",
        "",
        "## 失败任务分析",
        "",
    ])

    # Failure analysis: treat the first failed task as root cause, the rest
    # as cascade failures (transaction poisoning repeats downstream).
    failed_tasks = [r for r in task_results if r["status"] == "failed"]
    if failed_tasks:
        root_cause = failed_tasks[0] if failed_tasks else None
        cascade = failed_tasks[1:] if len(failed_tasks) > 1 else []

        lines.extend([
            f"### 根因:{root_cause['task']}",
            "",
            f"错误:`{root_cause.get('error', '未知')}`",
            "",
            "原因:`_extract_trash_records` SQL 引用了 `dwd_assistant_trash_event` 中不存在的字段 `assistant_service_id`。",
            "",
            "### 级联失败",
            "",
        ])

        if cascade:
            for r in cascade:
                lines.append(f"- {r['task']}:InFailedSqlTransaction(事务污染)")
        else:
            lines.append("无级联失败。")

        lines.extend([
            "",
            "### 修复状态",
            "",
            "代码已修复(4 处改动),待下次执行验证。",
            "详见:`export/SYSTEM/LOGS/2026-02-21__dws_assistant_daily_bug_fix.md`",
        ])
    else:
        lines.append("无失败任务。")

    # Fixed "next steps" footer.
    lines.extend([
        "",
        "---",
        "",
        "## 下一步",
        "",
        "1. 重新提交包含 9 个失败任务的执行,验证修复",
        "2. 运行 ETL Data Consistency Check",
        "3. 运行 /audit 审计",
    ])

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """Fetch the run's history and logs, parse per-task results, and export
    both a markdown report and the raw API data to the system log directory.
    """
    out_dir = get_output_path("SYSTEM_LOG_ROOT")
    token = get_token()

    print("获取执行历史...")
    execution = fetch_history(token)
    print(f" 状态: {execution.get('status')}, 时长: {execution.get('duration_ms', 0) / 1000:.1f}s")

    print("获取执行日志...")
    logs = fetch_logs(token)
    error_log = logs.get("error_log", "")
    print(f" error_log 长度: {len(error_log)} 字符")

    print("解析任务结果...")
    task_results = parse_log(error_log)
    print(f" 解析到 {len(task_results)} 个任务结果")

    print("生成报告...")
    report = generate_report(execution, task_results)
    out_file = out_dir / "2026-02-21__etl_run_result.md"
    out_file.write_text(report, encoding="utf-8")
    print(f"执行结果报告已导出: {out_file}")

    # Also persist the raw API payload so the report can be re-derived later.
    raw_file = out_dir / "2026-02-21__etl_run_raw.json"
    raw_data = {
        "execution": execution,
        "error_log_length": len(error_log),
        "task_results_parsed": task_results,
    }
    raw_file.write_text(
        json.dumps(raw_data, ensure_ascii=False, indent=2, default=str),
        encoding="utf-8",
    )
    print(f"原始数据已导出: {raw_file}")


if __name__ == "__main__":
    main()
|
||||
192
scripts/ops/export_etl_result_v2.py
Normal file
192
scripts/ops/export_etl_result_v2.py
Normal file
@@ -0,0 +1,192 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导出第二次 ETL 执行结果报告(回归验证)。
|
||||
|
||||
基于 export_etl_result.py 的逻辑,指向新的 execution_id。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
# Local backend the report scripts talk to.
BACKEND_URL = "http://localhost:8000"
# Execution being reported on (second / regression run).
EXECUTION_ID = "e21e1935-5abf-434f-9984-69c492402db7"
# Cache file for the most recently refreshed access token.
TOKEN_FILE = Path(__file__).parent / ".monitor_token"

# refresh_token used for automatic access-token refresh.
# SECURITY NOTE(review): this is a hard-coded credential committed to the
# repository (a JWT with an expiry). It should be read from an environment
# variable / .env instead of living in source control — confirm and rotate.
REFRESH_TOKEN = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
    "eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
    "XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
)
|
||||
|
||||
|
||||
def get_token() -> str:
    """Refresh the access token via the backend and cache it to TOKEN_FILE.

    Returns:
        The freshly minted access_token string.

    Raises:
        RuntimeError: if the refresh endpoint does not answer HTTP 200.
    """
    resp = requests.post(
        f"{BACKEND_URL}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if resp.status_code != 200:
        # Include a snippet of the response body — the status code alone
        # (previous behavior) made auth failures impossible to diagnose.
        raise RuntimeError(f"刷新 token 失败: {resp.status_code} {resp.text[:200]}")
    token = resp.json()["access_token"]
    # Side effect: cache the token so sibling scripts can reuse it.
    TOKEN_FILE.write_text(token, encoding="utf-8")
    return token
|
||||
|
||||
|
||||
def fetch_history(token: str) -> dict:
    """Return the execution record matching EXECUTION_ID, or {} if absent
    from the 10 most recent history entries.
    """
    response = requests.get(
        f"{BACKEND_URL}/api/execution/history",
        headers={"Authorization": f"Bearer {token}"},
        params={"limit": 10},
        timeout=10,
    )
    response.raise_for_status()
    matches = (entry for entry in response.json() if entry.get("id") == EXECUTION_ID)
    return next(matches, {})
|
||||
|
||||
|
||||
def fetch_logs(token: str) -> dict:
    """Fetch the output/error logs of EXECUTION_ID from the backend."""
    url = f"{BACKEND_URL}/api/execution/{EXECUTION_ID}/logs"
    headers = {"Authorization": f"Bearer {token}"}
    # Logs can be large, so allow a generous timeout.
    response = requests.get(url, headers=headers, timeout=60)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def main():
    """Fetch the regression run's record and logs, then export a markdown
    report plus a raw JSON dump to the system log directory.

    Exits with status 1 if EXECUTION_ID is not found in recent history.
    """
    out_dir = get_output_path("SYSTEM_LOG_ROOT")
    token = get_token()

    print("获取执行历史...")
    execution = fetch_history(token)
    if not execution:
        print(f"❌ 未找到 execution_id={EXECUTION_ID}")
        sys.exit(1)

    status = execution.get("status", "unknown")
    duration_ms = execution.get("duration_ms", 0)
    exit_code = execution.get("exit_code")
    started = execution.get("started_at", "")
    finished = execution.get("finished_at", "")
    task_codes = execution.get("task_codes", [])
    summary = execution.get("summary")

    print(f" 状态: {status}, 耗时: {duration_ms / 1000:.1f}s, exit_code: {exit_code}")
    print(f" 任务数: {len(task_codes)}")

    print("获取执行日志...")
    logs = fetch_logs(token)
    # Normalize possible None values to empty strings before measuring.
    output_log = logs.get("output_log", "") or ""
    error_log = logs.get("error_log", "") or ""
    print(f" output_log: {len(output_log)} 字符, error_log: {len(error_log)} 字符")

    # Build the report: header + overview table.
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        "# ETL 回归执行结果报告(第二次)",
        "",
        f"> 生成时间:{now}",
        f"> execution_id:{EXECUTION_ID}",
        f"> 目的:验证 DWS_ASSISTANT_DAILY 修复 + 补跑上次失败的 31 个任务",
        "",
        "---",
        "",
        "## 执行概览",
        "",
        "| 项目 | 值 |",
        "|------|-----|",
        f"| 状态 | {status} |",
        f"| 开始时间 | {started} |",
        f"| 结束时间 | {finished} |",
        f"| 总时长 | {duration_ms / 1000:.1f}s ({duration_ms / 60000:.1f}m) |",
        f"| 退出码 | {exit_code} |",
        f"| 任务总数 | {len(task_codes)} |",
        "",
    ]

    # CLI summary, if the backend recorded one (dict or plain string).
    if summary:
        lines.extend([
            "## Summary(CLI 输出)",
            "",
            "```",
            json.dumps(summary, ensure_ascii=False, indent=2) if isinstance(summary, dict) else str(summary),
            "```",
            "",
        ])

    # Log excerpts: tail of error_log, then tail of output_log.
    if error_log:
        # Task-level results can be read off the error_log tail.
        lines.extend([
            "## 执行日志(error_log 末尾 100 行)",
            "",
            "```",
        ])
        err_lines = error_log.strip().split("\n")
        for line in err_lines[-100:]:
            lines.append(line)
        lines.extend(["```", ""])

    if output_log:
        lines.extend([
            "## 执行日志(output_log 末尾 50 行)",
            "",
            "```",
        ])
        out_lines = output_log.strip().split("\n")
        for line in out_lines[-50:]:
            lines.append(line)
        lines.extend(["```", ""])

    # Comparison table against the first run (first-run figures hard-coded).
    lines.extend([
        "---",
        "",
        "## 与第一次执行的对比",
        "",
        "| 项目 | 第一次 | 第二次(本次) |",
        "|------|--------|---------------|",
        f"| 任务数 | 41 | {len(task_codes)} |",
        f"| 状态 | success (exit_code=0) | {status} (exit_code={exit_code}) |",
        # NOTE(review): uses str.format while the rest of the file uses
        # f-strings — output is identical, but an f-string would be consistent.
        "| 耗时 | 590.7s (9.8m) | {:.1f}s ({:.1f}m) |".format(duration_ms / 1000, duration_ms / 60000),
        "| 成功 | 10/41 | 待分析 |",
        "| 失败 | 31/41 | 待分析 |",
        "| 根因 | DWS_ASSISTANT_DAILY SQL 字段错误 | — |",
        "",
    ])

    report = "\n".join(lines)
    out_file = out_dir / "2026-02-21__etl_run_result_v2.md"
    out_file.write_text(report, encoding="utf-8")
    print(f"✅ 报告已导出: {out_file}")

    # Persist raw data (log tails only, to keep the JSON manageable).
    raw_file = out_dir / "2026-02-21__etl_run_raw_v2.json"
    raw_data = {
        "execution": execution,
        "output_log_length": len(output_log),
        "error_log_length": len(error_log),
        "output_log_tail_200": "\n".join(output_log.strip().split("\n")[-200:]) if output_log else "",
        "error_log_tail_200": "\n".join(error_log.strip().split("\n")[-200:]) if error_log else "",
    }
    raw_file.write_text(
        json.dumps(raw_data, ensure_ascii=False, indent=2, default=str),
        encoding="utf-8",
    )
    print(f"✅ 原始数据已导出: {raw_file}")


if __name__ == "__main__":
    main()
|
||||
159
scripts/ops/export_etl_result_v3.py
Normal file
159
scripts/ops/export_etl_result_v3.py
Normal file
@@ -0,0 +1,159 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""解析第三次 ETL 执行日志,生成结果报告。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from _env_paths import get_output_path
|
||||
|
||||
# Resolve the system log directory and load the raw dump of the third ETL run.
LOG_DIR = get_output_path("SYSTEM_LOG_ROOT")
raw = json.loads((LOG_DIR / "2026-02-21__etl_run_raw_v3.json").read_text("utf-8"))

# Full stderr capture of the run, split into lines for pattern scanning.
error_log = raw.get("error_log", "")
lines = error_log.split("\n")

# Parse each task's result.
# Expected order of the 31 tasks in this execution; the report table is
# rendered in this order so it matches the flow definition.
task_order = [
    "DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER",
    "DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE",
    "ODS_SETTLEMENT_RECORDS", "ODS_PAYMENT", "ODS_REFUND",
    "DWS_BUILD_ORDER_SUMMARY", "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
    "ODS_GOODS_CATEGORY", "ODS_STORE_GOODS", "ODS_STORE_GOODS_SALES",
    "ODS_TENANT_GOODS", "ODS_PLATFORM_COUPON", "ODS_GROUP_PACKAGE",
    "ODS_GROUP_BUY_REDEMPTION", "ODS_INVENTORY_STOCK", "ODS_INVENTORY_CHANGE",
    "DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
    "DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE", "DWS_FINANCE_INCOME_STRUCTURE",
    "DWS_FINANCE_DISCOUNT_DETAIL", "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX",
    "DWS_RELATION_INDEX", "DWD_LOAD_FROM_ODS",
]

# task_code -> {"status": <emoji-tagged status>, "stats": <stats or error text>}
results = {}
|
||||
|
||||
# Successful tasks match "完成,统计=" or "工具类任务执行成功" in the log.
for task in task_order:
    # Success / failure patterns for this specific task code.
    pat_ok = re.compile(rf"{task}: 完成,统计=(.+)")
    pat_ok2 = re.compile(rf"{task}: 工具类任务执行成功")
    pat_ok3 = re.compile(rf"{task}: 结果统计: (.+)")
    pat_fail = re.compile(rf"任务 {task} 失败: (.+)")

    for line in lines:
        m = pat_ok.search(line)
        if m:
            results[task] = {"status": "✅ 成功", "stats": m.group(1)[:120]}
            break
        m2 = pat_ok2.search(line)
        if m2:
            # Utility-style task: look for its stats line anywhere in the log.
            for line2 in lines:
                m3 = pat_ok3.search(line2)
                if m3:
                    results[task] = {"status": "✅ 成功", "stats": m3.group(1)[:120]}
                    break
            else:
                # for-else: no stats line found — record success without stats.
                results[task] = {"status": "✅ 成功", "stats": "—"}
            break
        m4 = pat_fail.search(line)
        if m4:
            err_msg = m4.group(1)[:120]
            # Classify: cascade (transaction poisoning) vs. distinct root causes.
            if "InFailedSqlTransaction" in err_msg:
                results[task] = {"status": "❌ 级联失败", "stats": "InFailedSqlTransaction"}
            elif "UndefinedColumn" in err_msg:
                results[task] = {"status": "❌ 字段错误", "stats": err_msg}
            elif "UniqueViolation" in err_msg:
                results[task] = {"status": "❌ 唯一约束", "stats": err_msg}
            else:
                results[task] = {"status": "❌ 失败", "stats": err_msg}
            break
    else:
        # for-else: no success or failure line found for this task at all.
        results[task] = {"status": "⚠️ 未知", "stats": "日志中未找到"}

# Root cause = first failure in task order that is NOT a cascade failure.
root_cause = None
for task in task_order:
    r = results.get(task, {})
    if r["status"] in ("❌ 字段错误", "❌ 唯一约束", "❌ 失败"):
        root_cause = (task, r)
        break

success_count = sum(1 for r in results.values() if r["status"] == "✅ 成功")
fail_count = sum(1 for r in results.values() if "❌" in r["status"])
unknown_count = sum(1 for r in results.values() if "⚠️" in r["status"])
|
||||
|
||||
# Render the markdown report (run metadata below is hard-coded from the
# execution record of run abc94b2d-...).
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
report = f"""# ETL 回归执行结果报告(第三次)

> 生成时间:{now}
> execution_id:abc94b2d-615f-42ea-83cc-ce687524a6ea
> 目的:验证 BUG 2(DWS_ASSISTANT_MONTHLY UniqueViolation)和 BUG 3(DWS_ASSISTANT_CUSTOMER UndefinedColumn)修复

---

## 执行概览

| 项目 | 值 |
|------|-----|
| 状态 | success |
| 开始时间 | 2026-02-21 19:41:02 |
| 结束时间 | 2026-02-21 19:52:22 |
| 总时长 | 681.2s (11m19s) |
| 退出码 | 0 |
| 任务总数 | 31 |
| 成功 | {success_count} |
| 失败 | {fail_count} |
| 未知 | {unknown_count} |
| 数据统计 | 获取 52,982 / 新增 13,296 / 更新 52,982 |

## BUG 修复验证

| BUG | 任务 | 第二次结果 | 第三次结果 | 验证 |
|-----|------|-----------|-----------|------|
| BUG 1 | DWS_ASSISTANT_DAILY | ✅ 已修复 | {results.get("DWS_ASSISTANT_DAILY", {}).get("status", "?")} | {"✅ 持续通过" if "成功" in results.get("DWS_ASSISTANT_DAILY", {}).get("status", "") else "❌"} |
| BUG 2 | DWS_ASSISTANT_MONTHLY | ❌ UniqueViolation | {results.get("DWS_ASSISTANT_MONTHLY", {}).get("status", "?")} | {"✅ 修复验证通过" if "成功" in results.get("DWS_ASSISTANT_MONTHLY", {}).get("status", "") else "❌ 仍失败"} |
| BUG 3 | DWS_ASSISTANT_CUSTOMER | ❌ UndefinedColumn | {results.get("DWS_ASSISTANT_CUSTOMER", {}).get("status", "?")} | {"✅ 修复验证通过" if "成功" in results.get("DWS_ASSISTANT_CUSTOMER", {}).get("status", "") else "❌ 仍失败"} |

## 逐任务结果

| # | 任务 | 状态 | 统计/错误 |
|---|------|------|----------|
"""

# One table row per task, in flow order.
for i, task in enumerate(task_order, 1):
    r = results.get(task, {"status": "?", "stats": "?"})
    report += f"| {i} | {task} | {r['status']} | {r['stats'][:80]} |\n"

# Root-cause section only when a non-cascade failure was found.
if root_cause:
    report += f"""
## 根因分析

本次新发现的根因错误:

- 任务:`{root_cause[0]}`
- 错误:{root_cause[1]["stats"]}
- 影响:后续所有任务因 `InFailedSqlTransaction` 级联失败

"""

# Comparison table across the three runs (earlier figures hard-coded).
report += f"""
## 三次执行对比

| 项目 | 第一次 | 第二次 | 第三次(本次) |
|------|--------|--------|---------------|
| 任务数 | 41 | 31 | 31 |
| 耗时 | 590.7s | 150.4s | 681.2s |
| 成功 | 10/41 | 3/31 | {success_count}/31 |
| 失败 | 31/41 | 28/31 | {fail_count}/31 |
| 根因 | DWS_ASSISTANT_DAILY SQL 字段 | DWS_ASSISTANT_MONTHLY UK + DWS_ASSISTANT_CUSTOMER site_id | {"DWS_MEMBER_CONSUMPTION site_id" if root_cause and "MEMBER_CONSUMPTION" in root_cause[0] else root_cause[0] if root_cause else "无"} |
"""

# Write the report and echo a short summary to the console.
out_path = LOG_DIR / "2026-02-21__etl_run_result_v3.md"
out_path.write_text(report, encoding="utf-8")
print(f"报告已保存: {out_path}")
print(f"\n成功: {success_count}, 失败: {fail_count}, 未知: {unknown_count}")
if root_cause:
    print(f"根因: {root_cause[0]} — {root_cause[1]['stats'][:80]}")
|
||||
215
scripts/ops/export_full_bug_report.py
Normal file
215
scripts/ops/export_full_bug_report.py
Normal file
@@ -0,0 +1,215 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导出完整 BUG 修复报告(BUG 1~11)。"""
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load .env before importing the path helper, which reads env vars at import time.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

# Destination directory for the exported report.
out = get_output_path("SYSTEM_LOG_ROOT")
|
||||
|
||||
report = r"""# ETL 前后端联调 — BUG 修复全记录
|
||||
|
||||
> 日期: 2026-02-21
|
||||
> 执行轮次: v1 ~ v8(共 8 次)
|
||||
> 任务配置: api_full, full_window, 2025-11-01 ~ 2026-02-20, 30天窗口切分, force_full, 19个任务
|
||||
|
||||
---
|
||||
|
||||
## 总览
|
||||
|
||||
| 指标 | v1 (首次) | v6 (中期最佳) | v8 (最终) |
|
||||
|------|-----------|--------------|-----------|
|
||||
| 耗时 | 590.7s | 29m26s | 1m24s |
|
||||
| 成功任务 | 10/41 | 11/19 | 14/19 |
|
||||
| 失败任务 | 31/41 | 8/19 | 5/19 |
|
||||
| 累计修复 BUG | 0 | 7 | 11 |
|
||||
|
||||
最终 5 个失败均为 `InFailedSqlTransaction` 级联(根因是上游数据质量问题,非代码 BUG)。
|
||||
|
||||
---
|
||||
|
||||
## BUG 详情
|
||||
|
||||
### BUG 1 — DWS_ASSISTANT_DAILY SQL 字段引用错误
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v1 |
|
||||
| 验证版本 | v2 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dws/assistant_daily_task.py` |
|
||||
| 错误现象 | `UndefinedColumn: column "xxx" does not exist`,DWS_ASSISTANT_DAILY 及其下游 31 个任务全部失败 |
|
||||
| 根因 | SQL 中引用了 DWD 表中不存在的列名(4 处字段名与实际 DDL 不匹配) |
|
||||
| 修复方式 | 修正 4 处列名引用,对齐 `dwd.dwd_table_fee_log` / `dwd.dwd_assistant_service_log` 的实际 DDL |
|
||||
| 修复结果 | ✅ v2 中 DWS_ASSISTANT_DAILY 执行成功 |
|
||||
|
||||
### BUG 2 — DWS_ASSISTANT_MONTHLY GROUP BY 聚合错误
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v2 |
|
||||
| 验证版本 | v3 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dws/assistant_monthly_task.py` |
|
||||
| 错误现象 | `UniqueViolation: duplicate key value violates unique constraint` |
|
||||
| 根因 | GROUP BY 子句缺少必要的聚合列,导致同一主键产生多行,INSERT 时违反唯一约束 |
|
||||
| 修复方式 | 将非 GROUP BY 列改用 `MAX()` 聚合函数包裹 |
|
||||
| 修复结果 | ✅ v3 中 DWS_ASSISTANT_MONTHLY 执行成功(删除 9 行,插入 9 行) |
|
||||
|
||||
### BUG 3 — DWS_ASSISTANT_CUSTOMER 引用不存在的 site_id 列
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v2 |
|
||||
| 验证版本 | v3 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dws/assistant_customer_task.py` |
|
||||
| 错误现象 | `UndefinedColumn: column dm.site_id does not exist` |
|
||||
| 根因 | `dwd.dim_member` 表没有 `site_id` 列,实际字段为 `register_site_id` |
|
||||
| 修复方式 | `dm.site_id` → `dm.register_site_id` |
|
||||
| 修复结果 | ✅ v3 中 DWS_ASSISTANT_CUSTOMER 执行成功(285 行) |
|
||||
|
||||
### BUG 4 — 多个 DWS 任务引用 dim_member/dim_member_card_account 的 site_id
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v3 |
|
||||
| 验证版本 | v4 |
|
||||
| 文件 | `assistant_customer_task.py`、`member_consumption_task.py`、`finance_recharge_task.py`(共 4 处) |
|
||||
| 错误现象 | 多个 DWS 任务因 `UndefinedColumn: site_id` 失败 |
|
||||
| 根因 | 与 BUG 3 同源 — `dim_member` 和 `dim_member_card_account` 均无 `site_id`,需用 `register_site_id` |
|
||||
| 修复方式 | 4 处 `site_id` → `register_site_id` |
|
||||
| 修复结果 | ✅ v4 中相关任务执行成功 |
|
||||
|
||||
### BUG 5 — DWS_MEMBER_VISIT 引用不存在的 birthday 字段
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v4 |
|
||||
| 验证版本 | v6 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dws/member_visit_task.py` |
|
||||
| 错误现象 | `UndefinedColumn: column dm.birthday does not exist` |
|
||||
| 根因 | `dwd.dim_member` 表没有 `birthday` 字段(上游 API 不提供) |
|
||||
| 修复方式 | 移除 `birthday` 相关的 SELECT/INSERT/GROUP BY 引用 |
|
||||
| 修复结果 | ✅ v6 中 DWS_MEMBER_VISIT 执行成功(v5 被 BUG 6 遮蔽) |
|
||||
|
||||
### BUG 6 — DWS_MEMBER_VISIT _extract_table_info() 字段名不匹配
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v5 |
|
||||
| 验证版本 | v6 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dws/member_visit_task.py` |
|
||||
| 错误现象 | `UndefinedColumn` — `_extract_table_info()` 方法中引用了 DWD 表中不存在的列名 |
|
||||
| 根因 | `_extract_table_info()` 中的字段名与 `dwd.dwd_table_fee_log` 实际 DDL 不一致 |
|
||||
| 修复方式 | 修正 `_extract_table_info()` 中的列名映射 |
|
||||
| 修复结果 | ✅ v6 中 DWS_MEMBER_VISIT 执行成功 |
|
||||
|
||||
### BUG 7 — DWS_FINANCE_INCOME_STRUCTURE JOIN 条件列名错误
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | 预防性修复(v5 代码审查发现) |
|
||||
| 验证版本 | v6 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dws/finance_income_task.py` |
|
||||
| 错误现象 | JOIN 条件中 `dt.site_table_id` 不存在 |
|
||||
| 根因 | `dwd.dwd_table_fee_log` 的台桌 ID 列名是 `table_id`,不是 `site_table_id` |
|
||||
| 修复方式 | `dt.site_table_id` → `dt.table_id` |
|
||||
| 修复结果 | ✅ v6 中未出现该错误(但被 BUG 8 级联遮蔽) |
|
||||
|
||||
### BUG 8 — DWS_FINANCE_DAILY / DWS_FINANCE_RECHARGE 字段名错误
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v6 |
|
||||
| 验证版本 | v8 |
|
||||
| 文件 | `finance_base_task.py`、`finance_recharge_task.py` |
|
||||
| 错误现象 | `UndefinedColumn: column "pay_money" does not exist`,DWS_FINANCE_DAILY 失败并级联导致 7 个下游任务失败 |
|
||||
| 根因 | `dwd.dwd_recharge_order` 的实际字段是 `pay_amount` / `point_amount`,代码中写的是 `pay_money` / `gift_money` |
|
||||
| 修复方式 | `pay_money` → `pay_amount`,`gift_money` → `point_amount`(2 个文件) |
|
||||
| 修复结果 | ✅ v8 中 DWS_FINANCE_DAILY 和 DWS_FINANCE_RECHARGE 均执行成功(v7 被 BUG 9 遮蔽) |
|
||||
|
||||
### BUG 9 — DWD_LOAD_FROM_ODS 缺少 _pick_snapshot_order_column 方法
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v7 |
|
||||
| 验证版本 | v8 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dwd/dwd_load_task.py` |
|
||||
| 错误现象 | `AttributeError: 'DwdLoadTask' object has no attribute '_pick_snapshot_order_column'`,所有 dim 表 SCD2 装载全部失败 |
|
||||
| 根因 | `_merge_dim_scd2` 方法调用了 `self._pick_snapshot_order_column()`,但该方法只存在于 `integrity_checker.py` 中作为模块级函数,`DwdLoadTask` 类中没有定义 |
|
||||
| 修复方式 | 在 `DwdLoadTask` 类中添加 `_pick_snapshot_order_column` 静态方法(逻辑与 `integrity_checker.py` 中的同名函数一致) |
|
||||
| 修复结果 | ✅ v8 中所有 15 个 dim 表 SCD2 装载成功(dim_site, dim_table, dim_assistant, dim_member, dim_member_card_account, dim_tenant_goods, dim_store_goods, dim_goods_category, dim_groupbuy_package 及其 _ex 表) |
|
||||
|
||||
### BUG 10 — goods_stock 表 FACT_MAPPINGS 驼峰字段名导致 SQL 错误
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v7 |
|
||||
| 验证版本 | v8 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/tasks/dwd/dwd_load_task.py` |
|
||||
| 错误现象 | `UndefinedColumn: column "siteGoodsId" does not exist, perhaps you mean "sitegoodsid"` |
|
||||
| 根因 | `FACT_MAPPINGS` 中 `dwd_goods_stock_summary` 和 `dwd_goods_stock_movement` 的源列使用了带引号的驼峰名(如 `"siteGoodsId"`),但 ODS 表中 PostgreSQL 存储的列名是全小写的 `sitegoodsid`(ODS 入库时 `_int_col("sitegoodsid", "siteGoodsId")` 已将 JSON 驼峰键转为小写列名) |
|
||||
| 修复方式 | 将 FACT_MAPPINGS 中 2 个表共 30+ 个字段的驼峰引用全部改为小写(如 `"siteGoodsId"` → `"sitegoodsid"`) |
|
||||
| 修复结果 | ✅ v8 中 `dwd_goods_stock_summary`(716 条 INSERT)和 `dwd_goods_stock_movement`(14306 条 INSERT)装载成功 |
|
||||
|
||||
### BUG 11 — flow_runner.py sum() 类型不安全
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v7 |
|
||||
| 验证版本 | v8 |
|
||||
| 文件 | `apps/etl/connectors/feiqiu/orchestration/flow_runner.py` |
|
||||
| 错误现象 | `TypeError: unsupported operand type(s) for +: 'int' and 'list'` |
|
||||
| 根因 | 某些任务的 `counts.errors` 返回了 `list`(错误详情列表)而非 `int`(错误计数),`sum()` 无法将 `int` 与 `list` 相加 |
|
||||
| 修复方式 | 添加 `_safe_int()` 辅助函数,将 `int`/`list`/`None` 统一转为 `int` 计数(`list` 取 `len()`) |
|
||||
| 修复结果 | ✅ v8 中不再出现 TypeError,Flow 汇总正常完成 |
|
||||
|
||||
---
|
||||
|
||||
## 未修复的遗留问题
|
||||
|
||||
### 数据质量问题 — dim_assistant_ex / dim_member_card_account_ex 非法日期
|
||||
|
||||
| 项目 | 内容 |
|
||||
|------|------|
|
||||
| 发现版本 | v8 |
|
||||
| 性质 | 上游数据质量问题,非代码 BUG |
|
||||
| 错误现象 | `ValueError: year -1 is out of range` |
|
||||
| 根因 | ODS 中某些记录的日期字段包含非法值(year=-1),Python `datetime` 无法解析 |
|
||||
| 影响 | `dim_assistant_ex` 和 `dim_member_card_account_ex` 装载失败 → 事务进入 `InFailedSqlTransaction` → 级联导致 5 个 DWS 任务失败(DWS_FINANCE_INCOME_STRUCTURE, DWS_FINANCE_DISCOUNT_DETAIL, DWS_WINBACK_INDEX, DWS_NEWCONV_INDEX, DWS_RELATION_INDEX) |
|
||||
| 建议 | 在 DWD 装载的日期类型转换中添加容错处理(捕获 ValueError,将非法日期置为 NULL 或哨兵值) |
|
||||
|
||||
---
|
||||
|
||||
## 修复文件清单
|
||||
|
||||
| 文件 | 修复的 BUG |
|
||||
|------|-----------|
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/assistant_daily_task.py` | BUG 1 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/assistant_monthly_task.py` | BUG 2 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/assistant_customer_task.py` | BUG 3, 4 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/member_consumption_task.py` | BUG 4 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/member_visit_task.py` | BUG 5, 6 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/finance_income_task.py` | BUG 7 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/finance_base_task.py` | BUG 8 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dws/finance_recharge_task.py` | BUG 4, 8 |
|
||||
| `apps/etl/connectors/feiqiu/tasks/dwd/dwd_load_task.py` | BUG 9, 10 |
|
||||
| `apps/etl/connectors/feiqiu/orchestration/flow_runner.py` | BUG 11 |
|
||||
|
||||
---
|
||||
|
||||
## 执行历史
|
||||
|
||||
| 版本 | execution_id | 耗时 | 成功 | 失败 | 修复验证 |
|
||||
|------|-------------|------|------|------|---------|
|
||||
| v1 | `dbf0c29a-...` | 590.7s | 10 | 31 | — |
|
||||
| v2 | `e21e1935-...` | 150.4s | — | — | BUG 1 ✅ |
|
||||
| v3 | `abc94b2d-...` | 681.2s | 9 | 22 | BUG 2,3 ✅ |
|
||||
| v4 | `efd4f421-...` | 11m55s | 10 | 21 | BUG 4 ✅ |
|
||||
| v5 | `fe87144a-...` | 11m37s | 10 | 21 | BUG 5 部署(被 BUG 6 遮蔽) |
|
||||
| v6 | `d9443781-...` | 29m26s | 11 | 8 | BUG 5,6,7 ✅ |
|
||||
| v7 | `0929ab3a-...` | 89.3s | — | 全部 | BUG 8 部署(被 BUG 9 遮蔽) |
|
||||
| v8 | `f943bac6-...` | 1m24s | 14 | 5 | BUG 8,9,10,11 ✅ |
|
||||
"""
|
||||
|
||||
# Write the static bug-fix report and echo where it landed.
(out / "2026-02-21__etl_full_bug_report.md").write_text(report, encoding="utf-8")
print(f"报告已导出: {out / '2026-02-21__etl_full_bug_report.md'}")
|
||||
84
scripts/ops/export_v4_report.py
Normal file
84
scripts/ops/export_v4_report.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导出第四次执行结果报告。"""
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load .env before importing the path helper, which reads env vars at import time.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

# Destination directory for the exported report.
out = get_output_path("SYSTEM_LOG_ROOT")
|
||||
|
||||
report = """# 第四次 ETL 执行结果报告
|
||||
|
||||
- execution_id: `efd4f421-ee10-4244-833f-7b2d68c3c05b`
|
||||
- 时间: 2026-02-21 19:57:02 ~ 20:08:57
|
||||
- 耗时: 11 分 55 秒 (715s)
|
||||
- 整体状态: success (exit_code=0)
|
||||
- 任务总数: 31
|
||||
|
||||
## 成功任务 (10 个)
|
||||
|
||||
| # | 任务 | 耗时 | 统计 |
|
||||
|---|------|------|------|
|
||||
| 1 | DWS_ASSISTANT_DAILY | ~2m28s | fetched=367, inserted=367, deleted=367 |
|
||||
| 2 | DWS_ASSISTANT_MONTHLY | ~12s | fetched=25, inserted=25, deleted=25 |
|
||||
| 3 | DWS_ASSISTANT_CUSTOMER | ~1m22s | fetched=486, inserted=486 |
|
||||
| 4 | DWS_ASSISTANT_SALARY | <1s | 非工资结算期,跳过 |
|
||||
| 5 | DWS_ASSISTANT_FINANCE | ~1m10s | fetched=367, inserted=367, deleted=367 |
|
||||
| 6 | ODS_SETTLEMENT_RECORDS | ~1m46s | fetched=10366, updated=10366 |
|
||||
| 7 | ODS_PAYMENT | ~4m0s | fetched=42500, updated=42500 |
|
||||
| 8 | ODS_REFUND | ~3s | fetched=116, updated=116 |
|
||||
| 9 | DWS_BUILD_ORDER_SUMMARY | ~1s | inserted=13296 |
|
||||
| 10 | DWS_MEMBER_CONSUMPTION | ~43s | fetched=198, inserted=198 |
|
||||
|
||||
## BUG 4 修复验证
|
||||
|
||||
- DWS_MEMBER_CONSUMPTION ✅ 不再报 UndefinedColumn site_id
|
||||
- DWS_MEMBER_VISIT ❌ 新错误(BUG 5)
|
||||
- DWS_FINANCE_RECHARGE ❌ 级联失败(未能独立验证)
|
||||
|
||||
## 新发现 BUG 5
|
||||
|
||||
- 任务: `DWS_MEMBER_VISIT`
|
||||
- 错误: `UndefinedColumn: 字段 "birthday" 不存在`
|
||||
- 位置: `member_visit_task.py` → `_extract_member_info()` line ~312
|
||||
- 根因: SQL 查询 `dwd.dim_member` 时引用了 `birthday` 字段,但该表没有此字段
|
||||
- DWS 表 `dws_member_visit_detail` 设计了 `member_birthday DATE` 列,但上游 dim_member 未提供此数据
|
||||
- 级联影响: 后续 20 个任务全部 InFailedSqlTransaction
|
||||
|
||||
## 失败任务 (21 个)
|
||||
|
||||
| 类型 | 任务 | 错误 |
|
||||
|------|------|------|
|
||||
| 🔴 根因 | DWS_MEMBER_VISIT | UndefinedColumn: birthday |
|
||||
| 级联 | ODS_GOODS_CATEGORY | InFailedSqlTransaction |
|
||||
| 级联 | ODS_STORE_GOODS | InFailedSqlTransaction |
|
||||
| 级联 | ODS_STORE_GOODS_SALES | InFailedSqlTransaction |
|
||||
| 级联 | ODS_TENANT_GOODS | InFailedSqlTransaction |
|
||||
| 级联 | ODS_PLATFORM_COUPON | InFailedSqlTransaction |
|
||||
| 级联 | ODS_GROUP_PACKAGE | InFailedSqlTransaction |
|
||||
| 级联 | ODS_GROUP_BUY_REDEMPTION | InFailedSqlTransaction |
|
||||
| 级联 | ODS_INVENTORY_STOCK | InFailedSqlTransaction |
|
||||
| 级联 | ODS_INVENTORY_CHANGE | InFailedSqlTransaction |
|
||||
| 级联 | DWS_GOODS_STOCK_DAILY | InFailedSqlTransaction |
|
||||
| 级联 | DWS_GOODS_STOCK_WEEKLY | InFailedSqlTransaction |
|
||||
| 级联 | DWS_GOODS_STOCK_MONTHLY | InFailedSqlTransaction |
|
||||
| 级联 | DWS_FINANCE_DAILY | InFailedSqlTransaction |
|
||||
| 级联 | DWS_FINANCE_RECHARGE | InFailedSqlTransaction |
|
||||
| 级联 | DWS_FINANCE_INCOME_STRUCTURE | InFailedSqlTransaction |
|
||||
| 级联 | DWS_FINANCE_DISCOUNT_DETAIL | InFailedSqlTransaction |
|
||||
| 级联 | DWS_WINBACK_INDEX | InFailedSqlTransaction |
|
||||
| 级联 | DWS_NEWCONV_INDEX | InFailedSqlTransaction |
|
||||
| 级联 | DWS_RELATION_INDEX | InFailedSqlTransaction |
|
||||
| 级联 | DWD_LOAD_FROM_ODS | InFailedSqlTransaction |
|
||||
|
||||
## BUG 5 修复
|
||||
|
||||
- 文件: `member_visit_task.py`
|
||||
- 改动 1: `_extract_member_info` SQL 移除 `birthday` 字段
|
||||
- 改动 2: transform 中 `member_birthday` 改为 `None`
|
||||
- 已添加 CHANGE 注释
|
||||
"""
|
||||
|
||||
# Write the static v4 run report and confirm on the console.
(out / "2026-02-21__etl_run_result_v4.md").write_text(report, encoding="utf-8")
print("报告已保存")
|
||||
120
scripts/ops/export_v5_report.py
Normal file
120
scripts/ops/export_v5_report.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导出第五次 ETL 执行结果报告。"""
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load .env before importing the path helper, which reads env vars at import time.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

from _env_paths import get_output_path

# Raw dump of the fifth ETL run, captured earlier from the backend API.
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v5.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

# Full stderr of the run, split into lines for scanning.
error_log = data.get("error_log", "")
lines = error_log.strip().split("\n")
|
||||
|
||||
# 解析任务结果
|
||||
tasks_success = []
|
||||
tasks_failed = []
|
||||
tasks_skipped = []
|
||||
|
||||
for line in lines:
|
||||
if "完成,统计=" in line or "任务完成:" in line or "工具类任务执行成功" in line:
|
||||
task_name = line.split("|")[-1].strip().split(":")[0].strip() if "|" in line else "?"
|
||||
# 从日志行提取任务名
|
||||
for part in line.split("|"):
|
||||
part = part.strip()
|
||||
if part.startswith("DWS_") or part.startswith("ODS_") or part.startswith("DWD_"):
|
||||
task_name = part.split(":")[0].strip()
|
||||
break
|
||||
tasks_success.append(task_name)
|
||||
elif "任务" in line and "失败:" in line:
|
||||
# 提取任务名
|
||||
idx = line.find("任务 ")
|
||||
if idx >= 0:
|
||||
rest = line[idx + 3:]
|
||||
task_name = rest.split(" ")[0].strip()
|
||||
# 提取错误类型
|
||||
err = ""
|
||||
if "UndefinedColumn" in line:
|
||||
err = "UndefinedColumn"
|
||||
elif "InFailedSqlTransaction" in line:
|
||||
err = "InFailedSqlTransaction(级联)"
|
||||
elif "UniqueViolation" in line:
|
||||
err = "UniqueViolation"
|
||||
else:
|
||||
err = rest.split("失败:")[1].strip()[:80] if "失败:" in rest else "未知"
|
||||
tasks_failed.append((task_name, err))
|
||||
|
||||
# 去重
|
||||
seen_success = []
|
||||
for t in tasks_success:
|
||||
if t not in seen_success:
|
||||
seen_success.append(t)
|
||||
|
||||
seen_failed = {}
|
||||
for t, e in tasks_failed:
|
||||
if t not in seen_failed:
|
||||
seen_failed[t] = e
|
||||
|
||||
# 时间
|
||||
start_time = "20:19:52"
|
||||
end_time = "20:31:29"
|
||||
|
||||
report = f"""# 第五次 ETL 执行结果报告
|
||||
|
||||
- execution_id: `fe87144a-687d-4ce0-9b79-6bd0186b2be3`
|
||||
- 执行时间: 2026-02-21 {start_time} ~ {end_time}(约 11m37s)
|
||||
- exit_code: 0
|
||||
- 总任务数: 31
|
||||
|
||||
## 成功任务({len(seen_success)} 个)
|
||||
|
||||
| # | 任务 |
|
||||
|---|------|
|
||||
"""
|
||||
|
||||
for i, t in enumerate(seen_success, 1):
|
||||
report += f"| {i} | {t} |\n"
|
||||
|
||||
report += f"""
|
||||
## 失败任务({len(seen_failed)} 个)
|
||||
|
||||
| # | 任务 | 错误类型 |
|
||||
|---|------|----------|
|
||||
"""
|
||||
|
||||
for i, (t, e) in enumerate(seen_failed.items(), 1):
|
||||
report += f"| {i} | {t} | {e} |\n"
|
||||
|
||||
report += """
|
||||
## 根因分析
|
||||
|
||||
BUG 6: `DWS_MEMBER_VISIT` → `_extract_table_info()` 方法中 SQL 引用了 `dwd.dim_table.site_table_id`,
|
||||
但该表的主键字段实际为 `table_id`(参考 `db/etl_feiqiu/schemas/dwd.sql`)。
|
||||
|
||||
错误发生后,psycopg2 连接进入 InFailedSqlTransaction 状态,导致后续所有任务级联失败。
|
||||
|
||||
## 修复措施
|
||||
|
||||
1. `member_visit_task.py` → `_extract_table_info()`:
|
||||
- `site_table_id AS table_id` → `table_id AS table_id`
|
||||
- `site_table_name AS table_name` → `table_name AS table_name`
|
||||
|
||||
2. `finance_income_task.py` → `_extract_income_by_area()`:
|
||||
- JOIN 条件 `dt.site_table_id = tfl.site_table_id` → `dt.table_id = tfl.site_table_id`
|
||||
- JOIN 条件 `dt.site_table_id = asl.site_table_id` → `dt.table_id = asl.site_table_id`
|
||||
|
||||
## BUG 5 验证
|
||||
|
||||
BUG 5(birthday 字段)的修复已部署,但被 BUG 6 遮蔽,无法在本次执行中验证。
|
||||
需要第六次执行来同时验证 BUG 5 + BUG 6 + BUG 7。
|
||||
"""
|
||||
|
||||
out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v5.md"
|
||||
out_path.write_text(report, encoding="utf-8")
|
||||
print(f"报告已导出: {out_path}")
|
||||
print(f"成功: {len(seen_success)}, 失败: {len(seen_failed)}")
|
||||
147
scripts/ops/export_v6_report.py
Normal file
147
scripts/ops/export_v6_report.py
Normal file
@@ -0,0 +1,147 @@
|
||||
# -*- coding: utf-8 -*-
"""导出第六次 ETL 执行结果报告,分析所有任务的成功/失败状态。"""
import json
import re
from pathlib import Path

from dotenv import load_dotenv

# .env must be loaded before importing _env_paths, which reads env vars.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

from _env_paths import get_output_path

raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v6.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

error_log = data.get("error_log", "")
lines = error_log.strip().split("\n")

print(f"日志总行数: {len(lines)}")

# Parsed task results.
success_tasks = []
failed_tasks = []

# Success-line patterns (precompiled once; group 1 captures the task name).
success_patterns = [
    re.compile(r"任务完成:\s*(\S+)"),
    re.compile(r"工具类任务执行成功:\s*(\S+)"),
    re.compile(r"(\S+)\s*完成,统计="),
]

# Failure-line pattern: group 1 = task name, group 2 = error message.
fail_pattern = re.compile(r"任务\s+(\S+)\s+失败:\s*(.*)")

# Names already recorded as failed (O(1) membership instead of scanning
# the failed_tasks list for every matching log line).
failed_names_seen = set()

for line in lines:
    # Success match.
    for pat in success_patterns:
        m = pat.search(line)
        if m:
            task = m.group(1).strip()
            if task not in success_tasks:
                success_tasks.append(task)
            break

    # Failure match.
    m = fail_pattern.search(line)
    if m:
        task = m.group(1).strip()
        err_msg = m.group(2).strip()[:120]
        # Classify the error.
        if "InFailedSqlTransaction" in err_msg:
            err_type = "InFailedSqlTransaction(级联)"
        elif "UndefinedColumn" in err_msg:
            err_type = f"UndefinedColumn: {err_msg}"
        elif "UniqueViolation" in err_msg:
            err_type = "UniqueViolation"
        elif "UndefinedTable" in err_msg:
            err_type = f"UndefinedTable: {err_msg}"
        else:
            err_type = err_msg
        if task not in failed_names_seen:
            failed_names_seen.add(task)
            failed_tasks.append((task, err_type))

# Drop from the success list any task that also failed (may have
# succeeded earlier and failed later).
fail_names = {t for t, _ in failed_tasks}
success_only = [t for t in success_tasks if t not in fail_names]

print(f"\n成功: {len(success_only)}, 失败: {len(failed_tasks)}")
print("\n--- 成功任务 ---")
for i, t in enumerate(success_only, 1):
    print(f" {i}. {t}")

print("\n--- 失败任务 ---")
for i, (t, e) in enumerate(failed_tasks, 1):
    print(f" {i}. {t} → {e}")

# Root causes are the non-cascading failures.
root_failures = [(t, e) for t, e in failed_tasks if "级联" not in e]
cascade_failures = [(t, e) for t, e in failed_tasks if "级联" in e]

print(f"\n--- 根因失败({len(root_failures)} 个)---")
for t, e in root_failures:
    print(f" {t} → {e}")

print(f"\n--- 级联失败({len(cascade_failures)} 个)---")
for t, _ in cascade_failures:
    print(f" {t}")

# Build the markdown report.
report = f"""# 第六次 ETL 执行结果报告

- execution_id: `d9443781-e4ac-4df6-9f87-11c45d72e5ba`
- 执行时间: 2026-02-21 20:45:18 ~ 21:14:45(29 分 26 秒)
- exit_code: 0
- status: success
- 总任务数: 31
- 数据统计: 获取 171,961 / 新增 13,662 / 更新 171,595 / 跳过 0 / 错误 0

## 成功任务({len(success_only)} 个)

| # | 任务 |
|---|------|
"""
for i, t in enumerate(success_only, 1):
    report += f"| {i} | {t} |\n"

if failed_tasks:
    report += f"""
## 失败任务({len(failed_tasks)} 个)

| # | 任务 | 错误类型 |
|---|------|----------|
"""
    for i, (t, e) in enumerate(failed_tasks, 1):
        report += f"| {i} | {t} | {e} |\n"

if root_failures:
    report += f"""
## 根因分析({len(root_failures)} 个非级联失败)

"""
    for t, e in root_failures:
        report += f"- `{t}`: {e}\n"

if cascade_failures:
    report += f"""
## 级联失败({len(cascade_failures)} 个)

由根因失败导致 psycopg2 连接进入 InFailedSqlTransaction 状态,后续任务全部级联失败。
"""

report += """
## 与前次对比

| 轮次 | 成功 | 失败 | 耗时 | 修复的 BUG |
|------|------|------|------|-----------|
"""
# Historic rows are constants; only the v6 row is computed.
report += "| v1 | 10 | 31 | 9m51s | — |\n"
report += "| v2 | — | — | 2m30s | BUG 1 |\n"
report += "| v3 | 9 | 22 | 11m21s | BUG 2+3 |\n"
report += "| v4 | 10 | 21 | 11m55s | BUG 4 |\n"
report += "| v5 | 10 | 21 | 11m37s | BUG 5 |\n"
report += f"| v6 | {len(success_only)} | {len(failed_tasks)} | 29m26s | BUG 5+6+7 |\n"

out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v6.md"
out_path.write_text(report, encoding="utf-8")
print(f"\n报告已导出: {out_path}")
|
||||
123
scripts/ops/export_v8_report.py
Normal file
123
scripts/ops/export_v8_report.py
Normal file
@@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
"""Export the v8 ETL run report (report content is a fixed snapshot)."""
from pathlib import Path
from dotenv import load_dotenv

# .env must be loaded before importing _env_paths, which reads env vars.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

# Output directory for run logs/reports.
out = get_output_path("SYSTEM_LOG_ROOT")

# The v8 results were stable, so the whole report is a constant string.
report = """# ETL 第八次执行报告 (v8)

- execution_id: `f943bac6-23be-45c5-8b8c-a864e85a1916`
- 时间: 2026-02-21 21:33:37 ~ 21:35:01 (1分24秒)
- 整体状态: success, exit_code=0

## 本次修复验证

| BUG | 修复内容 | 验证结果 |
|-----|---------|---------|
| BUG 8 | `finance_base_task.py` + `finance_recharge_task.py`: pay_money→pay_amount, gift_money→point_amount | ✅ DWS_FINANCE_DAILY + DWS_FINANCE_RECHARGE 均完成 |
| BUG 9 | `dwd_load_task.py`: 添加 `_pick_snapshot_order_column` 方法 | ✅ 所有 dim 表 SCD2 装载成功 |
| BUG 10 | `dwd_load_task.py`: FACT_MAPPINGS 驼峰字段名→小写 | ✅ dwd_goods_stock_summary(716条) + dwd_goods_stock_movement(14306条) 装载成功 |
| BUG 11 | `flow_runner.py`: sum() 类型安全处理 | ✅ 不再出现 TypeError |

## DWD_LOAD_FROM_ODS 详情

### 维度表 (SCD2) — 全部成功
| 表 | processed | inserted | updated |
|----|-----------|----------|---------|
| dim_site | 1 | 0 | 1 |
| dim_site_ex | 1 | 0 | 1 |
| dim_table | 74 | 0 | 74 |
| dim_table_ex | 74 | 0 | 74 |
| dim_assistant | 69 | 0 | 69 |
| dim_member | 557 | 0 | 557 |
| dim_member_ex | 557 | 0 | 557 |
| dim_member_card_account | 946 | 0 | 946 |
| dim_tenant_goods | 174 | 1 | 173 |
| dim_tenant_goods_ex | 174 | 1 | 173 |
| dim_store_goods | 173 | 1 | 172 |
| dim_store_goods_ex | 173 | 1 | 172 |
| dim_goods_category | 26 | 0 | 26 |
| dim_groupbuy_package | 34 | 0 | 34 |
| dim_groupbuy_package_ex | 34 | 0 | 34 |

### 事实表 (INCREMENT) — 全部成功
| 表 | processed | inserted | updated |
|----|-----------|----------|---------|
| dwd_settlement_head | 10366 | 0 | 10366 |
| dwd_settlement_head_ex | 10366 | 0 | 10366 |
| dwd_table_fee_log | 9103 | 0 | 9103 |
| dwd_table_fee_log_ex | 9103 | 0 | 9103 |
| dwd_table_fee_adjust | 1616 | 0 | 1616 |
| dwd_table_fee_adjust_ex | 1616 | 0 | 1616 |
| dwd_assistant_service_log | 2619 | 0 | 2619 |
| dwd_assistant_service_log_ex | 2619 | 0 | 2619 |
| dwd_assistant_trash_event | 78 | 0 | 78 |
| dwd_assistant_trash_event_ex | 78 | 0 | 78 |
| dwd_member_balance_change | 2185 | 0 | 2185 |
| dwd_member_balance_change_ex | 2185 | 0 | 2185 |
| dwd_groupbuy_redemption | 7267 | 0 | 7267 |
| dwd_groupbuy_redemption_ex | 7267 | 0 | 7267 |
| dwd_platform_coupon_redemption | 18311 | 0 | 18311 |
| dwd_platform_coupon_redemption_ex | 18311 | 0 | 18311 |
| dwd_recharge_order | 191 | 0 | 191 |
| dwd_recharge_order_ex | 191 | 0 | 191 |
| dwd_payment | 10625 | 0 | 10625 |
| dwd_refund | 29 | 0 | 29 |
| dwd_refund_ex | 29 | 0 | 29 |
| dwd_goods_stock_summary | 716 | 716 | 0 |
| dwd_goods_stock_movement | 14306 | 14306 | 0 |

### DWD 装载错误 (2个,数据质量问题,非代码 BUG)
| 表 | 错误 |
|----|------|
| dim_assistant_ex | year -1 is out of range |
| dim_member_card_account_ex | year -1 is out of range |

## DWS 任务状态

| 任务 | 状态 | 备注 |
|------|------|------|
| ODS_FETCH | ✅ 完成 | |
| DWD_LOAD_FROM_ODS | ✅ 完成 | 39表成功,2表数据质量错误 |
| DWS_ASSISTANT_DAILY | ✅ 完成 | |
| DWS_ASSISTANT_MONTHLY | ✅ 完成 | 删除9行,插入9行 |
| DWS_ASSISTANT_CUSTOMER | ✅ 完成 | 删除285行,插入285行 |
| DWS_ASSISTANT_SALARY | ✅ 完成 | |
| DWS_ASSISTANT_FINANCE | ✅ 完成 | |
| DWS_MEMBER_CONSUMPTION | ✅ 完成 | 删除198行,插入198行 |
| DWS_MEMBER_VISIT | ✅ 完成 | |
| DWS_GOODS_STOCK_DAILY | ✅ 完成 | |
| DWS_GOODS_STOCK_WEEKLY | ✅ 完成 | |
| DWS_GOODS_STOCK_MONTHLY | ✅ 完成 | |
| DWS_FINANCE_DAILY | ✅ 完成 | |
| DWS_FINANCE_RECHARGE | ✅ 完成 | |
| DWS_FINANCE_INCOME_STRUCTURE | ❌ 级联失败 | InFailedSqlTransaction |
| DWS_FINANCE_DISCOUNT_DETAIL | ❌ 级联失败 | InFailedSqlTransaction |
| DWS_WINBACK_INDEX | ❌ 级联失败 | InFailedSqlTransaction |
| DWS_NEWCONV_INDEX | ❌ 级联失败 | InFailedSqlTransaction |
| DWS_RELATION_INDEX | ❌ 级联失败 | InFailedSqlTransaction |

## 总结

- 14/19 任务成功完成
- 5/19 任务因 InFailedSqlTransaction 级联失败
- 级联失败根因: `dim_assistant_ex` 和 `dim_member_card_account_ex` 中存在非法日期值 (year=-1),导致事务进入失败状态
- 这是数据质量问题,不是代码 BUG — 需要在 DWD 装载时对日期字段做容错处理

## 与 v6(上次最好成绩)对比

| 指标 | v6 | v8 |
|------|----|----|
| 耗时 | 29m26s | 1m24s |
| 成功任务 | 11/19 | 14/19 |
| 失败任务 | 8/19 | 5/19 |
| DWD 装载 | 部分 dim 失败 | 39/41 表成功 |
| 新增成功 | — | DWS_FINANCE_DAILY, DWS_FINANCE_RECHARGE, DWS_GOODS_STOCK_* |
"""

(out / "2026-02-21__etl_run_result_v8.md").write_text(report, encoding="utf-8")
print(f"报告已导出: {out / '2026-02-21__etl_run_result_v8.md'}")
|
||||
29
scripts/ops/extract_missing_files.py
Normal file
29
scripts/ops/extract_missing_files.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
从 docsdeployment.md 对话记录中提取缺失文件的关键信息。
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
recovery = Path(r"C:\Users\Administrator\Downloads\RECOVERY\docsdeployment.md")
|
||||
text = recovery.read_text(encoding="utf-8")
|
||||
|
||||
# 搜索关键文件名的上下文
|
||||
keywords = [
|
||||
"ENV-MANAGEMENT",
|
||||
"PRE-TEST-VERIFICATION",
|
||||
"MINIPROGRAM-RELEASE",
|
||||
"config.ts",
|
||||
]
|
||||
|
||||
lines = text.split("\n")
|
||||
for kw in keywords:
|
||||
print(f"\n{'='*40}")
|
||||
print(f"搜索: {kw}")
|
||||
print(f"{'='*40}")
|
||||
for i, line in enumerate(lines):
|
||||
if kw in line:
|
||||
start = max(0, i - 1)
|
||||
end = min(len(lines), i + 3)
|
||||
for j in range(start, end):
|
||||
marker = ">>>" if j == i else " "
|
||||
print(f" {marker} L{j+1}: {lines[j][:120]}")
|
||||
print()
|
||||
455
scripts/ops/field_audit.py
Normal file
455
scripts/ops/field_audit.py
Normal file
@@ -0,0 +1,455 @@
|
||||
"""
|
||||
字段排查脚本 — 数据流字段补全 Spec Task 1.1
|
||||
|
||||
对 12 张目标表执行排查流程:
|
||||
1. 查 DWD 现有列
|
||||
2. 查 ODS 现有列
|
||||
3. 解析 FACT_MAPPINGS 现状(从 dwd_load_task.py 源码导入)
|
||||
4. 判断自动映射(ODS 列名 == DWD 列名)
|
||||
5. 输出排查记录表(markdown),标注每个字段的排查结论和建议操作
|
||||
|
||||
用法:
|
||||
cd C:\\NeoZQYY
|
||||
python scripts/ops/field_audit.py
|
||||
python scripts/ops/field_audit.py --output path/to/output.md
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# ── 项目根目录 & 路径设置 ──
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
ETL_ROOT = ROOT / "apps" / "etl" / "connectors" / "feiqiu"
|
||||
sys.path.insert(0, str(ETL_ROOT))
|
||||
|
||||
# 导入 FACT_MAPPINGS / TABLE_MAP(仅读取类属性,不实例化)
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
|
||||
# ── SCD2 列集合(排查时忽略) ──
|
||||
SCD2_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
|
||||
|
||||
# ── 需要排查的表及其疑似缺失字段 ──
|
||||
AUDIT_TARGETS: list[dict] = [
|
||||
{
|
||||
"ods_table": "assistant_accounts_master",
|
||||
"dwd_tables": ["dim_assistant", "dim_assistant_ex"],
|
||||
"suspect_ods_cols": ["system_role_id", "job_num", "cx_unit_price", "pd_unit_price"],
|
||||
"category": "A",
|
||||
"notes": "4 个 ODS→DWD 未映射",
|
||||
},
|
||||
{
|
||||
"ods_table": "assistant_service_records",
|
||||
"dwd_tables": ["dwd_assistant_service_log", "dwd_assistant_service_log_ex"],
|
||||
"suspect_ods_cols": ["site_assistant_id", "operator_id", "operator_name"],
|
||||
"category": "A",
|
||||
"notes": "3 个 ODS→DWD 未映射(site_assistant_id 可能已映射为 order_assistant_id)",
|
||||
},
|
||||
{
|
||||
"ods_table": "store_goods_sales_records",
|
||||
"dwd_tables": ["dwd_store_goods_sale", "dwd_store_goods_sale_ex"],
|
||||
"suspect_ods_cols": ["discount_price"],
|
||||
"category": "A",
|
||||
"notes": "1 个 ODS→DWD 未映射(可能已映射为 discount_money)",
|
||||
},
|
||||
{
|
||||
"ods_table": "member_balance_changes",
|
||||
"dwd_tables": ["dwd_member_balance_change", "dwd_member_balance_change_ex"],
|
||||
"suspect_ods_cols": ["relate_id"],
|
||||
"category": "A",
|
||||
"notes": "1 个 ODS→DWD 未映射",
|
||||
},
|
||||
{
|
||||
"ods_table": "tenant_goods_master",
|
||||
"dwd_tables": ["dim_tenant_goods", "dim_tenant_goods_ex"],
|
||||
"suspect_ods_cols": ["commoditycode"],
|
||||
"category": "A",
|
||||
"notes": "1 个 ODS→DWD 未映射(可能已映射为 commodity_code_list)",
|
||||
},
|
||||
{
|
||||
"ods_table": "site_tables_master",
|
||||
"dwd_tables": ["dim_table", "dim_table_ex"],
|
||||
"suspect_ods_cols": [
|
||||
"sitename", "appletqrcodeurl", "audit_status", "charge_free",
|
||||
"create_time", "delay_lights_time", "is_rest_area", "light_status",
|
||||
"only_allow_groupon", "order_delay_time", "self_table",
|
||||
"tablestatusname", "temporary_light_second", "virtual_table",
|
||||
],
|
||||
"category": "A",
|
||||
"notes": "14 个 ODS→DWD 未映射",
|
||||
},
|
||||
{
|
||||
"ods_table": "recharge_settlements",
|
||||
"dwd_tables": ["dwd_recharge_order", "dwd_recharge_order_ex"],
|
||||
"suspect_ods_cols": [
|
||||
"electricityadjustmoney", "electricitymoney",
|
||||
"mervousalesamount", "plcouponsaleamount", "realelectricitymoney",
|
||||
],
|
||||
"category": "B",
|
||||
"notes": "5 个 ODS→DWD 未映射 + 5 个 DWD 无 ODS 源(驼峰/蛇形命名差异)",
|
||||
},
|
||||
{
|
||||
"ods_table": "store_goods_master",
|
||||
"dwd_tables": ["dim_store_goods", "dim_store_goods_ex"],
|
||||
"suspect_ods_cols": [
|
||||
"time_slot_sale", "batch_stock_quantity", "provisional_total_cost",
|
||||
],
|
||||
"category": "B",
|
||||
"notes": "平层 + 嵌套展开 + ODS→DWD 补全",
|
||||
},
|
||||
{
|
||||
"ods_table": "goods_stock_summary",
|
||||
"dwd_tables": [], # 无 DWD 表,需新建
|
||||
"suspect_ods_cols": [
|
||||
"sitegoodsid", "goodsname", "goodsunit", "goodscategoryid",
|
||||
"goodscategorysecondid", "categoryname", "rangestartstock",
|
||||
"rangeendstock", "rangein", "rangeout", "rangesale",
|
||||
"rangesalemoney", "rangeinventory", "currentstock",
|
||||
],
|
||||
"category": "C",
|
||||
"notes": "14 个 ODS 字段,无 DWD 目标表,需新建",
|
||||
},
|
||||
{
|
||||
"ods_table": "goods_stock_movements",
|
||||
"dwd_tables": [], # 无 DWD 表,需新建
|
||||
"suspect_ods_cols": [
|
||||
# ODS 实际列名为驼峰式(无下划线)
|
||||
"sitegoodsstockid", "tenantid", "siteid", "sitegoodsid",
|
||||
"goodsname", "goodscategoryid", "goodssecondcategoryid",
|
||||
"unit", "price", "stocktype", "changenum", "startnum",
|
||||
"endnum", "changenuma", "startnuma", "endnuma",
|
||||
"remark", "operatorname", "createtime",
|
||||
],
|
||||
"category": "C",
|
||||
"notes": "19 个 ODS 字段,无 DWD 目标表,需新建",
|
||||
},
|
||||
]
|
||||
|
||||
# ── recharge_settlements 已知的 DWD 无 ODS 源字段(用于交叉比对) ──
|
||||
RECHARGE_DWD_ORPHANS = [
|
||||
"pl_coupon_sale_amount", "mervou_sales_amount",
|
||||
"electricity_money", "real_electricity_money", "electricity_adjust_money",
|
||||
]
|
||||
|
||||
|
||||
def get_db_columns(cur, schema: str, table: str) -> list[str]:
    """Return the lower-cased column names of schema.table in ordinal order."""
    query = (
        "SELECT column_name FROM information_schema.columns "
        "WHERE table_schema = %s AND table_name = %s ORDER BY ordinal_position"
    )
    cur.execute(query, (schema, table))
    return [row["column_name"].lower() for row in cur.fetchall()]
|
||||
|
||||
|
||||
def get_sample_values(conn, schema: str, table: str, column: str, limit: int = 5) -> list:
    """Fetch up to *limit* distinct non-NULL sample values of one column.

    On any query error the transaction is rolled back and an empty list is
    returned, so a bad column never leaves the connection in a failed state.
    """
    cur = conn.cursor(cursor_factory=RealDictCursor)
    sql = (
        f'SELECT DISTINCT "{column}" FROM "{schema}"."{table}" '
        f'WHERE "{column}" IS NOT NULL LIMIT %s'
    )
    try:
        cur.execute(sql, (limit,))
        rows = cur.fetchall()
    except Exception:
        conn.rollback()
        return []
    finally:
        cur.close()
    return [row[column] for row in rows]
|
||||
|
||||
|
||||
def parse_fact_mappings(
    fact_mappings: dict | None = None,
) -> tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]]:
    """Parse fact-table column mappings into forward and reverse indexes.

    Args:
        fact_mappings: {dwd_full_table: [(dwd_col, ods_expr, cast), ...]}.
            Defaults to ``DwdLoadTask.FACT_MAPPINGS``, preserving the
            original zero-argument call.

    Returns:
        A 2-tuple ``(forward, reverse)`` where
        forward = {dwd_full_table: {dwd_col: ods_expr}} and
        reverse = {dwd_full_table: {ods_expr_lower: dwd_col}}.

    Note: the previous annotation advertised a single dict although the
    function has always returned a 2-tuple; the signature now matches.
    """
    if fact_mappings is None:
        fact_mappings = DwdLoadTask.FACT_MAPPINGS

    forward: dict[str, dict[str, str]] = {}
    reverse: dict[str, dict[str, str]] = {}
    for dwd_table, entries in fact_mappings.items():
        fwd = {}
        rev = {}
        for dwd_col, ods_expr, _cast in entries:
            fwd[dwd_col.lower()] = ods_expr
            # Reverse index: ODS expression -> DWD column. Plain column names
            # and quoted/JSON expressions are normalised by lower-casing and
            # stripping surrounding double quotes.
            ods_key = ods_expr.lower().strip('"')
            rev[ods_key] = dwd_col.lower()
        forward[dwd_table] = fwd
        reverse[dwd_table] = rev
    return forward, reverse
|
||||
|
||||
|
||||
def audit_one_table(
    conn,
    target: dict,
    fm_forward: dict,
    fm_reverse: dict,
) -> list[dict]:
    """
    Audit one ODS table against its DWD targets; return the audit records.

    Each record: {ods_col, ods_exists, dwd_matches, fm_status, conclusion,
    action, samples}.
    """
    cur = conn.cursor(cursor_factory=RealDictCursor)
    ods_table = target["ods_table"]
    dwd_tables = target["dwd_tables"]
    suspect_cols = target["suspect_ods_cols"]

    # Existing ODS columns.
    ods_cols = set(get_db_columns(cur, "ods", ods_table))

    # Existing columns of each candidate DWD table.
    dwd_cols_map: dict[str, set[str]] = {}
    for dt in dwd_tables:
        dwd_cols_map[dt] = set(get_db_columns(cur, "dwd", dt))

    records = []
    for ods_col in suspect_cols:
        ods_col_lower = ods_col.lower()
        record = {
            "ods_col": ods_col_lower,
            "ods_exists": ods_col_lower in ods_cols,
            "dwd_matches": [],
            "fm_status": "未配置",
            "conclusion": "",
            "action": "",
            "samples": [],
        }

        # Sample values (only when the ODS column actually exists).
        if record["ods_exists"]:
            record["samples"] = get_sample_values(conn, "ods", ods_table, ods_col_lower)

        # Probe each candidate DWD table; the first match wins (break).
        for dt in dwd_tables:
            dwd_full = f"dwd.{dt}"
            dwd_cols = dwd_cols_map.get(dt, set())
            fm_fwd = fm_forward.get(dwd_full, {})
            fm_rev = fm_reverse.get(dwd_full, {})

            # Check 1: FACT_MAPPINGS reverse index — ODS column already mapped?
            if ods_col_lower in fm_rev:
                mapped_to = fm_rev[ods_col_lower]
                record["dwd_matches"].append(f"{dt}.{mapped_to}")
                record["fm_status"] = f"已映射 → {dt}.{mapped_to}"
                record["conclusion"] = "已映射(FACT_MAPPINGS 显式配置)"
                record["action"] = "无需变更"
                break

            # Check 2: same-named column in the DWD table (automatic mapping).
            if ods_col_lower in dwd_cols:
                record["dwd_matches"].append(f"{dt}.{ods_col_lower}")
                record["fm_status"] = "自动映射(同名列)"
                record["conclusion"] = "已映射(自动匹配)"
                record["action"] = "无需变更"
                break

            # Check 3: near-match after camelCase -> snake_case conversion.
            snake = _camel_to_snake(ods_col_lower)
            if snake != ods_col_lower and snake in dwd_cols:
                record["dwd_matches"].append(f"{dt}.{snake}")
                # Also check whether FACT_MAPPINGS already covers the mapping.
                if snake in fm_fwd:
                    record["fm_status"] = f"已映射 → {dt}.{snake}(命名转换)"
                    record["conclusion"] = "已映射(命名差异,FACT_MAPPINGS 已覆盖)"
                    record["action"] = "无需变更"
                else:
                    record["fm_status"] = f"DWD 列存在 {dt}.{snake},但 FACT_MAPPINGS 未配置"
                    record["conclusion"] = "映射遗漏(DWD 列已存在,缺 FACT_MAPPINGS)"
                    record["action"] = "仅补充 FACT_MAPPINGS"
                break
        else:
            # No candidate DWD table produced a match (for-else: no break hit).
            if not record["ods_exists"]:
                record["conclusion"] = "ODS 列不存在"
                record["action"] = "需确认 API 是否返回该字段"
            elif not dwd_tables:
                record["conclusion"] = "无 DWD 目标表"
                record["action"] = "需新建 DWD 表"
            else:
                record["conclusion"] = "确实缺失"
                record["action"] = "需新增 DWD 列 + FACT_MAPPINGS"

        records.append(record)

    # Extra pass: DWD columns of recharge_settlements that lack an ODS source.
    if ods_table == "recharge_settlements":
        for dwd_orphan in RECHARGE_DWD_ORPHANS:
            orphan_record = {
                "ods_col": f"(DWD orphan) {dwd_orphan}",
                "ods_exists": False,
                "dwd_matches": [],
                "fm_status": "",
                "conclusion": "",
                "action": "",
                "samples": [],
            }
            # Is the orphan already covered by a FACT_MAPPINGS entry?
            for dt in dwd_tables:
                dwd_full = f"dwd.{dt}"
                fm_fwd = fm_forward.get(dwd_full, {})
                if dwd_orphan in fm_fwd:
                    src = fm_fwd[dwd_orphan]
                    orphan_record["fm_status"] = f"已映射 ← {src}"
                    orphan_record["conclusion"] = "已映射(FACT_MAPPINGS 已覆盖)"
                    orphan_record["action"] = "无需变更"
                    orphan_record["dwd_matches"].append(f"{dt}.{dwd_orphan}")
                    break
            else:
                orphan_record["conclusion"] = "DWD 列存在但无 ODS 映射"
                orphan_record["action"] = "需补充 FACT_MAPPINGS"
            records.append(orphan_record)

    return records
|
||||
|
||||
|
||||
def _camel_to_snake(name: str) -> str:
|
||||
"""简易驼峰转蛇形:在大写字母前插入下划线。"""
|
||||
import re
|
||||
s1 = re.sub(r"([A-Z])", r"_\1", name)
|
||||
return s1.lower().lstrip("_")
|
||||
|
||||
|
||||
|
||||
def generate_report(all_results: dict[tuple, list[dict]]) -> str:
    """Render the audit results as a Markdown report.

    Args:
        all_results: maps ``(ods_table, category, notes)`` tuples to the
            record lists produced by ``audit_one_table`` (the previous
            annotation claimed ``dict[str, ...]`` but the keys are tuples).

    Returns:
        The complete Markdown document as one string.
    """
    lines: list[str] = []
    now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines.append("# 字段排查报告\n")
    lines.append(f"> 生成时间:{now_str}\n")
    lines.append(f"> 排查范围:{len(all_results)} 张表\n")

    # Aggregate counters over every audited field.
    total_fields = 0
    already_mapped = 0
    need_fm_only = 0
    need_new_col = 0
    need_new_table = 0
    ods_missing = 0

    for _key, records in all_results.items():
        for r in records:
            total_fields += 1
            action = r["action"]
            if "无需变更" in action:
                already_mapped += 1
            elif "仅补充" in action:
                need_fm_only += 1
            elif "新增 DWD 列" in action:
                need_new_col += 1
            elif "新建 DWD 表" in action:
                need_new_table += 1
            elif "需确认" in action:
                ods_missing += 1

    lines.append("\n## 汇总\n")
    lines.append("| 指标 | 数量 |")
    lines.append("|------|------|")
    lines.append(f"| 排查字段总数 | {total_fields} |")
    lines.append(f"| 已映射(无需变更) | {already_mapped} |")
    lines.append(f"| 映射遗漏(仅补 FACT_MAPPINGS) | {need_fm_only} |")
    lines.append(f"| 确实缺失(需新增 DWD 列) | {need_new_col} |")
    lines.append(f"| 无 DWD 表(需新建) | {need_new_table} |")
    lines.append(f"| ODS 列不存在(需确认 API) | {ods_missing} |")

    # Per-table detail sections.
    for target_info, records in all_results.items():
        ods_table, category, notes = target_info
        lines.append("\n---\n")
        lines.append(f"## {ods_table}({category} 类)\n")
        lines.append(f"> {notes}\n")
        lines.append("| # | ODS 列 | ODS 存在 | DWD 匹配 | FACT_MAPPINGS 状态 | 排查结论 | 建议操作 | 采样值 |")
        lines.append("|---|--------|---------|---------|-------------------|---------|---------|--------|")
        for i, r in enumerate(records, 1):
            ods_exists = "✅" if r["ods_exists"] else "❌"
            dwd_match = ", ".join(r["dwd_matches"]) if r["dwd_matches"] else "—"
            samples_str = ", ".join(str(s)[:30] for s in r["samples"][:3]) if r["samples"] else "—"
            lines.append(
                f"| {i} | `{r['ods_col']}` | {ods_exists} | {dwd_match} "
                f"| {r['fm_status']} | {r['conclusion']} | **{r['action']}** | {samples_str} |"
            )

    # TABLE_MAP registration coverage check.
    lines.append("\n---\n")
    lines.append("## TABLE_MAP 注册状态\n")
    lines.append("| DWD 表 | ODS 源表 | 已注册 |")
    lines.append("|--------|---------|--------|")
    for target in AUDIT_TARGETS:
        for dt in target["dwd_tables"]:
            dwd_full = f"dwd.{dt}"
            ods_full = f"ods.{target['ods_table']}"
            registered = dwd_full in DwdLoadTask.TABLE_MAP
            reg_str = "✅" if registered else "❌ 未注册"
            if registered:
                # Registered, but possibly pointing at a different ODS table.
                actual_ods = DwdLoadTask.TABLE_MAP[dwd_full]
                if actual_ods != ods_full:
                    reg_str = f"⚠️ 映射到 {actual_ods}"
            lines.append(f"| `{dwd_full}` | `{ods_full}` | {reg_str} |")
    # Category-C targets that have no DWD table yet.
    for target in AUDIT_TARGETS:
        if not target["dwd_tables"]:
            lines.append(f"| (待新建) | `ods.{target['ods_table']}` | ❌ 无 DWD 表 |")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: connect, audit every target table, write the report."""
    parser = argparse.ArgumentParser(description="字段排查脚本")
    parser.add_argument(
        "--output", type=str, default=None,
        help="输出文件路径(默认 $FIELD_AUDIT_ROOT/field_audit_report.md)",
    )
    args = parser.parse_args()

    # Load environment variables (.env.local overrides .env).
    load_dotenv(ROOT / ".env")
    load_dotenv(ROOT / ".env.local", override=True)

    dsn = os.environ.get("PG_DSN")
    if not dsn:
        print("错误:未配置 PG_DSN 环境变量", file=sys.stderr)
        sys.exit(1)

    print(f"连接数据库...")
    conn = psycopg2.connect(dsn)
    # Autocommit: the audit only reads, and this avoids a failed statement
    # poisoning a long-lived transaction.
    conn.autocommit = True

    print(f"解析 FACT_MAPPINGS...")
    fm_forward, fm_reverse = parse_fact_mappings()

    # Run the audit.
    # key = (ods_table, category, notes) — used to group the report sections.
    all_results: dict[tuple, list[dict]] = {}
    for target in AUDIT_TARGETS:
        key = (target["ods_table"], target["category"], target["notes"])
        print(f"排查 {target['ods_table']}({target['category']} 类)...")
        records = audit_one_table(conn, target, fm_forward, fm_reverse)
        all_results[key] = records
        # Print a brief per-field summary.
        for r in records:
            icon = "✅" if "无需变更" in r["action"] else "⚠️"
            print(f" {icon} {r['ods_col']}: {r['conclusion']} → {r['action']}")

    conn.close()

    # Render the report.
    report = generate_report(all_results)
    # Output directory comes from FIELD_AUDIT_ROOT in .env.
    from _env_paths import get_output_path
    default_dir = get_output_path("FIELD_AUDIT_ROOT")
    output_path = Path(args.output) if args.output else default_dir / "field_audit_report.md"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(report, encoding="utf-8")
    print(f"\n排查报告已生成:{output_path}")


if __name__ == "__main__":
    main()
|
||||
345
scripts/ops/field_level_report.py
Normal file
345
scripts/ops/field_level_report.py
Normal file
@@ -0,0 +1,345 @@
|
||||
# -*- coding: utf-8 -*-
"""
Field-level data-quality sampling report (v2 — performance-optimised).

Strategy: run only 1–2 SQL statements per table (instead of one query per
column) to drastically cut network round-trips.
- column metadata comes from information_schema
- one dynamically-built SELECT collects NULL counts for every column at once
- numeric/date/text statistics use a single aggregate SQL statement
"""
from __future__ import annotations

import os
import sys
from datetime import datetime
from pathlib import Path

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv

# Load environment variables from the repository-root .env file.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Output directory and database DSN are mandatory configuration.
ETL_REPORT_ROOT = os.environ.get("ETL_REPORT_ROOT")
if not ETL_REPORT_ROOT:
    raise RuntimeError("ETL_REPORT_ROOT 未在 .env 中定义")

PG_DSN = os.environ.get("PG_DSN")
if not PG_DSN:
    raise RuntimeError("PG_DSN 未在 .env 中定义")

TARGET_SCHEMAS = ["ods", "dwd", "dws"]
# Skip detailed stats for these columns (ETL metadata; no business meaning).
SKIP_STATS_COLS = {"payload", "content_hash", "record_index", "source_file", "source_endpoint"}
|
||||
|
||||
|
||||
def get_conn():
    """Open a read-only PostgreSQL connection that returns dict-style rows."""
    connection = psycopg2.connect(PG_DSN, cursor_factory=psycopg2.extras.RealDictCursor)
    connection.set_session(readonly=True)
    return connection
|
||||
|
||||
|
||||
def list_tables(conn, schema: str) -> list[str]:
    """Return the names of all base tables in *schema*, sorted by name."""
    sql = """
            SELECT table_name FROM information_schema.tables
            WHERE table_schema = %s AND table_type = 'BASE TABLE'
            ORDER BY table_name
        """
    with conn.cursor() as cur:
        cur.execute(sql, (schema,))
        rows = cur.fetchall()
    return [row["table_name"] for row in rows]
|
||||
|
||||
|
||||
def get_columns_meta(conn, schema: str, table: str) -> list[dict]:
    """Fetch column metadata for one table, in ordinal-position order.

    Each returned dict carries column_name, udt_name, is_nullable and the
    length/precision/scale fields from information_schema.columns.
    """
    sql = """
            SELECT column_name, udt_name, is_nullable,
                   character_maximum_length, numeric_precision, numeric_scale
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            ORDER BY ordinal_position
        """
    with conn.cursor() as cur:
        cur.execute(sql, (schema, table))
        rows = cur.fetchall()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def analyze_table_batch(conn, schema: str, table: str, columns: list[dict]) -> dict:
    """Analyze every column of one table using as few SQL statements as possible.

    Core idea: build one SELECT whose select-list holds several aggregate
    expressions per column, so a single round-trip yields all statistics.

    Returns a dict with keys: table, total_rows, column_count, columns
    (per-column stat dicts) and, on failure, an ``error`` message instead
    of column data. ``total_rows`` is -1 when the table cannot be read.
    """
    with conn.cursor() as cur:
        # 1) row count
        try:
            cur.execute(f"SELECT COUNT(*) AS cnt FROM {schema}.{table}")
            total = cur.fetchone()["cnt"]
        except Exception:
            conn.rollback()
            return {"table": f"{schema}.{table}", "total_rows": -1, "columns": [], "error": "无法读取"}

        if total == 0:
            # Empty table: emit placeholder stats for every column.
            return {
                "table": f"{schema}.{table}",
                "total_rows": 0,
                "column_count": len(columns),
                "columns": [
                    {"column": c["column_name"], "type": c["udt_name"], "total": 0,
                     "null_count": 0, "null_pct": "0%", "distinct": 0, "notes": "空表"}
                    for c in columns
                ],
            }

        # 2) build the batched aggregate SQL
        #    every column:     COUNT(*) FILTER (WHERE "col" IS NULL) AS null_col
        #    numeric columns:  MIN/MAX/AVG
        #    date columns:     MIN/MAX
        #    text columns:     MIN(LENGTH)/MAX(LENGTH)
        #    bool columns:     COUNT FILTER TRUE/FALSE
        select_parts = [f"{total} AS _total"]
        col_plan = []  # records which stats were planned for each column

        for c in columns:
            cname = c["column_name"]
            udt = c["udt_name"]
            safe = f'"{cname}"'
            # Aliases must be valid identifiers; normalize spaces/dashes.
            alias_base = cname.replace(" ", "_").replace("-", "_")

            plan = {"column": cname, "type": udt, "stats": []}

            # NULL count (computed for every column)
            select_parts.append(f"COUNT(*) FILTER (WHERE {safe} IS NULL) AS null_{alias_base}")
            plan["stats"].append("null")

            # Skip detailed stats for JSONB/bytea and ETL-metadata columns.
            if udt in ("jsonb", "json", "bytea") or cname in SKIP_STATS_COLS:
                col_plan.append(plan)
                continue

            if udt in ("int2", "int4", "int8", "float4", "float8", "numeric"):
                select_parts.append(f"MIN({safe}) AS min_{alias_base}")
                select_parts.append(f"MAX({safe}) AS max_{alias_base}")
                select_parts.append(f"ROUND(AVG({safe})::numeric, 2) AS avg_{alias_base}")
                plan["stats"].extend(["min", "max", "avg"])
            elif udt in ("date", "timestamp", "timestamptz"):
                # Compare as text to avoid psycopg2 choking on year<1 dates.
                select_parts.append(f"MIN({safe}::text) FILTER (WHERE {safe}::text >= '0001') AS min_{alias_base}")
                select_parts.append(f"MAX({safe}::text) FILTER (WHERE {safe}::text <= '9999') AS max_{alias_base}")
                plan["stats"].extend(["earliest", "latest"])
            elif udt in ("text", "varchar", "bpchar", "name"):
                select_parts.append(f"MIN(LENGTH({safe})) AS minlen_{alias_base}")
                select_parts.append(f"MAX(LENGTH({safe})) AS maxlen_{alias_base}")
                plan["stats"].extend(["min_len", "max_len"])
            elif udt == "bool":
                select_parts.append(f"COUNT(*) FILTER (WHERE {safe} = TRUE) AS true_{alias_base}")
                select_parts.append(f"COUNT(*) FILTER (WHERE {safe} = FALSE) AS false_{alias_base}")
                plan["stats"].extend(["true_count", "false_count"])

            col_plan.append(plan)

        # Execute the batched aggregate (one row covering all columns).
        sql = f"SELECT {', '.join(select_parts)} FROM {schema}.{table}"
        try:
            cur.execute(sql)
            agg = cur.fetchone()
        except Exception as e:
            conn.rollback()
            return {
                "table": f"{schema}.{table}",
                "total_rows": total,
                "column_count": len(columns),
                "columns": [],
                "error": f"聚合查询失败: {str(e)[:120]}",
            }

        # 3) unpack the single aggregate row into per-column result dicts
        results = []
        for plan in col_plan:
            cname = plan["column"]
            udt = plan["type"]
            alias_base = cname.replace(" ", "_").replace("-", "_")

            null_cnt = agg.get(f"null_{alias_base}", 0) or 0
            null_pct = round(null_cnt / total * 100, 1) if total > 0 else 0

            r = {
                "column": cname,
                "type": udt,
                "total": total,
                "null_count": null_cnt,
                "null_pct": f"{null_pct}%",
            }

            if udt in ("jsonb", "json", "bytea"):
                r["samples"] = [f"({udt.upper()})"]
                results.append(r)
                continue
            if cname in SKIP_STATS_COLS:
                r["samples"] = ["(ETL元数据)"]
                results.append(r)
                continue

            if "min" in plan["stats"]:
                r["min"] = agg.get(f"min_{alias_base}")
                r["max"] = agg.get(f"max_{alias_base}")
                r["avg"] = agg.get(f"avg_{alias_base}")
            if "earliest" in plan["stats"]:
                v = agg.get(f"min_{alias_base}")
                r["earliest"] = str(v) if v else None
                v = agg.get(f"max_{alias_base}")
                r["latest"] = str(v) if v else None
            if "min_len" in plan["stats"]:
                r["min_len"] = agg.get(f"minlen_{alias_base}")
                r["max_len"] = agg.get(f"maxlen_{alias_base}")
            if "true_count" in plan["stats"]:
                r["true_count"] = agg.get(f"true_{alias_base}")
                r["false_count"] = agg.get(f"false_{alias_base}")

            results.append(r)

        # 4) distinct counts: per-column queries for small tables only,
        #    skipped for large tables to keep runtime bounded
        if total <= 3000:
            for r in results:
                cname = r["column"]
                udt = r["type"]
                if udt in ("jsonb", "json", "bytea") or cname in SKIP_STATS_COLS:
                    r["distinct"] = "-"
                    continue
                try:
                    cur.execute(f'SELECT COUNT(DISTINCT "{cname}") AS d FROM {schema}.{table}')
                    r["distinct"] = cur.fetchone()["d"]
                except Exception:
                    conn.rollback()
                    r["distinct"] = "?"
        else:
            for r in results:
                r["distinct"] = "-"

        return {
            "table": f"{schema}.{table}",
            "total_rows": total,
            "column_count": len(columns),
            "columns": results,
        }
|
||||
|
||||
|
||||
# ── 报告格式化 ────────────────────────────────────────────────
|
||||
|
||||
def fmt_col_row(c: dict) -> str:
    """Render a single column's statistics as one Markdown table row.

    Expects a dict as produced by analyze_table_batch; absent keys fall
    back to placeholders so partially-populated rows still render.
    """
    name = c.get("column", "?")
    udt = c.get("type", "?")
    null_rate = c.get("null_pct", "?")
    uniq = c.get("distinct", "-")

    parts = []
    if c.get("min") is not None:
        parts.append(f"min={c['min']}, max={c['max']}, avg={c['avg']}")
    if c.get("earliest") is not None:
        parts.append(f"{c['earliest']} ~ {c['latest']}")
    if c.get("min_len") is not None:
        parts.append(f"len={c['min_len']}~{c['max_len']}")
    if "true_count" in c:
        parts.append(f"T={c['true_count']}, F={c['false_count']}")
    stats = "; ".join(parts) if parts else "-"

    samples = c.get("samples", [])
    sample_str = ", ".join(str(s)[:40] for s in samples[:3]) if samples else "-"

    return f"| {name} | {udt} | {null_rate} | {uniq} | {stats} | {sample_str} |"
|
||||
|
||||
|
||||
def generate_report(all_results: dict[str, list[dict]]) -> str:
    """Assemble the full Markdown quality report from per-schema results.

    *all_results* maps schema name -> list of table-analysis dicts from
    analyze_table_batch. Schemas are emitted in TARGET_SCHEMAS order.
    """
    out = [
        "# 字段级数据质量采样报告",
        "",
        f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
    ]

    for schema in TARGET_SCHEMAS:
        schema_tables = all_results.get(schema, [])
        if not schema_tables:
            continue

        rows_sum = sum(t["total_rows"] for t in schema_tables if t["total_rows"] > 0)
        out.append(f"## {schema.upper()} 层({len(schema_tables)} 张表,共 {rows_sum:,} 行)")
        out.append("")

        for tbl in schema_tables:
            out.append(f"### {tbl['table']}({tbl['total_rows']:,} 行,{tbl.get('column_count', 0)} 列)")
            out.append("")

            if tbl.get("error"):
                out.extend([f"> ❌ {tbl['error']}", ""])
                continue
            if not tbl["columns"]:
                out.extend(["> 无列信息", ""])
                continue

            out.append("| 字段 | 类型 | NULL率 | 唯一值 | 统计 | 样本 |")
            out.append("|------|------|--------|--------|------|------|")
            out.extend(fmt_col_row(col) for col in tbl["columns"])
            out.append("")

    n_tables = sum(len(v) for v in all_results.values())
    n_cols = sum(t.get("column_count", 0) for v in all_results.values() for t in v)
    out.extend([
        "## 汇总",
        "",
        f"- 分析表数: {n_tables}",
        f"- 分析字段数: {n_cols}",
        "",
    ])
    return "\n".join(out)
|
||||
|
||||
|
||||
def main():
    """Entry point: analyze every table in ods/dwd/dws and write a Markdown report.

    Progress is printed per table; the report lands in ETL_REPORT_ROOT with
    a timestamped filename.
    """
    print("=== 字段级数据质量采样分析 (v2) ===")
    conn = get_conn()

    # schema name -> list of per-table analysis dicts
    all_results: dict[str, list[dict]] = {}

    for schema in TARGET_SCHEMAS:
        print(f"\n分析 {schema} 层...")
        tables = list_tables(conn, schema)
        print(f" {len(tables)} 张表")
        schema_results = []
        for i, t in enumerate(tables, 1):
            cols = get_columns_meta(conn, schema, t)
            print(f" [{i}/{len(tables)}] {schema}.{t} ({len(cols)} 列)...", end="", flush=True)
            result = analyze_table_batch(conn, schema, t, cols)
            schema_results.append(result)
            print(f" {result['total_rows']:,} 行", end="")
            if result.get("error"):
                print(f" ❌ {result['error'][:60]}")
            else:
                print(" ✓")
        all_results[schema] = schema_results

    conn.close()

    # Render and persist the report under ETL_REPORT_ROOT.
    print("\n生成报告...")
    report = generate_report(all_results)

    out_dir = Path(ETL_REPORT_ROOT)
    out_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = out_dir / f"field_level_report_{ts}.md"
    out_path.write_text(report, encoding="utf-8")
    print(f"报告已生成: {out_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
212
scripts/ops/find_complex_orders.py
Normal file
212
scripts/ops/find_complex_orders.py
Normal file
@@ -0,0 +1,212 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
扫描 EXPORT_ROOT 下所有 ODS JSON 文件,按 order_trade_no 聚合,
|
||||
计算每个总订单的复杂度并输出 Top 10。
|
||||
|
||||
复杂度维度:
|
||||
- 子台桌使用记录数(table_fee_transactions)
|
||||
- 台费折扣记录数(table_fee_discount_records)
|
||||
- 助教服务记录数(assistant_service_records)
|
||||
- 商品销售记录数(store_goods_sales_records)
|
||||
- 团购核销记录数(group_buy_redemption_records)
|
||||
- 支付记录数(payment_transactions,通过 relate_id 关联)
|
||||
- 退款记录数(refund_transactions,通过 relate_id 关联)
|
||||
|
||||
总复杂度 = 各维度记录数之和
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
|
||||
def load_records_from_task_dirs(json_root: Path, dir_prefix: str, file_stem: str) -> list[dict]:
    """Collect records from every ODS task directory matching *dir_prefix*.

    Walks every run directory under each matching task directory and
    extracts records from JSON files whose stem starts with *file_stem*.
    """
    collected: list[dict] = []
    for task_dir in sorted(json_root.iterdir()):
        if not (task_dir.is_dir() and task_dir.name.startswith(dir_prefix)):
            continue
        for run_dir in sorted(task_dir.iterdir()):
            if not run_dir.is_dir():
                continue
            for entry in run_dir.iterdir():
                if entry.suffix == ".json" and entry.stem.startswith(file_stem):
                    collected.extend(_extract_records(entry))
    return collected
|
||||
|
||||
|
||||
def load_archive_records(json_root: Path, file_stem: str) -> list[dict]:
    """Load paginated records from the ODS_JSON_ARCHIVE directory, if present.

    Returns an empty list when the archive directory does not exist.
    """
    archive_dir = json_root / "ODS_JSON_ARCHIVE"
    out: list[dict] = []
    if not archive_dir.exists():
        return out
    for run_dir in archive_dir.iterdir():
        if not run_dir.is_dir():
            continue
        for entry in run_dir.iterdir():
            if entry.suffix == ".json" and entry.stem.startswith(file_stem):
                out.extend(_extract_archive_records(entry))
    return out
|
||||
|
||||
|
||||
def _extract_records(filepath: Path) -> list[dict]:
|
||||
"""从标准 ODS JSON(含 pages[].response.data)中提取记录。"""
|
||||
try:
|
||||
data = json.loads(filepath.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return []
|
||||
items = []
|
||||
for page in data.get("pages", []):
|
||||
resp_data = page.get("response", {}).get("data", {})
|
||||
# 不同 endpoint 的列表字段名不同,遍历所有 list 类型值
|
||||
for v in resp_data.values():
|
||||
if isinstance(v, list):
|
||||
items.extend(v)
|
||||
return items
|
||||
|
||||
|
||||
def _extract_archive_records(filepath: Path) -> list[dict]:
|
||||
"""从 archive 分页 JSON({code, data: [...]}) 中提取记录。"""
|
||||
try:
|
||||
data = json.loads(filepath.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return []
|
||||
payload = data.get("data", [])
|
||||
return payload if isinstance(payload, list) else []
|
||||
|
||||
|
||||
def main():
    """Scan all ODS JSON exports, aggregate child records per order_trade_no,
    score each top-level order's complexity, and print the Top 10.

    Complexity = total child-record count + 2 * number of distinct
    dimensions touched (breadth bonus).
    """
    json_root = get_output_path("EXPORT_ROOT")

    # --- 1. load the various child-record sources ---
    # source config: (dir prefix, file-name prefix, join field, dimension label)
    sources = [
        ("ODS_TABLE_USE", "table_fee_transactions", "order_trade_no", "台桌使用"),
        ("ODS_TABLE_FEE_DISCOUNT", "table_fee_discount_records", "order_trade_no", "台费折扣"),
        ("ODS_ASSISTANT_LEDGER", "assistant_service_records", "order_trade_no", "助教服务"),
        ("ODS_STORE_GOODS_SALES", "store_goods_sales_records", "order_trade_no", "商品销售"),
        ("ODS_GROUP_BUY_REDEMPTION","group_buy_redemption_records", "order_trade_no", "团购核销"),
    ]
    # Payments/refunds join via relate_id -> order_settle_id, so they need a
    # second mapping built from the table-use records below.
    payment_sources = [
        ("ODS_PAYMENT", "payment_transactions", "支付记录"),
    ]
    refund_source = ("ODS_REFUND", "refund_transactions", "退款记录")

    # order_trade_no -> {dimension label: count}
    order_complexity: dict[int, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    # order_trade_no -> basic info from the first record seen (for display)
    order_info: dict[int, dict] = {}
    # order_settle_id -> order_trade_no (built from table-use records)
    settle_to_trade: dict[int, int] = {}

    # Load the directly-joined child records.
    for dir_prefix, file_stem, key_field, dim_name in sources:
        recs = load_records_from_task_dirs(json_root, dir_prefix, file_stem)
        recs += load_archive_records(json_root, file_stem)
        seen_ids = set()
        for r in recs:
            trade_no = r.get(key_field)
            if not trade_no or trade_no == 0:
                continue
            # Dedupe: the same record may appear in several runs.
            # NOTE(review): id(r) as fallback key means records without an
            # "id" field are never deduped across runs — confirm intended.
            rec_id = r.get("id", id(r))
            if rec_id in seen_ids:
                continue
            seen_ids.add(rec_id)

            order_complexity[trade_no][dim_name] += 1

            # Remember basic order info from the first record seen.
            if trade_no not in order_info:
                order_info[trade_no] = {
                    "order_trade_no": trade_no,
                    "create_time": r.get("create_time", ""),
                    "ledger_name": r.get("ledger_name", r.get("tableName", "")),
                }

            # Build the settle_id -> trade_no mapping.
            settle_id = r.get("order_settle_id")
            if settle_id and settle_id != 0:
                settle_to_trade[settle_id] = trade_no

    # Load payments (relate_id -> order_settle_id -> order_trade_no).
    for dir_prefix, file_stem, dim_name in payment_sources:
        recs = load_records_from_task_dirs(json_root, dir_prefix, file_stem)
        recs += load_archive_records(json_root, file_stem)
        seen_ids = set()
        for r in recs:
            rec_id = r.get("id", id(r))
            if rec_id in seen_ids:
                continue
            seen_ids.add(rec_id)
            relate_id = r.get("relate_id")
            if not relate_id or relate_id == 0:
                continue
            trade_no = settle_to_trade.get(relate_id)
            if trade_no:
                # dim_name equals this hard-coded label for the single entry
                # in payment_sources.
                order_complexity[trade_no]["支付记录"] += 1

    # Load refunds (same relate_id indirection).
    dir_prefix, file_stem, dim_name = refund_source
    recs = load_records_from_task_dirs(json_root, dir_prefix, file_stem)
    recs += load_archive_records(json_root, file_stem)
    seen_ids = set()
    for r in recs:
        rec_id = r.get("id", id(r))
        if rec_id in seen_ids:
            continue
        seen_ids.add(rec_id)
        relate_id = r.get("relate_id")
        if not relate_id or relate_id == 0:
            continue
        trade_no = settle_to_trade.get(relate_id)
        if trade_no:
            order_complexity[trade_no]["退款记录"] += 1

    # --- 2. score and sort ---
    all_dims = ["台桌使用", "台费折扣", "助教服务", "商品销售", "团购核销", "支付记录", "退款记录"]
    scored = []
    for trade_no, dims in order_complexity.items():
        total = sum(dims.values())
        # Extra weight for the number of distinct dimensions touched
        # (rewards "breadth" of complexity).
        breadth = sum(1 for d in all_dims if dims.get(d, 0) > 0)
        score = total + breadth * 2
        scored.append((trade_no, score, total, breadth, dims))

    scored.sort(key=lambda x: x[1], reverse=True)
    top10 = scored[:10]

    # --- 3. print results ---
    print("=" * 100)
    print(f" 订单复杂度 Top 10(共扫描 {len(order_complexity)} 个总订单)")
    print("=" * 100)
    for rank, (trade_no, score, total, breadth, dims) in enumerate(top10, 1):
        info = order_info.get(trade_no, {})
        print(f"\n{'─' * 80}")
        print(f" #{rank} order_trade_no = {trade_no}")
        print(f" 创建时间: {info.get('create_time', '未知')}")
        print(f" 复杂度得分: {score} (子记录总数={total}, 涉及维度={breadth})")
        print(f" 各维度明细:")
        for d in all_dims:
            cnt = dims.get(d, 0)
            if cnt > 0:
                # Cap the bar at 40 chars so huge counts stay readable.
                bar = "█" * min(cnt, 40)
                print(f" {d:8s}: {cnt:4d} {bar}")
    print(f"\n{'─' * 80}")
    print(f"\n统计摘要:")
    print(f" 总订单数: {len(order_complexity)}")
    if scored:
        avg_score = sum(s[1] for s in scored) / len(scored)
        print(f" 平均复杂度得分: {avg_score:.1f}")
        print(f" 最高复杂度得分: {scored[0][1]}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
239
scripts/ops/find_complex_orders_v2.py
Normal file
239
scripts/ops/find_complex_orders_v2.py
Normal file
@@ -0,0 +1,239 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
按 order_settle_id(结算单)聚合,找出多台桌、多助教的复杂订单。
|
||||
|
||||
order_settle_id 是一次结算的唯一标识,一次结算可包含:
|
||||
- 多个台桌使用记录(不同 order_trade_no)
|
||||
- 多个助教服务记录
|
||||
- 多条台费折扣
|
||||
- 多条团购核销
|
||||
- 多笔支付/退款
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
|
||||
def _extract_records(filepath: Path) -> list[dict]:
|
||||
try:
|
||||
data = json.loads(filepath.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return []
|
||||
items = []
|
||||
for page in data.get("pages", []):
|
||||
resp_data = page.get("response", {}).get("data", {})
|
||||
for v in resp_data.values():
|
||||
if isinstance(v, list):
|
||||
items.extend(v)
|
||||
return items
|
||||
|
||||
|
||||
def _extract_archive_records(filepath: Path) -> list[dict]:
|
||||
try:
|
||||
data = json.loads(filepath.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return []
|
||||
payload = data.get("data", [])
|
||||
return payload if isinstance(payload, list) else []
|
||||
|
||||
|
||||
def load_all(json_root: Path, dir_prefix: str, file_stem: str) -> list[dict]:
    """Gather matching records from task dirs with *dir_prefix* plus ODS_JSON_ARCHIVE.

    Archive directories use the paginated extractor; regular task
    directories use the standard pages[].response.data extractor.
    """
    out: list[dict] = []
    for task_dir in sorted(json_root.iterdir()):
        if not task_dir.is_dir():
            continue
        is_archive = task_dir.name == "ODS_JSON_ARCHIVE"
        if not (is_archive or task_dir.name.startswith(dir_prefix)):
            continue
        extractor = _extract_archive_records if is_archive else _extract_records
        for run_dir in task_dir.iterdir():
            if not run_dir.is_dir():
                continue
            for entry in run_dir.iterdir():
                if entry.suffix == ".json" and entry.stem.startswith(file_stem):
                    out.extend(extractor(entry))
    return out
|
||||
|
||||
|
||||
def dedup(records: list[dict]) -> list[dict]:
    """Deduplicate by the "id" key, keeping each record's first occurrence.

    Records whose "id" is missing or falsy are always kept, since they
    cannot be matched against anything.
    """
    kept: list[dict] = []
    seen_ids: set = set()
    for rec in records:
        rec_id = rec.get("id")
        if rec_id:
            if rec_id in seen_ids:
                continue
            seen_ids.add(rec_id)
        kept.append(rec)
    return kept
|
||||
|
||||
|
||||
def main():
    """Aggregate child records per order_settle_id (one settlement) and print
    the settlements that span multiple tables and/or multiple assistants.
    """
    json_root = get_output_path("EXPORT_ROOT")

    # Load each record family, deduplicated by "id".
    # table-use records
    table_use = dedup(load_all(json_root, "ODS_TABLE_USE", "table_fee_transactions"))
    # assistant-service records
    assistant = dedup(load_all(json_root, "ODS_ASSISTANT_LEDGER", "assistant_service_records"))
    # table-fee discounts
    discount = dedup(load_all(json_root, "ODS_TABLE_FEE_DISCOUNT", "table_fee_discount_records"))
    # group-buy redemptions
    groupbuy = dedup(load_all(json_root, "ODS_GROUP_BUY_REDEMPTION", "group_buy_redemption_records"))
    # payments
    payments = dedup(load_all(json_root, "ODS_PAYMENT", "payment_transactions"))
    # refunds
    refunds = dedup(load_all(json_root, "ODS_REFUND", "refund_transactions"))

    # --- aggregate by order_settle_id ---
    # settle_id -> per-dimension details
    settle_data: dict[int, dict] = defaultdict(lambda: {
        "台桌": [],      # table names, one per distinct order_trade_no
        "台桌记录": [],
        "助教": [],      # assistant names (may repeat)
        "助教记录": [],
        "台费折扣": 0,
        "团购核销": 0,
        "支付": 0,
        "退款": 0,
        "create_time": "",
        "trade_nos": set(),
    })

    # Table use -> aggregate by order_settle_id.
    for r in table_use:
        sid = r.get("order_settle_id")
        if not sid or sid == 0:
            continue
        d = settle_data[sid]
        tno = r.get("order_trade_no", 0)
        tname = r.get("ledger_name", "?")
        if tno not in d["trade_nos"]:
            d["trade_nos"].add(tno)
            d["台桌"].append(tname)
            d["台桌记录"].append(r)
        # Keep the earliest create_time seen across the settlement's records.
        ct = r.get("create_time", "")
        if ct and (not d["create_time"] or ct < d["create_time"]):
            d["create_time"] = ct

    # Assistant service -> aggregate by order_settle_id.
    for r in assistant:
        sid = r.get("order_settle_id")
        if not sid or sid == 0:
            continue
        d = settle_data[sid]
        aname = r.get("assistantName", r.get("ledger_name", "?"))
        d["助教"].append(aname)
        d["助教记录"].append(r)

    # Table-fee discounts (count only).
    for r in discount:
        sid = r.get("order_settle_id")
        if sid and sid != 0:
            settle_data[sid]["台费折扣"] += 1

    # Group-buy redemptions (count only).
    for r in groupbuy:
        sid = r.get("order_settle_id")
        if sid and sid != 0:
            settle_data[sid]["团购核销"] += 1

    # Payments (relate_id = order_settle_id).
    for r in payments:
        rid = r.get("relate_id")
        if rid and rid in settle_data:
            settle_data[rid]["支付"] += 1

    # Refunds (same join).
    for r in refunds:
        rid = r.get("relate_id")
        if rid and rid in settle_data:
            settle_data[rid]["退款"] += 1

    # --- filter: settlements with multiple tables or multiple assistants ---
    multi_table = []
    multi_assistant = []
    for sid, d in settle_data.items():
        n_tables = len(d["台桌"])
        n_assistants = len(set(d["助教"]))  # distinct assistant names
        if n_tables >= 2:
            multi_table.append((sid, d, n_tables, n_assistants))
        if n_assistants >= 2:
            multi_assistant.append((sid, d, n_tables, n_assistants))

    multi_table.sort(key=lambda x: x[2], reverse=True)
    multi_assistant.sort(key=lambda x: x[3], reverse=True)

    # --- output: multi-table settlements ---
    print("=" * 100)
    print(f" 多台桌结算单 Top 10(共 {len(multi_table)} 个结算单含 ≥2 台桌)")
    print("=" * 100)
    for i, (sid, d, nt, na) in enumerate(multi_table[:10], 1):
        unique_assistants = sorted(set(d["助教"]))
        print(f"\n{'─' * 80}")
        print(f" #{i} order_settle_id = {sid}")
        print(f" 创建时间: {d['create_time']}")
        print(f" 台桌数: {nt} | 助教数: {len(unique_assistants)} | 台费折扣: {d['台费折扣']} | 团购核销: {d['团购核销']} | 支付: {d['支付']} | 退款: {d['退款']}")
        print(f" 台桌列表: {', '.join(d['台桌'])}")
        if unique_assistants:
            print(f" 助教列表: {', '.join(unique_assistants)}")
        # Show each table's amount and duration.
        for r in d["台桌记录"]:
            amt = r.get("ledger_amount", 0)
            secs = r.get("real_table_use_seconds", r.get("ledger_count", 0))
            hours = secs / 3600 if secs else 0
            tno = r.get("order_trade_no", "?")
            print(f" → {r.get('ledger_name','?'):8s} 金额={amt:>8.2f} 时长={hours:.1f}h trade_no={tno}")

    # --- output: multi-assistant settlements ---
    print(f"\n\n{'=' * 100}")
    print(f" 多助教结算单 Top 10(共 {len(multi_assistant)} 个结算单含 ≥2 位助教)")
    print("=" * 100)
    for i, (sid, d, nt, na) in enumerate(multi_assistant[:10], 1):
        unique_assistants = sorted(set(d["助教"]))
        print(f"\n{'─' * 80}")
        print(f" #{i} order_settle_id = {sid}")
        print(f" 创建时间: {d['create_time']}")
        print(f" 台桌数: {nt} | 助教数: {len(unique_assistants)} | 台费折扣: {d['台费折扣']} | 团购核销: {d['团购核销']} | 支付: {d['支付']} | 退款: {d['退款']}")
        print(f" 台桌列表: {', '.join(d['台桌'])}")
        print(f" 助教列表: {', '.join(unique_assistants)}")
        # Show each assistant's service details.
        for r in d["助教记录"]:
            aname = r.get("assistantName", r.get("ledger_name", "?"))
            skill = r.get("skillName", "?")
            amt = r.get("ledger_amount", 0)
            tname = r.get("tableName", "?")
            print(f" → 助教={aname:6s} 技能={skill:6s} 台桌={tname:6s} 金额={amt:>8.2f}")

    # --- output: both multi-table and multi-assistant ---
    both = [(sid, d, nt, na) for sid, d, nt, na in multi_table if na >= 2]
    both.sort(key=lambda x: x[2] + x[3], reverse=True)
    if both:
        print(f"\n\n{'=' * 100}")
        print(f" 同时多台桌+多助教(共 {len(both)} 个)")
        print("=" * 100)
        for i, (sid, d, nt, na) in enumerate(both[:10], 1):
            unique_assistants = sorted(set(d["助教"]))
            print(f"\n{'─' * 80}")
            print(f" #{i} order_settle_id = {sid}")
            print(f" 创建时间: {d['create_time']}")
            print(f" 台桌数: {nt} | 助教数: {len(unique_assistants)} | 台费折扣: {d['台费折扣']} | 团购核销: {d['团购核销']} | 支付: {d['支付']} | 退款: {d['退款']}")
            print(f" 台桌: {', '.join(d['台桌'])}")
            print(f" 助教: {', '.join(unique_assistants)}")

    print(f"\n{'─' * 80}")
    print(f"\n统计摘要:")
    print(f" 总结算单数: {len(settle_data)}")
    print(f" 含 ≥2 台桌: {len(multi_table)}")
    print(f" 含 ≥2 助教: {len(multi_assistant)}")
    print(f" 同时多台桌+多助教: {len(both)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
86
scripts/ops/fix_bc_dates.py
Normal file
86
scripts/ops/fix_bc_dates.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""BUG 12 存量修复:扫描 DWD 所有表的 timestamptz 列,将 BC 日期(< 0002-01-01)修复为 NULL。
|
||||
|
||||
根因:上游 API 用 0001-01-01T00:00:00 表示"未设置",ODS 存为 timestamp,
|
||||
DWD 隐式转为 timestamptz 时在 Asia/Shanghai 时区下变成 BC 日期,
|
||||
psycopg2 无法解析导致 fetchall() 崩溃。
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
PG_DSN = os.environ.get("PG_DSN")
|
||||
if not PG_DSN:
|
||||
print("ERROR: PG_DSN 未配置", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
SENTINEL = "0002-01-01"
|
||||
SCHEMA = "dwd"
|
||||
|
||||
def main():
    """Scan every timestamptz column in the dwd schema and NULL out BC dates.

    All updates run in one transaction; an unexpected failure outside the
    per-column retry path rolls everything back.
    """
    conn = psycopg2.connect(PG_DSN)
    conn.autocommit = False
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Find every timestamptz column in the target schema.
            cur.execute("""
                SELECT t.table_name, c.column_name
                FROM information_schema.tables t
                JOIN information_schema.columns c
                  ON t.table_schema = c.table_schema AND t.table_name = c.table_name
                WHERE t.table_schema = %s
                  AND t.table_type = 'BASE TABLE'
                  AND c.data_type = 'timestamp with time zone'
                ORDER BY t.table_name, c.ordinal_position
            """, (SCHEMA,))
            cols = cur.fetchall()

            total_fixed = 0
            for row in cols:
                tbl = row["table_name"]
                col = row["column_name"]
                # psycopg2 can hit a client-side parse error on BC dates;
                # EXTRACT keeps the comparison entirely server-side.
                sql = (
                    f'UPDATE "{SCHEMA}"."{tbl}" '
                    f'SET "{col}" = NULL '
                    f"WHERE EXTRACT(year FROM \"{col}\") < 1"
                )
                try:
                    cur.execute(sql)
                    cnt = cur.rowcount
                    if cnt > 0:
                        print(f" FIXED: {SCHEMA}.{tbl}.{col} — {cnt} 行")
                        total_fixed += cnt
                except Exception as e:
                    # NOTE(review): this rollback also discards earlier
                    # successful UPDATEs in the same transaction while
                    # total_fixed keeps counting them — confirm intended.
                    conn.rollback()
                    # After rollback, retry via a text cast.
                    print(f" WARN: {SCHEMA}.{tbl}.{col} — EXTRACT 失败({e}),用 text 方式重试")
                    sql2 = (
                        f'UPDATE "{SCHEMA}"."{tbl}" '
                        f'SET "{col}" = NULL '
                        f"WHERE \"{col}\"::text LIKE '%BC%'"
                    )
                    cur.execute(sql2)
                    cnt = cur.rowcount
                    if cnt > 0:
                        print(f" FIXED: {SCHEMA}.{tbl}.{col} — {cnt} 行 (text 方式)")
                        total_fixed += cnt

            conn.commit()
            print(f"\n完成:共修复 {total_fixed} 行")

    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -4,7 +4,7 @@
|
||||
直接从 API 返回的 JSON 分析,不依赖处理代码。
|
||||
|
||||
用法: python scripts/ops/gen_api_field_mapping.py
|
||||
输出: 在 docs/reports/dataflow_api_ods_dwd.md 的每个表章节中插入 API 源字段小节
|
||||
输出: 在 $FULL_DATAFLOW_DOC_ROOT/dataflow_api_ods_dwd.md 的每个表章节中插入 API 源字段小节
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
@@ -16,7 +16,8 @@ from pathlib import Path
|
||||
import psycopg2
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
INPUT_DOC = ROOT / "docs" / "reports" / "dataflow_api_ods_dwd.md"
|
||||
from _env_paths import get_output_path as _get_path
|
||||
INPUT_DOC = _get_path("FULL_DATAFLOW_DOC_ROOT") / "dataflow_api_ods_dwd.md"
|
||||
OUTPUT_DOC = INPUT_DOC # 原地更新
|
||||
|
||||
# ODS schema 名(从数据库动态检测)
|
||||
@@ -25,7 +26,6 @@ ODS_SCHEMA = None # 运行时自动检测
|
||||
# ODS 表列表(与文档中的顺序一致)
|
||||
ODS_TABLES = [
|
||||
"assistant_accounts_master",
|
||||
"assistant_cancellation_records",
|
||||
"assistant_service_records",
|
||||
"goods_stock_movements",
|
||||
"goods_stock_summary",
|
||||
@@ -39,7 +39,6 @@ ODS_TABLES = [
|
||||
"recharge_settlements",
|
||||
"refund_transactions",
|
||||
"settlement_records",
|
||||
"settlement_ticket_details",
|
||||
"site_tables_master",
|
||||
"stock_goods_category_tree",
|
||||
"store_goods_master",
|
||||
|
||||
276
scripts/ops/gen_consolidated_ddl.py
Normal file
276
scripts/ops/gen_consolidated_ddl.py
Normal file
@@ -0,0 +1,276 @@
|
||||
"""
|
||||
从测试数据库导出完整 DDL,按 schema 分文件写入 docs/database/ddl/。
|
||||
以数据库现状为准,整合所有 schema/表/约束/索引/视图/物化视图/序列/FDW 配置。
|
||||
|
||||
输出文件:
|
||||
docs/database/ddl/etl_feiqiu__meta.sql
|
||||
docs/database/ddl/etl_feiqiu__ods.sql
|
||||
docs/database/ddl/etl_feiqiu__dwd.sql
|
||||
docs/database/ddl/etl_feiqiu__core.sql
|
||||
docs/database/ddl/etl_feiqiu__dws.sql
|
||||
docs/database/ddl/etl_feiqiu__app.sql
|
||||
docs/database/ddl/zqyy_app__public.sql
|
||||
docs/database/ddl/fdw.sql
|
||||
|
||||
用法:cd C:\\NeoZQYY && python scripts/ops/gen_consolidated_ddl.py
|
||||
"""
|
||||
import os, sys
|
||||
from pathlib import Path
|
||||
from datetime import date
|
||||
|
||||
import psycopg2
|
||||
|
||||
# ── 环境 ──────────────────────────────────────────────────────────────────
|
||||
from dotenv import load_dotenv
|
||||
ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
load_dotenv(ROOT / ".env")
|
||||
|
||||
ETL_DSN = os.environ.get("TEST_DB_DSN") or os.environ.get("PG_DSN")
|
||||
APP_DSN = os.environ.get("TEST_APP_DB_DSN") or os.environ.get("APP_DB_DSN")
|
||||
if not ETL_DSN:
|
||||
sys.exit("ERROR: TEST_DB_DSN / PG_DSN 未配置")
|
||||
if not APP_DSN:
|
||||
sys.exit("ERROR: TEST_APP_DB_DSN / APP_DB_DSN 未配置")
|
||||
|
||||
OUTPUT_DIR = ROOT / "docs" / "database" / "ddl"
|
||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
FDW_FILE = ROOT / "db" / "fdw" / "setup_fdw.sql"
|
||||
TODAY = date.today().isoformat()
|
||||
|
||||
# ── SQL templates ─────────────────────────────────────────────────────────
# Every statement below takes the target schema name as a psycopg2 %s
# parameter.  Literal percent signs needed inside the SQL (format() calls,
# LIKE patterns) are escaped as %% so they survive psycopg2 interpolation.

# Reassemble one CREATE TABLE statement per base table from
# information_schema.columns: column type (including varchar length and
# numeric precision/scale), DEFAULT and NOT NULL, in ordinal order.
SQL_TABLES = """
WITH cols AS (
  SELECT table_schema, table_name,
         string_agg(
           format(E' %%I %%s%%s%%s',
             column_name,
             CASE WHEN data_type = 'USER-DEFINED' THEN udt_name
                  WHEN data_type = 'ARRAY' THEN udt_name
                  WHEN character_maximum_length IS NOT NULL THEN data_type || '(' || character_maximum_length || ')'
                  WHEN numeric_precision IS NOT NULL AND data_type IN ('numeric','decimal') THEN data_type || '(' || numeric_precision || ',' || numeric_scale || ')'
                  ELSE data_type END,
             CASE WHEN column_default IS NOT NULL THEN ' DEFAULT ' || column_default ELSE '' END,
             CASE WHEN is_nullable = 'NO' THEN ' NOT NULL' ELSE '' END
           ), E',\\n' ORDER BY ordinal_position
         ) as col_defs
  FROM information_schema.columns
  WHERE table_schema = %s
    AND table_name IN (SELECT table_name FROM information_schema.tables WHERE table_schema = %s AND table_type = 'BASE TABLE')
  GROUP BY table_schema, table_name
)
SELECT format(E'CREATE TABLE %%I.%%I (\\n%%s\\n);', table_schema, table_name, col_defs) as ddl
FROM cols ORDER BY table_name;
"""

# Primary-key / unique / foreign-key constraints, ordered by table then
# constraint type then name.
SQL_CONSTRAINTS = """
SELECT n.nspname as schema, conrelid::regclass as tbl, conname,
       pg_get_constraintdef(c.oid) as def, contype
FROM pg_constraint c
JOIN pg_namespace n ON n.oid = c.connamespace
WHERE n.nspname = %s AND contype IN ('p','u','f')
ORDER BY conrelid::regclass::text, contype, conname;
"""

# Plain indexes; indexes that back PK/unique constraints are excluded
# (those are already emitted as constraints by SQL_CONSTRAINTS).
SQL_INDEXES = """
SELECT indexname, indexdef
FROM pg_indexes
WHERE schemaname = %s
  AND indexname NOT IN (SELECT conname FROM pg_constraint WHERE contype IN ('p','u'))
ORDER BY tablename, indexname;
"""

# Sequences and their value data type.
SQL_SEQUENCES = """
SELECT sequence_name, data_type
FROM information_schema.sequences
WHERE sequence_schema = %s
ORDER BY sequence_name;
"""

# Regular views with their full definition.
SQL_VIEWS = """
SELECT viewname, definition
FROM pg_views
WHERE schemaname = %s
ORDER BY viewname;
"""

# Materialized views with their full definition.
SQL_MATVIEWS = """
SELECT matviewname, definition
FROM pg_matviews
WHERE schemaname = %s
ORDER BY matviewname;
"""

# Indexes on materialized views, matched by the mv_ naming convention
# (%% becomes a literal % in the LIKE pattern after interpolation).
SQL_MV_INDEXES = """
SELECT indexname, indexdef
FROM pg_indexes
WHERE schemaname = %s
  AND tablename LIKE 'mv_%%'
ORDER BY tablename, indexname;
"""

# Base-table count, used for the per-file summary line.
SQL_TABLE_COUNT = """
SELECT count(*) FROM information_schema.tables
WHERE table_schema = %s AND table_type = 'BASE TABLE';
"""
|
||||
|
||||
# ── 辅助函数 ──────────────────────────────────────────────────────────────
|
||||
def query(conn, sql, params=None):
    """Execute *sql* on *conn* with optional *params* and return all rows."""
    with conn.cursor() as cur:
        cur.execute(sql, params)
        rows = cur.fetchall()
    return rows
|
||||
|
||||
def section(f, title, level=1):
    """Write a SQL comment banner for *title*; level 1 uses '=', deeper levels '-'."""
    if level == 1:
        rule = "=" * 77
    else:
        rule = "-" * 77
    f.write(f"\n-- {rule}\n-- {title}\n-- {rule}\n\n")
|
||||
|
||||
def write_sequences(f, conn, schema):
    """Emit CREATE SEQUENCE statements for every sequence in *schema*."""
    rows = query(conn, SQL_SEQUENCES, (schema,))
    if not rows:
        return
    f.write("-- 序列\n")
    f.writelines(
        f"CREATE SEQUENCE IF NOT EXISTS {schema}.{seq_name} AS {seq_type};\n"
        for seq_name, seq_type in rows
    )
    f.write("\n")
|
||||
|
||||
def write_tables(f, conn, schema):
    """Emit the CREATE TABLE DDL (pre-assembled by SQL_TABLES) for *schema*."""
    rows = query(conn, SQL_TABLES, (schema, schema))
    if not rows:
        return
    f.write("-- 表\n")
    # Each row holds a single column: the complete CREATE TABLE statement.
    f.writelines(row[0] + "\n\n" for row in rows)
|
||||
|
||||
def write_constraints(f, conn, schema):
    """Emit ALTER TABLE ... ADD CONSTRAINT for PK/unique/FK constraints in *schema*."""
    rows = query(conn, SQL_CONSTRAINTS, (schema,))
    if not rows:
        return
    f.write("-- 约束(主键 / 唯一 / 外键)\n")
    for row in rows:
        # Row layout: (schema, table, constraint name, definition, type).
        tbl, conname, condef = row[1], row[2], row[3]
        f.write(f"ALTER TABLE {tbl} ADD CONSTRAINT {conname} {condef};\n")
    f.write("\n")
|
||||
|
||||
def write_indexes(f, conn, schema):
    """Emit standalone index DDL for *schema* (constraint-backed indexes excluded by SQL_INDEXES)."""
    rows = query(conn, SQL_INDEXES, (schema,))
    if not rows:
        return
    f.write("-- 索引\n")
    f.writelines(index_def + ";\n" for _name, index_def in rows)
    f.write("\n")
|
||||
|
||||
def write_views(f, conn, schema):
    """Emit CREATE OR REPLACE VIEW statements for every view in *schema*."""
    rows = query(conn, SQL_VIEWS, (schema,))
    if not rows:
        return
    f.write("-- 视图\n")
    for view_name, view_def in rows:
        body = view_def.strip()
        f.write(f"CREATE OR REPLACE VIEW {schema}.{view_name} AS\n{body}\n;\n\n")
|
||||
|
||||
def write_matviews(f, conn, schema):
    """Emit CREATE MATERIALIZED VIEW statements, then their indexes, for *schema*."""
    mv_rows = query(conn, SQL_MATVIEWS, (schema,))
    if not mv_rows:
        return
    f.write("-- 物化视图\n")
    for mv_name, mv_def in mv_rows:
        body = mv_def.strip()
        f.write(f"CREATE MATERIALIZED VIEW {schema}.{mv_name} AS\n{body}\n;\n\n")
    # Matview indexes are located by the mv_ name prefix (see SQL_MV_INDEXES).
    idx_rows = query(conn, SQL_MV_INDEXES, (schema,))
    if idx_rows:
        f.write("-- 物化视图索引\n")
        f.writelines(index_def + ";\n" for _name, index_def in idx_rows)
        f.write("\n")
|
||||
|
||||
def write_schema_file(conn, db_name, schema, label, views_only=False):
    """Generate one standalone DDL file for *schema* of *db_name* under OUTPUT_DIR.

    With views_only=True only view definitions are dumped (used for the
    RLS view layer); otherwise sequences, tables, constraints, indexes,
    views and materialized views are written in that order.
    Returns the path of the file written.
    """
    filename = f"{db_name}__{schema}.sql"
    filepath = OUTPUT_DIR / filename

    # Table count only feeds the summary line printed at the end.
    table_count = query(conn, SQL_TABLE_COUNT, (schema,))[0][0]

    with open(filepath, "w", encoding="utf-8") as f:
        f.write(f"""\
-- =============================================================================
-- {db_name} / {schema}({label})
-- 生成日期:{TODAY}
-- 来源:测试库(通过脚本自动导出)
-- =============================================================================

CREATE SCHEMA IF NOT EXISTS {schema};

""")
        if views_only:
            writers = (write_views,)
        else:
            writers = (write_sequences, write_tables, write_constraints,
                       write_indexes, write_views, write_matviews)
        for writer in writers:
            writer(f, conn, schema)

    size_kb = filepath.stat().st_size / 1024
    obj_desc = "仅视图" if views_only else f"{table_count} 表"
    print(f" ✅ {filename:<35s} {size_kb:>6.1f} KB ({obj_desc})")
    return filepath
|
||||
|
||||
|
||||
def write_fdw_file():
    """Copy the FDW setup script into OUTPUT_DIR/fdw.sql with a dated header."""
    filepath = OUTPUT_DIR / "fdw.sql"
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(f"""\
-- =============================================================================
-- FDW 跨库映射(在 zqyy_app 中执行)
-- 生成日期:{TODAY}
-- 来源:db/fdw/setup_fdw.sql
-- =============================================================================

""")
        if not FDW_FILE.exists():
            # Source script missing — leave a note rather than failing.
            f.write("-- FDW 配置文件未找到:db/fdw/setup_fdw.sql\n")
        else:
            f.write(FDW_FILE.read_text(encoding="utf-8"))
            f.write("\n")

    size_kb = filepath.stat().st_size / 1024
    print(f" ✅ {'fdw.sql':<35s} {size_kb:>6.1f} KB")
    return filepath
|
||||
|
||||
|
||||
# ── 主流程 ────────────────────────────────────────────────────────────────
|
||||
def main():
    """Dump per-schema DDL files for both databases plus the FDW setup.

    Fix over the original: the two psycopg2 connections were never closed
    when any write step raised (and a failure opening the second connection
    leaked the first).  Both are now managed with contextlib.closing so
    they are released on every exit path.
    """
    from contextlib import closing  # local import: module-level side effects unchanged

    with closing(psycopg2.connect(ETL_DSN)) as etl_conn, \
         closing(psycopg2.connect(APP_DSN)) as app_conn:
        print(f"输出目录:{OUTPUT_DIR}\n")

        # The six etl_feiqiu layer schemas.
        write_schema_file(etl_conn, "etl_feiqiu", "meta", "ETL 调度元数据")
        write_schema_file(etl_conn, "etl_feiqiu", "ods", "原始数据层")
        write_schema_file(etl_conn, "etl_feiqiu", "dwd", "明细数据层")
        write_schema_file(etl_conn, "etl_feiqiu", "core", "跨门店标准化维度/事实")
        write_schema_file(etl_conn, "etl_feiqiu", "dws", "汇总数据层")
        write_schema_file(etl_conn, "etl_feiqiu", "app", "RLS 视图层", views_only=True)

        # zqyy_app mini-program business tables.
        write_schema_file(app_conn, "zqyy_app", "public", "小程序业务表")

        # FDW cross-database mapping (file copy; needs no connection).
        write_fdw_file()

    # Remove the superseded single-file dump, if it is still around.
    old_file = ROOT / "docs" / "database" / "consolidated_ddl.sql"
    if old_file.exists():
        old_file.unlink()
        print(f"\n🗑️ 已删除旧文件:{old_file.name}")

    print(f"\n✅ 完成,共 8 个文件")


if __name__ == "__main__":
    main()
|
||||
@@ -2,7 +2,7 @@
|
||||
"""
|
||||
从源代码和 DDL 中提取 API → ODS → DWD 数据流映射,生成 Markdown 文档。
|
||||
用法: python scripts/ops/gen_dataflow_doc.py
|
||||
输出: docs/reports/dataflow_api_ods_dwd.md
|
||||
输出: $FULL_DATAFLOW_DOC_ROOT/dataflow_api_ods_dwd.md(由 .env 配置)
|
||||
"""
|
||||
import re
|
||||
import ast
|
||||
@@ -14,7 +14,8 @@ from collections import OrderedDict
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
ETL = ROOT / "apps" / "etl" / "pipelines" / "feiqiu"
|
||||
DB = ROOT / "db" / "etl_feiqiu" / "schemas"
|
||||
OUT = ROOT / "docs" / "reports" / "dataflow_api_ods_dwd.md"
|
||||
from _env_paths import get_output_path as _get_path
|
||||
OUT = _get_path("FULL_DATAFLOW_DOC_ROOT") / "dataflow_api_ods_dwd.md"
|
||||
|
||||
|
||||
# ── 1. 从 DDL 解析表结构 ──────────────────────────────────────────
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
用法:
|
||||
python scripts/ops/gen_dataflow_report.py
|
||||
python scripts/ops/gen_dataflow_report.py --output-dir export/dataflow_analysis
|
||||
python scripts/ops/gen_dataflow_report.py --output-dir /path/to/output
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -24,7 +24,51 @@ import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from dotenv import load_dotenv # noqa: F401 — _env_paths 负责加载,此处保留以防其他模块间接引用
|
||||
|
||||
# ── Whitelist definitions ────────────────────────────────────────────────
# Whitelisted fields still go through checking and statistics, but the
# report collapses them (no expanded detail rows) in the 1.1 diff tables
# and the section-3 per-table tables, noting the whitelist reason instead.
# CHANGE 2026-02-21 | whitelist refactor: unified the terminology; fields
# are still checked normally, only the report display is collapsed.

# ODS-layer ETL metadata columns (generated by the pipeline, not business data).
WHITELIST_ETL_META_COLS = {
    "source_file", "source_endpoint", "fetched_at", "payload", "content_hash",
}

# DWD dimension-table SCD2 bookkeeping columns (maintained by the ETL framework).
WHITELIST_DWD_SCD2_COLS = {
    "valid_from", "valid_to", "is_current", "etl_loaded_at", "etl_batch_id",
}

# Nested API object prefixes: the upstream store-profile object is linked
# via site_id, so its fields are not mapped one by one.
WHITELIST_API_NESTED_PREFIXES = ("siteProfile.",)


def is_whitelist_etl_meta(col_name: str) -> bool:
    """Return True when *col_name* is an ETL-metadata whitelist column."""
    return col_name in WHITELIST_ETL_META_COLS


def is_whitelist_scd2(col_name: str) -> bool:
    """Return True when *col_name* is a DWD SCD2 bookkeeping whitelist column."""
    return col_name in WHITELIST_DWD_SCD2_COLS


def is_whitelist_api_nested(json_path: str) -> bool:
    """Return True when *json_path* falls under a whitelisted nested-API prefix."""
    # str.startswith accepts a tuple of prefixes directly.
    return json_path.startswith(WHITELIST_API_NESTED_PREFIXES)


def whitelist_reason(col_name: str, json_path: str = "", layer: str = "") -> str:
    """Describe why a field is whitelisted; return "" when it is not."""
    if is_whitelist_etl_meta(col_name):
        return "ETL 元数据列"
    if is_whitelist_scd2(col_name):
        return "SCD2 管理列"
    if json_path and is_whitelist_api_nested(json_path):
        return "API 嵌套对象(siteProfile)"
    return ""
|
||||
|
||||
|
||||
def load_json(path: Path) -> dict | list | None:
|
||||
@@ -37,17 +81,15 @@ def load_json(path: Path) -> dict | list | None:
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description="生成数据流结构分析 Markdown 报告")
|
||||
parser.add_argument("--output-dir", type=str, default=None,
|
||||
help="输出目录(默认读取 SYSTEM_ANALYZE_ROOT 或 export/dataflow_analysis)")
|
||||
help="输出目录(默认读取 .env 中的 SYSTEM_ANALYZE_ROOT)")
|
||||
return parser
|
||||
|
||||
|
||||
def resolve_data_dir(override: str | None = None) -> Path:
|
||||
if override:
|
||||
return Path(override)
|
||||
env_root = os.environ.get("SYSTEM_ANALYZE_ROOT")
|
||||
if env_root:
|
||||
return Path(env_root)
|
||||
return Path("export/dataflow_analysis")
|
||||
from _env_paths import get_output_path
|
||||
return get_output_path("SYSTEM_ANALYZE_ROOT")
|
||||
|
||||
|
||||
def _esc(s: str) -> str:
|
||||
@@ -55,81 +97,6 @@ def _esc(s: str) -> str:
|
||||
return str(s).replace("|", "\\|").replace("\n", " ") if s else ""
|
||||
|
||||
|
||||
# ── 字段用途推测规则 ──
|
||||
# 基于字段名模式 + 表名上下文推断字段可能的业务含义
|
||||
# 置信度:高(≥80%) / 中(50-79%) / 低(<50%)
|
||||
import re as _re
|
||||
|
||||
_FIELD_GUESS_RULES: list[tuple[str, str, str]] = [
|
||||
# (字段名模式正则, 推测用途, 置信度)
|
||||
# ── SCD2 / ETL 元数据 ──
|
||||
(r"^scd2_", "SCD2 缓慢变化维度元数据", "高"),
|
||||
(r"^etl_", "ETL 流程元数据", "高"),
|
||||
(r"^dw_insert", "数仓装载时间戳", "高"),
|
||||
(r"^content_hash$", "数据变更检测哈希", "高"),
|
||||
(r"^source_file$", "ETL 来源文件标识", "高"),
|
||||
(r"^source_endpoint$", "ETL 来源接口标识", "高"),
|
||||
(r"^fetched_at$", "ETL 抓取时间", "高"),
|
||||
(r"^payload$", "原始 JSON 全量存储", "高"),
|
||||
# ── 主键 / 外键 ──
|
||||
(r"^id$", "主键标识", "高"),
|
||||
# ── 门店 / 组织(放在通用 _id$ 之前) ──
|
||||
(r"^(site_id|shop_id|store_id)$", "门店标识", "高"),
|
||||
(r"^(tenant_id|org_id)$", "租户/组织标识", "高"),
|
||||
(r"(shop_name|site_name|store_name)", "门店名称", "高"),
|
||||
# ── 时间类 ──
|
||||
(r"(^|_)(create|created)(_at|_time|_date)$", "记录创建时间", "高"),
|
||||
(r"(^|_)(update|updated|modify)(_at|_time|_date)$", "记录更新时间", "高"),
|
||||
(r"(^|_)(delete|deleted)(_at|_time|_date)$", "逻辑删除时间", "高"),
|
||||
(r"(^|_)(start|begin)(_at|_time|_date)$", "起始时间", "中"),
|
||||
(r"(^|_)(end|expire)(_at|_time|_date)$", "结束/过期时间", "中"),
|
||||
(r"(^|_)entry_time$", "入职/入场时间", "中"),
|
||||
(r"(^|_)resign_time$", "离职时间", "中"),
|
||||
(r"_time$", "时间戳字段", "中"),
|
||||
(r"_date$", "日期字段", "中"),
|
||||
# ── 通用派生(放在标志位之前,确保 derived_flag 等优先匹配派生) ──
|
||||
(r"^derived_", "ETL 派生计算列", "高"),
|
||||
(r"^calc_", "计算字段", "中"),
|
||||
# ── 状态 / 标志 ──
|
||||
(r"(^|_)is_delete$", "逻辑删除标志", "高"),
|
||||
(r"^is_", "布尔标志位", "中"),
|
||||
(r"(^|_)status$", "状态码", "中"),
|
||||
(r"_status$", "状态字段", "中"),
|
||||
(r"_enabled$", "启用/禁用开关", "中"),
|
||||
(r"_flag$", "标志位", "中"),
|
||||
# ── 金额 / 价格 ──
|
||||
(r"(price|amount|fee|cost|money|balance|total)", "金额/价格相关", "中"),
|
||||
(r"(discount|coupon|refund)", "优惠/退款相关", "中"),
|
||||
# ── 人员 ──
|
||||
(r"(real_name|nickname|^name$)", "姓名/昵称", "中"),
|
||||
(r"(mobile|phone|tel)", "联系电话", "中"),
|
||||
(r"(avatar|photo|image)", "头像/图片 URL", "中"),
|
||||
(r"(gender|sex)", "性别", "高"),
|
||||
(r"(birth|birthday)", "出生日期", "高"),
|
||||
(r"(height|weight)", "身高/体重", "高"),
|
||||
# ── 嵌套对象常见前缀 ──
|
||||
(r"^siteProfile\.", "门店档案嵌套属性", "高"),
|
||||
(r"^memberInfo\.", "会员信息嵌套属性", "中"),
|
||||
(r"^assistantInfo\.", "助教信息嵌套属性", "中"),
|
||||
(r"^tableInfo\.", "台桌信息嵌套属性", "中"),
|
||||
(r"^orderInfo\.", "订单信息嵌套属性", "中"),
|
||||
(r"^payInfo\.", "支付信息嵌套属性", "中"),
|
||||
# ── 排序 / 显示 ──
|
||||
(r"(sort|order|rank|seq)", "排序/序号", "低"),
|
||||
(r"(remark|memo|note|comment|introduce)", "备注/说明文本", "中"),
|
||||
(r"(url|link|qrcode|qr_code)", "链接/二维码", "中"),
|
||||
# ── 通用 ID 后缀(放在具体 ID 规则之后) ──
|
||||
(r"_id$", "关联实体 ID(外键)", "中"),
|
||||
]
|
||||
|
||||
|
||||
def _guess_field_purpose(field_name: str, table_name: str, layer: str) -> tuple[str, str]:
|
||||
"""根据字段名和表上下文推测用途,返回 (推测用途, 置信度)。"""
|
||||
fn_lower = field_name.lower()
|
||||
for pattern, purpose, confidence in _FIELD_GUESS_RULES:
|
||||
if _re.search(pattern, fn_lower):
|
||||
return purpose, confidence
|
||||
return f"待分析({layer}层字段)", "低"
|
||||
|
||||
|
||||
def _format_samples(samples: list[str], max_show: int = 5) -> str:
|
||||
@@ -155,12 +122,71 @@ def _is_enum_like(samples: list[str], total_records: int) -> bool:
|
||||
return 1 < len(samples) <= 8
|
||||
|
||||
|
||||
def _write_source_file_manifest(w, data_dir: Path, tables: list[dict], fm_dir: Path | None = None):
|
||||
"""在报告开头输出本次分析用到的所有 JSON 数据源文件清单"""
|
||||
if fm_dir is None:
|
||||
fm_dir = data_dir / "field_mappings"
|
||||
w("## 数据源文件清单")
|
||||
w()
|
||||
w("本报告基于以下 JSON 数据文件生成:")
|
||||
w()
|
||||
|
||||
categories = [
|
||||
("collection_manifest.json", "采集元数据(表清单、日期范围、记录数)"),
|
||||
("json_trees/", "API JSON 字段结构(递归展开后的字段路径、类型、示例值)"),
|
||||
("field_mappings/", "三层字段映射(API→ODS→DWD 映射关系)"),
|
||||
("db_schemas/", "数据库表结构(ODS/DWD 列定义,来自 PostgreSQL)"),
|
||||
("bd_descriptions/", "业务描述(来自 BD_manual 文档)"),
|
||||
]
|
||||
|
||||
for cat_path, cat_desc in categories:
|
||||
if cat_path.endswith("/"):
|
||||
# 子目录:列出实际存在的文件
|
||||
# CHANGE 2026-02-21 | field_mappings 使用传入的 fm_dir(可能是 field_mappings_new)
|
||||
if cat_path.rstrip("/") == "field_mappings":
|
||||
sub_dir = fm_dir
|
||||
else:
|
||||
sub_dir = data_dir / cat_path.rstrip("/")
|
||||
if sub_dir.is_dir():
|
||||
try:
|
||||
files = sorted(f.name for f in sub_dir.iterdir() if f.suffix == ".json")
|
||||
except PermissionError:
|
||||
w(f"**{cat_path}** — {cat_desc}(目录权限拒绝)")
|
||||
w()
|
||||
continue
|
||||
if sub_dir.is_dir():
|
||||
files = sorted(f.name for f in sub_dir.iterdir() if f.suffix == ".json")
|
||||
w(f"**{cat_path}** — {cat_desc}({len(files)} 个文件)")
|
||||
w()
|
||||
for fn in files:
|
||||
w(f"- `{cat_path}{fn}`")
|
||||
w()
|
||||
else:
|
||||
w(f"**{cat_path}** — {cat_desc}(目录不存在)")
|
||||
w()
|
||||
else:
|
||||
# 单文件
|
||||
fp = data_dir / cat_path
|
||||
status = "✓" if fp.exists() else "✗ 缺失"
|
||||
w(f"- `{cat_path}` — {cat_desc}({status})")
|
||||
w()
|
||||
|
||||
w("---")
|
||||
w()
|
||||
|
||||
|
||||
def generate_report(data_dir: Path) -> str:
|
||||
"""生成完整的 Markdown 报告"""
|
||||
manifest = load_json(data_dir / "collection_manifest.json")
|
||||
if not manifest:
|
||||
raise FileNotFoundError(f"找不到 collection_manifest.json: {data_dir}")
|
||||
|
||||
# CHANGE 2026-02-21 | Windows 文件锁 fallback:field_mappings_new 优先于被锁的 field_mappings
|
||||
_fm_dir = data_dir / "field_mappings"
|
||||
_fm_new = data_dir / "field_mappings_new"
|
||||
if _fm_new.exists() and any(_fm_new.iterdir()):
|
||||
_fm_dir = _fm_new
|
||||
|
||||
tables = manifest["tables"]
|
||||
now = datetime.now()
|
||||
lines: list[str] = []
|
||||
@@ -168,14 +194,25 @@ def generate_report(data_dir: Path) -> str:
|
||||
def w(s: str = ""):
|
||||
lines.append(s)
|
||||
|
||||
# ── 从 manifest 读取 API 请求日期范围 ──
|
||||
api_date_from = manifest.get("date_from")
|
||||
api_date_to = manifest.get("date_to")
|
||||
total_records_all = sum(t.get("record_count", 0) for t in tables)
|
||||
|
||||
# ── 报告头 ──
|
||||
w("# 飞球连接器 — 数据流结构分析报告")
|
||||
w()
|
||||
w(f"> 生成时间:{now.strftime('%Y-%m-%d %H:%M:%S')} CST")
|
||||
w(f"> 分析范围:飞球(feiqiu)连接器,共 {len(tables)} 张 ODS 表")
|
||||
w("> 数据来源:API JSON 采样 + PostgreSQL ODS/DWD 表结构 + 三层字段映射 + BD_manual 业务文档")
|
||||
if api_date_from or api_date_to:
|
||||
w(f"> API 请求日期范围:{api_date_from or '—'} ~ {api_date_to or '—'}")
|
||||
w(f"> JSON 数据总量:{total_records_all} 条记录")
|
||||
w()
|
||||
|
||||
# ── 数据源文件清单 ──
|
||||
_write_source_file_manifest(w, data_dir, tables, fm_dir=_fm_dir)
|
||||
|
||||
# ── 1. 总览表(增加 API JSON 字段数列) ──
|
||||
w("## 1. 总览")
|
||||
w()
|
||||
@@ -197,7 +234,7 @@ def generate_report(data_dir: Path) -> str:
|
||||
w()
|
||||
|
||||
# ── 1.1 字段对比差异报告 ──
|
||||
_write_field_diff_report(w, data_dir, tables)
|
||||
_write_field_diff_report(w, data_dir, tables, fm_dir=_fm_dir)
|
||||
|
||||
# ── 2. 全局统计 ──
|
||||
w("## 2. 全局统计")
|
||||
@@ -208,7 +245,7 @@ def generate_report(data_dir: Path) -> str:
|
||||
total_mapped = 0
|
||||
per_table_stats: list[dict] = []
|
||||
for t in tables:
|
||||
fm = load_json(data_dir / "field_mappings" / f"{t['table']}.json")
|
||||
fm = load_json(_fm_dir / f"{t['table']}.json")
|
||||
if not fm or "json_to_ods" not in fm:
|
||||
per_table_stats.append({
|
||||
"table": t["table"], "description": t["description"],
|
||||
@@ -261,7 +298,7 @@ def generate_report(data_dir: Path) -> str:
|
||||
|
||||
for idx, t in enumerate(tables, 1):
|
||||
table_name = t["table"]
|
||||
fm = load_json(data_dir / "field_mappings" / f"{table_name}.json")
|
||||
fm = load_json(_fm_dir / f"{table_name}.json")
|
||||
jt = load_json(data_dir / "json_trees" / f"{table_name}.json")
|
||||
ods_schema = load_json(data_dir / "db_schemas" / f"ods_{table_name}.json")
|
||||
bd = load_json(data_dir / "bd_descriptions" / f"{table_name}.json")
|
||||
@@ -303,8 +340,10 @@ def generate_report(data_dir: Path) -> str:
|
||||
|
||||
|
||||
|
||||
def _write_field_diff_report(w, data_dir: Path, tables: list[dict]):
|
||||
def _write_field_diff_report(w, data_dir: Path, tables: list[dict], fm_dir: Path | None = None):
|
||||
"""生成 API↔ODS↔DWD 字段对比差异报告(汇总表 + 逐表分表)"""
|
||||
if fm_dir is None:
|
||||
fm_dir = data_dir / "field_mappings"
|
||||
w("### 1.1 API↔ODS↔DWD 字段对比差异")
|
||||
w()
|
||||
w("以下汇总各表在三层之间的字段差异(点击数字跳转至分表详情):")
|
||||
@@ -312,13 +351,13 @@ def _write_field_diff_report(w, data_dir: Path, tables: list[dict]):
|
||||
w("| ODS 表名 | API→ODS 未映射 | ODS 无 JSON 源 | ODS→DWD 未映射 | DWD 无 ODS 源 | 主要差异原因 |")
|
||||
w("|---------|--------------|--------------|--------------|-------------|------------|")
|
||||
|
||||
# CHANGE 2026-02-21 | 重构白名单逻辑:字段仍正常检查计数,白名单字段在分表详情中折叠
|
||||
# 收集每表差异数据,用于汇总表和分表
|
||||
etl_meta_cols = {"source_file", "source_endpoint", "fetched_at", "payload", "content_hash"}
|
||||
diff_rows: list[dict] = []
|
||||
|
||||
for t in tables:
|
||||
table_name = t["table"]
|
||||
fm = load_json(data_dir / "field_mappings" / f"{table_name}.json")
|
||||
fm = load_json(fm_dir / f"{table_name}.json")
|
||||
if not fm:
|
||||
w(f"| `{table_name}` | — | — | — | — | 无映射数据 |")
|
||||
diff_rows.append(None)
|
||||
@@ -334,43 +373,62 @@ def _write_field_diff_report(w, data_dir: Path, tables: list[dict]):
|
||||
o2d = fm.get("ods_to_dwd", {})
|
||||
d2o = fm.get("dwd_to_ods", {})
|
||||
|
||||
# ── API→ODS 未映射字段 ──
|
||||
# ── API→ODS 未映射字段(全部检查,含白名单) ──
|
||||
api_unmapped_flat: list[str] = []
|
||||
api_unmapped_nested: list[str] = []
|
||||
api_unmapped_whitelist: list[tuple[str, str]] = [] # (json_path, reason)
|
||||
for m in j2o:
|
||||
if m.get("ods_col") is None:
|
||||
jp = m.get("json_path", "")
|
||||
if "." in jp:
|
||||
wl_reason = whitelist_reason("", json_path=jp)
|
||||
if wl_reason:
|
||||
api_unmapped_whitelist.append((jp, wl_reason))
|
||||
elif "." in jp:
|
||||
api_unmapped_nested.append(jp)
|
||||
else:
|
||||
api_unmapped_flat.append(jp)
|
||||
api_unmapped_total = len(api_unmapped_flat) + len(api_unmapped_nested)
|
||||
api_unmapped_total = len(api_unmapped_flat) + len(api_unmapped_nested) + len(api_unmapped_whitelist)
|
||||
|
||||
# ── ODS 无 JSON 源 ──
|
||||
# ── ODS 无 JSON 源(全部检查,含白名单) ──
|
||||
ods_schema = load_json(data_dir / "db_schemas" / f"ods_{table_name}.json")
|
||||
ods_mapped_cols = {m["ods_col"] for m in j2o if m.get("ods_col")}
|
||||
ods_no_json_fields: list[str] = []
|
||||
ods_no_json_whitelist: list[tuple[str, str]] = [] # (col_name, reason)
|
||||
if ods_schema and "columns" in ods_schema:
|
||||
for col in ods_schema["columns"]:
|
||||
if col["name"] not in ods_mapped_cols and col["name"] not in etl_meta_cols:
|
||||
ods_no_json_fields.append(col["name"])
|
||||
if col["name"] not in ods_mapped_cols:
|
||||
wl_reason = whitelist_reason(col["name"])
|
||||
if wl_reason:
|
||||
ods_no_json_whitelist.append((col["name"], wl_reason))
|
||||
else:
|
||||
ods_no_json_fields.append(col["name"])
|
||||
|
||||
# ── ODS→DWD 未映射 ──
|
||||
# ── ODS→DWD 未映射(全部检查,含白名单) ──
|
||||
ods_cols_with_dwd = set(o2d.keys())
|
||||
ods_no_dwd_fields: list[str] = []
|
||||
ods_no_dwd_whitelist: list[tuple[str, str]] = []
|
||||
if ods_schema and "columns" in ods_schema:
|
||||
for col in ods_schema["columns"]:
|
||||
if col["name"] not in ods_cols_with_dwd and col["name"] not in etl_meta_cols:
|
||||
ods_no_dwd_fields.append(col["name"])
|
||||
if col["name"] not in ods_cols_with_dwd:
|
||||
wl_reason = whitelist_reason(col["name"])
|
||||
if wl_reason:
|
||||
ods_no_dwd_whitelist.append((col["name"], wl_reason))
|
||||
else:
|
||||
ods_no_dwd_fields.append(col["name"])
|
||||
|
||||
# ── DWD 无 ODS 源 ──
|
||||
# ── DWD 无 ODS 源(全部检查,含白名单) ──
|
||||
dwd_no_ods_fields: list[tuple[str, str]] = [] # (dwd_table, dwd_col)
|
||||
dwd_no_ods_whitelist: list[tuple[str, str, str]] = [] # (dwd_table, dwd_col, reason)
|
||||
for dwd_name, entries in d2o.items():
|
||||
for entry in entries:
|
||||
if entry.get("ods_source") == "—":
|
||||
dwd_no_ods_fields.append((dwd_name, entry["dwd_col"]))
|
||||
wl_reason = whitelist_reason(entry["dwd_col"])
|
||||
if wl_reason:
|
||||
dwd_no_ods_whitelist.append((dwd_name, entry["dwd_col"], wl_reason))
|
||||
else:
|
||||
dwd_no_ods_fields.append((dwd_name, entry["dwd_col"]))
|
||||
|
||||
# 差异原因
|
||||
# 差异原因(含白名单统计)
|
||||
reasons: list[str] = []
|
||||
if api_unmapped_nested:
|
||||
reasons.append(f"嵌套对象 {len(api_unmapped_nested)} 个")
|
||||
@@ -378,15 +436,18 @@ def _write_field_diff_report(w, data_dir: Path, tables: list[dict]):
|
||||
reasons.append(f"平层未映射 {len(api_unmapped_flat)} 个")
|
||||
if dwd_no_ods_fields:
|
||||
reasons.append(f"SCD2/派生列 {len(dwd_no_ods_fields)} 个")
|
||||
wl_total = len(api_unmapped_whitelist) + len(ods_no_json_whitelist) + len(ods_no_dwd_whitelist) + len(dwd_no_ods_whitelist)
|
||||
if wl_total:
|
||||
reasons.append(f"白名单 {wl_total} 个")
|
||||
reason_str = ";".join(reasons) if reasons else "—"
|
||||
|
||||
# 汇总表单元格:数量 + 跳转链接
|
||||
# 汇总表单元格:数量 + 跳转链接(白名单字段也计入总数)
|
||||
def _cell(count: int) -> str:
|
||||
if count == 0:
|
||||
return "0"
|
||||
return f"[{count}](#{diff_anchor})"
|
||||
|
||||
w(f"| `{table_name}` | {_cell(api_unmapped_total)} | {_cell(len(ods_no_json_fields))} | {_cell(len(ods_no_dwd_fields))} | {_cell(len(dwd_no_ods_fields))} | {reason_str} |")
|
||||
w(f"| `{table_name}` | {_cell(api_unmapped_total)} | {_cell(len(ods_no_json_fields) + len(ods_no_json_whitelist))} | {_cell(len(ods_no_dwd_fields) + len(ods_no_dwd_whitelist))} | {_cell(len(dwd_no_ods_fields) + len(dwd_no_ods_whitelist))} | {reason_str} |")
|
||||
|
||||
diff_rows.append({
|
||||
"table_name": table_name,
|
||||
@@ -396,21 +457,28 @@ def _write_field_diff_report(w, data_dir: Path, tables: list[dict]):
|
||||
"dwd_anchors": dwd_anchors,
|
||||
"api_unmapped_flat": api_unmapped_flat,
|
||||
"api_unmapped_nested": api_unmapped_nested,
|
||||
"api_unmapped_whitelist": api_unmapped_whitelist,
|
||||
"ods_no_json_fields": ods_no_json_fields,
|
||||
"ods_no_json_whitelist": ods_no_json_whitelist,
|
||||
"ods_no_dwd_fields": ods_no_dwd_fields,
|
||||
"ods_no_dwd_whitelist": ods_no_dwd_whitelist,
|
||||
"dwd_no_ods_fields": dwd_no_ods_fields,
|
||||
"dwd_no_ods_whitelist": dwd_no_ods_whitelist,
|
||||
})
|
||||
|
||||
w()
|
||||
|
||||
# ── 逐表差异分表 ──
|
||||
# CHANGE 2026-02-21 | 白名单字段折叠显示,不展开详细表格行,注明白名单原因
|
||||
sub_idx = 0
|
||||
for row in diff_rows:
|
||||
if row is None:
|
||||
continue
|
||||
has_any = (row["api_unmapped_flat"] or row["api_unmapped_nested"]
|
||||
or row["ods_no_json_fields"] or row["ods_no_dwd_fields"]
|
||||
or row["dwd_no_ods_fields"])
|
||||
or row["api_unmapped_whitelist"]
|
||||
or row["ods_no_json_fields"] or row["ods_no_json_whitelist"]
|
||||
or row["ods_no_dwd_fields"] or row["ods_no_dwd_whitelist"]
|
||||
or row["dwd_no_ods_fields"] or row["dwd_no_ods_whitelist"])
|
||||
if not has_any:
|
||||
continue
|
||||
|
||||
@@ -464,78 +532,105 @@ def _write_field_diff_report(w, data_dir: Path, tables: list[dict]):
|
||||
desc = desc[:37] + "..."
|
||||
return _esc(desc)
|
||||
|
||||
def _write_whitelist_summary(w, items: list, category: str):
|
||||
"""白名单字段折叠汇总(不展开详细表格行)"""
|
||||
if not items:
|
||||
return
|
||||
# 按原因分组
|
||||
by_reason: dict[str, list[str]] = {}
|
||||
for item in items:
|
||||
if isinstance(item, tuple) and len(item) == 3:
|
||||
name, _, reason = item # (dwd_table, dwd_col, reason)
|
||||
elif isinstance(item, tuple) and len(item) == 2:
|
||||
name, reason = item
|
||||
else:
|
||||
name, reason = str(item), "白名单"
|
||||
by_reason.setdefault(reason, []).append(name)
|
||||
parts = []
|
||||
for reason, names in by_reason.items():
|
||||
parts.append(f"{reason}: `{'`, `'.join(names[:5])}`{'...' if len(names) > 5 else ''} ({len(names)} 个)")
|
||||
w(f"> ℹ️ {category}白名单字段(已检查,不展开详情):{';'.join(parts)}")
|
||||
w()
|
||||
|
||||
# ── API→ODS 未映射(平层) ──
|
||||
if row["api_unmapped_flat"]:
|
||||
w(f"**API→ODS 未映射(平层)** — {len(row['api_unmapped_flat'])} 个")
|
||||
w()
|
||||
w("| # | JSON 字段 | 推测用途 | 置信度 | 示例值 | 说明 | 状态 |")
|
||||
w("|---|----------|---------|-------|-------|------|------|")
|
||||
w("| # | JSON 字段 | 示例值 | 说明 | 状态 |")
|
||||
w("|---|----------|-------|------|------|")
|
||||
for i, f in enumerate(row["api_unmapped_flat"], 1):
|
||||
purpose, conf = _guess_field_purpose(f, table_name, "API")
|
||||
sample = _sample_str(f, "API")
|
||||
desc = _desc_str(f, "API")
|
||||
w(f"| {i} | **[`{_esc(f)}`](#{api_anchor})** | {_esc(purpose)} | {conf} | {sample} | {desc} | **⚠️ 未映射** |")
|
||||
w(f"| {i} | **[`{_esc(f)}`](#{api_anchor})** | {sample} | {desc} | **⚠️ 未映射** |")
|
||||
w()
|
||||
|
||||
# ── API→ODS 未映射(嵌套对象) ──
|
||||
# ── API→ODS 未映射(嵌套对象,非白名单) ──
|
||||
if row["api_unmapped_nested"]:
|
||||
w(f"<details><summary>API→ODS 未映射(嵌套对象)— {len(row['api_unmapped_nested'])} 个</summary>")
|
||||
w()
|
||||
w("| # | JSON 字段 | 推测用途 | 置信度 | 示例值 | 说明 | 状态 |")
|
||||
w("|---|----------|---------|-------|-------|------|------|")
|
||||
w("| # | JSON 字段 | 示例值 | 说明 | 状态 |")
|
||||
w("|---|----------|-------|------|------|")
|
||||
for i, f in enumerate(row["api_unmapped_nested"], 1):
|
||||
purpose, conf = _guess_field_purpose(f, table_name, "API")
|
||||
sample = _sample_str(f, "API")
|
||||
desc = _desc_str(f, "API")
|
||||
w(f"| {i} | [`{_esc(f)}`](#{api_anchor}) | {_esc(purpose)} | {conf} | {sample} | {desc} | 📦 嵌套 |")
|
||||
w(f"| {i} | [`{_esc(f)}`](#{api_anchor}) | {sample} | {desc} | 📦 嵌套 |")
|
||||
w()
|
||||
w("</details>")
|
||||
w()
|
||||
|
||||
# ── API 白名单字段汇总 ──
|
||||
_write_whitelist_summary(w, row["api_unmapped_whitelist"], "API→ODS ")
|
||||
|
||||
# ── ODS 无 JSON 源 ──
|
||||
if row["ods_no_json_fields"]:
|
||||
w(f"**ODS 无 JSON 源** — {len(row['ods_no_json_fields'])} 个")
|
||||
w()
|
||||
w("| # | ODS 列 | 推测用途 | 置信度 | 说明 | 状态 |")
|
||||
w("|---|-------|---------|-------|------|------|")
|
||||
w("| # | ODS 列 | 说明 | 状态 |")
|
||||
w("|---|-------|------|------|")
|
||||
for i, f in enumerate(row["ods_no_json_fields"], 1):
|
||||
purpose, conf = _guess_field_purpose(f, table_name, "ODS")
|
||||
desc = _desc_str(f, "ODS")
|
||||
w(f"| {i} | **[`{_esc(f)}`](#{ods_anchor})** | {_esc(purpose)} | {conf} | {desc} | **⚠️ 无 JSON 源** |")
|
||||
w(f"| {i} | **[`{_esc(f)}`](#{ods_anchor})** | {desc} | **⚠️ 无 JSON 源** |")
|
||||
w()
|
||||
|
||||
# ── ODS 无 JSON 源 白名单汇总 ──
|
||||
_write_whitelist_summary(w, row["ods_no_json_whitelist"], "ODS 无 JSON 源 ")
|
||||
|
||||
# ── ODS→DWD 未映射 ──
|
||||
if row["ods_no_dwd_fields"]:
|
||||
w(f"**ODS→DWD 未映射** — {len(row['ods_no_dwd_fields'])} 个")
|
||||
w()
|
||||
w("| # | ODS 列 | 推测用途 | 置信度 | 说明 | 状态 |")
|
||||
w("|---|-------|---------|-------|------|------|")
|
||||
w("| # | ODS 列 | 说明 | 状态 |")
|
||||
w("|---|-------|------|------|")
|
||||
for i, f in enumerate(row["ods_no_dwd_fields"], 1):
|
||||
purpose, conf = _guess_field_purpose(f, table_name, "ODS")
|
||||
desc = _desc_str(f, "ODS")
|
||||
w(f"| {i} | **[`{_esc(f)}`](#{ods_anchor})** | {_esc(purpose)} | {conf} | {desc} | **⚠️ 无 DWD 目标** |")
|
||||
w(f"| {i} | **[`{_esc(f)}`](#{ods_anchor})** | {desc} | **⚠️ 无 DWD 目标** |")
|
||||
w()
|
||||
|
||||
# ── ODS→DWD 白名单汇总 ──
|
||||
_write_whitelist_summary(w, row["ods_no_dwd_whitelist"], "ODS→DWD ")
|
||||
|
||||
# ── DWD 无 ODS 源 ──
|
||||
if row["dwd_no_ods_fields"]:
|
||||
w(f"**DWD 无 ODS 源** — {len(row['dwd_no_ods_fields'])} 个")
|
||||
w()
|
||||
w("| # | DWD 表 | DWD 列 | 推测用途 | 置信度 | 说明 | 状态 |")
|
||||
w("|---|-------|-------|---------|-------|------|------|")
|
||||
w("| # | DWD 表 | DWD 列 | 说明 | 状态 |")
|
||||
w("|---|-------|-------|------|------|")
|
||||
for i, (dwd_name, dwd_col) in enumerate(row["dwd_no_ods_fields"], 1):
|
||||
dwd_a = dwd_anchors.get(dwd_name, f"dwd-{dwd_name.replace('_', '-')}")
|
||||
purpose, conf = _guess_field_purpose(dwd_col, table_name, "DWD")
|
||||
desc = _desc_str(dwd_col, "DWD", dwd_tbl=dwd_name)
|
||||
w(f"| {i} | {dwd_name} | **[`{_esc(dwd_col)}`](#{dwd_a})** | {_esc(purpose)} | {conf} | {desc} | **⚠️ 无 ODS 源** |")
|
||||
w(f"| {i} | {dwd_name} | **[`{_esc(dwd_col)}`](#{dwd_a})** | {desc} | **⚠️ 无 ODS 源** |")
|
||||
w()
|
||||
|
||||
# ── DWD 无 ODS 源 白名单汇总 ──
|
||||
_write_whitelist_summary(w, row["dwd_no_ods_whitelist"], "DWD 无 ODS 源 ")
|
||||
|
||||
w()
|
||||
|
||||
|
||||
|
||||
|
||||
def _write_api_section(w, fm, jt, bd, table_name, api_anchor, ods_anchor):
|
||||
"""生成 API 源字段区块(增加业务描述列,合并说明+示例值)"""
|
||||
"""生成 API 源字段区块(增加业务描述列,合并说明+示例值,白名单字段折叠)"""
|
||||
w(f'<a id="{api_anchor}"></a>')
|
||||
w()
|
||||
w(f"#### API 源字段 — {table_name} [🔗 ODS](#{ods_anchor})")
|
||||
@@ -556,17 +651,30 @@ def _write_api_section(w, fm, jt, bd, table_name, api_anchor, ods_anchor):
|
||||
# BD_manual ODS 描述(用于交叉引用 JSON 字段的业务含义)
|
||||
ods_descs = bd.get("ods_fields", {}) if bd else {}
|
||||
|
||||
# CHANGE 2026-02-21 | 白名单字段从表格中排除,折叠汇总
|
||||
normal_items: list[dict] = []
|
||||
whitelist_items: list[tuple[str, str]] = [] # (json_path, reason)
|
||||
for m in j2o:
|
||||
jp = m.get("json_path", "")
|
||||
wl_reason = whitelist_reason("", json_path=jp)
|
||||
if wl_reason:
|
||||
whitelist_items.append((jp, wl_reason))
|
||||
else:
|
||||
normal_items.append(m)
|
||||
|
||||
mapped_count = sum(1 for m in j2o if m.get("ods_col") is not None)
|
||||
total_count = len(j2o)
|
||||
if total_count > 0:
|
||||
w(f"已映射 {mapped_count}/{total_count},覆盖率 {mapped_count / total_count * 100:.1f}%")
|
||||
if whitelist_items:
|
||||
w(f"(其中 {len(whitelist_items)} 个白名单字段已折叠)")
|
||||
else:
|
||||
w("无字段")
|
||||
w()
|
||||
w("| # | JSON 字段 | 类型 | → ODS 列 | 业务描述 | 示例值与说明 |")
|
||||
w("|---|----------|------|---------|---------|------------|")
|
||||
|
||||
for i, m in enumerate(j2o, 1):
|
||||
for i, m in enumerate(normal_items, 1):
|
||||
json_path = m["json_path"]
|
||||
json_type = m.get("json_type", "")
|
||||
ods_col = m.get("ods_col")
|
||||
@@ -597,7 +705,7 @@ def _write_api_section(w, fm, jt, bd, table_name, api_anchor, ods_anchor):
|
||||
|
||||
# 合并说明+示例值
|
||||
notes_parts: list[str] = []
|
||||
if json_path.startswith("siteProfile.") or ("." in json_path and match_type == "unmapped"):
|
||||
if "." in json_path and match_type == "unmapped":
|
||||
notes_parts.append("📦 嵌套对象")
|
||||
if match_type == "case_insensitive":
|
||||
notes_parts.append("大小写匹配")
|
||||
@@ -616,9 +724,20 @@ def _write_api_section(w, fm, jt, bd, table_name, api_anchor, ods_anchor):
|
||||
|
||||
w()
|
||||
|
||||
# 白名单字段折叠汇总
|
||||
if whitelist_items:
|
||||
by_reason: dict[str, list[str]] = {}
|
||||
for jp, reason in whitelist_items:
|
||||
by_reason.setdefault(reason, []).append(jp)
|
||||
parts = []
|
||||
for reason, names in by_reason.items():
|
||||
parts.append(f"{reason}: `{'`, `'.join(names[:5])}`{'...' if len(names) > 5 else ''} ({len(names)} 个)")
|
||||
w(f"> ℹ️ 白名单字段(已检查,不展开详情):{';'.join(parts)}")
|
||||
w()
|
||||
|
||||
|
||||
def _write_ods_section(w, fm, ods_schema, bd, table_name, ods_anchor, api_anchor, dwd_anchors):
|
||||
"""生成 ODS 表结构区块(含上下游双向映射列 + 业务描述)"""
|
||||
"""生成 ODS 表结构区块(含上下游双向映射列 + 业务描述,白名单字段折叠)"""
|
||||
w(f'<a id="{ods_anchor}"></a>')
|
||||
w()
|
||||
w(f"#### ODS 表结构 — ods.{table_name} [🔗 API](#{api_anchor})")
|
||||
@@ -645,12 +764,25 @@ def _write_ods_section(w, fm, ods_schema, bd, table_name, ods_anchor, api_anchor
|
||||
ods_descs = bd.get("ods_fields", {}) if bd else {}
|
||||
|
||||
cols = ods_schema["columns"]
|
||||
|
||||
# CHANGE 2026-02-21 | 白名单字段从表格中排除,折叠汇总
|
||||
normal_cols: list[dict] = []
|
||||
whitelist_cols: list[tuple[str, str]] = [] # (col_name, reason)
|
||||
for col in cols:
|
||||
wl_reason = whitelist_reason(col["name"])
|
||||
if wl_reason:
|
||||
whitelist_cols.append((col["name"], wl_reason))
|
||||
else:
|
||||
normal_cols.append(col)
|
||||
|
||||
w(f"共 {len(cols)} 列")
|
||||
if whitelist_cols:
|
||||
w(f"(其中 {len(whitelist_cols)} 个白名单列已折叠)")
|
||||
w()
|
||||
w("| # | ODS 列名 | 类型 | ← JSON 源 | → DWD 目标 | 业务描述 |")
|
||||
w("|---|---------|------|----------|-----------|---------|")
|
||||
|
||||
for i, col in enumerate(cols, 1):
|
||||
for i, col in enumerate(normal_cols, 1):
|
||||
col_name = col["name"]
|
||||
col_type = col["data_type"]
|
||||
|
||||
@@ -684,9 +816,20 @@ def _write_ods_section(w, fm, ods_schema, bd, table_name, ods_anchor, api_anchor
|
||||
|
||||
w()
|
||||
|
||||
# 白名单列折叠汇总
|
||||
if whitelist_cols:
|
||||
by_reason: dict[str, list[str]] = {}
|
||||
for cn, reason in whitelist_cols:
|
||||
by_reason.setdefault(reason, []).append(cn)
|
||||
parts = []
|
||||
for reason, names in by_reason.items():
|
||||
parts.append(f"{reason}: `{'`, `'.join(names)}` ({len(names)} 个)")
|
||||
w(f"> ℹ️ 白名单列(已检查,不展开详情):{';'.join(parts)}")
|
||||
w()
|
||||
|
||||
|
||||
def _write_dwd_section(w, fm, dwd_schema, bd, dwd_name, dwd_anchor, ods_anchor, table_name):
|
||||
"""生成 DWD 表结构区块(增加业务描述列)"""
|
||||
"""生成 DWD 表结构区块(增加业务描述列,白名单字段折叠)"""
|
||||
w(f'<a id="{dwd_anchor}"></a>')
|
||||
w()
|
||||
w(f"#### DWD 表结构 — dwd.{dwd_name} [🔗 ODS](#{ods_anchor})")
|
||||
@@ -709,12 +852,25 @@ def _write_dwd_section(w, fm, dwd_schema, bd, dwd_name, dwd_anchor, ods_anchor,
|
||||
dwd_descs = bd["dwd_fields"].get(dwd_name, {})
|
||||
|
||||
cols = dwd_schema["columns"]
|
||||
|
||||
# CHANGE 2026-02-21 | 白名单字段从表格中排除,折叠汇总
|
||||
normal_cols: list[dict] = []
|
||||
whitelist_cols: list[tuple[str, str]] = [] # (col_name, reason)
|
||||
for col in cols:
|
||||
wl_reason = whitelist_reason(col["name"])
|
||||
if wl_reason:
|
||||
whitelist_cols.append((col["name"], wl_reason))
|
||||
else:
|
||||
normal_cols.append(col)
|
||||
|
||||
w(f"共 {len(cols)} 列")
|
||||
if whitelist_cols:
|
||||
w(f"(其中 {len(whitelist_cols)} 个白名单列已折叠)")
|
||||
w()
|
||||
w("| # | DWD 列名 | 类型 | ← ODS 来源 | 转换 | 业务描述 |")
|
||||
w("|---|---------|------|----------|------|---------|")
|
||||
|
||||
for i, col in enumerate(cols, 1):
|
||||
for i, col in enumerate(normal_cols, 1):
|
||||
col_name = col["name"]
|
||||
col_type = col["data_type"]
|
||||
|
||||
@@ -728,8 +884,6 @@ def _write_dwd_section(w, fm, dwd_schema, bd, dwd_name, dwd_anchor, ods_anchor,
|
||||
ods_link = "—"
|
||||
transform = ""
|
||||
note = ""
|
||||
if col_name in ("valid_from", "valid_to", "is_current", "etl_loaded_at", "etl_batch_id"):
|
||||
transform = "ETL 生成"
|
||||
|
||||
# 业务描述(优先 BD_manual,其次 mapping note,最后 DB comment)
|
||||
biz_desc = dwd_descs.get(col_name.lower(), "")
|
||||
@@ -753,9 +907,22 @@ def _write_dwd_section(w, fm, dwd_schema, bd, dwd_name, dwd_anchor, ods_anchor,
|
||||
|
||||
w()
|
||||
|
||||
# 白名单列折叠汇总
|
||||
if whitelist_cols:
|
||||
by_reason: dict[str, list[str]] = {}
|
||||
for cn, reason in whitelist_cols:
|
||||
by_reason.setdefault(reason, []).append(cn)
|
||||
parts = []
|
||||
for reason, names in by_reason.items():
|
||||
parts.append(f"{reason}: `{'`, `'.join(names)}` ({len(names)} 个)")
|
||||
w(f"> ℹ️ 白名单列(已检查,不展开详情):{';'.join(parts)}")
|
||||
w()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
load_dotenv(Path(".env"), override=False)
|
||||
# _env_paths 在 import 时已通过绝对路径加载根 .env,无需相对路径 load_dotenv
|
||||
# CHANGE 2026-02-21 | 移除 load_dotenv(Path(".env")),避免 cwd 不在项目根时失效
|
||||
from _env_paths import get_output_path # noqa: F401 — 触发 .env 加载
|
||||
|
||||
parser = build_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
0
scripts/ops/gen_dwd_field_review.py
Normal file
0
scripts/ops/gen_dwd_field_review.py
Normal file
1009
scripts/ops/gen_field_review_doc.py
Normal file
1009
scripts/ops/gen_field_review_doc.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,8 +4,8 @@
|
||||
从真实 API 获取 JSON 样本,结合 DDL 和 ETL 源码,生成带跨层跳转链接的 Markdown 文档。
|
||||
|
||||
用法: python scripts/ops/gen_full_dataflow_doc.py
|
||||
输出: docs/reports/dataflow_api_ods_dwd.md
|
||||
tmp/api_samples/*.json(API 原始响应缓存)
|
||||
输出: $FULL_DATAFLOW_DOC_ROOT/dataflow_api_ods_dwd.md(由 .env 配置)
|
||||
$API_SAMPLE_CACHE_ROOT/*.json(API 原始响应缓存)
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
@@ -24,8 +24,10 @@ from dotenv import load_dotenv
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
ETL = ROOT / "apps" / "etl" / "pipelines" / "feiqiu"
|
||||
DB = ROOT / "db" / "etl_feiqiu" / "schemas"
|
||||
OUT = ROOT / "docs" / "reports" / "dataflow_api_ods_dwd.md"
|
||||
SAMPLE_DIR = ROOT / "tmp" / "api_samples"
|
||||
# 从 .env 读取输出路径(缺失时抛 KeyError)
|
||||
from _env_paths import get_output_path as _get_path
|
||||
OUT = _get_path("FULL_DATAFLOW_DOC_ROOT") / "dataflow_api_ods_dwd.md"
|
||||
SAMPLE_DIR = _get_path("API_SAMPLE_CACHE_ROOT")
|
||||
|
||||
TZ = ZoneInfo("Asia/Shanghai")
|
||||
|
||||
@@ -91,17 +93,6 @@ ODS_SPECS = [
|
||||
"extra_params": {},
|
||||
"description": "助教服务流水",
|
||||
},
|
||||
{
|
||||
"code": "ODS_ASSISTANT_ABOLISH",
|
||||
"table": "assistant_cancellation_records",
|
||||
"endpoint": "/AssistantPerformance/GetAbolitionAssistant",
|
||||
"data_path": ("data",),
|
||||
"list_key": "abolitionAssistants",
|
||||
"time_fields": ("startTime", "endTime"),
|
||||
"requires_window": True,
|
||||
"extra_params": {},
|
||||
"description": "助教废除记录",
|
||||
},
|
||||
{
|
||||
"code": "ODS_STORE_GOODS_SALES",
|
||||
"table": "store_goods_sales_records",
|
||||
@@ -289,17 +280,6 @@ ODS_SPECS = [
|
||||
"extra_params": {},
|
||||
"description": "租户商品档案",
|
||||
},
|
||||
{
|
||||
"code": "ODS_SETTLEMENT_TICKET",
|
||||
"table": "settlement_ticket_details",
|
||||
"endpoint": "/Order/GetOrderSettleTicketNew",
|
||||
"data_path": (),
|
||||
"list_key": None,
|
||||
"time_fields": None,
|
||||
"requires_window": False,
|
||||
"extra_params": {},
|
||||
"description": "结账小票详情(按 orderSettleId 逐条获取,不走常规分页)",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -373,7 +353,6 @@ def fetch_records(spec: dict, target_count: int = 200) -> list[dict]:
|
||||
获取 API 记录。
|
||||
- 有时间字段的表:从今天往回 10 天一批,不够则继续扩展,最多 10 次重试
|
||||
- 无时间字段的表:单次请求 200 条
|
||||
- 特殊表(settlement_ticket_details):跳过
|
||||
"""
|
||||
endpoint = spec["endpoint"]
|
||||
data_path = spec["data_path"]
|
||||
@@ -381,10 +360,6 @@ def fetch_records(spec: dict, target_count: int = 200) -> list[dict]:
|
||||
time_fields = spec["time_fields"]
|
||||
extra_params = spec.get("extra_params", {})
|
||||
|
||||
# 结账小票是逐条获取的,跳过
|
||||
if spec["table"] == "settlement_ticket_details":
|
||||
return []
|
||||
|
||||
all_records = []
|
||||
|
||||
if time_fields:
|
||||
@@ -1103,12 +1078,6 @@ def main():
|
||||
|
||||
print(f" [{spec['code']}] {table}: 请求 API...", end=" ", flush=True)
|
||||
|
||||
if spec["table"] == "settlement_ticket_details":
|
||||
print("跳过(逐条获取,不走常规分页)")
|
||||
api_data[table] = []
|
||||
api_fields[table] = OrderedDict()
|
||||
continue
|
||||
|
||||
try:
|
||||
records = fetch_records(spec, target_count=200)
|
||||
api_data[table] = records
|
||||
@@ -1129,49 +1098,6 @@ def main():
|
||||
api_data[table] = []
|
||||
api_fields[table] = OrderedDict()
|
||||
|
||||
# ── 特殊处理:settlement_ticket_details 从数据库 payload 获取 ──
|
||||
# 该表不走常规 API 分页,尝试从已有缓存或跳过
|
||||
ticket_table = "settlement_ticket_details"
|
||||
if not api_data.get(ticket_table) and not api_fields.get(ticket_table):
|
||||
# 尝试从结算记录的 API 响应中获取小票结构(如果有的话)
|
||||
print(f" [{ticket_table}] 无法通过常规 API 获取,将从数据库 payload 分析")
|
||||
try:
|
||||
import psycopg2
|
||||
dsn = os.environ.get("PG_DSN", "")
|
||||
if dsn:
|
||||
conn = psycopg2.connect(dsn)
|
||||
conn.set_client_encoding("UTF8")
|
||||
# 自动检测 schema
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
SELECT schema_name FROM information_schema.schemata
|
||||
WHERE schema_name IN ('ods', 'billiards_ods')
|
||||
ORDER BY schema_name
|
||||
""")
|
||||
schemas = [r[0] for r in cur.fetchall()]
|
||||
ods_schema = "ods" if "ods" in schemas else schemas[0] if schemas else "ods"
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(f"""
|
||||
SELECT payload FROM {ods_schema}.{ticket_table}
|
||||
WHERE payload IS NOT NULL
|
||||
ORDER BY fetched_at DESC LIMIT 10
|
||||
""")
|
||||
rows = cur.fetchall()
|
||||
payloads = []
|
||||
for row in rows:
|
||||
p = row[0]
|
||||
if isinstance(p, str):
|
||||
p = json.loads(p)
|
||||
if isinstance(p, dict):
|
||||
payloads.append(p)
|
||||
conn.close()
|
||||
if payloads:
|
||||
api_data[ticket_table] = payloads
|
||||
api_fields[ticket_table] = analyze_json_fields(payloads)
|
||||
print(f" 从数据库获取 {len(payloads)} 条 payload")
|
||||
except Exception as e:
|
||||
print(f" 从数据库获取失败: {e}")
|
||||
|
||||
# ── 生成文档 ──
|
||||
print()
|
||||
print("生成文档...")
|
||||
|
||||
328
scripts/ops/monitor_etl_run.py
Normal file
328
scripts/ops/monitor_etl_run.py
Normal file
@@ -0,0 +1,328 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
监控当前 ETL 执行状态,完成后导出执行结果报告到 SYSTEM_LOG_ROOT。
|
||||
|
||||
通过后端 API 轮询执行历史,检测 run_uuid 对应的执行是否完成。
|
||||
完成后从浏览器日志或 API 提取任务级结果,生成 Markdown 报告。
|
||||
|
||||
用法:python scripts/ops/monitor_etl_run.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
# Backend base URL and the one execution this ad-hoc monitor watches.
BACKEND_URL = "http://localhost:8000"
# Hard-coded per monitoring run; update before reuse.
TARGET_RUN_UUID = "4ba9d2d365ee4a858f1c4104b1942dc2"
POLL_INTERVAL = 30  # seconds between history polls
|
||||
|
||||
|
||||
def get_auth_token() -> str:
    """Obtain a JWT access token for the backend.

    Strategy: reuse the token cached in ``.monitor_token`` when a cheap
    history request confirms it is still accepted; otherwise prompt the
    operator to paste one (empty input means "no authentication").
    The pasted token is cached for the next run.
    """
    cache = Path(__file__).parent / ".monitor_token"

    if cache.exists():
        cached = cache.read_text(encoding="utf-8").strip()
        try:
            probe = requests.get(
                f"{BACKEND_URL}/api/execution/history",
                headers={"Authorization": f"Bearer {cached}"},
                params={"limit": 1},
                timeout=5,
            )
        except Exception:
            probe = None  # backend unreachable — fall through to manual entry
        if probe is not None and probe.status_code == 200:
            return cached

    # Cached token missing or rejected: ask the operator for a fresh one.
    for msg in (
        "需要登录后端获取 token。请在浏览器中登录后,",
        "从浏览器 DevTools > Application > Local Storage 中复制 token,",
        "或直接输入(留空跳过,使用无认证模式):",
    ):
        print(msg)
    entered = input("JWT Token: ").strip()
    if entered:
        cache.write_text(entered, encoding="utf-8")
    return entered
||||
|
||||
|
||||
def poll_execution_status(token: str) -> dict | None:
    """Fetch recent execution history and pick out the target run.

    Returns the history item whose ``run_uuid`` equals TARGET_RUN_UUID,
    falls back to the newest item when there is no exact match, and
    returns None on any API or network failure (printing a diagnostic).
    """
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        resp = requests.get(
            f"{BACKEND_URL}/api/execution/history",
            headers=auth_headers,
            params={"limit": 5},
            timeout=10,
        )
        if resp.status_code != 200:
            print(f" API 返回 {resp.status_code}: {resp.text[:200]}")
            return None

        payload = resp.json()
        # The endpoint has returned either a bare list or a wrapper object;
        # accept "items" / "data" keys as seen in practice.
        candidates = payload if isinstance(payload, list) else payload.get("items", payload.get("data", []))

        exact = next(
            (it for it in candidates if it.get("run_uuid") == TARGET_RUN_UUID),
            None,
        )
        if exact is not None:
            return exact
        # No exact match — report the newest execution instead.
        return candidates[0] if candidates else None

    except requests.exceptions.ConnectionError:
        print(" 后端连接失败,可能已停止")
        return None
    except Exception as e:
        print(f" API 请求异常: {e}")
        return None
|
||||
|
||||
|
||||
def extract_log_from_api(token: str) -> str | None:
    """Best-effort download of the raw execution log text.

    Returns the log body on HTTP 200, otherwise None; every failure
    (network error, auth failure, 404) is swallowed silently because
    the caller treats a missing log as "no task-level detail".
    """
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        resp = requests.get(
            f"{BACKEND_URL}/api/execution/log/{TARGET_RUN_UUID}",
            headers=auth_headers,
            timeout=30,
        )
        if resp.status_code == 200:
            return resp.text
    except Exception:
        pass  # deliberate best-effort: any failure means "no log available"

    return None
|
||||
|
||||
|
||||
def parse_task_results_from_log(log_text: str) -> list[dict]:
    """Parse per-task execution results out of a raw ETL run log.

    Recognizes three kinds of lines: task start ("开始执行" plus a layer
    keyword), task success ("任务完成" mentioning the current task), and
    task failure ("任务" + "失败"). Returns a list of dicts with keys
    ``task``, ``status`` ("success"/"failed"), ``start``, ``end`` and
    either ``stats`` or ``error``.
    """
    results: list[dict] = []
    lines = log_text.split("\n") if log_text else []
    prefixes = ("ODS_", "DWS_", "DWD_")

    current_task = None
    task_start_time = None

    for line in lines:
        # Task start.
        # BUGFIX: the original condition was
        #     "开始执行" in line and "ODS" in line or "DWS" in line or "DWD" in line
        # which, by `and`/`or` precedence, treated ANY line containing
        # "DWS" or "DWD" as a task start. Parentheses restore the intent:
        # the line must contain "开始执行" AND one of the layer keywords.
        if "开始执行" in line and ("ODS" in line or "DWS" in line or "DWD" in line):
            ts = extract_timestamp(line)
            for word in line.split():
                if word.startswith(prefixes):
                    current_task = word.rstrip(":")
                    task_start_time = ts
                    break

        # Task success: the completion line must name the current task.
        if current_task and "任务完成" in line and current_task in line:
            results.append({
                "task": current_task,
                "status": "success",
                "start": task_start_time,
                "end": extract_timestamp(line),
                "stats": extract_stats(line),
            })
            current_task = None

        # Task failure.
        if "任务" in line and "失败" in line:
            ts = extract_timestamp(line)
            for word in line.split():
                if word.startswith(prefixes):
                    task_name = word.rstrip(":")
                    # Everything after the full-width colon is the error detail.
                    error_msg = line.split("失败:")[-1].strip() if "失败:" in line else "未知错误"
                    results.append({
                        "task": task_name,
                        "status": "failed",
                        # Use the tracked start time only when the failure
                        # belongs to the task currently in flight.
                        "start": task_start_time if current_task == task_name else ts,
                        "end": ts,
                        "error": error_msg,
                    })
                    if current_task == task_name:
                        current_task = None
                    break

    return results


def extract_timestamp(line: str) -> str:
    """Return the first ``[...]``-bracketed timestamp in *line*, or ""."""
    # Expected format: [2026-02-21 15:29:21]
    if "[" in line and "]" in line:
        start = line.index("[") + 1
        end = line.index("]", start)
        return line[start:end]
    return ""


def extract_stats(line: str) -> str:
    """Return the first ``{...}`` fragment (stats blob) in *line*, or ""."""
    if "{" in line and "}" in line:
        start = line.index("{")
        end = line.index("}") + 1
        return line[start:end]
    return ""
|
||||
|
||||
|
||||
|
||||
def generate_report(execution: dict, task_results: list[dict]) -> str:
    """Render the execution overview plus per-task results as Markdown.

    *execution* is a history item from the backend API (key names vary
    between API versions, hence the paired ``get`` fallbacks); the
    per-task rows come from ``parse_task_results_from_log``.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    status = execution.get("status", "unknown")
    start_time = execution.get("started_at", execution.get("start_time", "—"))
    end_time = execution.get("ended_at", execution.get("end_time", "—"))
    duration = execution.get("duration", "—")
    exit_code = execution.get("exit_code", "—")

    out: list[str] = [
        "# ETL 执行结果报告",
        "",
        f"> 生成时间:{now}",
        f"> run_uuid:{TARGET_RUN_UUID}",
        "",
        "---",
        "",
        "## 执行概览",
        "",
        "| 项目 | 值 |",
        "|------|-----|",
        f"| 状态 | {status} |",
        f"| 开始时间 | {start_time} |",
        f"| 结束时间 | {end_time} |",
        f"| 时长 | {duration} |",
        f"| 退出码 | {exit_code} |",
        "",
    ]

    # Per-task table (only when the log could be parsed).
    if task_results:
        ok = sum(1 for r in task_results if r["status"] == "success")
        bad = sum(1 for r in task_results if r["status"] == "failed")
        out += [
            "## 任务级结果",
            "",
            f"成功:{ok} | 失败:{bad} | 总计:{len(task_results)}",
            "",
            "| # | 任务 | 状态 | 开始 | 结束 | 备注 |",
            "|---|------|------|------|------|------|",
        ]
        for idx, r in enumerate(task_results, 1):
            note = r.get("stats", r.get("error", ""))
            if len(note) > 80:  # keep table cells readable
                note = note[:77] + "..."
            out.append(
                f"| {idx} | {r['task']} | {r['status']} | {r.get('start', '—')} | {r.get('end', '—')} | {note} |"
            )
        out.append("")

    # Static "known issues" section documenting the run's context.
    out += [
        "## 已知问题",
        "",
        "### DWS_ASSISTANT_DAILY 字段引用错误(已修复)",
        "",
        "根因:`_extract_trash_records` SQL 引用了 `dwd_assistant_trash_event` 中不存在的字段。",
        "级联影响:9 个任务失败(DWS_ASSISTANT_DAILY 及其下游 + ODS_SETTLEMENT_RECORDS/PAYMENT/REFUND/BUILD_ORDER_SUMMARY)。",
        "修复状态:代码已修复,待下次执行验证。",
        "详见:`export/SYSTEM/LOGS/2026-02-21__dws_assistant_daily_bug_fix.md`",
        "",
        "---",
        "",
        "## 下一步",
        "",
        "1. 重新提交包含失败任务的执行,验证修复",
        "2. 运行 ETL Data Consistency Check",
        "3. 运行 /audit 审计",
    ]

    return "\n".join(out)
|
||||
|
||||
|
||||
def main():
    """Poll the backend until the target run finishes, then export reports.

    Polls the execution-history endpoint every POLL_INTERVAL seconds
    (up to 120 attempts ≈ 60 minutes). On a terminal status it writes a
    Markdown report and the raw execution JSON into SYSTEM_LOG_ROOT.
    """
    out_dir = get_output_path("SYSTEM_LOG_ROOT")
    print("ETL 执行监控启动")
    print(f" 目标 run_uuid: {TARGET_RUN_UUID}")
    print(f" 轮询间隔: {POLL_INTERVAL}s")
    print(f" 输出目录: {out_dir}")
    print()

    # Non-interactive token load: reuse the cached token when present.
    token_path = Path(__file__).parent / ".monitor_token"
    token = token_path.read_text(encoding="utf-8").strip() if token_path.exists() else ""

    terminal = ("success", "failed", "completed", "error", "stopped")
    max_polls = 120  # 120 × POLL_INTERVAL(30s) ≈ 60 minutes

    for attempt in range(1, max_polls + 1):
        stamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{stamp}] 轮询 #{attempt}...", end=" ")

        execution = poll_execution_status(token)
        if execution is None:
            print("未获取到执行信息")
            time.sleep(POLL_INTERVAL)
            continue

        status = execution.get("status", "unknown")
        print(f"状态: {status}")

        if status in terminal:
            print(f"\n执行已完成,状态: {status}")

            log_text = extract_log_from_api(token)
            task_results = parse_task_results_from_log(log_text) if log_text else []

            report_path = out_dir / "2026-02-21__etl_run_result.md"
            report_path.write_text(generate_report(execution, task_results), encoding="utf-8")
            print(f"\n执行结果报告已导出: {report_path}")

            # Keep the raw API payload alongside the rendered report.
            raw_path = out_dir / "2026-02-21__etl_run_raw.json"
            raw_path.write_text(
                json.dumps(execution, ensure_ascii=False, indent=2, default=str),
                encoding="utf-8",
            )
            print(f"原始数据已导出: {raw_path}")
            return

        time.sleep(POLL_INTERVAL)

    print(f"\n超过最大轮询次数 ({max_polls}),退出监控")


if __name__ == "__main__":
    main()
|
||||
38
scripts/ops/poll_v10.py
Normal file
38
scripts/ops/poll_v10.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""轮询 v10 执行结果(用 history 端点)"""
|
||||
import time, requests, json, sys
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")  # load project-root .env

EXEC_ID = "ac99405f-7e42-44da-8abd-b4a51e7d7563"  # execution id being polled
BASE = "http://localhost:8000"  # local backend base URL
# NOTE(review): .monitor_token caches a live JWT on disk; it must not be
# committed to version control.
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
|
||||
def poll():
    """Poll the execution-history endpoint until EXEC_ID reaches a terminal state.

    Polls every 5 seconds for up to 60 attempts (~5 minutes). Returns the
    matching history item once its status is terminal, or None on timeout.
    """
    # FIX: explicit encoding — read_text() defaults to the platform locale.
    token = TOKEN_FILE.read_text(encoding="utf-8").strip()
    headers = {"Authorization": f"Bearer {token}"}
    for attempt in range(60):
        # FIX: added a request timeout so a hung backend cannot block the
        # poller forever (requests has no default timeout).
        r = requests.get(f"{BASE}/api/execution/history?limit=5", headers=headers, timeout=15)
        if r.status_code != 200:
            print(f"[{attempt+1}] HTTP {r.status_code}")
            time.sleep(5)
            continue
        items = r.json()
        # FIX: .get("id") instead of ["id"] — tolerate history items that
        # lack an "id" field instead of raising KeyError.
        match = next((i for i in items if i.get("id") == EXEC_ID), None)
        if not match:
            print(f"[{attempt+1}] 未找到执行记录")
            time.sleep(5)
            continue
        status = match.get("status", "unknown")
        print(f"[{attempt+1}] status={status}, exit_code={match.get('exit_code')}, duration_ms={match.get('duration_ms')}")
        if status in ("success", "failed", "error"):
            return match
        time.sleep(5)
    print("超时")
    return None


if __name__ == "__main__":
    result = poll()
    if result:
        print(json.dumps(result, indent=2, ensure_ascii=False))
|
||||
75
scripts/ops/poll_v3.py
Normal file
75
scripts/ops/poll_v3.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""轮询第三次执行结果。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"  # on-disk cache of the access token
BASE = "http://localhost:8000"  # local backend base URL
EXECUTION_ID = "abc94b2d-615f-42ea-83cc-ce687524a6ea"  # execution being polled

# SECURITY NOTE(review): a live refresh token is hard-coded and committed to
# the repository (see also scripts/ops/.monitor_token). Anyone with read
# access can mint access tokens until it expires — move it into .env / a
# secret store and rotate the credential.
REFRESH_TOKEN = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
    "eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
    "XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
)
|
||||
|
||||
|
||||
def refresh_token() -> str:
    """Exchange REFRESH_TOKEN for a fresh access token; exit(1) on failure.

    The new token is cached in TOKEN_FILE for sibling polling scripts.
    """
    reply = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if reply.status_code != 200:
        print(f"❌ 刷新失败: {reply.status_code}")
        sys.exit(1)
    access = reply.json()["access_token"]
    TOKEN_FILE.write_text(access, encoding="utf-8")
    return access
|
||||
|
||||
|
||||
# Module-level script body: refresh auth once, then poll the history
# endpoint every 20s (up to 90 attempts ≈ 30 minutes) for EXECUTION_ID.
TOKEN = refresh_token()
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

for i in range(90):  # at most 30 minutes (90 polls × 20s)
    time.sleep(20)
    # Elapsed time as mm:ss for log prefixes.
    mm, ss = divmod((i + 1) * 20, 60)
    try:
        r = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if r.status_code == 401:
            # Access token expired — refresh and retry on the next poll.
            TOKEN = refresh_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if r.status_code != 200:
            continue
        # NOTE(review): h["id"] assumes every history item carries an "id";
        # a missing key raises and is swallowed by the except below.
        target = next((h for h in r.json() if h["id"] == EXECUTION_ID), None)
        if not target:
            print(f"[{mm}m{ss}s] 等待...")
            continue
        status = target.get("status")
        dur = target.get("duration_ms")
        dur_s = f"{dur/1000:.1f}s" if dur else "—"
        if status in ("success", "failed", "cancelled"):
            print(f"🏁 status={status}, 耗时={dur_s}, exit_code={target.get('exit_code')}")
            # Fetch full logs for the finished execution.
            lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=HEADERS, timeout=30)
            if lr.status_code == 200:
                ld = lr.json()
                from _env_paths import get_output_path
                out = get_output_path("SYSTEM_LOG_ROOT")
                (out / "2026-02-21__etl_run_raw_v3.json").write_text(
                    json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8"
                )
                print(f"日志已保存")
                # Show the tail of error_log for quick triage.
                el = (ld.get("error_log") or "").strip().split("\n")
                print("--- error_log 末尾 80 行 ---")
                for line in el[-80:]:
                    print(line)
            break
        print(f"[{mm}m{ss}s] status={status}")
    except Exception as e:
        # Broad catch keeps the poller alive across transient network errors.
        print(f"[{mm}m{ss}s] {e}")
else:
    # for/else: reached only when the loop exhausted without `break`.
    print("超时")
|
||||
74
scripts/ops/poll_v4.py
Normal file
74
scripts/ops/poll_v4.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""轮询第四次执行结果。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"  # on-disk cache of the access token
BASE = "http://localhost:8000"  # local backend base URL
EXECUTION_ID = "efd4f421-ee10-4244-833f-7b2d68c3c05b"  # execution being polled

# SECURITY NOTE(review): a live refresh token is hard-coded and committed to
# the repository. Anyone with read access can mint access tokens until it
# expires — move it into .env / a secret store and rotate the credential.
REFRESH_TOKEN = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
    "eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
    "XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
)
|
||||
|
||||
|
||||
def refresh_token() -> str:
    """Trade REFRESH_TOKEN for a new access token, caching it on disk.

    Exits the process with code 1 when the backend rejects the refresh.
    """
    resp = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if resp.status_code == 200:
        fresh = resp.json()["access_token"]
        TOKEN_FILE.write_text(fresh, encoding="utf-8")
        return fresh
    print(f"❌ 刷新失败: {resp.status_code}")
    sys.exit(1)
|
||||
|
||||
|
||||
# Module-level script body: refresh auth once, then poll the history
# endpoint every 20s (up to 90 attempts ≈ 30 minutes) for EXECUTION_ID.
TOKEN = refresh_token()
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

for i in range(90):
    time.sleep(20)
    # Elapsed time as mm:ss for log prefixes.
    mm, ss = divmod((i + 1) * 20, 60)
    try:
        r = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if r.status_code == 401:
            # Access token expired — refresh and retry on the next poll.
            TOKEN = refresh_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if r.status_code != 200:
            continue
        # NOTE(review): h["id"] assumes every history item carries an "id";
        # a missing key raises and is swallowed by the except below.
        target = next((h for h in r.json() if h["id"] == EXECUTION_ID), None)
        if not target:
            print(f"[{mm}m{ss}s] 等待...")
            continue
        status = target.get("status")
        dur = target.get("duration_ms")
        dur_s = f"{dur/1000:.1f}s" if dur else "—"
        if status in ("success", "failed", "cancelled"):
            print(f"🏁 status={status}, 耗时={dur_s}, exit_code={target.get('exit_code')}")
            # Fetch full logs for the finished execution.
            lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=HEADERS, timeout=30)
            if lr.status_code == 200:
                ld = lr.json()
                from _env_paths import get_output_path
                out = get_output_path("SYSTEM_LOG_ROOT")
                (out / "2026-02-21__etl_run_raw_v4.json").write_text(
                    json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8"
                )
                print("日志已保存")
                # Show the tail of error_log for quick triage.
                el = (ld.get("error_log") or "").strip().split("\n")
                print("--- error_log 末尾 80 行 ---")
                for line in el[-80:]:
                    print(line)
            break
        print(f"[{mm}m{ss}s] status={status}")
    except Exception as e:
        # Broad catch keeps the poller alive across transient network errors.
        print(f"[{mm}m{ss}s] {e}")
else:
    # for/else: reached only when the loop exhausted without `break`.
    print("超时")
|
||||
74
scripts/ops/poll_v5.py
Normal file
74
scripts/ops/poll_v5.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""轮询第五次执行结果。"""
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")  # load project-root .env

TOKEN_FILE = Path(__file__).parent / ".monitor_token"  # on-disk cache of the access token
BASE = "http://localhost:8000"  # local backend base URL
EXECUTION_ID = "fe87144a-687d-4ce0-9b79-6bd0186b2be3"  # execution being polled
# SECURITY NOTE(review): a live refresh token is hard-coded and committed to
# the repository. Anyone with read access can mint access tokens until it
# expires — move it into .env / a secret store and rotate the credential.
REFRESH_TOKEN = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
    "eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
    "XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
)
|
||||
|
||||
|
||||
def refresh_token() -> str:
    """Refresh the access token via the backend and cache it in TOKEN_FILE.

    Terminates the script (exit code 1) when the refresh is rejected.
    """
    reply = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if reply.status_code != 200:
        print(f"刷新失败: {reply.status_code}")
        sys.exit(1)
    access = reply.json()["access_token"]
    TOKEN_FILE.write_text(access, encoding="utf-8")
    return access
|
||||
|
||||
|
||||
# Module-level script body: refresh auth once, then poll the history
# endpoint every 20s (up to 90 attempts ≈ 30 minutes) for EXECUTION_ID.
TOKEN = refresh_token()
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

for i in range(90):
    time.sleep(20)
    # Elapsed time as mm:ss for log prefixes.
    mm, ss = divmod((i + 1) * 20, 60)
    try:
        r = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if r.status_code == 401:
            # Access token expired — refresh and retry on the next poll.
            TOKEN = refresh_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if r.status_code != 200:
            continue
        # NOTE(review): h["id"] assumes every history item carries an "id";
        # a missing key raises and is swallowed by the except below.
        target = next((h for h in r.json() if h["id"] == EXECUTION_ID), None)
        if not target:
            print(f"[{mm}m{ss}s] 等待...")
            continue
        status = target.get("status")
        dur = target.get("duration_ms")
        dur_s = f"{dur/1000:.1f}s" if dur else "—"
        if status in ("success", "failed", "cancelled"):
            print(f"🏁 status={status}, 耗时={dur_s}, exit_code={target.get('exit_code')}")
            # Fetch full logs for the finished execution.
            lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=HEADERS, timeout=30)
            if lr.status_code == 200:
                ld = lr.json()
                from _env_paths import get_output_path
                out = get_output_path("SYSTEM_LOG_ROOT")
                (out / "2026-02-21__etl_run_raw_v5.json").write_text(
                    json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8"
                )
                print("日志已保存")
                # Show the tail of error_log for quick triage.
                el = (ld.get("error_log") or "").strip().split("\n")
                print(f"--- error_log 末尾 80 行 (共 {len(el)} 行) ---")
                for line in el[-80:]:
                    print(line)
            break
        print(f"[{mm}m{ss}s] status={status}")
    except Exception as e:
        # Broad catch keeps the poller alive across transient network errors.
        print(f"[{mm}m{ss}s] {e}")
else:
    # for/else: reached only when the loop exhausted without `break`.
    print("超时")
|
||||
75
scripts/ops/poll_v6.py
Normal file
75
scripts/ops/poll_v6.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""轮询第六次执行结果。"""
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
BASE = "http://localhost:8000"
|
||||
EXECUTION_ID = "d9443781-e4ac-4df6-9f87-11c45d72e5ba"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
|
||||
def refresh_token() -> str:
    """Exchange the long-lived REFRESH_TOKEN for a fresh access token.

    Caches the token to TOKEN_FILE for sibling scripts and exits the
    process (code 1) when the refresh endpoint does not answer 200.
    NOTE(review): REFRESH_TOKEN is hardcoded in the repo — it should live
    in .env and the committed JWT should be rotated.
    """
    response = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if response.status_code != 200:
        print(f"刷新失败: {response.status_code}")
        sys.exit(1)
    access_token = response.json()["access_token"]
    TOKEN_FILE.write_text(access_token, encoding="utf-8")
    return access_token
|
||||
|
||||
|
||||
TOKEN = refresh_token()
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

from _env_paths import get_output_path

# Poll every 20s, up to 90 ticks (~30 minutes), until the execution finishes.
for tick in range(90):
    time.sleep(20)
    mm, ss = divmod((tick + 1) * 20, 60)
    try:
        hist = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if hist.status_code == 401:
            # Access token expired mid-run: refresh and retry on the next tick.
            TOKEN = refresh_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if hist.status_code != 200:
            continue
        record = next((h for h in hist.json() if h["id"] == EXECUTION_ID), None)
        if not record:
            print(f"[{mm}m{ss}s] 等待...")
            continue
        status = record.get("status")
        dur = record.get("duration_ms")
        dur_s = f"{dur/1000:.1f}s" if dur else "—"
        if status not in ("success", "failed", "cancelled"):
            print(f"[{mm}m{ss}s] status={status}")
            continue
        print(f"🏁 status={status}, 耗时={dur_s}, exit_code={record.get('exit_code')}")
        lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=HEADERS, timeout=30)
        if lr.status_code == 200:
            ld = lr.json()
            out = get_output_path("SYSTEM_LOG_ROOT")
            (out / "2026-02-21__etl_run_raw_v6.json").write_text(
                json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8"
            )
            print("日志已保存")
            el = (ld.get("error_log") or "").strip().split("\n")
            print(f"--- error_log 末尾 80 行 (共 {len(el)} 行) ---")
            for line in el[-80:]:
                print(line)
        break
    except Exception as e:
        print(f"[{mm}m{ss}s] {e}")
else:
    print("超时")
|
||||
75
scripts/ops/poll_v7.py
Normal file
75
scripts/ops/poll_v7.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""轮询第七次执行结果。"""
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
BASE = "http://localhost:8000"
|
||||
EXECUTION_ID = "0929ab3a-e8eb-441a-89a4-b33b70481052"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
|
||||
def refresh_token() -> str:
    """Exchange the long-lived REFRESH_TOKEN for a fresh access token.

    Caches the token to TOKEN_FILE for sibling scripts and exits the
    process (code 1) when the refresh endpoint does not answer 200.
    NOTE(review): REFRESH_TOKEN is hardcoded in the repo — it should live
    in .env and the committed JWT should be rotated.
    """
    response = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if response.status_code != 200:
        print(f"刷新失败: {response.status_code}")
        sys.exit(1)
    access_token = response.json()["access_token"]
    TOKEN_FILE.write_text(access_token, encoding="utf-8")
    return access_token
|
||||
|
||||
|
||||
TOKEN = refresh_token()
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

from _env_paths import get_output_path

# Poll every 20s, up to 90 ticks (~30 minutes), until the execution finishes.
for tick in range(90):
    time.sleep(20)
    mm, ss = divmod((tick + 1) * 20, 60)
    try:
        hist = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if hist.status_code == 401:
            # Access token expired mid-run: refresh and retry on the next tick.
            TOKEN = refresh_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if hist.status_code != 200:
            continue
        record = next((h for h in hist.json() if h["id"] == EXECUTION_ID), None)
        if not record:
            print(f"[{mm}m{ss}s] 等待...")
            continue
        status = record.get("status")
        dur = record.get("duration_ms")
        dur_s = f"{dur/1000:.1f}s" if dur else "—"
        if status not in ("success", "failed", "cancelled"):
            print(f"[{mm}m{ss}s] status={status}")
            continue
        print(f"🏁 status={status}, 耗时={dur_s}, exit_code={record.get('exit_code')}")
        lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=HEADERS, timeout=30)
        if lr.status_code == 200:
            ld = lr.json()
            out = get_output_path("SYSTEM_LOG_ROOT")
            (out / "2026-02-21__etl_run_raw_v7.json").write_text(
                json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8"
            )
            print("日志已保存")
            el = (ld.get("error_log") or "").strip().split("\n")
            print(f"--- error_log 末尾 80 行 (共 {len(el)} 行) ---")
            for line in el[-80:]:
                print(line)
        break
    except Exception as e:
        print(f"[{mm}m{ss}s] {e}")
else:
    print("超时")
|
||||
79
scripts/ops/poll_v8.py
Normal file
79
scripts/ops/poll_v8.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""轮询第八次执行结果。启动前需先运行 resubmit_v8.py 获取 execution_id 并填入下方。"""
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
BASE = "http://localhost:8000"
|
||||
EXECUTION_ID = "f943bac6-23be-45c5-8b8c-a864e85a1916"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
# Guard: resubmit_v8.py must have been run first and its execution_id
# pasted into EXECUTION_ID above; bail out early otherwise.
if EXECUTION_ID == "__FILL_ME__":
    print("请先填入 execution_id")
    sys.exit(1)
|
||||
|
||||
|
||||
def refresh_token() -> str:
    """Exchange the long-lived REFRESH_TOKEN for a fresh access token.

    Caches the token to TOKEN_FILE for sibling scripts and exits the
    process (code 1) when the refresh endpoint does not answer 200.
    NOTE(review): REFRESH_TOKEN is hardcoded in the repo — it should live
    in .env and the committed JWT should be rotated.
    """
    response = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if response.status_code != 200:
        print(f"刷新失败: {response.status_code}")
        sys.exit(1)
    access_token = response.json()["access_token"]
    TOKEN_FILE.write_text(access_token, encoding="utf-8")
    return access_token
|
||||
|
||||
|
||||
TOKEN = refresh_token()
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

from _env_paths import get_output_path

# Poll every 20s, up to 90 ticks (~30 minutes), until the execution finishes.
for tick in range(90):
    time.sleep(20)
    mm, ss = divmod((tick + 1) * 20, 60)
    try:
        hist = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if hist.status_code == 401:
            # Access token expired mid-run: refresh and retry on the next tick.
            TOKEN = refresh_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if hist.status_code != 200:
            continue
        record = next((h for h in hist.json() if h["id"] == EXECUTION_ID), None)
        if not record:
            print(f"[{mm}m{ss}s] 等待...")
            continue
        status = record.get("status")
        dur = record.get("duration_ms")
        dur_s = f"{dur/1000:.1f}s" if dur else "—"
        if status not in ("success", "failed", "cancelled"):
            print(f"[{mm}m{ss}s] status={status}")
            continue
        print(f"完成 status={status}, 耗时={dur_s}, exit_code={record.get('exit_code')}")
        lr = requests.get(f"{BASE}/api/execution/{EXECUTION_ID}/logs", headers=HEADERS, timeout=30)
        if lr.status_code == 200:
            ld = lr.json()
            out = get_output_path("SYSTEM_LOG_ROOT")
            (out / "2026-02-21__etl_run_raw_v8.json").write_text(
                json.dumps(ld, ensure_ascii=False, indent=2), encoding="utf-8"
            )
            print("日志已保存")
            el = (ld.get("error_log") or "").strip().split("\n")
            print(f"--- error_log 末尾 80 行 (共 {len(el)} 行) ---")
            for line in el[-80:]:
                print(line)
        break
    except Exception as e:
        print(f"[{mm}m{ss}s] {e}")
else:
    print("超时")
|
||||
34
scripts/ops/poll_v9.py
Normal file
34
scripts/ops/poll_v9.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""轮询 v9 执行结果"""
|
||||
import time, requests, json, os, sys
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
EXEC_ID = "847822eb-e63b-46c0-929e-5d5f184a052e"
|
||||
BASE = "http://localhost:8000"
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
|
||||
def get_token():
    """Return the access token cached in TOKEN_FILE, whitespace stripped."""
    cached = TOKEN_FILE.read_text()
    return cached.strip()
|
||||
|
||||
def poll():
    """Poll the execution endpoint until the run reaches a terminal state.

    Returns the final execution record (dict) when terminal, or None after
    60 attempts (~5 minutes). Exits the process if the cached token is
    rejected (401).
    """
    token = get_token()
    headers = {"Authorization": f"Bearer {token}"}
    for attempt in range(60):
        # Fix: add a timeout — requests blocks indefinitely without one.
        r = requests.get(f"{BASE}/api/execution/{EXEC_ID}", headers=headers, timeout=15)
        if r.status_code == 401:
            print("Token 过期,请刷新")
            sys.exit(1)
        if r.status_code != 200:
            # Fix: a transient 5xx previously fell through to r.json() and
            # could crash on a non-JSON error body; report and retry instead.
            print(f"[{attempt+1}] http={r.status_code}")
            time.sleep(5)
            continue
        data = r.json()
        status = data.get("status", "unknown")
        print(f"[{attempt+1}] status={status}")
        # NOTE(review): sibling poll_v* scripts treat ("success", "failed",
        # "cancelled") as terminal — confirm which vocabulary this endpoint uses.
        if status in ("completed", "failed", "partial"):
            print(json.dumps(data, indent=2, ensure_ascii=False))
            return data
        time.sleep(5)
    print("超时")
    return None
|
||||
|
||||
if __name__ == "__main__":
|
||||
poll()
|
||||
169
scripts/ops/resubmit_failed.py
Normal file
169
scripts/ops/resubmit_failed.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""重新提交上次执行中失败的 31 个任务。
|
||||
|
||||
先用 refresh_token 刷新 access_token,再提交执行并轮询等待完成。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
BASE = "http://localhost:8000"
|
||||
|
||||
# refresh_token(7 天有效)
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
|
||||
def refresh_access_token() -> str:
    """Exchange the long-lived REFRESH_TOKEN for a new access_token.

    Side effects: caches the token to TOKEN_FILE so the poll scripts can
    reuse it. Exits the process (code 1) if the refresh endpoint does not
    answer 200.
    NOTE(review): REFRESH_TOKEN is hardcoded and committed — move it to
    .env and rotate the leaked JWT.
    """
    resp = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if resp.status_code != 200:
        print(f"❌ 刷新 token 失败: {resp.status_code} {resp.text}")
        sys.exit(1)
    data = resp.json()
    token = data["access_token"]
    # Cache to file for sibling scripts.
    TOKEN_FILE.write_text(token, encoding="utf-8")
    # Fix (F541): message has no placeholders, so no f-prefix is needed.
    print("✅ access_token 已刷新并缓存")
    return token
|
||||
|
||||
|
||||
# ── 刷新 token ──
|
||||
TOKEN = refresh_access_token()
|
||||
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
|
||||
|
||||
# 上次失败的 31 个任务
|
||||
FAILED_TASKS = [
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"DWS_GOODS_STOCK_DAILY",
|
||||
"DWS_GOODS_STOCK_WEEKLY",
|
||||
"DWS_GOODS_STOCK_MONTHLY",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX",
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
]
|
||||
|
||||
config = {
|
||||
"tasks": FAILED_TASKS,
|
||||
"flow": "api_full",
|
||||
"processing_mode": "full_window",
|
||||
"window_mode": "custom",
|
||||
"window_start": "2025-11-01",
|
||||
"window_end": "2026-02-20",
|
||||
"window_split": "month",
|
||||
"window_split_days": 30,
|
||||
"force_full": True,
|
||||
"dry_run": False,
|
||||
"lookback_hours": 24,
|
||||
"overlap_seconds": 600,
|
||||
}
|
||||
|
||||
# Submit the 31 previously-failed tasks and wait for the run to finish.
print(f"📤 提交 {len(FAILED_TASKS)} 个失败任务重新执行...")
print(" flow=api_full, mode=full_window, 2025-11-01~2026-02-20, 30天切分, force-full")

resp = requests.post(f"{BASE}/api/execution/run", headers=HEADERS, json=config, timeout=30)
if resp.status_code != 200:
    print(f"❌ 提交失败: {resp.status_code} {resp.text}")
    sys.exit(1)

execution_id = resp.json()["execution_id"]
print(f"✅ 已提交,execution_id={execution_id}")
print(" 轮询等待完成...")

poll_interval = 20
max_wait = 1800
elapsed = 0

while elapsed < max_wait:
    time.sleep(poll_interval)
    elapsed += poll_interval
    mm, ss = divmod(elapsed, 60)

    try:
        hist_resp = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if hist_resp.status_code == 401:
            # Access token expired mid-run: refresh and retry.
            print(f" [{mm}m{ss}s] token 过期,刷新中...")
            TOKEN = refresh_access_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if hist_resp.status_code != 200:
            print(f" [{mm}m{ss}s] 查询历史失败: {hist_resp.status_code}")
            continue

        target = next((h for h in hist_resp.json() if h["id"] == execution_id), None)
        if target is None:
            print(f" [{mm}m{ss}s] 执行记录尚未出现...")
            continue

        status = target.get("status", "unknown")
        duration_ms = target.get("duration_ms")
        duration_str = f"{duration_ms / 1000:.1f}s" if duration_ms else "—"

        if status not in ("success", "failed", "cancelled"):
            print(f" [{mm}m{ss}s] status={status}")
            continue

        print(f"\n🏁 执行完成: status={status}, 耗时={duration_str}, exit_code={target.get('exit_code')}")
        log_resp = requests.get(f"{BASE}/api/execution/{execution_id}/logs", headers=HEADERS, timeout=30)
        if log_resp.status_code == 200:
            output = log_resp.json().get("output_log", "") or ""
            lines = output.strip().split("\n")
            print("\n--- 日志末尾 60 行 ---")
            for line in lines[-60:]:
                print(line)
        break

    except Exception as e:
        print(f" [{mm}m{ss}s] 轮询异常: {e}")

else:
    print(f"\n⏰ 超时({max_wait}s),请手动检查 execution_id={execution_id}")
|
||||
167
scripts/ops/resubmit_v3.py
Normal file
167
scripts/ops/resubmit_v3.py
Normal file
@@ -0,0 +1,167 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""第三次执行:验证 BUG 2 (monthly UniqueViolation) + BUG 3 (customer UndefinedColumn) 修复。
|
||||
|
||||
复用 resubmit_failed.py 的逻辑,提交同样的 31 个任务。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
BASE = "http://localhost:8000"
|
||||
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
|
||||
def refresh_access_token() -> str:
    """Exchange the long-lived REFRESH_TOKEN for a new access_token.

    Caches the token in TOKEN_FILE and exits the process (code 1) if the
    refresh endpoint does not answer 200.
    NOTE(review): REFRESH_TOKEN is hardcoded in the repo — it should live
    in .env and the committed JWT should be rotated.
    """
    response = requests.post(
        f"{BASE}/api/auth/refresh",
        json={"refresh_token": REFRESH_TOKEN},
        timeout=10,
    )
    if response.status_code != 200:
        print(f"❌ 刷新 token 失败: {response.status_code} {response.text}")
        sys.exit(1)
    access_token = response.json()["access_token"]
    TOKEN_FILE.write_text(access_token, encoding="utf-8")
    print("✅ access_token 已刷新")
    return access_token
|
||||
|
||||
|
||||
TOKEN = refresh_access_token()
|
||||
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
|
||||
|
||||
TASKS = [
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"DWS_GOODS_STOCK_DAILY",
|
||||
"DWS_GOODS_STOCK_WEEKLY",
|
||||
"DWS_GOODS_STOCK_MONTHLY",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX",
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
]
|
||||
|
||||
config = {
|
||||
"tasks": TASKS,
|
||||
"flow": "api_full",
|
||||
"processing_mode": "full_window",
|
||||
"window_mode": "custom",
|
||||
"window_start": "2025-11-01",
|
||||
"window_end": "2026-02-20",
|
||||
"window_split": "month",
|
||||
"window_split_days": 30,
|
||||
"force_full": True,
|
||||
"dry_run": False,
|
||||
"lookback_hours": 24,
|
||||
"overlap_seconds": 600,
|
||||
}
|
||||
|
||||
# Third run: submit all tasks, then poll until the execution finishes.
print(f"📤 第三次执行:提交 {len(TASKS)} 个任务...")
resp = requests.post(f"{BASE}/api/execution/run", headers=HEADERS, json=config, timeout=30)
if resp.status_code != 200:
    print(f"❌ 提交失败: {resp.status_code} {resp.text}")
    sys.exit(1)

execution_id = resp.json()["execution_id"]
print(f"✅ execution_id={execution_id}")
print(" 轮询等待完成...")

poll_interval = 20
max_wait = 1800
elapsed = 0

while elapsed < max_wait:
    time.sleep(poll_interval)
    elapsed += poll_interval
    mm, ss = divmod(elapsed, 60)

    try:
        hist_resp = requests.get(f"{BASE}/api/execution/history?limit=5", headers=HEADERS, timeout=15)
        if hist_resp.status_code == 401:
            # Access token expired mid-run: refresh and retry.
            print(f" [{mm}m{ss}s] token 过期,刷新...")
            TOKEN = refresh_access_token()
            HEADERS["Authorization"] = f"Bearer {TOKEN}"
            continue
        if hist_resp.status_code != 200:
            print(f" [{mm}m{ss}s] 查询失败: {hist_resp.status_code}")
            continue

        target = next((h for h in hist_resp.json() if h["id"] == execution_id), None)
        if not target:
            print(f" [{mm}m{ss}s] 等待执行记录...")
            continue

        status = target.get("status", "unknown")
        duration_ms = target.get("duration_ms")
        dur = f"{duration_ms / 1000:.1f}s" if duration_ms else "—"

        if status not in ("success", "failed", "cancelled"):
            print(f" [{mm}m{ss}s] status={status}")
            continue

        print(f"\n🏁 完成: status={status}, 耗时={dur}, exit_code={target.get('exit_code')}")
        log_resp = requests.get(f"{BASE}/api/execution/{execution_id}/logs", headers=HEADERS, timeout=30)
        if log_resp.status_code == 200:
            log_data = log_resp.json()
            # Persist the raw payload for later analysis.
            from _env_paths import get_output_path
            out_dir = get_output_path("SYSTEM_LOG_ROOT")
            raw_file = out_dir / "2026-02-21__etl_run_raw_v3.json"
            raw_file.write_text(json.dumps(log_data, ensure_ascii=False, indent=2), encoding="utf-8")
            print(f" 原始日志已保存: {raw_file}")

            error_log = log_data.get("error_log", "") or ""
            lines = error_log.strip().split("\n")
            print("\n--- error_log 末尾 80 行 ---")
            for line in lines[-80:]:
                print(line)
        break

    except Exception as e:
        print(f" [{mm}m{ss}s] 异常: {e}")

else:
    print(f"\n⏰ 超时({max_wait}s),请手动检查 execution_id={execution_id}")
|
||||
64
scripts/ops/resubmit_v4.py
Normal file
64
scripts/ops/resubmit_v4.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""第四次执行:验证 BUG 4(dim_member/dim_member_card_account site_id 修复)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
BASE = "http://localhost:8000"
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
# Refresh the access token from the long-lived refresh token.
# NOTE(review): REFRESH_TOKEN is hardcoded and committed — rotate it.
refresh_resp = requests.post(
    f"{BASE}/api/auth/refresh",
    json={"refresh_token": REFRESH_TOKEN},
    timeout=10,
)
if refresh_resp.status_code != 200:
    print(f"❌ 刷新失败: {refresh_resp.status_code} {refresh_resp.text}")
    sys.exit(1)
token = refresh_resp.json()["access_token"]
TOKEN_FILE.write_text(token, encoding="utf-8")
print("✅ token 已刷新")

headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
||||
|
||||
config = {
|
||||
"tasks": [
|
||||
"DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE",
|
||||
"ODS_SETTLEMENT_RECORDS", "ODS_PAYMENT", "ODS_REFUND",
|
||||
"DWS_BUILD_ORDER_SUMMARY", "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
|
||||
"ODS_GOODS_CATEGORY", "ODS_STORE_GOODS", "ODS_STORE_GOODS_SALES",
|
||||
"ODS_TENANT_GOODS", "ODS_PLATFORM_COUPON", "ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION", "ODS_INVENTORY_STOCK", "ODS_INVENTORY_CHANGE",
|
||||
"DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
|
||||
"DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE", "DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL", "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX", "DWD_LOAD_FROM_ODS",
|
||||
],
|
||||
"flow": "api_full",
|
||||
"processing_mode": "full_window",
|
||||
"window_mode": "custom",
|
||||
"window_start": "2025-11-01",
|
||||
"window_end": "2026-02-20",
|
||||
"window_split": "month",
|
||||
"window_split_days": 30,
|
||||
"force_full": True,
|
||||
"dry_run": False,
|
||||
"lookback_hours": 24,
|
||||
"overlap_seconds": 600,
|
||||
}
|
||||
|
||||
# Submit the run; report the execution id or abort on failure.
r = requests.post(f"{BASE}/api/execution/run", headers=headers, json=config, timeout=30)
if r.status_code != 200:
    print(f"❌ 提交失败: {r.status_code} {r.text}")
    sys.exit(1)
data = r.json()
eid = data.get("execution_id", data.get("id", "?"))
print(f"✅ 提交成功: execution_id={eid}")
|
||||
63
scripts/ops/resubmit_v5.py
Normal file
63
scripts/ops/resubmit_v5.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""第五次提交执行(BUG 5 修复后)。"""
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
BASE = "http://localhost:8000"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
# Refresh the access token from the long-lived refresh token.
# NOTE(review): REFRESH_TOKEN is hardcoded and committed — rotate it.
refresh_resp = requests.post(
    f"{BASE}/api/auth/refresh",
    json={"refresh_token": REFRESH_TOKEN},
    timeout=10,
)
if refresh_resp.status_code != 200:
    print(f"刷新失败: {refresh_resp.status_code}")
    sys.exit(1)
token = refresh_resp.json()["access_token"]
# Cache beside this script so the poll scripts can pick it up.
(Path(__file__).parent / ".monitor_token").write_text(token, encoding="utf-8")
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
||||
|
||||
# 提交执行(与 v4 相同 payload)
|
||||
payload = {
|
||||
"tasks": [
|
||||
"DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE",
|
||||
"ODS_SETTLEMENT_RECORDS", "ODS_PAYMENT", "ODS_REFUND",
|
||||
"DWS_BUILD_ORDER_SUMMARY", "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
|
||||
"ODS_GOODS_CATEGORY", "ODS_STORE_GOODS", "ODS_STORE_GOODS_SALES",
|
||||
"ODS_TENANT_GOODS", "ODS_PLATFORM_COUPON", "ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION", "ODS_INVENTORY_STOCK", "ODS_INVENTORY_CHANGE",
|
||||
"DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
|
||||
"DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE", "DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL", "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX", "DWD_LOAD_FROM_ODS",
|
||||
],
|
||||
"flow": "api_full",
|
||||
"processing_mode": "full_window",
|
||||
"window_mode": "custom",
|
||||
"window_start": "2025-11-01",
|
||||
"window_end": "2026-02-20",
|
||||
"window_split": "month",
|
||||
"window_split_days": 30,
|
||||
"force_full": True,
|
||||
"dry_run": False,
|
||||
"lookback_hours": 24,
|
||||
"overlap_seconds": 600,
|
||||
}
|
||||
# Submit the run and echo the server response.
run_resp = requests.post(f"{BASE}/api/execution/run", headers=headers, json=payload, timeout=30)
if run_resp.status_code not in (200, 201):
    # Truncate the error body to keep the console readable.
    print(f"提交失败: {run_resp.status_code} {run_resp.text[:300]}")
    sys.exit(1)

data = run_resp.json()
eid = data.get("execution_id") or data.get("id")
print(f"提交成功: execution_id={eid}")
print(json.dumps(data, ensure_ascii=False, indent=2))
|
||||
62
scripts/ops/resubmit_v6.py
Normal file
62
scripts/ops/resubmit_v6.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""第六次提交执行(BUG 5+6+7 修复后)。"""
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
BASE = "http://localhost:8000"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
# Refresh the access token from the long-lived refresh token.
# NOTE(review): REFRESH_TOKEN is hardcoded and committed — rotate it.
refresh_resp = requests.post(
    f"{BASE}/api/auth/refresh",
    json={"refresh_token": REFRESH_TOKEN},
    timeout=10,
)
if refresh_resp.status_code != 200:
    print(f"刷新失败: {refresh_resp.status_code}")
    sys.exit(1)
token = refresh_resp.json()["access_token"]
# Cache beside this script so the poll scripts can pick it up.
(Path(__file__).parent / ".monitor_token").write_text(token, encoding="utf-8")
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
||||
|
||||
payload = {
|
||||
"tasks": [
|
||||
"DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE",
|
||||
"ODS_SETTLEMENT_RECORDS", "ODS_PAYMENT", "ODS_REFUND",
|
||||
"DWS_BUILD_ORDER_SUMMARY", "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
|
||||
"ODS_GOODS_CATEGORY", "ODS_STORE_GOODS", "ODS_STORE_GOODS_SALES",
|
||||
"ODS_TENANT_GOODS", "ODS_PLATFORM_COUPON", "ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION", "ODS_INVENTORY_STOCK", "ODS_INVENTORY_CHANGE",
|
||||
"DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
|
||||
"DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE", "DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL", "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX", "DWD_LOAD_FROM_ODS",
|
||||
],
|
||||
"flow": "api_full",
|
||||
"processing_mode": "full_window",
|
||||
"window_mode": "custom",
|
||||
"window_start": "2025-11-01",
|
||||
"window_end": "2026-02-20",
|
||||
"window_split": "month",
|
||||
"window_split_days": 30,
|
||||
"force_full": True,
|
||||
"dry_run": False,
|
||||
"lookback_hours": 24,
|
||||
"overlap_seconds": 600,
|
||||
}
|
||||
# Submit the run and echo the server response.
run_resp = requests.post(f"{BASE}/api/execution/run", headers=headers, json=payload, timeout=30)
if run_resp.status_code not in (200, 201):
    # Truncate the error body to keep the console readable.
    print(f"提交失败: {run_resp.status_code} {run_resp.text[:300]}")
    sys.exit(1)

data = run_resp.json()
eid = data.get("execution_id") or data.get("id")
print(f"提交成功: execution_id={eid}")
print(json.dumps(data, ensure_ascii=False, indent=2))
|
||||
57
scripts/ops/resubmit_v7.py
Normal file
57
scripts/ops/resubmit_v7.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""第七次执行:修复 BUG 8(pay_money/gift_money → pay_amount/point_amount)。"""
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
BASE = "http://localhost:8000"
|
||||
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
REFRESH_TOKEN = (
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
|
||||
"eyJzdWIiOiIxIiwic2l0ZV9pZCI6Mjc5MDY4NTQxNTQ0MzI2OSwidHlwZSI6InJlZnJlc2giLCJleHAiOjE3NzIyNjM0NjN9."
|
||||
"XYoda5lfxNtTSAGWoLlYhS9cA-hTK9iqK0SqUyn2KV4"
|
||||
)
|
||||
|
||||
# Refresh the access token from the long-lived refresh token.
# NOTE(review): REFRESH_TOKEN is hardcoded and committed — rotate it.
refresh_resp = requests.post(
    f"{BASE}/api/auth/refresh",
    json={"refresh_token": REFRESH_TOKEN},
    timeout=10,
)
if refresh_resp.status_code != 200:
    print(f"刷新失败: {refresh_resp.status_code} {refresh_resp.text}")
    sys.exit(1)
token = refresh_resp.json()["access_token"]
TOKEN_FILE.write_text(token, encoding="utf-8")
print("token 已刷新")

headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
||||
|
||||
payload = {
|
||||
"tasks": [
|
||||
"ODS_FETCH", "DWD_LOAD_FROM_ODS",
|
||||
"DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER", "DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
|
||||
"DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
|
||||
"DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE", "DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX", "DWS_RELATION_INDEX",
|
||||
],
|
||||
"flow": "api_full",
|
||||
"processing_mode": "full_window",
|
||||
"date_range": {"start": "2025-11-01", "end": "2026-02-20"},
|
||||
"window_days": 30,
|
||||
"force_full": True,
|
||||
}
|
||||
|
||||
# Submit the run, dump the raw response, and surface the execution id.
run_resp = requests.post(f"{BASE}/api/execution/run", headers=headers, json=payload, timeout=30)
print(f"status={run_resp.status_code}")
data = run_resp.json()
print(json.dumps(data, ensure_ascii=False, indent=2))

if run_resp.status_code == 200:
    eid = data.get("execution_id") or data.get("id")
    print(f"\nexecution_id: {eid}")
|
||||
56
scripts/ops/resubmit_v8.py
Normal file
56
scripts/ops/resubmit_v8.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# -*- coding: utf-8 -*-
"""Run No. 8: verify the fixes for BUG 8+9+10+11.

Refreshes the access token via /api/auth/refresh, caches it in
``.monitor_token``, then submits a full ``api_full`` execution covering the
2025-11-01 .. 2026-02-20 window.

SECURITY FIX: the refresh token used to be hard-coded (a live JWT committed
to the repo); it is now read from ``MONITOR_REFRESH_TOKEN`` in the root .env
or the process environment.
"""
import json
import os
import sys
from pathlib import Path

import requests
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")

BASE = "http://localhost:8000"
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
# Never commit credentials: the refresh token must come from the environment.
REFRESH_TOKEN = os.environ.get("MONITOR_REFRESH_TOKEN", "")
if not REFRESH_TOKEN:
    print("MONITOR_REFRESH_TOKEN 未配置,请在根 .env 中设置")
    sys.exit(1)

# Refresh the access token and cache it for the other monitoring helpers.
resp = requests.post(f"{BASE}/api/auth/refresh", json={"refresh_token": REFRESH_TOKEN}, timeout=10)
if resp.status_code != 200:
    print(f"刷新失败: {resp.status_code} {resp.text}")
    sys.exit(1)
token = resp.json()["access_token"]
TOKEN_FILE.write_text(token, encoding="utf-8")
print("token 已刷新")

headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

# Full-window re-run: ODS fetch -> DWD load -> every DWS aggregation task.
payload = {
    "tasks": [
        "ODS_FETCH", "DWD_LOAD_FROM_ODS",
        "DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY",
        "DWS_ASSISTANT_CUSTOMER", "DWS_ASSISTANT_SALARY",
        "DWS_ASSISTANT_FINANCE",
        "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
        "DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY",
        "DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE",
        "DWS_FINANCE_INCOME_STRUCTURE", "DWS_FINANCE_DISCOUNT_DETAIL",
        "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX", "DWS_RELATION_INDEX",
    ],
    "flow": "api_full",
    "processing_mode": "full_window",
    "date_range": {"start": "2025-11-01", "end": "2026-02-20"},
    "window_days": 30,
    "force_full": True,
}

r = requests.post(f"{BASE}/api/execution/run", headers=headers, json=payload, timeout=30)
print(f"status={r.status_code}")
data = r.json()
print(json.dumps(data, ensure_ascii=False, indent=2))

if r.status_code == 200:
    eid = data.get("execution_id") or data.get("id")
    print(f"\nexecution_id: {eid}")
|
||||
36
scripts/ops/run_migration_assistant_no_int.py
Normal file
36
scripts/ops/run_migration_assistant_no_int.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""Run the assistant_no_int migration against the test database (TEST_DB_DSN)."""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2

load_dotenv(Path(__file__).resolve().parents[2] / ".env")

dsn = os.environ.get("TEST_DB_DSN")
if not dsn:
    raise RuntimeError("TEST_DB_DSN 未配置")

sql_file = Path(__file__).resolve().parents[2] / "db/etl_feiqiu/migrations/2026-02-20__add_assistant_trash_event_ex_assistant_no_int.sql"
sql = sql_file.read_text(encoding="utf-8")

conn = psycopg2.connect(dsn)
conn.autocommit = True  # the migration file manages its own statement order
try:
    with conn.cursor() as cur:
        cur.execute(sql)
        print("迁移执行成功")

    # Verify: list the table's columns so the operator can confirm that
    # assistant_no_int was added.
    with conn.cursor() as cur:
        cur.execute("""
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_schema = 'dwd'
              AND table_name = 'dwd_assistant_trash_event_ex'
            ORDER BY ordinal_position
        """)
        rows = cur.fetchall()
        print("dwd_assistant_trash_event_ex 当前列:")
        for r in rows:
            print(f" {r[0]} ({r[1]})")
finally:
    # Fix: close the connection even when the migration or the verification
    # query raises (the original leaked it on any error).
    conn.close()
|
||||
154
scripts/ops/run_migration_c1.py
Normal file
154
scripts/ops/run_migration_c1.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
Apply migration C1 (add *birthday* to the ODS/DWD member tables) to
test_etl_feiqiu, then run the embedded verification queries to confirm the
columns and the column comment are in place.

Usage: python scripts/ops/run_migration_c1.py
"""

import os
import sys
from pathlib import Path

import psycopg2
from dotenv import load_dotenv

# Load the repo-root .env so TEST_DB_DSN becomes available.
ROOT_DIR = Path(__file__).resolve().parents[2]
load_dotenv(ROOT_DIR / ".env")

TEST_DB_DSN = os.environ.get("TEST_DB_DSN")
if not TEST_DB_DSN:
    print("❌ 错误:TEST_DB_DSN 环境变量未定义,请检查根 .env 文件")
    sys.exit(1)

# Safety net: refuse to run against anything but the dedicated test database.
if "test_etl_feiqiu" not in TEST_DB_DSN:
    print(f"❌ 安全检查失败:TEST_DB_DSN 未指向 test_etl_feiqiu\n 当前值: {TEST_DB_DSN}")
    sys.exit(1)

MIGRATION_FILE = ROOT_DIR / "db" / "etl_feiqiu" / "migrations" / "2026-02-22__C1_dim_member_add_birthday.sql"


def run_migration(conn):
    """Execute the effective DDL statements of the migration file.

    The file contains its own BEGIN/COMMIT plus commented-out rollback and
    verification sections; comments are dropped, BEGIN/COMMIT are skipped
    (the connection runs in autocommit mode, so psycopg2's implicit
    transaction cannot clash with the script's own), and parsing stops at
    the rollback / verification markers.
    """
    sql = MIGRATION_FILE.read_text(encoding="utf-8")

    conn.autocommit = True
    cur = conn.cursor()

    # Split the file into individual ';'-terminated DDL statements.
    statements = []
    current = []
    for line in sql.splitlines():
        stripped = line.strip()

        if stripped.startswith("--"):
            # Everything after the rollback / verification banner is
            # commented-out reference material - stop collecting there.
            if "回滚" in stripped or "验证 SQL" in stripped:
                break
            continue

        if not stripped:
            continue

        # Transaction control is replaced by autocommit.
        if stripped.upper() in ("BEGIN;", "COMMIT;"):
            continue

        current.append(line)
        if stripped.endswith(";"):
            statements.append("\n".join(current))
            current = []

    print(f"📄 迁移文件: {MIGRATION_FILE.name}")
    print("🔗 目标库: test_etl_feiqiu")
    print(f"📝 待执行语句: {len(statements)} 条\n")

    for i, stmt in enumerate(statements, 1):
        print(f" [{i}] {stmt.strip()[:80]}...")
        cur.execute(stmt)
        print(" ✅ 执行成功")

    cur.close()
    print("\n✅ 迁移脚本执行完成")


def _column_info(cur, schema, table, column):
    """Return (column_name, data_type) for the column, or None when absent."""
    cur.execute("""
        SELECT column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = %s
          AND table_name = %s
          AND column_name = %s
    """, (schema, table, column))
    return cur.fetchone()


def run_verification(conn):
    """Confirm both birthday columns exist and the DWD column comment is set."""
    conn.autocommit = True
    cur = conn.cursor()

    print("\n" + "=" * 60)
    print("🔍 验证结果")
    print("=" * 60)

    # Checks 1+2: the new column on both layers (same query, two targets).
    for schema, table in (("ods", "member_profiles"), ("dwd", "dim_member")):
        row = _column_info(cur, schema, table, "birthday")
        if row:
            print(f" ✅ {schema}.{table}.birthday 存在 (类型: {row[1]})")
        else:
            print(f" ❌ {schema}.{table}.birthday 不存在!")

    # Check 3: the column comment on dwd.dim_member.birthday.
    cur.execute("""
        SELECT col_description(c.oid, a.attnum)
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        JOIN pg_attribute a ON a.attrelid = c.oid
        WHERE n.nspname = 'dwd'
          AND c.relname = 'dim_member'
          AND a.attname = 'birthday'
    """)
    row = cur.fetchone()
    if row and row[0]:
        print(f" ✅ dwd.dim_member.birthday 注释: {row[0]}")
    else:
        print(" ⚠️ dwd.dim_member.birthday 注释未设置")

    cur.close()
    print("\n🏁 验证完成")


def main():
    print("=" * 60)
    print("迁移脚本 C1:ODS/DWD 层会员表新增 birthday 列")
    print("=" * 60 + "\n")

    conn = psycopg2.connect(TEST_DB_DSN)
    try:
        run_migration(conn)
        run_verification(conn)
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|
||||
178
scripts/ops/run_migration_spi_table.py
Normal file
178
scripts/ops/run_migration_spi_table.py
Normal file
@@ -0,0 +1,178 @@
|
||||
# -*- coding: utf-8 -*-
"""
Apply the SPI table migration to the test database test_etl_feiqiu.

Migration : db/etl_feiqiu/migrations/2026-02-23_create_dws_member_spending_power_index.sql
Target    : dws.dws_member_spending_power_index

Usage:
    python scripts/ops/run_migration_spi_table.py
"""

import os
import sys
from pathlib import Path

from dotenv import load_dotenv
import psycopg2

# Load the repo-root .env (an already-set environment wins).
_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)

DSN = os.getenv("TEST_DB_DSN")
if not DSN:
    print("ERROR: TEST_DB_DSN 未配置,请在根 .env 中设置")
    sys.exit(1)

MIGRATION_FILE = (
    _ROOT / "db" / "etl_feiqiu" / "migrations"
    / "2026-02-23_create_dws_member_spending_power_index.sql"
)


def table_exists(conn) -> bool:
    """Return True when the target table already exists."""
    cur = conn.cursor()
    cur.execute("""
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = 'dws'
          AND table_name = 'dws_member_spending_power_index'
    """)
    exists = cur.fetchone() is not None
    cur.close()
    return exists


def execute_migration(conn) -> bool:
    """Run the migration's main section; return True on success.

    The SQL file ends with a commented-out rollback section introduced by a
    "-- ==== ... 回滚" banner; everything from that banner onward is dropped
    before execution.
    """
    sql = MIGRATION_FILE.read_text(encoding="utf-8")

    main_lines = []
    in_rollback = False
    for line in sql.split("\n"):
        stripped = line.strip()
        if stripped.startswith("-- ====") and "回滚" in stripped:
            in_rollback = True
        if not in_rollback:
            main_lines.append(line)

    main_sql = "\n".join(main_lines).strip()
    if not main_sql:
        print("⚠️ 迁移脚本为空,跳过")
        return False

    try:
        cur = conn.cursor()
        cur.execute(main_sql)
        cur.close()
        print("✅ 迁移脚本执行成功")
        return True
    except Exception as e:  # script-level boundary: report and fail soft
        print(f"❌ 迁移脚本执行失败: {e}")
        return False


def _index_exists(cur, index_name: str) -> bool:
    """Return True when *index_name* exists on the SPI table."""
    cur.execute("""
        SELECT 1 FROM pg_indexes
        WHERE schemaname = 'dws'
          AND tablename = 'dws_member_spending_power_index'
          AND indexname = %s
    """, (index_name,))
    return cur.fetchone() is not None


def verify(conn) -> bool:
    """Check table, columns and indexes; print a report, return overall OK."""
    cur = conn.cursor()
    checks = []

    # 1. The table exists.
    cur.execute("""
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = 'dws'
          AND table_name = 'dws_member_spending_power_index'
    """)
    checks.append(("表 dws.dws_member_spending_power_index 存在", cur.fetchone() is not None))

    # 2. Every expected column is present.
    expected_cols = [
        "spi_id", "site_id", "member_id",
        "spend_30", "spend_90", "recharge_90",
        "orders_30", "orders_90", "visit_days_30", "visit_days_90",
        "avg_ticket_90", "active_weeks_90", "daily_spend_ewma_90",
        "score_level_raw", "score_speed_raw", "score_stability_raw",
        "score_level_display", "score_speed_display", "score_stability_display",
        "raw_score", "display_score",
        "calc_time", "created_at", "updated_at",
    ]
    cur.execute("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'dws'
          AND table_name = 'dws_member_spending_power_index'
        ORDER BY ordinal_position
    """)
    actual_cols = [r[0] for r in cur.fetchall()]
    missing = [c for c in expected_cols if c not in actual_cols]
    checks.append((f"字段完整({len(actual_cols)} 列)", len(missing) == 0))
    if missing:
        print(f" 缺失字段: {missing}")

    # 3+4. The unique upsert index and the display-score query index.
    checks.append(("唯一索引 idx_spi_site_member 存在", _index_exists(cur, "idx_spi_site_member")))
    checks.append(("查询索引 idx_spi_display_score 存在", _index_exists(cur, "idx_spi_display_score")))

    cur.close()

    print("\n" + "=" * 50)
    print("建表验证结果")
    print("=" * 50)
    all_ok = True
    for name, ok in checks:
        status = "✅" if ok else "❌"
        print(f" {status} {name}")
        if not ok:
            all_ok = False

    return all_ok


def main():
    # Hide credentials when echoing the DSN.
    dsn_display = DSN.split("@")[1] if "@" in DSN else DSN
    print(f"连接测试库: {dsn_display}")
    print(f"迁移脚本: {MIGRATION_FILE.name}\n")

    if not MIGRATION_FILE.exists():
        print(f"ERROR: 迁移脚本不存在: {MIGRATION_FILE}")
        sys.exit(1)

    conn = psycopg2.connect(DSN)
    conn.autocommit = True  # DDL applies immediately
    try:
        # Idempotency: skip the DDL when the table is already there.
        if table_exists(conn):
            print("ℹ️ 表 dws.dws_member_spending_power_index 已存在,跳过建表")
        else:
            if not execute_migration(conn):
                sys.exit(1)

        all_ok = verify(conn)
    finally:
        # Fix: close on every path (the original leaked the connection when
        # verify() raised).
        conn.close()

    if all_ok:
        print("\n✅ SPI 建表迁移完成,所有验证通过")
    else:
        print("\n⚠️ 部分验证未通过,请检查")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
189
scripts/ops/run_migration_staff_info.py
Normal file
189
scripts/ops/run_migration_staff_info.py
Normal file
@@ -0,0 +1,189 @@
|
||||
# -*- coding: utf-8 -*-
"""
Apply the staff-profile table migration to the test database test_etl_feiqiu.

Migration : db/etl_feiqiu/migrations/2026-02-22__add_staff_info_tables.sql
Targets   : ods.staff_info_master, dwd.dim_staff, dwd.dim_staff_ex

Usage:
    python scripts/ops/run_migration_staff_info.py
"""

import os
import sys
from pathlib import Path

from dotenv import load_dotenv
import psycopg2

# Load the repo-root .env (an already-set environment wins).
_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)

DSN = os.getenv("TEST_DB_DSN")
if not DSN:
    print("ERROR: TEST_DB_DSN 未配置,请在根 .env 中设置")
    sys.exit(1)

MIGRATION_FILE = (
    _ROOT / "db" / "etl_feiqiu" / "migrations"
    / "2026-02-22__add_staff_info_tables.sql"
)

# The three tables this migration creates.
TABLES = [
    ("ods", "staff_info_master"),
    ("dwd", "dim_staff"),
    ("dwd", "dim_staff_ex"),
]


def tables_exist(conn) -> dict[str, bool]:
    """Map each "schema.table" in TABLES to whether it already exists."""
    cur = conn.cursor()
    result = {}
    for schema, table in TABLES:
        cur.execute("""
            SELECT 1 FROM information_schema.tables
            WHERE table_schema = %s AND table_name = %s
        """, (schema, table))
        result[f"{schema}.{table}"] = cur.fetchone() is not None
    cur.close()
    return result


def execute_migration(conn) -> bool:
    """Run the migration's main section; return True on success.

    The commented-out rollback section (after the "-- ==== ... 回滚" banner)
    is stripped before execution.
    """
    sql = MIGRATION_FILE.read_text(encoding="utf-8")

    main_lines = []
    in_rollback = False
    for line in sql.split("\n"):
        stripped = line.strip()
        if stripped.startswith("-- ====") and "回滚" in stripped:
            in_rollback = True
        if not in_rollback:
            main_lines.append(line)

    main_sql = "\n".join(main_lines).strip()
    if not main_sql:
        print("⚠️ 迁移脚本为空,跳过")
        return False

    try:
        cur = conn.cursor()
        cur.execute(main_sql)
        cur.close()
        print("✅ 迁移脚本执行成功")
        return True
    except Exception as e:  # script-level boundary: report and fail soft
        print(f"❌ 迁移脚本执行失败: {e}")
        return False


def _columns_of(cur, schema: str, table: str) -> set[str]:
    """Return the column-name set of schema.table."""
    cur.execute("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
    """, (schema, table))
    return {r[0] for r in cur.fetchall()}


def verify(conn) -> bool:
    """Check the three tables and their key columns; return overall OK."""
    cur = conn.cursor()
    checks = []

    # 1. All three tables exist.
    for schema, table in TABLES:
        cur.execute("""
            SELECT 1 FROM information_schema.tables
            WHERE table_schema = %s AND table_name = %s
        """, (schema, table))
        checks.append((f"表 {schema}.{table} 存在", cur.fetchone() is not None))

    # 2. ODS landing table key columns.
    ods_cols = _columns_of(cur, "ods", "staff_info_master")
    ods_required = {"id", "staff_name", "mobile", "content_hash", "payload", "tenant_id", "site_id"}
    missing_ods = ods_required - ods_cols
    checks.append((f"ODS 关键字段完整({len(ods_cols)} 列)", len(missing_ods) == 0))
    if missing_ods:
        print(f" ODS 缺失字段: {missing_ods}")

    # 3. DWD dimension: the SCD2 bookkeeping columns must be present.
    dwd_cols = _columns_of(cur, "dwd", "dim_staff")
    dwd_required = {"staff_id", "staff_name", "scd2_start_time", "scd2_end_time", "scd2_is_current"}
    missing_dwd = dwd_required - dwd_cols
    checks.append((f"DWD 主表关键字段完整({len(dwd_cols)} 列)", len(missing_dwd) == 0))
    if missing_dwd:
        print(f" DWD 主表缺失字段: {missing_dwd}")

    # 4. DWD extension table key columns.
    ex_cols = _columns_of(cur, "dwd", "dim_staff_ex")
    ex_required = {"staff_id", "rank_name", "shop_name", "scd2_start_time"}
    missing_ex = ex_required - ex_cols
    checks.append((f"DWD 扩展表关键字段完整({len(ex_cols)} 列)", len(missing_ex) == 0))
    if missing_ex:
        print(f" DWD 扩展表缺失字段: {missing_ex}")

    cur.close()

    print("\n" + "=" * 50)
    print("建表验证结果")
    print("=" * 50)
    all_ok = True
    for name, ok in checks:
        status = "✅" if ok else "❌"
        print(f" {status} {name}")
        if not ok:
            all_ok = False

    return all_ok


def main():
    # Hide credentials when echoing the DSN.
    dsn_display = DSN.split("@")[1] if "@" in DSN else DSN
    print(f"连接测试库: {dsn_display}")
    print(f"迁移脚本: {MIGRATION_FILE.name}\n")

    if not MIGRATION_FILE.exists():
        print(f"ERROR: 迁移脚本不存在: {MIGRATION_FILE}")
        sys.exit(1)

    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    try:
        # Idempotency: only run the DDL for tables that are missing.
        existing = tables_exist(conn)
        if all(existing.values()):
            print("ℹ️ 所有目标表已存在,跳过建表")
        else:
            for name, exists in existing.items():
                if exists:
                    print(f" ℹ️ {name} 已存在")
                else:
                    print(f" 📋 {name} 待创建")
            if not execute_migration(conn):
                sys.exit(1)

        all_ok = verify(conn)
    finally:
        # Fix: close on every path (the original leaked the connection when
        # verify() raised).
        conn.close()

    if all_ok:
        print("\n✅ 员工档案建表迁移完成,所有验证通过")
    else:
        print("\n⚠️ 部分验证未通过,请检查")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
209
scripts/ops/run_migrations_2026_02_20.py
Normal file
209
scripts/ops/run_migrations_2026_02_20.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
Run every 2026-02-20 batch migration against the test database (TEST_DB_DSN).

Scripts execute in filename order; afterwards a consolidated verification
pass confirms the expected schema changes. All scripts are idempotent
(IF NOT EXISTS / IF EXISTS).
"""

import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import psycopg2

load_dotenv(Path(__file__).resolve().parents[2] / ".env")

DSN = os.getenv("TEST_DB_DSN")
if not DSN:
    print("ERROR: TEST_DB_DSN 未配置")
    sys.exit(1)

MIGRATIONS_DIR = Path(__file__).resolve().parents[2] / "db" / "etl_feiqiu" / "migrations"

# The 2026-02-20 batch, executed in filename order.
SCRIPTS = sorted(MIGRATIONS_DIR.glob("2026-02-20__*.sql"), key=lambda p: p.name)


def execute_migration(conn, script_path: Path) -> bool:
    """Execute one migration script; return True on success.

    Only the main section runs: everything from the first
    "-- ==== ... 回滚/ROLLBACK/验证" banner onward is commented-out reference
    material and is stripped before execution.
    """
    name = script_path.name
    sql = script_path.read_text(encoding="utf-8")

    main_sql_lines = []
    in_rollback_or_verify = False
    for line in sql.split("\n"):
        stripped = line.strip()
        if stripped.startswith("-- ====") and ("回滚" in stripped or "ROLLBACK" in stripped.upper()):
            in_rollback_or_verify = True
        if stripped.startswith("-- ====") and "验证" in stripped:
            in_rollback_or_verify = True
        if not in_rollback_or_verify:
            main_sql_lines.append(line)

    main_sql = "\n".join(main_sql_lines).strip()
    if not main_sql:
        print(f" ⚠️ {name}: 空脚本,跳过")
        return True

    # The scripts carry their own BEGIN/COMMIT, which would clash with
    # psycopg2's implicit transaction - run them in autocommit mode.
    old_autocommit = conn.autocommit
    conn.autocommit = True
    try:
        cur = conn.cursor()
        cur.execute(main_sql)
        cur.close()
        print(f" ✅ {name}")
        return True
    except Exception as e:
        # Under autocommit the failed statement was its own transaction, so
        # there is nothing to roll back - just report.
        print(f" ❌ {name}: {e}")
        return False
    finally:
        # Fix: the original only restored autocommit on the success path,
        # leaving the connection in autocommit mode after a failure.
        conn.autocommit = old_autocommit


def _has_column(cur, table: str, column: str) -> bool:
    """True when dwd.<table> has <column>."""
    cur.execute("""
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = 'dwd' AND table_name = %s AND column_name = %s
    """, (table, column))
    return cur.fetchone() is not None


def _dwd_table_exists(cur, table: str) -> bool:
    """True when <table> exists in the dwd schema."""
    cur.execute("""
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = 'dwd' AND table_name = %s
    """, (table,))
    return cur.fetchone() is not None


def verify_all(conn):
    """Post-migration verification; print a report, return overall OK."""
    cur = conn.cursor()
    checks = []

    # 1. dim_assistant_ex gained 4 columns.
    cur.execute("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'dwd' AND table_name = 'dim_assistant_ex'
          AND column_name IN ('system_role_id', 'job_num', 'cx_unit_price', 'pd_unit_price')
        ORDER BY column_name
    """)
    cols = [r[0] for r in cur.fetchall()]
    checks.append(("dim_assistant_ex +4列", len(cols) == 4, cols))

    # 2. dwd_assistant_service_log_ex gained 2 columns.
    cur.execute("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'dwd' AND table_name = 'dwd_assistant_service_log_ex'
          AND column_name IN ('operator_id', 'operator_name')
        ORDER BY column_name
    """)
    cols = [r[0] for r in cur.fetchall()]
    checks.append(("dwd_assistant_service_log_ex +2列", len(cols) == 2, cols))

    # 3. dim_table_ex gained 14 columns.
    cur.execute("""
        SELECT count(*) FROM information_schema.columns
        WHERE table_schema = 'dwd' AND table_name = 'dim_table_ex'
          AND column_name IN (
            'create_time', 'light_status', 'tablestatusname', 'sitename',
            'applet_qr_code_url', 'audit_status', 'charge_free', 'delay_lights_time',
            'is_rest_area', 'only_allow_groupon', 'order_delay_time', 'self_table',
            'temporary_light_second', 'virtual_table'
          )
    """)
    cnt = cur.fetchone()[0]
    checks.append(("dim_table_ex +14列", cnt == 14, f"{cnt}/14"))

    # 4+5. Single-column additions.
    checks.append(("dwd_member_balance_change_ex +relate_id",
                   _has_column(cur, "dwd_member_balance_change_ex", "relate_id"), ""))
    checks.append(("dim_store_goods_ex +batch_stock_quantity",
                   _has_column(cur, "dim_store_goods_ex", "batch_stock_quantity"), ""))

    # 6+7. New DWD stock tables.
    checks.append(("dwd_goods_stock_summary 已创建",
                   _dwd_table_exists(cur, "dwd_goods_stock_summary"), ""))
    checks.append(("dwd_goods_stock_movement 已创建",
                   _dwd_table_exists(cur, "dwd_goods_stock_movement"), ""))

    # 8. Three DWS stock summary tables (daily/weekly/monthly).
    # NOTE: '_' is a single-character wildcard in LIKE, so the pattern is
    # looser than it looks, but it still matches the intended table names.
    cur.execute("""
        SELECT table_name FROM information_schema.tables
        WHERE table_schema = 'dws' AND table_name LIKE 'dws_goods_stock_%_summary'
        ORDER BY table_name
    """)
    tables = [r[0] for r in cur.fetchall()]
    checks.append(("DWS 库存汇总 3 张表", len(tables) == 3, tables))

    # 9. dwd_store_goods_sale gained both discount columns.
    cur.execute("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'dwd' AND table_name = 'dwd_store_goods_sale'
          AND column_name IN ('discount_money', 'discount_price')
        ORDER BY column_name
    """)
    cols = [r[0] for r in cur.fetchall()]
    checks.append(("dwd_store_goods_sale discount_money+discount_price", len(cols) == 2, cols))

    # 10. settlement_ticket_details was dropped from ODS.
    cur.execute("""
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = 'ods' AND table_name = 'settlement_ticket_details'
    """)
    checks.append(("settlement_ticket_details 已删除", cur.fetchone() is None, ""))

    cur.close()

    print("\n" + "=" * 60)
    print("迁移验证结果")
    print("=" * 60)
    all_ok = True
    for name, ok, detail in checks:
        status = "✅" if ok else "❌"
        detail_str = f" → {detail}" if detail else ""
        print(f" {status} {name}{detail_str}")
        if not ok:
            all_ok = False

    return all_ok


def main():
    print(f"连接测试库: {DSN.split('@')[1] if '@' in DSN else DSN}")
    print(f"迁移目录: {MIGRATIONS_DIR}")
    print(f"发现 {len(SCRIPTS)} 个 2026-02-20 迁移脚本\n")

    conn = psycopg2.connect(DSN)
    try:
        print("执行迁移:")
        success = 0
        failed = 0
        for script in SCRIPTS:
            if execute_migration(conn, script):
                success += 1
            else:
                failed += 1

        print(f"\n执行完成: {success} 成功, {failed} 失败")

        all_ok = verify_all(conn)
    finally:
        # Fix: close on every path (the original leaked the connection when
        # verification raised).
        conn.close()

    if not all_ok:
        print("\n⚠️ 部分验证未通过,请检查")
        sys.exit(1)
    else:
        print("\n✅ 所有迁移已成功执行并验证通过")


if __name__ == "__main__":
    main()
|
||||
130
scripts/ops/run_post_etl_reports.py
Normal file
130
scripts/ops/run_post_etl_reports.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# -*- coding: utf-8 -*-
"""Standalone generator for the ETL timing and consistency-check reports.

Verifies the EtlTimer / ConsistencyChecker report output after integration
without running any real ETL work: the timer is driven with simulated steps
and the consistency checker runs against the configured database.

Output locations are controlled by the ETL_REPORT_ROOT /
API_SAMPLE_CACHE_ROOT environment variables.
"""
import os
import sys
from pathlib import Path

# Repo root, computed once and in the same way as the sibling ops scripts
# (the original recomputed it twice via .parent.parent.parent).
_ROOT = Path(__file__).resolve().parents[2]

# Make the feiqiu ETL package importable (utils.timer, quality.*, database.*).
ETL_ROOT = _ROOT / "apps" / "etl" / "connectors" / "feiqiu"
sys.path.insert(0, str(ETL_ROOT))

from dotenv import load_dotenv
load_dotenv(_ROOT / ".env")


def generate_timing_report() -> str:
    """Drive EtlTimer through simulated steps and write a timing report."""
    import time
    from utils.timer import EtlTimer

    timer = EtlTimer()
    timer.start()

    # Simulated ETL steps: (step name, simulated duration in seconds).
    steps = [
        ("ODS_ASSISTANT_ACCOUNT", 0.05),
        ("ODS_MEMBER", 0.03),
        ("DWD_LOAD_FROM_ODS", 0.08),
        ("DWS_COACH_PERFORMANCE", 0.04),
        ("CONSISTENCY_CHECK", 0.02),
    ]

    for step_name, delay in steps:
        timer.start_step(step_name)
        time.sleep(delay)
        timer.stop_step(step_name)

    report_text = timer.finish(write_report=True)
    print("[OK] 计时报告已生成")
    return report_text


def generate_consistency_report() -> str | None:
    """Run the database consistency check and write its report.

    Returns the report path, or None when PG_DSN is not configured.
    """
    from quality.consistency_checker import (
        run_consistency_check,
        write_consistency_report,
    )
    from database.connection import DatabaseConnection
    from zoneinfo import ZoneInfo

    dsn = os.environ.get("PG_DSN")
    if not dsn:
        print("[SKIP] PG_DSN 未定义,跳过一致性检查")
        return None

    # The API-vs-ODS comparison only runs when a sample cache is configured.
    api_sample_dir_str = os.environ.get("API_SAMPLE_CACHE_ROOT")
    api_sample_dir = Path(api_sample_dir_str) if api_sample_dir_str else None

    db_conn = DatabaseConnection(dsn=dsn)
    try:
        report = run_consistency_check(
            db_conn,
            api_sample_dir=api_sample_dir,
            include_api_vs_ods=bool(api_sample_dir),
            include_ods_vs_dwd=True,
            tz=ZoneInfo("Asia/Shanghai"),
        )

        report_path = write_consistency_report(report)
        print(f"[OK] 一致性检查报告已生成: {report_path}")

        # Console summary per comparison family.
        if report.ods_vs_dwd_results:
            passed = sum(1 for r in report.ods_vs_dwd_results if r.passed)
            total = len(report.ods_vs_dwd_results)
            print(f" ODS vs DWD: {passed}/{total} 张表通过")

        if report.api_vs_ods_results:
            passed = sum(1 for r in report.api_vs_ods_results if r.passed)
            total = len(report.api_vs_ods_results)
            print(f" API vs ODS: {passed}/{total} 张表通过")

        return report_path
    finally:
        db_conn.close()


def main():
    print("=" * 60)
    print("ETL 报告生成脚本")
    print("=" * 60)

    # Fail fast when the output root is not configured.
    etl_report_root = os.environ.get("ETL_REPORT_ROOT")
    if not etl_report_root:
        print("[ERROR] ETL_REPORT_ROOT 环境变量未定义")
        sys.exit(1)
    print(f"报告输出目录: {etl_report_root}")
    print()

    print("--- 1. 生成计时报告 ---")
    generate_timing_report()
    print()

    print("--- 2. 生成一致性检查报告 ---")
    generate_consistency_report()
    print()

    # List the freshly generated report files for the operator.
    print("--- 生成的报告文件 ---")
    report_dir = Path(etl_report_root)
    if report_dir.exists():
        for f in sorted(report_dir.iterdir()):
            if f.name.startswith(("etl_timing_", "consistency_report_")):
                print(f" {f.name} ({f.stat().st_size} bytes)")

    print()
    print("完成。")


if __name__ == "__main__":
    main()
|
||||
359
scripts/ops/seed_dws_config.py
Normal file
359
scripts/ops/seed_dws_config.py
Normal file
@@ -0,0 +1,359 @@
|
||||
"""
|
||||
将 seed_dws_config.sql 中的种子数据写入 DWS 配置表(test_etl_feiqiu)。
|
||||
包含:
|
||||
1. 原始种子数据(cfg_performance_tier / cfg_assistant_level_price / cfg_bonus_rules / cfg_area_category / cfg_skill_type)
|
||||
2. 新增 2025-01-01~2026-02-28 统一提成档位(基础课18元/小时,打赏课40%)
|
||||
3. 新增 GUARANTEE 保底奖金规则(按等级区分)
|
||||
|
||||
执行目标库:TEST_DB_DSN 或 PG_DSN(.env 中配置)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
# 加载根 .env
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
dsn = os.environ.get("PG_DSN")
|
||||
if not dsn:
|
||||
print("ERROR: PG_DSN 未在 .env 中配置,终止执行")
|
||||
sys.exit(1)
|
||||
|
||||
# 确认连接的是测试库
|
||||
if "test_etl_feiqiu" not in dsn:
|
||||
print(f"WARNING: DSN 指向 {dsn},不是 test_etl_feiqiu,请确认!")
|
||||
resp = input("继续执行?(y/N): ").strip().lower()
|
||||
if resp != "y":
|
||||
print("已取消")
|
||||
sys.exit(0)
|
||||
|
||||
print(f"连接数据库: {dsn.split('@')[1] if '@' in dsn else dsn}")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SQL 语句定义
|
||||
# ============================================================================
|
||||
|
||||
# 清空并重建配置数据
|
||||
SQL_STATEMENTS = []
|
||||
|
||||
# --- 1. cfg_performance_tier ---
|
||||
SQL_STATEMENTS.append("""
|
||||
TRUNCATE TABLE dws.cfg_performance_tier RESTART IDENTITY CASCADE;
|
||||
""")
|
||||
|
||||
# 2025-01-01 ~ 2026-02-28: 统一提成档位(不分档,所有助教统一规则)
|
||||
# 基础课球房提成 18 元/小时,打赏课球房提成 40%
|
||||
SQL_STATEMENTS.append("""
|
||||
INSERT INTO dws.cfg_performance_tier (
|
||||
tier_code, tier_name, tier_level,
|
||||
min_hours, max_hours,
|
||||
base_deduction, bonus_deduction_ratio, vacation_days, vacation_unlimited,
|
||||
is_new_hire_tier, effective_from, effective_to, description
|
||||
) VALUES
|
||||
-- 2025-01-01 ~ 2026-02-28: 统一提成(不分档)
|
||||
('T0', '统一档', 0,
|
||||
0, NULL,
|
||||
18.00, 0.40, 0, FALSE,
|
||||
FALSE, '2025-01-01', '2026-02-28',
|
||||
'2025-01-01~2026-02-28统一规则:基础课球房提成18元/小时,打赏课球房提成40%,不分档位'),
|
||||
|
||||
-- 旧方案(至2024-12-31,保留历史口径)
|
||||
('T0', '0档-淘汰压力', 0,
|
||||
0, 100,
|
||||
28.00, 0.50, 3, FALSE,
|
||||
FALSE, '2000-01-01', '2024-12-31',
|
||||
'历史口径:H<100,专业课抽成28元/小时,打赏课抽成50%,休假3天'),
|
||||
('T1', '1档-及格档', 1,
|
||||
100, 130,
|
||||
18.00, 0.40, 4, FALSE,
|
||||
FALSE, '2000-01-01', '2024-12-31',
|
||||
'历史口径:100≤H<130,专业课抽成18元/小时,打赏课抽成40%,休假4天'),
|
||||
('T2', '2档-良好档', 2,
|
||||
130, 160,
|
||||
15.00, 0.38, 4, FALSE,
|
||||
FALSE, '2000-01-01', '2024-12-31',
|
||||
'历史口径:130≤H<160,专业课抽成15元/小时,打赏课抽成38%,休假4天'),
|
||||
('T3', '3档-优秀档', 3,
|
||||
160, 190,
|
||||
13.00, 0.35, 5, FALSE,
|
||||
FALSE, '2000-01-01', '2024-12-31',
|
||||
'历史口径:160≤H<190,专业课抽成13元/小时,打赏课抽成35%,休假5天'),
|
||||
('T4', '4档-卓越加速档', 4,
|
||||
190, 220,
|
||||
10.00, 0.33, 6, FALSE,
|
||||
FALSE, '2000-01-01', '2024-12-31',
|
||||
'历史口径:190≤H<220,专业课抽成10元/小时,打赏课抽成33%,休假6天'),
|
||||
('T5', '5档-冠军加速档', 5,
|
||||
220, NULL,
|
||||
8.00, 0.30, 0, TRUE,
|
||||
FALSE, '2000-01-01', '2024-12-31',
|
||||
'历史口径:H≥220,专业课抽成8元/小时,打赏课抽成30%,休假自由'),
|
||||
|
||||
-- 新方案(2026-03-01起,恢复分档)
|
||||
('T0', '0档-淘汰压力', 0,
|
||||
0, 120,
|
||||
28.00, 0.50, 3, FALSE,
|
||||
FALSE, '2026-03-01', '9999-12-31',
|
||||
'新方案:H<120,专业课抽成28元/小时,打赏课抽成50%,休假3天'),
|
||||
('T1', '1档-及格档', 1,
|
||||
120, 150,
|
||||
18.00, 0.40, 4, FALSE,
|
||||
FALSE, '2026-03-01', '9999-12-31',
|
||||
'新方案:120≤H<150,专业课抽成18元/小时,打赏课抽成40%,休假4天'),
|
||||
('T2', '2档-良好档', 2,
|
||||
150, 180,
|
||||
13.00, 0.35, 5, FALSE,
|
||||
FALSE, '2026-03-01', '9999-12-31',
|
||||
'新方案:150≤H<180,专业课抽成13元/小时,打赏课抽成35%,休假5天'),
|
||||
('T3', '3档-优秀档', 3,
|
||||
180, 210,
|
||||
10.00, 0.30, 6, FALSE,
|
||||
FALSE, '2026-03-01', '9999-12-31',
|
||||
'新方案:180≤H<210,专业课抽成10元/小时,打赏课抽成30%,休假6天'),
|
||||
('T4', '4档-销冠竞争', 4,
|
||||
210, NULL,
|
||||
8.00, 0.25, 0, TRUE,
|
||||
FALSE, '2026-03-01', '9999-12-31',
|
||||
'新方案:H≥210,专业课抽成8元/小时,打赏课抽成25%,休假自由');
|
||||
""")
|
||||
|
||||
# --- 2. cfg_assistant_level_price ---
|
||||
SQL_STATEMENTS.append("""
|
||||
TRUNCATE TABLE dws.cfg_assistant_level_price RESTART IDENTITY CASCADE;
|
||||
""")
|
||||
|
||||
SQL_STATEMENTS.append("""
|
||||
INSERT INTO dws.cfg_assistant_level_price (
|
||||
level_code, level_name,
|
||||
base_course_price, bonus_course_price,
|
||||
effective_from, effective_to, description
|
||||
) VALUES
|
||||
(10, '初级',
|
||||
98.00, 190.00,
|
||||
'2000-01-01', '9999-12-31',
|
||||
'初级助教:基础课98元/时,附加课190元/时(客户支付价格)'),
|
||||
(20, '中级',
|
||||
108.00, 190.00,
|
||||
'2000-01-01', '9999-12-31',
|
||||
'中级助教:基础课108元/时,附加课190元/时(客户支付价格)'),
|
||||
(30, '高级',
|
||||
118.00, 190.00,
|
||||
'2000-01-01', '9999-12-31',
|
||||
'高级助教:基础课118元/时,附加课190元/时(客户支付价格)'),
|
||||
(40, '星级',
|
||||
138.00, 190.00,
|
||||
'2000-01-01', '9999-12-31',
|
||||
'星级助教:基础课138元/时,附加课190元/时(客户支付价格)'),
|
||||
(8, '助教管理',
|
||||
98.00, 190.00,
|
||||
'2000-01-01', '9999-12-31',
|
||||
'助教管理:不参与客户服务计费,默认按初级价格');
|
||||
""")
|
||||
|
||||
# --- 3. cfg_bonus_rules ---
|
||||
SQL_STATEMENTS.append("""
|
||||
TRUNCATE TABLE dws.cfg_bonus_rules RESTART IDENTITY CASCADE;
|
||||
""")
|
||||
|
||||
SQL_STATEMENTS.append("""
|
||||
INSERT INTO dws.cfg_bonus_rules (
|
||||
rule_type, rule_code, rule_name,
|
||||
threshold_hours, rank_position, bonus_amount,
|
||||
is_cumulative, priority,
|
||||
effective_from, effective_to, description
|
||||
) VALUES
|
||||
-- 冲刺奖金(历史口径,至2024-12-31)
|
||||
('SPRINT', 'SPRINT_190', '冲刺奖金190',
|
||||
190.00, NULL, 300.00,
|
||||
FALSE, 1,
|
||||
'2000-01-01', '2024-12-31',
|
||||
'历史口径:业绩≥190小时,获得300元冲刺奖金(不累计)'),
|
||||
('SPRINT', 'SPRINT_220', '冲刺奖金220',
|
||||
220.00, NULL, 800.00,
|
||||
FALSE, 2,
|
||||
'2000-01-01', '2024-12-31',
|
||||
'历史口径:业绩≥220小时,获得800元冲刺奖金(覆盖190档)'),
|
||||
|
||||
-- 保底奖金(2025-01-01 ~ 2026-02-28)
|
||||
-- 按助教等级区分,需同时满足总课时和打赏课最低时数
|
||||
-- level_code: 10=初级, 20=中级, 30=高级, 40=星级
|
||||
('GUARANTEE', 'GUAR_LV10', '初级保底奖金',
|
||||
130.00, NULL, 12000.00,
|
||||
FALSE, 10,
|
||||
'2025-01-01', '2026-02-28',
|
||||
'初级保底:完成130小时课程(含≥10小时打赏课),保底月薪线12000元(实发=MAX(课时收入+奖金, 12000))'),
|
||||
('GUARANTEE', 'GUAR_LV20', '中级保底奖金',
|
||||
150.00, NULL, 16000.00,
|
||||
FALSE, 20,
|
||||
'2025-01-01', '2026-02-28',
|
||||
'中级保底:完成150小时课程(含≥10小时打赏课),保底月薪线16000元(实发=MAX(课时收入+奖金, 16000))'),
|
||||
('GUARANTEE', 'GUAR_LV30', '高级保底奖金',
|
||||
160.00, NULL, 18000.00,
|
||||
FALSE, 30,
|
||||
'2025-01-01', '2026-02-28',
|
||||
'高级保底:完成160小时课程(含≥10小时打赏课),保底月薪线18000元(实发=MAX(课时收入+奖金, 18000))'),
|
||||
('GUARANTEE', 'GUAR_LV40', '星级保底奖金',
|
||||
170.00, NULL, 23000.00,
|
||||
FALSE, 40,
|
||||
'2025-01-01', '2026-02-28',
|
||||
'星级保底:完成170小时课程(含≥10小时打赏课),保底月薪线23000元(实发=MAX(课时收入+奖金, 23000))'),
|
||||
|
||||
-- Top排名奖金(2026-03-01起)
|
||||
('TOP_RANK', 'TOP_1', 'Top1排名奖金',
|
||||
NULL, 1, 1000.00,
|
||||
FALSE, 0,
|
||||
'2026-03-01', '9999-12-31',
|
||||
'月度排名第一,获得1000元(并列都算)'),
|
||||
('TOP_RANK', 'TOP_2', 'Top2排名奖金',
|
||||
NULL, 2, 600.00,
|
||||
FALSE, 0,
|
||||
'2026-03-01', '9999-12-31',
|
||||
'月度排名第二,获得600元(并列都算)'),
|
||||
('TOP_RANK', 'TOP_3', 'Top3排名奖金',
|
||||
NULL, 3, 400.00,
|
||||
FALSE, 0,
|
||||
'2026-03-01', '9999-12-31',
|
||||
'月度排名第三,获得400元(并列都算)');
|
||||
""")
|
||||
|
||||
# --- 4. cfg_area_category ---
|
||||
SQL_STATEMENTS.append("""
|
||||
TRUNCATE TABLE dws.cfg_area_category RESTART IDENTITY CASCADE;
|
||||
""")
|
||||
|
||||
SQL_STATEMENTS.append("""
|
||||
INSERT INTO dws.cfg_area_category (
|
||||
source_area_name, category_code, category_name,
|
||||
match_type, match_priority, is_active, description
|
||||
) VALUES
|
||||
-- 台球散台(精确匹配)
|
||||
('A区', 'BILLIARD', '台球散台', 'EXACT', 10, TRUE, '台球散台:A区(18台)- 中八/追分'),
|
||||
('B区', 'BILLIARD', '台球散台', 'EXACT', 10, TRUE, '台球散台:B区(15台)- 中八/追分'),
|
||||
('C区', 'BILLIARD', '台球散台', 'EXACT', 10, TRUE, '台球散台:C区(6台)- 中八/追分'),
|
||||
('TV台', 'BILLIARD', '台球散台', 'EXACT', 10, TRUE, '台球散台:TV台(1台)- 中八/追分'),
|
||||
-- 台球VIP包厢
|
||||
('VIP包厢', 'BILLIARD_VIP', '台球VIP', 'EXACT', 10, TRUE, '台球VIP:VIP包厢(4台)- V1-V4中八, V5斯诺克'),
|
||||
-- 斯诺克区
|
||||
('斯诺克区', 'SNOOKER', '斯诺克', 'EXACT', 10, TRUE, '斯诺克:斯诺克区(4台)'),
|
||||
-- 麻将区
|
||||
('麻将房', 'MAHJONG', '麻将棋牌', 'EXACT', 10, TRUE, '麻将棋牌:麻将房(5台)'),
|
||||
('M7', 'MAHJONG', '麻将棋牌', 'EXACT', 10, TRUE, '麻将棋牌:M7(2台)'),
|
||||
('M8', 'MAHJONG', '麻将棋牌', 'EXACT', 10, TRUE, '麻将棋牌:M8(1台)'),
|
||||
('666', 'MAHJONG', '麻将棋牌', 'EXACT', 10, TRUE, '麻将棋牌:666(2台)'),
|
||||
('发财', 'MAHJONG', '麻将棋牌', 'EXACT', 10, TRUE, '麻将棋牌:发财(1台)'),
|
||||
-- KTV/K包
|
||||
('K包', 'KTV', 'K歌娱乐', 'EXACT', 10, TRUE, 'K歌娱乐:K包(4台)'),
|
||||
('k包活动区', 'KTV', 'K歌娱乐', 'EXACT', 10, TRUE, 'K歌娱乐:k包活动区(2台)'),
|
||||
('幸会158', 'KTV', 'K歌娱乐', 'EXACT', 10, TRUE, 'K歌娱乐:幸会158(2台)'),
|
||||
-- 特殊区域
|
||||
('补时长', 'SPECIAL', '补时长', 'EXACT', 10, TRUE, '特殊:补时长(7台)- 用于时长补录'),
|
||||
-- 模糊匹配
|
||||
('%VIP%', 'BILLIARD_VIP', '台球VIP', 'LIKE', 50, TRUE, '模糊匹配:包含"VIP"的区域'),
|
||||
('%斯诺克%', 'SNOOKER', '斯诺克', 'LIKE', 50, TRUE, '模糊匹配:包含"斯诺克"的区域'),
|
||||
('%麻将%', 'MAHJONG', '麻将棋牌', 'LIKE', 50, TRUE, '模糊匹配:包含"麻将"的区域'),
|
||||
('%K包%', 'KTV', 'K歌娱乐', 'LIKE', 50, TRUE, '模糊匹配:包含"K包"的区域'),
|
||||
('%KTV%', 'KTV', 'K歌娱乐', 'LIKE', 50, TRUE, '模糊匹配:包含"KTV"的区域'),
|
||||
-- 默认兜底
|
||||
('DEFAULT', 'OTHER', '其他', 'DEFAULT', 999, TRUE, '兜底规则:无法匹配的区域归入其他');
|
||||
""")
|
||||
|
||||
# --- 5. cfg_skill_type ---
|
||||
SQL_STATEMENTS.append("""
|
||||
TRUNCATE TABLE dws.cfg_skill_type RESTART IDENTITY CASCADE;
|
||||
""")
|
||||
|
||||
SQL_STATEMENTS.append("""
|
||||
INSERT INTO dws.cfg_skill_type (
|
||||
skill_id, skill_name,
|
||||
course_type_code, course_type_name,
|
||||
is_active, description
|
||||
) VALUES
|
||||
(2791903611396869, '台球基础陪打',
|
||||
'BASE', '基础课',
|
||||
TRUE, '基础课:陪打服务,按助教等级计价'),
|
||||
(2807440316432197, '台球超休服务',
|
||||
'BONUS', '附加课',
|
||||
TRUE, '附加课:超休/激励课,固定190元/小时'),
|
||||
(2807440316432198, '包厢服务',
|
||||
'BASE', '基础课',
|
||||
TRUE, '包厢服务:归入基础课统计,统一按138元/小时计价');
|
||||
""")
|
||||
|
||||
# --- 验证 SQL ---
|
||||
SQL_VERIFY = """
|
||||
DO $$
|
||||
DECLARE
|
||||
v_tier_count INTEGER;
|
||||
v_price_count INTEGER;
|
||||
v_bonus_count INTEGER;
|
||||
v_area_count INTEGER;
|
||||
v_skill_count INTEGER;
|
||||
BEGIN
|
||||
SELECT COUNT(*) INTO v_tier_count FROM dws.cfg_performance_tier;
|
||||
SELECT COUNT(*) INTO v_price_count FROM dws.cfg_assistant_level_price;
|
||||
SELECT COUNT(*) INTO v_bonus_count FROM dws.cfg_bonus_rules;
|
||||
SELECT COUNT(*) INTO v_area_count FROM dws.cfg_area_category;
|
||||
SELECT COUNT(*) INTO v_skill_count FROM dws.cfg_skill_type;
|
||||
|
||||
RAISE NOTICE '配置数据初始化完成:';
|
||||
RAISE NOTICE ' - cfg_performance_tier: % 条', v_tier_count;
|
||||
RAISE NOTICE ' - cfg_assistant_level_price: % 条', v_price_count;
|
||||
RAISE NOTICE ' - cfg_bonus_rules: % 条', v_bonus_count;
|
||||
RAISE NOTICE ' - cfg_area_category: % 条', v_area_count;
|
||||
RAISE NOTICE ' - cfg_skill_type: % 条', v_skill_count;
|
||||
END;
|
||||
$$;
|
||||
"""
|
||||
|
||||
# ============================================================================
|
||||
# 执行
|
||||
# ============================================================================
|
||||
def main():
    """Apply every seed SQL statement in one transaction, then verify.

    Rolls back and re-raises on any failure; always closes the cursor
    and connection.
    """
    conn = psycopg2.connect(dsn)
    conn.autocommit = False
    cur = conn.cursor()

    try:
        # Run all seed statements sequentially inside a single transaction.
        total = len(SQL_STATEMENTS)
        for step, statement in enumerate(SQL_STATEMENTS, start=1):
            cur.execute(statement)
            print(f" 步骤 {step}/{total} 完成")

        # PL/pgSQL verification block (emits RAISE NOTICE row counts).
        cur.execute(SQL_VERIFY)

        # Read-back queries so the operator can eyeball the seeded rows.
        checks = [
            ("cfg_performance_tier", "SELECT tier_code, tier_name, effective_from, effective_to, base_deduction, bonus_deduction_ratio FROM dws.cfg_performance_tier ORDER BY effective_from, tier_level"),
            ("cfg_bonus_rules", "SELECT rule_type, rule_code, rule_name, threshold_hours, bonus_amount, effective_from, effective_to FROM dws.cfg_bonus_rules ORDER BY effective_from, rule_type, priority"),
            ("cfg_assistant_level_price", "SELECT level_code, level_name, base_course_price, bonus_course_price FROM dws.cfg_assistant_level_price ORDER BY level_code"),
            ("cfg_area_category", "SELECT COUNT(*) as cnt FROM dws.cfg_area_category"),
            ("cfg_skill_type", "SELECT skill_id, skill_name, course_type_code FROM dws.cfg_skill_type"),
        ]

        for table_name, query in checks:
            cur.execute(query)
            rows = cur.fetchall()
            cols = [desc[0] for desc in cur.description]
            print(f"\n=== {table_name} ===")
            print(f" 列: {cols}")
            for row in rows:
                print(f" {row}")

        conn.commit()
        print("\n✅ 所有配置数据已成功写入 dws schema")

    except Exception as e:
        conn.rollback()
        print(f"\n❌ 执行失败,已回滚: {e}")
        raise
    finally:
        cur.close()
        conn.close()


if __name__ == "__main__":
    main()
|
||||
88
scripts/ops/sync_branches.py
Normal file
88
scripts/ops/sync_branches.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
将 dev 分支同步到 test 和 master。
|
||||
使用 git reset --hard 强制对齐,绕过文件锁问题。
|
||||
"""
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def run(cmd: str, retries: int = 3, delay: float = 2.0) -> bool:
    """Run a git command in the repo root, retrying on failure.

    Retries cover transient failures such as Windows file locks.
    Returns True as soon as one attempt exits with status 0.
    """
    for attempt in range(1, retries + 1):
        print(f" [{attempt}/{retries}] {cmd}")
        proc = subprocess.run(
            cmd, shell=True, capture_output=True, text=True,
            cwd=r"C:\NeoZQYY", encoding="utf-8", errors="replace",
        )
        if proc.returncode == 0:
            out = proc.stdout.strip()
            if out:
                print(f" {out}")
            return True
        print(f" 失败: {proc.stderr.strip()[:300]}")
        if attempt < retries:
            print(f" 等待 {delay}s 后重试…")
            time.sleep(delay)
    return False
|
||||
|
||||
|
||||
def sync_branch(target: str, source_hash: str) -> bool:
    """Force-align branch *target* to commit *source_hash*.

    Checks out the branch and hard-resets it; returns False on the first
    failed step.
    """
    separator = "=" * 50
    print(f"\n{separator}")
    print(f"同步 {target} → {source_hash[:8]}")
    print(separator)

    if not run(f"git checkout --force {target}"):
        print(f" ✗ 切换到 {target} 失败")
        return False

    # reset --hard aligns the branch regardless of locked working files.
    if not run(f"git reset --hard {source_hash}"):
        print(" ✗ reset --hard 失败")
        return False

    print(f" ✓ {target} 已对齐到 {source_hash[:8]}")
    return True
|
||||
|
||||
|
||||
def main():
    """Force-align test and master to dev's HEAD, then switch back to dev."""
    # Resolve dev's HEAD commit hash.
    head = subprocess.run(
        "git rev-parse dev", shell=True, capture_output=True, text=True,
        cwd=r"C:\NeoZQYY", encoding="utf-8",
    )
    if head.returncode != 0:
        print("无法获取 dev 的 HEAD,退出")
        sys.exit(1)

    dev_hash = head.stdout.strip()
    print(f"dev HEAD: {dev_hash[:8]}")

    ok = True
    for branch in ("test", "master"):
        if not sync_branch(branch, dev_hash):
            ok = False
            print(f" ✗ {branch} 同步失败")

    # Always return to dev, even if a branch failed to sync.
    print("\n切回 dev…")
    run("git checkout --force dev")

    # Restore stashed changes, if any exist.
    stashes = subprocess.run(
        "git stash list", shell=True, capture_output=True, text=True,
        cwd=r"C:\NeoZQYY", encoding="utf-8",
    )
    if stashes.stdout.strip():
        print("恢复 stash…")
        run("git stash pop")

    if ok:
        print("\n✓ 全部完成。三个分支已对齐。")
    else:
        print("\n✗ 部分分支同步失败,请检查。")
    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()
|
||||
312
scripts/ops/sync_ddl_after_migration.py
Normal file
312
scripts/ops/sync_ddl_after_migration.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""
|
||||
同步 DDL 文件:将 2026-02-20 迁移后的变更反映到 DDL 文件中。
|
||||
目标文件:
|
||||
- db/etl_feiqiu/schemas/dwd.sql(schema=dwd)
|
||||
- db/etl_feiqiu/schemas/schema_dwd_doc.sql(schema=billiards_dwd)
|
||||
- db/etl_feiqiu/schemas/dws.sql(schema=dws)
|
||||
- db/etl_feiqiu/schemas/schema_dws.sql(schema=billiards_dws)
|
||||
|
||||
策略:对每个 DDL 文件做精确的文本替换/追加,而非全量重写。
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||||
|
||||
SCHEMAS_DIR = Path(__file__).resolve().parents[2] / "db" / "etl_feiqiu" / "schemas"
|
||||
|
||||
# ── 变更清单 ──────────────────────────────────────────────────────────
|
||||
|
||||
# 1. dim_table_ex:schema_dwd_doc.sql 缺少 14 列(dwd.sql 已有)
|
||||
DIM_TABLE_EX_OLD_COLS_DOC = """\
|
||||
table_status INTEGER,
|
||||
SCD2_start_time TIMESTAMPTZ DEFAULT now(),"""
|
||||
|
||||
DIM_TABLE_EX_NEW_COLS_DOC = """\
|
||||
table_status INTEGER,
|
||||
create_time TIMESTAMPTZ,
|
||||
light_status INTEGER,
|
||||
tablestatusname TEXT,
|
||||
sitename TEXT,
|
||||
applet_qr_code_url TEXT,
|
||||
audit_status INTEGER,
|
||||
charge_free INTEGER,
|
||||
delay_lights_time INTEGER,
|
||||
is_rest_area INTEGER,
|
||||
only_allow_groupon INTEGER,
|
||||
order_delay_time INTEGER,
|
||||
self_table INTEGER,
|
||||
temporary_light_second INTEGER,
|
||||
virtual_table INTEGER,
|
||||
SCD2_start_time TIMESTAMPTZ DEFAULT now(),"""
|
||||
|
||||
# 2. dim_assistant_ex:两个文件都缺少 4 列
|
||||
DIM_ASSISTANT_EX_OLD_COLS = """\
|
||||
serial_number BIGINT,
|
||||
SCD2_start_time TIMESTAMPTZ,"""
|
||||
|
||||
DIM_ASSISTANT_EX_NEW_COLS = """\
|
||||
serial_number BIGINT,
|
||||
system_role_id BIGINT,
|
||||
job_num TEXT,
|
||||
cx_unit_price NUMERIC(18,2),
|
||||
pd_unit_price NUMERIC(18,2),
|
||||
SCD2_start_time TIMESTAMPTZ,"""
|
||||
|
||||
# 3. DWD 新表定义(追加到文件末尾)
|
||||
DWD_NEW_TABLES = """
|
||||
|
||||
-- =============================================================================
|
||||
-- 2026-02-20 新增表
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS dwd_goods_stock_summary (
|
||||
site_goods_id BIGINT NOT NULL,
|
||||
goods_name TEXT,
|
||||
goods_unit TEXT,
|
||||
goods_category_id BIGINT,
|
||||
goods_category_second_id BIGINT,
|
||||
category_name TEXT,
|
||||
range_start_stock NUMERIC(18,4),
|
||||
range_end_stock NUMERIC(18,4),
|
||||
range_in NUMERIC(18,4),
|
||||
range_out NUMERIC(18,4),
|
||||
range_sale NUMERIC(18,4),
|
||||
range_sale_money NUMERIC(18,2),
|
||||
range_inventory NUMERIC(18,4),
|
||||
current_stock NUMERIC(18,4),
|
||||
site_id BIGINT,
|
||||
tenant_id BIGINT,
|
||||
fetched_at TIMESTAMPTZ,
|
||||
PRIMARY KEY (site_goods_id, fetched_at)
|
||||
);
|
||||
|
||||
COMMENT ON TABLE {schema}.dwd_goods_stock_summary IS '库存汇总明细表(事实表)。来源:ods.goods_stock_summary。按时间窗口增量加载。';
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS dwd_goods_stock_movement (
|
||||
site_goods_stock_id BIGINT NOT NULL,
|
||||
tenant_id BIGINT,
|
||||
site_id BIGINT,
|
||||
site_goods_id BIGINT,
|
||||
goods_name TEXT,
|
||||
goods_category_id BIGINT,
|
||||
goods_second_category_id BIGINT,
|
||||
unit TEXT,
|
||||
price NUMERIC(18,4),
|
||||
stock_type INTEGER,
|
||||
change_num NUMERIC(18,4),
|
||||
start_num NUMERIC(18,4),
|
||||
end_num NUMERIC(18,4),
|
||||
change_num_a NUMERIC(18,4),
|
||||
start_num_a NUMERIC(18,4),
|
||||
end_num_a NUMERIC(18,4),
|
||||
remark TEXT,
|
||||
operator_name TEXT,
|
||||
create_time TIMESTAMPTZ,
|
||||
fetched_at TIMESTAMPTZ,
|
||||
PRIMARY KEY (site_goods_stock_id)
|
||||
);
|
||||
|
||||
COMMENT ON TABLE {schema}.dwd_goods_stock_movement IS '库存变动流水表(事实表)。来源:ods.goods_stock_movements。按 create_time 增量加载。';
|
||||
"""
|
||||
|
||||
# 4. DWS 新表定义(追加到文件末尾)
|
||||
DWS_NEW_TABLES = """
|
||||
|
||||
-- =============================================================================
|
||||
-- 2026-02-20 新增:库存汇总表(日/周/月)
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS {schema}.dws_goods_stock_daily_summary (
|
||||
site_id BIGINT NOT NULL,
|
||||
tenant_id BIGINT,
|
||||
stat_date DATE NOT NULL,
|
||||
site_goods_id BIGINT NOT NULL,
|
||||
goods_name TEXT,
|
||||
goods_unit TEXT,
|
||||
goods_category_id BIGINT,
|
||||
goods_category_second_id BIGINT,
|
||||
category_name TEXT,
|
||||
range_start_stock NUMERIC,
|
||||
range_end_stock NUMERIC,
|
||||
range_in NUMERIC,
|
||||
range_out NUMERIC,
|
||||
range_sale NUMERIC,
|
||||
range_sale_money NUMERIC(12,2),
|
||||
range_inventory NUMERIC,
|
||||
current_stock NUMERIC,
|
||||
stat_period TEXT NOT NULL DEFAULT 'daily',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
PRIMARY KEY (site_id, stat_date, site_goods_id)
|
||||
);
|
||||
|
||||
COMMENT ON TABLE {schema}.dws_goods_stock_daily_summary
|
||||
IS '库存日度汇总:按门店+日期+商品汇总库存变动';
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_daily_date
|
||||
ON {schema}.dws_goods_stock_daily_summary (stat_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_daily_goods
|
||||
ON {schema}.dws_goods_stock_daily_summary (site_goods_id, stat_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_daily_site
|
||||
ON {schema}.dws_goods_stock_daily_summary (site_id, stat_date);
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS {schema}.dws_goods_stock_weekly_summary (
|
||||
site_id BIGINT NOT NULL,
|
||||
tenant_id BIGINT,
|
||||
stat_date DATE NOT NULL,
|
||||
site_goods_id BIGINT NOT NULL,
|
||||
goods_name TEXT,
|
||||
goods_unit TEXT,
|
||||
goods_category_id BIGINT,
|
||||
goods_category_second_id BIGINT,
|
||||
category_name TEXT,
|
||||
range_start_stock NUMERIC,
|
||||
range_end_stock NUMERIC,
|
||||
range_in NUMERIC,
|
||||
range_out NUMERIC,
|
||||
range_sale NUMERIC,
|
||||
range_sale_money NUMERIC(12,2),
|
||||
range_inventory NUMERIC,
|
||||
current_stock NUMERIC,
|
||||
stat_period TEXT NOT NULL DEFAULT 'weekly',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
PRIMARY KEY (site_id, stat_date, site_goods_id)
|
||||
);
|
||||
|
||||
COMMENT ON TABLE {schema}.dws_goods_stock_weekly_summary
|
||||
IS '库存周度汇总:按门店+ISO周+商品汇总库存变动,stat_date 为周一日期';
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_weekly_date
|
||||
ON {schema}.dws_goods_stock_weekly_summary (stat_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_weekly_goods
|
||||
ON {schema}.dws_goods_stock_weekly_summary (site_goods_id, stat_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_weekly_site
|
||||
ON {schema}.dws_goods_stock_weekly_summary (site_id, stat_date);
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS {schema}.dws_goods_stock_monthly_summary (
|
||||
site_id BIGINT NOT NULL,
|
||||
tenant_id BIGINT,
|
||||
stat_date DATE NOT NULL,
|
||||
site_goods_id BIGINT NOT NULL,
|
||||
goods_name TEXT,
|
||||
goods_unit TEXT,
|
||||
goods_category_id BIGINT,
|
||||
goods_category_second_id BIGINT,
|
||||
category_name TEXT,
|
||||
range_start_stock NUMERIC,
|
||||
range_end_stock NUMERIC,
|
||||
range_in NUMERIC,
|
||||
range_out NUMERIC,
|
||||
range_sale NUMERIC,
|
||||
range_sale_money NUMERIC(12,2),
|
||||
range_inventory NUMERIC,
|
||||
current_stock NUMERIC,
|
||||
stat_period TEXT NOT NULL DEFAULT 'monthly',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
PRIMARY KEY (site_id, stat_date, site_goods_id)
|
||||
);
|
||||
|
||||
COMMENT ON TABLE {schema}.dws_goods_stock_monthly_summary
|
||||
IS '库存月度汇总:按门店+自然月+商品汇总库存变动,stat_date 为月首日期';
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_monthly_date
|
||||
ON {schema}.dws_goods_stock_monthly_summary (stat_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_monthly_goods
|
||||
ON {schema}.dws_goods_stock_monthly_summary (site_goods_id, stat_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_dws_goods_stock_monthly_site
|
||||
ON {schema}.dws_goods_stock_monthly_summary (site_id, stat_date);
|
||||
"""
|
||||
|
||||
|
||||
def patch_file(filepath: Path, old: str, new: str, label: str) -> bool:
    """Replace the first occurrence of *old* with *new* inside *filepath*.

    Idempotent: if *new* is already present the file is left untouched.
    Returns True when the file was patched (or already patched), False
    when *old* could not be located.
    """
    text = filepath.read_text(encoding="utf-8")
    if old not in text:
        print(f" ⚠️ {label}: 未找到匹配文本,跳过")
        return False
    if new in text:
        # Already applied on a previous run — nothing to do.
        print(f" ⏭️ {label}: 已包含新内容,跳过")
        return True
    filepath.write_text(text.replace(old, new, 1), encoding="utf-8")
    print(f" ✅ {label}")
    return True
|
||||
|
||||
|
||||
def append_if_missing(filepath: Path, marker: str, content: str, label: str) -> bool:
    """Append *content* to *filepath* unless *marker* is already present.

    Trailing whitespace of the existing file is collapsed to a single
    newline before appending. Always returns True.
    """
    existing = filepath.read_text(encoding="utf-8")
    if marker in existing:
        # Already appended on a previous run — skip.
        print(f" ⏭️ {label}: 已存在,跳过")
        return True
    filepath.write_text(existing.rstrip() + "\n" + content, encoding="utf-8")
    print(f" ✅ {label}")
    return True
|
||||
|
||||
|
||||
def main():
    """Patch the four DDL files with the 2026-02-20 migration changes."""
    banner = "=" * 60
    print(banner)
    print("同步 DDL 文件(2026-02-20 迁移批次)")
    print(banner)

    doc_file = SCHEMAS_DIR / "schema_dwd_doc.sql"
    dwd_file = SCHEMAS_DIR / "dwd.sql"
    dws_file = SCHEMAS_DIR / "dws.sql"
    dws_doc_file = SCHEMAS_DIR / "schema_dws.sql"

    # 1. schema_dwd_doc.sql: add the 14 dim_table_ex columns missing
    #    relative to dwd.sql.
    print(f"\n[1] {doc_file.name}: dim_table_ex +14 列")
    patch_file(doc_file, DIM_TABLE_EX_OLD_COLS_DOC, DIM_TABLE_EX_NEW_COLS_DOC,
               "dim_table_ex 列定义")

    # 2. schema_dwd_doc.sql: add the 4 missing dim_assistant_ex columns.
    print(f"\n[2] {doc_file.name}: dim_assistant_ex +4 列")
    patch_file(doc_file, DIM_ASSISTANT_EX_OLD_COLS, DIM_ASSISTANT_EX_NEW_COLS,
               "dim_assistant_ex 列定义(doc)")

    # 3. dwd.sql: the same 4 dim_assistant_ex columns.
    print(f"\n[3] {dwd_file.name}: dim_assistant_ex +4 列")
    patch_file(dwd_file, DIM_ASSISTANT_EX_OLD_COLS, DIM_ASSISTANT_EX_NEW_COLS,
               "dim_assistant_ex 列定义(dwd)")

    # 4-5. Append the two new DWD tables to both dwd DDL files.
    print(f"\n[4] {dwd_file.name}: 追加 dwd_goods_stock_summary + dwd_goods_stock_movement")
    append_if_missing(dwd_file, "dwd_goods_stock_summary",
                      DWD_NEW_TABLES.format(schema="dwd"),
                      "DWD 新表")

    print(f"\n[5] {doc_file.name}: 追加 dwd_goods_stock_summary + dwd_goods_stock_movement")
    append_if_missing(doc_file, "dwd_goods_stock_summary",
                      DWD_NEW_TABLES.format(schema="billiards_dwd"),
                      "DWD 新表(doc)")

    # 6-7. Append the three stock-summary tables to both dws DDL files.
    print(f"\n[6] {dws_file.name}: 追加 3 张库存汇总表")
    append_if_missing(dws_file, "dws_goods_stock_daily_summary",
                      DWS_NEW_TABLES.format(schema="dws"),
                      "DWS 库存汇总表")

    print(f"\n[7] {dws_doc_file.name}: 追加 3 张库存汇总表")
    append_if_missing(dws_doc_file, "dws_goods_stock_daily_summary",
                      DWS_NEW_TABLES.format(schema="billiards_dws"),
                      "DWS 库存汇总表(doc)")

    print("\n" + banner)
    print("DDL 同步完成")
    print(banner)


if __name__ == "__main__":
    main()
|
||||
@@ -34,7 +34,7 @@ for db in DBS:
|
||||
|
||||
print("\n--- 配置文件指向 ---")
|
||||
print("ETL .env PG_DSN -> test_etl_feiqiu (已确认)")
|
||||
print("根 .env -> PG_NAME=test_etl_feiqiu, APP_DB_NAME=test_zqyy_app")
|
||||
print("根 .env -> PG_DSN=test_etl_feiqiu, APP_DB_DSN=test_zqyy_app")
|
||||
print("后端 .env.local -> APP_DB_NAME=test_zqyy_app, ETL_DB_NAME=test_etl_feiqiu")
|
||||
print("后端 config.py 默认值 -> test_zqyy_app / test_etl_feiqiu")
|
||||
print("FDW 生产 -> setup_fdw.sql (etl_feiqiu)")
|
||||
|
||||
Reference in New Issue
Block a user