微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
深度调查 ETL 执行时间线,查明凌晨全量更新后为什么还有数据缺失
"""
import os
import glob
from pathlib import Path
from datetime import datetime, timedelta
from dotenv import load_dotenv
def main():
# 加载环境变量
load_dotenv()
log_root = os.environ.get('LOG_ROOT')
if not log_root:
raise RuntimeError("LOG_ROOT 环境变量未设置")
log_dir = Path(log_root)
print("🔍 深度调查 ETL 执行时间线")
print("=" * 60)
# 获取所有日志文件并按修改时间排序
log_files = list(log_dir.glob("*.log"))
log_files.sort(key=lambda x: x.stat().st_mtime)
print(f"找到 {len(log_files)} 个日志文件")
# 分析最近 24 小时的日志
now = datetime.now()
yesterday = now - timedelta(hours=24)
recent_logs = []
for log_file in log_files:
mtime = datetime.fromtimestamp(log_file.stat().st_mtime)
if mtime >= yesterday:
recent_logs.append((log_file, mtime))
print(f"\n📅 最近 24 小时内的日志文件 ({len(recent_logs)} 个):")
for log_file, mtime in recent_logs:
print(f" {mtime.strftime('%Y-%m-%d %H:%M:%S')} - {log_file.name}")
# 分析每个日志文件的关键信息
print(f"\n🔍 详细分析最近的日志:")
for i, (log_file, mtime) in enumerate(recent_logs[-5:]): # 最近5个
print(f"\n--- 日志 {i+1}: {log_file.name} ---")
print(f"修改时间: {mtime.strftime('%Y-%m-%d %H:%M:%S')}")
try:
with open(log_file, 'r', encoding='utf-8') as f:
content = f.read()
lines = content.split('\n')
# 查找关键信息
start_time = None
end_time = None
flow_type = None
window_info = None
dwd_info = None
errors = []
for line in lines:
if '开始运行任务' in line and 'run_uuid' in line:
start_time = line.split(']')[0].replace('[', '')
if 'api_full' in line:
flow_type = 'api_full (全量)'
elif 'api_ods_dwd' in line:
flow_type = 'api_ods_dwd (增量)'
else:
flow_type = '未知'
if 'ETL运行完成' in line:
end_time = line.split(']')[0].replace('[', '')
if 'force_full' in line or '时间窗口' in line:
window_info = line.strip()
if 'DWD_LOAD_FROM_ODS' in line and ('开始' in line or '完成' in line):
dwd_info = line.strip()
if 'ERROR' in line or '失败' in line:
errors.append(line.strip())
print(f" 执行类型: {flow_type or '未知'}")
print(f" 开始时间: {start_time or '未找到'}")
print(f" 结束时间: {end_time or '未找到'}")
if window_info:
print(f" 窗口信息: {window_info}")
if dwd_info:
print(f" DWD 处理: {dwd_info}")
if errors:
print(f" 错误数量: {len(errors)}")
for error in errors[:3]: # 只显示前3个错误
print(f" - {error}")
# 计算执行时长
if start_time and end_time:
try:
start_dt = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
end_dt = datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S')
duration = end_dt - start_dt
print(f" 执行时长: {duration}")
except:
pass
except Exception as e:
print(f" 读取失败: {e}")
# 特别关注今天凌晨的执行
print(f"\n🌅 今天凌晨执行分析:")
today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
morning_end = now.replace(hour=8, minute=0, second=0, microsecond=0)
morning_logs = []
for log_file, mtime in recent_logs:
if today_start <= mtime <= morning_end:
morning_logs.append((log_file, mtime))
if morning_logs:
print(f"找到 {len(morning_logs)} 个凌晨执行的日志:")
for log_file, mtime in morning_logs:
print(f" {mtime.strftime('%H:%M:%S')} - {log_file.name}")
# 检查是否包含 DWD 处理
try:
with open(log_file, 'r', encoding='utf-8') as f:
content = f.read()
if 'DWD_LOAD_FROM_ODS' in content:
print(f" ✅ 包含 DWD 处理")
# 检查处理的时间范围
lines = content.split('\n')
for line in lines:
if '时间窗口' in line or 'window' in line.lower():
print(f" 📅 {line.strip()}")
else:
print(f" ❌ 未包含 DWD 处理")
except Exception as e:
print(f" 读取失败: {e}")
else:
print("❌ 未找到今天凌晨的 ETL 执行记录!")
print("这可能解释了为什么有数据缺失。")
if __name__ == "__main__":
main()