微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
"""migrate_session_dirs -- migrate legacy session_logs directories to the new layout.

Old layout: {chatShort}_{HHMMSS}/main_{seq}_{execShort}_{HHMMSS}.md
New layout: {seq:02d}_{chatShort}_{HHMMSS}/main_{seq}_{execShort}.md

Migration rules:
1. Within one day, directories are grouped by chat session (the 8-hex
   chatShort part of the directory name); several old directories of the
   same chat session are merged into a single new directory.
2. New directories get a sequence prefix (01_, 02_, ...) in order of first
   appearance that day.
3. The time suffix (_HHMMSS) is dropped from file names.
4. The output_dir of every matching entry in both index files is updated.
5. Cross-day conversations get a _ref_{chatShort}.md pointer file.
   NOTE(review): nothing in the code visible in this script generates these
   pointer files -- confirm whether this rule is handled elsewhere.

Usage:
    python scripts/ops/migrate_session_dirs.py --dry-run   # preview changes
    python scripts/ops/migrate_session_dirs.py             # run the migration
"""
import json
import os
import re
import shutil
import sys
from collections import defaultdict
from _env_paths import ensure_repo_root
# NOTE(review): presumably makes the repository root the base for the relative
# paths below -- confirm against _env_paths.ensure_repo_root.
ensure_repo_root()
# Session log root and its two index files (all paths are relative).
SESSION_LOG_DIR = os.path.join("docs", "audit", "session_logs")
INDEX_PATH = os.path.join(SESSION_LOG_DIR, "_session_index.json")
INDEX_FULL_PATH = os.path.join(SESSION_LOG_DIR, "_session_index_full.json")
# Old directory name format: {hex8}_{HHMMSS} (no sequence prefix)
OLD_DIR_PATTERN = re.compile(r"^([0-9a-f]{8})_(\d{6})$")
# New directory name format: {seq:02d}_{hex8}_{HHMMSS} (with sequence prefix)
NEW_DIR_PATTERN = re.compile(r"^(\d{2})_([0-9a-f]{8})_(\d{6})$")
# Old file name format: main_{seq}_{hash8}_{HHMMSS}.md or sub_{seq}_{hash8}_{HHMMSS}.md
OLD_FILE_PATTERN = re.compile(r"^(main|sub)_(\d{2})_([0-9a-f]{8})_(\d{6})\.md$")
def load_json(path):
    """Read a UTF-8 JSON document from ``path``.

    Returns the parsed value, or an empty dict when ``path`` is not an
    existing regular file. Malformed JSON propagates the decoder's exception.
    """
    if os.path.isfile(path):
        with open(path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
        return parsed
    return {}
def save_json(path, data):
    """Serialize ``data`` to ``path`` as indented UTF-8 JSON (non-ASCII kept as-is).

    The document is written to a sibling ``<path>.tmp`` file first and then
    moved over ``path`` with ``os.replace``. Opening ``path`` directly in
    write mode would truncate the existing file before serialization starts,
    so a non-serializable value or an interruption would destroy the index.

    Raises whatever ``open``, ``json.dump`` or ``os.replace`` raise; in that
    case ``path`` is left untouched and the temporary file is removed.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up the partial file left behind by a failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def find_all_day_dirs():
    """Collect every ``YYYY-MM/DD`` directory below SESSION_LOG_DIR.

    Returns a list of paths ordered by month name and then day name. The list
    is empty when SESSION_LOG_DIR is not a directory. Entries that are not
    directories or whose names do not match the expected shape are ignored.
    """
    if not os.path.isdir(SESSION_LOG_DIR):
        return []
    day_dirs = []
    for month_name in sorted(os.listdir(SESSION_LOG_DIR)):
        month_path = os.path.join(SESSION_LOG_DIR, month_name)
        if os.path.isdir(month_path) and re.match(r"^\d{4}-\d{2}$", month_name):
            for day_name in sorted(os.listdir(month_path)):
                day_path = os.path.join(month_path, day_name)
                if os.path.isdir(day_path) and re.match(r"^\d{2}$", day_name):
                    day_dirs.append(day_path)
    return day_dirs
def scan_day_dir(day_dir):
    """List the legacy-format session directories directly inside ``day_dir``.

    Returns ``[(dir_name, chat_short, hms, full_path), ...]`` sorted by
    directory name. Entries that are not directories, that already use the
    new naming scheme, or that match neither scheme are left out.
    """
    legacy = []
    for name in sorted(os.listdir(day_dir)):
        path = os.path.join(day_dir, name)
        # Non-directories and already-migrated directories are not candidates.
        if not os.path.isdir(path) or NEW_DIR_PATTERN.match(name):
            continue
        matched = OLD_DIR_PATTERN.match(name)
        if matched is None:
            continue
        chat_short, hms = matched.groups()
        legacy.append((name, chat_short, hms, path))
    return legacy
def rename_file(old_name):
    """Map a legacy file name to its new name by dropping the time suffix.

    main_01_abc12345_013337.md -> main_01_abc12345.md
    sub_02_def67890_013337.md  -> sub_02_def67890.md

    Names that do not match the legacy pattern are returned unchanged.
    """
    matched = OLD_FILE_PATTERN.match(old_name)
    if matched is None:
        return old_name
    kind, seq, hash8 = matched.group(1, 2, 3)
    return "_".join((kind, seq, hash8)) + ".md"
def build_migration_plan(day_dir):
    """Build the migration plan for one day directory.

    Legacy directories found by ``scan_day_dir`` are grouped by chat short id;
    each group becomes one plan item. Returns a list of dicts, ordered by the
    earliest HHMMSS of each group (empty when nothing needs migrating)::

        {
            "old_dirs": [(dir_name, full_path), ...],  # legacy dirs of this chat, by HHMMSS
            "chat_short": str,
            "first_hms": str,        # earliest HHMMSS in the group
            "new_dir_name": str,     # {seq:02d}_{chatShort}_{firstHms}, or an existing new dir
            "new_dir_path": str,     # day_dir joined with new_dir_name
            "file_renames": [(old_full_path, old_name, new_name), ...],
        }

    Only ``.md`` files are planned; any other file stays in its old directory.
    """
    old_dirs = scan_day_dir(day_dir)
    if not old_dirs:
        return []

    # Group by chat short id so several legacy dirs of one chat get merged.
    groups = defaultdict(list)
    for dir_name, chat_short, hms, full_path in old_dirs:
        groups[chat_short].append((dir_name, hms, full_path))
    for dirs in groups.values():
        dirs.sort(key=lambda entry: entry[1])  # by HHMMSS
    # Each group's list is sorted, so its first element carries the earliest time.
    sorted_groups = sorted(groups.items(), key=lambda g: g[1][0][1])

    # Scan the day directory once for directories already in the new format:
    # they reserve sequence numbers and serve as merge targets for chats that
    # were partially migrated before. Only real directories count; a stray
    # file with a matching name must not become a move target.
    existing_by_chat = {}
    max_seq = 0
    for name in sorted(os.listdir(day_dir)):
        m = NEW_DIR_PATTERN.match(name)
        if not m or not os.path.isdir(os.path.join(day_dir, name)):
            continue
        max_seq = max(max_seq, int(m.group(1)))
        # Sorted iteration + setdefault: the lowest-numbered dir wins, deterministically.
        existing_by_chat.setdefault(m.group(2), name)
    next_seq = max_seq + 1

    plan = []
    for chat_short, dirs in sorted_groups:
        first_hms = dirs[0][1]
        new_dir_name = existing_by_chat.get(chat_short)
        if new_dir_name is None:
            # NOTE: a sequence above 99 is formatted with three digits and
            # would no longer match NEW_DIR_PATTERN on a later run.
            new_dir_name = f"{next_seq:02d}_{chat_short}_{first_hms}"
            next_seq += 1

        # Collect the rename plan for every markdown file of the group.
        file_renames = []
        for _dir_name, _hms, full_path in dirs:
            for fname in sorted(os.listdir(full_path)):
                if not fname.endswith(".md"):
                    continue
                file_renames.append((
                    os.path.join(full_path, fname),  # old full path
                    fname,                           # old file name
                    rename_file(fname),              # new file name
                ))

        plan.append({
            "old_dirs": [(d, p) for d, _, p in dirs],
            "chat_short": chat_short,
            "first_hms": first_hms,
            "new_dir_name": new_dir_name,
            "new_dir_path": os.path.join(day_dir, new_dir_name),
            "file_renames": file_renames,
        })
    return plan
def execute_migration(plan, day_dir, dry_run=False):
    """Carry out (or preview) the migration plan of one day directory.

    Args:
        plan: list of plan items as produced by ``build_migration_plan``; the
            keys read here are ``new_dir_path``, ``new_dir_name``,
            ``old_dirs`` and ``file_renames``.
        day_dir: the day directory the plan belongs to. Not used by this
            function; kept so the call signature stays stable.
        dry_run: when true, only print the planned MOVE/RMDIR operations and
            leave the filesystem untouched.

    Returns:
        ``(moved_dirs, moved_files, errors)``: the number of old directories
        removed, the number of files moved, and a list of error messages.
        Both counts stay 0 in a dry run.
    """
    moved_dirs = 0
    moved_files = 0
    errors = []
    for item in plan:
        new_dir_path = item["new_dir_path"]
        if not dry_run:
            os.makedirs(new_dir_path, exist_ok=True)

        # Move the files into the new directory.
        for old_src, _old_fname, new_fname in item["file_renames"]:
            dst = os.path.join(new_dir_path, new_fname)
            # Never overwrite or nest into something that already occupies the
            # destination. The source then stays where it is and surfaces in
            # the "not empty" report of its old directory below.
            if os.path.lexists(dst):
                continue
            if dry_run:
                print(f" MOVE {old_src} → {dst}")
                continue
            try:
                shutil.move(old_src, dst)
            except OSError as e:  # shutil.Error is an OSError subclass
                errors.append(f"move {old_src}: {e}")
            else:
                moved_files += 1

        # Remove the old directories once they are empty.
        for dir_name, dir_path in item["old_dirs"]:
            # The old directory is itself the target: nothing to remove.
            if dir_name == item["new_dir_name"]:
                continue
            if dry_run:
                print(f" RMDIR {dir_path}")
                continue
            try:
                remaining = os.listdir(dir_path)
                if remaining:
                    # Only empty directories are deleted; leftovers are reported.
                    errors.append(f"rmdir {dir_path}: not empty ({remaining})")
                else:
                    os.rmdir(dir_path)
                    moved_dirs += 1
            except OSError as e:
                errors.append(f"rmdir {dir_path}: {e}")
    return moved_dirs, moved_files, errors
def update_indexes(all_plans):
    """Rewrite ``output_dir`` in both index files according to the migration plans.

    Old output_dir format: docs/audit/session_logs/2026-03/03/b6b5e1fd_013337
    New output_dir format: docs/audit/session_logs/2026-03/03/01_b6b5e1fd_013337

    Args:
        all_plans: list of ``(plan_list, day_dir)`` pairs, where each plan
            item provides ``new_dir_path`` and ``old_dirs``.

    Returns:
        The number of entries rewritten, summed over both index files (an
        entry present in both files is counted twice). 0 when the plans
        contain no directory mapping; the index files are not read then.
    """
    # Build the mapping: old directory path -> new directory path, both with
    # forward slashes.
    dir_map = {}
    for plan_list, _day_dir in all_plans:
        for item in plan_list:
            new_path = item["new_dir_path"].replace("\\", "/")
            for _dir_name, dir_path in item["old_dirs"]:
                dir_map[dir_path.replace("\\", "/")] = new_path
    if not dir_map:
        return 0

    updated = 0
    for idx_path in (INDEX_PATH, INDEX_FULL_PATH):
        data = load_json(idx_path)
        entries = data.get("entries", {})
        changed = False
        for ent in entries.values():
            old_dir = ent.get("output_dir", "")
            if not isinstance(old_dir, str):
                continue
            # Normalize the stored value the same way as the map keys so that
            # backslash-separated paths or a trailing slash still match.
            new_dir = dir_map.get(old_dir.replace("\\", "/").rstrip("/"))
            if new_dir is None:
                continue
            ent["output_dir"] = new_dir
            changed = True
            updated += 1
        # Leave index files without any matching entry untouched.
        if changed:
            save_json(idx_path, data)
    return updated
def main():
    """Command-line entry point: plan the migration, then preview or run it.

    With ``--dry-run`` the plan is only printed. Otherwise every day
    directory is migrated, both index files are updated and a summary is
    printed.

    Returns:
        0 when there was nothing to do, after a dry run, or when the
        migration finished without errors; 1 when at least one move/rmdir
        error was recorded.
    """
    import argparse

    parser = argparse.ArgumentParser(description="迁移 session_logs 目录到新格式")
    parser.add_argument("--dry-run", action="store_true", help="预览变更,不实际执行")
    args = parser.parse_args()

    day_dirs = find_all_day_dirs()
    if not day_dirs:
        print("[migrate] 未找到任何 day_dir")
        return 0

    # Plan every day directory first so the totals are known before any change.
    total_plans = []
    total_items = 0
    total_files = 0
    for day_dir in day_dirs:
        plan = build_migration_plan(day_dir)
        if plan:
            total_plans.append((plan, day_dir))
            total_items += len(plan)
            total_files += sum(len(item["file_renames"]) for item in plan)
    if not total_plans:
        print("[migrate] 所有目录已是新格式,无需迁移")
        return 0
    print(f"[migrate] 共 {len(day_dirs)} 个 day_dir,{total_items} 个对话组,{total_files} 个文件待迁移")

    if args.dry_run:
        print("\n=== DRY RUN ===\n")
        for plan, day_dir in total_plans:
            rel = os.path.relpath(day_dir)
            print(f"\n--- {rel} ---")
            for item in plan:
                old_names = [d for d, _ in item["old_dirs"]]
                print(f" {' + '.join(old_names)} → {item['new_dir_name']}/")
                for _, old_fname, new_fname in item["file_renames"]:
                    if old_fname != new_fname:
                        print(f" {old_fname} → {new_fname}")
                    else:
                        print(f" {old_fname} (不变)")
        print(f"\n[dry-run] 共 {total_items} 个对话组,{total_files} 个文件")
        return 0

    # Run the migration.
    all_moved_dirs = 0
    all_moved_files = 0
    all_errors = []
    for plan, day_dir in total_plans:
        md, mf, errs = execute_migration(plan, day_dir, dry_run=False)
        all_moved_dirs += md
        all_moved_files += mf
        all_errors.extend(errs)
        rel = os.path.relpath(day_dir)
        print(f"[migrate] {rel}: {mf} files moved, {md} dirs removed")

    # Update the indexes.
    idx_updated = update_indexes(total_plans)
    print(f"[migrate] 索引更新: {idx_updated} entries")

    if all_errors:
        shown = all_errors[:20]
        print(f"\n[migrate] {len(all_errors)} 个错误:")
        for e in shown:
            print(f"{e}")
        # Make the truncation visible instead of silently dropping the rest.
        if len(all_errors) > len(shown):
            print(f"... 另有 {len(all_errors) - len(shown)} 个错误未显示")
    print(f"\n[migrate] 完成: {all_moved_files} files, {all_moved_dirs} dirs removed, {idx_updated} index entries updated")
    return 1 if all_errors else 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so callers can detect
    # a migration that finished with errors.
    sys.exit(main())