Files
Neo-ZQYY/scripts/ops/fix_assistant_ledger_misdelete.py
Neo b25308c3f4 feat: P1-P3 全栈集成 — 数据库基础 + DWS 扩展 + 小程序鉴权 + 工程化体系
## P1 数据库基础
- zqyy_app: 创建 auth/biz schema、FDW 连接 etl_feiqiu
- etl_feiqiu: 创建 app schema RLS 视图、商品库存预警表
- 清理 assistant_abolish 残留数据

## P2 ETL/DWS 扩展
- 新增 DWS 助教订单贡献度表 (dws.assistant_order_contribution)
- 新增 assistant_order_contribution_task 任务及 RLS 视图
- member_consumption 增加充值字段、assistant_daily 增加处罚字段
- 更新 ODS/DWD/DWS 任务文档及业务规则文档
- 更新 consistency_checker、flow_runner、task_registry 等核心模块

## P3 小程序鉴权系统
- 新增 xcx_auth 路由/schema(微信登录 + JWT)
- 新增 wechat/role/matching/application 服务层
- zqyy_app 鉴权表迁移 + 角色权限种子数据
- auth/dependencies.py 支持小程序 JWT 鉴权

## 文档与审计
- 新增 DOCUMENTATION-MAP 文档导航
- 新增 7 份 BD_Manual 数据库变更文档
- 更新 DDL 基线快照(etl_feiqiu 6 schema + zqyy_app auth)
- 新增全栈集成审计记录、部署检查清单更新
- 新增 BACKLOG 路线图、FDW→Core 迁移计划

## Kiro 工程化
- 新增 5 个 Spec(P1/P2/P3/全栈集成/核心业务)
- 新增审计自动化脚本(agent_on_stop/build_audit_context/compliance_prescan)
- 新增 6 个 Hook(合规检查/会话日志/提交审计等)
- 新增 doc-map steering 文件

## 运维与测试
- 新增 ops 脚本:迁移验证/API 健康检查/ETL 监控/集成报告
- 新增属性测试:test_dws_contribution / test_auth_system
- 清理过期 export 报告文件
- 更新 .gitignore 排除规则
2026-02-26 08:03:53 +08:00

247 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""Repair ODS_ASSISTANT_LEDGER rows wrongly flagged as deleted (2025-11-21 ~ 2025-11-23).

Background:
    During the snapshot comparison of run_id 8932 (2026-02-24 00:24), the
    "recent" endpoint had already rolled 2025-11-21 ~ 2025-11-23 out of its
    data-retention window, so 67 records were missing from the snapshot and
    _mark_missing_as_deleted incorrectly stamped them with is_delete=1.

Repair strategy:
    1. Call the Former endpoint to fetch the complete 2025-11-01 ~ 2025-11-24 data.
    2. Load it via the ODS task's _insert_records_schema_aware
       (content_hash dedup keeps the operation idempotent).
    3. For records that are is_delete=1 in ODS but is_delete=0 in the Former
       response, INSERT a corrected version row.
    4. When done, remind the operator to run the DWD load.

Usage:
    cd apps/etl/connectors/feiqiu
    python ../../../../scripts/ops/fix_assistant_ledger_misdelete.py [--dry-run]
"""
from __future__ import annotations

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

# Load environment variables: repo-root .env first, then the feiqiu
# connector's own .env (neither overrides values already set).
from dotenv import load_dotenv

_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_ROOT / ".env", override=False)
_FEIQIU_ENV = _ROOT / "apps" / "etl" / "connectors" / "feiqiu" / ".env"
if _FEIQIU_ENV.exists():
    load_dotenv(_FEIQIU_ENV, override=False)

# Make the feiqiu ETL connector package importable.
sys.path.insert(0, str(_ROOT / "apps" / "etl" / "connectors" / "feiqiu"))

from config.settings import AppConfig
from api.client import APIClient
from database.connection import DatabaseConnection

TZ = ZoneInfo("Asia/Shanghai")
FORMER_ENDPOINT = "/AssistantPerformance/GetFormerOrderAssistantDetails"
TABLE = "ods.assistant_service_records"
STORE_ID = 2790685415443269
WINDOW_START = "2025-11-01 00:00:00"
WINDOW_END = "2025-11-24 00:00:00"
def parse_args():
    """Parse the CLI arguments (--dry-run: query only, write nothing)."""
    parser = argparse.ArgumentParser(description="修复 ODS_ASSISTANT_LEDGER 误删记录")
    parser.add_argument("--dry-run", action="store_true", help="仅查询不写入")
    return parser.parse_args()
def fetch_former_records(api: APIClient) -> list[dict]:
    """Fetch every record in the repair window from the Former endpoint.

    Pages through the endpoint 200 rows at a time; the record list lives
    under data -> orderAssistantDetails in the response payload.
    """
    records, _meta = api.get_paginated(
        endpoint=FORMER_ENDPOINT,
        params={
            "siteId": STORE_ID,
            "startTime": WINDOW_START,
            "endTime": WINDOW_END,
        },
        page_size=200,
        data_path=("data",),
        list_key="orderAssistantDetails",
    )
    return records
def find_misdeleted_ids(db: DatabaseConnection) -> set[int]:
    """Collect IDs whose newest ODS version inside the window is is_delete=1.

    DISTINCT ON (id) with fetched_at DESC picks the latest version row per id;
    only the ids currently flagged deleted are returned.
    """
    sql = """
    SELECT DISTINCT ON (id) id, is_delete, fetched_at
    FROM ods.assistant_service_records
    WHERE create_time >= %s AND create_time < %s
    ORDER BY id, fetched_at DESC NULLS LAST
    """
    latest_versions = db.query(sql, (WINDOW_START, WINDOW_END))
    return {row["id"] for row in latest_versions if row["is_delete"] == 1}
def get_table_columns(db: DatabaseConnection) -> list[str]:
    """Return the ODS table's column names in ordinal (definition) order."""
    column_sql = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = 'ods' AND table_name = 'assistant_service_records'
    ORDER BY ordinal_position
    """
    rows = db.query(column_sql)
    return [row["column_name"] for row in rows]
def insert_correction_rows(
    db: DatabaseConnection,
    former_records: list[dict],
    misdeleted_ids: set[int],
    columns: list[str],
    dry_run: bool,
) -> int:
    """Insert one corrected version row (is_delete=0, fresh fetched_at) per mis-deleted record.

    For each Former-API record whose id is in ``misdeleted_ids`` this builds an
    ODS row whose content_hash is md5(payload_json + "|is_delete=0"); the
    INSERT uses ON CONFLICT (id, content_hash) DO NOTHING, so re-running the
    script is idempotent.

    Args:
        db: open database connection (not touched in dry-run mode).
        former_records: raw records returned by the Former endpoint.
        misdeleted_ids: ids whose latest ODS version is wrongly is_delete=1.
        columns: ODS column list; unused, kept for interface compatibility.
        dry_run: when True, only print what would be corrected.

    Returns:
        Number of records corrected (or that would be corrected in dry-run).
    """
    import hashlib

    # Loop-invariant work hoisted out of the per-record loop (the original
    # re-imported dateutil/psycopg2 and rebuilt the SQL string every iteration).
    # dateutil is optional: if it is missing we leave create_time NULL instead
    # of crashing mid-run — the original per-row import raised an uncaught
    # ImportError (the except clause only handles ValueError/TypeError).
    try:
        from dateutil import parser as dtparser
    except ImportError:
        dtparser = None

    insert_sql = """
    INSERT INTO ods.assistant_service_records
    (id, payload, is_delete, content_hash, fetched_at, source_file, create_time)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (id, content_hash) DO NOTHING
    """
    if dry_run:
        now = source_file = PgJson = None
    else:
        # Only needed when we actually write rows.
        from psycopg2.extras import Json as PgJson
        now = datetime.now(TZ)
        source_file = f"fix_misdelete_former_{WINDOW_START[:10]}_{WINDOW_END[:10]}"

    corrected = 0
    for rec in former_records:
        rec_id = rec.get("id")
        if rec_id is None:
            continue
        try:
            rec_id = int(rec_id)
        except (ValueError, TypeError):
            continue
        if rec_id not in misdeleted_ids:
            continue

        # content_hash = md5(payload_json + "|is_delete=0"); sort_keys makes
        # the serialization — and therefore the hash — deterministic.
        payload_json = json.dumps(rec, ensure_ascii=False, sort_keys=True)
        content_hash = hashlib.md5(
            (payload_json + "|is_delete=0").encode("utf-8")
        ).hexdigest()

        # Best-effort create_time extraction from whichever key the payload uses.
        raw_ct = rec.get("create_time") or rec.get("createTime") or rec.get("Create_time")
        create_time_val = None
        if raw_ct and dtparser is not None:
            try:
                create_time_val = dtparser.parse(str(raw_ct))
            except (ValueError, TypeError):
                pass

        if dry_run:
            print(f" [DRY-RUN] 将修正 id={rec_id}, create_time={create_time_val}, content_hash={content_hash}")
            corrected += 1
            continue

        # INSERT the correction row (including create_time).
        db.execute(insert_sql, (
            rec_id,
            PgJson(rec, dumps=lambda v: json.dumps(v, ensure_ascii=False)),
            0,
            content_hash,
            now,
            source_file,
            create_time_val,
        ))
        corrected += 1
    return corrected
def main():
    """Entry point: find mis-deleted records, refetch via Former endpoint, insert corrections.

    The DB connection is closed in a ``finally`` block so it is released even
    when a later step raises (the original leaked it on any exception).
    """
    args = parse_args()
    config = AppConfig.load()
    dsn = config.get("db.dsn")
    if not dsn:
        raise RuntimeError("db.dsn 未配置")

    print("=== 修复 ODS_ASSISTANT_LEDGER 误删记录 ===")
    print(f"窗口: {WINDOW_START} ~ {WINDOW_END}")
    print(f"Former endpoint: {FORMER_ENDPOINT}")
    print(f"目标表: {TABLE}")
    if args.dry_run:
        print("[DRY-RUN 模式]")
    print()

    # 1. Connect to the database.
    db = DatabaseConnection(dsn, session={"timezone": "Asia/Shanghai"})
    print("数据库连接成功")
    try:
        # 2. Find records currently (mis)flagged as deleted.
        misdeleted = find_misdeleted_ids(db)
        print(f"ODS 中窗口内 is_delete=1 的记录数: {len(misdeleted)}")
        if not misdeleted:
            print("无需修复,退出")
            return

        # 3. Pull the window's data from the Former endpoint.
        api = APIClient(
            base_url=config.get("api.base_url"),
            token=config.get("api.token"),
            timeout=config.get("api.timeout", 20),
            retry_max=config.get("api.retry_max", 3),
        )
        print("正在调用 Former endpoint...")
        former_records = fetch_former_records(api)
        print(f"Former endpoint 返回 {len(former_records)} 条记录")

        # 4. Intersect: which mis-deleted ids does the Former endpoint still return?
        former_ids = set()
        for rec in former_records:
            rid = rec.get("id")
            if rid is None:
                continue
            try:
                former_ids.add(int(rid))
            except (ValueError, TypeError):
                pass
        recoverable = misdeleted & former_ids
        print(f"可修复记录数: {len(recoverable)} (ODS误删={len(misdeleted)}, Former返回={len(former_ids)})")
        if not recoverable:
            print("Former endpoint 未返回任何误删记录,退出")
            return

        # 5. Fetch the table's column layout.
        columns = get_table_columns(db)

        # 6. Insert the correction version rows.
        corrected = insert_correction_rows(db, former_records, recoverable, columns, args.dry_run)
        if not args.dry_run:
            db.commit()
            print(f"\n已插入 {corrected} 条修正版本行is_delete=0")
            print("\n下一步:跑 DWD 加载以同步修正数据到 DWD 层")
            print(" cd apps/etl/connectors/feiqiu")
            print(' python -m cli.main --tasks DWD_LOAD_FROM_ODS --window-start "2025-11-01" --window-end "2025-11-24" --force-window-override')
        else:
            print(f"\n[DRY-RUN] 将修正 {corrected} 条记录")
    finally:
        db.close()
    print("\n完成")


if __name__ == "__main__":
    main()