feat: P1-P3 全栈集成 — 数据库基础 + DWS 扩展 + 小程序鉴权 + 工程化体系
## P1 数据库基础
- zqyy_app: 创建 auth/biz schema、FDW 连接 etl_feiqiu
- etl_feiqiu: 创建 app schema RLS 视图、商品库存预警表
- 清理 assistant_abolish 残留数据

## P2 ETL/DWS 扩展
- 新增 DWS 助教订单贡献度表 (dws.assistant_order_contribution)
- 新增 assistant_order_contribution_task 任务及 RLS 视图
- member_consumption 增加充值字段、assistant_daily 增加处罚字段
- 更新 ODS/DWD/DWS 任务文档及业务规则文档
- 更新 consistency_checker、flow_runner、task_registry 等核心模块

## P3 小程序鉴权系统
- 新增 xcx_auth 路由/schema(微信登录 + JWT)
- 新增 wechat/role/matching/application 服务层
- zqyy_app 鉴权表迁移 + 角色权限种子数据
- auth/dependencies.py 支持小程序 JWT 鉴权

## 文档与审计
- 新增 DOCUMENTATION-MAP 文档导航
- 新增 7 份 BD_Manual 数据库变更文档
- 更新 DDL 基线快照(etl_feiqiu 6 schema + zqyy_app auth)
- 新增全栈集成审计记录、部署检查清单更新
- 新增 BACKLOG 路线图、FDW→Core 迁移计划

## Kiro 工程化
- 新增 5 个 Spec(P1/P2/P3/全栈集成/核心业务)
- 新增审计自动化脚本(agent_on_stop/build_audit_context/compliance_prescan)
- 新增 6 个 Hook(合规检查/会话日志/提交审计等)
- 新增 doc-map steering 文件

## 运维与测试
- 新增 ops 脚本:迁移验证/API 健康检查/ETL 监控/集成报告
- 新增属性测试:test_dws_contribution / test_auth_system
- 清理过期 export 报告文件
- 更新 .gitignore 排除规则
This commit is contained in:
246
scripts/ops/fix_assistant_ledger_misdelete.py
Normal file
246
scripts/ops/fix_assistant_ledger_misdelete.py
Normal file
@@ -0,0 +1,246 @@
|
||||
# -*- coding: utf-8 -*-
"""
Repair ODS_ASSISTANT_LEDGER records that were wrongly marked as deleted
(2025-11-21 ~ 2025-11-23).

Background:
    During the snapshot comparison of run_id 8932 (2026-02-24 00:24) the
    "recent" endpoint no longer returned 67 records dated
    2025-11-21 ~ 2025-11-23 because its data-retention window had rolled
    forward, so _mark_missing_as_deleted wrongly flagged them as is_delete=1.

Repair strategy (as implemented below):
    1. Query ods.assistant_service_records for the ids whose latest version
       (by fetched_at) inside the 2025-11-01 ~ 2025-11-24 create_time window
       has is_delete=1.
    2. Call the Former endpoint to fetch the records for the same window.
    3. For every id present in both sets, INSERT a correction version row
       with is_delete=0; ON CONFLICT (id, content_hash) DO NOTHING makes
       re-runs harmless.
    4. Afterwards, print the DWD load command the operator should run next.

    NOTE(review): the original description said the data is loaded through the
    ODS task's _insert_records_schema_aware and that only records the Former
    endpoint reports as is_delete=0 are corrected. This script issues a direct
    INSERT and does not inspect the Former payload's is_delete value; confirm
    which behaviour is intended.

Usage:
    cd apps/etl/connectors/feiqiu
    python ../../../../scripts/ops/fix_assistant_ledger_misdelete.py [--dry-run]
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# 加载环境变量
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Repository root: two directory levels above the folder holding this script
# (assumes the script is located at <root>/scripts/ops/).
_ROOT = Path(__file__).resolve().parents[2]
_FEIQIU_DIR = _ROOT / "apps" / "etl" / "connectors" / "feiqiu"
_FEIQIU_ENV = _FEIQIU_DIR / ".env"

# Load the repository-level .env first, then the connector-specific one when
# that file exists. Both calls pass override=False.
load_dotenv(_ROOT / ".env", override=False)
if _FEIQIU_ENV.exists():
    load_dotenv(_FEIQIU_ENV, override=False)

# Make the ETL connector's modules importable (config, api, database).
sys.path.insert(0, str(_FEIQIU_DIR))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from api.client import APIClient
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
# Timezone used to stamp fetched_at on the correction rows.
TZ = ZoneInfo("Asia/Shanghai")
# API path queried by fetch_former_records().
FORMER_ENDPOINT = "/AssistantPerformance/GetFormerOrderAssistantDetails"
# Target table name; only printed in main()'s banner — the SQL statements in
# this file spell the table name out themselves.
TABLE = "ods.assistant_service_records"
# Sent as the siteId request parameter.
STORE_ID = 2790685415443269
# Repair window: sent to the API as startTime/endTime and used in the ODS
# query as create_time >= WINDOW_START AND create_time < WINDOW_END.
WINDOW_START = "2025-11-01 00:00:00"
WINDOW_END = "2025-11-24 00:00:00"
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the repair script.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        The parsed namespace; ``dry_run`` is True when ``--dry-run`` was given.
    """
    parser = argparse.ArgumentParser(description="修复 ODS_ASSISTANT_LEDGER 误删记录")
    parser.add_argument("--dry-run", action="store_true", help="仅查询不写入")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def fetch_former_records(api: APIClient) -> list[dict]:
    """Fetch all records the Former endpoint returns for the repair window.

    Args:
        api: Client whose ``get_paginated`` method performs the paged request.

    Returns:
        The first element of the pair returned by ``api.get_paginated``; the
        second element is discarded.
    """
    window_params = dict(
        siteId=STORE_ID,
        startTime=WINDOW_START,
        endTime=WINDOW_END,
    )
    records, _unused = api.get_paginated(
        endpoint=FORMER_ENDPOINT,
        params=window_params,
        page_size=200,
        data_path=("data",),
        list_key="orderAssistantDetails",
    )
    return records
|
||||
|
||||
|
||||
def find_misdeleted_ids(db: DatabaseConnection) -> set[int]:
    """Return the ids whose latest ODS version in the window is deleted.

    For each id with create_time inside [WINDOW_START, WINDOW_END) the query
    keeps only the row with the newest fetched_at (NULLs last); ids whose kept
    row has ``is_delete == 1`` are returned. The result therefore contains
    every currently-deleted id in the window, not only the wrongly deleted
    ones — main() narrows it down by intersecting with the Former results.

    Args:
        db: Connection whose ``query(sql, params)`` yields mapping-like rows.
    """
    latest_version_sql = """
        SELECT DISTINCT ON (id) id, is_delete, fetched_at
        FROM ods.assistant_service_records
        WHERE create_time >= %s AND create_time < %s
        ORDER BY id, fetched_at DESC NULLS LAST
    """
    latest_rows = db.query(latest_version_sql, (WINDOW_START, WINDOW_END))
    deleted_ids = set()
    for row in latest_rows:
        if row["is_delete"] == 1:
            deleted_ids.add(row["id"])
    return deleted_ids
|
||||
|
||||
|
||||
def get_table_columns(db: DatabaseConnection) -> list[str]:
    """Return the column names of ods.assistant_service_records.

    The names come from information_schema.columns and are ordered by
    ordinal_position.

    Args:
        db: Connection whose ``query(sql)`` yields mapping-like rows.
    """
    columns_sql = """
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema = 'ods' AND table_name = 'assistant_service_records'
        ORDER BY ordinal_position
    """
    column_rows = db.query(columns_sql)
    names = []
    for row in column_rows:
        names.append(row["column_name"])
    return names
|
||||
|
||||
|
||||
def _parse_create_time(rec: dict, rec_id: int) -> datetime | None:
    """Return the record's creation time parsed by dateutil, or None.

    The keys ``create_time``, ``createTime`` and ``Create_time`` are tried in
    that order and the first truthy value is used.
    """
    raw_ct = rec.get("create_time") or rec.get("createTime") or rec.get("Create_time")
    if not raw_ct:
        return None

    # Imported lazily so dateutil is only needed when a record actually
    # carries a creation time.
    from dateutil import parser as dtparser

    try:
        return dtparser.parse(str(raw_ct))
    except (ValueError, TypeError, OverflowError):
        # Best effort: the row is still repaired with an empty create_time,
        # but the problem is reported instead of being silently dropped.
        # OverflowError is one of the exceptions dateutil documents for parse().
        print(f" [WARN] id={rec_id} 的 create_time 无法解析: {raw_ct!r}")
        return None


def insert_correction_rows(
    db: DatabaseConnection,
    former_records: list[dict],
    misdeleted_ids: set[int],
    columns: list[str],
    dry_run: bool,
) -> int:
    """Insert an ``is_delete=0`` correction version row for each wrongly deleted record.

    Every Former record whose ``id`` converts to an int contained in
    ``misdeleted_ids`` gets a new ODS row built from the raw API payload.
    ``content_hash`` is the MD5 of the key-sorted JSON payload followed by
    ``"|is_delete=0"``; the INSERT uses ON CONFLICT (id, content_hash)
    DO NOTHING, so repeating the run does not duplicate rows.

    Args:
        db: Connection providing ``execute(sql, params)``. Not used in dry-run
            mode. Committing is left to the caller.
        former_records: Raw records returned by the Former endpoint.
        misdeleted_ids: Ids that should receive a correction row.
        columns: Unused; kept so existing callers keep working.
        dry_run: When True, only print what would be inserted.

    Returns:
        The number of records processed (INSERT statements issued, or rows
        reported in dry-run mode). Rows skipped by ON CONFLICT are still
        counted.
    """
    import hashlib

    # Phase 1: pick the records to repair and derive their hash / create_time.
    pending = []
    for rec in former_records:
        rec_id = rec.get("id")
        if rec_id is None:
            continue
        try:
            rec_id = int(rec_id)
        except (ValueError, TypeError):
            continue
        if rec_id not in misdeleted_ids:
            continue

        payload_json = json.dumps(rec, ensure_ascii=False, sort_keys=True)
        hash_input = payload_json + "|is_delete=0"
        content_hash = hashlib.md5(hash_input.encode("utf-8")).hexdigest()
        pending.append((rec_id, rec, content_hash, _parse_create_time(rec, rec_id)))

    if dry_run:
        for rec_id, _rec, content_hash, create_time_val in pending:
            print(f" [DRY-RUN] 将修正 id={rec_id}, create_time={create_time_val}, content_hash={content_hash}")
        return len(pending)

    if not pending:
        return 0

    # Phase 2: write. Everything that does not vary per row is built once.
    from psycopg2.extras import Json as PgJson

    insert_sql = """
        INSERT INTO ods.assistant_service_records
        (id, payload, is_delete, content_hash, fetched_at, source_file, create_time)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (id, content_hash) DO NOTHING
    """
    fetched_at = datetime.now(TZ)
    source_file = f"fix_misdelete_former_{WINDOW_START[:10]}_{WINDOW_END[:10]}"

    def _dumps(value):
        # Keep non-ASCII text readable in the stored payload.
        return json.dumps(value, ensure_ascii=False)

    for rec_id, rec, content_hash, create_time_val in pending:
        db.execute(insert_sql, (
            rec_id,
            PgJson(rec, dumps=_dumps),
            0,
            content_hash,
            fetched_at,
            source_file,
            create_time_val,
        ))

    return len(pending)
|
||||
|
||||
|
||||
def main():
    """Run the repair end to end.

    Steps: load the configuration, connect to the database, find the ids whose
    latest ODS version is deleted, fetch the Former endpoint, intersect the two
    id sets, insert correction rows for the intersection and commit (skipped
    with --dry-run). The database connection is closed on every exit path,
    including when a step raises.

    Raises:
        RuntimeError: if ``db.dsn`` is not configured.
    """
    args = parse_args()
    config = AppConfig.load()
    dsn = config.get("db.dsn")
    if not dsn:
        raise RuntimeError("db.dsn 未配置")

    print("=== 修复 ODS_ASSISTANT_LEDGER 误删记录 ===")
    print(f"窗口: {WINDOW_START} ~ {WINDOW_END}")
    print(f"Former endpoint: {FORMER_ENDPOINT}")
    print(f"目标表: {TABLE}")
    if args.dry_run:
        print("[DRY-RUN 模式]")
    print()

    # 1. Connect to the database
    db = DatabaseConnection(dsn, session={"timezone": "Asia/Shanghai"})
    try:
        print("数据库连接成功")

        # 2. Find the records currently marked as deleted
        misdeleted = find_misdeleted_ids(db)
        print(f"ODS 中窗口内 is_delete=1 的记录数: {len(misdeleted)}")
        if not misdeleted:
            print("无需修复,退出")
            return

        # 3. Fetch the data from the Former endpoint
        api = APIClient(
            base_url=config.get("api.base_url"),
            token=config.get("api.token"),
            timeout=config.get("api.timeout", 20),
            retry_max=config.get("api.retry_max", 3),
        )
        print("正在调用 Former endpoint...")
        former_records = fetch_former_records(api)
        print(f"Former endpoint 返回 {len(former_records)} 条记录")

        # 4. Match: which Former records are marked is_delete=1 in ODS
        former_ids = set()
        for rec in former_records:
            rid = rec.get("id")
            if rid is None:
                continue
            try:
                former_ids.add(int(rid))
            except (ValueError, TypeError):
                # Ids that cannot be converted to int are ignored.
                pass

        recoverable = misdeleted & former_ids
        print(f"可修复记录数: {len(recoverable)} (ODS误删={len(misdeleted)}, Former返回={len(former_ids)})")

        if not recoverable:
            print("Former endpoint 未返回任何误删记录,退出")
            return

        # 5. Read the table structure
        columns = get_table_columns(db)

        # 6. Insert the correction version rows
        corrected = insert_correction_rows(db, former_records, recoverable, columns, args.dry_run)

        if not args.dry_run:
            db.commit()
            print(f"\n已插入 {corrected} 条修正版本行(is_delete=0)")
            print("\n下一步:跑 DWD 加载以同步修正数据到 DWD 层")
            print(" cd apps/etl/connectors/feiqiu")
            # The dates are derived from the window constants so the hint
            # cannot drift from the window that was actually repaired.
            print(
                " python -m cli.main --tasks DWD_LOAD_FROM_ODS"
                f' --window-start "{WINDOW_START[:10]}" --window-end "{WINDOW_END[:10]}"'
                " --force-window-override"
            )
        else:
            print(f"\n[DRY-RUN] 将修正 {corrected} 条记录")
    finally:
        # Release the connection on early return and on failure as well.
        db.close()

    print("\n完成")
|
||||
|
||||
|
||||
# Run the repair only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user