feat: P1-P3 全栈集成 — 数据库基础 + DWS 扩展 + 小程序鉴权 + 工程化体系

## P1 数据库基础
- zqyy_app: 创建 auth/biz schema、FDW 连接 etl_feiqiu
- etl_feiqiu: 创建 app schema RLS 视图、商品库存预警表
- 清理 assistant_abolish 残留数据

## P2 ETL/DWS 扩展
- 新增 DWS 助教订单贡献度表 (dws.assistant_order_contribution)
- 新增 assistant_order_contribution_task 任务及 RLS 视图
- member_consumption 增加充值字段、assistant_daily 增加处罚字段
- 更新 ODS/DWD/DWS 任务文档及业务规则文档
- 更新 consistency_checker、flow_runner、task_registry 等核心模块

## P3 小程序鉴权系统
- 新增 xcx_auth 路由/schema(微信登录 + JWT)
- 新增 wechat/role/matching/application 服务层
- zqyy_app 鉴权表迁移 + 角色权限种子数据
- auth/dependencies.py 支持小程序 JWT 鉴权

## 文档与审计
- 新增 DOCUMENTATION-MAP 文档导航
- 新增 7 份 BD_Manual 数据库变更文档
- 更新 DDL 基线快照(etl_feiqiu 6 schema + zqyy_app auth)
- 新增全栈集成审计记录、部署检查清单更新
- 新增 BACKLOG 路线图、FDW→Core 迁移计划

## Kiro 工程化
- 新增 5 个 Spec(P1/P2/P3/全栈集成/核心业务)
- 新增审计自动化脚本(agent_on_stop/build_audit_context/compliance_prescan)
- 新增 6 个 Hook(合规检查/会话日志/提交审计等)
- 新增 doc-map steering 文件

## 运维与测试
- 新增 ops 脚本:迁移验证/API 健康检查/ETL 监控/集成报告
- 新增属性测试:test_dws_contribution / test_auth_system
- 清理过期 export 报告文件
- 更新 .gitignore 排除规则
This commit is contained in:
Neo
2026-02-26 08:03:53 +08:00
parent fafc95e64c
commit b25308c3f4
224 changed files with 17660 additions and 32198 deletions

View File

@@ -0,0 +1,246 @@
# -*- coding: utf-8 -*-
"""
修复 ODS_ASSISTANT_LEDGER 误删记录(2025-11-21 ~ 2025-11-23)
背景:
run_id 8932(2026-02-24 00:24)快照对比时,recent endpoint 因数据保留期滚动
丢失了 2025-11-21~2025-11-23 的 67 条记录,_mark_missing_as_deleted 将其误标
为 is_delete=1。
修复策略:
1. 调 Former endpoint 拉取 2025-11-01 ~ 2025-11-24 的完整数据
2. 用 ODS 任务的 _insert_records_schema_aware 入库(content_hash 去重保证幂等)
3. 对比 ODS 中 is_delete=1 但 Former 返回 is_delete=0 的记录,INSERT 修正版本行
4. 完成后提示用户跑 DWD 加载
用法:
cd apps/etl/connectors/feiqiu
python ../../../../scripts/ops/fix_assistant_ledger_misdelete.py [--dry-run]
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo
# Load environment variables before any ETL module is imported.
from dotenv import load_dotenv

# This script is run from scripts/ops/ (see the usage note in the module
# docstring), so two levels above its directory is the repository root.
_ROOT = Path(__file__).resolve().parents[2]

# override=False: variables that are already set are left untouched, so the
# process environment wins over the root .env, which in turn wins over the
# connector-specific .env loaded below.
load_dotenv(_ROOT / ".env", override=False)
_FEIQIU_ENV = _ROOT / "apps" / "etl" / "connectors" / "feiqiu" / ".env"
if _FEIQIU_ENV.exists():
    load_dotenv(_FEIQIU_ENV, override=False)

# Make the ETL connector's top-level modules importable from this script.
sys.path.insert(0, str(_ROOT / "apps" / "etl" / "connectors" / "feiqiu"))
from config.settings import AppConfig
from api.client import APIClient
from database.connection import DatabaseConnection
# Timezone used when stamping fetched_at on the correction rows.
TZ = ZoneInfo("Asia/Shanghai")
# Path of the "Former" API endpoint that is queried for the repair window.
FORMER_ENDPOINT = "/AssistantPerformance/GetFormerOrderAssistantDetails"
# Target table name; only printed in the startup banner — the SQL statements
# in this script spell the table name out literally.
TABLE = "ods.assistant_service_records"
# Sent to the API as the ``siteId`` request parameter.
STORE_ID = 2790685415443269
# Repair window. Passed to the API as startTime/endTime and used by the ODS
# query as a start-inclusive, end-exclusive range on create_time.
WINDOW_START = "2025-11-01 00:00:00"
WINDOW_END = "2025-11-24 00:00:00"
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the repair script.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what a plain ``parse_args()``
            call has always done; passing a list allows programmatic use.

    Returns:
        Namespace with a boolean ``dry_run`` attribute (``--dry-run`` flag).
    """
    parser = argparse.ArgumentParser(description="修复 ODS_ASSISTANT_LEDGER 误删记录")
    parser.add_argument("--dry-run", action="store_true", help="仅查询不写入")
    return parser.parse_args(argv)
def fetch_former_records(api: APIClient) -> list[dict]:
    """Fetch all records of the repair window from the Former endpoint.

    The request carries the module-level STORE_ID as ``siteId`` and the
    WINDOW_START / WINDOW_END constants as ``startTime`` / ``endTime``.

    Args:
        api: Client whose ``get_paginated`` method performs the paged request.

    Returns:
        The first element of the pair returned by ``get_paginated``; the
        second element is discarded.
    """
    query = dict(siteId=STORE_ID, startTime=WINDOW_START, endTime=WINDOW_END)
    paged_result = api.get_paginated(
        endpoint=FORMER_ENDPOINT,
        params=query,
        page_size=200,
        data_path=("data",),
        list_key="orderAssistantDetails",
    )
    records, _ignored = paged_result
    return records
def find_misdeleted_ids(db: DatabaseConnection) -> set[int]:
    """Return the ids whose latest ODS version in the window is is_delete=1.

    For every id with ``create_time`` inside [WINDOW_START, WINDOW_END) the
    query keeps only the row with the greatest ``fetched_at`` (NULLs sorted
    last); ids whose kept row has ``is_delete == 1`` are returned. The
    caller narrows this set to the records the Former endpoint still returns.

    Args:
        db: Connection whose ``query`` method returns rows addressable by
            column name.

    Returns:
        Set of record ids currently flagged as deleted.
    """
    sql = (
        "\n"
        "SELECT DISTINCT ON (id) id, is_delete, fetched_at\n"
        "FROM ods.assistant_service_records\n"
        "WHERE create_time >= %s AND create_time < %s\n"
        "ORDER BY id, fetched_at DESC NULLS LAST\n"
    )
    latest_versions = db.query(sql, (WINDOW_START, WINDOW_END))

    flagged = set()
    for row in latest_versions:
        if row["is_delete"] == 1:
            flagged.add(row["id"])
    return flagged
def get_table_columns(db: DatabaseConnection) -> list[str]:
    """Return the column names of ods.assistant_service_records.

    The names come from ``information_schema.columns`` and are ordered by
    their ordinal position in the table.

    Args:
        db: Connection whose ``query`` method returns rows addressable by
            column name.
    """
    sql = (
        "\n"
        "SELECT column_name\n"
        "FROM information_schema.columns\n"
        "WHERE table_schema = 'ods' AND table_name = 'assistant_service_records'\n"
        "ORDER BY ordinal_position\n"
    )
    column_rows = db.query(sql)
    return [row["column_name"] for row in column_rows]
def _parse_create_time(rec: dict):
    """Return the record's creation time as a datetime, or None if absent/unparseable."""
    raw = rec.get("create_time") or rec.get("createTime") or rec.get("Create_time")
    if not raw:
        return None
    # Imported lazily so records without a creation time never need dateutil.
    from dateutil import parser as dtparser

    try:
        return dtparser.parse(str(raw))
    except (ValueError, TypeError, OverflowError):
        # Best effort: an unparseable timestamp yields a NULL create_time.
        return None


def insert_correction_rows(
    db: DatabaseConnection,
    former_records: list[dict],
    misdeleted_ids: set[int],
    columns: list[str],
    dry_run: bool,
) -> int:
    """Insert an is_delete=0 correction version for each mis-deleted record.

    Each Former API record whose id is in ``misdeleted_ids`` becomes one new
    ODS row: the raw record as payload, ``is_delete = 0``, a shared
    ``fetched_at`` timestamp and a content hash of
    ``md5(sorted-key payload JSON + "|is_delete=0")``. The statement uses
    ``ON CONFLICT (id, content_hash) DO NOTHING`` so re-runs do not duplicate
    rows.

    Args:
        db: Connection used for the INSERTs (not touched when ``dry_run``).
        former_records: Raw records returned by the Former endpoint.
        misdeleted_ids: Ids that should receive a correction row.
        columns: Unused; kept so existing callers keep working.
        dry_run: When true, only print what would be inserted.

    Returns:
        Number of distinct ``(id, content_hash)`` rows processed. A record
        that appears more than once in ``former_records`` with an identical
        payload is counted (and inserted) once.
        NOTE(review): rows that already exist in the table from an earlier
        run are still counted, because the INSERT's affected-row count is not
        inspected here.
    """
    import hashlib

    if not former_records or not misdeleted_ids:
        return 0

    if not dry_run:
        # Loop invariants: one driver import, one timestamp, one source tag
        # and one SQL text for the whole batch.
        from psycopg2.extras import Json as PgJson

        fetched_at = datetime.now(TZ)
        source_file = f"fix_misdelete_former_{WINDOW_START[:10]}_{WINDOW_END[:10]}"
        sql = """
            INSERT INTO ods.assistant_service_records
                (id, payload, is_delete, content_hash, fetched_at, source_file, create_time)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (id, content_hash) DO NOTHING
        """

    seen: set[tuple[int, str]] = set()
    corrected = 0
    for rec in former_records:
        try:
            rec_id = int(rec.get("id"))
        except (ValueError, TypeError):
            # Missing (None) or non-numeric id: nothing to match against.
            continue
        if rec_id not in misdeleted_ids:
            continue

        # Key order is normalised so logically equal payloads hash equally.
        payload_json = json.dumps(rec, ensure_ascii=False, sort_keys=True)
        hash_input = payload_json + "|is_delete=0"
        content_hash = hashlib.md5(hash_input.encode("utf-8")).hexdigest()

        # Mirror the table's conflict key so a record repeated in the API
        # result is neither re-inserted nor double-counted.
        key = (rec_id, content_hash)
        if key in seen:
            continue
        seen.add(key)

        # NOTE(review): a row stored with a NULL create_time would not match
        # the create_time window filter used to detect mis-deleted ids —
        # confirm whether such rows need separate handling.
        create_time_val = _parse_create_time(rec)

        if dry_run:
            print(f" [DRY-RUN] 将修正 id={rec_id}, create_time={create_time_val}, content_hash={content_hash}")
            corrected += 1
            continue

        db.execute(sql, (
            rec_id,
            PgJson(rec, dumps=lambda v: json.dumps(v, ensure_ascii=False)),
            0,
            content_hash,
            fetched_at,
            source_file,
            create_time_val,
        ))
        corrected += 1
    return corrected
def main():
    """Run the repair: detect mis-deleted ODS rows and insert corrections.

    Steps:
        1. Connect to the database configured under ``db.dsn``.
        2. Collect the ids whose latest ODS version in the window is deleted.
        3. Fetch the same window from the Former endpoint.
        4. Keep only the ids that the Former endpoint still returns.
        5. Insert correction rows for them (or just list them with
           ``--dry-run``) and commit.

    Raises:
        RuntimeError: If ``db.dsn`` is not configured.
    """
    args = parse_args()
    config = AppConfig.load()
    dsn = config.get("db.dsn")
    if not dsn:
        raise RuntimeError("db.dsn 未配置")

    print("=== 修复 ODS_ASSISTANT_LEDGER 误删记录 ===")
    print(f"窗口: {WINDOW_START} ~ {WINDOW_END}")
    print(f"Former endpoint: {FORMER_ENDPOINT}")
    print(f"目标表: {TABLE}")
    if args.dry_run:
        print("[DRY-RUN 模式]")
    print()

    # 1. Connect to the database.
    db = DatabaseConnection(dsn, session={"timezone": "Asia/Shanghai"})
    # try/finally guarantees the connection is closed on every exit path,
    # including exceptions raised by the API call or an INSERT.
    try:
        print("数据库连接成功")

        # 2. Ids currently flagged as deleted inside the window.
        misdeleted = find_misdeleted_ids(db)
        print(f"ODS 中窗口内 is_delete=1 的记录数: {len(misdeleted)}")
        if not misdeleted:
            print("无需修复,退出")
            return

        # 3. Pull the window from the Former endpoint.
        api = APIClient(
            base_url=config.get("api.base_url"),
            token=config.get("api.token"),
            timeout=config.get("api.timeout", 20),
            retry_max=config.get("api.retry_max", 3),
        )
        print("正在调用 Former endpoint...")
        former_records = fetch_former_records(api)
        print(f"Former endpoint 返回 {len(former_records)} 条记录")

        # 4. A record is recoverable when the Former endpoint still returns
        #    it although ODS has it flagged as deleted.
        former_ids = set()
        for rec in former_records:
            rid = rec.get("id")
            if rid is None:
                continue
            try:
                former_ids.add(int(rid))
            except (ValueError, TypeError):
                # Non-numeric ids cannot match an ODS id; ignore them.
                pass
        recoverable = misdeleted & former_ids
        print(f"可修复记录数: {len(recoverable)} (ODS误删={len(misdeleted)}, Former返回={len(former_ids)})")
        if not recoverable:
            print("Former endpoint 未返回任何误删记录,退出")
            return

        # 5. Table structure (passed through to insert_correction_rows).
        columns = get_table_columns(db)

        # 6. Insert the correction versions.
        corrected = insert_correction_rows(db, former_records, recoverable, columns, args.dry_run)
        if not args.dry_run:
            db.commit()
            print(f"\n已插入 {corrected} 条修正版本行is_delete=0")
            print("\n下一步:跑 DWD 加载以同步修正数据到 DWD 层")
            print(" cd apps/etl/connectors/feiqiu")
            print(' python -m cli.main --tasks DWD_LOAD_FROM_ODS --window-start "2025-11-01" --window-end "2025-11-24" --force-window-override')
        else:
            print(f"\n[DRY-RUN] 将修正 {corrected} 条记录")
    finally:
        db.close()
    print("\n完成")


if __name__ == "__main__":
    main()