微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
"""One-off research script: fetch every group-buy package detail and write it to ods.group_buy_package_details.

Usage (cwd = C:\\NeoZQYY/):
    python apps/etl/connectors/feiqiu/scripts/research_coupon_details.py

Flow:
    1. Read all coupon_ids (the ``id`` column) from ods.group_buy_packages.
    2. Call the QueryPackageCouponInfo detail endpoint serially (RateLimiter, 5-20s).
    3. Extract structured fields, compute content_hash, keep the raw payload.
    4. UPSERT into ods.group_buy_package_details.

Requirement coverage: Appendix B, research items 3 and 4.
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
# ── Environment setup ───────────────────────────────────────────────────
# Load the repo-root .env. The original note says the script cwd is
# apps/etl/connectors/feiqiu/, but the path below is derived from __file__,
# so locating the .env does not depend on the current working directory.
from dotenv import load_dotenv
_SCRIPT_DIR = Path(__file__).resolve().parent  # scripts/
_FEIQIU_DIR = _SCRIPT_DIR.parent  # apps/etl/connectors/feiqiu/
_REPO_ROOT = _FEIQIU_DIR.parents[3]  # → connectors/ → etl/ → apps/ → root
load_dotenv(_REPO_ROOT / ".env")
# Validate required environment variables. A variable that is unset OR empty
# counts as missing, and the script exits before any project module is imported.
_REQUIRED_ENV = ("FETCH_ROOT", "EXPORT_ROOT", "PG_DSN", "TEST_DB_DSN")
_missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)]
if _missing:
    sys.exit(f"ERROR: 缺少必需环境变量: {', '.join(_missing)}")
TEST_DB_DSN = os.environ["TEST_DB_DSN"]
# Make sure the feiqiu directory is on sys.path so the local packages below
# can be imported even when the script is launched from the repository root.
if str(_FEIQIU_DIR) not in sys.path:
    sys.path.insert(0, str(_FEIQIU_DIR))
# ── Dependency imports ──────────────────────────────────────────────────
# These must stay below the sys.path patch above, hence the E402 suppressions.
from psycopg2.extras import Json # noqa: E402
from config.settings import AppConfig # noqa: E402
from api.client import APIClient # noqa: E402
from api.rate_limiter import RateLimiter # noqa: E402
from database.connection import DatabaseConnection # noqa: E402
# Reuse the field-extraction logic from ods_tasks.py
from tasks.ods.ods_tasks import _group_package_detail_process_fn # noqa: E402
def main():
    """Fetch the detail of every group-buy package and upsert it into the test database.

    Steps:
        1. Load the application config.
        2. Connect to the database named by TEST_DB_DSN.
        3. Read the distinct ids from ods.group_buy_packages.
        4. Call /PackageCoupon/QueryPackageCouponInfo once per id, serially,
           pausing on the rate limiter between calls.
        5. Upsert each extracted record, committing row by row.
        6. Print a success / failure / skip summary.

    The database connection is closed on every exit path, including
    Ctrl-C and unexpected errors raised while the loop is running.
    """
    # ── 1. Load config ───────────────────────────────────────────────
    config = AppConfig.load()
    print(f"✅ 配置加载完成 (store_id={config.get('app.store_id')})")

    # ── 2. Connect to the test database ──────────────────────────────
    db = DatabaseConnection(
        dsn=TEST_DB_DSN,
        session=config["db"].get("session", {}),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    try:
        # Only the part after the last '@' is shown so URL-style credentials
        # stay out of the console.
        # NOTE(review): a DSN without '@' (keyword/value form) is printed in
        # full, password included — confirm which DSN form is used.
        print(f"✅ 已连接测试库: {TEST_DB_DSN.split('@')[-1]}")

        # ── 3. Collect every coupon_id ───────────────────────────────
        rows = db.query("SELECT DISTINCT id FROM ods.group_buy_packages ORDER BY id")
        coupon_ids = [r["id"] for r in rows]
        total = len(coupon_ids)
        print(f"📋 共 {total} 个 coupon_id 待拉取")
        if not coupon_ids:
            print("⚠️ 没有找到任何 coupon_id退出")
            return

        # ── 4. API client + rate limiter ─────────────────────────────
        api = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config.get("api.timeout_sec", 20),
        )
        limiter = RateLimiter(min_interval=5.0, max_interval=20.0)

        # ── 5. Fetch details one by one ──────────────────────────────
        counts = {"success": 0, "fail": 0, "skip": 0}
        for idx, cid in enumerate(coupon_ids, 1):
            print(f"\n[{idx}/{total}] coupon_id={cid} ...", end=" ", flush=True)
            counts[_fetch_and_store_detail(api, db, cid)] += 1

            # Pause before the next request (never after the last one).
            # The same check now guards every outcome: previously an
            # interrupted wait after an API error or an empty response was
            # ignored and the loop kept calling the API without pausing.
            if idx < total and not limiter.wait():
                print("⚠️ 等待被中断")
                break

        # ── 6. Summary ───────────────────────────────────────────────
        print("\n" + "=" * 50)
        print(
            f"📊 拉取完成: 成功={counts['success']}, 失败={counts['fail']}, "
            f"跳过={counts['skip']}, 总计={total}"
        )
        print("=" * 50)
    finally:
        # Release the connection even when the loop is aborted by an
        # exception or KeyboardInterrupt.
        db.close()


def _fetch_and_store_detail(api, db, cid) -> str:
    """Fetch one coupon detail and upsert it; return "success", "fail" or "skip".

    "fail" covers both an API error and a database write error (the write is
    rolled back). "skip" means the extractor returned no record for the response.
    """
    try:
        resp = api.get(
            "/PackageCoupon/QueryPackageCouponInfo",
            {"couponId": cid},
        )
    except Exception as e:
        print(f"❌ API 错误: {e}")
        return "fail"

    # Field extraction is shared with the regular ODS task.
    records = _group_package_detail_process_fn(resp)
    if not records:
        print("⚠️ 响应无有效数据,跳过")
        return "skip"
    record = records[0]

    # Commit per row so one bad record does not discard earlier writes.
    try:
        _upsert_detail(db, record)
        db.commit()
        print(f"✅ 写入成功 (hash={record['content_hash'][:8]}...)")
    except Exception as e:
        db.rollback()
        print(f"❌ 写入失败: {e}")
        return "fail"
    return "success"
def _upsert_detail(db: DatabaseConnection, record: dict) -> None:
    """Insert one detail record into ods.group_buy_package_details, or update it in place.

    The row is keyed by ``coupon_id``. When a row with that key already exists,
    every other listed column is overwritten with the incoming value and
    ``fetched_at`` is set to ``now()``. ``fetched_at`` is not part of the insert
    column list, so on a fresh insert it is left to the table definition
    (presumably a column default — confirm against the DDL).

    Args:
        db: Connection wrapper; only its ``execute(sql, params)`` method is used.
            Committing is left to the caller.
        record: Extracted detail fields keyed by column name. Missing keys are
            written as NULL.
    """
    plain_leading = (
        "coupon_id", "package_name", "duration", "start_time", "end_time",
        "add_start_clock", "add_end_clock", "is_enabled", "is_delete",
        "site_id", "tenant_id", "create_time", "creator_name",
    )
    jsonb_middle = (
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
    )
    # (column name, is JSONB) pairs in the exact order they appear in the statement.
    field_specs = (
        [(name, False) for name in plain_leading]
        + [(name, True) for name in jsonb_middle]
        + [("content_hash", False), ("payload", True)]
    )

    # JSONB values go through the Json adapter; None stays None so it is stored as SQL NULL.
    params = []
    for name, is_jsonb in field_specs:
        raw = record.get(name)
        params.append(Json(raw) if is_jsonb and raw is not None else raw)

    names = [name for name, _ in field_specs]
    # Every column except the conflict key is refreshed from the incoming row.
    assignments = [f"{name} = EXCLUDED.{name}" for name in names if name != "coupon_id"]
    assignments.append("fetched_at = now()")

    statement = " ".join((
        f"INSERT INTO ods.group_buy_package_details ({', '.join(names)})",
        f"VALUES ({', '.join('%s' for _ in names)})",
        f"ON CONFLICT (coupon_id) DO UPDATE SET {', '.join(assignments)}",
    ))
    db.execute(statement, params)
# Start the research job only when this file is executed as a script.
if __name__ == "__main__":
    main()