微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
189
apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
Normal file
189
apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
Normal file
@@ -0,0 +1,189 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""一次性调研脚本:拉取全部团购详情并写入 ods.group_buy_package_details。
|
||||
|
||||
用法(cwd = C:\\NeoZQYY/):
|
||||
python apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
|
||||
|
||||
流程:
|
||||
1. 从 ods.group_buy_packages 读取所有 coupon_id(id 列)
|
||||
2. 串行调用 QueryPackageCouponInfo 详情接口(RateLimiter 5-20s)
|
||||
3. 提取结构化字段 + 计算 content_hash + 保留原始 payload
|
||||
4. UPSERT 写入 ods.group_buy_package_details
|
||||
|
||||
需求覆盖:附录 B 调研 3、4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ── 环境初始化 ──────────────────────────────────────────────────────────
|
||||
# Load the repo-root .env; the path is resolved from this file's location, so it does not depend on the current working directory.
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Resolve the directory layout from this file's own location.
_SCRIPT_DIR = Path(__file__).resolve().parent  # .../feiqiu/scripts/
_FEIQIU_DIR = _SCRIPT_DIR.parent  # apps/etl/connectors/feiqiu/
# Four levels up from feiqiu/: connectors/ -> etl/ -> apps/ -> repository root.
_REPO_ROOT = _FEIQIU_DIR.parents[3]

# Populate os.environ from the repository-level .env file.
load_dotenv(_REPO_ROOT / ".env")

# Abort early when any required variable is unset or empty.
_REQUIRED_ENV = ("FETCH_ROOT", "EXPORT_ROOT", "PG_DSN", "TEST_DB_DSN")
_missing = [name for name in _REQUIRED_ENV if not os.getenv(name)]
if _missing:
    raise SystemExit(f"ERROR: 缺少必需环境变量: {', '.join(_missing)}")

# This script talks to the test database only.
TEST_DB_DSN = os.environ["TEST_DB_DSN"]

# Put the feiqiu directory on sys.path so the local packages (config, api,
# database, tasks) can be imported even when the script is launched from the
# repository root.
if str(_FEIQIU_DIR) not in sys.path:
    sys.path.insert(0, str(_FEIQIU_DIR))
|
||||
|
||||
# ── 依赖导入 ──────────────────────────────────────────────────────────
|
||||
from psycopg2.extras import Json # noqa: E402
|
||||
|
||||
from config.settings import AppConfig # noqa: E402
|
||||
from api.client import APIClient # noqa: E402
|
||||
from api.rate_limiter import RateLimiter # noqa: E402
|
||||
from database.connection import DatabaseConnection # noqa: E402
|
||||
|
||||
# 复用 ods_tasks.py 中的字段提取逻辑
|
||||
from tasks.ods.ods_tasks import _group_package_detail_process_fn # noqa: E402
|
||||
|
||||
|
||||
def _process_coupon(api, db, cid) -> str:
    """Fetch one coupon's detail payload and upsert it.

    Prints a one-line outcome and returns one of ``"success"``, ``"fail"``
    or ``"skip"`` so the caller can keep the tallies.

    Args:
        api: Client exposing ``get(path, params)``.
        db: Connection exposing ``commit()`` / ``rollback()``; also passed to
            ``_upsert_detail``.
        cid: Coupon id sent as the ``couponId`` request parameter.
    """
    try:
        resp = api.get(
            "/PackageCoupon/QueryPackageCouponInfo",
            {"couponId": cid},
        )
    except Exception as e:  # best-effort: one bad coupon must not stop the run
        print(f"❌ API 错误: {e}")
        return "fail"

    # Field extraction is shared with the regular ODS task.
    records = _group_package_detail_process_fn(resp)
    if not records:
        print("⚠️ 响应无有效数据,跳过")
        return "skip"

    record = records[0]
    try:
        _upsert_detail(db, record)
        db.commit()
    except Exception as e:  # per-row failure: undo and move on
        db.rollback()
        print(f"❌ 写入失败: {e}")
        return "fail"

    # Reporting happens outside the DB try-block so that a record without a
    # content_hash cannot trigger a rollback after a successful commit or be
    # counted as both a success and a failure.
    content_hash = str(record.get("content_hash") or "")
    print(f"✅ 写入成功 (hash={content_hash[:8]}...)")
    return "success"


def main():
    """Fetch every group-buy coupon detail and upsert it into the test DB.

    Steps: load config, connect to the database given by ``TEST_DB_DSN``,
    read all distinct ids from ``ods.group_buy_packages``, call the detail
    endpoint for each id serially with a rate-limit pause between calls,
    upsert each result, then print a summary. The database connection is
    closed on every exit path, including exceptions.
    """
    # 1. Load configuration.
    config = AppConfig.load()
    print(f"✅ 配置加载完成 (store_id={config.get('app.store_id')})")

    # 2. Connect to the test database.
    db = DatabaseConnection(
        dsn=TEST_DB_DSN,
        session=config["db"].get("session", {}),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    try:
        # NOTE(review): only the part after the last '@' is printed, which
        # hides credentials for URI-style DSNs; a key/value DSN without '@'
        # would be printed in full — confirm the DSN format in use.
        print(f"✅ 已连接测试库: {TEST_DB_DSN.split('@')[-1]}")

        # 3. Collect every coupon id to fetch.
        rows = db.query("SELECT DISTINCT id FROM ods.group_buy_packages ORDER BY id")
        coupon_ids = [r["id"] for r in rows]
        total = len(coupon_ids)
        print(f"📋 共 {total} 个 coupon_id 待拉取")

        if not coupon_ids:
            print("⚠️ 没有找到任何 coupon_id,退出")
            return

        # 4. API client and rate limiter (5-20 s between calls).
        api = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config.get("api.timeout_sec", 20),
        )
        limiter = RateLimiter(min_interval=5.0, max_interval=20.0)

        # 5/6. Fetch and upsert serially.
        counts = {"success": 0, "fail": 0, "skip": 0}
        for idx, cid in enumerate(coupon_ids, 1):
            print(f"\n[{idx}/{total}] coupon_id={cid} ...", end=" ", flush=True)
            counts[_process_coupon(api, db, cid)] += 1

            # Single rate-limit point for every outcome (no wait after the
            # last item). A falsy return from wait() is treated as an
            # interruption and stops the run, whatever the item's outcome.
            if idx < total and not limiter.wait():
                print("⚠️ 等待被中断")
                break

        # 7. Summary.
        print("\n" + "=" * 50)
        print(
            f"📊 拉取完成: 成功={counts['success']}, 失败={counts['fail']}, "
            f"跳过={counts['skip']}, 总计={total}"
        )
        print("=" * 50)
    finally:
        # Always release the connection, even on errors or Ctrl-C.
        db.close()
|
||||
|
||||
|
||||
def _upsert_detail(db: DatabaseConnection, record: dict) -> None:
|
||||
"""UPSERT 单条详情记录到 ods.group_buy_package_details。
|
||||
|
||||
ON CONFLICT (coupon_id) 时更新所有字段。
|
||||
"""
|
||||
columns = [
|
||||
"coupon_id", "package_name", "duration", "start_time", "end_time",
|
||||
"add_start_clock", "add_end_clock", "is_enabled", "is_delete",
|
||||
"site_id", "tenant_id", "create_time", "creator_name",
|
||||
"table_area_ids", "table_area_names", "assistant_services",
|
||||
"groupon_site_infos", "package_services", "coupon_details_list",
|
||||
"content_hash", "payload",
|
||||
]
|
||||
|
||||
# JSONB 字段需要用 Json 适配器
|
||||
_JSONB_COLS = {
|
||||
"table_area_ids", "table_area_names", "assistant_services",
|
||||
"groupon_site_infos", "package_services", "coupon_details_list",
|
||||
"payload",
|
||||
}
|
||||
|
||||
values = []
|
||||
for col in columns:
|
||||
val = record.get(col)
|
||||
if col in _JSONB_COLS and val is not None:
|
||||
val = Json(val)
|
||||
values.append(val)
|
||||
|
||||
col_list = ", ".join(columns)
|
||||
placeholders = ", ".join(["%s"] * len(columns))
|
||||
|
||||
# 除 coupon_id 外的所有列用于 UPDATE
|
||||
update_cols = [c for c in columns if c != "coupon_id"]
|
||||
update_set = ", ".join(f"{c} = EXCLUDED.{c}" for c in update_cols)
|
||||
|
||||
sql = (
|
||||
f"INSERT INTO ods.group_buy_package_details ({col_list}) "
|
||||
f"VALUES ({placeholders}) "
|
||||
f"ON CONFLICT (coupon_id) DO UPDATE SET {update_set}, "
|
||||
f"fetched_at = now()"
|
||||
)
|
||||
|
||||
db.execute(sql, values)
|
||||
|
||||
|
||||
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user