微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -14,6 +14,9 @@ import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
|
||||
DOCS_DIR = os.path.join("docs", "api-reference")
|
||||
|
||||
|
||||
@@ -11,6 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
load_dotenv()
|
||||
|
||||
PG_DSN = os.getenv("PG_DSN")
|
||||
|
||||
@@ -23,6 +23,9 @@ from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
|
||||
class DiffKind(str, Enum):
|
||||
"""差异分类枚举。"""
|
||||
|
||||
@@ -13,6 +13,9 @@ from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
load_dotenv()
|
||||
|
||||
SUMMARY_DIR = Path("docs/api-reference/summary")
|
||||
|
||||
@@ -414,6 +414,7 @@ def _check_ods_vs_dwd(
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
|
||||
# 已知的 DWS→DWD 聚合关系映射
|
||||
# 营业日口径:使用 dws.biz_date() 替代 ::date 自然日转换
|
||||
_DWS_DWD_MAP: dict[str, dict] = {
|
||||
"dws.dws_assistant_daily_detail": {
|
||||
"dwd_source": "dwd.dwd_assistant_service_log",
|
||||
@@ -425,28 +426,28 @@ _DWS_DWD_MAP: dict[str, dict] = {
|
||||
"dwd_source": "dwd.dwd_settlement_head",
|
||||
"dws_date_col": "stat_date",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "::date",
|
||||
"dwd_date_cast": "dws.biz_date(%col%)",
|
||||
"description": "财务日度汇总 vs DWD 结账记录",
|
||||
},
|
||||
"dws.dws_member_visit_detail": {
|
||||
"dwd_source": "dwd.dwd_settlement_head",
|
||||
"dws_date_col": "visit_date",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "::date",
|
||||
"dwd_date_cast": "dws.biz_date(%col%)",
|
||||
"description": "会员到店明细 vs DWD 结账记录",
|
||||
},
|
||||
"dws.dws_member_consumption_summary": {
|
||||
"dwd_source": "dwd.dwd_settlement_head",
|
||||
"dws_date_col": "stat_month",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "date_trunc('month', %col%)::date",
|
||||
"dwd_date_cast": "date_trunc('month', dws.biz_date(%col%))::date",
|
||||
"description": "会员消费汇总 vs DWD 结账记录",
|
||||
},
|
||||
"dws.dws_finance_recharge_summary": {
|
||||
"dwd_source": "dwd.dwd_recharge_order",
|
||||
"dws_date_col": "stat_date",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "::date",
|
||||
"dwd_date_cast": "dws.biz_date(%col%)",
|
||||
"description": "充值汇总 vs DWD 充值订单",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -20,6 +20,9 @@ from datetime import datetime
|
||||
|
||||
import requests
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ── 配置 ──────────────────────────────────────────────────────────────────
|
||||
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
|
||||
API_TOKEN = os.environ.get("API_TOKEN", "")
|
||||
|
||||
@@ -12,6 +12,9 @@ import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 常量
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -68,27 +68,6 @@
|
||||
"payload",
|
||||
"content_hash"
|
||||
],
|
||||
"assistant_cancellation_records": [
|
||||
"id",
|
||||
"siteid",
|
||||
"siteprofile",
|
||||
"assistantname",
|
||||
"assistantabolishamount",
|
||||
"assistanton",
|
||||
"pdchargeminutes",
|
||||
"tableareaid",
|
||||
"tablearea",
|
||||
"tableid",
|
||||
"tablename",
|
||||
"trashreason",
|
||||
"createtime",
|
||||
"source_file",
|
||||
"source_endpoint",
|
||||
"fetched_at",
|
||||
"payload",
|
||||
"content_hash",
|
||||
"tenant_id"
|
||||
],
|
||||
"assistant_service_records": [
|
||||
"id",
|
||||
"tenant_id",
|
||||
|
||||
@@ -15,6 +15,9 @@ import sys
|
||||
import time
|
||||
import requests
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ── 配置 ──────────────────────────────────────────────────────────────────
|
||||
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
|
||||
API_TOKEN = os.environ.get("API_TOKEN", "")
|
||||
@@ -58,7 +61,6 @@ CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "exa
|
||||
ACTUAL_LIST_KEY = {
|
||||
"assistant_accounts_master": "assistantInfos",
|
||||
"assistant_service_records": "orderAssistantDetails",
|
||||
"assistant_cancellation_records": "abolitionAssistants",
|
||||
"table_fee_transactions": "siteTableUseDetailsList",
|
||||
"table_fee_discount_records": "taiFeeAdjustInfos",
|
||||
"tenant_goods_master": "tenantGoodsList",
|
||||
|
||||
189
apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
Normal file
189
apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
Normal file
@@ -0,0 +1,189 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""一次性调研脚本:拉取全部团购详情并写入 ods.group_buy_package_details。
|
||||
|
||||
用法(cwd = C:\\NeoZQYY/):
|
||||
python apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
|
||||
|
||||
流程:
|
||||
1. 从 ods.group_buy_packages 读取所有 coupon_id(id 列)
|
||||
2. 串行调用 QueryPackageCouponInfo 详情接口(RateLimiter 5-20s)
|
||||
3. 提取结构化字段 + 计算 content_hash + 保留原始 payload
|
||||
4. UPSERT 写入 ods.group_buy_package_details
|
||||
|
||||
需求覆盖:附录 B 调研 3、4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ── 环境初始化 ──────────────────────────────────────────────────────────
|
||||
# Load the repo-root .env; its path is derived from __file__ below, so it does not depend on the cwd
|
||||
from dotenv import load_dotenv
|
||||
|
||||
_SCRIPT_DIR = Path(__file__).resolve().parent # scripts/
|
||||
_FEIQIU_DIR = _SCRIPT_DIR.parent # apps/etl/connectors/feiqiu/
|
||||
_REPO_ROOT = _FEIQIU_DIR.parents[3] # → connectors/ → etl/ → apps/ → root
|
||||
|
||||
load_dotenv(_REPO_ROOT / ".env")
|
||||
|
||||
# 必需环境变量校验
|
||||
_REQUIRED_ENV = ("FETCH_ROOT", "EXPORT_ROOT", "PG_DSN", "TEST_DB_DSN")
|
||||
_missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)]
|
||||
if _missing:
|
||||
sys.exit(f"ERROR: 缺少必需环境变量: {', '.join(_missing)}")
|
||||
|
||||
TEST_DB_DSN = os.environ["TEST_DB_DSN"]
|
||||
|
||||
# 确保 feiqiu 目录在 sys.path 中,以便从仓库根目录运行时也能 import 本地模块
|
||||
if str(_FEIQIU_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_DIR))
|
||||
|
||||
# ── 依赖导入 ──────────────────────────────────────────────────────────
|
||||
from psycopg2.extras import Json # noqa: E402
|
||||
|
||||
from config.settings import AppConfig # noqa: E402
|
||||
from api.client import APIClient # noqa: E402
|
||||
from api.rate_limiter import RateLimiter # noqa: E402
|
||||
from database.connection import DatabaseConnection # noqa: E402
|
||||
|
||||
# 复用 ods_tasks.py 中的字段提取逻辑
|
||||
from tasks.ods.ods_tasks import _group_package_detail_process_fn # noqa: E402
|
||||
|
||||
|
||||
def main():
    """Fetch every group-buy package detail and upsert it into the test DB.

    Steps, in order:
      1. Load the application config.
      2. Connect to the database named by ``TEST_DB_DSN``.
      3. Read the distinct ``id`` values from ``ods.group_buy_packages``.
      4. Call ``/PackageCoupon/QueryPackageCouponInfo`` once per id, serially,
         pausing between calls with a ``RateLimiter`` built with
         ``min_interval=5.0`` and ``max_interval=20.0``.
      5. Extract fields with ``_group_package_detail_process_fn`` and upsert
         the first returned record, committing per record.
      6. Print a success / failure / skip summary.

    The connection is released in a ``finally`` clause, so it is closed even
    when a step raises. An interrupted rate-limit wait stops the loop no
    matter which outcome (success, API error, empty response) preceded it.
    """
    # ── 1. Load config ────────────────────────────────────────────────
    config = AppConfig.load()
    print(f"✅ 配置加载完成 (store_id={config.get('app.store_id')})")

    # ── 2. Connect to the test database ───────────────────────────────
    db = DatabaseConnection(
        dsn=TEST_DB_DSN,
        session=config["db"].get("session", {}),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    try:
        # Only the part after the last '@' is printed, not the full DSN.
        print(f"✅ 已连接测试库: {TEST_DB_DSN.split('@')[-1]}")

        # ── 3. Collect every coupon id ────────────────────────────────
        rows = db.query("SELECT DISTINCT id FROM ods.group_buy_packages ORDER BY id")
        coupon_ids = [r["id"] for r in rows]
        total = len(coupon_ids)
        print(f"📋 共 {total} 个 coupon_id 待拉取")

        if not coupon_ids:
            print("⚠️ 没有找到任何 coupon_id,退出")
            return  # the finally clause closes the connection

        # ── 4. API client + rate limiter ──────────────────────────────
        api = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config.get("api.timeout_sec", 20),
        )
        limiter = RateLimiter(min_interval=5.0, max_interval=20.0)

        # ── 5. Fetch details serially ─────────────────────────────────
        success_count = 0
        fail_count = 0
        skip_count = 0

        for idx, cid in enumerate(coupon_ids, 1):
            print(f"\n[{idx}/{total}] coupon_id={cid} ...", end=" ", flush=True)

            try:
                resp = api.get(
                    "/PackageCoupon/QueryPackageCouponInfo",
                    {"couponId": cid},
                )
            except Exception as e:
                # Best-effort run: one failed request must not abort the batch.
                print(f"❌ API 错误: {e}")
                fail_count += 1
            else:
                # Field extraction is shared with the regular ODS task.
                records = _group_package_detail_process_fn(resp)
                if not records:
                    print("⚠️ 响应无有效数据,跳过")
                    skip_count += 1
                else:
                    record = records[0]

                    # ── 6. UPSERT ─────────────────────────────────────
                    try:
                        _upsert_detail(db, record)
                        db.commit()
                    except Exception as e:
                        db.rollback()
                        print(f"❌ 写入失败: {e}")
                        fail_count += 1
                    else:
                        # Count and report outside the try block so a
                        # missing content_hash cannot turn a committed
                        # write into a reported failure.
                        success_count += 1
                        hash_prefix = str(record.get("content_hash") or "")[:8]
                        print(f"✅ 写入成功 (hash={hash_prefix}...)")

            # Rate-limit between requests (no wait after the last one).
            # A falsy return is treated as an interrupted wait and stops
            # the run.
            if idx < total and not limiter.wait():
                print("⚠️ 等待被中断")
                break

        # ── 7. Summary ────────────────────────────────────────────────
        print("\n" + "=" * 50)
        print(f"📊 拉取完成: 成功={success_count}, 失败={fail_count}, 跳过={skip_count}, 总计={total}")
        print("=" * 50)
    finally:
        db.close()
|
||||
|
||||
|
||||
def _upsert_detail(db: DatabaseConnection, record: dict) -> None:
    """Insert or update one detail row in ods.group_buy_package_details.

    The row is keyed on ``coupon_id``. On conflict every other listed column
    is overwritten with the incoming value and ``fetched_at`` is set to
    ``now()``. Keys missing from ``record`` are written as NULL.

    Args:
        db: Connection wrapper; only its ``execute(sql, params)`` is used.
        record: Mapping of column name to value for a single coupon detail.
    """
    # Columns whose non-None values are wrapped in the psycopg2 Json adapter.
    jsonb_fields = frozenset((
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
        "payload",
    ))
    # Column order defines the order of the bound parameters.
    ordered_fields = (
        "coupon_id", "package_name", "duration", "start_time", "end_time",
        "add_start_clock", "add_end_clock", "is_enabled", "is_delete",
        "site_id", "tenant_id", "create_time", "creator_name",
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
        "content_hash", "payload",
    )

    def _adapt(name):
        """Return the bound value for *name*, Json-wrapped when required."""
        raw = record.get(name)
        if raw is not None and name in jsonb_fields:
            return Json(raw)
        return raw

    params = [_adapt(name) for name in ordered_fields]

    insert_targets = ", ".join(ordered_fields)
    markers = ", ".join("%s" for _ in ordered_fields)
    # Every column except the conflict key is refreshed on update.
    assignments = ", ".join(
        f"{name} = EXCLUDED.{name}"
        for name in ordered_fields
        if name != "coupon_id"
    )

    statement = (
        f"INSERT INTO ods.group_buy_package_details ({insert_targets}) "
        f"VALUES ({markers}) "
        f"ON CONFLICT (coupon_id) DO UPDATE SET {assignments}, "
        "fetched_at = now()"
    )
    db.execute(statement, params)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -6,6 +6,9 @@ ODS 列数据来自 information_schema.columns WHERE table_schema = 'ods'。
|
||||
import json
|
||||
import os
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
|
||||
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not REPORT_DIR:
|
||||
@@ -16,7 +19,7 @@ NESTED_OBJECTS = {"siteprofile", "tableprofile"}
|
||||
# 22 张需要比对的表
|
||||
TABLES = [
|
||||
"assistant_accounts_master", "settlement_records", "assistant_service_records",
|
||||
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
|
||||
"table_fee_transactions", "table_fee_discount_records",
|
||||
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
|
||||
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
|
||||
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
|
||||
|
||||
@@ -31,7 +31,7 @@ ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_
|
||||
|
||||
TABLES = [
|
||||
"assistant_accounts_master", "settlement_records", "assistant_service_records",
|
||||
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
|
||||
"table_fee_transactions", "table_fee_discount_records",
|
||||
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
|
||||
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
|
||||
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
|
||||
@@ -195,7 +195,7 @@ def classify_ods_only(table_name: str, field: str) -> str:
|
||||
return "ODS 后续版本新增字段(当前使用中的台桌关联订单 ID)"
|
||||
# tenant_id 在某些表中是 ODS 额外添加的
|
||||
if field == "tenant_id" and table_name in (
|
||||
"assistant_cancellation_records", "payment_transactions"
|
||||
"payment_transactions",
|
||||
):
|
||||
return "ODS 额外添加的租户 ID 字段(API 响应中不含,ETL 入库时补充)"
|
||||
# API 后续版本新增字段(文档快照未覆盖)
|
||||
|
||||
@@ -14,7 +14,7 @@ import multiprocessing as mp
|
||||
import subprocess
|
||||
import sys
|
||||
import time as time_mod
|
||||
from datetime import date, datetime, time, timedelta
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
@@ -27,6 +27,7 @@ from tasks.utility.check_cutoff_task import CheckCutoffTask
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
from tasks.ods.ods_tasks import ENABLED_ODS_CODES
|
||||
from utils.logging_utils import build_log_path, configure_logging
|
||||
from neozqyy_shared.datetime_utils import business_date, business_day_range, now_shanghai
|
||||
|
||||
STEP_TIMEOUT_SEC = 120
|
||||
|
||||
@@ -53,6 +54,7 @@ def _compute_dws_window(
|
||||
if dws_start and dws_end and dws_end < dws_start:
|
||||
raise ValueError("dws_end must be >= dws_start")
|
||||
|
||||
cutoff = int(cfg.get("app.business_day_start_hour", 8))
|
||||
store_id = int(cfg.get("app.store_id"))
|
||||
dsn = cfg["db"]["dsn"]
|
||||
session = cfg["db"].get("session")
|
||||
@@ -67,19 +69,22 @@ def _compute_dws_window(
|
||||
if isinstance(mx, date):
|
||||
dws_start = mx - timedelta(days=max(0, int(rebuild_days)))
|
||||
else:
|
||||
dws_start = (datetime.now(tz).date()) - timedelta(days=max(1, int(bootstrap_days)))
|
||||
# 营业日口径:用 business_date 计算"今天"
|
||||
dws_start = business_date(now_shanghai(), cutoff) - timedelta(days=max(1, int(bootstrap_days)))
|
||||
|
||||
if dws_end is None:
|
||||
dws_end = datetime.now(tz).date()
|
||||
dws_end = business_date(now_shanghai(), cutoff)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
start_dt = datetime.combine(dws_start, time.min).replace(tzinfo=tz)
|
||||
# end_dt 取到当天 23:59:59,避免只跑到“当前时刻”的 date() 导致少一天
|
||||
end_dt = datetime.combine(dws_end, time.max).replace(tzinfo=tz)
|
||||
# 营业日口径:窗口边界按 cutoff 小时对齐
|
||||
start_dt = business_day_range(dws_start, cutoff)[0]
|
||||
# end_dt 取到营业日结束(即 dws_end 次日 cutoff 前一秒),覆盖完整营业日
|
||||
end_dt = business_day_range(dws_end, cutoff)[1] - timedelta(seconds=1)
|
||||
return start_dt, end_dt
|
||||
|
||||
|
||||
|
||||
def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
|
||||
dsn = cfg["db"]["dsn"]
|
||||
session = cfg["db"].get("session")
|
||||
@@ -99,21 +104,21 @@ def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
|
||||
|
||||
|
||||
def _iter_daily_windows(window_start: datetime, window_end: datetime) -> list[tuple[datetime, datetime]]:
|
||||
"""按营业日拆分时间窗口。
|
||||
|
||||
window_start/window_end 已按 cutoff 小时对齐(由 _compute_dws_window 保证)。
|
||||
"""
|
||||
if window_start > window_end:
|
||||
return []
|
||||
tz = window_start.tzinfo
|
||||
windows: list[tuple[datetime, datetime]] = []
|
||||
cur = window_start
|
||||
while cur <= window_end:
|
||||
day_start = datetime.combine(cur.date(), time.min).replace(tzinfo=tz)
|
||||
day_end = datetime.combine(cur.date(), time.max).replace(tzinfo=tz)
|
||||
if day_start < window_start:
|
||||
day_start = window_start
|
||||
if day_end > window_end:
|
||||
day_end = window_end
|
||||
windows.append((day_start, day_end))
|
||||
next_day = cur.date() + timedelta(days=1)
|
||||
cur = datetime.combine(next_day, time.min).replace(tzinfo=tz)
|
||||
# 从 window_start 开始,每次推进 24 小时(一个营业日)
|
||||
cur_start = window_start
|
||||
while cur_start <= window_end:
|
||||
cur_end = cur_start + timedelta(days=1) - timedelta(seconds=1)
|
||||
if cur_end > window_end:
|
||||
cur_end = window_end
|
||||
windows.append((cur_start, cur_end))
|
||||
cur_start = cur_start + timedelta(days=1)
|
||||
return windows
|
||||
|
||||
|
||||
|
||||
@@ -21,6 +21,9 @@ import sys
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 常量
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -20,6 +20,8 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. 加载根 .env(遵循 testing-env.md 规范)
|
||||
|
||||
Reference in New Issue
Block a user