微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -14,6 +14,9 @@ import os
import re
import sys
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")

View File

@@ -11,6 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
import psycopg2
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
load_dotenv()
PG_DSN = os.getenv("PG_DSN")

View File

@@ -23,6 +23,9 @@ from enum import Enum
from pathlib import Path
from typing import Optional
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
class DiffKind(str, Enum):
"""差异分类枚举。"""

View File

@@ -13,6 +13,9 @@ from pathlib import Path
from dotenv import load_dotenv
import psycopg2
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
load_dotenv()
SUMMARY_DIR = Path("docs/api-reference/summary")

View File

@@ -414,6 +414,7 @@ def _check_ods_vs_dwd(
# ══════════════════════════════════════════════════════════════
# 已知的 DWS→DWD 聚合关系映射
# 营业日口径:使用 dws.biz_date() 替代 ::date 自然日转换
_DWS_DWD_MAP: dict[str, dict] = {
"dws.dws_assistant_daily_detail": {
"dwd_source": "dwd.dwd_assistant_service_log",
@@ -425,28 +426,28 @@ _DWS_DWD_MAP: dict[str, dict] = {
"dwd_source": "dwd.dwd_settlement_head",
"dws_date_col": "stat_date",
"dwd_date_col": "pay_time",
"dwd_date_cast": "::date",
"dwd_date_cast": "dws.biz_date(%col%)",
"description": "财务日度汇总 vs DWD 结账记录",
},
"dws.dws_member_visit_detail": {
"dwd_source": "dwd.dwd_settlement_head",
"dws_date_col": "visit_date",
"dwd_date_col": "pay_time",
"dwd_date_cast": "::date",
"dwd_date_cast": "dws.biz_date(%col%)",
"description": "会员到店明细 vs DWD 结账记录",
},
"dws.dws_member_consumption_summary": {
"dwd_source": "dwd.dwd_settlement_head",
"dws_date_col": "stat_month",
"dwd_date_col": "pay_time",
"dwd_date_cast": "date_trunc('month', %col%)::date",
"dwd_date_cast": "date_trunc('month', dws.biz_date(%col%))::date",
"description": "会员消费汇总 vs DWD 结账记录",
},
"dws.dws_finance_recharge_summary": {
"dwd_source": "dwd.dwd_recharge_order",
"dws_date_col": "stat_date",
"dwd_date_col": "pay_time",
"dwd_date_cast": "::date",
"dwd_date_cast": "dws.biz_date(%col%)",
"description": "充值汇总 vs DWD 充值订单",
},
}

View File

@@ -20,6 +20,9 @@ from datetime import datetime
import requests
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ── 配置 ──────────────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")

View File

@@ -12,6 +12,9 @@ import re
from dataclasses import dataclass, field
from pathlib import Path
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------

View File

@@ -68,27 +68,6 @@
"payload",
"content_hash"
],
"assistant_cancellation_records": [
"id",
"siteid",
"siteprofile",
"assistantname",
"assistantabolishamount",
"assistanton",
"pdchargeminutes",
"tableareaid",
"tablearea",
"tableid",
"tablename",
"trashreason",
"createtime",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"tenant_id"
],
"assistant_service_records": [
"id",
"tenant_id",

View File

@@ -15,6 +15,9 @@ import sys
import time
import requests
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ── 配置 ──────────────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")
@@ -58,7 +61,6 @@ CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "exa
ACTUAL_LIST_KEY = {
"assistant_accounts_master": "assistantInfos",
"assistant_service_records": "orderAssistantDetails",
"assistant_cancellation_records": "abolitionAssistants",
"table_fee_transactions": "siteTableUseDetailsList",
"table_fee_discount_records": "taiFeeAdjustInfos",
"tenant_goods_master": "tenantGoodsList",

View File

@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
"""一次性调研脚本:拉取全部团购详情并写入 ods.group_buy_package_details。
用法(cwd = C:\\NeoZQYY/):
python apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
流程:
1. 从 ods.group_buy_packages 读取所有 coupon_id(id 列)
2. 串行调用 QueryPackageCouponInfo 详情接口(RateLimiter 5-20s)
3. 提取结构化字段 + 计算 content_hash + 保留原始 payload
4. UPSERT 写入 ods.group_buy_package_details
需求覆盖:附录 B 调研 3、4
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
# ── 环境初始化 ──────────────────────────────────────────────────────────
# 加载根 .env(脚本 cwd 为 apps/etl/connectors/feiqiu/)
from dotenv import load_dotenv
_SCRIPT_DIR = Path(__file__).resolve().parent # scripts/
_FEIQIU_DIR = _SCRIPT_DIR.parent # apps/etl/connectors/feiqiu/
_REPO_ROOT = _FEIQIU_DIR.parents[3] # → connectors/ → etl/ → apps/ → root
load_dotenv(_REPO_ROOT / ".env")
# 必需环境变量校验
_REQUIRED_ENV = ("FETCH_ROOT", "EXPORT_ROOT", "PG_DSN", "TEST_DB_DSN")
_missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)]
if _missing:
sys.exit(f"ERROR: 缺少必需环境变量: {', '.join(_missing)}")
TEST_DB_DSN = os.environ["TEST_DB_DSN"]
# 确保 feiqiu 目录在 sys.path 中,以便从仓库根目录运行时也能 import 本地模块
if str(_FEIQIU_DIR) not in sys.path:
sys.path.insert(0, str(_FEIQIU_DIR))
# ── 依赖导入 ──────────────────────────────────────────────────────────
from psycopg2.extras import Json # noqa: E402
from config.settings import AppConfig # noqa: E402
from api.client import APIClient # noqa: E402
from api.rate_limiter import RateLimiter # noqa: E402
from database.connection import DatabaseConnection # noqa: E402
# 复用 ods_tasks.py 中的字段提取逻辑
from tasks.ods.ods_tasks import _group_package_detail_process_fn # noqa: E402
def main():
    """Fetch every group-buy package detail and upsert it into the test database.

    Steps:
      1. Load the application config.
      2. Connect to the database identified by ``TEST_DB_DSN``.
      3. Read all distinct ids from ``ods.group_buy_packages``.
      4. Call ``/PackageCoupon/QueryPackageCouponInfo`` once per id, serially,
         pausing via the rate limiter between calls.
      5. Upsert the first extracted record of each response, committing per row.
      6. Print a success / failure / skip summary.

    The database connection is closed on every exit path, including
    unexpected exceptions.
    """
    # ── 1. Load config ──────────────────────────────────────────────
    config = AppConfig.load()
    print(f"✅ 配置加载完成 (store_id={config.get('app.store_id')})")

    # ── 2. Connect to the test database ─────────────────────────────
    db = DatabaseConnection(
        dsn=TEST_DB_DSN,
        session=config["db"].get("session", {}),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    try:
        # Only the part after '@' is printed so credentials stay out of the log.
        print(f"✅ 已连接测试库: {TEST_DB_DSN.split('@')[-1]}")

        # ── 3. Collect every coupon id ──────────────────────────────
        rows = db.query("SELECT DISTINCT id FROM ods.group_buy_packages ORDER BY id")
        coupon_ids = [r["id"] for r in rows]
        total = len(coupon_ids)
        print(f"📋 共 {total} 个 coupon_id 待拉取")

        if not coupon_ids:
            print("⚠️ 没有找到任何 coupon_id退出")
            return

        # ── 4. API client + rate limiter ────────────────────────────
        api = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config.get("api.timeout_sec", 20),
        )
        limiter = RateLimiter(min_interval=5.0, max_interval=20.0)

        # ── 5. Fetch details one at a time ──────────────────────────
        success_count = 0
        fail_count = 0
        skip_count = 0

        for idx, cid in enumerate(coupon_ids, 1):
            print(f"\n[{idx}/{total}] coupon_id={cid} ...", end=" ", flush=True)

            try:
                resp = api.get(
                    "/PackageCoupon/QueryPackageCouponInfo",
                    {"couponId": cid},
                )
            except Exception as e:
                # Best effort: one failed request must not abort the whole run.
                print(f"❌ API 错误: {e}")
                fail_count += 1
            else:
                # Field extraction is shared with the regular ODS task.
                records = _group_package_detail_process_fn(resp)
                if not records:
                    print("⚠️ 响应无有效数据,跳过")
                    skip_count += 1
                else:
                    record = records[0]
                    # ── 6. Upsert, one transaction per record ───────
                    try:
                        _upsert_detail(db, record)
                        db.commit()
                    except Exception as e:
                        db.rollback()
                        print(f"❌ 写入失败: {e}")
                        fail_count += 1
                    else:
                        # Bookkeeping stays outside the try so a problem while
                        # formatting the message cannot turn an already
                        # committed row into a reported failure.
                        success_count += 1
                        hash_prefix = str(record.get("content_hash") or "")[:8]
                        print(f"✅ 写入成功 (hash={hash_prefix}...)")

            # Rate-limit between requests (no wait after the last id). A falsy
            # return from wait() is treated as an interruption and stops the
            # run on every path, not only after a successful write.
            if idx < total and not limiter.wait():
                print("⚠️ 等待被中断")
                break

        # ── 7. Summary ──────────────────────────────────────────────
        print("\n" + "=" * 50)
        print(f"📊 拉取完成: 成功={success_count}, 失败={fail_count}, 跳过={skip_count}, 总计={total}")
        print("=" * 50)
    finally:
        # Release the connection on normal exit, early return, and errors.
        db.close()
def _upsert_detail(db: DatabaseConnection, record: dict) -> None:
    """Insert or update one detail row in ods.group_buy_package_details.

    The row is keyed on ``coupon_id``. On conflict every other listed column is
    overwritten with the incoming value and ``fetched_at`` is set to ``now()``.

    Args:
        db: Connection whose ``execute(sql, params)`` runs the statement; the
            caller is responsible for commit / rollback.
        record: Mapping of column name to value. Keys absent from the mapping
            are passed as ``None``.
    """
    # Column order here fixes both the INSERT column list and the parameter order.
    field_names = (
        "coupon_id", "package_name", "duration", "start_time", "end_time",
        "add_start_clock", "add_end_clock", "is_enabled", "is_delete",
        "site_id", "tenant_id", "create_time", "creator_name",
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
        "content_hash", "payload",
    )
    # These columns are wrapped in psycopg2's Json adapter when not None.
    jsonb_fields = frozenset((
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
        "payload",
    ))

    params = []
    for name in field_names:
        raw = record.get(name)
        wrap = raw is not None and name in jsonb_fields
        params.append(Json(raw) if wrap else raw)

    # Every column except the conflict key is refreshed from EXCLUDED,
    # followed by the fetched_at timestamp.
    assignments = [
        f"{name} = EXCLUDED.{name}" for name in field_names if name != "coupon_id"
    ]
    assignments.append("fetched_at = now()")

    insert_clause = "INSERT INTO ods.group_buy_package_details (" + ", ".join(field_names) + ")"
    values_clause = "VALUES (" + ", ".join("%s" for _ in field_names) + ")"
    conflict_clause = "ON CONFLICT (coupon_id) DO UPDATE SET " + ", ".join(assignments)

    statement = " ".join((insert_clause, values_clause, conflict_clause))
    db.execute(statement, params)
if __name__ == "__main__":
main()

View File

@@ -6,6 +6,9 @@ ODS 列数据来自 information_schema.columns WHERE table_schema = 'ods'。
import json
import os
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
if not REPORT_DIR:
@@ -16,7 +19,7 @@ NESTED_OBJECTS = {"siteprofile", "tableprofile"}
# 22 张需要比对的表
TABLES = [
"assistant_accounts_master", "settlement_records", "assistant_service_records",
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
"table_fee_transactions", "table_fee_discount_records",
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",

View File

@@ -31,7 +31,7 @@ ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_
TABLES = [
"assistant_accounts_master", "settlement_records", "assistant_service_records",
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
"table_fee_transactions", "table_fee_discount_records",
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
@@ -195,7 +195,7 @@ def classify_ods_only(table_name: str, field: str) -> str:
return "ODS 后续版本新增字段(当前使用中的台桌关联订单 ID"
# tenant_id 在某些表中是 ODS 额外添加的
if field == "tenant_id" and table_name in (
"assistant_cancellation_records", "payment_transactions"
"payment_transactions",
):
return "ODS 额外添加的租户 ID 字段API 响应中不含ETL 入库时补充)"
# API 后续版本新增字段(文档快照未覆盖)

View File

@@ -14,7 +14,7 @@ import multiprocessing as mp
import subprocess
import sys
import time as time_mod
from datetime import date, datetime, time, timedelta
from datetime import date, datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo
@@ -27,6 +27,7 @@ from tasks.utility.check_cutoff_task import CheckCutoffTask
from tasks.dwd.dwd_load_task import DwdLoadTask
from tasks.ods.ods_tasks import ENABLED_ODS_CODES
from utils.logging_utils import build_log_path, configure_logging
from neozqyy_shared.datetime_utils import business_date, business_day_range, now_shanghai
STEP_TIMEOUT_SEC = 120
@@ -53,6 +54,7 @@ def _compute_dws_window(
if dws_start and dws_end and dws_end < dws_start:
raise ValueError("dws_end must be >= dws_start")
cutoff = int(cfg.get("app.business_day_start_hour", 8))
store_id = int(cfg.get("app.store_id"))
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
@@ -67,19 +69,22 @@ def _compute_dws_window(
if isinstance(mx, date):
dws_start = mx - timedelta(days=max(0, int(rebuild_days)))
else:
dws_start = (datetime.now(tz).date()) - timedelta(days=max(1, int(bootstrap_days)))
# 营业日口径:用 business_date 计算"今天"
dws_start = business_date(now_shanghai(), cutoff) - timedelta(days=max(1, int(bootstrap_days)))
if dws_end is None:
dws_end = datetime.now(tz).date()
dws_end = business_date(now_shanghai(), cutoff)
finally:
conn.close()
start_dt = datetime.combine(dws_start, time.min).replace(tzinfo=tz)
# end_dt 取到当天 23:59:59避免只跑到“当前时刻”的 date() 导致少一天
end_dt = datetime.combine(dws_end, time.max).replace(tzinfo=tz)
# 营业日口径:窗口边界按 cutoff 小时对齐
start_dt = business_day_range(dws_start, cutoff)[0]
# end_dt 取到营业日结束(即 dws_end 次日 cutoff 前一秒),覆盖完整营业日
end_dt = business_day_range(dws_end, cutoff)[1] - timedelta(seconds=1)
return start_dt, end_dt
def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
@@ -99,21 +104,21 @@ def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
def _iter_daily_windows(window_start: datetime, window_end: datetime) -> list[tuple[datetime, datetime]]:
    """Split ``[window_start, window_end]`` into consecutive one-day windows.

    Each window starts exactly one day after the previous one and ends one
    second before the next window starts; the final window is clipped to
    ``window_end``. Callers are expected to pass boundaries already aligned to
    the business-day cutoff hour (the original note says _compute_dws_window
    guarantees this), so each window corresponds to one business day.

    Args:
        window_start: Inclusive start of the overall range.
        window_end: Inclusive end of the overall range.

    Returns:
        A list of ``(start, end)`` tuples in chronological order; empty when
        ``window_start`` is later than ``window_end``.
    """
    if window_start > window_end:
        return []

    one_day = timedelta(days=1)
    one_second = timedelta(seconds=1)

    windows: list[tuple[datetime, datetime]] = []
    # Step forward one day at a time from window_start, so every window keeps
    # the same time of day as window_start.
    cur_start = window_start
    while cur_start <= window_end:
        # Clip the last window so it never runs past the requested end.
        cur_end = min(cur_start + one_day - one_second, window_end)
        windows.append((cur_start, cur_end))
        cur_start += one_day
    return windows

View File

@@ -21,6 +21,9 @@ import sys
from pathlib import Path
from dataclasses import dataclass, field
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------

View File

@@ -20,6 +20,8 @@ import sys
from pathlib import Path
from dotenv import load_dotenv
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 1. 加载根 .env遵循 testing-env.md 规范)