Files
Neo-ZQYY/scripts/ops/backfill_coach_area_hours.py
Neo 779b2f6d52 chore: v1 整理 — 清理历史文件、DDL 合并、文档归档
- 清理 1155 个已删除的历史文件(废弃 prompt_logs、tmp、旧 ops 脚本)
- export/ 数据文件从 git 移除(已在 .gitignore)
- demo-miniprogram 从 tmp/ 移入 apps/,添加 CLAUDE.md 注解
- DDL 合并:完整 schema 定义填充到 db/*/schemas/(从 docs/database/ddl/ 复制)
- 39 个 v1 迁移脚本归档到 db/_archived/migrations_v1_merged/
- 4 个迁移变更类 BD_Manual 文档归档到 docs/database/_archived/
- .gitignore 补充 .vite/ 和 apps/*.zip
- settings.json 添加 effortLevel 默认配置
- scripts/ops/ 新增运维脚本入库

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-06 00:39:27 +08:00

161 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
回填脚本dws_coach_area_hours
对已有月份批量从 DWD 服务记录聚合助教区域课时,写入 dws_coach_area_hours。
用法:
cd C:\\NeoZQYY
uv run python scripts/ops/backfill_coach_area_hours.py \\
--site-id 2790685415443269 --start-month 2025-07-01 --end-month 2026-03-01
"""
from __future__ import annotations
import argparse
import logging
import os
import sys
from datetime import date, timedelta
from decimal import Decimal
# ── 环境初始化 ──
_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.insert(0, _ROOT)
from dotenv import load_dotenv
load_dotenv(os.path.join(_ROOT, ".env"))
import psycopg2
import psycopg2.extras
from neozqyy_shared.area_mapping import resolve_area_code
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("backfill_coach_area_hours")
# ── 导入 ETL 纯函数stub 模式) ──
import importlib.util as _ilu
import types as _types
_ETL_ROOT = os.path.join(_ROOT, "apps", "etl", "connectors", "feiqiu")
_tasks_pkg = _types.ModuleType("tasks")
_tasks_pkg.__path__ = [os.path.join(_ETL_ROOT, "tasks")]
_dws_pkg = _types.ModuleType("tasks.dws")
_dws_pkg.__path__ = [os.path.join(_ETL_ROOT, "tasks", "dws")]
sys.modules.setdefault("tasks", _tasks_pkg)
sys.modules.setdefault("tasks.dws", _dws_pkg)
_base_stub = _types.ModuleType("tasks.dws.base_dws_task")
_base_stub.TaskContext = type("TaskContext", (), {})
_base_stub.BaseDwsTask = type("BaseDwsTask", (), {})
sys.modules["tasks.dws.base_dws_task"] = _base_stub
_mod_path = os.path.join(_ETL_ROOT, "tasks", "dws", "coach_area_hours_task.py")
_spec = _ilu.spec_from_file_location("tasks.dws.coach_area_hours_task", _mod_path)
_mod = _ilu.module_from_spec(_spec)
_mod.__package__ = "tasks.dws"
sys.modules["tasks.dws.coach_area_hours_task"] = _mod
_spec.loader.exec_module(_mod)
transform_coach_area_hours = _mod.transform_coach_area_hours
# ── 数据提取 ──
def extract_service_records(conn, site_id: int, stat_month: date) -> list[dict]:
"""从 DWD 提取指定月份的服务记录"""
if stat_month.month == 12:
next_month = stat_month.replace(year=stat_month.year + 1, month=1)
else:
next_month = stat_month.replace(month=stat_month.month + 1)
sql = """
SELECT
s.site_assistant_id AS assistant_id,
dt.site_table_area_name AS area_name,
s.skill_name,
s.income_seconds,
COALESCE(ex.is_trash, 0) AS is_trash
FROM dwd.dwd_assistant_service_log s
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
ON s.assistant_service_id = ex.assistant_service_id
LEFT JOIN dwd.dim_table dt
ON s.site_table_id = dt.table_id
AND dt.scd2_is_current = 1
WHERE s.site_id = %s
AND s.is_delete = 0
AND DATE(s.start_use_time) >= %s
AND DATE(s.start_use_time) < %s
"""
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
cur.execute(sql, (site_id, stat_month, next_month))
return cur.fetchall()
def main():
parser = argparse.ArgumentParser(description="回填 dws_coach_area_hours")
parser.add_argument("--site-id", type=int, required=True)
parser.add_argument("--start-month", type=str, required=True, help="起始月份 YYYY-MM-01")
parser.add_argument("--end-month", type=str, required=True, help="结束月份 YYYY-MM-01")
args = parser.parse_args()
dsn = os.environ.get("PG_DSN")
if not dsn:
raise RuntimeError("PG_DSN 未设置")
os.environ.setdefault("PGCLIENTENCODING", "UTF8")
conn = psycopg2.connect(dsn)
conn.autocommit = False
site_id = args.site_id
start = date.fromisoformat(args.start_month)
end = date.fromisoformat(args.end_month)
total_inserted = 0
month = start
month_idx = 0
while month <= end:
month_idx += 1
records = extract_service_records(conn, site_id, month)
rows = transform_coach_area_hours(
records=records,
stat_month=month,
site_id=site_id,
tenant_id=site_id,
)
# delete-before-insert
with conn.cursor() as cur:
cur.execute(
"DELETE FROM dws.dws_coach_area_hours WHERE site_id = %s AND stat_month = %s",
(site_id, month),
)
if rows:
cols = [
"site_id", "tenant_id", "stat_month", "assistant_id", "area_code",
"base_hours", "bonus_hours", "room_hours",
"effective_hours", "trashed_hours",
"base_service_count", "bonus_service_count", "room_service_count",
]
placeholders = ", ".join(["%s"] * len(cols))
insert_sql = f"INSERT INTO dws.dws_coach_area_hours ({', '.join(cols)}) VALUES ({placeholders})"
for row in rows:
cur.execute(insert_sql, tuple(row[c] for c in cols))
conn.commit()
total_inserted += len(rows)
logger.info("[%d] %s%d 行(%d 条服务记录)", month_idx, month, len(rows), len(records))
# 下一个月
if month.month == 12:
month = month.replace(year=month.year + 1, month=1)
else:
month = month.replace(month=month.month + 1)
logger.info("=== 回填完成:共 %d 行 ===", total_inserted)
conn.close()
if __name__ == "__main__":
main()