初始提交:飞球 ETL 系统全量代码

This commit is contained in:
Neo
2026-02-13 08:05:34 +08:00
commit 3c51f5485d
441 changed files with 117631 additions and 0 deletions

48
database/README.md Normal file
View File

@@ -0,0 +1,48 @@
# database/ — 数据库层
## 文件说明
| 文件 | 用途 |
|------|------|
| `connection.py` | 数据库连接管理(带超时的 psycopg2 封装) |
| `operations.py` | 批量操作upsert、execute、query |
| `base.py` | 数据库操作基础类 |
## DDL Schema 文件
| 文件 | Schema | 说明 |
|------|--------|------|
| `schema_ODS_doc.sql` | `billiards_ods` | ODS 层表结构(含字段注释) |
| `schema_dwd_doc.sql` | `billiards_dwd` | DWD 层表结构(维度 + 事实,含 SCD2 列) |
| `schema_dws.sql` | `billiards_dws` | DWS 层表结构(汇总表 + 配置表) |
| `schema_etl_admin.sql` | `etl_admin` | ETL 元数据(任务注册、游标、运行记录) |
| `schema_verify_perf_indexes.sql` | 各 Schema | 校验性能索引(仅索引 + ANALYZE |
## 种子脚本
| 文件 | 用途 |
|------|------|
| `seed_ods_tasks.sql` | 注册 ODS 任务到 `etl_admin.etl_task` |
| `seed_scheduler_tasks.sql` | 初始化调度任务配置 |
| `seed_dws_config.sql` | DWS 配置数据(绩效档位、等级定价、技能映射等) |
| `seed_index_parameters.sql` | 指数算法参数WBI/NCI/RS/OS/MS/ML |
## 迁移脚本
位于 `migrations/` 子目录,纯 SQL按日期前缀命名
```
migrations/
└── 20260208_relation_index_manual_ml.sql
```
新增迁移时,文件名格式:`YYYYMMDD_描述.sql`
## Schema 约定
- 所有 DDL 使用 `CREATE TABLE IF NOT EXISTS`,支持幂等执行
- 表名小写蛇形,带 Schema 前缀(如 `billiards_dwd.dim_member`
- 维度表包含 SCD2 列:`scd2_start_time``scd2_end_time``scd2_is_current``scd2_version`
- ODS 表包含元数据列:`content_hash``payload``fetched_at``source_file`
- 金额字段统一 `NUMERIC(12,2)`ID 字段统一 `BIGINT`
- 不使用 ORM所有 SQL 通过 `psycopg2` 直接执行

0
database/__init__.py Normal file
View File

112
database/base.py Normal file
View File

@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
"""
数据库操作批量、RETURNING支持
"""
import re
from typing import List, Dict, Tuple
import psycopg2.extras
from .connection import DatabaseConnection
class DatabaseOperations(DatabaseConnection):
"""扩展数据库操作包含批量upsert和returning支持"""
def batch_execute(self, sql: str, rows: List[Dict], page_size: int = 1000):
"""批量执行SQL不带RETURNING"""
if not rows:
return
with self.conn.cursor() as c:
psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
def batch_upsert_with_returning(self, sql: str, rows: List[Dict], page_size: int = 1000) -> Tuple[int, int]:
"""
批量 UPSERT 并统计插入/更新数
Args:
sql: 包含RETURNING子句的SQL
rows: 数据行列表
page_size: 批次大小
Returns:
(inserted_count, updated_count) 元组
"""
if not rows:
return (0, 0)
use_returning = "RETURNING" in sql.upper()
with self.conn.cursor() as c:
if not use_returning:
psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
return (0, 0)
# 优先尝试向量化执行
try:
inserted, updated = self._execute_with_returning_vectorized(c, sql, rows, page_size)
return (inserted, updated)
except Exception:
# 回退到逐行执行
return self._execute_with_returning_row_by_row(c, sql, rows)
def _execute_with_returning_vectorized(self, cursor, sql: str, rows: List[Dict], page_size: int) -> Tuple[int, int]:
"""向量化执行使用execute_values"""
# 解析VALUES子句
m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
if not m:
raise ValueError("Cannot parse VALUES clause")
tpl = "(" + m.group(1) + ")"
base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]
ret = psycopg2.extras.execute_values(
cursor, base_sql, rows, template=tpl, page_size=page_size, fetch=True
)
if not ret:
return (0, 0)
inserted = 0
for rec in ret:
flag = self._extract_inserted_flag(rec)
if flag:
inserted += 1
return (inserted, len(ret) - inserted)
def _execute_with_returning_row_by_row(self, cursor, sql: str, rows: List[Dict]) -> Tuple[int, int]:
"""逐行执行(回退方案)"""
inserted = 0
updated = 0
for r in rows:
cursor.execute(sql, r)
try:
rec = cursor.fetchone()
except Exception:
rec = None
flag = self._extract_inserted_flag(rec) if rec else None
if flag:
inserted += 1
else:
updated += 1
return (inserted, updated)
@staticmethod
def _extract_inserted_flag(rec) -> bool:
"""从返回记录中提取inserted标志"""
if isinstance(rec, tuple):
return bool(rec[0])
elif isinstance(rec, dict):
return bool(rec.get("inserted"))
else:
try:
return bool(rec["inserted"])
except Exception:
return False
# 为了向后兼容提供Pg别名
Pg = DatabaseOperations

80
database/connection.py Normal file
View File

@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
"""数据库连接管理器(限制最大连接超时时间)。"""
import psycopg2
import psycopg2.extras
class DatabaseConnection:
"""封装 psycopg2 连接,支持会话参数和超时保护。"""
def __init__(self, dsn: str, session: dict = None, connect_timeout: int = None):
self._dsn = dsn
self._session = session or {}
self._connect_timeout = connect_timeout
self.conn = self._open_connection()
def _open_connection(self):
"""创建并初始化连接(包含会话参数)。"""
timeout_val = self._connect_timeout if self._connect_timeout is not None else 5
# 生产环境要求:数据库连接超时不得超过 20 秒。
timeout_val = max(1, min(int(timeout_val), 20))
conn = psycopg2.connect(self._dsn, connect_timeout=timeout_val)
conn.autocommit = False
# 会话参数(时区、语句超时等)
if self._session:
with conn.cursor() as c:
if self._session.get("timezone"):
c.execute("SET TIME ZONE %s", (self._session["timezone"],))
if self._session.get("statement_timeout_ms") is not None:
c.execute(
"SET statement_timeout = %s",
(int(self._session["statement_timeout_ms"]),),
)
if self._session.get("lock_timeout_ms") is not None:
c.execute(
"SET lock_timeout = %s", (int(self._session["lock_timeout_ms"]),)
)
if self._session.get("idle_in_tx_timeout_ms") is not None:
c.execute(
"SET idle_in_transaction_session_timeout = %s",
(int(self._session["idle_in_tx_timeout_ms"]),),
)
return conn
def query(self, sql: str, args=None):
"""Execute a query and fetch all rows."""
with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as c:
c.execute(sql, args)
return c.fetchall()
def execute(self, sql: str, args=None):
"""Execute a SQL statement without returning rows."""
with self.conn.cursor() as c:
c.execute(sql, args)
def commit(self):
"""Commit current transaction."""
self.conn.commit()
def rollback(self):
"""Rollback current transaction."""
self.conn.rollback()
def close(self):
"""Safely close the connection."""
try:
self.conn.close()
except Exception:
pass
def ensure_open(self) -> bool:
"""确保连接可用,若已关闭则尝试重连。"""
try:
if getattr(self.conn, "closed", 0):
self.conn = self._open_connection()
return True
except Exception:
return False

View File

@@ -0,0 +1,144 @@
-- =============================================================================
-- 关系指数与 ML 人工台账迁移脚本
-- 版本: 2026-02-08
-- 说明:
-- 1) 新增关系指数结果表 dws_member_assistant_relation_index
-- 2) 新增 ML 人工台账宽表/窄表
-- 3) 补充 RS/OS/MS/ML 参数并下线 INTIMACY
-- =============================================================================
BEGIN;
-- -----------------------------------------------------------------------------
-- 1) 关系指数结果表
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS billiards_dws.dws_member_assistant_relation_index (
relation_id BIGSERIAL PRIMARY KEY,
site_id BIGINT NOT NULL,
tenant_id BIGINT NOT NULL,
member_id BIGINT NOT NULL,
assistant_id BIGINT NOT NULL,
session_count INTEGER NOT NULL DEFAULT 0,
total_duration_minutes INTEGER NOT NULL DEFAULT 0,
basic_session_count INTEGER NOT NULL DEFAULT 0,
incentive_session_count INTEGER NOT NULL DEFAULT 0,
days_since_last_session INTEGER,
rs_f NUMERIC(14,6) NOT NULL DEFAULT 0,
rs_d NUMERIC(14,6) NOT NULL DEFAULT 0,
rs_r NUMERIC(14,6) NOT NULL DEFAULT 0,
rs_raw NUMERIC(14,6) NOT NULL DEFAULT 0,
rs_display NUMERIC(4,2) NOT NULL DEFAULT 0,
os_share NUMERIC(10,6) NOT NULL DEFAULT 0,
os_label VARCHAR(20) NOT NULL DEFAULT 'POOL',
os_rank INTEGER,
ms_f_short NUMERIC(14,6) NOT NULL DEFAULT 0,
ms_f_long NUMERIC(14,6) NOT NULL DEFAULT 0,
ms_raw NUMERIC(14,6) NOT NULL DEFAULT 0,
ms_display NUMERIC(4,2) NOT NULL DEFAULT 0,
ml_order_count INTEGER NOT NULL DEFAULT 0,
ml_allocated_amount NUMERIC(14,2) NOT NULL DEFAULT 0,
ml_raw NUMERIC(14,6) NOT NULL DEFAULT 0,
ml_display NUMERIC(4,2) NOT NULL DEFAULT 0,
calc_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_dws_member_assistant_relation_index UNIQUE (site_id, member_id, assistant_id)
);
CREATE INDEX IF NOT EXISTS idx_dws_relation_member
ON billiards_dws.dws_member_assistant_relation_index (site_id, member_id, os_share DESC);
CREATE INDEX IF NOT EXISTS idx_dws_relation_assistant
ON billiards_dws.dws_member_assistant_relation_index (site_id, assistant_id, rs_display DESC);
CREATE INDEX IF NOT EXISTS idx_dws_relation_calc_time
ON billiards_dws.dws_member_assistant_relation_index (calc_time);
-- -----------------------------------------------------------------------------
-- 2) ML 人工台账宽表
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS billiards_dws.dws_ml_manual_order_source (
source_id BIGSERIAL PRIMARY KEY,
site_id BIGINT NOT NULL,
biz_date DATE NOT NULL,
external_id VARCHAR(128) NOT NULL,
member_id BIGINT NOT NULL DEFAULT 0,
pay_time TIMESTAMPTZ NOT NULL,
order_amount NUMERIC(14,2) NOT NULL DEFAULT 0,
currency VARCHAR(16) NOT NULL DEFAULT 'CNY',
assistant_id_1 BIGINT,
assistant_name_1 VARCHAR(128),
assistant_id_2 BIGINT,
assistant_name_2 VARCHAR(128),
assistant_id_3 BIGINT,
assistant_name_3 VARCHAR(128),
assistant_id_4 BIGINT,
assistant_name_4 VARCHAR(128),
assistant_id_5 BIGINT,
assistant_name_5 VARCHAR(128),
import_batch_no VARCHAR(64) NOT NULL,
import_file_name VARCHAR(255) NOT NULL,
import_scope_key VARCHAR(128) NOT NULL,
import_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
import_user VARCHAR(64),
row_no INTEGER NOT NULL,
remark TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_dws_ml_manual_order_source UNIQUE (site_id, external_id, import_scope_key, row_no)
);
CREATE INDEX IF NOT EXISTS idx_dws_ml_source_scope
ON billiards_dws.dws_ml_manual_order_source (site_id, biz_date);
CREATE INDEX IF NOT EXISTS idx_dws_ml_source_external
ON billiards_dws.dws_ml_manual_order_source (site_id, external_id);
-- -----------------------------------------------------------------------------
-- 3) ML 人工台账窄表
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS billiards_dws.dws_ml_manual_order_alloc (
alloc_id BIGSERIAL PRIMARY KEY,
site_id BIGINT NOT NULL,
biz_date DATE NOT NULL,
external_id VARCHAR(128) NOT NULL,
member_id BIGINT NOT NULL DEFAULT 0,
pay_time TIMESTAMPTZ NOT NULL,
order_amount NUMERIC(14,2) NOT NULL DEFAULT 0,
assistant_id BIGINT NOT NULL,
assistant_name VARCHAR(128),
share_ratio NUMERIC(14,8) NOT NULL DEFAULT 0,
allocated_amount NUMERIC(14,2) NOT NULL DEFAULT 0,
currency VARCHAR(16) NOT NULL DEFAULT 'CNY',
import_scope_key VARCHAR(128) NOT NULL,
import_batch_no VARCHAR(64) NOT NULL,
import_file_name VARCHAR(255) NOT NULL,
import_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
import_user VARCHAR(64),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_dws_ml_manual_order_alloc UNIQUE (site_id, external_id, assistant_id)
);
CREATE INDEX IF NOT EXISTS idx_dws_ml_alloc_scope
ON billiards_dws.dws_ml_manual_order_alloc (site_id, biz_date);
CREATE INDEX IF NOT EXISTS idx_dws_ml_alloc_member_assistant
ON billiards_dws.dws_ml_manual_order_alloc (site_id, member_id, assistant_id);
-- -----------------------------------------------------------------------------
-- 4) 参数切换
-- -----------------------------------------------------------------------------
UPDATE billiards_dws.cfg_index_parameters
SET effective_to = DATE '2025-12-31',
updated_at = NOW()
WHERE index_type = 'INTIMACY'
AND (effective_to IS NULL OR effective_to > DATE '2025-12-31');
INSERT INTO billiards_dws.cfg_index_parameters
(index_type, param_name, param_value, description, effective_from)
VALUES
('OS', 'ownership_gap_threshold', 0.150000, '主责与次席份额差阈值', DATE '2026-01-01'),
('ML', 'source_mode', 0.000000, '数据源模式0=manual_only,1=last_touch_fallback', DATE '2026-01-01')
ON CONFLICT (index_type, param_name, effective_from) DO UPDATE SET
param_value = EXCLUDED.param_value,
description = EXCLUDED.description,
updated_at = NOW();
COMMIT;

107
database/operations.py Normal file
View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""数据库批量操作"""
import psycopg2.extras
import re
class DatabaseOperations:
"""数据库批量操作封装"""
def __init__(self, connection):
self._connection = connection
self.conn = connection.conn
def batch_execute(self, sql: str, rows: list, page_size: int = 1000):
"""批量执行SQL"""
if not rows:
return
with self.conn.cursor() as c:
psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
def batch_upsert_with_returning(self, sql: str, rows: list,
page_size: int = 1000) -> tuple:
"""批量UPSERT并返回插入/更新计数"""
if not rows:
return (0, 0)
use_returning = "RETURNING" in sql.upper()
# 不带 RETURNING直接批量执行即可
if not use_returning:
with self.conn.cursor() as c:
psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
return (0, 0)
# 尝试向量化执行execute_values + fetch returning
vectorized_failed = False
m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
if m:
tpl = "(" + m.group(1) + ")"
base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]
try:
with self.conn.cursor() as c:
ret = psycopg2.extras.execute_values(
c, base_sql, rows, template=tpl, page_size=page_size, fetch=True
)
if not ret:
return (0, 0)
inserted = sum(1 for rec in ret if self._is_inserted(rec))
return (inserted, len(ret) - inserted)
except Exception:
# 向量化失败后,事务通常处于 aborted 状态,需要先 rollback 才能继续执行。
vectorized_failed = True
if vectorized_failed:
try:
self.conn.rollback()
except Exception:
pass
# 回退:逐行执行
inserted = 0
updated = 0
with self.conn.cursor() as c:
for r in rows:
c.execute(sql, r)
try:
rec = c.fetchone()
except Exception:
rec = None
if self._is_inserted(rec):
inserted += 1
else:
updated += 1
return (inserted, updated)
@staticmethod
def _is_inserted(rec) -> bool:
"""判断是否为插入操作"""
if rec is None:
return False
if isinstance(rec, tuple):
return bool(rec[0])
if isinstance(rec, dict):
return bool(rec.get("inserted"))
return False
# --- 透传辅助方法 -------------------------------------------------
def commit(self):
"""提交事务(委托给底层连接)"""
self._connection.commit()
def rollback(self):
"""回滚事务(委托给底层连接)"""
self._connection.rollback()
def query(self, sql: str, args=None):
"""执行查询并返回结果"""
return self._connection.query(sql, args)
def execute(self, sql: str, args=None):
"""执行任意 SQL"""
self._connection.execute(sql, args)
def cursor(self):
"""暴露原生 cursor供特殊操作使用"""
return self.conn.cursor()

2050
database/schema_ODS_doc.sql Normal file

File diff suppressed because it is too large Load Diff

2083
database/schema_dwd_doc.sql Normal file

File diff suppressed because it is too large Load Diff

1710
database/schema_dws.sql Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,105 @@
-- 文件说明etl_admin 调度元数据 DDL独立文件便于初始化任务单独执行
-- 包含任务注册表、游标表、运行记录表;字段注释使用中文。
CREATE SCHEMA IF NOT EXISTS etl_admin;
CREATE TABLE IF NOT EXISTS etl_admin.etl_task (
task_id BIGSERIAL PRIMARY KEY,
task_code TEXT NOT NULL,
store_id BIGINT NOT NULL,
enabled BOOLEAN DEFAULT TRUE,
cursor_field TEXT,
window_minutes_default INT DEFAULT 30,
overlap_seconds INT DEFAULT 600,
page_size INT DEFAULT 200,
retry_max INT DEFAULT 3,
params JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE (task_code, store_id)
);
COMMENT ON TABLE etl_admin.etl_task IS '任务注册表:调度依据的任务清单(与 task_registry 中的任务码对应)。';
COMMENT ON COLUMN etl_admin.etl_task.task_code IS '任务编码,需与代码中的任务码一致。';
COMMENT ON COLUMN etl_admin.etl_task.store_id IS '门店/租户粒度,区分多门店执行。';
COMMENT ON COLUMN etl_admin.etl_task.enabled IS '是否启用此任务。';
COMMENT ON COLUMN etl_admin.etl_task.cursor_field IS '增量游标字段名(可选)。';
COMMENT ON COLUMN etl_admin.etl_task.window_minutes_default IS '默认时间窗口(分钟)。';
COMMENT ON COLUMN etl_admin.etl_task.overlap_seconds IS '窗口重叠秒数,用于防止遗漏。';
COMMENT ON COLUMN etl_admin.etl_task.page_size IS '默认分页大小。';
COMMENT ON COLUMN etl_admin.etl_task.retry_max IS 'API重试次数上限。';
COMMENT ON COLUMN etl_admin.etl_task.params IS '任务级自定义参数 JSON。';
COMMENT ON COLUMN etl_admin.etl_task.created_at IS '创建时间。';
COMMENT ON COLUMN etl_admin.etl_task.updated_at IS '更新时间。';
CREATE TABLE IF NOT EXISTS etl_admin.etl_cursor (
cursor_id BIGSERIAL PRIMARY KEY,
task_id BIGINT NOT NULL REFERENCES etl_admin.etl_task(task_id) ON DELETE CASCADE,
store_id BIGINT NOT NULL,
last_start TIMESTAMPTZ,
last_end TIMESTAMPTZ,
last_id BIGINT,
last_run_id BIGINT,
extra JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE (task_id, store_id)
);
COMMENT ON TABLE etl_admin.etl_cursor IS '任务游标表:记录每个任务/门店的增量窗口及最后 run。';
COMMENT ON COLUMN etl_admin.etl_cursor.task_id IS '关联 etl_task.task_id。';
COMMENT ON COLUMN etl_admin.etl_cursor.store_id IS '门店/租户粒度。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_start IS '上次窗口开始时间(含重叠偏移)。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_end IS '上次窗口结束时间。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_id IS '上次处理的最大主键/游标值(可选)。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_run_id IS '上次运行ID对应 etl_run.run_id。';
COMMENT ON COLUMN etl_admin.etl_cursor.extra IS '附加游标信息 JSON。';
COMMENT ON COLUMN etl_admin.etl_cursor.created_at IS '创建时间。';
COMMENT ON COLUMN etl_admin.etl_cursor.updated_at IS '更新时间。';
CREATE TABLE IF NOT EXISTS etl_admin.etl_run (
run_id BIGSERIAL PRIMARY KEY,
run_uuid TEXT NOT NULL,
task_id BIGINT NOT NULL REFERENCES etl_admin.etl_task(task_id) ON DELETE CASCADE,
store_id BIGINT NOT NULL,
status TEXT NOT NULL,
started_at TIMESTAMPTZ DEFAULT now(),
ended_at TIMESTAMPTZ,
window_start TIMESTAMPTZ,
window_end TIMESTAMPTZ,
window_minutes INT,
overlap_seconds INT,
fetched_count INT DEFAULT 0,
loaded_count INT DEFAULT 0,
updated_count INT DEFAULT 0,
skipped_count INT DEFAULT 0,
error_count INT DEFAULT 0,
unknown_fields INT DEFAULT 0,
export_dir TEXT,
log_path TEXT,
request_params JSONB DEFAULT '{}'::jsonb,
manifest JSONB DEFAULT '{}'::jsonb,
error_message TEXT,
extra JSONB DEFAULT '{}'::jsonb
);
COMMENT ON TABLE etl_admin.etl_run IS '运行记录表:记录每次任务执行的窗口、状态、计数与日志路径。';
COMMENT ON COLUMN etl_admin.etl_run.run_uuid IS '本次调度的唯一标识。';
COMMENT ON COLUMN etl_admin.etl_run.task_id IS '关联 etl_task.task_id。';
COMMENT ON COLUMN etl_admin.etl_run.store_id IS '门店/租户粒度。';
COMMENT ON COLUMN etl_admin.etl_run.status IS '运行状态SUCC/FAIL/PARTIAL 等)。';
COMMENT ON COLUMN etl_admin.etl_run.started_at IS '开始时间。';
COMMENT ON COLUMN etl_admin.etl_run.ended_at IS '结束时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_start IS '本次窗口开始时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_end IS '本次窗口结束时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_minutes IS '窗口跨度(分钟)。';
COMMENT ON COLUMN etl_admin.etl_run.overlap_seconds IS '窗口重叠秒数。';
COMMENT ON COLUMN etl_admin.etl_run.fetched_count IS '抓取/读取的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.loaded_count IS '插入的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.updated_count IS '更新的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.skipped_count IS '跳过的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.error_count IS '错误记录数。';
COMMENT ON COLUMN etl_admin.etl_run.unknown_fields IS '未知字段计数(清洗阶段)。';
COMMENT ON COLUMN etl_admin.etl_run.export_dir IS '抓取/导出目录。';
COMMENT ON COLUMN etl_admin.etl_run.log_path IS '日志路径。';
COMMENT ON COLUMN etl_admin.etl_run.request_params IS '请求参数 JSON。';
COMMENT ON COLUMN etl_admin.etl_run.manifest IS '运行产出清单/统计 JSON。';
COMMENT ON COLUMN etl_admin.etl_run.error_message IS '错误信息(若失败)。';
COMMENT ON COLUMN etl_admin.etl_run.extra IS '附加字段,保留扩展。';

View File

@@ -0,0 +1,173 @@
SET client_encoding TO "UTF8";
-- ============================================================================
-- 校验性能索引ODS / DWD
-- ----------------------------------------------------------------------------
-- 用途:
-- 1) 加速校验查询(主键查找、窗口扫描、当前版本扫描)。
-- 2) 保持数据语义不变(仅添加索引 + ANALYZE不改写业务数据
--
-- 注意事项:
-- 1) 本脚本具有幂等性(`CREATE INDEX IF NOT EXISTS`)。
-- 2) 如有严格的在线 DDL 要求,请手动使用 `CREATE INDEX CONCURRENTLY`
-- 在维护安全模式下执行(不可在事务块内运行)。
-- ============================================================================
DO $$
DECLARE
rec RECORD;
pk_cols TEXT[];
pk_cols_sql TEXT;
idx_name TEXT;
BEGIN
FOR rec IN
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_ods'
AND table_type = 'BASE TABLE'
LOOP
IF EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = 'billiards_ods'
AND table_name = rec.table_name
AND column_name = 'fetched_at'
) THEN
idx_name := left(format('idx_%s_vfy_fetched_at', rec.table_name), 50)
|| '_' || substr(md5(rec.table_name || '_vfy_fetched_at'), 1, 8);
EXECUTE format(
'CREATE INDEX IF NOT EXISTS %I ON billiards_ods.%I (fetched_at)',
idx_name, rec.table_name
);
SELECT array_agg(kcu.column_name ORDER BY kcu.ordinal_position)
INTO pk_cols
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.table_schema = kcu.table_schema
AND tc.table_name = kcu.table_name
AND tc.constraint_name = kcu.constraint_name
WHERE tc.table_schema = 'billiards_ods'
AND tc.table_name = rec.table_name
AND tc.constraint_type = 'PRIMARY KEY';
IF pk_cols IS NOT NULL AND coalesce(array_length(pk_cols, 1), 0) <= 3 THEN
SELECT string_agg(format('%I', c), ', ')
INTO pk_cols_sql
FROM unnest(pk_cols) AS c;
idx_name := left(format('idx_%s_vfy_fetched_pk', rec.table_name), 50)
|| '_' || substr(md5(rec.table_name || '_vfy_fetched_pk'), 1, 8);
EXECUTE format(
'CREATE INDEX IF NOT EXISTS %I ON billiards_ods.%I (fetched_at, %s)',
idx_name, rec.table_name, pk_cols_sql
);
END IF;
END IF;
END LOOP;
END
$$;
DO $$
DECLARE
rec RECORD;
tcol TEXT;
pk_cols TEXT[];
pk_cols_sql TEXT;
idx_name TEXT;
time_candidates TEXT[] := ARRAY[
'pay_time',
'create_time',
'start_use_time',
'scd2_start_time',
'calc_time',
'order_date',
'fetched_at'
];
BEGIN
FOR rec IN
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_dwd'
AND table_type = 'BASE TABLE'
LOOP
SELECT array_agg(kcu.column_name ORDER BY kcu.ordinal_position)
INTO pk_cols
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.table_schema = kcu.table_schema
AND tc.table_name = kcu.table_name
AND tc.constraint_name = kcu.constraint_name
WHERE tc.table_schema = 'billiards_dwd'
AND tc.table_name = rec.table_name
AND tc.constraint_type = 'PRIMARY KEY';
IF EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = 'billiards_dwd'
AND table_name = rec.table_name
AND column_name = 'scd2_is_current'
) AND pk_cols IS NOT NULL
AND coalesce(array_length(pk_cols, 1), 0) BETWEEN 1 AND 4 THEN
SELECT string_agg(format('%I', c), ', ')
INTO pk_cols_sql
FROM unnest(pk_cols) AS c;
idx_name := left(format('idx_%s_vfy_pk_current', rec.table_name), 50)
|| '_' || substr(md5(rec.table_name || '_vfy_pk_current'), 1, 8);
EXECUTE format(
'CREATE INDEX IF NOT EXISTS %I ON billiards_dwd.%I (%s, scd2_is_current)',
idx_name, rec.table_name, pk_cols_sql
);
END IF;
FOREACH tcol IN ARRAY time_candidates
LOOP
IF EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = 'billiards_dwd'
AND table_name = rec.table_name
AND column_name = tcol
) THEN
idx_name := left(format('idx_%s_vfy_%s', rec.table_name, tcol), 50)
|| '_' || substr(md5(rec.table_name || '_vfy_' || tcol), 1, 8);
EXECUTE format(
'CREATE INDEX IF NOT EXISTS %I ON billiards_dwd.%I (%I)',
idx_name, rec.table_name, tcol
);
IF pk_cols IS NOT NULL AND coalesce(array_length(pk_cols, 1), 0) <= 3 THEN
SELECT string_agg(format('%I', c), ', ')
INTO pk_cols_sql
FROM unnest(pk_cols) AS c;
idx_name := left(format('idx_%s_vfy_%s_pk', rec.table_name, tcol), 50)
|| '_' || substr(md5(rec.table_name || '_vfy_' || tcol || '_pk'), 1, 8);
EXECUTE format(
'CREATE INDEX IF NOT EXISTS %I ON billiards_dwd.%I (%I, %s)',
idx_name, rec.table_name, tcol, pk_cols_sql
);
END IF;
END IF;
END LOOP;
END LOOP;
END
$$;
DO $$
DECLARE
rec RECORD;
BEGIN
FOR rec IN
SELECT table_schema, table_name
FROM information_schema.tables
WHERE table_schema IN ('billiards_ods', 'billiards_dwd')
AND table_type = 'BASE TABLE'
LOOP
EXECUTE format('ANALYZE %I.%I', rec.table_schema, rec.table_name);
END LOOP;
END
$$;

View File

@@ -0,0 +1,389 @@
-- =============================================================================
-- DWS 配置表初始数据
-- 版本: v3.0
-- 创建日期: 2026-02-01
-- 描述: 初始化配置表数据,包含绩效档位、等级定价、奖金规则、区域分类、技能映射
-- =============================================================================
-- NOTE: 当前数据库 cfg_* 配置表为空(以数据库现状为准)
-- 下方默认配置仅作参考,已整体注释
/*
-- =============================================================================
-- 1. cfg_performance_tier - 绩效档位配置(含历史口径)
-- 数据来源DWS 数据库处理需求.md
-- 旧方案历史口径至2026-02-28:
-- 0档 淘汰压力 H <100 28 50% 3
-- 1档 及格档(重点激励) 100≤ H <130 18 40% 4
-- 2档 良好档(重点激励) 130≤ H <160 15 38% 4
-- 3档 优秀档 160≤ H <190 13 35% 5
-- 4档 卓越加速档(高端人才倾斜) 190≤ H <220 10 33% 6
-- 5档 冠军加速档(高端人才倾斜) H ≥220 8 30% 休假自由
-- 新方案2026-03-01起:
-- 0档 淘汰压力 H <120 28 50% 3
-- 1档 及格档 120≤ H <150 18 40% 4
-- 2档 良好档 150≤ H <180 13 35% 5
-- 3档 优秀档 180≤ H <210 10 30% 6
-- 4档 销冠竞争 H ≥210 8 25% 休假自由
-- =============================================================================
TRUNCATE TABLE billiards_dws.cfg_performance_tier RESTART IDENTITY CASCADE;
INSERT INTO billiards_dws.cfg_performance_tier (
tier_code, tier_name, tier_level,
min_hours, max_hours,
base_deduction, bonus_deduction_ratio, vacation_days, vacation_unlimited,
is_new_hire_tier, effective_from, effective_to, description
) VALUES
-- 旧方案至2026-02-28
-- 0档 淘汰压力: H<100, 专业课抽成28元/小时, 打赏课抽成50%, 休假3天
('T0', '0档-淘汰压力', 0,
0, 100,
28.00, 0.50, 3, FALSE,
FALSE, '2000-01-01', '2026-02-28',
'旧方案H<100专业课抽成28元/小时打赏课抽成50%休假3天'),
-- 1档 及格档: 100≤H<130, 专业课抽成18元/小时, 打赏课抽成40%, 休假4天
('T1', '1档-及格档', 1,
100, 130,
18.00, 0.40, 4, FALSE,
FALSE, '2000-01-01', '2026-02-28',
'旧方案100≤H<130专业课抽成18元/小时打赏课抽成40%休假4天'),
-- 2档 良好档: 130≤H<160, 专业课抽成15元/小时, 打赏课抽成38%, 休假4天
('T2', '2档-良好档', 2,
130, 160,
15.00, 0.38, 4, FALSE,
FALSE, '2000-01-01', '2026-02-28',
'旧方案130≤H<160专业课抽成15元/小时打赏课抽成38%休假4天'),
-- 3档 优秀档: 160≤H<190, 专业课抽成13元/小时, 打赏课抽成35%, 休假5天
('T3', '3档-优秀档', 3,
160, 190,
13.00, 0.35, 5, FALSE,
FALSE, '2000-01-01', '2026-02-28',
'旧方案160≤H<190专业课抽成13元/小时打赏课抽成35%休假5天'),
-- 4档 卓越加速档: 190≤H<220, 专业课抽成10元/小时, 打赏课抽成33%, 休假6天
('T4', '4档-卓越加速档', 4,
190, 220,
10.00, 0.33, 6, FALSE,
FALSE, '2000-01-01', '2026-02-28',
'旧方案190≤H<220专业课抽成10元/小时打赏课抽成33%休假6天'),
-- 5档 冠军加速档: H≥220, 专业课抽成8元/小时, 打赏课抽成30%, 休假自由
('T5', '5档-冠军加速档', 5,
220, NULL,
8.00, 0.30, 0, TRUE,
FALSE, '2000-01-01', '2026-02-28',
'旧方案H≥220专业课抽成8元/小时打赏课抽成30%,休假自由'),
-- 新方案2026-03-01起
-- 0档 淘汰压力: H<120, 专业课抽成28元/小时, 打赏课抽成50%, 休假3天
('T0', '0档-淘汰压力', 0,
0, 120,
28.00, 0.50, 3, FALSE,
FALSE, '2026-03-01', '9999-12-31',
'新方案H<120专业课抽成28元/小时打赏课抽成50%休假3天'),
-- 1档 及格档: 120≤H<150, 专业课抽成18元/小时, 打赏课抽成40%, 休假4天
('T1', '1档-及格档', 1,
120, 150,
18.00, 0.40, 4, FALSE,
FALSE, '2026-03-01', '9999-12-31',
'新方案120≤H<150专业课抽成18元/小时打赏课抽成40%休假4天'),
-- 2档 良好档: 150≤H<180, 专业课抽成13元/小时, 打赏课抽成35%, 休假5天
('T2', '2档-良好档', 2,
150, 180,
13.00, 0.35, 5, FALSE,
FALSE, '2026-03-01', '9999-12-31',
'新方案150≤H<180专业课抽成13元/小时打赏课抽成35%休假5天'),
-- 3档 优秀档: 180≤H<210, 专业课抽成10元/小时, 打赏课抽成30%, 休假6天
('T3', '3档-优秀档', 3,
180, 210,
10.00, 0.30, 6, FALSE,
FALSE, '2026-03-01', '9999-12-31',
'新方案180≤H<210专业课抽成10元/小时打赏课抽成30%休假6天'),
-- 4档 销冠竞争: H≥210, 专业课抽成8元/小时, 打赏课抽成25%, 休假自由
('T4', '4档-销冠竞争', 4,
210, NULL,
8.00, 0.25, 0, TRUE,
FALSE, '2026-03-01', '9999-12-31',
'新方案H≥210专业课抽成8元/小时打赏课抽成25%,休假自由');
-- =============================================================================
-- 2. cfg_assistant_level_price - 助教等级定价
-- 说明:
-- - level_code 来自 dim_assistant.assistant_level
-- - 8=助教管理, 10=初级, 20=中级, 30=高级, 40=星级
-- - 价格为客户支付价格(对外价格),助教收入=客户支付-档位抽成
-- - 包厢课基础课统一138元/小时(不随等级变化)
-- - 数据来源DWS 数据库处理需求.md
-- =============================================================================
TRUNCATE TABLE billiards_dws.cfg_assistant_level_price RESTART IDENTITY CASCADE;
INSERT INTO billiards_dws.cfg_assistant_level_price (
level_code, level_name,
base_course_price, bonus_course_price,
effective_from, effective_to, description
) VALUES
-- 初级助教基础课对客户收费98元/小时
(10, '初级',
98.00, 190.00,
'2000-01-01', '9999-12-31',
'初级助教基础课98元/附加课190元/时(客户支付价格)'),
-- 中级助教基础课对客户收费108元/小时
(20, '中级',
108.00, 190.00,
'2000-01-01', '9999-12-31',
'中级助教基础课108元/附加课190元/时(客户支付价格)'),
-- 高级助教基础课对客户收费118元/小时
(30, '高级',
118.00, 190.00,
'2000-01-01', '9999-12-31',
'高级助教基础课118元/附加课190元/时(客户支付价格)'),
-- 星级助教基础课对客户收费138元/小时
(40, '星级',
138.00, 190.00,
'2000-01-01', '9999-12-31',
'星级助教基础课138元/附加课190元/时(客户支付价格)'),
-- 助教管理level_code=8通常不参与客户服务计费此处设置默认值
(8, '助教管理',
98.00, 190.00,
'2000-01-01', '9999-12-31',
'助教管理:不参与客户服务计费,默认按初级价格');
-- =============================================================================
-- 3. cfg_bonus_rules - 奖金规则配置
-- 说明:
-- - SPRINT: 冲刺奖金历史口径至2026-02-28
-- - TOP_RANK: Top3排名奖金2026-03-01起
-- =============================================================================
TRUNCATE TABLE billiards_dws.cfg_bonus_rules RESTART IDENTITY CASCADE;
INSERT INTO billiards_dws.cfg_bonus_rules (
rule_type, rule_code, rule_name,
threshold_hours, rank_position, bonus_amount,
is_cumulative, priority,
effective_from, effective_to, description
) VALUES
-- 冲刺奖金: H>=190 得300元历史口径
('SPRINT', 'SPRINT_190', '冲刺奖金190',
190.00, NULL, 300.00,
FALSE, 1,
'2000-01-01', '2026-02-28',
'历史口径业绩≥190小时获得300元冲刺奖金不累计'),
-- 冲刺奖金: H>=220 得800元历史口径优先级更高覆盖190档
('SPRINT', 'SPRINT_220', '冲刺奖金220',
220.00, NULL, 800.00,
FALSE, 2,
'2000-01-01', '2026-02-28',
'历史口径业绩≥220小时获得800元冲刺奖金覆盖190档'),
-- Top1排名奖金: 1000元2026-03-01起
('TOP_RANK', 'TOP_1', 'Top1排名奖金',
NULL, 1, 1000.00,
FALSE, 0,
'2026-03-01', '9999-12-31',
'月度排名第一获得1000元并列都算'),
-- Top2排名奖金: 600元2026-03-01起
('TOP_RANK', 'TOP_2', 'Top2排名奖金',
NULL, 2, 600.00,
FALSE, 0,
'2026-03-01', '9999-12-31',
'月度排名第二获得600元并列都算'),
-- Top3排名奖金: 400元2026-03-01起
('TOP_RANK', 'TOP_3', 'Top3排名奖金',
NULL, 3, 400.00,
FALSE, 0,
'2026-03-01', '9999-12-31',
'月度排名第三获得400元并列都算');
-- =============================================================================
-- 4. cfg_area_category - 台区分类映射
-- 说明:
-- - 将 dim_table.site_table_area_name 映射到财务报表区域分类
-- - 映射规则: 精确匹配 > 模糊匹配 > 默认兜底
-- - 数据来源: BD_manual_dim_table.md 中的 site_table_area_name 实际分布
-- 分类设计:
-- - BILLIARD: 台球散台A区/B区/C区/TV台
-- - BILLIARD_VIP: 台球VIP包厢
-- - SNOOKER: 斯诺克区
-- - MAHJONG: 麻将区
-- - KTV: K歌/KTV
-- - SPECIAL: 特殊(补时长等)
-- - OTHER: 其他
-- =============================================================================
TRUNCATE TABLE billiards_dws.cfg_area_category RESTART IDENTITY CASCADE;
INSERT INTO billiards_dws.cfg_area_category (
source_area_name, category_code, category_name,
match_type, match_priority, is_active, description
) VALUES
-- ============ 台球散台区(精确匹配)============
('A区', 'BILLIARD', '台球散台',
'EXACT', 10, TRUE, '台球散台A区18台- 中八/追分'),
('B区', 'BILLIARD', '台球散台',
'EXACT', 10, TRUE, '台球散台B区15台- 中八/追分'),
('C区', 'BILLIARD', '台球散台',
'EXACT', 10, TRUE, '台球散台C区6台- 中八/追分'),
('TV台', 'BILLIARD', '台球散台',
'EXACT', 10, TRUE, '台球散台TV台1台- 中八/追分'),
-- ============ 台球VIP包厢精确匹配============
('VIP包厢', 'BILLIARD_VIP', '台球VIP',
'EXACT', 10, TRUE, '台球VIPVIP包厢4台- V1-V4中八, V5斯诺克'),
-- ============ 斯诺克区(精确匹配)============
('斯诺克区', 'SNOOKER', '斯诺克',
'EXACT', 10, TRUE, '斯诺克斯诺克区4台'),
-- ============ 麻将区(精确匹配)============
('麻将房', 'MAHJONG', '麻将棋牌',
'EXACT', 10, TRUE, '麻将棋牌麻将房5台'),
('M7', 'MAHJONG', '麻将棋牌',
'EXACT', 10, TRUE, '麻将棋牌M72台'),
('M8', 'MAHJONG', '麻将棋牌',
'EXACT', 10, TRUE, '麻将棋牌M81台'),
('666', 'MAHJONG', '麻将棋牌',
'EXACT', 10, TRUE, '麻将棋牌6662台'),
('发财', 'MAHJONG', '麻将棋牌',
'EXACT', 10, TRUE, '麻将棋牌发财1台'),
-- ============ KTV/K包精确匹配============
('K包', 'KTV', 'K歌娱乐',
'EXACT', 10, TRUE, 'K歌娱乐K包4台'),
('k包活动区', 'KTV', 'K歌娱乐',
'EXACT', 10, TRUE, 'K歌娱乐k包活动区2台'),
('幸会158', 'KTV', 'K歌娱乐',
'EXACT', 10, TRUE, 'K歌娱乐幸会1582台'),
-- ============ 特殊区域(精确匹配)============
('补时长', 'SPECIAL', '补时长',
'EXACT', 10, TRUE, '特殊补时长7台- 用于时长补录'),
-- ============ 模糊匹配规则(优先级较低)============
('%VIP%', 'BILLIARD_VIP', '台球VIP',
'LIKE', 50, TRUE, '模糊匹配:包含"VIP"的区域'),
('%斯诺克%', 'SNOOKER', '斯诺克',
'LIKE', 50, TRUE, '模糊匹配:包含"斯诺克"的区域'),
('%麻将%', 'MAHJONG', '麻将棋牌',
'LIKE', 50, TRUE, '模糊匹配:包含"麻将"的区域'),
('%K包%', 'KTV', 'K歌娱乐',
'LIKE', 50, TRUE, '模糊匹配:包含"K包"的区域'),
('%KTV%', 'KTV', 'K歌娱乐',
'LIKE', 50, TRUE, '模糊匹配:包含"KTV"的区域'),
-- ============ 默认兜底(优先级最低)============
('DEFAULT', 'OTHER', '其他',
'DEFAULT', 999, TRUE, '兜底规则:无法匹配的区域归入其他');
-- =============================================================================
-- 5. cfg_skill_type - 技能→课程类型映射
-- 说明:
-- - 将 skill_id 映射到课程类型
-- - 基础课/陪打: skill_id = 2791903611396869
-- - 附加课/超休: skill_id = 2807440316432197
-- - 避免依赖 skill_name 文本匹配
-- =============================================================================
TRUNCATE TABLE billiards_dws.cfg_skill_type RESTART IDENTITY CASCADE;
INSERT INTO billiards_dws.cfg_skill_type (
skill_id, skill_name,
course_type_code, course_type_name,
is_active, description
) VALUES
-- 基础课/陪打
(2791903611396869, '台球基础陪打',
'BASE', '基础课',
TRUE, '基础课:陪打服务,按助教等级计价'),
-- 附加课/超休
(2807440316432197, '台球超休服务',
'BONUS', '附加课',
TRUE, '附加课:超休/激励课固定190元/小时'),
-- 包厢课(如有)
(2807440316432198, '包厢服务',
'BASE', '基础课',
TRUE, '包厢服务归入基础课统计统一按138元/小时计价');
-- =============================================================================
-- 6. 优惠类型配置(用于财务优惠明细分析)
-- 说明: 定义各类优惠的代码和名称,便于后续分析
-- =============================================================================
-- 此配置作为代码常量使用,不单独建表
-- GROUPBUY - 团购优惠
-- VIP - 会员折扣
-- GIFT_CARD - 赠送卡抵扣
-- MANUAL - 手动调整
-- ROUNDING - 抹零
-- BIG_CUSTOMER - 大客户优惠(待抽样分析确认)
-- OTHER - 其他优惠
-- =============================================================================
-- 7. 支出类型配置用于Excel导入
-- 说明: 定义各类支出的代码和名称
-- =============================================================================
-- 此配置作为代码常量使用,不单独建表
-- RENT - 房租
-- UTILITY - 水电费
-- PROPERTY - 物业费
-- SALARY - 工资
-- REIMBURSE - 报销
-- PLATFORM_FEE - 平台服务费
-- OTHER - 其他支出
-- =============================================================================
-- 8. 平台类型配置用于Excel导入
-- 说明: 定义各平台的代码和名称
-- =============================================================================
-- 此配置作为代码常量使用,不单独建表
-- MEITUAN - 美团
-- DOUYIN - 抖音
-- DIANPING - 大众点评
-- OTHER - 其他平台
-- =============================================================================
-- 验证数据插入
-- =============================================================================
DO $$
DECLARE
v_tier_count INTEGER;
v_price_count INTEGER;
v_bonus_count INTEGER;
v_area_count INTEGER;
v_skill_count INTEGER;
BEGIN
SELECT COUNT(*) INTO v_tier_count FROM billiards_dws.cfg_performance_tier;
SELECT COUNT(*) INTO v_price_count FROM billiards_dws.cfg_assistant_level_price;
SELECT COUNT(*) INTO v_bonus_count FROM billiards_dws.cfg_bonus_rules;
SELECT COUNT(*) INTO v_area_count FROM billiards_dws.cfg_area_category;
SELECT COUNT(*) INTO v_skill_count FROM billiards_dws.cfg_skill_type;
RAISE NOTICE '配置数据初始化完成:';
RAISE NOTICE ' - cfg_performance_tier: % 条', v_tier_count;
RAISE NOTICE ' - cfg_assistant_level_price: % 条', v_price_count;
RAISE NOTICE ' - cfg_bonus_rules: % 条', v_bonus_count;
RAISE NOTICE ' - cfg_area_category: % 条', v_area_count;
RAISE NOTICE ' - cfg_skill_type: % 条', v_skill_count;
END;
$$;
*/

View File

@@ -0,0 +1,226 @@
-- =============================================================================
-- 指数算法参数初始化脚本(与数据库现状对齐)
-- 版本: v2.0
-- 创建日期: 2026-02-07
-- 描述: 对齐 RS / OS / MS / ML / NCI / WBI 指数参数快照(兼容保留 RECALL / INTIMACY
-- =============================================================================
-- 清空旧数据(如需重置)
-- DELETE FROM billiards_dws.cfg_index_parameters
-- WHERE index_type IN ('RS', 'OS', 'MS', 'ML', 'NCI', 'WBI', 'RECALL', 'INTIMACY');
INSERT INTO billiards_dws.cfg_index_parameters
(index_type, param_name, param_value, description, effective_from)
VALUES
('INTIMACY', 'amount_base', 500.000000, 'amount compression base', DATE '2026-02-06'),
('INTIMACY', 'burst_gamma', 0.600000, 'burst gamma', DATE '2026-02-06'),
('INTIMACY', 'ewma_alpha', 0.200000, 'EWMA alpha', DATE '2026-02-06'),
('INTIMACY', 'halflife_last', 10.000000, 'last-contact half-life (days)', DATE '2026-02-06'),
('INTIMACY', 'halflife_long', 30.000000, 'long-term burst half-life (days)', DATE '2026-02-06'),
('INTIMACY', 'halflife_recharge', 21.000000, 'recharge half-life (days)', DATE '2026-02-06'),
('INTIMACY', 'halflife_session', 14.000000, 'session half-life (days)', DATE '2026-02-06'),
('INTIMACY', 'halflife_short', 7.000000, 'short-term burst half-life (days)', DATE '2026-02-06'),
('INTIMACY', 'incentive_weight', 1.500000, 'incentive multiplier', DATE '2026-02-06'),
('INTIMACY', 'lookback_days', 60.000000, 'lookback window (days)', DATE '2026-02-06'),
('INTIMACY', 'percentile_lower', 5.000000, 'lower percentile', DATE '2026-02-06'),
('INTIMACY', 'percentile_upper', 95.000000, 'upper percentile', DATE '2026-02-06'),
('INTIMACY', 'recharge_attribute_hours', 1.000000, 'recharge attribution window (hours)', DATE '2026-02-06'),
('INTIMACY', 'session_merge_hours', 4.000000, 'session merge gap (hours)', DATE '2026-02-06'),
('INTIMACY', 'weight_duration', 0.500000, 'duration weight', DATE '2026-02-06'),
('INTIMACY', 'weight_frequency', 2.000000, 'frequency weight', DATE '2026-02-06'),
('INTIMACY', 'weight_recency', 1.500000, 'recency weight', DATE '2026-02-06'),
('INTIMACY', 'weight_recharge', 2.000000, 'recharge weight', DATE '2026-02-06'),
('NCI', 'active_new_penalty', 0.200000, 'active-new suppression multiplier', DATE '2026-02-06'),
('NCI', 'active_new_recency_days', 7.000000, 'active-new recency window (days)', DATE '2026-02-06'),
('NCI', 'active_new_visit_threshold_14d', 2.000000, 'active-new threshold in 14d visits', DATE '2026-02-06'),
('NCI', 'amount_base_M0', 300.000000, 'spend log base M0', DATE '2026-02-06'),
('NCI', 'balance_base_B0', 500.000000, 'balance log base B0', DATE '2026-02-06'),
('NCI', 'compression_mode', 0.000000, 'compression mode', DATE '2026-02-06'),
('NCI', 'enable_stop_high_balance_exception', 0.000000, 'enable high-balance STOP exception', DATE '2026-02-06'),
('NCI', 'ewma_alpha', 0.200000, 'EWMA alpha', DATE '2026-02-06'),
('NCI', 'h_recharge', 7.000000, 'recharge decay half-life (days)', DATE '2026-02-06'),
('NCI', 'high_balance_threshold', 1000.000000, 'high-balance threshold', DATE '2026-02-06'),
('NCI', 'lookback_days_recency', 60.000000, 'recency lookback window (days)', DATE '2026-02-06'),
('NCI', 'new_days_threshold', 30.000000, 'new member days threshold', DATE '2026-02-06'),
('NCI', 'new_recharge_max_visits', 10.000000, 'max visits for new-recharge grouping', DATE '2026-02-06'),
('NCI', 'new_visit_threshold', 2.000000, 'new member visit threshold', DATE '2026-02-06'),
('NCI', 'no_touch_days_new', 3.000000, 'no-touch threshold (days)', DATE '2026-02-06'),
('NCI', 'percentile_lower', 5.000000, 'lower percentile', DATE '2026-02-06'),
('NCI', 'percentile_upper', 95.000000, 'upper percentile', DATE '2026-02-06'),
('NCI', 'recharge_recent_days', 14.000000, 'recent recharge window (days)', DATE '2026-02-06'),
('NCI', 'salvage_end', 60.000000, 'salvage decay end day', DATE '2026-02-06'),
('NCI', 'salvage_start', 30.000000, 'salvage decay start day', DATE '2026-02-06'),
('NCI', 't2_target_days', 7.000000, 'second-visit target window (days)', DATE '2026-02-06'),
('NCI', 'use_smoothing', 1.000000, 'enable smoothing', DATE '2026-02-06'),
('NCI', 'value_w_bal', 0.800000, 'value weight for balance', DATE '2026-02-06'),
('NCI', 'value_w_spend', 1.000000, 'value weight for spend', DATE '2026-02-06'),
('NCI', 'visit_lookback_days', 180.000000, 'visit history lookback (days)', DATE '2026-02-06'),
('NCI', 'w_need', 1.600000, 'need weight', DATE '2026-02-06'),
('NCI', 'w_re', 0.800000, 'recharge pressure weight', DATE '2026-02-06'),
('NCI', 'w_value', 1.000000, 'value weight', DATE '2026-02-06'),
('NCI', 'w_welcome', 1.000000, 'welcome-stage weight', DATE '2026-02-06'),
('NCI', 'welcome_window_days', 3.000000, 'welcome outreach window for first touch (days)', DATE '2026-02-06'),
('RECALL', 'ewma_alpha', 0.200000, 'EWMA alpha', DATE '2026-02-06'),
('RECALL', 'halflife_new', 7.000000, 'new member half-life (days)', DATE '2026-02-06'),
('RECALL', 'halflife_recharge', 10.000000, 'recharge half-life (days)', DATE '2026-02-06'),
('RECALL', 'lookback_days', 60.000000, 'recall lookback window (days)', DATE '2026-02-06'),
('RECALL', 'percentile_lower', 5.000000, 'lower percentile', DATE '2026-02-06'),
('RECALL', 'percentile_upper', 95.000000, 'upper percentile', DATE '2026-02-06'),
('RECALL', 'sigma_min', 2.000000, 'minimum sigma for volatility', DATE '2026-02-06'),
('RECALL', 'weight_hot', 1.000000, 'hotness weight', DATE '2026-02-06'),
('RECALL', 'weight_new', 1.000000, 'new member weight', DATE '2026-02-06'),
('RECALL', 'weight_overdue', 3.000000, 'overdue weight', DATE '2026-02-06'),
('RECALL', 'weight_recharge', 1.000000, 'recharge weight', DATE '2026-02-06'),
('WBI', 'amount_base_M0', 300.000000, 'spend log base M0', DATE '2026-02-06'),
('WBI', 'balance_base_B0', 500.000000, 'balance log base B0', DATE '2026-02-06'),
('WBI', 'compression_mode', 0.000000, 'compression mode', DATE '2026-02-06'),
('WBI', 'enable_stop_high_balance_exception', 0.000000, 'enable high-balance STOP exception', DATE '2026-02-06'),
('WBI', 'ewma_alpha', 0.200000, 'EWMA alpha', DATE '2026-02-06'),
('WBI', 'h_recharge', 7.000000, 'recharge decay half-life (days)', DATE '2026-02-06'),
('WBI', 'high_balance_threshold', 1000.000000, 'high-balance threshold', DATE '2026-02-06'),
('WBI', 'lookback_days_recency', 60.000000, 'recency lookback window (days)', DATE '2026-02-06'),
('WBI', 'new_days_threshold', 30.000000, 'new member days threshold', DATE '2026-02-06'),
('WBI', 'new_recharge_max_visits', 10.000000, 'max visits for new-recharge grouping', DATE '2026-02-06'),
('WBI', 'new_visit_threshold', 2.000000, 'new member visit threshold', DATE '2026-02-06'),
('WBI', 'overdue_alpha', 2.000000, 'overdue fallback alpha', DATE '2026-02-06'),
('WBI', 'overdue_weight_blend_min_samples', 8.000000, 'minimum samples to fully trust weighted overdue CDF', DATE '2026-02-07'),
('WBI', 'overdue_weight_halflife_days', 30.000000, 'overdue weighted-CDF interval half-life (days)', DATE '2026-02-07'),
('WBI', 'percentile_lower', 5.000000, 'lower percentile', DATE '2026-02-06'),
('WBI', 'percentile_upper', 95.000000, 'upper percentile', DATE '2026-02-06'),
('WBI', 'recency_gate_days', 14.000000, 'recency suppression gate center (days)', DATE '2026-02-06'),
('WBI', 'recency_gate_slope_days', 3.000000, 'recency suppression slope (days)', DATE '2026-02-06'),
('WBI', 'recency_hard_floor_days', 14.000000, 'hard floor for winback recency (days)', DATE '2026-02-06'),
('WBI', 'recharge_recent_days', 14.000000, 'recent recharge window (days)', DATE '2026-02-06'),
('WBI', 'use_smoothing', 1.000000, 'enable smoothing', DATE '2026-02-06'),
('WBI', 'value_w_bal', 1.000000, 'value weight for balance', DATE '2026-02-06'),
('WBI', 'value_w_spend', 1.000000, 'value weight for spend', DATE '2026-02-06'),
('WBI', 'visit_lookback_days', 180.000000, 'visit history lookback (days)', DATE '2026-02-06'),
('WBI', 'w_drop', 1.000000, 'drop weight', DATE '2026-02-06'),
('WBI', 'w_over', 2.000000, 'overdue weight', DATE '2026-02-06'),
('WBI', 'w_re', 0.400000, 'recharge pressure weight', DATE '2026-02-06'),
('WBI', 'w_value', 1.200000, 'value weight', DATE '2026-02-06')
ON CONFLICT (index_type, param_name, effective_from) DO UPDATE SET
param_value = EXCLUDED.param_value,
description = EXCLUDED.description,
updated_at = NOW();
-- =============================================================================
-- 关系指数RS/OS/MS/ML参数
-- 生效时间:北京时间 2026-01-01按数据库日期管理
-- =============================================================================
-- 下线旧版 INTIMACY 参数(兼容保留历史记录)
UPDATE billiards_dws.cfg_index_parameters
SET effective_to = DATE '2025-12-31',
updated_at = NOW()
WHERE index_type = 'INTIMACY'
AND (effective_to IS NULL OR effective_to > DATE '2025-12-31');
INSERT INTO billiards_dws.cfg_index_parameters
(index_type, param_name, param_value, description, effective_from)
VALUES
-- RS关系强度
('RS', 'lookback_days', 60.000000, '服务行为回溯窗口(天)', DATE '2026-01-01'),
('RS', 'session_merge_hours', 4.000000, '会话合并阈值(小时)', DATE '2026-01-01'),
('RS', 'incentive_weight', 1.500000, '激励课权重', DATE '2026-01-01'),
('RS', 'halflife_session', 14.000000, '会话半衰期(天)', DATE '2026-01-01'),
('RS', 'halflife_last', 10.000000, '最近一次服务半衰期(天)', DATE '2026-01-01'),
('RS', 'weight_f', 1.000000, '频次项权重', DATE '2026-01-01'),
('RS', 'weight_d', 0.700000, '时长项权重', DATE '2026-01-01'),
('RS', 'gate_alpha', 0.600000, '最近服务门控指数', DATE '2026-01-01'),
('RS', 'percentile_lower', 5.000000, '展示分下分位', DATE '2026-01-01'),
('RS', 'percentile_upper', 95.000000, '展示分上分位', DATE '2026-01-01'),
('RS', 'compression_mode', 1.000000, '压缩模式0=none,1=log1p,2=asinh', DATE '2026-01-01'),
('RS', 'use_smoothing', 1.000000, '是否启用分位平滑', DATE '2026-01-01'),
('RS', 'ewma_alpha', 0.200000, 'EWMA平滑系数', DATE '2026-01-01'),
-- OS归属份额
('OS', 'min_rs_raw_for_ownership', 0.050000, '参与归属计算的最小RS_raw', DATE '2026-01-01'),
('OS', 'min_total_rs_raw', 0.100000, '形成稳定归属的最小sum_rs', DATE '2026-01-01'),
('OS', 'ownership_main_threshold', 0.600000, '主责阈值', DATE '2026-01-01'),
('OS', 'ownership_comanage_threshold', 0.350000, '共管阈值', DATE '2026-01-01'),
('OS', 'ownership_gap_threshold', 0.150000, '主责与次席份额差阈值', DATE '2026-01-01'),
('OS', 'eps', 0.000001, '数值稳定项', DATE '2026-01-01'),
-- MS升温动量
('MS', 'lookback_days', 60.000000, '服务行为回溯窗口(天)', DATE '2026-01-01'),
('MS', 'session_merge_hours', 4.000000, '会话合并阈值(小时)', DATE '2026-01-01'),
('MS', 'incentive_weight', 1.500000, '激励课权重', DATE '2026-01-01'),
('MS', 'halflife_short', 7.000000, '短期半衰期(天)', DATE '2026-01-01'),
('MS', 'halflife_long', 30.000000, '长期半衰期(天)', DATE '2026-01-01'),
('MS', 'eps', 0.000001, '数值稳定项', DATE '2026-01-01'),
('MS', 'percentile_lower', 5.000000, '展示分下分位', DATE '2026-01-01'),
('MS', 'percentile_upper', 95.000000, '展示分上分位', DATE '2026-01-01'),
('MS', 'compression_mode', 1.000000, '压缩模式0=none,1=log1p,2=asinh', DATE '2026-01-01'),
('MS', 'use_smoothing', 1.000000, '是否启用分位平滑', DATE '2026-01-01'),
('MS', 'ewma_alpha', 0.200000, 'EWMA平滑系数', DATE '2026-01-01'),
-- ML付费关联
('ML', 'lookback_days', 60.000000, '充值行为回溯窗口(天)', DATE '2026-01-01'),
('ML', 'source_mode', 0.000000, '数据源模式0=manual_only,1=last_touch_fallback', DATE '2026-01-01'),
('ML', 'recharge_attribute_hours', 1.000000, 'last-touch备用归因窗口小时', DATE '2026-01-01'),
('ML', 'amount_base', 500.000000, '金额压缩基准', DATE '2026-01-01'),
('ML', 'halflife_recharge', 21.000000, '充值半衰期(天)', DATE '2026-01-01'),
('ML', 'percentile_lower', 5.000000, '展示分下分位', DATE '2026-01-01'),
('ML', 'percentile_upper', 95.000000, '展示分上分位', DATE '2026-01-01'),
('ML', 'compression_mode', 1.000000, '压缩模式0=none,1=log1p,2=asinh', DATE '2026-01-01'),
('ML', 'use_smoothing', 1.000000, '是否启用分位平滑', DATE '2026-01-01'),
('ML', 'ewma_alpha', 0.200000, 'EWMA平滑系数', DATE '2026-01-01')
ON CONFLICT (index_type, param_name, effective_from) DO UPDATE SET
param_value = EXCLUDED.param_value,
description = EXCLUDED.description,
updated_at = NOW();
-- =============================================================================
-- 验证
-- =============================================================================
DO $$
DECLARE
rs_count INTEGER;
os_count INTEGER;
ms_count INTEGER;
ml_count INTEGER;
nci_count INTEGER;
wbi_count INTEGER;
BEGIN
SELECT COUNT(*) INTO rs_count
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'RS';
SELECT COUNT(*) INTO os_count
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'OS';
SELECT COUNT(*) INTO ms_count
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'MS';
SELECT COUNT(*) INTO ml_count
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'ML';
SELECT COUNT(*) INTO nci_count
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'NCI';
SELECT COUNT(*) INTO wbi_count
FROM billiards_dws.cfg_index_parameters
WHERE index_type = 'WBI';
RAISE NOTICE 'RS 参数数量: %', rs_count;
RAISE NOTICE 'OS 参数数量: %', os_count;
RAISE NOTICE 'MS 参数数量: %', ms_count;
RAISE NOTICE 'ML 参数数量: %', ml_count;
RAISE NOTICE '新客转化参数数量: %', nci_count;
RAISE NOTICE '唤回指数参数数量: %', wbi_count;
END $$;
SELECT
index_type,
param_name,
param_value,
description,
effective_from
FROM billiards_dws.cfg_index_parameters
ORDER BY index_type, param_name, effective_from;

View File

@@ -0,0 +1,41 @@
-- 将新的 ODS 任务注册到 etl_admin.etl_task按需替换 store_id
-- 使用方式(示例):
-- psql "$PG_DSN" -f etl_billiards/database/seed_ods_tasks.sql
-- 或在 psql 中直接执行本文件内容。
WITH target_store AS (
SELECT 2790685415443269::bigint AS store_id -- TODO: 替换为实际 store_id
),
task_codes AS (
SELECT unnest(ARRAY[
-- Must match tasks/ods_tasks.py (ENABLED_ODS_CODES)
'ODS_ASSISTANT_ACCOUNT',
'ODS_ASSISTANT_LEDGER',
'ODS_ASSISTANT_ABOLISH',
'ODS_SETTLEMENT_RECORDS',
'ODS_TABLE_USE',
'ODS_PAYMENT',
'ODS_REFUND',
'ODS_PLATFORM_COUPON',
'ODS_MEMBER',
'ODS_MEMBER_CARD',
'ODS_MEMBER_BALANCE',
'ODS_RECHARGE_SETTLE',
'ODS_GROUP_PACKAGE',
'ODS_GROUP_BUY_REDEMPTION',
'ODS_INVENTORY_STOCK',
'ODS_INVENTORY_CHANGE',
'ODS_TABLES',
'ODS_GOODS_CATEGORY',
'ODS_STORE_GOODS',
'ODS_STORE_GOODS_SALES',
'ODS_TABLE_FEE_DISCOUNT',
'ODS_TENANT_GOODS',
'ODS_SETTLEMENT_TICKET'
]) AS task_code
)
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)
SELECT t.task_code, s.store_id, TRUE
FROM task_codes t CROSS JOIN target_store s
ON CONFLICT (task_code, store_id) DO UPDATE
SET enabled = EXCLUDED.enabled;

View File

@@ -0,0 +1,54 @@
-- Seed scheduler-compatible tasks into etl_admin.etl_task.
--
-- Notes:
-- - These task_code values must match orchestration/task_registry.py.
-- - ODS_* tasks are intentionally excluded here because they don't follow the
-- BaseTask(cursor_data) scheduler interface in this repo version.
--
-- Usage (example):
-- psql "%PG_DSN%" -f etl_billiards/database/seed_scheduler_tasks.sql
--
WITH target_store AS (
SELECT 2790685415443269::bigint AS store_id -- TODO: replace with your store_id
),
task_codes AS (
SELECT unnest(ARRAY[
'ASSISTANT_ABOLISH',
'ASSISTANTS',
'COUPON_USAGE',
'CHECK_CUTOFF',
'DWD_LOAD_FROM_ODS',
'DWD_QUALITY_CHECK',
'INIT_DWD_SCHEMA',
'INIT_DWS_SCHEMA',
'INIT_ODS_SCHEMA',
'INVENTORY_CHANGE',
'LEDGER',
'MANUAL_INGEST',
'MEMBERS',
'MEMBERS_DWD',
'ODS_JSON_ARCHIVE',
'ORDERS',
'PACKAGES_DEF',
'PAYMENTS',
'PAYMENTS_DWD',
'PRODUCTS',
'REFUNDS',
'TABLE_DISCOUNT',
'TABLES',
'TICKET_DWD',
'TOPUPS',
'DWS_BUILD_ORDER_SUMMARY',
'DWS_WINBACK_INDEX',
'DWS_NEWCONV_INDEX',
'DWS_INTIMACY_INDEX',
'DWS_RELATION_INDEX',
'DWS_ML_MANUAL_IMPORT'
]) AS task_code
)
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)
SELECT t.task_code, s.store_id, TRUE
FROM task_codes t CROSS JOIN target_store s
ON CONFLICT (task_code, store_id) DO UPDATE
SET enabled = EXCLUDED.enabled,
updated_at = now();