This commit is contained in:
Neo
2026-01-27 22:45:50 +08:00
parent a6ad343092
commit 4c192e921c
476 changed files with 381543 additions and 5819 deletions

View File

@@ -1,53 +1,132 @@
# 数据库配置(真实库)
# ==============================================================================
# ETL 系统配置文件
# ==============================================================================
# 配置优先级DEFAULTS < .env < CLI 参数
# ------------------------------------------------------------------------------
# 数据库配置
# ------------------------------------------------------------------------------
# 完整 DSN优先使用如果设置了则忽略下面的 host/port/name/user/password
PG_DSN=postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test
# 分离式配置(如果不使用 DSN可以单独配置以下参数
# PG_HOST=localhost
# PG_PORT=5432
# PG_NAME=your_database
# PG_USER=your_user
# PG_PASSWORD=your_password
# 连接超时(秒,范围 1-20
PG_CONNECT_TIMEOUT=10
# 如需拆分配置PG_HOST=... PG_PORT=... PG_NAME=... PG_USER=... PG_PASSWORD=...
# API配置如需走真实接口再填写
API_BASE=https://api.example.com
API_TOKEN=your_token_here
# API_TIMEOUT=20
# API_PAGE_SIZE=200
# API_RETRY_MAX=3
# ------------------------------------------------------------------------------
# 数据库 Schema 配置
# ------------------------------------------------------------------------------
# OLTP 业务数据 schema默认 billiards
SCHEMA_OLTP=billiards
# 应用配置
# ETL 管理数据 schema默认 etl_admin
SCHEMA_ETL=etl_admin
# ------------------------------------------------------------------------------
# API 配置
# ------------------------------------------------------------------------------
API_BASE=https://pc.ficoo.vip/apiprod/admin/v1/
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6Iks1ZnBhYlRTNkFsR0FpMmN4WGYrMHdJVkk0L2UvTVQrSVBHM3V5VWRrSjg9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzEvMzEg5LiL5Y2IMTA6MTQ6NTEiLCJuZWVkQ2hlY2tUb2tlbiI6ImZhbHNlIiwiZXhwIjoxNzY5ODY4ODkxLCJpc3MiOiJ0ZXN0IiwiYXVkIjoiVXNlciJ9.BH3-iwwrBczb8aFfI__6kwe3AIsEPacN9TruaTrQ3nY
# API 请求超时(秒)
API_TIMEOUT=20
# 分页大小
API_PAGE_SIZE=200
# 最大重试次数
API_RETRY_MAX=3
# 重试退避时间JSON 数组格式,单位秒)
# API_RETRY_BACKOFF=[1, 2, 4]
# ------------------------------------------------------------------------------
# 门店配置
# ------------------------------------------------------------------------------
STORE_ID=2790685415443269
# TIMEZONE=Asia/Taipei
# SCHEMA_OLTP=billiards
# SCHEMA_ETL=etl_admin
TIMEZONE=Asia/Taipei
# ------------------------------------------------------------------------------
# 路径配置
EXPORT_ROOT=C:\dev\LLTQ\export\JSON
LOG_ROOT=C:\dev\LLTQ\export\LOG
FETCH_ROOT=
INGEST_SOURCE_DIR=
WRITE_PRETTY_JSON=false
PGCLIENTENCODING=utf8
# ------------------------------------------------------------------------------
# 导出根目录
EXPORT_ROOT=export/JSON
# ETL配置
# 日志根目录
LOG_ROOT=export/LOG
# 在线抓取 JSON 输出目录
FETCH_ROOT=export/JSON
# 本地入库数据源目录INGEST_ONLY 模式使用)
INGEST_SOURCE_DIR=export/test-json-doc
# ------------------------------------------------------------------------------
# 流水线配置
# ------------------------------------------------------------------------------
# 运行模式FULL抓取+入库、FETCH_ONLY仅抓取、INGEST_ONLY仅入库
PIPELINE_FLOW=FULL
# JSON 美化输出(调试用,会增加文件大小)
WRITE_PRETTY_JSON=false
# ------------------------------------------------------------------------------
# 时间窗口配置
# ------------------------------------------------------------------------------
# 冗余窗口(秒),向前多抓取的时间避免边界数据丢失
OVERLAP_SECONDS=120
# 忙时窗口大小(分钟)
WINDOW_BUSY_MIN=30
# 闲时窗口大小(分钟)
WINDOW_IDLE_MIN=180
# 闲时窗口定义HH:MM 格式)
IDLE_START=04:00
IDLE_END=16:00
# 窗口切分单位month/none用于长时间回溯任务按月切分
WINDOW_SPLIT_UNIT=month
# 窗口前后补偿小时数,用于捕获边界数据
WINDOW_COMPENSATION_HOURS=2
# 允许空结果推进窗口
ALLOW_EMPTY_RESULT_ADVANCE=true
# 清洗配置
LOG_UNKNOWN_FIELDS=true
HASH_ALGO=sha1
STRICT_NUMERIC=true
ROUND_MONEY_SCALE=2
# ------------------------------------------------------------------------------
# 数据完整性检查配置
# ------------------------------------------------------------------------------
# 检查模式history历史全量、recent最近增量
INTEGRITY_MODE=history
# 测试/离线模式(真实库联调建议 ONLINE
TEST_MODE=ONLINE
TEST_JSON_ARCHIVE_DIR=tests/source-data-doc
TEST_JSON_TEMP_DIR=/tmp/etl_billiards_json_tmp
# 历史检查起始日期history 模式使用
INTEGRITY_HISTORY_START=2025-07-01
# 测试数据库
TEST_DB_DSN=postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test
# 历史检查结束日期(留空表示到当前)
INTEGRITY_HISTORY_END=
# ODS <20>ؽ<EFBFBD><D8BD>ű<EFBFBD><C5B1><EFBFBD><EFBFBD>ã<EFBFBD><C3A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ã<EFBFBD>
JSON_DOC_DIR=C:\dev\LLTQ\export\test-json-doc
ODS_INCLUDE_FILES=
ODS_DROP_SCHEMA_FIRST=true
# 是否包含维度表校验
INTEGRITY_INCLUDE_DIMENSIONS=true
# 发现丢失数据时是否自动补全
INTEGRITY_AUTO_BACKFILL=true
# 自动执行完整性检查ETL 完成后自动触发)
INTEGRITY_AUTO_CHECK=false
# 指定要校验的 ODS 任务代码(逗号分隔,留空表示全部)
# INTEGRITY_ODS_TASK_CODES=ODS_PAYMENT,ODS_MEMBER
# ------------------------------------------------------------------------------
# 默认任务列表(逗号分隔,可被 CLI --tasks 参数覆盖)
# ------------------------------------------------------------------------------
# RUN_TASKS=ODS_PAYMENT,ODS_MEMBER,ODS_SETTLEMENT_RECORDS,DWD_LOAD_FROM_ODS