ODS 完成
This commit is contained in:
0
etl_billiards/scripts/Temp1.py
Normal file
0
etl_billiards/scripts/Temp1.py
Normal file
76
etl_billiards/scripts/bootstrap_schema.py
Normal file
76
etl_billiards/scripts/bootstrap_schema.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Apply the PRD-aligned warehouse schema (ODS/DWD/DWS) to PostgreSQL."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from database.connection import DatabaseConnection # noqa: E402
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the schema bootstrap script.

    Options:
        --dsn      PostgreSQL DSN; defaults to the ``PG_DSN`` environment
                   variable (``None`` when it is unset).
        --file     Path to the schema SQL file; defaults to
                   ``PROJECT_ROOT / "database" / "schema_v2.sql"``.
        --timeout  Connect timeout in seconds; defaults to the
                   ``PG_CONNECT_TIMEOUT`` environment variable, or 10.

    Returns:
        The populated namespace with ``dsn``, ``file`` and ``timeout``.
    """
    # A malformed PG_CONNECT_TIMEOUT (e.g. "10s") used to abort the script
    # with a bare ValueError before argparse even ran; warn and fall back
    # to the documented default instead.
    raw_timeout = os.environ.get("PG_CONNECT_TIMEOUT")
    try:
        default_timeout = int(raw_timeout) if raw_timeout else 10
    except ValueError:
        print(
            f"Ignoring invalid PG_CONNECT_TIMEOUT={raw_timeout!r}; using 10.",
            file=sys.stderr,
        )
        default_timeout = 10

    parser = argparse.ArgumentParser(
        description="Create/upgrade warehouse schemas using schema_v2.sql"
    )
    parser.add_argument(
        "--dsn",
        help="PostgreSQL DSN (fallback to PG_DSN env)",
        default=os.environ.get("PG_DSN"),
    )
    parser.add_argument(
        "--file",
        help="Path to schema SQL",
        default=str(PROJECT_ROOT / "database" / "schema_v2.sql"),
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=default_timeout,
        help="connect_timeout seconds (capped at 20, default 10)",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def apply_schema(dsn: str, sql_path: Path, timeout: int) -> None:
    """Execute the schema SQL file against the database in one transaction.

    Args:
        dsn: PostgreSQL DSN handed to ``DatabaseConnection``.
        sql_path: Path of the SQL file to run.
        timeout: Requested connect timeout in seconds; clamped to 1..20.

    Raises:
        FileNotFoundError: ``sql_path`` is missing or is not a regular file.
        ValueError: the file contains nothing but whitespace.
        Exception: anything raised while executing the SQL is re-raised
            after ``rollback()`` has been called on the connection.
    """
    # is_file() also rejects directories, which exists() would let through
    # only to fail later inside read_text() with a less helpful error.
    if not sql_path.is_file():
        raise FileNotFoundError(f"Schema file not found: {sql_path}")

    sql_text = sql_path.read_text(encoding="utf-8")
    # Fail before opening a connection; an empty script would otherwise only
    # be rejected by the driver once a connection had been made.
    if not sql_text.strip():
        raise ValueError(f"Schema file is empty: {sql_path}")

    # Keep the connect timeout within 1..20 seconds.
    timeout_val = max(1, min(timeout, 20))

    conn = DatabaseConnection(dsn, connect_timeout=timeout_val)
    try:
        # The whole file is sent as a single execute() call.
        with conn.conn.cursor() as cur:
            cur.execute(sql_text)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
def main() -> int:
    """Entry point: apply the schema file and translate the outcome to an exit code.

    Returns:
        0 when the schema was applied, 1 when applying it raised,
        2 when no DSN was provided.
    """
    opts = parse_args()

    # Guard clause: nothing can be done without a DSN.
    if not opts.dsn:
        print("Missing DSN. Set PG_DSN or pass --dsn.", file=sys.stderr)
        return 2

    schema_file = Path(opts.file)
    try:
        apply_schema(opts.dsn, schema_file, opts.timeout)
    except Exception as exc:  # pragma: no cover - utility script
        print(f"Schema apply failed: {exc}", file=sys.stderr)
        return 1
    else:
        print("Schema applied successfully.")
        return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
425
etl_billiards/scripts/build_dwd_from_ods.py
Normal file
425
etl_billiards/scripts/build_dwd_from_ods.py
Normal file
@@ -0,0 +1,425 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Populate PRD DWD tables from ODS payload snapshots."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import psycopg2
|
||||
|
||||
|
||||
SQL_STEPS: list[tuple[str, str]] = [
|
||||
(
|
||||
"dim_tenant",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_tenant (tenant_id, tenant_name, status)
|
||||
SELECT DISTINCT tenant_id, 'default' AS tenant_name, 'active' AS status
|
||||
FROM (
|
||||
SELECT tenant_id FROM billiards_ods.ods_order_settle
|
||||
UNION SELECT tenant_id FROM billiards_ods.ods_order_receipt_detail
|
||||
UNION SELECT tenant_id FROM billiards_ods.ods_member_profile
|
||||
) s
|
||||
WHERE tenant_id IS NOT NULL
|
||||
ON CONFLICT (tenant_id) DO UPDATE SET updated_at = now();
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_site",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_site (site_id, tenant_id, site_name, status)
|
||||
SELECT DISTINCT site_id, MAX(tenant_id) AS tenant_id, 'default' AS site_name, 'active' AS status
|
||||
FROM (
|
||||
SELECT site_id, tenant_id FROM billiards_ods.ods_order_settle
|
||||
UNION SELECT site_id, tenant_id FROM billiards_ods.ods_order_receipt_detail
|
||||
UNION SELECT site_id, tenant_id FROM billiards_ods.ods_table_info
|
||||
) s
|
||||
WHERE site_id IS NOT NULL
|
||||
GROUP BY site_id
|
||||
ON CONFLICT (site_id) DO UPDATE SET updated_at = now();
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_product_category",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_product_category (category_id, category_name, parent_id, level_no, status)
|
||||
SELECT DISTINCT category_id, category_name, parent_id, level_no, status
|
||||
FROM billiards_ods.ods_goods_category
|
||||
WHERE category_id IS NOT NULL
|
||||
ON CONFLICT (category_id) DO UPDATE SET
|
||||
category_name = EXCLUDED.category_name,
|
||||
parent_id = EXCLUDED.parent_id,
|
||||
level_no = EXCLUDED.level_no,
|
||||
status = EXCLUDED.status;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_product",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_product (goods_id, goods_name, goods_code, category_id, category_name, unit, default_price, status)
|
||||
SELECT DISTINCT goods_id, goods_name, NULL::TEXT AS goods_code, category_id, category_name, NULL::TEXT AS unit, sale_price AS default_price, status
|
||||
FROM billiards_ods.ods_store_product
|
||||
WHERE goods_id IS NOT NULL
|
||||
ON CONFLICT (goods_id) DO UPDATE SET
|
||||
goods_name = EXCLUDED.goods_name,
|
||||
category_id = EXCLUDED.category_id,
|
||||
category_name = EXCLUDED.category_name,
|
||||
default_price = EXCLUDED.default_price,
|
||||
status = EXCLUDED.status,
|
||||
updated_at = now();
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_product_from_sales",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_product (goods_id, goods_name)
|
||||
SELECT DISTINCT goods_id, goods_name
|
||||
FROM billiards_ods.ods_store_sale_item
|
||||
WHERE goods_id IS NOT NULL
|
||||
ON CONFLICT (goods_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_member_card_type",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_member_card_type (card_type_id, card_type_name, discount_rate)
|
||||
SELECT DISTINCT card_type_id, card_type_name, discount_rate
|
||||
FROM billiards_ods.ods_member_card
|
||||
WHERE card_type_id IS NOT NULL
|
||||
ON CONFLICT (card_type_id) DO UPDATE SET
|
||||
card_type_name = EXCLUDED.card_type_name,
|
||||
discount_rate = EXCLUDED.discount_rate;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_member",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_member (
|
||||
site_id, member_id, tenant_id, member_name, nickname, gender, birthday, mobile,
|
||||
member_type_id, member_type_name, status, register_time, last_visit_time,
|
||||
balance, total_recharge_amount, total_consumed_amount, wechat_id, alipay_id, remark
|
||||
)
|
||||
SELECT DISTINCT
|
||||
prof.site_id,
|
||||
prof.member_id,
|
||||
prof.tenant_id,
|
||||
prof.member_name,
|
||||
prof.nickname,
|
||||
prof.gender,
|
||||
prof.birthday,
|
||||
prof.mobile,
|
||||
card.member_type_id,
|
||||
card.member_type_name,
|
||||
prof.status,
|
||||
prof.register_time,
|
||||
prof.last_visit_time,
|
||||
prof.balance,
|
||||
NULL::NUMERIC AS total_recharge_amount,
|
||||
NULL::NUMERIC AS total_consumed_amount,
|
||||
prof.wechat_id,
|
||||
prof.alipay_id,
|
||||
prof.remarks
|
||||
FROM billiards_ods.ods_member_profile prof
|
||||
LEFT JOIN (
|
||||
SELECT DISTINCT site_id, member_id, card_type_id AS member_type_id, card_type_name AS member_type_name
|
||||
FROM billiards_ods.ods_member_card
|
||||
) card
|
||||
ON prof.site_id = card.site_id AND prof.member_id = card.member_id
|
||||
WHERE prof.member_id IS NOT NULL
|
||||
ON CONFLICT (site_id, member_id) DO UPDATE SET
|
||||
member_name = EXCLUDED.member_name,
|
||||
nickname = EXCLUDED.nickname,
|
||||
gender = EXCLUDED.gender,
|
||||
birthday = EXCLUDED.birthday,
|
||||
mobile = EXCLUDED.mobile,
|
||||
member_type_id = EXCLUDED.member_type_id,
|
||||
member_type_name = EXCLUDED.member_type_name,
|
||||
status = EXCLUDED.status,
|
||||
register_time = EXCLUDED.register_time,
|
||||
last_visit_time = EXCLUDED.last_visit_time,
|
||||
balance = EXCLUDED.balance,
|
||||
wechat_id = EXCLUDED.wechat_id,
|
||||
alipay_id = EXCLUDED.alipay_id,
|
||||
remark = EXCLUDED.remark,
|
||||
updated_at = now();
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_table",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_table (table_id, site_id, table_code, table_name, table_type, area_name, status, created_time, updated_time)
|
||||
SELECT DISTINCT table_id, site_id, table_code, table_name, table_type, area_name, status, created_time, updated_time
|
||||
FROM billiards_ods.ods_table_info
|
||||
WHERE table_id IS NOT NULL
|
||||
ON CONFLICT (table_id) DO UPDATE SET
|
||||
site_id = EXCLUDED.site_id,
|
||||
table_code = EXCLUDED.table_code,
|
||||
table_name = EXCLUDED.table_name,
|
||||
table_type = EXCLUDED.table_type,
|
||||
area_name = EXCLUDED.area_name,
|
||||
status = EXCLUDED.status,
|
||||
created_time = EXCLUDED.created_time,
|
||||
updated_time = EXCLUDED.updated_time;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_assistant",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_assistant (assistant_id, assistant_name, mobile, status)
|
||||
SELECT DISTINCT assistant_id, assistant_name, mobile, status
|
||||
FROM billiards_ods.ods_assistant_account
|
||||
WHERE assistant_id IS NOT NULL
|
||||
ON CONFLICT (assistant_id) DO UPDATE SET
|
||||
assistant_name = EXCLUDED.assistant_name,
|
||||
mobile = EXCLUDED.mobile,
|
||||
status = EXCLUDED.status,
|
||||
updated_at = now();
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_pay_method",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_pay_method (pay_method_code, pay_method_name, is_stored_value, status)
|
||||
SELECT DISTINCT pay_method_code, pay_method_name, FALSE AS is_stored_value, 'active' AS status
|
||||
FROM billiards_ods.ods_payment_record
|
||||
WHERE pay_method_code IS NOT NULL
|
||||
ON CONFLICT (pay_method_code) DO UPDATE SET
|
||||
pay_method_name = EXCLUDED.pay_method_name,
|
||||
status = EXCLUDED.status,
|
||||
updated_at = now();
|
||||
""",
|
||||
),
|
||||
(
|
||||
"dim_coupon_platform",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_coupon_platform (platform_code, platform_name)
|
||||
SELECT DISTINCT platform_code, platform_code AS platform_name
|
||||
FROM billiards_ods.ods_platform_coupon_log
|
||||
WHERE platform_code IS NOT NULL
|
||||
ON CONFLICT (platform_code) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_sale_item",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_sale_item (
|
||||
site_id, sale_item_id, order_trade_no, order_settle_id, member_id,
|
||||
goods_id, category_id, quantity, original_amount, discount_amount,
|
||||
final_amount, is_gift, sale_time
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
sale_item_id,
|
||||
order_trade_no,
|
||||
order_settle_id,
|
||||
NULL::BIGINT AS member_id,
|
||||
goods_id,
|
||||
category_id,
|
||||
quantity,
|
||||
original_amount,
|
||||
discount_amount,
|
||||
final_amount,
|
||||
COALESCE(is_gift, FALSE),
|
||||
sale_time
|
||||
FROM billiards_ods.ods_store_sale_item
|
||||
ON CONFLICT (site_id, sale_item_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_table_usage",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_table_usage (
|
||||
site_id, ledger_id, order_trade_no, order_settle_id, table_id,
|
||||
member_id, start_time, end_time, duration_minutes,
|
||||
original_table_fee, member_discount_amount, manual_discount_amount,
|
||||
final_table_fee, is_canceled, cancel_time
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
ledger_id,
|
||||
order_trade_no,
|
||||
order_settle_id,
|
||||
table_id,
|
||||
member_id,
|
||||
start_time,
|
||||
end_time,
|
||||
duration_minutes,
|
||||
original_table_fee,
|
||||
0::NUMERIC AS member_discount_amount,
|
||||
discount_amount AS manual_discount_amount,
|
||||
final_table_fee,
|
||||
FALSE AS is_canceled,
|
||||
NULL::TIMESTAMPTZ AS cancel_time
|
||||
FROM billiards_ods.ods_table_use_log
|
||||
ON CONFLICT (site_id, ledger_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_assistant_service",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_assistant_service (
|
||||
site_id, ledger_id, order_trade_no, order_settle_id, assistant_id,
|
||||
assist_type_code, member_id, start_time, end_time, duration_minutes,
|
||||
original_fee, member_discount_amount, manual_discount_amount,
|
||||
final_fee, is_canceled, cancel_time
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
ledger_id,
|
||||
order_trade_no,
|
||||
order_settle_id,
|
||||
assistant_id,
|
||||
NULL::TEXT AS assist_type_code,
|
||||
member_id,
|
||||
start_time,
|
||||
end_time,
|
||||
duration_minutes,
|
||||
original_fee,
|
||||
0::NUMERIC AS member_discount_amount,
|
||||
discount_amount AS manual_discount_amount,
|
||||
final_fee,
|
||||
FALSE AS is_canceled,
|
||||
NULL::TIMESTAMPTZ AS cancel_time
|
||||
FROM billiards_ods.ods_assistant_service_log
|
||||
ON CONFLICT (site_id, ledger_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_coupon_usage",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_coupon_usage (
|
||||
site_id, coupon_id, package_id, order_trade_no, order_settle_id,
|
||||
member_id, platform_code, status, deduct_amount, settle_price, used_time
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
coupon_id,
|
||||
NULL::BIGINT AS package_id,
|
||||
order_trade_no,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
platform_code,
|
||||
status,
|
||||
deduct_amount,
|
||||
settle_price,
|
||||
used_time
|
||||
FROM billiards_ods.ods_platform_coupon_log
|
||||
ON CONFLICT (site_id, coupon_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_payment",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_payment (
|
||||
site_id, pay_id, order_trade_no, order_settle_id, member_id,
|
||||
pay_method_code, pay_amount, pay_time, relate_type, relate_id
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
pay_id,
|
||||
order_trade_no,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
pay_method_code,
|
||||
pay_amount,
|
||||
pay_time,
|
||||
relate_type,
|
||||
relate_id
|
||||
FROM billiards_ods.ods_payment_record
|
||||
ON CONFLICT (site_id, pay_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_refund",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_refund (
|
||||
site_id, refund_id, order_trade_no, order_settle_id, member_id,
|
||||
pay_method_code, refund_amount, refund_time, status
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
refund_id,
|
||||
order_trade_no,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
pay_method_code,
|
||||
refund_amount,
|
||||
refund_time,
|
||||
status
|
||||
FROM billiards_ods.ods_refund_record
|
||||
ON CONFLICT (site_id, refund_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
(
|
||||
"fact_balance_change",
|
||||
"""
|
||||
INSERT INTO billiards_dwd.fact_balance_change (
|
||||
site_id, change_id, member_id, change_type, relate_type, relate_id,
|
||||
pay_method_code, change_amount, balance_before, balance_after, change_time
|
||||
)
|
||||
SELECT
|
||||
site_id,
|
||||
change_id,
|
||||
member_id,
|
||||
change_type,
|
||||
NULL::TEXT AS relate_type,
|
||||
relate_id,
|
||||
NULL::TEXT AS pay_method_code,
|
||||
change_amount,
|
||||
balance_before,
|
||||
balance_after,
|
||||
change_time
|
||||
FROM billiards_ods.ods_balance_change
|
||||
ON CONFLICT (site_id, change_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the DWD build script.

    Options:
        --dsn      PostgreSQL DSN; defaults to the ``PG_DSN`` environment
                   variable (``None`` when it is unset).
        --timeout  Connect timeout in seconds; defaults to the
                   ``PG_CONNECT_TIMEOUT`` environment variable, or 10.

    Returns:
        The populated namespace with ``dsn`` and ``timeout``.
    """
    # A malformed PG_CONNECT_TIMEOUT (e.g. "10s") used to abort the script
    # with a bare ValueError before argparse even ran; warn and fall back
    # to the documented default instead.
    raw_timeout = os.environ.get("PG_CONNECT_TIMEOUT")
    try:
        default_timeout = int(raw_timeout) if raw_timeout else 10
    except ValueError:
        print(
            f"Ignoring invalid PG_CONNECT_TIMEOUT={raw_timeout!r}; using 10.",
            file=sys.stderr,
        )
        default_timeout = 10

    parser = argparse.ArgumentParser(description="Build DWD tables from ODS payloads (PRD schema).")
    parser.add_argument(
        "--dsn",
        default=os.environ.get("PG_DSN"),
        help="PostgreSQL DSN (fallback PG_DSN env)",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=default_timeout,
        help="connect_timeout seconds (capped at 20, default 10)",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Run every entry of ``SQL_STEPS`` in order inside one transaction.

    Returns:
        0 when all steps ran and the transaction was committed,
        1 when connecting or any step failed (the transaction is rolled
        back), 2 when no DSN was provided.
    """
    args = parse_args()
    if not args.dsn:
        print("Missing DSN. Use --dsn or PG_DSN.", file=sys.stderr)
        return 2

    # Keep the connect timeout within 1..20 seconds.
    timeout_val = max(1, min(args.timeout, 20))

    # Connect under its own handler so an unreachable or misconfigured server
    # is reported like any other failure instead of escaping as a traceback.
    try:
        conn = psycopg2.connect(args.dsn, connect_timeout=timeout_val)
    except Exception as exc:  # pragma: no cover - operational script
        print(f"[FAIL] {exc}", file=sys.stderr)
        return 1

    conn.autocommit = False
    try:
        with conn.cursor() as cur:
            for name, sql in SQL_STEPS:
                cur.execute(sql)
                # "[OK]" only means the statement executed; nothing is
                # durable until the single commit after the loop.
                print(f"[OK] {name}")
        conn.commit()
    except Exception as exc:  # pragma: no cover - operational script
        conn.rollback()
        print(f"[FAIL] {exc}", file=sys.stderr)
        return 1
    finally:
        # Best-effort close: the outcome is already decided at this point.
        try:
            conn.close()
        except Exception:
            pass

    print("DWD build complete.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
322
etl_billiards/scripts/build_dws_order_summary.py
Normal file
322
etl_billiards/scripts/build_dws_order_summary.py
Normal file
@@ -0,0 +1,322 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Recompute billiards_dws.dws_order_summary from DWD fact tables."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from database.connection import DatabaseConnection # noqa: E402
|
||||
|
||||
|
||||
SQL_BUILD_SUMMARY = r"""
|
||||
WITH table_fee AS (
|
||||
SELECT
|
||||
site_id,
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
MIN(member_id) AS member_id,
|
||||
SUM(COALESCE(final_table_fee, 0)) AS table_fee_amount,
|
||||
SUM(COALESCE(member_discount_amount, 0)) AS member_discount_amount,
|
||||
SUM(COALESCE(manual_discount_amount, 0)) AS manual_discount_amount,
|
||||
SUM(COALESCE(original_table_fee, 0)) AS original_table_fee,
|
||||
MIN(start_time) AS first_time
|
||||
FROM billiards_dwd.fact_table_usage
|
||||
WHERE (%(site_id)s IS NULL OR site_id = %(site_id)s)
|
||||
AND (%(start_date)s IS NULL OR start_time::date >= %(start_date)s)
|
||||
AND (%(end_date)s IS NULL OR start_time::date <= %(end_date)s)
|
||||
AND COALESCE(is_canceled, FALSE) = FALSE
|
||||
GROUP BY site_id, order_settle_id, order_trade_no
|
||||
),
|
||||
assistant_fee AS (
|
||||
SELECT
|
||||
site_id,
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
MIN(member_id) AS member_id,
|
||||
SUM(COALESCE(final_fee, 0)) AS assistant_service_amount,
|
||||
SUM(COALESCE(member_discount_amount, 0)) AS member_discount_amount,
|
||||
SUM(COALESCE(manual_discount_amount, 0)) AS manual_discount_amount,
|
||||
SUM(COALESCE(original_fee, 0)) AS original_fee,
|
||||
MIN(start_time) AS first_time
|
||||
FROM billiards_dwd.fact_assistant_service
|
||||
WHERE (%(site_id)s IS NULL OR site_id = %(site_id)s)
|
||||
AND (%(start_date)s IS NULL OR start_time::date >= %(start_date)s)
|
||||
AND (%(end_date)s IS NULL OR start_time::date <= %(end_date)s)
|
||||
AND COALESCE(is_canceled, FALSE) = FALSE
|
||||
GROUP BY site_id, order_settle_id, order_trade_no
|
||||
),
|
||||
goods_fee AS (
|
||||
SELECT
|
||||
site_id,
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
MIN(member_id) AS member_id,
|
||||
SUM(COALESCE(final_amount, 0)) FILTER (WHERE COALESCE(is_gift, FALSE) = FALSE) AS goods_amount,
|
||||
SUM(COALESCE(discount_amount, 0)) FILTER (WHERE COALESCE(is_gift, FALSE) = FALSE) AS goods_discount_amount,
|
||||
SUM(COALESCE(original_amount, 0)) FILTER (WHERE COALESCE(is_gift, FALSE) = FALSE) AS goods_original_amount,
|
||||
COUNT(*) FILTER (WHERE COALESCE(is_gift, FALSE) = FALSE) AS item_count,
|
||||
SUM(COALESCE(quantity, 0)) FILTER (WHERE COALESCE(is_gift, FALSE) = FALSE) AS total_item_quantity,
|
||||
MIN(sale_time) AS first_time
|
||||
FROM billiards_dwd.fact_sale_item
|
||||
WHERE (%(site_id)s IS NULL OR site_id = %(site_id)s)
|
||||
AND (%(start_date)s IS NULL OR sale_time::date >= %(start_date)s)
|
||||
AND (%(end_date)s IS NULL OR sale_time::date <= %(end_date)s)
|
||||
GROUP BY site_id, order_settle_id, order_trade_no
|
||||
),
|
||||
coupon_usage AS (
|
||||
SELECT
|
||||
site_id,
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
MIN(member_id) AS member_id,
|
||||
SUM(COALESCE(deduct_amount, 0)) AS coupon_deduction,
|
||||
SUM(COALESCE(settle_price, 0)) AS settle_price,
|
||||
MIN(used_time) AS first_time
|
||||
FROM billiards_dwd.fact_coupon_usage
|
||||
WHERE (%(site_id)s IS NULL OR site_id = %(site_id)s)
|
||||
AND (%(start_date)s IS NULL OR used_time::date >= %(start_date)s)
|
||||
AND (%(end_date)s IS NULL OR used_time::date <= %(end_date)s)
|
||||
GROUP BY site_id, order_settle_id, order_trade_no
|
||||
),
|
||||
payments AS (
|
||||
SELECT
|
||||
fp.site_id,
|
||||
fp.order_settle_id,
|
||||
fp.order_trade_no,
|
||||
MIN(fp.member_id) AS member_id,
|
||||
SUM(COALESCE(fp.pay_amount, 0)) AS total_paid_amount,
|
||||
SUM(COALESCE(fp.pay_amount, 0)) FILTER (WHERE COALESCE(pm.is_stored_value, FALSE)) AS stored_card_deduct,
|
||||
SUM(COALESCE(fp.pay_amount, 0)) FILTER (WHERE NOT COALESCE(pm.is_stored_value, FALSE)) AS external_paid_amount,
|
||||
MIN(fp.pay_time) AS first_time
|
||||
FROM billiards_dwd.fact_payment fp
|
||||
LEFT JOIN billiards_dwd.dim_pay_method pm ON fp.pay_method_code = pm.pay_method_code
|
||||
WHERE (%(site_id)s IS NULL OR fp.site_id = %(site_id)s)
|
||||
AND (%(start_date)s IS NULL OR fp.pay_time::date >= %(start_date)s)
|
||||
AND (%(end_date)s IS NULL OR fp.pay_time::date <= %(end_date)s)
|
||||
GROUP BY fp.site_id, fp.order_settle_id, fp.order_trade_no
|
||||
),
|
||||
refunds AS (
|
||||
SELECT
|
||||
site_id,
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
SUM(COALESCE(refund_amount, 0)) AS refund_amount
|
||||
FROM billiards_dwd.fact_refund
|
||||
WHERE (%(site_id)s IS NULL OR site_id = %(site_id)s)
|
||||
AND (%(start_date)s IS NULL OR refund_time::date >= %(start_date)s)
|
||||
AND (%(end_date)s IS NULL OR refund_time::date <= %(end_date)s)
|
||||
GROUP BY site_id, order_settle_id, order_trade_no
|
||||
),
|
||||
combined_ids AS (
|
||||
SELECT site_id, order_settle_id, order_trade_no FROM table_fee
|
||||
UNION
|
||||
SELECT site_id, order_settle_id, order_trade_no FROM assistant_fee
|
||||
UNION
|
||||
SELECT site_id, order_settle_id, order_trade_no FROM goods_fee
|
||||
UNION
|
||||
SELECT site_id, order_settle_id, order_trade_no FROM coupon_usage
|
||||
UNION
|
||||
SELECT site_id, order_settle_id, order_trade_no FROM payments
|
||||
UNION
|
||||
SELECT site_id, order_settle_id, order_trade_no FROM refunds
|
||||
),
|
||||
site_dim AS (
|
||||
SELECT site_id, tenant_id FROM billiards_dwd.dim_site
|
||||
)
|
||||
INSERT INTO billiards_dws.dws_order_summary (
|
||||
site_id,
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
order_date,
|
||||
tenant_id,
|
||||
member_id,
|
||||
member_flag,
|
||||
recharge_order_flag,
|
||||
item_count,
|
||||
total_item_quantity,
|
||||
table_fee_amount,
|
||||
assistant_service_amount,
|
||||
goods_amount,
|
||||
group_amount,
|
||||
total_coupon_deduction,
|
||||
member_discount_amount,
|
||||
manual_discount_amount,
|
||||
order_original_amount,
|
||||
order_final_amount,
|
||||
stored_card_deduct,
|
||||
external_paid_amount,
|
||||
total_paid_amount,
|
||||
book_table_flow,
|
||||
book_assistant_flow,
|
||||
book_goods_flow,
|
||||
book_group_flow,
|
||||
book_order_flow,
|
||||
order_effective_consume_cash,
|
||||
order_effective_recharge_cash,
|
||||
order_effective_flow,
|
||||
refund_amount,
|
||||
net_income,
|
||||
created_at,
|
||||
updated_at
|
||||
)
|
||||
SELECT
|
||||
c.site_id,
|
||||
c.order_settle_id,
|
||||
c.order_trade_no,
|
||||
COALESCE(tf.first_time, af.first_time, gf.first_time, pay.first_time, cu.first_time)::date AS order_date,
|
||||
sd.tenant_id,
|
||||
COALESCE(tf.member_id, af.member_id, gf.member_id, cu.member_id, pay.member_id) AS member_id,
|
||||
COALESCE(tf.member_id, af.member_id, gf.member_id, cu.member_id, pay.member_id) IS NOT NULL AS member_flag,
|
||||
-- recharge flag: no consumption side but has payments
|
||||
(COALESCE(tf.table_fee_amount, 0) + COALESCE(af.assistant_service_amount, 0) + COALESCE(gf.goods_amount, 0) + COALESCE(cu.settle_price, 0) = 0)
|
||||
AND COALESCE(pay.total_paid_amount, 0) > 0 AS recharge_order_flag,
|
||||
COALESCE(gf.item_count, 0) AS item_count,
|
||||
COALESCE(gf.total_item_quantity, 0) AS total_item_quantity,
|
||||
COALESCE(tf.table_fee_amount, 0) AS table_fee_amount,
|
||||
COALESCE(af.assistant_service_amount, 0) AS assistant_service_amount,
|
||||
COALESCE(gf.goods_amount, 0) AS goods_amount,
|
||||
COALESCE(cu.settle_price, 0) AS group_amount,
|
||||
COALESCE(cu.coupon_deduction, 0) AS total_coupon_deduction,
|
||||
COALESCE(tf.member_discount_amount, 0) + COALESCE(af.member_discount_amount, 0) + COALESCE(gf.goods_discount_amount, 0) AS member_discount_amount,
|
||||
COALESCE(tf.manual_discount_amount, 0) + COALESCE(af.manual_discount_amount, 0) AS manual_discount_amount,
|
||||
COALESCE(tf.original_table_fee, 0) + COALESCE(af.original_fee, 0) + COALESCE(gf.goods_original_amount, 0) AS order_original_amount,
|
||||
COALESCE(tf.table_fee_amount, 0) + COALESCE(af.assistant_service_amount, 0) + COALESCE(gf.goods_amount, 0) + COALESCE(cu.settle_price, 0) - COALESCE(cu.coupon_deduction, 0) AS order_final_amount,
|
||||
COALESCE(pay.stored_card_deduct, 0) AS stored_card_deduct,
|
||||
COALESCE(pay.external_paid_amount, 0) AS external_paid_amount,
|
||||
COALESCE(pay.total_paid_amount, 0) AS total_paid_amount,
|
||||
COALESCE(tf.table_fee_amount, 0) AS book_table_flow,
|
||||
COALESCE(af.assistant_service_amount, 0) AS book_assistant_flow,
|
||||
COALESCE(gf.goods_amount, 0) AS book_goods_flow,
|
||||
COALESCE(cu.settle_price, 0) AS book_group_flow,
|
||||
COALESCE(tf.table_fee_amount, 0) + COALESCE(af.assistant_service_amount, 0) + COALESCE(gf.goods_amount, 0) + COALESCE(cu.settle_price, 0) AS book_order_flow,
|
||||
CASE
|
||||
WHEN (COALESCE(tf.table_fee_amount, 0) + COALESCE(af.assistant_service_amount, 0) + COALESCE(gf.goods_amount, 0) + COALESCE(cu.settle_price, 0) = 0)
|
||||
THEN 0
|
||||
ELSE COALESCE(pay.external_paid_amount, 0)
|
||||
END AS order_effective_consume_cash,
|
||||
CASE
|
||||
WHEN (COALESCE(tf.table_fee_amount, 0) + COALESCE(af.assistant_service_amount, 0) + COALESCE(gf.goods_amount, 0) + COALESCE(cu.settle_price, 0) = 0)
|
||||
THEN COALESCE(pay.external_paid_amount, 0)
|
||||
ELSE 0
|
||||
END AS order_effective_recharge_cash,
|
||||
COALESCE(pay.external_paid_amount, 0) + COALESCE(cu.settle_price, 0) AS order_effective_flow,
|
||||
COALESCE(rf.refund_amount, 0) AS refund_amount,
|
||||
(COALESCE(pay.external_paid_amount, 0) + COALESCE(cu.settle_price, 0)) - COALESCE(rf.refund_amount, 0) AS net_income,
|
||||
now() AS created_at,
|
||||
now() AS updated_at
|
||||
FROM combined_ids c
|
||||
LEFT JOIN table_fee tf ON c.site_id = tf.site_id AND c.order_settle_id = tf.order_settle_id
|
||||
LEFT JOIN assistant_fee af ON c.site_id = af.site_id AND c.order_settle_id = af.order_settle_id
|
||||
LEFT JOIN goods_fee gf ON c.site_id = gf.site_id AND c.order_settle_id = gf.order_settle_id
|
||||
LEFT JOIN coupon_usage cu ON c.site_id = cu.site_id AND c.order_settle_id = cu.order_settle_id
|
||||
LEFT JOIN payments pay ON c.site_id = pay.site_id AND c.order_settle_id = pay.order_settle_id
|
||||
LEFT JOIN refunds rf ON c.site_id = rf.site_id AND c.order_settle_id = rf.order_settle_id
|
||||
LEFT JOIN site_dim sd ON c.site_id = sd.site_id
|
||||
ON CONFLICT (site_id, order_settle_id) DO UPDATE SET
|
||||
order_trade_no = EXCLUDED.order_trade_no,
|
||||
order_date = EXCLUDED.order_date,
|
||||
tenant_id = EXCLUDED.tenant_id,
|
||||
member_id = EXCLUDED.member_id,
|
||||
member_flag = EXCLUDED.member_flag,
|
||||
recharge_order_flag = EXCLUDED.recharge_order_flag,
|
||||
item_count = EXCLUDED.item_count,
|
||||
total_item_quantity = EXCLUDED.total_item_quantity,
|
||||
table_fee_amount = EXCLUDED.table_fee_amount,
|
||||
assistant_service_amount = EXCLUDED.assistant_service_amount,
|
||||
goods_amount = EXCLUDED.goods_amount,
|
||||
group_amount = EXCLUDED.group_amount,
|
||||
total_coupon_deduction = EXCLUDED.total_coupon_deduction,
|
||||
member_discount_amount = EXCLUDED.member_discount_amount,
|
||||
manual_discount_amount = EXCLUDED.manual_discount_amount,
|
||||
order_original_amount = EXCLUDED.order_original_amount,
|
||||
order_final_amount = EXCLUDED.order_final_amount,
|
||||
stored_card_deduct = EXCLUDED.stored_card_deduct,
|
||||
external_paid_amount = EXCLUDED.external_paid_amount,
|
||||
total_paid_amount = EXCLUDED.total_paid_amount,
|
||||
book_table_flow = EXCLUDED.book_table_flow,
|
||||
book_assistant_flow = EXCLUDED.book_assistant_flow,
|
||||
book_goods_flow = EXCLUDED.book_goods_flow,
|
||||
book_group_flow = EXCLUDED.book_group_flow,
|
||||
book_order_flow = EXCLUDED.book_order_flow,
|
||||
order_effective_consume_cash = EXCLUDED.order_effective_consume_cash,
|
||||
order_effective_recharge_cash = EXCLUDED.order_effective_recharge_cash,
|
||||
order_effective_flow = EXCLUDED.order_effective_flow,
|
||||
refund_amount = EXCLUDED.refund_amount,
|
||||
net_income = EXCLUDED.net_income,
|
||||
updated_at = now();
|
||||
"""
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the DWS order-summary build script.

    Options:
        --dsn         PostgreSQL DSN; defaults to the ``PG_DSN`` environment
                      variable (``None`` when it is unset).
        --site-id     Optional integer site filter; ``None`` means all sites.
        --start-date  Optional lower date bound, passed through as a string.
        --end-date    Optional upper date bound, passed through as a string.
        --timeout     Connect timeout in seconds; defaults to the
                      ``PG_CONNECT_TIMEOUT`` environment variable, or 10.

    Returns:
        The populated namespace with ``dsn``, ``site_id``, ``start_date``,
        ``end_date`` and ``timeout``.
    """
    # A malformed PG_CONNECT_TIMEOUT (e.g. "10s") used to abort the script
    # with a bare ValueError before argparse even ran; warn and fall back
    # to the documented default instead.
    raw_timeout = os.environ.get("PG_CONNECT_TIMEOUT")
    try:
        default_timeout = int(raw_timeout) if raw_timeout else 10
    except ValueError:
        print(
            f"Ignoring invalid PG_CONNECT_TIMEOUT={raw_timeout!r}; using 10.",
            file=sys.stderr,
        )
        default_timeout = 10

    parser = argparse.ArgumentParser(
        description="Build/update dws_order_summary from DWD fact tables."
    )
    parser.add_argument(
        "--dsn",
        default=os.environ.get("PG_DSN"),
        help="PostgreSQL DSN (fallback: PG_DSN env)",
    )
    parser.add_argument(
        "--site-id",
        type=int,
        default=None,
        help="Filter by site_id (optional, default all sites)",
    )
    parser.add_argument(
        "--start-date",
        dest="start_date",
        default=None,
        help="Filter facts from this date (YYYY-MM-DD, optional)",
    )
    parser.add_argument(
        "--end-date",
        dest="end_date",
        default=None,
        help="Filter facts until this date (YYYY-MM-DD, optional)",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=default_timeout,
        help="connect_timeout seconds (capped at 20, default 10)",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Execute ``SQL_BUILD_SUMMARY`` with the optional site/date filters.

    Returns:
        0 when the statement ran and was committed, 1 when creating the
        connection or running the statement failed, 2 when no DSN was
        provided.
    """
    args = parse_args()
    if not args.dsn:
        print("Missing DSN. Set PG_DSN or pass --dsn.", file=sys.stderr)
        return 2

    # Named parameters referenced as %(site_id)s / %(start_date)s /
    # %(end_date)s in the SQL; None disables the corresponding filter.
    params = {
        "site_id": args.site_id,
        "start_date": args.start_date,
        "end_date": args.end_date,
    }
    # Keep the connect timeout within 1..20 seconds.
    timeout_val = max(1, min(args.timeout, 20))

    # Build the connection under its own handler so a failure there is
    # reported through the same message and exit code as a failed build
    # instead of escaping as a traceback.
    # NOTE(review): presumably DatabaseConnection connects in its
    # constructor - confirm against database/connection.py.
    try:
        conn = DatabaseConnection(args.dsn, connect_timeout=timeout_val)
    except Exception as exc:  # pragma: no cover - operational script
        print(f"DWS build failed: {exc}", file=sys.stderr)
        return 1

    try:
        with conn.conn.cursor() as cur:
            cur.execute(SQL_BUILD_SUMMARY, params)
        conn.commit()
    except Exception as exc:  # pragma: no cover - operational script
        conn.rollback()
        print(f"DWS build failed: {exc}", file=sys.stderr)
        return 1
    finally:
        conn.close()

    print("dws_order_summary refreshed.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
258
etl_billiards/scripts/rebuild_ods_from_json.py
Normal file
258
etl_billiards/scripts/rebuild_ods_from_json.py
Normal file
@@ -0,0 +1,258 @@
|
||||
# -*- coding: utf-8 -*-
"""
Rebuild the billiards_ods.* tables from a local directory of sample JSON files and load the sample data.

Usage:
    PYTHONPATH=. python -m etl_billiards.scripts.rebuild_ods_from_json [--dsn ...] [--json-dir ...] [--include ...] [--drop-schema-first]

Environment variables:
    PG_DSN                  PostgreSQL connection string (required)
    PG_CONNECT_TIMEOUT      optional, seconds, default 10
    JSON_DOC_DIR            optional, JSON directory, default C:\\dev\\LLTQ\\export\\test-json-doc
    ODS_INCLUDE_FILES       optional, comma-separated file names (without .json)
    ODS_DROP_SCHEMA_FIRST   optional, true/false, default true
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Tuple
|
||||
|
||||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
from psycopg2.extras import Json, execute_values
|
||||
|
||||
|
||||
# Directory scanned for *.json files when neither --json-dir nor the
# data_dir argument of rebuild() supplies one.
DEFAULT_JSON_DIR = r"C:\dev\LLTQ\export\test-json-doc"
# Per-file key path to the record list inside the JSON payload.
# Keys are lowercase file stems (file name without ".json"); rebuild() looks
# the stem up here and passes the path to load_records() as list_path.
# Files not listed fall back to load_records()' generic heuristics.
SPECIAL_LIST_PATHS: dict[str, tuple[str, ...]] = {
    "assistant_accounts_master": ("data", "assistantInfos"),
    "assistant_cancellation_records": ("data", "abolitionAssistants"),
    "assistant_service_records": ("data", "orderAssistantDetails"),
    "goods_stock_movements": ("data", "queryDeliveryRecordsList"),
    "goods_stock_summary": ("data",),
    "group_buy_packages": ("data", "packageCouponList"),
    "group_buy_redemption_records": ("data", "siteTableUseDetailsList"),
    "member_balance_changes": ("data", "tenantMemberCardLogs"),
    "member_profiles": ("data", "tenantMemberInfos"),
    "member_stored_value_cards": ("data", "tenantMemberCards"),
    "recharge_settlements": ("data", "settleList"),
    "settlement_records": ("data", "settleList"),
    "site_tables_master": ("data", "siteTables"),
    "stock_goods_category_tree": ("data", "goodsCategoryList"),
    "store_goods_master": ("data", "orderGoodsList"),
    "store_goods_sales_records": ("data", "orderGoodsLedgers"),
    "table_fee_discount_records": ("data", "taiFeeAdjustInfos"),
    "table_fee_transactions": ("data", "siteTableUseDetailsList"),
    "tenant_goods_master": ("data", "tenantGoodsList"),
}
|
||||
|
||||
|
||||
def sanitize_identifier(name: str) -> str:
    """Convert an arbitrary string into a lowercase SQL-friendly identifier.

    Surrounding whitespace is stripped, every character outside
    ``[0-9a-zA-Z_]`` becomes an underscore, an empty result becomes ``col``,
    and a leading digit is prefixed with an underscore.
    """
    ident = re.sub(r"[^0-9a-zA-Z_]", "_", name.strip()) or "col"
    prefixed = f"_{ident}" if ident[0].isdigit() else ident
    return prefixed.lower()
|
||||
|
||||
|
||||
def _extract_list_via_path(node, path: tuple[str, ...]):
|
||||
cur = node
|
||||
for key in path:
|
||||
if isinstance(cur, dict):
|
||||
cur = cur.get(key)
|
||||
else:
|
||||
return []
|
||||
return cur if isinstance(cur, list) else []
|
||||
|
||||
|
||||
def load_records(payload, list_path: tuple[str, ...] | None = None) -> list:
    """Extract the list of records from a decoded JSON payload.

    Resolution order:
    1. If ``list_path`` is given, follow it (for a list payload, follow it in
       every item and merge the results); use the result when non-empty.
    2. A list payload is returned as-is.
    3. For a dict payload: ``data`` if it is a list, else the first list value
       inside a ``data`` dict, else the first list value of the payload itself.
    4. Anything else is wrapped as a single-record list.
    """
    if list_path:
        if isinstance(payload, list):
            found = [
                row
                for item in payload
                for row in _extract_list_via_path(item, list_path)
            ]
        elif isinstance(payload, dict):
            found = _extract_list_via_path(payload, list_path)
        else:
            found = []
        if found:
            return found

    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        return [payload]

    data_node = payload.get("data")
    if isinstance(data_node, list):
        return data_node

    # Values nested under "data" take precedence over top-level values.
    nested_values = data_node.values() if isinstance(data_node, dict) else ()
    for value in (*nested_values, *payload.values()):
        if isinstance(value, list):
            return value
    return [payload]
|
||||
|
||||
|
||||
def collect_columns(records: Iterable[dict]) -> List[str]:
    """Return the sorted union of top-level keys across all dict records.

    Records that are not dicts are ignored.
    """
    keys = {key for rec in records if isinstance(rec, dict) for key in rec}
    return sorted(keys)
|
||||
|
||||
|
||||
def create_table(cur, schema: str, table: str, columns: List[Tuple[str, str]]):
    """Create ``schema.table`` if it does not exist yet.

    The table has ``source_file`` and ``record_index`` columns, one ``jsonb``
    column per entry in ``columns``, a ``payload`` jsonb column, an
    ``ingested_at`` timestamp defaulting to ``now()``, and a unique constraint
    on ``(source_file, record_index)``.

    Args:
        cur: Open database cursor used to execute the DDL.
        schema: Target schema name.
        table: Target table name.
        columns: ``[(col_name, original_key)]`` pairs; only ``col_name`` is
            used here. May be empty.
    """
    # Every column/constraint definition goes into one list that is joined
    # once, so an empty ``columns`` cannot leave a dangling ",," in the DDL.
    definitions = [
        sql.SQL("source_file text"),
        sql.SQL("record_index integer"),
    ]
    definitions.extend(
        sql.SQL("{} jsonb").format(sql.Identifier(col)) for col, _ in columns
    )
    definitions.extend(
        [
            sql.SQL("payload jsonb"),
            sql.SQL("ingested_at timestamptz default now()"),
            sql.SQL("CONSTRAINT {} UNIQUE (source_file, record_index)").format(
                sql.Identifier(f"uq_{table}_source_record")
            ),
        ]
    )
    ddl = sql.SQL("CREATE TABLE IF NOT EXISTS {schema}.{table} ({defs});").format(
        schema=sql.Identifier(schema),
        table=sql.Identifier(table),
        defs=sql.SQL(",").join(definitions),
    )
    cur.execute(ddl)
|
||||
|
||||
|
||||
def insert_records(cur, schema: str, table: str, columns: List[Tuple[str, str]], records: list, source_file: str):
    """Bulk-insert ``records`` into ``schema.table``.

    Each row stores the source file name, the record's position in
    ``records``, one JSON value per column (looked up by the original key),
    and the whole record as ``payload``. Rows that hit a conflict are
    skipped (``ON CONFLICT DO NOTHING``).

    Args:
        cur: Open database cursor.
        schema: Target schema name.
        table: Target table name.
        columns: ``[(col_name, original_key)]`` pairs.
        records: Records to insert; non-dict items are wrapped as
            ``{"value": item}``.
        source_file: File name recorded in the ``source_file`` column.
    """
    orig_keys = [orig for _, orig in columns]
    all_cols = [
        sql.Identifier("source_file"),
        sql.Identifier("record_index"),
        *(sql.Identifier(col) for col, _ in columns),
        sql.Identifier("payload"),
    ]

    rows = []
    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            rec = {"value": rec}
        rows.append(
            [
                source_file,
                idx,
                *(Json(rec.get(key)) for key in orig_keys),
                Json(rec),
            ]
        )

    insert_sql = sql.SQL("INSERT INTO {}.{} ({}) VALUES %s ON CONFLICT DO NOTHING").format(
        sql.Identifier(schema),
        sql.Identifier(table),
        sql.SQL(",").join(all_cols),
    )
    execute_values(cur, insert_sql, rows, page_size=500)
|
||||
|
||||
|
||||
def rebuild(schema: str = "billiards_ods", data_dir: str | Path = DEFAULT_JSON_DIR):
    """Rebuild the ODS tables from a directory of JSON files.

    Parses command-line arguments, optionally drops and recreates ``schema``,
    then for every ``*.json`` file (optionally filtered) creates a table named
    after the file stem and inserts its records. All work happens in a single
    transaction that is committed at the end and rolled back on any error.

    Args:
        schema: Target schema name.
        data_dir: JSON directory used when neither ``--json-dir`` nor the
            ``JSON_DOC_DIR`` environment variable is set.

    Raises:
        SystemExit: With status 1 when no DSN is available or the JSON
            directory does not exist.
    """
    parser = argparse.ArgumentParser(description="重建 billiards_ods.* 表并导入 JSON 样例")
    parser.add_argument("--dsn", dest="dsn", help="PostgreSQL DSN(默认读取环境变量 PG_DSN)")
    parser.add_argument("--json-dir", dest="json_dir", help=f"JSON 目录,默认 {DEFAULT_JSON_DIR}")
    parser.add_argument(
        "--include",
        dest="include_files",
        help="限定导入的文件名(逗号分隔,不含 .json),默认全部",
    )
    parser.add_argument(
        "--drop-schema-first",
        dest="drop_schema_first",
        action="store_true",
        help="先删除并重建 schema(默认 true)",
    )
    parser.add_argument(
        "--no-drop-schema-first",
        dest="drop_schema_first",
        action="store_false",
        help="保留现有 schema,仅按冲突去重导入",
    )
    # None means "flag not given", so the environment can decide below.
    parser.set_defaults(drop_schema_first=None)
    args = parser.parse_args()

    dsn = args.dsn or os.environ.get("PG_DSN")
    if not dsn:
        print("缺少参数/环境变量 PG_DSN,无法连接数据库。")
        sys.exit(1)

    # `or 10` keeps an empty PG_CONNECT_TIMEOUT from crashing int().
    timeout = max(1, min(int(os.environ.get("PG_CONNECT_TIMEOUT") or 10), 60))

    env_drop = os.environ.get("ODS_DROP_SCHEMA_FIRST") or os.environ.get("DROP_SCHEMA_FIRST")
    if args.drop_schema_first is not None:
        drop_schema_first = args.drop_schema_first
    else:
        drop_schema_first = str(env_drop or "true").lower() in ("1", "true", "yes")

    include_files_env = args.include_files or os.environ.get("ODS_INCLUDE_FILES") or os.environ.get("INCLUDE_FILES")
    include_files = set()
    if include_files_env:
        include_files = {p.strip().lower() for p in include_files_env.split(",") if p.strip()}

    # Precedence: --json-dir, then JSON_DOC_DIR, then the data_dir argument.
    base_dir = Path(args.json_dir or os.environ.get("JSON_DOC_DIR") or data_dir or DEFAULT_JSON_DIR)
    if not base_dir.exists():
        print(f"JSON 目录不存在: {base_dir}")
        sys.exit(1)

    conn = psycopg2.connect(dsn, connect_timeout=timeout)
    conn.autocommit = False
    try:
        with conn.cursor() as cur:
            if drop_schema_first:
                print(f"Dropping schema {schema} ...")
                cur.execute(sql.SQL("DROP SCHEMA IF EXISTS {} CASCADE;").format(sql.Identifier(schema)))
                cur.execute(sql.SQL("CREATE SCHEMA {};").format(sql.Identifier(schema)))
            else:
                cur.execute(
                    sql.SQL("SELECT schema_name FROM information_schema.schemata WHERE schema_name=%s"),
                    (schema,),
                )
                if not cur.fetchone():
                    cur.execute(sql.SQL("CREATE SCHEMA {};").format(sql.Identifier(schema)))

            for path in sorted(base_dir.glob("*.json")):
                stem_lower = path.stem.lower()
                if include_files and stem_lower not in include_files:
                    continue

                print(f"Processing {path.name} ...")
                payload = json.loads(path.read_text(encoding="utf-8"))
                list_path = SPECIAL_LIST_PATHS.get(stem_lower)
                records = load_records(payload, list_path=list_path)
                columns_raw = collect_columns(records)
                columns = [(sanitize_identifier(c), c) for c in columns_raw]

                table_name = sanitize_identifier(path.stem)
                create_table(cur, schema, table_name, columns)
                if records:
                    insert_records(cur, schema, table_name, columns, records, path.name)
                print(f" -> rows: {len(records)}, columns: {len(columns)}")

        conn.commit()
    except Exception:
        # Undo the partial rebuild before the error propagates.
        conn.rollback()
        raise
    finally:
        # Close the connection on both the success and the failure path.
        conn.close()
    print("Rebuild done.")
|
||||
|
||||
|
||||
# Script entry point: rebuild() parses the command line itself.
if __name__ == "__main__":
    rebuild()
|
||||
@@ -4,9 +4,9 @@
|
||||
直接运行本文件即可触发 pytest。
|
||||
|
||||
示例:
|
||||
python scripts/run_tests.py --suite online --mode ONLINE --keyword ORDERS
|
||||
python scripts/run_tests.py --preset offline_realdb
|
||||
python scripts/run_tests.py --suite online offline --db-dsn ... --json-archive tmp/archives
|
||||
python scripts/run_tests.py --suite online --flow FULL --keyword ORDERS
|
||||
python scripts/run_tests.py --preset fetch_only
|
||||
python scripts/run_tests.py --suite online --json-source tmp/archives
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -27,7 +27,6 @@ if PROJECT_ROOT not in sys.path:
|
||||
|
||||
SUITE_MAP: Dict[str, str] = {
|
||||
"online": "tests/unit/test_etl_tasks_online.py",
|
||||
"offline": "tests/unit/test_etl_tasks_offline.py",
|
||||
"integration": "tests/integration/test_database.py",
|
||||
}
|
||||
|
||||
@@ -64,13 +63,12 @@ def parse_args() -> argparse.Namespace:
|
||||
help="自定义测试路径(可与 --suite 混用),例如 tests/unit/test_config.py",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mode",
|
||||
choices=["ONLINE", "OFFLINE"],
|
||||
help="覆盖 TEST_MODE(默认沿用 .env / 环境变量)",
|
||||
"--flow",
|
||||
choices=["FETCH_ONLY", "INGEST_ONLY", "FULL"],
|
||||
help="覆盖 PIPELINE_FLOW(在线抓取/本地清洗/全流程)",
|
||||
)
|
||||
parser.add_argument("--db-dsn", help="设置 TEST_DB_DSN,连接真实数据库进行测试")
|
||||
parser.add_argument("--json-archive", help="设置 TEST_JSON_ARCHIVE_DIR(离线档案目录)")
|
||||
parser.add_argument("--json-temp", help="设置 TEST_JSON_TEMP_DIR(临时 JSON 路径)")
|
||||
parser.add_argument("--json-source", help="设置 JSON_SOURCE_DIR(本地清洗入库使用的 JSON 目录)")
|
||||
parser.add_argument("--json-fetch-root", help="设置 JSON_FETCH_ROOT(在线抓取输出根目录)")
|
||||
parser.add_argument(
|
||||
"--keyword",
|
||||
"-k",
|
||||
@@ -123,14 +121,12 @@ def apply_presets_to_args(args: argparse.Namespace):
|
||||
|
||||
def apply_env(args: argparse.Namespace) -> Dict[str, str]:
|
||||
env_updates = {}
|
||||
if args.mode:
|
||||
env_updates["TEST_MODE"] = args.mode
|
||||
if args.db_dsn:
|
||||
env_updates["TEST_DB_DSN"] = args.db_dsn
|
||||
if args.json_archive:
|
||||
env_updates["TEST_JSON_ARCHIVE_DIR"] = args.json_archive
|
||||
if args.json_temp:
|
||||
env_updates["TEST_JSON_TEMP_DIR"] = args.json_temp
|
||||
if args.flow:
|
||||
env_updates["PIPELINE_FLOW"] = args.flow
|
||||
if args.json_source:
|
||||
env_updates["JSON_SOURCE_DIR"] = args.json_source
|
||||
if args.json_fetch_root:
|
||||
env_updates["JSON_FETCH_ROOT"] = args.json_fetch_root
|
||||
if args.env:
|
||||
for item in args.env:
|
||||
if "=" not in item:
|
||||
@@ -151,8 +147,7 @@ def build_pytest_args(args: argparse.Namespace) -> List[str]:
|
||||
if args.tests:
|
||||
targets.extend(args.tests)
|
||||
if not targets:
|
||||
# 默认跑 online + offline 套件
|
||||
targets = [SUITE_MAP["online"], SUITE_MAP["offline"]]
|
||||
targets = list(SUITE_MAP.values())
|
||||
|
||||
pytest_args: List[str] = targets
|
||||
if args.keyword:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Quick utility for validating PostgreSQL connectivity."""
|
||||
"""Quick utility for validating PostgreSQL connectivity (ASCII-only output)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
@@ -24,8 +24,8 @@ def parse_args() -> argparse.Namespace:
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
type=int,
|
||||
default=5,
|
||||
help="connect_timeout seconds passed to psycopg2 (default: 5)",
|
||||
default=10,
|
||||
help="connect_timeout seconds passed to psycopg2 (capped at 20, default: 10)",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
@@ -34,25 +34,26 @@ def main() -> int:
|
||||
args = parse_args()
|
||||
dsn = args.dsn or os.environ.get("TEST_DB_DSN")
|
||||
if not dsn:
|
||||
print("❌ 未提供 DSN,请通过 --dsn 或 TEST_DB_DSN 指定连接串", file=sys.stderr)
|
||||
print("Missing DSN. Use --dsn or TEST_DB_DSN.", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
print(f"尝试连接: {dsn}")
|
||||
print(f"Trying connection: {dsn}")
|
||||
try:
|
||||
conn = DatabaseConnection(dsn, connect_timeout=args.timeout)
|
||||
timeout = max(1, min(args.timeout, 20))
|
||||
conn = DatabaseConnection(dsn, connect_timeout=timeout)
|
||||
except Exception as exc: # pragma: no cover - diagnostic output
|
||||
print("❌ 连接失败:", exc, file=sys.stderr)
|
||||
print("Connection failed:", exc, file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
result = conn.query(args.query)
|
||||
print("✅ 连接成功,查询结果:")
|
||||
print("Connection OK, query result:")
|
||||
for row in result:
|
||||
print(row)
|
||||
conn.close()
|
||||
return 0
|
||||
except Exception as exc: # pragma: no cover - diagnostic output
|
||||
print("⚠️ 连接成功但执行查询失败:", exc, file=sys.stderr)
|
||||
print("Connection succeeded but query failed:", exc, file=sys.stderr)
|
||||
try:
|
||||
conn.close()
|
||||
finally:
|
||||
|
||||
@@ -1,50 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""测试命令仓库:集中维护 run_tests.py 的常用组合,并支持一键执行。
|
||||
|
||||
参数键说明(可在 PRESETS 中任意叠加):
|
||||
|
||||
1. suite
|
||||
类型:列表;值:["online"], ["offline"], ["integration"] 等。
|
||||
含义:引用 run_tests 内置测试套件。online=在线模式;offline=离线模式;integration=数据库集成测试。
|
||||
用法:["online","offline"] 表示一次执行两套;["integration"] 仅跑数据库相关用例。
|
||||
|
||||
2. tests
|
||||
类型:列表;示例:["tests/unit/test_config.py"]。
|
||||
含义:自定义的 pytest 目标路径,适合补充临时/个别测试。
|
||||
|
||||
3. mode
|
||||
类型:字符串;取值:"ONLINE" 或 "OFFLINE"。
|
||||
含义:覆盖 TEST_MODE;ONLINE 走 API 全流程,OFFLINE 读取 JSON 归档执行 Transform + Load。
|
||||
|
||||
4. db_dsn
|
||||
类型:字符串;示例:postgresql://user:pwd@host:5432/testdb。
|
||||
含义:设置 TEST_DB_DSN,使用真实 PostgreSQL 连接;不设置则使用伪 DB(仅记录操作,不落库)。
|
||||
|
||||
5. json_archive / json_temp
|
||||
类型:字符串;示例:"tests/testdata_json"、"C:/tmp/json"。
|
||||
含义:离线模式所需的归档输入目录 / 临时输出目录。未设置时沿用 .env 或默认配置。
|
||||
|
||||
6. keyword
|
||||
类型:字符串;示例:"ORDERS"。
|
||||
含义:等价 pytest -k,可筛选测试名/节点,只运行包含该关键字的用例。
|
||||
|
||||
7. pytest_args
|
||||
类型:字符串;示例:"-vv --maxfail=1"。
|
||||
含义:追加 pytest 命令行参数,用于控制日志、失败策略等。
|
||||
|
||||
8. env
|
||||
类型:列表;示例:["STORE_ID=123","API_TOKEN=xxx"]。
|
||||
含义:额外的环境变量,在调用 run_tests 前注入到 os.environ。
|
||||
|
||||
9. preset_meta
|
||||
类型:字符串;仅用于描述场景,不会传给 run_tests(纯注释)。
|
||||
|
||||
使用方式:
|
||||
- 直接 F5 或 `python scripts/test_presets.py`:读取 AUTO_RUN_PRESETS 的预置并顺序执行。
|
||||
- `python scripts/test_presets.py --preset offline_realdb`:临时指定要运行的组合。
|
||||
- `python scripts/test_presets.py --list`:查看参数说明及所有预置详情。
|
||||
"""
|
||||
|
||||
"""测试命令仓库:集中维护 run_tests.py 的常用组合,支持一键执行。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
@@ -55,46 +10,42 @@ from typing import List
|
||||
|
||||
RUN_TESTS_SCRIPT = os.path.join(os.path.dirname(__file__), "run_tests.py")
|
||||
|
||||
# 默认自动运行的预置(可自定义顺序)
|
||||
|
||||
AUTO_RUN_PRESETS = ["offline_realdb"]
|
||||
# 默认自动运行的预置(可根据需要修改顺序/条目)
|
||||
AUTO_RUN_PRESETS = ["fetch_only"]
|
||||
|
||||
PRESETS = {
|
||||
"online_orders": {
|
||||
"fetch_only": {
|
||||
"suite": ["online"],
|
||||
"mode": "ONLINE",
|
||||
"flow": "FETCH_ONLY",
|
||||
"json_fetch_root": "tmp/json_fetch",
|
||||
"keyword": "ORDERS",
|
||||
"pytest_args": "-vv",
|
||||
"preset_meta": "在线模式,仅跑订单任务并输出详细日志",
|
||||
"preset_meta": "仅在线抓取阶段,输出到本地目录",
|
||||
},
|
||||
|
||||
"dbrun": {
|
||||
"suite": ["integration"],
|
||||
# "mode": "OFFLINE",
|
||||
# "keyword": "ORDERS",
|
||||
# "pytest_args": "-vv",
|
||||
"preset_meta": "在线模式,仅跑订单任务并输出详细日志",
|
||||
},
|
||||
|
||||
"offline_realdb": {
|
||||
"suite": ["offline"],
|
||||
"mode": "OFFLINE",
|
||||
"db_dsn": "postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test",
|
||||
"json_archive": "tests/testdata_json",
|
||||
"ingest_local": {
|
||||
"suite": ["online"],
|
||||
"flow": "INGEST_ONLY",
|
||||
"json_source": "tests/source-data-doc",
|
||||
"keyword": "ORDERS",
|
||||
"preset_meta": "离线模式 + 真实测试库,用预置 JSON 回放并写入测试库",
|
||||
"preset_meta": "从指定 JSON 目录做本地清洗入库",
|
||||
},
|
||||
"full_pipeline": {
|
||||
"suite": ["online"],
|
||||
"flow": "FULL",
|
||||
"json_fetch_root": "tmp/json_fetch",
|
||||
"keyword": "ORDERS",
|
||||
"preset_meta": "先抓取再清洗入库的全流程",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def print_parameter_help() -> None:
|
||||
print("=== 参数键说明 ===")
|
||||
print("suite : 预置套件列表,如 ['online','offline']")
|
||||
print("suite : 预置套件列表,如 ['online','integration']")
|
||||
print("tests : 自定义 pytest 路径列表")
|
||||
print("mode : TEST_MODE(ONLINE/ OFFLINE)")
|
||||
print("db_dsn : TEST_DB_DSN,连接真实 PostgreSQL")
|
||||
print("json_archive : TEST_JSON_ARCHIVE_DIR,离线模式输入目录")
|
||||
print("json_temp : TEST_JSON_TEMP_DIR,离线模式临时目录")
|
||||
print("flow : PIPELINE_FLOW(FETCH_ONLY / INGEST_ONLY / FULL)")
|
||||
print("json_source : JSON_SOURCE_DIR,本地清洗入库使用的 JSON 目录")
|
||||
print("json_fetch_root : JSON_FETCH_ROOT,在线抓取输出根目录")
|
||||
print("keyword : pytest -k 过滤关键字")
|
||||
print("pytest_args : 额外 pytest 参数(字符串)")
|
||||
print("env : 附加环境变量,例如 ['KEY=VALUE']")
|
||||
@@ -120,7 +71,7 @@ def print_presets() -> None:
|
||||
|
||||
def resolve_targets(requested: List[str] | None) -> List[str]:
|
||||
if not PRESETS:
|
||||
raise SystemExit("Pre-sets 为空,请先在 PRESETS 中定义测试组合。")
|
||||
raise SystemExit("预置为空,请先在 PRESETS 中定义测试组合。")
|
||||
|
||||
def valid(names: List[str]) -> List[str]:
|
||||
return [name for name in names if name in PRESETS]
|
||||
@@ -137,7 +88,6 @@ def resolve_targets(requested: List[str] | None) -> List[str]:
|
||||
if auto:
|
||||
return auto
|
||||
|
||||
# 兜底:全部预置
|
||||
return list(PRESETS.keys())
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user