From 7845570819f79ccf7ec35297c3a4a964b59d4c81 Mon Sep 17 00:00:00 2001 From: erik Date: Sun, 25 May 2025 22:12:08 +0000 Subject: [PATCH] Johan review --- ARCHITECTURE.md | 10 ++++++-- Dockerfile | 10 ++------ db_async.py | 64 +++++++++++++++++++++++++++++++++---------------- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 290b2826..c16a5c28 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -12,8 +12,14 @@ Root directory: - **README.md**: High-level documentation and usage instructions. - **EVENT_FORMATS.json**: Example JSON payloads for all event types (`telemetry`, `spawn`, `chat`, `rare`). - **db.py**: Legacy SQLite-based storage (telemetry_log & live_state tables, WAL mode, auto-vacuum). -- **db_async.py**: Async database definitions for PostgreSQL/TimescaleDB: - - Table schemas (SQLAlchemy Core): `telemetry_events`, `char_stats`, `rare_stats`, `rare_stats_sessions`, `spawn_events`. +- **db_async.py**: Async database definitions for PostgreSQL/TimescaleDB: + - Table schemas (SQLAlchemy Core): + - `telemetry_events`, + - `char_stats`, + - `rare_stats`, + - `rare_stats_sessions`, + - `spawn_events`, + - `rare_events`. - `init_db_async()`: Creates tables, enables TimescaleDB extension, and configures a hypertable on `telemetry_events`. - **main.py**: The FastAPI application: - HTTP endpoints: `/debug`, `/live`, `/history`, `/trails`. 
diff --git a/Dockerfile b/Dockerfile index 1e44a79a..118f0807 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,13 +35,7 @@ ENV DATABASE_URL=postgresql://postgres:password@db:5432/dereth \ DB_MAX_SQL_LENGTH=1000000000 \ DB_MAX_SQL_VARIABLES=32766 \ DB_WAL_AUTOCHECKPOINT_PAGES=1000 \ - SHARED_SECRET=your_shared_secret # Secret for plugin authentication + SHARED_SECRET=your_shared_secret ## Launch the FastAPI app using Uvicorn -CMD [ - "uvicorn", "main:app", - "--host", "0.0.0.0", - "--port", "8765", - "--reload", # auto-restart on code changes - "--workers", "1" -] +CMD ["uvicorn","main:app","--host","0.0.0.0","--port","8765","--reload","--workers","1"] diff --git a/db_async.py b/db_async.py index c47b2458..d401ec09 100644 --- a/db_async.py +++ b/db_async.py @@ -7,6 +7,7 @@ import os import sqlalchemy from databases import Database from sqlalchemy import MetaData, Table, Column, Integer, String, Float, DateTime, text +from sqlalchemy import Index # Environment: Postgres/TimescaleDB connection URL DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:password@localhost:5432/dereth") @@ -43,6 +44,12 @@ telemetry_events = Table( Column("mem_handles", Integer, nullable=True), Column("latency_ms", Float, nullable=True), ) +# Composite index to accelerate Grafana queries filtering by character_name then ordering by timestamp +Index( + 'ix_telemetry_events_char_ts', + telemetry_events.c.character_name, + telemetry_events.c.timestamp +) # Table for persistent total kills per character char_stats = Table( @@ -108,26 +115,41 @@ async def init_db_async(): engine = sqlalchemy.create_engine(DATABASE_URL) # Reflects metadata definitions into actual database tables via SQLAlchemy metadata.create_all(engine) - # Enable TimescaleDB extension and convert telemetry_events to hypertable - # Use a transactional context to ensure DDL statements are committed - with engine.begin() as conn: - # Enable or update TimescaleDB extension - # Install or confirm TimescaleDB extension to 
support hypertables - try: - conn.execute(text("CREATE EXTENSION IF NOT EXISTS timescaledb")) - except Exception as e: - print(f"Warning: failed to create extension timescaledb: {e}") - # Update TimescaleDB extension if an older version exists - try: - conn.execute(text("ALTER EXTENSION timescaledb UPDATE")) - except Exception as e: - print(f"Warning: failed to update timescaledb extension: {e}") - # Create hypertable for telemetry_events, skip default indexes to avoid collisions - # Transform telemetry_events into a hypertable partitioned by timestamp - try: + # Ensure TimescaleDB extension is installed and telemetry_events is a hypertable + # Run DDL in autocommit mode so errors don't abort subsequent statements + try: + with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as conn: + # Install extension if missing + try: + conn.execute(text("CREATE EXTENSION IF NOT EXISTS timescaledb")) + except Exception as e: + print(f"Warning: failed to create extension timescaledb: {e}") + # Convert to hypertable, migrating existing data and skipping default index creation + try: + conn.execute(text( + "SELECT create_hypertable('telemetry_events', 'timestamp', " + "if_not_exists => true, migrate_data => true, create_default_indexes => false)" + )) + except Exception as e: + print(f"Warning: failed to create hypertable telemetry_events: {e}") + except Exception as e: + print(f"Warning: timescale extension/hypertable setup failed: {e}") + # Ensure composite index exists for efficient time-series queries by character + try: + with engine.connect() as conn: conn.execute(text( - "SELECT create_hypertable('telemetry_events', 'timestamp', \ - if_not_exists => true, create_default_indexes => false)" + "CREATE INDEX IF NOT EXISTS ix_telemetry_events_char_ts " + "ON telemetry_events (character_name, timestamp)" )) - except Exception as e: - print(f"Warning: failed to create hypertable telemetry_events: {e}") \ No newline at end of file + except Exception as e: + 
print(f"Warning: failed to create composite index ix_telemetry_events_char_ts: {e}") + # Disable parallel workers at the system level to avoid OOMs from large parallel scans + try: + # Apply settings outside transaction for ALTER SYSTEM + conn2 = engine.connect().execution_options(isolation_level="AUTOCOMMIT") + conn2.execute(text("ALTER SYSTEM SET max_parallel_workers_per_gather = 0")) + conn2.execute(text("ALTER SYSTEM SET max_parallel_workers = 0")) + conn2.execute(text("SELECT pg_reload_conf()")) + conn2.close() + except Exception as e: + print(f"Warning: failed to disable parallel workers: {e}") \ No newline at end of file