"""Append-only batch insert of dict rows into a PostgreSQL table via psycopg2.""" from __future__ import annotations from datetime import date def pg_insert_rows( dsn: str, table: str, rows: list[dict], add_snapshot_date: bool = True, ) -> int: """Insert rows (list of dicts) into a PostgreSQL table, append-only. Columns are derived from the dict keys. If rows have heterogeneous schemas, the union of all keys is used and missing values are filled with None, so a single parameterized statement covers every row. Insertion uses psycopg2.extras.execute_values (no string formatting) to avoid SQL injection. Args: dsn: Connection string, e.g. "postgresql://user:pass@host:port/dbname". table: Target table name (must already exist). rows: List of dicts; each dict is one row, keys are column names. add_snapshot_date: If True and a row lacks "snapshot_date", inject snapshot_date = date.today() before inserting. Returns: Number of rows inserted. Raises: RuntimeError: If the connection or the insert fails. The original psycopg2 exception is chained for debugging. """ if not rows: return 0 # psycopg2 is imported lazily so the module imports without the dependency # present (self-test / introspection) and fails clearly only when invoked. try: import psycopg2 from psycopg2 import extras as pg_extras from psycopg2 import sql as pg_sql except ImportError as exc: # pragma: no cover - exercised only without dep raise RuntimeError( "psycopg2 is required for pg_insert_rows; install psycopg2-binary" ) from exc # Work on copies so we never mutate the caller's dicts. prepared: list[dict] = [dict(row) for row in rows] if add_snapshot_date: today = date.today() for row in prepared: row.setdefault("snapshot_date", today) # Stable union of columns across all rows (first-seen order). columns: list[str] = [] seen: set[str] = set() for row in prepared: for key in row: if key not in seen: seen.add(key) columns.append(key) if not columns: return 0 # Build the value tuples in column order, filling absent keys with None. values = [tuple(row.get(col) for col in columns) for row in prepared] insert_stmt = pg_sql.SQL("INSERT INTO {table} ({cols}) VALUES %s").format( table=pg_sql.Identifier(table), cols=pg_sql.SQL(", ").join(pg_sql.Identifier(c) for c in columns), ) conn = None try: conn = psycopg2.connect(dsn) with conn.cursor() as cur: pg_extras.execute_values(cur, insert_stmt, values) conn.commit() return len(values) except Exception as exc: if conn is not None: conn.rollback() raise RuntimeError( f"pg_insert_rows failed inserting into {table!r}: {exc}" ) from exc finally: if conn is not None: conn.close()