feat(shell): auto-commit con 31 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,8 +18,12 @@ from .caldav_put_event import caldav_put_event
|
||||
from .dav_list_resources import dav_list_resources
|
||||
from .dav_get_resource import dav_get_resource
|
||||
from .dav_delete_resource import dav_delete_resource
|
||||
from .pg_insert_rows import pg_insert_rows
|
||||
from .pg_apply_sql import pg_apply_sql
|
||||
|
||||
__all__ = [
|
||||
"pg_insert_rows",
|
||||
"pg_apply_sql",
|
||||
"setup_logger",
|
||||
"get_logger",
|
||||
"generate_app_icon",
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
---
|
||||
name: pg_apply_sql
|
||||
kind: function
|
||||
lang: py
|
||||
domain: infra
|
||||
version: "1.0.0"
|
||||
purity: impure
|
||||
signature: "def pg_apply_sql(dsn: str, sql_path: str) -> int"
|
||||
description: "Lee un archivo .sql y ejecuta su contenido completo contra PostgreSQL en un solo cursor.execute via psycopg2. Multi-statement en una transaccion (sin parametros). Pensado para migraciones idempotentes (el SQL usa IF NOT EXISTS). Commit al exito. Retorna el numero de statements aplicados (split por ;), minimo 1 si el script no esta vacio."
|
||||
tags: [postgres, market-intel, infra]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: "error_go_core"
|
||||
imports: [psycopg2]
|
||||
params:
|
||||
- name: dsn
|
||||
desc: "Cadena de conexion PostgreSQL en formato postgresql://user:pass@host:port/dbname."
|
||||
- name: sql_path
|
||||
desc: "Ruta al archivo .sql a aplicar (ej. db/migrations/001_init.sql)."
|
||||
output: "Numero entero de statements no vacios aplicados (split por ;), minimo 1 si el script no esta vacio; 0 si el archivo esta vacio."
|
||||
tested: false
|
||||
tests: []
|
||||
test_file_path: ""
|
||||
file_path: "python/functions/infra/pg_apply_sql.py"
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```python
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "python", "functions"))
|
||||
from infra.pg_apply_sql import pg_apply_sql
|
||||
|
||||
dsn = "postgresql://scraper:secret@localhost:5432/captacion"
|
||||
# db/migrations/001_init.sql con:
|
||||
# CREATE TABLE IF NOT EXISTS leads_raw (
|
||||
# id SERIAL PRIMARY KEY, name TEXT, city TEXT, score INT,
|
||||
# snapshot_date DATE
|
||||
# );
|
||||
n = pg_apply_sql(dsn, "db/migrations/001_init.sql")
|
||||
print(f"aplicados {n} statements") # aplicados 1 statements
|
||||
```
|
||||
|
||||
## Cuando usarla
|
||||
|
||||
Cuando necesitas aplicar un archivo de migracion `.sql` (crear tablas, indices, columnas)
|
||||
a Postgres antes de escribir datos. Usala al arrancar el pipeline de captacion_clientes
|
||||
para garantizar el schema, y para iterar sobre `db/migrations/*.sql` en orden.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- Idempotencia depende del SQL: el archivo DEBE usar `IF NOT EXISTS` / `ON CONFLICT` para poder re-aplicarse sin error. Esta funcion no lleva control de versiones de migracion — el caller decide que archivos aplica y en que orden.
|
||||
- Todo el script va en UNA transaccion: si cualquier statement falla, se hace rollback de todo el archivo y se lanza RuntimeError.
|
||||
- El conteo de statements (`split(";")`) es informativo y aproximado: un `;` dentro de un string literal o de un cuerpo de funcion PL/pgSQL infla la cuenta. No lo uses como verdad exacta, solo como indicador.
|
||||
- NO pasa parametros: el contenido del `.sql` se ejecuta tal cual. No metas datos no confiables en el archivo — es para DDL/migraciones controladas, no para input de usuario.
|
||||
- Requiere `psycopg2` instalado en el venv (import perezoso: el modulo se importa sin la dependencia, pero la llamada falla con RuntimeError claro si falta).
|
||||
- Archivo inexistente o ilegible lanza RuntimeError con la ruta.
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Apply a .sql script file against a PostgreSQL database via psycopg2."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def pg_apply_sql(dsn: str, sql_path: str) -> int:
    """Read a .sql file and execute its full contents against PostgreSQL.

    The whole script is sent in a single ``cursor.execute`` call with no bound
    parameters, followed by one ``commit``; any failure triggers a rollback,
    so the file is applied as a unit. Intended for idempotent migrations
    (the SQL itself should use "IF NOT EXISTS" or similar).

    Args:
        dsn: Connection string, e.g. "postgresql://user:pass@host:port/dbname".
        sql_path: Path to the .sql file to apply
            (e.g. db/migrations/001_init.sql).

    Returns:
        0 when the file is empty or whitespace-only (no connection is opened).
        Otherwise the number of non-blank fragments obtained by splitting the
        script on ";", with a floor of 1. The count is informational only: a
        ";" inside a string literal or a function body inflates it.

    Raises:
        RuntimeError: If the file cannot be read or is not valid UTF-8, if
            psycopg2 is not installed, or if the connection / execution
            fails. The original exception is chained for debugging.
    """
    try:
        # "utf-8-sig" decodes plain UTF-8 and additionally drops a leading
        # byte-order mark, so a BOM written by some editors is not sent to
        # the server as part of the first statement.
        script = Path(sql_path).read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; catch it
        # explicitly so undecodable files honor the RuntimeError contract.
        raise RuntimeError(
            f"pg_apply_sql could not read {sql_path!r}: {exc}"
        ) from exc

    if not script.strip():
        return 0

    # Lazy import so the module loads even without psycopg2 installed.
    try:
        import psycopg2
    except ImportError as exc:  # pragma: no cover - exercised only without dep
        raise RuntimeError(
            "psycopg2 is required for pg_apply_sql; install psycopg2-binary"
        ) from exc

    # Best-effort statement count (informational return value only). Blank
    # fragments, such as the one after a trailing semicolon, are not counted.
    statement_count = sum(1 for part in script.split(";") if part.strip())
    statement_count = max(statement_count, 1)

    conn = None
    try:
        conn = psycopg2.connect(dsn)
        with conn.cursor() as cur:
            cur.execute(script)
        conn.commit()
        return statement_count
    except Exception as exc:
        if conn is not None:
            try:
                conn.rollback()
            except Exception:
                # Deliberate best-effort: the connection may already be
                # unusable. The original failure below is what the caller
                # needs to see, not a secondary rollback error.
                pass
        raise RuntimeError(
            f"pg_apply_sql failed applying {sql_path!r}: {exc}"
        ) from exc
    finally:
        if conn is not None:
            conn.close()
|
||||
@@ -0,0 +1,63 @@
|
||||
---
|
||||
name: pg_insert_rows
|
||||
kind: function
|
||||
lang: py
|
||||
domain: infra
|
||||
version: "1.0.0"
|
||||
purity: impure
|
||||
signature: "def pg_insert_rows(dsn: str, table: str, rows: list[dict], add_snapshot_date: bool = True) -> int"
|
||||
description: "Inserta filas (lista de dicts) en una tabla PostgreSQL de forma append-only via psycopg2.extras.execute_values. Deriva columnas de las claves del dict (union si difieren, rellena con None). Opcionalmente inyecta snapshot_date = date.today(). Insercion parametrizada (sin format de strings, evita inyeccion SQL). Commit y cierre de conexion. Retorna el numero de filas insertadas."
|
||||
tags: [postgres, market-intel, infra]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: "error_go_core"
|
||||
imports: [psycopg2]
|
||||
params:
|
||||
- name: dsn
|
||||
desc: "Cadena de conexion PostgreSQL en formato postgresql://user:pass@host:port/dbname."
|
||||
- name: table
|
||||
desc: "Nombre de la tabla destino (debe existir previamente)."
|
||||
- name: rows
|
||||
desc: "Lista de dicts; cada dict es una fila, sus claves son nombres de columna. Si los esquemas difieren se usa la union de claves y se rellena con None."
|
||||
- name: add_snapshot_date
|
||||
desc: "Si True y una fila no trae snapshot_date, inyecta snapshot_date = date.today() antes de insertar. Default True."
|
||||
output: "Numero entero de filas insertadas (0 si rows esta vacio)."
|
||||
tested: false
|
||||
tests: []
|
||||
test_file_path: ""
|
||||
file_path: "python/functions/infra/pg_insert_rows.py"
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```python
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "python", "functions"))
|
||||
from infra.pg_insert_rows import pg_insert_rows
|
||||
|
||||
dsn = "postgresql://scraper:secret@localhost:5432/captacion"
|
||||
rows = [
|
||||
{"name": "Cliente A", "city": "Madrid", "score": 87},
|
||||
{"name": "Cliente B", "city": "Sevilla"}, # sin score -> NULL
|
||||
]
|
||||
# snapshot_date = hoy se inyecta en cada fila automaticamente
|
||||
n = pg_insert_rows(dsn, "leads_raw", rows)
|
||||
print(f"insertadas {n} filas") # insertadas 2 filas
|
||||
```
|
||||
|
||||
## Cuando usarla
|
||||
|
||||
Cuando escribes datos scrapeados a Postgres en lote append-only y quieres la columna
|
||||
`snapshot_date` poblada sin codigo extra. Usala antes de cualquier dashboard/consulta de
|
||||
market-intel sobre el dato bruto. Cada llamada acumula una nueva foto historica.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- La tabla debe existir antes de llamar — esta funcion NO crea schema (usa `pg_apply_sql` para eso).
|
||||
- Es append-only: NO hace upsert ni deduplica. Llamadas repetidas duplican filas (por diseno, para historico).
|
||||
- El esquema efectivo es la UNION de las claves de todas las filas; columnas ausentes en una fila se insertan como NULL. Si una clave no existe como columna en la tabla, Postgres lanza error y la transaccion entera hace rollback.
|
||||
- `add_snapshot_date=True` solo rellena filas que NO traen ya `snapshot_date`; si tu dict ya la incluye, se respeta.
|
||||
- Requiere `psycopg2` instalado en el venv (import perezoso: el modulo se importa sin la dependencia, pero la llamada falla con RuntimeError claro si falta).
|
||||
- Conexion nueva por llamada (sin pool). Para muchas inserciones pequenas en bucle, agrupa las filas en una sola llamada.
|
||||
@@ -0,0 +1,92 @@
|
||||
"""Append-only batch insert of dict rows into a PostgreSQL table via psycopg2."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
|
||||
|
||||
def pg_insert_rows(
    dsn: str,
    table: str,
    rows: list[dict],
    add_snapshot_date: bool = True,
) -> int:
    """Insert rows (list of dicts) into a PostgreSQL table, append-only.

    Columns are derived from the dict keys. If rows have different key sets,
    the union of all keys (in first-seen order) is used and missing values
    are filled with None, so one statement covers every row. Table and column
    names are composed with ``psycopg2.sql.Identifier`` and the values are
    passed through ``psycopg2.extras.execute_values``; nothing is formatted
    into the SQL string by hand. The caller's dicts are never mutated.

    Args:
        dsn: Connection string, e.g. "postgresql://user:pass@host:port/dbname".
        table: Target table name (must already exist).
        rows: List of dicts; each dict is one row, keys are column names.
        add_snapshot_date: If True and a row lacks "snapshot_date", inject
            snapshot_date = date.today() before inserting.

    Returns:
        Number of rows inserted. 0 when ``rows`` is empty or when no row
        contributes any column; in both cases no connection is opened and
        psycopg2 is not required.

    Raises:
        RuntimeError: If psycopg2 is not installed, or if the connection or
            the insert fails. The original exception is chained for
            debugging.
    """
    if not rows:
        return 0

    # Work on copies so we never mutate the caller's dicts.
    prepared = [dict(row) for row in rows]

    if add_snapshot_date:
        today = date.today()
        for row in prepared:
            row.setdefault("snapshot_date", today)

    # Stable union of columns across all rows: dict.fromkeys deduplicates
    # while preserving first-seen order.
    columns = list(dict.fromkeys(key for row in prepared for key in row))
    if not columns:
        return 0

    # psycopg2 is imported lazily, and only once there is something to
    # insert, so the module imports (and no-op calls succeed) without the
    # dependency present.
    try:
        import psycopg2
        from psycopg2 import extras as pg_extras
        from psycopg2 import sql as pg_sql
    except ImportError as exc:  # pragma: no cover - exercised only without dep
        raise RuntimeError(
            "psycopg2 is required for pg_insert_rows; install psycopg2-binary"
        ) from exc

    # Build the value tuples in column order, filling absent keys with None.
    values = [tuple(row.get(col) for col in columns) for row in prepared]

    insert_stmt = pg_sql.SQL("INSERT INTO {table} ({cols}) VALUES %s").format(
        table=pg_sql.Identifier(table),
        cols=pg_sql.SQL(", ").join(pg_sql.Identifier(c) for c in columns),
    )

    conn = None
    try:
        conn = psycopg2.connect(dsn)
        with conn.cursor() as cur:
            pg_extras.execute_values(cur, insert_stmt, values)
        conn.commit()
        return len(values)
    except Exception as exc:
        if conn is not None:
            try:
                conn.rollback()
            except Exception:
                # Deliberate best-effort: the connection may already be
                # unusable. The original failure below is what the caller
                # needs to see, not a secondary rollback error.
                pass
        raise RuntimeError(
            f"pg_insert_rows failed inserting into {table!r}: {exc}"
        ) from exc
    finally:
        if conn is not None:
            conn.close()
|
||||
Reference in New Issue
Block a user