84 lines
3.1 KiB
Python
84 lines
3.1 KiB
Python
"""Rebuild of the derived tables (schema ``derived``).

Hard rule: derived tables contain ONLY computed data — none of them carries a
column that references notes (note_path is forbidden here). They are rebuilt
from scratch (DROP + CREATE) on every ingest, so their contents always reflect
the latest state of the master tables.
"""
|
|
|
|
from __future__ import annotations

import json
from collections import Counter
|
|
|
|
# Names of the derived tables this module manages (reported by ingest).
DERIVED_TABLES = (
    "person_stats",
    "event_monthly",
    "contact_link_quality",
)
|
|
|
|
|
|
def rebuild_derived(conn) -> list:
    """Rebuild every derived table using the given write connection.

    The per-table rebuild helpers run in a fixed order: person_stats,
    event_monthly, contact_link_quality.

    Returns:
        The qualified names ("derived.<name>") of the tables listed in
        DERIVED_TABLES.
    """
    rebuilders = (
        _rebuild_person_stats,
        _rebuild_event_monthly,
        _rebuild_contact_link_quality,
    )
    for rebuild in rebuilders:
        rebuild(conn)

    qualified = []
    for name in DERIVED_TABLES:
        qualified.append(f"derived.{name}")
    return qualified
|
|
|
|
|
|
def _rebuild_person_stats(conn) -> None:
    """Rebuild ``derived.person_stats``: person counts by contexto, pais and tag.

    The table gets one row per (dimension, valor) pair, where ``n`` is the
    number of persons carrying that value. Only the contexto, pais and tags
    columns of ``persons`` are read (no note_path). Tags are expanded in
    Python from the JSON text of the tags column so the query does not depend
    on the engine's JSON functions.

    Tag handling, per person row:
      * empty/NULL or unparseable JSON yields no tags;
      * a JSON value that is not a list is treated as a single tag;
      * JSON ``null`` (alone or as a list element) is ignored;
      * tags are stringified and counted at most once per person, so ``n``
        stays a person count even when a tag is repeated in one list.

    Args:
        conn: Write connection exposing ``execute``/``executemany`` with
            ``?`` placeholders.
    """
    missing = "(sin)"  # label used when contexto/pais is NULL or empty
    counts = Counter()

    rows = conn.execute("SELECT contexto, pais, tags FROM persons").fetchall()
    for contexto, pais, tags_json in rows:
        counts[("contexto", contexto or missing)] += 1
        counts[("pais", pais or missing)] += 1

        try:
            tags = json.loads(tags_json) if tags_json else []
        except (TypeError, ValueError):
            # Best effort: a malformed tags value contributes no tags.
            tags = []
        if not isinstance(tags, list):
            tags = [tags]

        # Dedupe after stringifying so each person adds at most 1 per tag;
        # dropping None keeps JSON null from becoming a "None" tag.
        for tag in {str(tag) for tag in tags if tag is not None}:
            counts[("tag", tag)] += 1

    conn.execute("DROP TABLE IF EXISTS derived.person_stats")
    conn.execute(
        "CREATE TABLE derived.person_stats (dimension TEXT, valor TEXT, n BIGINT)"
    )
    # Sorted so the inserted rows come out in a deterministic order.
    payload = [[dim, val, n] for (dim, val), n in sorted(counts.items())]
    if payload:  # nothing to insert when persons is empty
        conn.executemany(
            "INSERT INTO derived.person_stats VALUES (?, ?, ?)", payload
        )
|
|
|
|
|
|
def _rebuild_event_monthly(conn) -> None:
    """Rebuild ``derived.event_monthly``: event counts per calendar and month.

    The month is the first seven characters of ``dtstart``; rows whose
    ``dtstart`` is NULL are excluded. No note_path column is involved.
    """
    target = "derived.event_monthly"
    month_expr = "substr(dtstart, 1, 7)"
    create_sql = (
        f"CREATE TABLE {target} AS "
        f"SELECT calendar, {month_expr} AS month, COUNT(*) AS n "
        "FROM events WHERE dtstart IS NOT NULL "
        f"GROUP BY calendar, {month_expr} ORDER BY calendar, month"
    )
    # Drop first so the table is always recreated from scratch.
    conn.execute(f"DROP TABLE IF EXISTS {target}")
    conn.execute(create_sql)
|
|
|
|
|
|
def _rebuild_contact_link_quality(conn) -> None:
    """Rebuild ``derived.contact_link_quality``: contacts link counters.

    Stores a single row of numbers computed from ``contacts``: the total row
    count, how many rows have a non-NULL note_path (linked) and how many have
    a NULL note_path (unlinked). Only counts are stored, never paths.
    """
    target = "derived.contact_link_quality"
    select_sql = (
        "SELECT COUNT(*) AS total, "
        "COUNT(*) FILTER (WHERE note_path IS NOT NULL) AS linked, "
        "COUNT(*) FILTER (WHERE note_path IS NULL) AS unlinked "
        "FROM contacts"
    )
    # Drop first so the table is always recreated from scratch.
    conn.execute(f"DROP TABLE IF EXISTS {target}")
    conn.execute(f"CREATE TABLE {target} AS {select_sql}")
|