Files
osint_db/server/writes.py
T
egutierrez 63f37257cd feat: DuckDB como fuente de verdad (multi-valor, ownership selectivo, escritura, libretas)
F1 — migraciones: 002_multivalue (persons +telefonos/emails/direcciones/extra_fm JSON,
backfill desde singulares con to_json) + 003_addressbooks (tabla addressbooks + seed
idempotente de la libreta por defecto). Conteos intactos (697/1065/98).

F2 — ingest_vault selectivo (anti-pisado): personas que ya existen en DB solo actualizan
note_path + extra_fm vía duckdb_upsert(update_cols=...), NO pisan los campos OWNED por la
DB; personas nuevas = bootstrap completo. _link_contacts enlaza por listas telefonos[]/
emails[] además del singular. ingest_dav itera todas las libretas de la tabla addressbooks.

F3 — escritura estructurada (server/writes.py + endpoints en main.py): CRUD
/api/person|contact|event, /api/addressbook, /api/calendar, /api/person/{slug}/render
(DB→nota preservando la prosa del cuerpo), /api/push/dav (reconcilia DB→Xandikos). El push
DAV y el render ocurren fuera de la transacción de escritura para no bloquear la DB con
latencia de red. registry_bridge.py importa las funciones nuevas; app.md actualizado.

Verificado: 18 tests verdes; ownership probado sobre datos reales (un centinela DB-owned
sobrevivió a POST /api/ingest/vault sobre las 697 fichas); person CRUD + materialización
de la ficha .md en vivo, con cleanup sin residuo.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 00:44:02 +02:00

679 lines
22 KiB
Python

"""Escritura estructurada del service osint_db (DB como fuente de verdad).
Estos helpers implementan los endpoints de escritura de /api/person,
/api/contact, /api/event, /api/addressbook, /api/calendar y /api/push/dav. El
patrón común:
1. Se escribe en la DB DuckDB bajo el lock single-writer del service.
2. El push a Xandikos (CardDAV/CalDAV) y el render DB->nota se hacen DESPUÉS
de cerrar la transacción, para no bloquear la DB con la latencia de red.
persons es dueña de sus campos estructurados (multi-valor): los singulares
telefono/email/direccion se rellenan con el primer elemento de cada lista al
materializar la ficha, y la nota Markdown se reescribe SIN tocar su prosa
(update_obsidian_note con set_frontmatter hace merge del frontmatter y conserva
el body).
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from .config import Config
from .registry_bridge import (
build_vcard,
caldav_put_event,
carddav_put_vcard,
create_obsidian_note,
dav_delete_resource,
dav_make_addressbook,
dav_make_calendar,
duckdb_execute,
duckdb_query_readonly,
duckdb_upsert,
pass_get_secret,
update_obsidian_note,
)
# persons columns governed by the structured API. Excludes slug (the key) and
# note_path/extra_fm, which the vault ingest manages.
_PERSON_API_COLS = (
    "nombre", "aliases", "sexo", "fecha_nacimiento", "dni", "pais",
    "contexto", "telefonos", "emails", "direcciones", "tags",
)
def _now():
    """Return the current instant as a timezone-aware UTC datetime."""
    utc = timezone.utc
    return datetime.now(tz=utc)
def _as_list(value) -> list:
    """Normalize *value* to a list of non-empty, stripped strings.

    A scalar is wrapped into a one-element list; lists and tuples are
    processed element by element. ``None`` (as the whole value or as an
    element) and blank strings are dropped, so a JSON ``null`` inside a list
    never turns into the literal text ``"None"``.

    Args:
        value: ``None``, a scalar, or a list/tuple of scalars.

    Returns:
        A new list of stripped, non-empty strings (possibly empty).
    """
    if value is None:
        return []
    items = value if isinstance(value, (list, tuple)) else [value]
    cleaned = []
    for item in items:
        if item is None:
            # str(None) would yield "None", which is data corruption here.
            continue
        text = str(item).strip()
        if text:
            cleaned.append(text)
    return cleaned
def _json(value) -> str:
    """Serialize *value* to JSON text, keeping non-ASCII characters unescaped.

    Objects the json module cannot encode natively are passed through str().
    """
    options = {"ensure_ascii": False, "default": str}
    return json.dumps(value, **options)
def _read_person(db_path: str, slug: str) -> dict | None:
    """Fetch one persons row by slug; return None when nothing usable comes back.

    A query whose status is not "ok" is treated the same as "no such row".
    """
    result = duckdb_query_readonly(
        db_path,
        "SELECT slug, note_path, nombre, aliases, sexo, fecha_nacimiento, dni, "
        "telefono, email, direccion, pais, contexto, fuente, dav_uid, tags, "
        "telefonos, emails, direcciones, extra_fm FROM persons WHERE slug = ?",
        [slug],
        1,  # presumably a row limit; slug is the key so one row is enough
    )
    rows = result.get("rows")
    if result.get("status") == "ok" and rows:
        return rows[0]
    return None
def _decode_json_field(value) -> list:
    """Decode a JSON column value from the DB into a list.

    Tolerates ``None``, an already-decoded list, and JSON text. Text that
    decodes to a non-list scalar is wrapped into a one-element list; text
    that decodes to JSON ``null`` yields an empty list rather than
    ``[None]``. Undecodable input yields an empty list.

    Args:
        value: ``None``, a list, or JSON text (str/bytes).

    Returns:
        The decoded list (the same object when *value* is already a list).
    """
    if value is None:
        return []
    if isinstance(value, list):
        return value
    try:
        parsed = json.loads(value)
    except (TypeError, ValueError):
        return []
    if parsed is None:
        # JSON null means "no values", same as a SQL NULL.
        return []
    return parsed if isinstance(parsed, list) else [parsed]
def _decode_extra_fm(value) -> dict:
    """Decode the extra_fm column (a JSON object) into a dict.

    An already-decoded dict is returned as-is. Anything that is missing,
    undecodable, or decodes to a non-object yields an empty dict.
    """
    if isinstance(value, dict):
        return value
    if value is None:
        return {}
    try:
        decoded = json.loads(value)
    except (TypeError, ValueError):
        decoded = None
    return decoded if isinstance(decoded, dict) else {}
# ---------------------------------------------------------------------------
# persons
# ---------------------------------------------------------------------------
def upsert_person(cfg: Config, slug: str, fields: dict, *, render: bool = True) -> dict:
    """Create or update a multi-value person row and optionally render its note.

    The structured fields are written to the persons table; the singular
    telefono/email/direccion columns are filled with the first element of the
    matching list. Keys missing from *fields* are written as NULL / empty
    list (the row is replaced, not merged). After the upsert returns, the
    note is materialized via render_person unless *render* is false.

    Args:
        cfg: Service configuration (db_path is read here).
        slug: Person key; surrounding whitespace is stripped.
        fields: Structured values (nombre, aliases, sexo, fecha_nacimiento,
            dni, pais, contexto, telefonos, emails, direcciones, tags).
        render: When true, materialize the DB row into the Markdown note.

    Returns:
        ``{"status": "error", "error": ...}`` on an empty slug or a failed
        upsert; otherwise a dict with status "ok", the upsert counters,
        ``note_path``, ``materialized`` and ``render_error`` (the render
        failure message, or None when rendering succeeded or was skipped).
    """
    slug = (slug or "").strip()
    if not slug:
        return {"status": "error", "error": "slug vacío"}

    telefonos = _as_list(fields.get("telefonos"))
    emails = _as_list(fields.get("emails"))
    direcciones = _as_list(fields.get("direcciones"))
    aliases = _as_list(fields.get("aliases"))
    tags = _as_list(fields.get("tags"))

    # Keep the note path already recorded for this person, if any.
    existing = _read_person(cfg.db_path, slug)
    note_path = (existing.get("note_path") if existing else None) or os.path.join(
        "personas", f"{slug}.md"
    )
    row = {
        "slug": slug,
        "note_path": note_path,
        "nombre": (fields.get("nombre") or slug),
        "aliases": _json(aliases),
        "sexo": fields.get("sexo"),
        "fecha_nacimiento": fields.get("fecha_nacimiento"),
        "dni": fields.get("dni"),
        "telefono": telefonos[0] if telefonos else None,
        "email": emails[0] if emails else None,
        "direccion": direcciones[0] if direcciones else None,
        "pais": fields.get("pais"),
        "contexto": fields.get("contexto"),
        "tags": _json(tags),
        "telefonos": _json(telefonos),
        "emails": _json(emails),
        "direcciones": _json(direcciones),
        "updated_at": _now(),
    }
    # Every column in the row except the key is updated; fuente/dav_uid/
    # extra_fm are not in the row, so the upsert leaves them untouched.
    update_cols = [col for col in row if col != "slug"]
    res = duckdb_upsert(
        cfg.db_path, "persons", [row], key_cols=["slug"], update_cols=update_cols
    )
    if res.get("status") != "ok":
        return {"status": "error", "error": res.get("error")}

    materialized = False
    render_error = None
    if render:
        rendered = render_person(cfg, slug)
        materialized = rendered.get("status") == "ok"
        if not materialized:
            # Surface why the note was not written instead of dropping it.
            render_error = rendered.get("error")
    return {
        "status": "ok",
        "slug": slug,
        "inserted": res.get("inserted", 0),
        "updated": res.get("updated", 0),
        "note_path": note_path,
        "materialized": materialized,
        "render_error": render_error,
    }
def delete_person(cfg: Config, slug: str) -> dict:
    """Remove a persons row from the DB; the vault note is left in place.

    Returns an error dict for an empty slug or a failed DELETE, otherwise
    status "ok" with the slug and the reported row count.
    """
    key = (slug or "").strip()
    if key == "":
        return {"status": "error", "error": "slug vacío"}
    outcome = duckdb_execute(cfg.db_path, "DELETE FROM persons WHERE slug = ?", [key])
    if outcome.get("status") == "ok":
        return {"status": "ok", "slug": key, "deleted": outcome.get("rowcount", 0)}
    return {"status": "error", "error": outcome.get("error")}
def render_person(cfg: Config, slug: str) -> dict:
    """Materialize a persons row into its Markdown note (frontmatter only).

    Reads the row, builds the frontmatter from the DB-owned fields (lists
    plus the legacy singular keys) merged over extra_fm, and writes it to the
    note. An existing note is updated through update_obsidian_note with
    set_frontmatter; a missing note is created with a minimal body.

    Args:
        cfg: Service configuration (db_path and vault_dir are read here).
        slug: Person key; surrounding whitespace is stripped.

    Returns:
        ``{"status": "ok", "slug": ..., "note_path": ...}`` on success, or
        ``{"status": "error", "error": ...}`` when the person is unknown, the
        note path resolves outside the vault, or the note write fails.
    """
    slug = (slug or "").strip()
    person = _read_person(cfg.db_path, slug)
    if person is None:
        return {"status": "error", "error": f"persona desconocida: {slug!r}"}

    rel = person.get("note_path") or os.path.join("personas", f"{slug}.md")
    if not rel.endswith(".md"):
        rel = rel + ".md"

    # Refuse paths that resolve (after symlinks / "..") outside the vault.
    abs_path = os.path.abspath(os.path.join(cfg.vault_dir, rel))
    vault_real = os.path.realpath(cfg.vault_dir)
    if not os.path.realpath(abs_path).startswith(vault_real + os.sep):
        return {"status": "error", "error": f"note_path fuera del vault: {rel!r}"}

    telefonos = _decode_json_field(person.get("telefonos"))
    emails = _decode_json_field(person.get("emails"))
    direcciones = _decode_json_field(person.get("direcciones"))
    aliases = _decode_json_field(person.get("aliases"))
    tags = _decode_json_field(person.get("tags"))
    frontmatter = {
        "tipo": "persona",
        "slug": slug,
        "nombre": person.get("nombre") or slug,
        "aliases": aliases,
        "sexo": person.get("sexo"),
        "fecha_nacimiento": person.get("fecha_nacimiento"),
        "dni": person.get("dni"),
        "telefonos": telefonos,
        "emails": emails,
        "direcciones": direcciones,
        # Singular keys kept for consumers that still read them.
        "telefono": telefonos[0] if telefonos else None,
        "email": emails[0] if emails else None,
        "direccion": direcciones[0] if direcciones else None,
        "pais": person.get("pais"),
        "contexto": person.get("contexto"),
        "fuente": person.get("fuente"),
        "tags": tags,
    }
    # Non-owned frontmatter captured from the vault goes underneath: on a
    # key clash the DB-owned value above wins.
    extra = _decode_extra_fm(person.get("extra_fm"))
    if extra:
        frontmatter = {**extra, **frontmatter}

    try:
        if os.path.exists(abs_path):
            outcome = update_obsidian_note(abs_path, set_frontmatter=frontmatter)
        else:
            outcome = create_obsidian_note(
                cfg.vault_dir,
                rel,
                body="## Notas\n",
                frontmatter=frontmatter,
            )
    except Exception as e:  # noqa: BLE001
        return {"status": "error", "error": str(e)}
    # NOTE(review): the other bridge helpers report failures as status dicts;
    # if the note writers do too, do not report such a failure as success.
    if isinstance(outcome, dict) and outcome.get("status") == "error":
        return {"status": "error", "error": outcome.get("error")}
    return {"status": "ok", "slug": slug, "note_path": rel}
# ---------------------------------------------------------------------------
# contacts (DB -> Xandikos)
# ---------------------------------------------------------------------------
def _resolve_password(cfg: Config) -> tuple:
    """Look up the Xandikos password through pass.

    Returns a ``(password, error)`` pair in which exactly one side is None.
    """
    secret = pass_get_secret(cfg.pass_secret)
    if secret.get("status") == "ok":
        return secret["value"], None
    message = f"pass no devolvió el secreto {cfg.pass_secret!r}: {secret.get('error')}"
    return None, message
def _default_collection(cfg: Config) -> str:
    """Return the contacts collection configured as the default target."""
    default_path = cfg.dav_contacts_collection
    return default_path
def upsert_contact(cfg: Config, uid: str, fields: dict) -> dict:
    """Create or update a contact row, then PUT its vCard to Xandikos.

    The DB upsert runs first; the CardDAV push happens only after it has
    returned. A failed push does not fail the call: the row stays written
    and the failure is reported through ``pushed`` / ``push_error``.

    Args:
        cfg: Service configuration (db_path, dav_base, dav_user, ...).
        uid: Contact UID; surrounding whitespace is stripped.
        fields: Accepts ``fn``/``nombre``, ``tels``/``telefonos``,
            ``emails``/``correos``, ``direcciones``/``adrs``, ``collection``.

    Returns:
        An error dict for an empty uid or a failed upsert; otherwise status
        "ok" with the upsert counters, ``pushed`` (None when the password
        could not be resolved, else True/False) and ``push_error`` (the
        password or push failure message, None on success).
    """
    uid = (uid or "").strip()
    if not uid:
        return {"status": "error", "error": "uid vacío"}

    tels = _as_list(fields.get("tels") or fields.get("telefonos"))
    emails = _as_list(fields.get("emails") or fields.get("correos"))
    fn = fields.get("fn") or fields.get("nombre")
    collection = fields.get("collection") or _default_collection(cfg)
    vcard = build_vcard(
        {
            "uid": uid,
            "fn": fn,
            "tels": tels,
            "emails": emails,
            "adrs": _as_list(fields.get("direcciones") or fields.get("adrs")),
        }
    )
    row = {
        "uid": uid,
        "collection": collection,
        "etag": None,
        "fn": fn,
        "tels": _json(tels),
        "emails": _json(emails),
        "raw": vcard,
        "note_path": None,
        "updated_at": _now(),
    }
    # etag and note_path are only set on insert; updates leave them alone.
    res = duckdb_upsert(
        cfg.db_path,
        "contacts",
        [row],
        key_cols=["uid"],
        update_cols=["collection", "fn", "tels", "emails", "raw", "updated_at"],
    )
    if res.get("status") != "ok":
        return {"status": "error", "error": res.get("error")}

    # Push DB -> Xandikos after the DB write has completed.
    pwd, err = _resolve_password(cfg)
    pushed = None
    push_error = err
    if err is None:
        push = carddav_put_vcard(
            cfg.dav_base, cfg.dav_user, pwd, collection, uid, vcard
        )
        pushed = push.get("status") == "ok"
        if not pushed:
            # Report why the push failed instead of a bare pushed=False.
            push_error = push.get("error") or "push CardDAV falló"
    return {
        "status": "ok",
        "uid": uid,
        "inserted": res.get("inserted", 0),
        "updated": res.get("updated", 0),
        "pushed": pushed,
        "push_error": push_error,
    }
def delete_contact(cfg: Config, uid: str) -> dict:
    """Delete a contact from the DB and then from the Xandikos server.

    The contact's collection is looked up before the row is removed so the
    remote resource path can still be built. The local delete is not rolled
    back when the remote DELETE fails; the failure is reported instead.

    Args:
        cfg: Service configuration (db_path, dav_base, dav_user, ...).
        uid: Contact UID; surrounding whitespace is stripped.

    Returns:
        An error dict for an empty uid or a failed DB delete; otherwise
        status "ok" with ``deleted`` (row count), ``deleted_remote`` (None
        when the password could not be resolved, else True/False) and
        ``push_error`` (password or remote failure message, None on success).
    """
    uid = (uid or "").strip()
    if not uid:
        return {"status": "error", "error": "uid vacío"}

    lookup = duckdb_query_readonly(
        cfg.db_path, "SELECT collection FROM contacts WHERE uid = ?", [uid], 1
    )
    collection = _default_collection(cfg)
    if lookup.get("status") == "ok" and lookup.get("rows"):
        collection = lookup["rows"][0].get("collection") or collection

    res = duckdb_execute(cfg.db_path, "DELETE FROM contacts WHERE uid = ?", [uid])
    if res.get("status") != "ok":
        return {"status": "error", "error": res.get("error")}

    # Remote removal of the .vcf resource (destructive, requested explicitly).
    pwd, err = _resolve_password(cfg)
    deleted_remote = None
    push_error = err
    if err is None:
        resource = collection.rstrip("/") + "/" + _safe_resource(uid) + ".vcf"
        rm = dav_delete_resource(cfg.dav_base, cfg.dav_user, pwd, resource)
        deleted_remote = rm.get("status") == "ok"
        if not deleted_remote:
            # Report why the remote delete failed instead of dropping it.
            push_error = rm.get("error") or "borrado DAV remoto falló"
    return {
        "status": "ok",
        "uid": uid,
        "deleted": res.get("rowcount", 0),
        "deleted_remote": deleted_remote,
        "push_error": push_error,
    }
def _safe_resource(uid: str) -> str:
    """Turn a UID into a DAV resource name.

    Every character outside ``A-Z a-z 0-9 _ . -`` becomes ``_`` and the
    result is capped at 120 characters. Intended to mirror the naming used by
    carddav_put_vcard / caldav_put_event (not verifiable from this file).
    """
    import re

    unsafe = re.compile(r"[^A-Za-z0-9_.-]")
    sanitized = unsafe.sub("_", uid)
    return sanitized[:120]
# ---------------------------------------------------------------------------
# events (DB -> Xandikos)
# ---------------------------------------------------------------------------
def _ical_text(value) -> str:
    """Escape *value* as an iCalendar TEXT value (RFC 5545 section 3.3.11)."""
    text = str(value)
    # Backslash first, so the escapes added below are not escaped again.
    text = text.replace("\\", "\\\\")
    text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "\\n")
    return text.replace(";", "\\;").replace(",", "\\,")


def _ical_when(prop: str, value, all_day: bool = False) -> str | None:
    """Build a DTSTART/DTEND content line from an ISO-like value.

    Date-only input becomes ``PROP;VALUE=DATE:YYYYMMDD``; timezone-aware
    input is converted to UTC (``...Z``); naive input stays floating local
    time. Input that cannot be parsed falls back to stripping ``-`` and ``:``
    so values already in iCalendar form pass through unchanged.
    Returns None when there is nothing to emit.
    """
    if not value:
        return None
    raw = str(value).strip()
    compact = raw.replace("-", "")
    if len(compact) == 8 and compact.isascii() and compact.isdigit():
        return f"{prop};VALUE=DATE:{compact}"
    # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on.
    iso = raw[:-1] + "+00:00" if raw[-1:] in ("Z", "z") else raw
    try:
        moment = datetime.fromisoformat(iso)
    except ValueError:
        legacy = raw.replace("-", "").replace(":", "")
        # Never let a line break through into the calendar stream.
        legacy = legacy.replace("\r", "").replace("\n", "")
        return f"{prop}:{legacy}" if legacy else None
    if all_day:
        return f"{prop};VALUE=DATE:{moment:%Y%m%d}"
    if moment.tzinfo is not None:
        return f"{prop}:{moment.astimezone(timezone.utc):%Y%m%dT%H%M%SZ}"
    return f"{prop}:{moment:%Y%m%dT%H%M%S}"


def _build_vcalendar(uid: str, fields: dict) -> str:
    """Compose a minimal VCALENDAR holding one VEVENT built from *fields*.

    UID, SUMMARY and LOCATION are escaped as TEXT so commas, semicolons,
    backslashes and line breaks cannot corrupt the stream or inject extra
    properties. DTSTART/DTEND are normalized by _ical_when; a truthy
    ``all_day`` forces date-only values.

    Args:
        uid: Event UID.
        fields: Optional keys ``summary``, ``dtstart``, ``dtend``,
            ``all_day``, ``location`` and ``rrule``.

    Returns:
        The calendar text with CRLF line endings, including a trailing CRLF.
    """
    # NOTE(review): RFC 5545 also requires DTSTAMP in a VEVENT; it is not
    # emitted here so the output stays a pure function of the inputs.
    all_day = bool(fields.get("all_day"))
    lines = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        "PRODID:-//osint_db//events//EN",
        "BEGIN:VEVENT",
        f"UID:{_ical_text(uid)}",
        f"SUMMARY:{_ical_text(fields.get('summary') or '')}",
    ]
    for prop, key in (("DTSTART", "dtstart"), ("DTEND", "dtend")):
        line = _ical_when(prop, fields.get(key), all_day)
        if line:
            lines.append(line)
    if fields.get("location"):
        lines.append(f"LOCATION:{_ical_text(fields['location'])}")
    if fields.get("rrule"):
        # RRULE is a RECUR value, not TEXT: only line breaks are stripped.
        rrule = str(fields["rrule"]).replace("\r", "").replace("\n", "")
        lines.append(f"RRULE:{rrule}")
    lines += ["END:VEVENT", "END:VCALENDAR"]
    return "\r\n".join(lines) + "\r\n"
def upsert_event(cfg: Config, uid: str, fields: dict) -> dict:
    """Create or update an event row, then PUT its VCALENDAR to Xandikos.

    The DB upsert runs first; the CalDAV push happens only after it has
    returned. A failed push does not fail the call: the row stays written
    and the failure is reported through ``pushed`` / ``push_error``.

    Args:
        cfg: Service configuration (db_path, dav_base, dav_user,
            dav_calendar_home, ...).
        uid: Event UID; surrounding whitespace is stripped.
        fields: Accepts ``calendar`` (defaults to "default"), ``summary``,
            ``dtstart``, ``dtend``, ``all_day``, ``location`` and ``rrule``.

    Returns:
        An error dict for an empty uid or a failed upsert; otherwise status
        "ok" with the upsert counters, ``pushed`` (None when the password
        could not be resolved, else True/False) and ``push_error`` (the
        password or push failure message, None on success).
    """
    uid = (uid or "").strip()
    if not uid:
        return {"status": "error", "error": "uid vacío"}

    calendar = fields.get("calendar") or "default"
    raw = _build_vcalendar(uid, fields)
    row = {
        "uid": uid,
        "calendar": calendar,
        "etag": None,
        "dtstart": fields.get("dtstart"),
        "dtend": fields.get("dtend"),
        "all_day": bool(fields.get("all_day")),
        "summary": fields.get("summary"),
        "location": fields.get("location"),
        "rrule": fields.get("rrule"),
        "raw": raw,
        "updated_at": _now(),
    }
    # etag is only set on insert; updates leave it alone.
    res = duckdb_upsert(
        cfg.db_path,
        "events",
        [row],
        key_cols=["uid"],
        update_cols=[
            "calendar",
            "dtstart",
            "dtend",
            "all_day",
            "summary",
            "location",
            "rrule",
            "raw",
            "updated_at",
        ],
    )
    if res.get("status") != "ok":
        return {"status": "error", "error": res.get("error")}

    # The target CalDAV collection is the calendar home plus the calendar
    # slug. The push runs after the DB write has completed.
    pwd, err = _resolve_password(cfg)
    pushed = None
    push_error = err
    if err is None:
        collection = cfg.dav_calendar_home.rstrip("/") + "/" + calendar + "/"
        push = caldav_put_event(
            cfg.dav_base, cfg.dav_user, pwd, collection, uid, raw
        )
        pushed = push.get("status") == "ok"
        if not pushed:
            # Report why the push failed instead of a bare pushed=False.
            push_error = push.get("error") or "push CalDAV falló"
    return {
        "status": "ok",
        "uid": uid,
        "inserted": res.get("inserted", 0),
        "updated": res.get("updated", 0),
        "pushed": pushed,
        "push_error": push_error,
    }
def delete_event(cfg: Config, uid: str) -> dict:
    """Delete an event from the DB and then from the Xandikos server.

    The event's calendar is looked up before the row is removed so the
    remote resource path can still be built ("default" when unknown). The
    local delete is not rolled back when the remote DELETE fails; the
    failure is reported instead.

    Args:
        cfg: Service configuration (db_path, dav_base, dav_user,
            dav_calendar_home, ...).
        uid: Event UID; surrounding whitespace is stripped.

    Returns:
        An error dict for an empty uid or a failed DB delete; otherwise
        status "ok" with ``deleted`` (row count), ``deleted_remote`` (None
        when the password could not be resolved, else True/False) and
        ``push_error`` (password or remote failure message, None on success).
    """
    uid = (uid or "").strip()
    if not uid:
        return {"status": "error", "error": "uid vacío"}

    lookup = duckdb_query_readonly(
        cfg.db_path, "SELECT calendar FROM events WHERE uid = ?", [uid], 1
    )
    calendar = "default"
    if lookup.get("status") == "ok" and lookup.get("rows"):
        calendar = lookup["rows"][0].get("calendar") or calendar

    res = duckdb_execute(cfg.db_path, "DELETE FROM events WHERE uid = ?", [uid])
    if res.get("status") != "ok":
        return {"status": "error", "error": res.get("error")}

    pwd, err = _resolve_password(cfg)
    deleted_remote = None
    push_error = err
    if err is None:
        collection = cfg.dav_calendar_home.rstrip("/") + "/" + calendar + "/"
        resource = collection + _safe_resource(uid) + ".ics"
        rm = dav_delete_resource(cfg.dav_base, cfg.dav_user, pwd, resource)
        deleted_remote = rm.get("status") == "ok"
        if not deleted_remote:
            # Report why the remote delete failed instead of dropping it.
            push_error = rm.get("error") or "borrado DAV remoto falló"
    return {
        "status": "ok",
        "uid": uid,
        "deleted": res.get("rowcount", 0),
        "deleted_remote": deleted_remote,
        "push_error": push_error,
    }
# ---------------------------------------------------------------------------
# addressbooks / calendars
# ---------------------------------------------------------------------------
def make_addressbook(cfg: Config, fields: dict) -> dict:
    """Create a CardDAV address book on Xandikos and record it in addressbooks.

    The remote collection is created first; only when that succeeds is the
    row upserted (keyed by slug). Any failure is returned as an error dict.
    """
    slug = (fields.get("slug") or "").strip()
    if slug == "":
        return {"status": "error", "error": "slug vacío"}

    display_name = fields.get("display_name") or ""
    description = fields.get("description") or ""
    color = fields.get("color") or ""

    pwd, err = _resolve_password(cfg)
    if err is not None:
        return {"status": "error", "error": err}

    # The contacts home is the parent of the default collection, e.g.
    # /enmanuel/contacts/addressbook/ -> /enmanuel/contacts/.
    segments = cfg.dav_contacts_collection.strip("/").split("/")
    contacts_home = "/" + "/".join(segments[:-1])
    if not contacts_home.endswith("/"):
        contacts_home += "/"

    created = dav_make_addressbook(
        cfg.dav_base,
        cfg.dav_user,
        pwd,
        contacts_home,
        slug,
        display_name,
        description,
    )
    if created.get("status") != "ok":
        return {
            "status": "error",
            "error": created.get("error"),
            "http_status": created.get("http_status"),
        }

    # Prefer the href reported by the server; otherwise derive the path.
    collection_path = created.get("href") or (contacts_home + slug + "/")
    record = {
        "slug": slug,
        "display_name": display_name or slug,
        "collection_path": collection_path,
        "description": description or None,
        "color": color or None,
        "created_at": _now(),
    }
    # created_at is left out of update_cols, so it is only set on insert.
    saved = duckdb_upsert(
        cfg.db_path,
        "addressbooks",
        [record],
        key_cols=["slug"],
        update_cols=["display_name", "collection_path", "description", "color"],
    )
    if saved.get("status") != "ok":
        return {"status": "error", "error": saved.get("error")}
    return {
        "status": "ok",
        "slug": slug,
        "collection_path": collection_path,
        "existed": created.get("existed", False),
    }
def make_calendar(cfg: Config, fields: dict) -> dict:
    """Create a CalDAV calendar on Xandikos under the configured calendar home.

    Counterpart of make_addressbook; nothing is written to the DB here.
    Failures (empty slug, password lookup, remote creation) come back as
    error dicts.
    """
    slug = (fields.get("slug") or "").strip()
    if slug == "":
        return {"status": "error", "error": "slug vacío"}

    display_name = fields.get("display_name") or ""
    color = fields.get("color") or ""

    pwd, err = _resolve_password(cfg)
    if err is not None:
        return {"status": "error", "error": err}

    created = dav_make_calendar(
        cfg.dav_base,
        cfg.dav_user,
        pwd,
        cfg.dav_calendar_home,
        slug,
        display_name,
        color,
    )
    if created.get("status") == "ok":
        return {
            "status": "ok",
            "slug": slug,
            "href": created.get("href"),
            "existed": created.get("existed", False),
        }
    return {
        "status": "error",
        "error": created.get("error"),
        "http_status": created.get("http_status"),
    }
# ---------------------------------------------------------------------------
# push masivo DB -> Xandikos
# ---------------------------------------------------------------------------
def push_all_dav(cfg: Config) -> dict:
    """Bulk reconcile: PUT every contact and event stored in the DB to Xandikos.

    Useful after a migration to flush what lives only in the DB. Nothing is
    deleted remotely (PUT only). Each contact is pushed from its stored
    ``raw`` vCard when one is present, so properties the DB does not model
    as columns (addresses, for instance) are not dropped by the PUT; the
    vCard is rebuilt from fn/tels/emails only when ``raw`` is missing.

    Args:
        cfg: Service configuration (db_path, dav_base, dav_user,
            dav_calendar_home, ...).

    Returns:
        An error dict when the password cannot be resolved. Otherwise a dict
        with the per-type success/failure counters; ``status`` is "ok" when
        both tables could be read and "error" (with ``error`` describing the
        failed reads) when a DB query failed.
    """
    pwd, err = _resolve_password(cfg)
    if err is not None:
        return {"status": "error", "error": err}

    query_errors = []

    contacts = duckdb_query_readonly(
        cfg.db_path,
        "SELECT uid, collection, fn, tels, emails, raw FROM contacts",
        [],
        1000000,
    )
    c_ok = c_fail = 0
    if contacts.get("status") == "ok":
        for row in contacts["rows"]:
            uid = row["uid"]
            collection = row.get("collection") or _default_collection(cfg)
            stored = row.get("raw")
            if isinstance(stored, str) and stored.lstrip().upper().startswith(
                "BEGIN:VCARD"
            ):
                # The stored vCard is the lossless representation.
                vcard = stored
            else:
                vcard = build_vcard(
                    {
                        "uid": uid,
                        "fn": row.get("fn"),
                        "tels": _decode_json_field(row.get("tels")),
                        "emails": _decode_json_field(row.get("emails")),
                    }
                )
            push = carddav_put_vcard(
                cfg.dav_base, cfg.dav_user, pwd, collection, uid, vcard
            )
            if push.get("status") == "ok":
                c_ok += 1
            else:
                c_fail += 1
    else:
        query_errors.append(f"contacts: {contacts.get('error')}")

    events = duckdb_query_readonly(
        cfg.db_path, "SELECT uid, calendar, raw FROM events", [], 1000000
    )
    e_ok = e_fail = 0
    if events.get("status") == "ok":
        calendar_home = cfg.dav_calendar_home.rstrip("/")
        for row in events["rows"]:
            uid = row["uid"]
            calendar = row.get("calendar") or "default"
            collection = calendar_home + "/" + calendar + "/"
            raw = row.get("raw") or _build_vcalendar(uid, {})
            push = caldav_put_event(
                cfg.dav_base, cfg.dav_user, pwd, collection, uid, raw
            )
            if push.get("status") == "ok":
                e_ok += 1
            else:
                e_fail += 1
    else:
        query_errors.append(f"events: {events.get('error')}")

    result = {
        "status": "ok",
        "contacts_pushed": c_ok,
        "contacts_failed": c_fail,
        "events_pushed": e_ok,
        "events_failed": e_fail,
    }
    if query_errors:
        # A table that could not be read is a failure, not "0 pushed".
        result["status"] = "error"
        result["error"] = "; ".join(query_errors)
    return result