feat: POST /api/push/dav-bulk — push masivo por disco + 1 commit (segundos vs minutos)

Vía rápida DB→Xandikos para operaciones masivas: genera todos los vCards de agenda desde
DuckDB a un tmpdir, rsync de golpe al working tree de la colección en magnus (excluyendo
.git/.xandikos), UN solo git commit, y 1 PROPFIND para capturar todos los etags en batch.
~0.5s vs ~6min del push HTTP (que hace N PUTs + N PROPFINDs + N commits). El push HTTP
push_all_dav se mantiene como fallback (y para CalDAV). Config DAV_BULK_SSH_HOST/REMOTE_DIR.
22 tests verdes.
This commit is contained in:
2026-06-13 11:11:32 +02:00
parent 27e9be1ab7
commit 058180ea1a
5 changed files with 535 additions and 1 deletions
+143
View File
@@ -700,3 +700,146 @@ def test_pull_dav_incremental_por_etag(client, cfg, monkeypatch):
assert by_uid["c-same"]["etag"] == '"e-same"'
assert by_uid["c-changed"]["fn"] == "Despues" # FN del vCard nuevo
assert by_uid["c-changed"]["etag"] == '"e-new"' # etag remoto persistido
# --- F5: push masivo por disco (1 rsync + 1 commit + 1 PROPFIND) ------------
def test_write_agenda_vcards_to_dir_nombres_y_sin_osint(client, cfg, tmp_path):
    """_write_agenda_vcards_to_dir writes one .vcf per contact, without OSINT.

    This is the LOCAL half of the on-disk bulk push, testable without SSH: it
    generates the .vcf files in a temporary directory using the EXACT resource
    name of the HTTP push (_safe_resource(uid) + '.vcf') and composes the
    agenda vCard (with the ADR of the linked person, and WITHOUT any
    X-OSINT-* line).
    """
    from server import writes

    client.post("/api/ingest/vault")
    # Person with OSINT fields + an address; the contact is linked by phone.
    client.post(
        "/api/person",
        json={
            "slug": "disco-osint",
            "nombre": "Disco Osint",
            "dni": "11111111H",
            "sexo": "hombre",
            "telefonos": ["+34 655 900 900"],
            "direcciones": ["Av. Disco 7, Málaga"],
        },
    )
    now = datetime.now(tz=timezone.utc)
    with write_conn(cfg.db_path) as conn:
        conn.execute(
            "INSERT INTO contacts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            [
                "uid con espacios/raro",  # forces sanitising of the resource name
                "/enmanuel/contacts/addressbook/",
                None,
                "Disco Osint",
                '["+34 655 900 900"]',
                "[]",
                "BEGIN:VCARD...",
                None,
                now,
            ],
        )
    client.post("/api/ingest/vault")  # links the contact to the person record
    rows = client.post(
        "/api/query",
        json={
            "sql": "SELECT uid, collection, fn, tels, emails, note_path "
            "FROM contacts WHERE uid = 'uid con espacios/raro'"
        },
    ).json()["rows"]
    out_dir = str(tmp_path / "vcards_out")
    os.makedirs(out_dir)
    res = writes._write_agenda_vcards_to_dir(cfg, rows, out_dir)
    assert res["written"] == 1
    # Resource name = sanitised uid + .vcf (identical to the HTTP push).
    expected_name = writes._safe_resource("uid con espacios/raro") + ".vcf"
    assert expected_name == "uid_con_espacios_raro.vcf"
    assert list(res["by_resource"]) == [expected_name]
    assert res["by_resource"][expected_name] == "uid con espacios/raro"
    written_path = os.path.join(out_dir, expected_name)
    assert os.path.exists(written_path)
    # Read through a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector (avoids ResourceWarning).
    with open(written_path, encoding="utf-8") as fh:
        content = fh.read()
    # (a) Privacy: no OSINT data.
    assert "X-OSINT-" not in content
    assert "11111111H" not in content
    # (b) Agenda: the linked person's address DOES travel as an ADR line.
    assert "ADR;TYPE=HOME:;;Av. Disco 7\\, Málaga;;;;" in content
def test_push_all_dav_bulk_flujo_mockeado(client, cfg, monkeypatch):
    """push_all_dav_bulk: writes .vcf files, rsync+commit (mocked), stores etags by uid.

    The rsync helper, the remote commit (HEAD before/after) and the final
    PROPFIND are all replaced with stubs. The test then checks that
    ``written`` equals the number of contacts, that ``committed`` is True
    (HEAD changed) and that ``contacts.etag`` ends up holding the PROPFIND
    etag matched to each uid.
    """
    from server import writes

    client.post("/api/ingest/vault")
    addressbook = "/enmanuel/contacts/addressbook/"
    inserted_at = datetime.now(tz=timezone.utc)
    seed_contacts = [("c-a", "Contacto A"), ("c-b", "Contacto B")]
    with write_conn(cfg.db_path) as db:
        for contact_uid, full_name in seed_contacts:
            params = [
                contact_uid,
                addressbook,
                None,
                full_name,
                "[]",
                "[]",
                "BEGIN:VCARD...",
                None,
                inserted_at,
            ]
            db.execute("INSERT INTO contacts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", params)

    # rsync + ssh (commit / rev-parse) are stubbed at helper level.
    def fake_rsync(*args, **kwargs):
        return {"status": "ok", "stdout": "", "stderr": ""}

    def fake_commit(*args, **kwargs):
        return {
            "status": "ok",
            "head_before": "aaaa111",
            "head_after": "bbbb222",
            "committed": True,
        }

    def fake_secret(*_args):
        return {"status": "ok", "value": "pw"}

    # PROPFIND stub: reports the etag of each resource under its sanitised name.
    def fake_list(base, user, pwd, collection, **kw):
        listing = [
            {"href": addressbook + "c-a.vcf", "etag": '"etag-a"'},
            {"href": addressbook + "c-b.vcf", "etag": '"etag-b"'},
        ]
        return {"status": "ok", "http_status": 207, "resources": listing}

    monkeypatch.setattr(writes, "_rsync_vcards", fake_rsync)
    monkeypatch.setattr(writes, "_git_commit_remote", fake_commit)
    monkeypatch.setattr(writes, "pass_get_secret", fake_secret)
    monkeypatch.setattr(writes, "dav_list_resources", fake_list)

    outcome = writes.push_all_dav_bulk(cfg)

    expected_values = (
        ("status", "ok"),
        ("written", 2),
        ("head_before", "aaaa111"),
        ("head_after", "bbbb222"),
        ("etags_updated", 2),
    )
    for key, wanted in expected_values:
        assert outcome[key] == wanted
    # Flags must be the real boolean True, not merely truthy.
    assert outcome["rsynced"] is True
    assert outcome["committed"] is True
    assert isinstance(outcome["elapsed_s"], float)

    # The PROPFIND etags were persisted per uid (reliable reverse sync).
    query = {"sql": "SELECT uid, etag FROM contacts ORDER BY uid"}
    stored = client.post("/api/query", json=query).json()["rows"]
    etag_by_uid = {record["uid"]: record["etag"] for record in stored}
    assert etag_by_uid["c-a"] == '"etag-a"'
    assert etag_by_uid["c-b"] == '"etag-b"'