211 lines
8.5 KiB
Python
211 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Migrador de fichas de persona desde NotasDeObsidian al vault osint.
|
|
|
|
Aplica el estandar de projects/osint/CONVENTIONS.md:
|
|
personas/<slug>.md ficha normalizada
|
|
personas/<slug>/<doc-slug>.md notas-documento
|
|
attachments/personas/<slug>/<doc-slug>-N.ext binarios
|
|
lugares/<slug>.md direcciones extraidas
|
|
|
|
Compone funciones del registry (grupo obsidian): slugify_obsidian_name,
|
|
extract_obsidian_embeds, resolve_obsidian_embed, read/create/delete_obsidian_note.
|
|
|
|
Modo por defecto: MOVER (borra docs .md y attachments originales de NotasDeObsidian).
|
|
Idempotente: re-ejecutar no duplica (overwrite por slug).
|
|
"""
|
|
import sys, os, re, shutil, glob
|
|
|
|
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
|
from obsidian import (read_obsidian_note, create_obsidian_note, delete_obsidian_note,
|
|
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
|
|
list_obsidian_notes)
|
|
|
|
# Root of the source vault (NotasDeObsidian) that person notes, their document
# notes and attachments are migrated from.
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
# Root of the destination osint vault that the normalized notes are written to.
OSINT = "/home/enmanuel/Obsidian/osint"
|
|
|
|
|
|
def doc_tipo(ds: str) -> str:
    """Classify a document slug into a document type by keyword.

    The rules are checked in order and the first rule with any keyword
    contained in *ds* (plain substring match) decides the type. A slug
    that matches no rule is classified as ``"otro"``.
    """
    # Order matters: earlier rules take precedence over later ones.
    rules = (
        ("dni", ("dni",)),
        ("certificado", ("certificado", "firma")),
        ("carnet", ("carnet",)),
        ("fotos", ("foto",)),
        ("contrato", ("contrato",)),
        ("laboral", ("nomina", "laboral")),
        ("banco", ("abanca", "bbva", "cajamar", "unicaja", "ebury", "ovh",
                   "legalitas", "banco")),
        ("fiscal", ("modelo", "autonomo", "tributaria", "empadron")),
    )
    for label, keywords in rules:
        for keyword in keywords:
            if keyword in ds:
                return label
    return "otro"
|
|
|
|
|
|
# Prepositions/articles left over in a doc-slug once the person's name is removed
# (e.g. "DNI de Maria" -> "dni-de" -> "dni").
STOPWORDS = {"de", "del", "la", "las", "el", "los", "y", "a", "en"}
|
|
|
|
|
|
def doc_slug(title: str, ptok: set) -> str:
    """Build the slug of a document note from its title.

    The title is slugified and split on ``-``; empty tokens, tokens present
    in *ptok* (the tokens of the person's slug) and tokens in ``STOPWORDS``
    are dropped. Falls back to ``"documento"`` when no token survives.
    """
    kept = []
    for token in slugify_obsidian_name(title).split("-"):
        if not token:
            continue
        if token in ptok or token in STOPWORDS:
            continue
        kept.append(token)
    if not kept:
        return "documento"
    return "-".join(kept)
|
|
|
|
|
|
def known_person_slugs() -> set:
    """Return the slugs of every person note listed under ``personas`` in the osint vault.

    Notes whose file name (without extension) starts with ``_`` are skipped.
    """
    stems = (
        os.path.splitext(os.path.basename(note_path))[0]
        for note_path in list_obsidian_notes(OSINT, subfolder="personas")
    )
    return {slugify_obsidian_name(stem) for stem in stems if not stem.startswith("_")}
|
|
|
|
|
|
def migrate_person(ficha_path, known_slugs, att_to_delete, docs_to_delete, move=True):
    """Migrate one flat person note into the normalized osint layout.

    Steps, in order:

    1. If the note's ``direccion`` frontmatter is non-empty, write a
       ``lugares/<slug>`` note for it.
    2. Walk the note's wikilinks. A link whose slug is a known person (other
       than this one), or whose resolved note has ``tipo: persona``, becomes a
       relation. A link that cannot be resolved inside ``NOTAS`` is reported as
       missing. Anything else is a document: its embedded attachments are
       copied to ``attachments/personas/<slug>/<doc-slug>-<i><ext>`` and a
       ``personas/<slug>/<doc-slug>`` note is written.
    3. Write the normalized person note ``personas/<slug>``.
    4. Delete the old flat note unless it already is the normalized path or
       lives under a ``/backups/`` directory.

    Repeated wikilinks are handled once: a document source is migrated a
    single time and a relation or missing link is listed a single time. When
    two different source notes reduce to the same doc-slug, the later one gets
    a numeric suffix (``-2``, ``-3``, ...) so it does not overwrite the note
    and attachments of the earlier one.

    Args:
        ficha_path: Path of the flat person note to migrate.
        known_slugs: Slugs of the persons already known; used to tell
            relations apart from documents.
        att_to_delete: Set that receives the original attachment paths to
            delete later (only filled when *move* is true).
        docs_to_delete: Set that receives the original document note paths to
            delete later (only filled when *move* is true).
        move: When true, originals are queued for deletion; otherwise they are
            only copied.

    Returns:
        A dict with ``slug``, ``docs`` (documents migrated), ``rels``
        (relations found), ``missing`` (unresolved link texts) and ``att``
        (attachments copied).
    """
    f = read_obsidian_note(ficha_path)
    nombre = os.path.splitext(os.path.basename(ficha_path))[0]
    slug = slugify_obsidian_name(nombre)
    ptok = set(slug.split("-"))
    att_rel = f"attachments/personas/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    os.makedirs(att_abs, exist_ok=True)

    # Address -> place note.
    diru = f["frontmatter"].get("direccion") or ""
    dirtxt = re.sub(r'^\[\[|\]\]$', '', str(diru)).strip()
    lugar_link = ""
    if dirtxt:
        lslug = slugify_obsidian_name(dirtxt)
        create_obsidian_note(OSINT, f"lugares/{lslug}",
                             body=f"Direccion vinculada a [[{slug}|{nombre}]].",
                             frontmatter={"tipo": "lugar", "nombre": dirtxt, "slug": lslug,
                                          "tags": ["lugar", "osint"]}, overwrite=True)
        lugar_link = f"[[lugares/{lslug}|{dirtxt}]]"

    docs_done, rels, missing = [], [], []
    seen_rels = set()       # relation slugs already listed
    seen_sources = set()    # source document paths already migrated in this call
    used_doc_slugs = set()  # doc-slugs already taken in this call
    for w in f["wikilinks"]:
        wslug = slugify_obsidian_name(w)
        if wslug in known_slugs and wslug != slug:
            if wslug not in seen_rels:
                seen_rels.add(wslug)
                rels.append((wslug, w))
            continue
        p = resolve_obsidian_embed(NOTAS, w if w.lower().endswith(".md") else w + ".md")
        if not p:
            if w not in missing:
                missing.append(w)
            continue
        if p in seen_sources:
            # Same document linked more than once: already migrated above.
            continue
        dn = read_obsidian_note(p)
        if str(dn["frontmatter"].get("tipo", "")).lower() == "persona":
            if wslug not in seen_rels:
                seen_rels.add(wslug)
                rels.append((wslug, w))
            continue
        seen_sources.add(p)

        # Two different sources may reduce to the same slug once the person's
        # name and stopwords are stripped; suffix the later one instead of
        # overwriting the earlier note and its attachments.
        base_ds = doc_slug(w, ptok)
        ds, suffix = base_ds, 2
        while ds in used_doc_slugs:
            ds = f"{base_ds}-{suffix}"
            suffix += 1
        used_doc_slugs.add(ds)

        embeds = extract_obsidian_embeds(dn["body"])
        new, n_ok = [], 0
        for i, emb in enumerate(embeds, 1):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if not ap:
                new.append(f"<!-- attachment no encontrado: {emb} -->")
                continue
            ext = os.path.splitext(ap)[1].lower()
            nn = f"{ds}-{i}{ext}"
            shutil.copy2(ap, f"{att_abs}/{nn}")
            if move:
                att_to_delete.add(ap)
            new.append(f"![[{att_rel}/{nn}]]")
            n_ok += 1
        create_obsidian_note(OSINT, f"personas/{slug}/{ds}",
                             body="\n".join(new) if new else "(sin attachments)",
                             frontmatter={"tipo": "documento", "doc_tipo": doc_tipo(ds),
                                          "persona": f"[[{slug}]]",
                                          "fuente": "NotasDeObsidian/" + os.path.relpath(p, NOTAS)},
                             overwrite=True)
        docs_done.append((w, ds, n_ok))
        if move:
            docs_to_delete.add(p)

    # Normalized person note.
    fm = dict(f["frontmatter"])
    fm["nombre"], fm["slug"], fm["aliases"] = nombre, slug, [nombre]
    fm["tags"] = ["persona", "osint"]
    if dirtxt:
        fm["direccion"] = dirtxt
    bl = ["## Documentos", ""] + [f"- [[personas/{slug}/{ds}|{w}]]" for w, ds, _ in docs_done]
    bl += ["", "## Relaciones", ""] + [f"- [[{rs}|{rn}]]" for rs, rn in rels]
    if lugar_link:
        bl += ["", "## Lugares", "", f"- {lugar_link}"]
    bl += ["", "## Notas", ""]
    create_obsidian_note(OSINT, f"personas/{slug}", body="\n".join(bl), frontmatter=fm, overwrite=True)

    # Delete the old flat note in osint (file name with spaces) when it is not
    # the normalized note itself and is not a backup.
    if os.path.abspath(ficha_path) != os.path.abspath(f"{OSINT}/personas/{slug}.md") \
            and "/backups/" not in ficha_path:
        delete_obsidian_note(ficha_path)

    return dict(slug=slug, docs=len(docs_done), rels=len(rels), missing=missing,
                att=sum(n for *_, n in docs_done))
|
|
|
|
|
|
def cleanup_enmanuel_originals(att_to_delete):
    """Remove the originals left behind by the copy-only pilot migration of Enmanuel.

    For every document note under ``personas/enmanuel-gutierrez-perez`` in the
    osint vault, the ``fuente`` frontmatter field is mapped back to the
    original note inside ``NOTAS``. If that original still exists as a file,
    the attachments it embeds are added to *att_to_delete* and the original
    note is removed immediately.

    Args:
        att_to_delete: Set that receives the paths of the original
            attachments to delete later.

    Returns:
        The number of original document notes removed.
    """
    enm = "enmanuel-gutierrez-perez"
    prefix = "NotasDeObsidian/"
    removed = 0
    for docp in list_obsidian_notes(OSINT, subfolder=f"personas/{enm}"):
        src = read_obsidian_note(docp)["frontmatter"].get("fuente")
        # `fuente` may be absent, present but empty (None) or not a string at
        # all; none of those point at an original to clean up.
        if not isinstance(src, str) or not src.startswith(prefix):
            continue
        orig = NOTAS + "/" + src[len(prefix):]
        # isfile (not exists): a `fuente` that resolves to a directory, e.g. a
        # bare "NotasDeObsidian/", must never be read or removed as a note.
        if not os.path.isfile(orig):
            continue
        for emb in extract_obsidian_embeds(read_obsidian_note(orig)["body"]):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if ap:
                att_to_delete.add(ap)
        os.remove(orig)
        removed += 1
    return removed
|
|
|
|
|
|
def main():
    """Run the batch migration and print a summary report.

    1. Clean up the originals that the pilot migration of Enmanuel left in
       NotasDeObsidian.
    2. Migrate every person note under ``personas`` whose file name is not
       already its own slug (that is, still flat / not migrated).
    3. Apply the queued deletions of original document notes and attachments.
    4. Print per-person counts and the list of links that had no file.

    Both "moved" counters in the report count files that were actually
    removed; paths that were already gone are skipped silently.
    """
    known = known_person_slugs()
    att_del, docs_del = set(), set()

    # 1. Enmanuel: clean up the originals the pilot left in NotasDeObsidian.
    enm_removed = cleanup_enmanuel_originals(att_del)

    # 2. Pending notes: those still flat in osint/personas (name != slug).
    pend = []
    for p in list_obsidian_notes(OSINT, subfolder="personas"):
        base = os.path.splitext(os.path.basename(p))[0]
        if base.startswith("_"):
            continue
        if slugify_obsidian_name(base) != base:  # flat, not migrated yet
            pend.append(p)

    results = []
    for fp in pend:
        results.append(migrate_person(fp, known, att_del, docs_del, move=True))

    # 3. Apply deletions (move): original .md docs and attachments.
    moved_docs = 0
    for p in docs_del:
        try:
            os.remove(p)
        except FileNotFoundError:
            continue  # already gone; best-effort cleanup
        moved_docs += 1
    moved_att = 0
    for ap in att_del:
        try:
            os.remove(ap)
        except FileNotFoundError:
            continue  # already gone; best-effort cleanup
        moved_att += 1

    # 4. Report.
    print(f"enmanuel: {enm_removed} docs originales borrados")
    print(f"personas migradas en este batch: {len(results)}")
    print(f"docs .md movidos: {moved_docs} | attachments movidos: {moved_att}")
    total_missing = []
    for r in results:
        tag = f"{r['slug']}: docs={r['docs']} rels={r['rels']} att={r['att']}"
        if r["missing"]:
            tag += f" | missing={r['missing']}"
            total_missing += [(r['slug'], m) for m in r['missing']]
        print(" " + tag)
    if total_missing:
        print(f"\nlinks sin archivo (placeholders), total {len(total_missing)}:")
        for s, m in total_missing:
            print(f" {s} -> {m!r}")
|
|
|
|
|
|
# Run the migration only when this file is executed as a script.
if __name__ == "__main__":
    main()
|