#!/usr/bin/env python3
"""Migrate person notes from NotasDeObsidian into the osint vault.

Applies the standard described in projects/osint/CONVENTIONS.md:
  personas/{slug}.md                              normalized person note
  personas/{slug}/{doc-slug}.md                   document notes
  attachments/personas/{slug}/{doc-slug}-N.ext    binaries
  lugares/{slug}.md                               extracted addresses

Composes registry functions (obsidian group): slugify_obsidian_name,
extract_obsidian_embeds, resolve_obsidian_embed,
read/create/delete_obsidian_note.

Default mode: MOVE (deletes the original .md docs and attachments from
NotasDeObsidian).
Idempotent: re-running does not duplicate (overwrite by slug).
"""
import sys
import os
import re
import shutil
import glob

# The registry directory must be on sys.path before `obsidian` is imported.
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
from obsidian import (
    read_obsidian_note,
    create_obsidian_note,
    delete_obsidian_note,
    slugify_obsidian_name,
    extract_obsidian_embeds,
    resolve_obsidian_embed,
    list_obsidian_notes,
)

NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
OSINT = "/home/enmanuel/Obsidian/osint"

# Ordered (keywords, doc type) rules: the first rule with any keyword found
# as a substring of the slug wins, so the order of the rows is significant.
_DOC_TIPO_RULES = (
    (("dni",), "dni"),
    (("certificado", "firma"), "certificado"),
    (("carnet",), "carnet"),
    (("foto",), "fotos"),
    (("contrato",), "contrato"),
    (("nomina", "laboral"), "laboral"),
    (("abanca", "bbva", "cajamar", "unicaja", "ebury", "ovh", "legalitas",
      "banco"), "banco"),
    (("modelo", "autonomo", "tributaria", "empadron"), "fiscal"),
)


def doc_tipo(ds: str) -> str:
    """Classify a document slug into a document type.

    Matching is by plain substring against ``ds``; the first matching rule
    in ``_DOC_TIPO_RULES`` decides the result.

    Args:
        ds: Document slug to classify.

    Returns:
        The matched type label, or ``"otro"`` when no keyword is found.
    """
    for keywords, tipo in _DOC_TIPO_RULES:
        if any(keyword in ds for keyword in keywords):
            return tipo
    return "otro"


# Prepositions/articles left over in a doc slug once the person's name has
# been removed (e.g. "DNI de Maria" -> "dni-de" -> "dni").
STOPWORDS = {"de", "del", "la", "las", "el", "los", "y", "a", "en"}


def doc_slug(title: str, ptok: set) -> str:
    """Build a document slug from a note title.

    The title is slugified and split on "-"; empty tokens, tokens that belong
    to the person's own slug (``ptok``) and stopwords are dropped.

    Args:
        title: Title (wikilink text) of the document note.
        ptok: Tokens of the person's slug, removed from the result.

    Returns:
        The remaining tokens joined with "-", or ``"documento"`` when nothing
        is left.
    """
    parts = [
        token
        for token in slugify_obsidian_name(title).split("-")
        if token and token not in ptok and token not in STOPWORDS
    ]
    return "-".join(parts) or "documento"


def unique_doc_slug(base: str, used: set) -> str:
    """Return ``base``, or ``base-2``, ``base-3``... if it is already taken.

    The chosen slug is added to ``used`` (mutated in place) so that later
    calls for the same person cannot return it again.

    Args:
        base: Preferred document slug.
        used: Slugs already assigned for the current person.

    Returns:
        A slug that was not in ``used`` before the call.
    """
    candidate = base
    suffix = 1
    while candidate in used:
        suffix += 1
        candidate = f"{base}-{suffix}"
    used.add(candidate)
    return candidate


def known_person_slugs() -> set:
    """Return the slugs of every note listed under ``personas`` in the vault.

    Notes whose file name starts with "_" are skipped.
    """
    # NOTE(review): if list_obsidian_notes recurses into subfolders, document
    # notes under personas/{slug}/ are also returned here and would be treated
    # as person slugs — confirm against the registry implementation.
    slugs = set()
    for path in list_obsidian_notes(OSINT, subfolder="personas"):
        base = os.path.splitext(os.path.basename(path))[0]
        if base.startswith("_"):
            continue
        slugs.add(slugify_obsidian_name(base))
    return slugs


def migrate_person(ficha_path, known_slugs, att_to_delete, docs_to_delete, move=True):
    """Migrate one person note and its linked documents into the osint vault.

    Creates/overwrites ``personas/{slug}``, one ``personas/{slug}/{doc}`` note
    per linked document, copies the attachments each document embeds into
    ``attachments/personas/{slug}/`` and, when the note has a ``direccion``
    frontmatter value, a ``lugares/{slug}`` note for it.

    Args:
        ficha_path: Path of the person note to migrate.
        known_slugs: Slugs treated as already-known persons; wikilinks that
            slugify to one of them are recorded as relations.
        att_to_delete: Set, mutated in place, collecting original attachment
            paths to delete later (only filled when ``move`` is true).
        docs_to_delete: Set, mutated in place, collecting original document
            note paths to delete later (only filled when ``move`` is true).
        move: Whether the originals are scheduled for deletion.

    Returns:
        A dict with keys ``slug``, ``docs`` (documents migrated), ``rels``
        (relations found), ``missing`` (wikilinks with no file) and ``att``
        (attachments copied).

    Note:
        Regardless of ``move``, ``ficha_path`` itself is deleted at the end
        unless it already is the normalized note path or contains
        "/backups/".
    """
    ficha = read_obsidian_note(ficha_path)
    nombre = os.path.splitext(os.path.basename(ficha_path))[0]
    slug = slugify_obsidian_name(nombre)
    ptok = set(slug.split("-"))
    att_rel = f"attachments/personas/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    os.makedirs(att_abs, exist_ok=True)

    # Address -> place note. Strip whitespace BEFORE removing the wikilink
    # brackets: the regex is anchored, so padding would leave "[[...]]" intact.
    diru = ficha["frontmatter"].get("direccion") or ""
    dirtxt = re.sub(r'^\[\[|\]\]$', '', str(diru).strip()).strip()
    lugar_link = ""
    if dirtxt:
        lslug = slugify_obsidian_name(dirtxt)
        create_obsidian_note(
            OSINT,
            f"lugares/{lslug}",
            body=f"Direccion vinculada a [[{slug}|{nombre}]].",
            frontmatter={"tipo": "lugar", "nombre": dirtxt, "slug": lslug,
                         "tags": ["lugar", "osint"]},
            overwrite=True,
        )
        lugar_link = f"[[lugares/{lslug}|{dirtxt}]]"

    docs_done, rels, missing = [], [], []
    used_doc_slugs = set()   # doc slugs already assigned for this person
    seen_sources = set()     # source notes already migrated in this call
    for w in ficha["wikilinks"]:
        wslug = slugify_obsidian_name(w)
        if wslug in known_slugs and wslug != slug:
            rels.append((wslug, w))
            continue
        target = w if w.lower().endswith(".md") else w + ".md"
        p = resolve_obsidian_embed(NOTAS, target)
        if not p:
            missing.append(w)
            continue
        dn = read_obsidian_note(p)
        if str(dn["frontmatter"].get("tipo", "")).lower() == "persona":
            rels.append((wslug, w))
            continue
        # The same source note linked more than once is migrated only once.
        if p in seen_sources:
            continue
        seen_sources.add(p)
        # Two different documents can reduce to the same slug once the
        # person's name and stopwords are removed; without disambiguation the
        # second would overwrite the first note and its attachments while both
        # originals get deleted.
        ds = unique_doc_slug(doc_slug(w, ptok), used_doc_slugs)

        new, n_ok = [], 0
        for i, emb in enumerate(extract_obsidian_embeds(dn["body"]), 1):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if not ap:
                # NOTE(review): an unresolved embed contributes an empty line,
                # so its name is not kept in the migrated note — confirm this
                # is intended.
                new.append("")
                continue
            ext = os.path.splitext(ap)[1].lower()
            nn = f"{ds}-{i}{ext}"
            shutil.copy2(ap, f"{att_abs}/{nn}")
            if move:
                att_to_delete.add(ap)
            new.append(f"![[{att_rel}/{nn}]]")
            n_ok += 1

        create_obsidian_note(
            OSINT,
            f"personas/{slug}/{ds}",
            body="\n".join(new) if new else "(sin attachments)",
            frontmatter={"tipo": "documento", "doc_tipo": doc_tipo(ds),
                         "persona": f"[[{slug}]]",
                         "fuente": "NotasDeObsidian/" + os.path.relpath(p, NOTAS)},
            overwrite=True,
        )
        docs_done.append((w, ds, n_ok))
        if move:
            docs_to_delete.add(p)

    # Normalized person note: original frontmatter plus the standard fields.
    fm = dict(ficha["frontmatter"])
    fm["nombre"], fm["slug"], fm["aliases"] = nombre, slug, [nombre]
    fm["tags"] = ["persona", "osint"]
    if dirtxt:
        fm["direccion"] = dirtxt
    bl = ["## Documentos", ""]
    bl += [f"- [[personas/{slug}/{ds}|{w}]]" for w, ds, _ in docs_done]
    bl += ["", "## Relaciones", ""]
    bl += [f"- [[{rs}|{rn}]]" for rs, rn in rels]
    if lugar_link:
        bl += ["", "## Lugares", "", f"- {lugar_link}"]
    bl += ["", "## Notas", ""]
    create_obsidian_note(OSINT, f"personas/{slug}", body="\n".join(bl),
                         frontmatter=fm, overwrite=True)

    # Delete the old flat note (name with spaces) when it is not the
    # normalized note just written and does not live under a backups folder.
    normalized_path = os.path.abspath(f"{OSINT}/personas/{slug}.md")
    if (os.path.abspath(ficha_path) != normalized_path
            and "/backups/" not in ficha_path):
        delete_obsidian_note(ficha_path)

    return dict(slug=slug, docs=len(docs_done), rels=len(rels), missing=missing,
                att=sum(n for *_, n in docs_done))


def cleanup_enmanuel_originals(att_to_delete):
    """Remove the original documents left behind by the pilot migration.

    Enmanuel was migrated in the pilot WITHOUT moving the originals (copy
    only). For each document note under his osint folder, the ``fuente``
    frontmatter field is used to locate the original note in NotasDeObsidian;
    the attachments it embeds are added to ``att_to_delete`` and the original
    note is removed immediately.

    Args:
        att_to_delete: Set, mutated in place, collecting attachment paths to
            delete later.

    Returns:
        Number of original document notes removed.
    """
    enm = "enmanuel-gutierrez-perez"
    prefix = "NotasDeObsidian/"
    removed = 0
    for docp in list_obsidian_notes(OSINT, subfolder=f"personas/{enm}"):
        # `fuente` may be missing, empty or not a string; coerce so that the
        # prefix check cannot raise.
        src = str(read_obsidian_note(docp)["frontmatter"].get("fuente") or "")
        if not src.startswith(prefix):
            continue
        orig = NOTAS + "/" + src[len(prefix):]
        if not os.path.exists(orig):
            continue
        for emb in extract_obsidian_embeds(read_obsidian_note(orig)["body"]):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if ap:
                att_to_delete.add(ap)
        os.remove(orig)
        removed += 1
    return removed


def _remove_files(paths):
    """Delete every path in ``paths``; return how many were actually removed.

    Paths that no longer exist are skipped silently.
    """
    removed = 0
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            continue
        removed += 1
    return removed


def main():
    """Run the batch migration and print a summary report."""
    known = known_person_slugs()
    att_del, docs_del = set(), set()

    # 1. Enmanuel: clean up the originals the pilot left in NotasDeObsidian.
    enm_removed = cleanup_enmanuel_originals(att_del)

    # 2. Pending notes: those still flat in osint/personas (name != slug).
    pend = []
    for p in list_obsidian_notes(OSINT, subfolder="personas"):
        base = os.path.splitext(os.path.basename(p))[0]
        if base.startswith("_"):
            continue
        if slugify_obsidian_name(base) != base:  # flat, not migrated yet
            pend.append(p)

    results = []
    for fp in pend:
        results.append(migrate_person(fp, known, att_del, docs_del, move=True))

    # 3. Apply deletions (move): original .md docs and attachments. Deferred
    # until every copy is done; both counts reflect files actually removed.
    moved_docs = _remove_files(docs_del)
    moved_att = _remove_files(att_del)

    # 4. Report
    print(f"enmanuel: {enm_removed} docs originales borrados")
    print(f"personas migradas en este batch: {len(results)}")
    print(f"docs .md movidos: {moved_docs} | attachments movidos: {moved_att}")
    total_missing = []
    for r in results:
        tag = f"{r['slug']}: docs={r['docs']} rels={r['rels']} att={r['att']}"
        if r["missing"]:
            tag += f" | missing={r['missing']}"
            total_missing += [(r['slug'], m) for m in r['missing']]
        print(" " + tag)
    if total_missing:
        print(f"\nlinks sin archivo (placeholders), total {len(total_missing)}:")
        for s, m in total_missing:
            print(f" {s} -> {m!r}")


if __name__ == "__main__":
    main()