chore: sync from fn-registry agent

This commit is contained in:
fn-registry agent
2026-06-10 11:43:45 +02:00
commit 7c475be581
9 changed files with 519 additions and 0 deletions
+130
View File
@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""Migrador de organizaciones (empresas) desde NotasDeObsidian al vault osint.
A diferencia de las personas (que parten de una ficha con wikilinks), las organizaciones se
detectan recogiendo todas las notas cuyo titulo menciona el nombre de la empresa. La nota cuyo
titulo ES el nombre de la empresa actua como ficha; el resto son documentos.
Estructura resultante (estandar projects/osint/CONVENTIONS.md):
organizaciones/<slug>.md
organizaciones/<slug>/<doc-slug>.md
attachments/organizaciones/<slug>/<doc-slug>-N.ext
Modo MOVER (borra originales). Idempotente (overwrite por slug).
"""
import sys, os, re, shutil
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
from obsidian import (read_obsidian_note, create_obsidian_note,
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
list_obsidian_notes)
# Vault the notes are read from (and later deleted from).
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
# Vault the migrated notes and attachments are written into.
OSINT = "/home/enmanuel/Obsidian/osint"

# Tokens dropped from a document slug in addition to the organisation's own
# name tokens.
STOPWORDS = {
    "de", "del",
    "la", "las",
    "el", "los",
    "y", "a", "en",
}

# One entry per organisation to migrate:
#   display  human-readable name, also compared against note titles
#   slug     destination slug under organizaciones/
#   match    pattern searched in each note title to select the org's notes
#   nametok  slug tokens belonging to the org name, removed from doc slugs
ORGS = [
    {
        "display": "FenixFood SL",
        "slug": "fenixfood-sl",
        "match": re.compile(r'fenix\s*food', re.I),
        "nametok": {"fenixfood", "fenix", "food", "sl"},
    },
    {
        "display": "BiorganicFood SL",
        "slug": "biorganicfood-sl",
        "match": re.compile(r'biorganic', re.I),
        "nametok": {"biorganicfood", "biorganic", "food", "sl"},
    },
]
def doc_slug(title, ntok):
    """Build a document slug from a note title.

    The title is slugified with ``slugify_obsidian_name`` and split on "-".
    Empty tokens, tokens present in ``ntok`` and tokens present in
    ``STOPWORDS`` are discarded; the rest are re-joined with "-".

    Returns "documento" when no token survives the filtering.
    """
    kept = []
    for token in slugify_obsidian_name(title).split("-"):
        if not token or token in ntok or token in STOPWORDS:
            continue
        kept.append(token)
    if not kept:
        return "documento"
    return "-".join(kept)
def migrate_org(org, all_notes, att_del, docs_del):
    """Migrate every note that mentions one organisation into the osint vault.

    Notes are selected by searching ``org["match"]`` in each note's file name
    (extension removed). The first selected note whose slugified title equals
    ``org["slug"]``, or whose title equals ``org["display"]`` ignoring case,
    is treated as the organisation's index note ("ficha"); every other
    selected note is written as a document under ``organizaciones/<slug>/``
    with its embeds copied to ``attachments/organizaciones/<slug>/``.

    Nothing is deleted here: source paths are only collected into ``att_del``
    and ``docs_del`` so the caller can remove them afterwards.

    Args:
        org: One entry of ``ORGS`` (keys ``display``, ``slug``, ``match``,
            ``nametok``).
        all_notes: Paths of the candidate notes.
        att_del: Set, mutated in place; receives the source path of every
            attachment that was copied.
        docs_del: Set, mutated in place; receives the source path of every
            migrated note, including the source ficha when one was found.

    Returns:
        Tuple ``(slug, number_of_documents_written, source_ficha_found)``.
    """
    slug, ntok = org["slug"], org["nametok"]

    def title_of(path):
        # Note title = file name without its extension.
        return os.path.splitext(os.path.basename(path))[0]

    matched = [n for n in all_notes if org["match"].search(title_of(n))]

    # Re-run guard: after a successful run the caller deletes the source
    # notes, so a later run matches nothing. Regenerating the ficha in that
    # case would replace the existing one with an empty document list, so an
    # already-migrated organisation is left untouched.
    if not matched and os.path.exists(f"{OSINT}/organizaciones/{slug}.md"):
        return slug, 0, False
    # NOTE(review): if new notes show up on a later run, the ficha written
    # below lists only those new notes — confirm whether previously migrated
    # documents should be merged into the list.

    ficha_path = None
    for n in matched:
        t = title_of(n)
        if slugify_obsidian_name(t) == slug or t.lower() == org["display"].lower():
            ficha_path = n
            break

    att_rel = f"attachments/organizaciones/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    os.makedirs(att_abs, exist_ok=True)

    docs_done, ficha_extra = [], ""
    for n in matched:
        t = title_of(n)
        if n == ficha_path:
            # NOTE(review): the ficha body is carried over verbatim; embeds
            # inside it are neither copied nor rewritten — confirm intended.
            ficha_extra = read_obsidian_note(n)["body"]
            continue
        dn = read_obsidian_note(n)

        # Avoid doc-slug collisions inside the same organisation by probing
        # the destination folder and appending -2, -3, ...
        base_ds = doc_slug(t, ntok)
        ds, k = base_ds, 2
        while os.path.exists(f"{OSINT}/organizaciones/{slug}/{ds}.md"):
            ds = f"{base_ds}-{k}"
            k += 1

        # Copy each resolvable embed next to the org and point the note at
        # the copy; unresolved embeds become an HTML comment placeholder.
        new = []
        for i, emb in enumerate(extract_obsidian_embeds(dn["body"]), 1):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if not ap:
                new.append(f"<!-- attachment no encontrado: {emb} -->")
                continue
            ext = os.path.splitext(ap)[1].lower()
            nn = f"{ds}-{i}{ext}"
            shutil.copy2(ap, f"{att_abs}/{nn}")
            att_del.add(ap)
            new.append(f"![[{att_rel}/{nn}]]")

        # Keep the document's own text (embeds stripped) ahead of the
        # rewritten embeds.
        text_wo_embeds = re.sub(r'!\[\[[^\]]+\]\]', '', dn["body"]).strip()
        parts = []
        if text_wo_embeds:
            parts.append(text_wo_embeds)
        if new:
            parts.append("\n".join(new))
        create_obsidian_note(OSINT, f"organizaciones/{slug}/{ds}",
                             body="\n\n".join(parts) if parts else "(sin contenido)",
                             frontmatter={"tipo": "documento", "entidad": f"[[{slug}]]",
                                          "fuente": "NotasDeObsidian/" + os.path.relpath(n, NOTAS)},
                             overwrite=True)
        docs_done.append((t, ds))
        docs_del.add(n)

    # Organisation ficha: intro line, original ficha text (if any), then the
    # list of migrated documents and an empty notes section.
    bl = [f"Ficha de la organizacion {org['display']}.", ""]
    if ficha_extra.strip():
        bl += [ficha_extra.strip(), ""]
    bl += ["## Documentos", ""] + [f"- [[organizaciones/{slug}/{ds}|{t}]]" for t, ds in docs_done]
    bl += ["", "## Notas", ""]
    create_obsidian_note(OSINT, f"organizaciones/{slug}", body="\n".join(bl),
                         frontmatter={"tipo": "organizacion", "nombre": org["display"], "slug": slug,
                                      "tags": ["organizacion", "osint"],
                                      "fuente": "NotasDeObsidian/" + (os.path.relpath(ficha_path, NOTAS) if ficha_path else "")},
                         overwrite=True)
    if ficha_path:
        docs_del.add(ficha_path)
    return slug, len(docs_done), ficha_path is not None
def main():
    """Migrate every organisation in ``ORGS`` and then delete the originals.

    Notes whose path contains "/.git/" or "/dist/" are ignored. Source notes
    and attachments collected by ``migrate_org`` are removed only after all
    organisations have been processed; paths that no longer exist are
    skipped silently. A summary is printed at the end.
    """
    skip_markers = ("/.git/", "/dist/")
    all_notes = []
    for note in list_obsidian_notes(NOTAS):
        if any(marker in note for marker in skip_markers):
            continue
        all_notes.append(note)

    att_del = set()
    docs_del = set()
    results = [(org["display"],) + migrate_org(org, all_notes, att_del, docs_del)
               for org in ORGS]

    # Apply the "move": remove migrated notes first, then their attachments.
    for doc_path in docs_del:
        try:
            os.remove(doc_path)
        except FileNotFoundError:
            continue

    moved_att = 0
    for att_path in att_del:
        try:
            os.remove(att_path)
        except FileNotFoundError:
            continue
        moved_att += 1

    print(f"docs .md movidos: {len(docs_del)} | attachments movidos: {moved_att}\n")
    for disp, slug, ndocs, had_ficha in results:
        print(f" {disp} -> organizaciones/{slug} | docs={ndocs} | ficha_origen={'si' if had_ficha else 'no'}")


if __name__ == "__main__":
    main()
+210
View File
@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""Migrador de fichas de persona desde NotasDeObsidian al vault osint.
Aplica el estandar de projects/osint/CONVENTIONS.md:
personas/<slug>.md ficha normalizada
personas/<slug>/<doc-slug>.md notas-documento
attachments/personas/<slug>/<doc-slug>-N.ext binarios
lugares/<slug>.md direcciones extraidas
Compone funciones del registry (grupo obsidian): slugify_obsidian_name,
extract_obsidian_embeds, resolve_obsidian_embed, read/create/delete_obsidian_note.
Modo por defecto: MOVER (borra docs .md y attachments originales de NotasDeObsidian).
Idempotente: re-ejecutar no duplica (overwrite por slug).
"""
import sys, os, re, shutil, glob
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
from obsidian import (read_obsidian_note, create_obsidian_note, delete_obsidian_note,
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
list_obsidian_notes)
# Vault that linked notes and attachments are resolved in (and later deleted from).
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
# Vault that person fichas, documents, places and attachments are written into.
OSINT = "/home/enmanuel/Obsidian/osint"
def doc_tipo(ds: str) -> str:
    """Classify a document slug into a document type.

    Rules are checked in order and the first one with any keyword occurring
    as a substring of ``ds`` wins. Returns "otro" when no rule matches.
    """
    rules = (
        (("dni",), "dni"),
        (("certificado", "firma"), "certificado"),
        (("carnet",), "carnet"),
        (("foto",), "fotos"),
        (("contrato",), "contrato"),
        (("nomina", "laboral"), "laboral"),
        (("abanca", "bbva", "cajamar", "unicaja", "ebury", "ovh", "legalitas", "banco"), "banco"),
        (("modelo", "autonomo", "tributaria", "empadron"), "fiscal"),
    )
    for keywords, label in rules:
        for keyword in keywords:
            if keyword in ds:
                return label
    return "otro"
# Prepositions/articles left over in a doc-slug once the person's name has
# been removed (e.g. "DNI de Maria" -> "dni-de" -> "dni").
STOPWORDS = {
    "de", "del",
    "la", "las",
    "el", "los",
    "y", "a", "en",
}
def doc_slug(title: str, ptok: set) -> str:
    """Build a document slug from a linked note's title.

    The title is slugified with ``slugify_obsidian_name`` and split on "-";
    empty tokens, tokens in ``ptok`` and tokens in ``STOPWORDS`` are dropped.
    Returns "documento" when nothing is left.
    """
    def keep(token):
        return bool(token) and token not in ptok and token not in STOPWORDS

    tokens = slugify_obsidian_name(title).split("-")
    joined = "-".join(filter(keep, tokens))
    return joined if joined else "documento"
def known_person_slugs() -> set:
    """Return the slugs of every note listed under ``personas`` in OSINT.

    Each note's file name (without extension) is slugified; notes whose file
    name starts with "_" are ignored.
    """
    stems = (
        os.path.splitext(os.path.basename(path))[0]
        for path in list_obsidian_notes(OSINT, subfolder="personas")
    )
    return {slugify_obsidian_name(stem) for stem in stems if not stem.startswith("_")}
def migrate_person(ficha_path, known_slugs, att_to_delete, docs_to_delete, move=True):
    """Migrate one person ficha and the notes it links to into the osint vault.

    Each wikilink of the ficha is classified as:
      * a relation, when its slug is in ``known_slugs`` (and is not this
        person) or the linked note's frontmatter has ``tipo: persona``;
      * missing, when no matching ``.md`` file resolves inside ``NOTAS``;
      * a document otherwise, written to ``personas/<slug>/<doc-slug>`` with
        its embeds copied to ``attachments/personas/<slug>/``.

    A place note is written under ``lugares/`` when the ficha frontmatter has
    a ``direccion``. Finally the normalised ficha is written to
    ``personas/<slug>`` and the source ficha is deleted if it is a different
    file and its path does not contain "/backups/".

    Args:
        ficha_path: Path of the person ficha to migrate.
        known_slugs: Slugs of all known persons.
        att_to_delete: Set, mutated in place when ``move`` is true; receives
            the source path of every copied attachment.
        docs_to_delete: Set, mutated in place when ``move`` is true; receives
            the source path of every migrated document note.
        move: When false, nothing is added to the two deletion sets.

    Returns:
        dict with keys ``slug``, ``docs`` (documents written), ``rels``
        (relations found), ``missing`` (unresolved link texts) and ``att``
        (attachments copied).
    """
    f = read_obsidian_note(ficha_path)
    nombre = os.path.splitext(os.path.basename(ficha_path))[0]
    slug = slugify_obsidian_name(nombre)
    ptok = set(slug.split("-"))
    att_rel = f"attachments/personas/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    os.makedirs(att_abs, exist_ok=True)

    # Address -> place note. A leading "[[" / trailing "]]" is stripped so a
    # wikilink-style address is stored as plain text.
    diru = f["frontmatter"].get("direccion") or ""
    dirtxt = re.sub(r'^\[\[|\]\]$', '', str(diru)).strip()
    lugar_link = ""
    if dirtxt:
        lslug = slugify_obsidian_name(dirtxt)
        create_obsidian_note(OSINT, f"lugares/{lslug}",
                             body=f"Direccion vinculada a [[{slug}|{nombre}]].",
                             frontmatter={"tipo": "lugar", "nombre": dirtxt, "slug": lslug,
                                          "tags": ["lugar", "osint"]}, overwrite=True)
        lugar_link = f"[[lugares/{lslug}|{dirtxt}]]"

    docs_done, rels, missing = [], [], []
    seen_sources = set()  # source notes already migrated in this call
    used_slugs = set()    # doc-slugs already assigned in this call
    for w in f["wikilinks"]:
        wslug = slugify_obsidian_name(w)
        if wslug in known_slugs and wslug != slug:
            rels.append((wslug, w))
            continue
        p = resolve_obsidian_embed(NOTAS, w if w.lower().endswith(".md") else w + ".md")
        if not p:
            missing.append(w)
            continue
        dn = read_obsidian_note(p)
        if str(dn["frontmatter"].get("tipo", "")).lower() == "persona":
            rels.append((wslug, w))
            continue

        # The same note linked twice is migrated (and listed) only once.
        if p in seen_sources:
            continue
        seen_sources.add(p)

        # Two different notes can reduce to the same doc-slug once the name
        # tokens and stopwords are removed; suffix -2, -3, ... so the later
        # one does not overwrite the earlier note and its attachments. The
        # check is per call, so re-running still overwrites by slug.
        base_ds = doc_slug(w, ptok)
        ds, k = base_ds, 2
        while ds in used_slugs:
            ds = f"{base_ds}-{k}"
            k += 1
        used_slugs.add(ds)

        # Copy each resolvable embed next to the person and point the note at
        # the copy; unresolved embeds become an HTML comment placeholder.
        new, n_ok = [], 0
        for i, emb in enumerate(extract_obsidian_embeds(dn["body"]), 1):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if not ap:
                new.append(f"<!-- attachment no encontrado: {emb} -->")
                continue
            ext = os.path.splitext(ap)[1].lower()
            nn = f"{ds}-{i}{ext}"
            shutil.copy2(ap, f"{att_abs}/{nn}")
            if move:
                att_to_delete.add(ap)
            new.append(f"![[{att_rel}/{nn}]]")
            n_ok += 1

        # Keep the note's own text (embeds stripped) ahead of the rewritten
        # embeds: in move mode the source note is deleted afterwards, so text
        # not copied here would be lost. Notes without text produce the same
        # body as before.
        text_wo_embeds = re.sub(r'!\[\[[^\]]+\]\]', '', dn["body"]).strip()
        parts = []
        if text_wo_embeds:
            parts.append(text_wo_embeds)
        if new:
            parts.append("\n".join(new))
        create_obsidian_note(OSINT, f"personas/{slug}/{ds}",
                             body="\n\n".join(parts) if parts else "(sin attachments)",
                             frontmatter={"tipo": "documento", "doc_tipo": doc_tipo(ds),
                                          "persona": f"[[{slug}]]",
                                          "fuente": "NotasDeObsidian/" + os.path.relpath(p, NOTAS)},
                             overwrite=True)
        docs_done.append((w, ds, n_ok))
        if move:
            docs_to_delete.add(p)

    # Normalised ficha: original frontmatter plus canonical name/slug/tags.
    fm = dict(f["frontmatter"])
    fm["nombre"], fm["slug"], fm["aliases"] = nombre, slug, [nombre]
    fm["tags"] = ["persona", "osint"]
    if dirtxt:
        fm["direccion"] = dirtxt
    bl = ["## Documentos", ""] + [f"- [[personas/{slug}/{ds}|{w}]]" for w, ds, _ in docs_done]
    bl += ["", "## Relaciones", ""] + [f"- [[{rs}|{rn}]]" for rs, rn in rels]
    if lugar_link:
        bl += ["", "## Lugares", "", f"- {lugar_link}"]
    bl += ["", "## Notas", ""]
    create_obsidian_note(OSINT, f"personas/{slug}", body="\n".join(bl), frontmatter=fm, overwrite=True)

    # Remove the old flat ficha (file name with spaces) when it is not the
    # file that was just written; anything under a backups folder is kept.
    if os.path.abspath(ficha_path) != os.path.abspath(f"{OSINT}/personas/{slug}.md") \
            and "/backups/" not in ficha_path:
        delete_obsidian_note(ficha_path)
    return dict(slug=slug, docs=len(docs_done), rels=len(rels), missing=missing,
                att=sum(n for *_, n in docs_done))
def cleanup_enmanuel_originals(att_to_delete):
    """Delete the source notes left behind for "enmanuel-gutierrez-perez".

    For every note under ``personas/enmanuel-gutierrez-perez`` in OSINT whose
    ``fuente`` frontmatter starts with "NotasDeObsidian/", the corresponding
    file inside ``NOTAS`` is located. If it still exists, each embed in it
    that resolves is added to ``att_to_delete`` and the file itself is
    removed immediately.

    Returns the number of source notes removed.
    """
    prefix = "NotasDeObsidian/"
    enm = "enmanuel-gutierrez-perez"
    removed = 0
    for docp in list_obsidian_notes(OSINT, subfolder=f"personas/{enm}"):
        src = read_obsidian_note(docp)["frontmatter"].get("fuente", "")
        if not src.startswith(prefix):
            continue
        orig = NOTAS + "/" + src[len(prefix):]
        if not os.path.exists(orig):
            continue
        # Attachments are only recorded here; the caller deletes them later.
        resolved = (
            resolve_obsidian_embed(NOTAS, emb)
            for emb in extract_obsidian_embeds(read_obsidian_note(orig)["body"])
        )
        att_to_delete.update(ap for ap in resolved if ap)
        os.remove(orig)
        removed += 1
    return removed
def main():
    """Run the person migration batch and print a report.

    Steps: collect the known person slugs, clean up the source notes of the
    pilot person, migrate every ficha under ``personas`` whose file name is
    not already its own slug, then delete the collected source notes and
    attachments (paths that no longer exist are skipped silently).
    """
    known = known_person_slugs()
    att_del = set()
    docs_del = set()

    # 1. Pilot person: remove the originals still present in NotasDeObsidian.
    enm_removed = cleanup_enmanuel_originals(att_del)

    # 2. Pending fichas: still flat in osint/personas (file name != slug).
    def is_pending(path):
        base = os.path.splitext(os.path.basename(path))[0]
        if base.startswith("_"):
            return False
        return slugify_obsidian_name(base) != base

    pend = [p for p in list_obsidian_notes(OSINT, subfolder="personas") if is_pending(p)]
    results = [migrate_person(fp, known, att_del, docs_del, move=True) for fp in pend]

    # 3. Apply the deletions (move): source notes first, then attachments.
    for doc_path in docs_del:
        try:
            os.remove(doc_path)
        except FileNotFoundError:
            continue

    moved_att = 0
    for att_path in att_del:
        try:
            os.remove(att_path)
        except FileNotFoundError:
            continue
        moved_att += 1

    # 4. Report.
    print(f"enmanuel: {enm_removed} docs originales borrados")
    print(f"personas migradas en este batch: {len(results)}")
    print(f"docs .md movidos: {len(docs_del)} | attachments movidos: {moved_att}")
    total_missing = []
    for r in results:
        tag = f"{r['slug']}: docs={r['docs']} rels={r['rels']} att={r['att']}"
        if r["missing"]:
            tag += f" | missing={r['missing']}"
            total_missing.extend((r['slug'], m) for m in r['missing'])
        print(" " + tag)
    if not total_missing:
        return
    print(f"\nlinks sin archivo (placeholders), total {len(total_missing)}:")
    for s, m in total_missing:
        print(f" {s} -> {m!r}")


if __name__ == "__main__":
    main()