Files
osint/tools/migrate_orgs.py
2026-06-10 11:43:45 +02:00

131 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""Migrador de organizaciones (empresas) desde NotasDeObsidian al vault osint.
A diferencia de las personas (que parten de una ficha con wikilinks), las organizaciones se
detectan recogiendo todas las notas cuyo titulo menciona el nombre de la empresa. La nota cuyo
titulo ES el nombre de la empresa actua como ficha; el resto son documentos.
Estructura resultante (estandar projects/osint/CONVENTIONS.md):
organizaciones/<slug>.md
organizaciones/<slug>/<doc-slug>.md
attachments/organizaciones/<slug>/<doc-slug>-N.ext
Modo MOVER (borra originales). Idempotente (overwrite por slug).
"""
import sys, os, re, shutil
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
from obsidian import (read_obsidian_note, create_obsidian_note,
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
list_obsidian_notes)
# Source vault (notes are moved out of here) and destination vault.
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
OSINT = "/home/enmanuel/Obsidian/osint"

# Slug pieces that carry no meaning and are dropped from document slugs.
STOPWORDS = {"de", "del", "la", "las", "el", "los", "y", "a", "en"}

# One entry per organization to migrate:
#   display -> human-readable name, used in the ficha text and frontmatter
#   slug    -> file/directory name inside organizaciones/
#   match   -> case-insensitive pattern searched in each note title
#   nametok -> slug pieces belonging to the company name; removed from
#              document slugs so they are not repeated under the org folder
ORGS = [
    dict(
        display="FenixFood SL",
        slug="fenixfood-sl",
        match=re.compile(r'fenix\s*food', re.IGNORECASE),
        nametok={"fenixfood", "fenix", "food", "sl"},
    ),
    dict(
        display="BiorganicFood SL",
        slug="biorganicfood-sl",
        match=re.compile(r'biorganic', re.IGNORECASE),
        nametok={"biorganicfood", "biorganic", "food", "sl"},
    ),
]
def doc_slug(title, ntok):
    """Derive a document slug from a note title.

    The title is slugified with ``slugify_obsidian_name`` and split on "-".
    Empty pieces, pieces contained in *ntok* (the organization's name tokens)
    and pieces contained in ``STOPWORDS`` are discarded; the survivors are
    re-joined with "-".

    Returns the literal "documento" when no piece survives the filtering.
    """
    kept = []
    for piece in slugify_obsidian_name(title).split("-"):
        if not piece or piece in ntok or piece in STOPWORDS:
            continue
        kept.append(piece)
    joined = "-".join(kept)
    return joined if joined else "documento"
def migrate_org(org, all_notes, att_del, docs_del):
    """Migrate every note whose title mentions one organization into the osint vault.

    Notes are selected by searching ``org["match"]`` in each note's file name
    (minus its last three characters, i.e. the ".md" suffix). Among the
    selected notes, the first one whose slugified title equals ``org["slug"]``
    or whose title equals ``org["display"]`` case-insensitively is treated as
    the organization's ficha; every other selected note becomes a document
    under ``organizaciones/<slug>/``.

    For each document the embeds found in its body are copied to
    ``attachments/organizaciones/<slug>/`` and rewritten; the remaining text is
    kept above the rewritten embeds. Finally the ficha
    ``organizaciones/<slug>.md`` is (re)written with a list of the documents.

    Nothing is deleted here: the paths to delete are only collected.

    Args:
        org: entry of ``ORGS`` (keys ``display``, ``slug``, ``match``, ``nametok``).
        all_notes: paths of candidate source notes.
        att_del: set, mutated in place; receives the source path of every
            attachment that was copied.
        docs_del: set, mutated in place; receives the source path of every
            migrated note (documents and ficha).

    Returns:
        ``(slug, number_of_documents_migrated, ficha_source_found)``.
    """
    matched = [n for n in all_notes
               if org["match"].search(os.path.basename(n)[:-3])]
    slug, ntok = org["slug"], org["nametok"]

    # Re-run guard: once the originals have been moved, nothing matches any
    # more. Rewriting the ficha in that situation (overwrite=True below) would
    # replace the previously generated ficha with an empty document list and an
    # empty "fuente". When there is nothing to migrate and the ficha is already
    # in place, leave the vault untouched.
    if not matched and os.path.exists(f"{OSINT}/organizaciones/{slug}.md"):
        return slug, 0, False

    # Pick the note that acts as the ficha, if any.
    ficha_path = None
    for n in matched:
        t = os.path.basename(n)[:-3]
        if slugify_obsidian_name(t) == org["slug"] or t.lower() == org["display"].lower():
            ficha_path = n
            break

    att_rel = f"attachments/organizaciones/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    os.makedirs(att_abs, exist_ok=True)

    docs_done, ficha_extra = [], ""
    for n in matched:
        t = os.path.basename(n)[:-3]
        if n == ficha_path:
            # The ficha's own body is carried over into the generated ficha.
            ficha_extra = read_obsidian_note(n)["body"]
            continue
        dn = read_obsidian_note(n)
        ds = doc_slug(t, ntok)

        # Avoid doc-slug collisions inside the same organization by appending
        # -2, -3, ... until the target file name is free.
        # NOTE(review): the check is made against the disk, so re-running after
        # a run that aborted before the originals were deleted will create
        # "-2" duplicates instead of overwriting — confirm whether that is the
        # intended trade-off.
        base_ds = ds
        k = 2
        while os.path.exists(f"{OSINT}/organizaciones/{slug}/{ds}.md"):
            ds = f"{base_ds}-{k}"
            k += 1

        # Copy each embedded attachment next to the org and build the new
        # embed lines; unresolved embeds are kept as an HTML comment.
        embeds = extract_obsidian_embeds(dn["body"])
        new = []
        for i, emb in enumerate(embeds, 1):
            ap = resolve_obsidian_embed(NOTAS, emb)
            if not ap:
                new.append(f"<!-- attachment no encontrado: {emb} -->")
                continue
            ext = os.path.splitext(ap)[1].lower()
            nn = f"{ds}-{i}{ext}"
            shutil.copy2(ap, f"{att_abs}/{nn}")
            att_del.add(ap)
            new.append(f"![[{att_rel}/{nn}]]")

        # Keep the document's own text (with the old embeds stripped), followed
        # by the rewritten embeds.
        text = dn["body"]
        text_wo_embeds = re.sub(r'!\[\[[^\]]+\]\]', '', text).strip()
        parts = []
        if text_wo_embeds:
            parts.append(text_wo_embeds)
        if new:
            parts.append("\n".join(new))

        create_obsidian_note(
            OSINT, f"organizaciones/{slug}/{ds}",
            body="\n\n".join(parts) if parts else "(sin contenido)",
            frontmatter={"tipo": "documento", "entidad": f"[[{slug}]]",
                         "fuente": "NotasDeObsidian/" + os.path.relpath(n, NOTAS)},
            overwrite=True)
        docs_done.append((t, ds))
        docs_del.add(n)

    # Build the ficha: intro line, original ficha body (if any), the list of
    # migrated documents and an empty notes section.
    # NOTE(review): the document list only contains documents migrated in this
    # call; documents migrated by an earlier run are not re-listed.
    bl = [f"Ficha de la organizacion {org['display']}.", ""]
    if ficha_extra.strip():
        bl += [ficha_extra.strip(), ""]
    bl += ["## Documentos", ""] + [f"- [[organizaciones/{slug}/{ds}|{t}]]" for t, ds in docs_done]
    bl += ["", "## Notas", ""]
    create_obsidian_note(
        OSINT, f"organizaciones/{slug}", body="\n".join(bl),
        frontmatter={"tipo": "organizacion", "nombre": org["display"], "slug": slug,
                     "tags": ["organizacion", "osint"],
                     "fuente": "NotasDeObsidian/" + (os.path.relpath(ficha_path, NOTAS) if ficha_path else "")},
        overwrite=True)
    if ficha_path:
        docs_del.add(ficha_path)
    return slug, len(docs_done), ficha_path is not None
def main():
    """Run the migration for every entry in ``ORGS`` and delete the originals.

    Notes under ``NOTAS`` whose path contains "/.git/" or "/dist/" are ignored.
    Each organization is migrated with ``migrate_org``, which fills the shared
    deletion sets. Once every organization has been processed, the collected
    notes and attachments are removed from the source vault (paths that no
    longer exist are skipped) and a summary is printed.
    """
    all_notes = []
    for note in list_obsidian_notes(NOTAS):
        if "/.git/" in note or "/dist/" in note:
            continue
        all_notes.append(note)

    att_del = set()
    docs_del = set()
    results = []
    for org in ORGS:
        outcome = migrate_org(org, all_notes, att_del, docs_del)
        results.append((org["display"],) + outcome)

    # Move mode: remove the source notes only after every org was processed.
    for doc_path in docs_del:
        try:
            os.remove(doc_path)
        except FileNotFoundError:
            continue

    # Attachments are counted only when the removal actually happened.
    moved_att = 0
    for att_path in att_del:
        try:
            os.remove(att_path)
        except FileNotFoundError:
            continue
        moved_att += 1

    print(f"docs .md movidos: {len(docs_del)} | attachments movidos: {moved_att}\n")
    for disp, slug, ndocs, had_ficha in results:
        print(f" {disp} -> organizaciones/{slug} | docs={ndocs} | ficha_origen={'si' if had_ficha else 'no'}")
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()