131 lines
5.3 KiB
Python
131 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Migrador de organizaciones (empresas) desde NotasDeObsidian al vault osint.
|
|
|
|
A diferencia de las personas (que parten de una ficha con wikilinks), las organizaciones se
|
|
detectan recogiendo todas las notas cuyo titulo menciona el nombre de la empresa. La nota cuyo
|
|
titulo ES el nombre de la empresa actua como ficha; el resto son documentos.
|
|
|
|
Estructura resultante (estandar projects/osint/CONVENTIONS.md):
|
|
organizaciones/<slug>.md
|
|
organizaciones/<slug>/<doc-slug>.md
|
|
attachments/organizaciones/<slug>/<doc-slug>-N.ext
|
|
|
|
Modo MOVER (borra originales). Idempotente (overwrite por slug).
|
|
"""
|
|
import sys, os, re, shutil
|
|
|
|
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
|
from obsidian import (read_obsidian_note, create_obsidian_note,
|
|
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
|
|
list_obsidian_notes)
|
|
|
|
# Source vault (notes are read from here) and destination vault (notes are written here).
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
OSINT = "/home/enmanuel/Obsidian/osint"

# Slug tokens that doc_slug() drops from document slugs.
STOPWORDS = {"a", "de", "del", "el", "en", "la", "las", "los", "y"}

# One entry per organization to migrate:
#   display  - name written into the ficha
#   slug     - path component used under organizaciones/ and attachments/organizaciones/
#   match    - case-insensitive pattern searched in each note title
#   nametok  - slug tokens belonging to the organization name; removed from document slugs
ORGS = [
    {
        "display": "FenixFood SL",
        "slug": "fenixfood-sl",
        "match": re.compile(r"fenix\s*food", re.IGNORECASE),
        "nametok": {"fenixfood", "fenix", "food", "sl"},
    },
    {
        "display": "BiorganicFood SL",
        "slug": "biorganicfood-sl",
        "match": re.compile(r"biorganic", re.IGNORECASE),
        "nametok": {"biorganicfood", "biorganic", "food", "sl"},
    },
]
|
|
|
|
|
|
def doc_slug(title, ntok):
    """Build a document slug from a note title.

    The title is slugified with ``slugify_obsidian_name`` and split on "-".
    Empty pieces, pieces contained in *ntok* and pieces contained in
    ``STOPWORDS`` are discarded; the survivors are re-joined with "-".

    Returns the literal "documento" when no piece survives.
    """
    kept = []
    for piece in slugify_obsidian_name(title).split("-"):
        if not piece or piece in ntok or piece in STOPWORDS:
            continue
        kept.append(piece)
    joined = "-".join(kept)
    return joined if joined else "documento"
|
|
|
|
|
|
# Matches any Obsidian embed (![[...]]) so it can be stripped from a note body.
_EMBED_RE = re.compile(r'!\[\[[^\]]+\]\]')


def _note_title(path):
    """Return the note title for *path*: its file name without the extension."""
    name = os.path.basename(path)
    # Only cut the last three characters when they really are ".md"; otherwise
    # fall back to a generic extension split instead of truncating blindly.
    if name.lower().endswith(".md"):
        return name[:-3]
    return os.path.splitext(name)[0]


def _find_ficha(org, matched):
    """Return the first path in *matched* whose title is the organization name, else None."""
    display = org["display"].lower()
    for path in matched:
        title = _note_title(path)
        if slugify_obsidian_name(title) == org["slug"] or title.lower() == display:
            return path
    return None


def _free_doc_slug(org_slug, base):
    """Return *base*, or *base*-2, *base*-3, ... - the first whose .md file does not exist yet."""
    # NOTE(review): the check is against the filesystem, so a file left by a
    # previous run also triggers a suffix; with this in place the
    # overwrite=True passed to create_obsidian_note never replaces a document.
    # Confirm that this is the intended behaviour.
    candidate, k = base, 2
    while os.path.exists(f"{OSINT}/organizaciones/{org_slug}/{candidate}.md"):
        candidate = f"{base}-{k}"
        k += 1
    return candidate


def _relocate_embeds(body, ds, att_rel, att_abs, att_del):
    """Copy every embed of *body* into *att_abs* and return the replacement lines.

    Resolved attachments are copied as ``<ds>-<index><ext>`` (extension
    lower-cased) and their source path is added to *att_del*. Embeds that
    cannot be resolved yield an HTML comment line instead.
    """
    lines = []
    for i, emb in enumerate(extract_obsidian_embeds(body), 1):
        src = resolve_obsidian_embed(NOTAS, emb)
        if not src:
            lines.append(f"<!-- attachment no encontrado: {emb} -->")
            continue
        ext = os.path.splitext(src)[1].lower()
        name = f"{ds}-{i}{ext}"
        shutil.copy2(src, f"{att_abs}/{name}")
        att_del.add(src)
        lines.append(f"![[{att_rel}/{name}]]")
    return lines


def _doc_body(body, embed_lines):
    """Return the migrated body: the text without embeds, then the rewritten embeds."""
    sections = []
    text_wo_embeds = _EMBED_RE.sub('', body).strip()
    if text_wo_embeds:
        sections.append(text_wo_embeds)
    if embed_lines:
        sections.append("\n".join(embed_lines))
    return "\n\n".join(sections) if sections else "(sin contenido)"


def migrate_org(org, all_notes, att_del, docs_del):
    """Write one organization's ficha and documents into the OSINT vault.

    Args:
        org: entry of ``ORGS`` (keys ``display``, ``slug``, ``match``, ``nametok``).
        all_notes: paths of the candidate source notes.
        att_del: set that receives the source path of every copied attachment.
        docs_del: set that receives the path of every migrated source note,
            including the ficha source when one is found.

    Returns:
        ``(slug, number_of_documents_written, ficha_found)``.

    Nothing is deleted here; the function only records paths in *att_del* and
    *docs_del*. It creates the attachments directory, copies attachments and
    writes the notes through ``create_obsidian_note``.
    """
    slug, ntok, display = org["slug"], org["nametok"], org["display"]

    matched = [n for n in all_notes if org["match"].search(_note_title(n))]
    ficha_path = _find_ficha(org, matched)

    att_rel = f"attachments/organizaciones/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    os.makedirs(att_abs, exist_ok=True)

    docs_done, ficha_extra = [], ""
    for n in matched:
        if n == ficha_path:
            # The ficha source contributes its body to the new ficha; it is
            # not written as a separate document.
            ficha_extra = read_obsidian_note(n)["body"]
            continue

        t = _note_title(n)
        body = read_obsidian_note(n)["body"]
        ds = _free_doc_slug(slug, doc_slug(t, ntok))
        embed_lines = _relocate_embeds(body, ds, att_rel, att_abs, att_del)

        create_obsidian_note(
            OSINT, f"organizaciones/{slug}/{ds}",
            body=_doc_body(body, embed_lines),
            frontmatter={"tipo": "documento", "entidad": f"[[{slug}]]",
                         "fuente": "NotasDeObsidian/" + os.path.relpath(n, NOTAS)},
            overwrite=True)
        docs_done.append((t, ds))
        docs_del.add(n)

    # Assemble the ficha: heading line, original ficha text (if any), the
    # list of migrated documents, and an empty notes section.
    bl = [f"Ficha de la organizacion {display}.", ""]
    if ficha_extra.strip():
        bl += [ficha_extra.strip(), ""]
    bl += ["## Documentos", ""] + [f"- [[organizaciones/{slug}/{ds}|{t}]]" for t, ds in docs_done]
    bl += ["", "## Notas", ""]

    create_obsidian_note(
        OSINT, f"organizaciones/{slug}", body="\n".join(bl),
        frontmatter={"tipo": "organizacion", "nombre": display, "slug": slug,
                     "tags": ["organizacion", "osint"],
                     "fuente": "NotasDeObsidian/" + (os.path.relpath(ficha_path, NOTAS) if ficha_path else "")},
        overwrite=True)

    if ficha_path:
        docs_del.add(ficha_path)
    return slug, len(docs_done), ficha_path is not None
|
|
|
|
|
|
def _remove_existing(paths):
    """Delete every path in *paths*; return how many files were actually removed."""
    removed = 0
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone: skip it and do not count it as moved.
            continue
        removed += 1
    return removed


def main():
    """Migrate every organization in ``ORGS``, then delete the migrated originals.

    Notes under ``/.git/`` or ``/dist/`` are ignored. Source notes and
    attachments are deleted only after all organizations have been processed,
    and a summary is printed at the end.
    """
    all_notes = [n for n in list_obsidian_notes(NOTAS) if "/.git/" not in n and "/dist/" not in n]
    att_del, docs_del = set(), set()
    results = [(org["display"],) + migrate_org(org, all_notes, att_del, docs_del)
               for org in ORGS]

    # Count real deletions for both kinds of file so the summary is consistent
    # (previously the docs figure was the size of the set, removed or not).
    moved_docs = _remove_existing(docs_del)
    moved_att = _remove_existing(att_del)

    print(f"docs .md movidos: {moved_docs} | attachments movidos: {moved_att}\n")
    for disp, slug, ndocs, had_ficha in results:
        print(f" {disp} -> organizaciones/{slug} | docs={ndocs} | ficha_origen={'si' if had_ficha else 'no'}")
|
|
|
|
|
|
# Run the migration only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|