Files
osint/tools/migrate_external_orgs.py
T
egutierrez f771c9b883 chore: auto-commit (6 archivos)
- CONVENTIONS.md
- tools/dedup_persons.py
- tools/extract_entities.py
- tools/migrate_external_orgs.py
- tools/normalize_person_frontmatter.py
- tools/person_datapoints.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-11 00:16:47 +02:00

144 lines
6.6 KiB
Python

#!/usr/bin/env python3
"""Index cards for EXTERNAL organizations (suppliers, banks, telcos, gov) in osint.

Unlike the company's own organizations (FenixFood/Biorganic), the external ones already
appear as documents inside personas/ and organizaciones/fenixfood-sl/. This script:

1. Creates a card organizaciones/<slug>.md (tipo: organizacion, externa: true) for each
   external company.
2. Cross-references (## Relacionado) the documents already present in osint whose title
   mentions the company, WITHOUT moving them (they stay under their owner: FenixFood or
   the person).
3. For the companies that have real loose notes in NotasDeObsidian (move_loose=True),
   moves those notes to organizaciones/<slug>/<doc-slug>.md together with their
   attachments.

Intended to be idempotent: notes are written with overwrite=True. Loose notes are MOVED
(the original is deleted); a tar backup taken outside this script is relied on to cover
that.
"""
import sys, os, re, shutil
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
from obsidian import (read_obsidian_note, create_obsidian_note,
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
list_obsidian_notes)
# Source vault: loose notes and their attachments are read from here and, for
# move_loose entries, deleted once copied.
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
# Destination vault: cards, moved documents and copied attachments are written here.
OSINT = "/home/enmanuel/Obsidian/osint"
# Spanish articles/prepositions that doc_slug() drops from document slugs.
STOPWORDS = {"de", "del", "la", "las", "el", "los", "y", "a", "en"}
# One tuple per external organization:
#   (display, slug, match_regex, false_positive_exclusion_regex, move_loose)
# - display:    human-readable name written into the card.
# - slug:       card path component (organizaciones/<slug>) and attachment folder name.
# - match_regex: searched case-insensitively in note titles (file name without .md).
# - false_positive_exclusion_regex: titles that also match this are skipped
#   (e.g. titles containing "orange pi" for Orange); None disables the check.
# - move_loose: when True, matching notes in NOTAS are moved under
#   organizaciones/<slug>/ instead of only being cross-referenced.
EXT = [
("Santander", "santander", r"santander", None, False),
("Unicaja", "unicaja", r"unicaja", None, True),
("Abanca", "abanca", r"abanca", None, False),
("BBVA", "bbva", r"\bbbva\b", None, False),
("Cajamar", "cajamar", r"cajamar", None, False),
("Vodafone", "vodafone", r"vodafone", None, True),
("Orange", "orange", r"\borange\b", r"orange\s*pi", False),
("Endesa", "endesa", r"endesa", None, False),
("Ebury", "ebury", r"ebury", None, False),
("OVH", "ovh", r"\bovh\b", None, False),
("Legalitas", "legalitas", r"legalitas", None, False),
("Revolut", "revolut", r"revolut", r"paypal2revolut", False),
("Kiwa", "kiwa", r"kiwa", None, False),
("Martransit", "martransit", r"martransit", None, False),
("Transportes Nieves", "transportes-nieves", r"transportes\s*nieves", None, False),
("AEAT", "aeat", r"\baeat\b|agencia\s+tributaria", None, True),
]
def doc_slug(title, ntok):
    """Derive a document slug from a note title.

    The title is slugified with ``slugify_obsidian_name`` and split on "-".
    Empty pieces, tokens contained in ``ntok`` and entries of ``STOPWORDS``
    are discarded.

    Args:
        title: Note title (file name without extension).
        ntok: Collection of tokens to strip (the organization's own slug
            tokens plus generic words such as "documento").

    Returns:
        The surviving tokens joined by "-", or "documento" when nothing
        survives the filtering.
    """
    kept = []
    for token in slugify_obsidian_name(title).split("-"):
        if not token:
            continue
        if token in ntok or token in STOPWORDS:
            continue
        kept.append(token)
    slug = "-".join(kept)
    return slug if slug else "documento"
def _note_title(path):
    """Return a note's title: its file name without the extension."""
    return os.path.splitext(os.path.basename(path))[0]


def _title_matcher(rx, excl):
    """Build a predicate telling whether a note title belongs to an organization.

    ``rx`` must match the title (case-insensitive) and ``excl``, when given,
    must NOT match it (false-positive filter).
    """
    include = re.compile(rx, re.I)
    exclude = re.compile(excl, re.I) if excl else None

    def matches(title):
        if not include.search(title):
            return False
        return not (exclude and exclude.search(title))

    return matches


def _collect_osint_refs(osint_docs, slug, matches):
    """Classify the notes already present in osint for one organization.

    Returns ``(xrefs, filed)``:
    - ``xrefs``: ``(rel_path, title, owner)`` for notes owned by someone else
      whose title matches; they are only linked, never moved.
    - ``filed``: ``(title, doc_slug)`` for notes already stored under
      ``organizaciones/<slug>/`` (typically moved there by a previous run).
    """
    own_card = f"organizaciones/{slug}"
    own_dir = own_card + "/"
    xrefs, filed = [], []
    for path in osint_docs:
        rel = os.path.splitext(os.path.relpath(path, OSINT))[0]  # without .md
        title = _note_title(path)
        if rel == own_card:
            # The card written by a previous run matches its own regex;
            # skip it so the card never links to itself.
            continue
        if rel.startswith(own_dir):
            # Already filed under this organization: index it as a document
            # (regardless of title) instead of as a foreign cross-reference.
            filed.append((title, rel[len(own_dir):]))
            continue
        if not matches(title):
            continue
        owner = rel.split("/")[1] if rel.startswith(("personas/", "organizaciones/")) else ""
        xrefs.append((rel, title, owner))
    return xrefs, filed


def _move_loose_notes(notas_rest, slug, ntok, matches, att_del, docs_del):
    """Copy an organization's loose notes (and attachments) from NOTAS into osint.

    Source files are NOT deleted here: their paths are added to ``docs_del`` /
    ``att_del`` so the caller removes them once every organization has been
    processed (deletion is deferred to the end of the run, so a source file
    matched by more than one organization is still available to copy).

    Returns a list of ``(title, doc_slug)`` for the notes written.
    """
    att_rel = f"attachments/organizaciones/{slug}"
    att_abs = f"{OSINT}/{att_rel}"
    moved = []
    for path in notas_rest:
        title = _note_title(path)
        if not matches(title):
            continue
        if slugify_obsidian_name(title) == slug:
            # Note named exactly like the organization = card body, handled separately.
            continue
        note = read_obsidian_note(path)
        base = doc_slug(title, ntok)
        ds, k = base, 2
        # Avoid clobbering a different note that produced the same slug.
        while os.path.exists(f"{OSINT}/organizaciones/{slug}/{ds}.md"):
            ds = f"{base}-{k}"
            k += 1
        embeds = []
        for i, emb in enumerate(extract_obsidian_embeds(note["body"]), 1):
            src = resolve_obsidian_embed(NOTAS, emb)
            if not src:
                embeds.append(f"<!-- no encontrado: {emb} -->")
                continue
            ext = os.path.splitext(src)[1].lower()
            name = f"{ds}-{i}{ext}"
            # Create the attachment folder only when something is actually
            # copied, so organizations without attachments leave no empty dir.
            os.makedirs(att_abs, exist_ok=True)
            shutil.copy2(src, f"{att_abs}/{name}")
            att_del.add(src)
            embeds.append(f"![[{att_rel}/{name}]]")
        # Strip the original embeds from the text; the rewritten ones are
        # appended after the remaining body.
        text = re.sub(r'!\[\[[^\]]+\]\]', '', note["body"]).strip()
        parts = [x for x in [text, "\n".join(embeds)] if x]
        create_obsidian_note(OSINT, f"organizaciones/{slug}/{ds}",
                             body="\n\n".join(parts) if parts else "(sin contenido)",
                             frontmatter={"tipo": "documento", "entidad": f"[[{slug}]]",
                                          "fuente": "NotasDeObsidian/" + os.path.relpath(path, NOTAS)},
                             overwrite=True)
        moved.append((title, ds))
        docs_del.add(path)
    return moved


def _card_body(display, slug, xrefs, docs):
    """Render the markdown body of an organization index card."""
    lines = [f"Ficha de la organizacion externa {display}. Referencia de todo lo trabajado con ellos.", ""]
    if xrefs:
        lines += ["## Relacionado", ""]
        lines += [f"- [[{rel}|{t}]]" + (f" ({owner})" if owner else "")
                  for rel, t, owner in xrefs]
    if docs:
        lines += ["", "## Documentos", ""]
        lines += [f"- [[organizaciones/{slug}/{ds}|{t}]]" for t, ds in docs]
    lines += ["", "## Notas", ""]
    return "\n".join(lines)


def main():
    """Create/refresh the index card of every external organization in ``EXT``.

    For each organization:
    1. cross-reference the osint notes (under personas/ or organizaciones/)
       whose title matches, without moving them;
    2. when ``move_loose`` is set, move the matching loose notes from NOTAS to
       ``organizaciones/<slug>/`` together with their attachments;
    3. write the card ``organizaciones/<slug>.md`` (skipped when there is
       nothing to reference).

    Re-run behaviour: the card does not list itself, and notes already filed
    under ``organizaciones/<slug>/`` are kept in the "## Documentos" index
    even though they no longer exist as loose notes.

    Source notes and attachments are deleted only after all organizations have
    been processed. Prints a summary to stdout.
    """
    # Snapshot of both vaults taken BEFORE anything is written, so notes
    # created during this run are never re-processed.
    osint_docs = [n for n in list_obsidian_notes(OSINT)
                  if "/personas/" in n or "/organizaciones/" in n]
    notas_rest = [n for n in list_obsidian_notes(NOTAS)
                  if "/.git/" not in n and "/dist/" not in n]
    att_del, docs_del, report = set(), set(), []
    for display, slug, rx, excl, move_loose in EXT:
        matches = _title_matcher(rx, excl)
        # Tokens removed from document slugs: the organization's own name
        # plus generic filler words.
        ntok = set(slug.split("-")) | {"documentos", "documento"}

        # 1. cross-references inside osint (never moved)
        xrefs, filed = _collect_osint_refs(osint_docs, slug, matches)

        # 2. real loose notes (only when move_loose)
        moved = []
        if move_loose:
            moved = _move_loose_notes(notas_rest, slug, ntok, matches, att_del, docs_del)

        # 3. index card (skipped when there is nothing to reference)
        if not (xrefs or filed or moved):
            continue
        # NOTE(review): the card is overwritten on every run and "## Notas" is
        # emitted empty — presumably any manual text there is lost; confirm
        # against create_obsidian_note(overwrite=True) semantics.
        create_obsidian_note(OSINT, f"organizaciones/{slug}",
                             body=_card_body(display, slug, xrefs, filed + moved),
                             frontmatter={"tipo": "organizacion", "nombre": display, "slug": slug,
                                          "externa": True, "tags": ["organizacion", "externa", "osint"]},
                             overwrite=True)
        report.append((display, slug, len(xrefs), len(moved)))

    # Deferred deletion of the originals (MOVE semantics).
    for p in docs_del:
        try:
            os.remove(p)
        except FileNotFoundError:
            pass
    moved_att = 0
    for ap in att_del:
        try:
            os.remove(ap)
            moved_att += 1
        except FileNotFoundError:
            pass
    print(f"fichas externas creadas: {len(report)} | sueltas movidas: {len(docs_del)} | attachments: {moved_att}\n")
    for d, s, x, m in report:
        print(f" {d:20} -> organizaciones/{s} | xref={x} sueltas_movidas={m}")
if __name__ == "__main__":
main()