f771c9b883
- CONVENTIONS.md - tools/dedup_persons.py - tools/extract_entities.py - tools/migrate_external_orgs.py - tools/normalize_person_frontmatter.py - tools/person_datapoints.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
144 lines
6.6 KiB
Python
144 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Fichas-indice de organizaciones EXTERNAS (proveedores, bancos, telcos, gov) en osint.
|
|
|
|
A diferencia de las empresas propias (FenixFood/Biorganic), las externas ya aparecen como
|
|
documentos dentro de personas/ y organizaciones/fenixfood-sl/. Este script:
|
|
|
|
1. Crea una ficha organizaciones/<slug>.md (tipo: organizacion, externa: true) por cada
|
|
empresa externa.
|
|
2. Cross-referencia (## Relacionado) los documentos ya existentes en osint donde aparece
|
|
la empresa, SIN moverlos (siguen bajo su dueño: FenixFood o la persona).
|
|
3. Para las empresas con notas sueltas reales en NotasDeObsidian (move_loose=True), mueve
|
|
esas notas a organizaciones/<slug>/<doc-slug>.md con sus attachments.
|
|
|
|
Idempotente (overwrite). Modo MOVER para las sueltas (borra original); backup tar cubre.
|
|
"""
|
|
import sys, os, re, shutil
|
|
|
|
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
|
from obsidian import (read_obsidian_note, create_obsidian_note,
|
|
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
|
|
list_obsidian_notes)
|
|
|
|
# Vault the loose notes are read from, and vault the generated notes are written to.
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
OSINT = "/home/enmanuel/Obsidian/osint"

# Tokens dropped from generated document slugs (see doc_slug).
STOPWORDS = {"de", "del", "la", "las", "el", "los", "y", "a", "en"}

# One tuple per external organization, in processing order:
#   display     name stored in the index note frontmatter
#   slug        note name under organizaciones/
#   match       regex searched (case-insensitively) in note file names
#   exclude     regex for false positives to reject, or None
#   move_loose  whether matching loose notes in NOTAS are moved into OSINT
EXT = [
    # display,             slug,                 match,                             exclude,           move_loose
    ("Santander",          "santander",          r"santander",                      None,              False),
    ("Unicaja",            "unicaja",            r"unicaja",                        None,              True),
    ("Abanca",             "abanca",             r"abanca",                         None,              False),
    ("BBVA",               "bbva",               r"\bbbva\b",                       None,              False),
    ("Cajamar",            "cajamar",            r"cajamar",                        None,              False),
    ("Vodafone",           "vodafone",           r"vodafone",                       None,              True),
    ("Orange",             "orange",             r"\borange\b",                     r"orange\s*pi",    False),
    ("Endesa",             "endesa",             r"endesa",                         None,              False),
    ("Ebury",              "ebury",              r"ebury",                          None,              False),
    ("OVH",                "ovh",                r"\bovh\b",                        None,              False),
    ("Legalitas",          "legalitas",          r"legalitas",                      None,              False),
    ("Revolut",            "revolut",            r"revolut",                        r"paypal2revolut", False),
    ("Kiwa",               "kiwa",               r"kiwa",                           None,              False),
    ("Martransit",         "martransit",         r"martransit",                     None,              False),
    ("Transportes Nieves", "transportes-nieves", r"transportes\s*nieves",           None,              False),
    ("AEAT",               "aeat",               r"\baeat\b|agencia\s+tributaria",  None,              True),
]
|
|
|
|
|
|
def doc_slug(title, ntok):
    """Build a short document slug from a note title.

    The title is slugified with ``slugify_obsidian_name`` and split on "-".
    Empty pieces, pieces contained in ``ntok`` and pieces contained in
    ``STOPWORDS`` are discarded; the rest keep their order.

    Args:
        title: Note title to convert.
        ntok: Collection of tokens to leave out of the slug (only used for
            membership tests).

    Returns:
        The surviving pieces joined with "-", or ``"documento"`` when no
        piece survives.
    """
    kept = []
    for piece in slugify_obsidian_name(title).split("-"):
        if not piece or piece in ntok or piece in STOPWORDS:
            continue
        kept.append(piece)
    if not kept:
        return "documento"
    return "-".join(kept)
|
|
|
|
|
|
def main():
    """Create index notes for external organizations and move their loose notes.

    For every ``(display, slug, rx, excl, move_loose)`` entry of ``EXT``:

    1. Collect cross-references: notes already in the ``OSINT`` vault (paths
       containing ``/personas/`` or ``/organizaciones/``) whose file name
       matches ``rx`` and does not match ``excl``. These notes stay where
       they are. The entry's own index note is never cross-referenced.
    2. When ``move_loose`` is true, write each matching note from ``NOTAS``
       to ``organizaciones/<slug>/<doc-slug>`` in ``OSINT`` and copy its
       resolved embeds to ``attachments/organizaciones/<slug>/``.
    3. Write (overwriting) the index note ``organizaciones/<slug>``, unless
       there are neither cross-references nor moved notes.

    After all entries are processed, the source notes and the copied
    attachments are deleted from their original location and a summary is
    printed to stdout.
    """
    osint_docs = [n for n in list_obsidian_notes(OSINT)
                  if "/personas/" in n or "/organizaciones/" in n]
    notas_rest = [n for n in list_obsidian_notes(NOTAS)
                  if "/.git/" not in n and "/dist/" not in n]
    # Deletions are deferred to the end so a note or attachment that is
    # matched by several entries can still be read/copied for each of them.
    att_del, docs_del, report = set(), set(), []

    for display, slug, rx, excl, move_loose in EXT:
        rxc = re.compile(rx, re.I)
        exclc = re.compile(excl, re.I) if excl else None
        ntok = set(slug.split("-")) | {"documentos", "documento"}
        own_index = f"organizaciones/{slug}"

        # 1. Cross-references inside osint (nothing is moved).
        xrefs = []
        for n in osint_docs:
            # [:-3] drops the extension; assumes every listed path ends in
            # ".md" -- TODO confirm against list_obsidian_notes.
            t = os.path.basename(n)[:-3]
            if not rxc.search(t) or (exclc and exclc.search(t)):
                continue
            rel = os.path.relpath(n, OSINT)[:-3]  # vault-relative, no ".md"
            if rel == own_index:
                # The index note written by a previous run matches its own
                # regex; linking it to itself would add a bogus entry and
                # keep the "nothing to reference" check below from firing.
                continue
            dueno = rel.split("/")[1] if rel.startswith(("personas/", "organizaciones/")) else ""
            xrefs.append((rel, t, dueno))

        # 2. Real loose notes (only when move_loose is set).
        moved = []
        if move_loose:
            att_rel = f"attachments/organizaciones/{slug}"
            att_abs = f"{OSINT}/{att_rel}"
            os.makedirs(att_abs, exist_ok=True)
            for n in notas_rest:
                t = os.path.basename(n)[:-3]
                if not rxc.search(t) or (exclc and exclc.search(t)):
                    continue
                if slugify_obsidian_name(t) == slug:
                    # Note named like the organization itself: it is the
                    # body of the index note and is handled separately.
                    continue
                dn = read_obsidian_note(n)

                # Pick a destination name that does not exist yet.
                ds = doc_slug(t, ntok)
                base, k = ds, 2
                while os.path.exists(f"{OSINT}/organizaciones/{slug}/{ds}.md"):
                    ds = f"{base}-{k}"
                    k += 1

                # Copy every embed that resolves; leave a marker otherwise.
                new = []
                for i, emb in enumerate(extract_obsidian_embeds(dn["body"]), 1):
                    ap = resolve_obsidian_embed(NOTAS, emb)
                    if not ap:
                        new.append(f"<!-- no encontrado: {emb} -->")
                        continue
                    ext = os.path.splitext(ap)[1].lower()
                    nn = f"{ds}-{i}{ext}"
                    shutil.copy2(ap, f"{att_abs}/{nn}")
                    att_del.add(ap)
                    new.append(f"![[{att_rel}/{nn}]]")

                # The original embeds are stripped from the text and the
                # rewritten ones are appended after it.
                text = re.sub(r'!\[\[[^\]]+\]\]', '', dn["body"]).strip()
                parts = [x for x in [text, "\n".join(new)] if x]
                create_obsidian_note(OSINT, f"organizaciones/{slug}/{ds}",
                                     body="\n\n".join(parts) if parts else "(sin contenido)",
                                     frontmatter={"tipo": "documento", "entidad": f"[[{slug}]]",
                                                  "fuente": "NotasDeObsidian/" + os.path.relpath(n, NOTAS)},
                                     overwrite=True)
                moved.append((t, ds))
                docs_del.add(n)

        # 3. Index note (skipped when there is nothing to reference).
        if not xrefs and not moved:
            continue
        bl = [f"Ficha de la organizacion externa {display}. Referencia de todo lo trabajado con ellos.", ""]
        if xrefs:
            bl += ["## Relacionado", ""]
            for rel, t, dueno in xrefs:
                bl.append(f"- [[{rel}|{t}]]" + (f" ({dueno})" if dueno else ""))
        if moved:
            # NOTE(review): this section lists only the notes moved in THIS
            # run. On a later run the sources are already gone, `moved` is
            # empty and the overwritten index loses the section -- confirm
            # whether previously moved documents should be re-listed.
            bl += ["", "## Documentos", ""] + [f"- [[organizaciones/{slug}/{ds}|{t}]]" for t, ds in moved]
        bl += ["", "## Notas", ""]
        create_obsidian_note(OSINT, f"organizaciones/{slug}", body="\n".join(bl),
                             frontmatter={"tipo": "organizacion", "nombre": display, "slug": slug,
                                          "externa": True, "tags": ["organizacion", "externa", "osint"]},
                             overwrite=True)
        report.append((display, slug, len(xrefs), len(moved)))

    # Move semantics: remove the originals now that every copy is written.
    for p in docs_del:
        try:
            os.remove(p)
        except FileNotFoundError:
            pass
    moved_att = 0
    for ap in att_del:
        try:
            os.remove(ap)
            moved_att += 1
        except FileNotFoundError:
            pass

    print(f"fichas externas creadas: {len(report)} | sueltas movidas: {len(docs_del)} | attachments: {moved_att}\n")
    for d, s, x, m in report:
        print(f" {d:20} -> organizaciones/{s} | xref={x} sueltas_movidas={m}")
|
|
|
|
|
|
# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|