chore: auto-commit (6 archivos)
- CONVENTIONS.md
- tools/dedup_persons.py
- tools/extract_entities.py
- tools/migrate_external_orgs.py
- tools/normalize_person_frontmatter.py
- tools/person_datapoints.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
"""Index notes for EXTERNAL organizations (suppliers, banks, telcos, government) in osint.

Unlike the in-house companies (FenixFood/Biorganic), external organizations already
appear as documents inside personas/ and organizaciones/fenixfood-sl/. This script:

  1. Creates an index note organizaciones/<slug>.md (tipo: organizacion, externa: true)
     for each external organization.
  2. Cross-references (## Relacionado) the documents that already exist in osint and
     mention the organization, WITHOUT moving them (they stay under their owner:
     FenixFood or the person).
  3. For organizations that have real loose notes in NotasDeObsidian (move_loose=True),
     moves those notes to organizaciones/<slug>/<doc-slug>.md together with their
     attachments.

Intended to be idempotent (notes are created with overwrite=True). Loose notes are
MOVED (the original is deleted); per the author, a tar backup covers recovery.
"""
|
||||
import sys, os, re, shutil
|
||||
|
||||
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
||||
from obsidian import (read_obsidian_note, create_obsidian_note,
|
||||
slugify_obsidian_name, extract_obsidian_embeds, resolve_obsidian_embed,
|
||||
list_obsidian_notes)
|
||||
|
||||
# Source vault holding the loose notes, and the destination osint vault.
NOTAS = "/home/enmanuel/Obsidian/NotasDeObsidian"
OSINT = "/home/enmanuel/Obsidian/osint"

# Slug tokens that are dropped when deriving a document slug from a note title.
STOPWORDS = {
    "de", "del", "la", "las",
    "el", "los", "y", "a", "en",
}

# One row per external organization:
#   (display name, slug, match regex, false-positive exclusion regex or None, move_loose)
# Both regexes are compiled case-insensitively and searched against note file names.
EXT = [
    ("Santander",          "santander",          r"santander",                         None,              False),
    ("Unicaja",            "unicaja",            r"unicaja",                           None,              True),
    ("Abanca",             "abanca",             r"abanca",                            None,              False),
    ("BBVA",               "bbva",               r"\bbbva\b",                          None,              False),
    ("Cajamar",            "cajamar",            r"cajamar",                           None,              False),
    ("Vodafone",           "vodafone",           r"vodafone",                          None,              True),
    ("Orange",             "orange",             r"\borange\b",                        r"orange\s*pi",    False),
    ("Endesa",             "endesa",             r"endesa",                            None,              False),
    ("Ebury",              "ebury",              r"ebury",                             None,              False),
    ("OVH",                "ovh",                r"\bovh\b",                           None,              False),
    ("Legalitas",          "legalitas",          r"legalitas",                         None,              False),
    ("Revolut",            "revolut",            r"revolut",                           r"paypal2revolut", False),
    ("Kiwa",               "kiwa",               r"kiwa",                              None,              False),
    ("Martransit",         "martransit",         r"martransit",                        None,              False),
    ("Transportes Nieves", "transportes-nieves", r"transportes\s*nieves",              None,              False),
    ("AEAT",               "aeat",               r"\baeat\b|agencia\s+tributaria",     None,              True),
]
|
||||
|
||||
|
||||
def doc_slug(title, ntok):
    """Derive a short document slug from a note title.

    The title is passed through ``slugify_obsidian_name`` and split on "-".
    Empty pieces, pieces contained in ``ntok`` and pieces contained in
    ``STOPWORDS`` are discarded; the remaining pieces are re-joined with "-".

    Args:
        title: Note title to convert.
        ntok: Collection of tokens to drop (the caller passes the organization
            slug tokens plus "documentos"/"documento").

    Returns:
        The joined slug, or "documento" when no piece survives the filtering.
    """
    kept = []
    for piece in slugify_obsidian_name(title).split("-"):
        if not piece or piece in ntok or piece in STOPWORDS:
            continue
        kept.append(piece)
    joined = "-".join(kept)
    return joined if joined else "documento"
|
||||
|
||||
|
||||
def main():
    """Create the index notes for every external organization listed in EXT.

    For each EXT row this function:

      1. Collects cross-references to notes already stored under personas/ or
         organizaciones/ in the osint vault whose file name matches the
         organization regex and does not match the exclusion regex. Those
         notes are only linked, never moved.
      2. When ``move_loose`` is set, re-creates the matching NotasDeObsidian
         notes as organizaciones/<slug>/<doc-slug>, copying every resolvable
         embed to attachments/organizaciones/<slug>/ and rewriting the embed
         links to the copied files.
      3. Writes the index note organizaciones/<slug> (with overwrite=True),
         unless there is nothing to reference.

    The original loose notes and attachments are removed only after all
    organizations have been processed; a summary is printed at the end.
    """
    osint_docs = [n for n in list_obsidian_notes(OSINT)
                  if "/personas/" in n or "/organizaciones/" in n]
    notas_rest = [n for n in list_obsidian_notes(NOTAS)
                  if "/.git/" not in n and "/dist/" not in n]
    att_del, docs_del, report = set(), set(), []

    for display, slug, rx, excl, move_loose in EXT:
        rxc = re.compile(rx, re.I)
        exclc = re.compile(excl, re.I) if excl else None
        # Tokens removed from document slugs: the organization name itself
        # plus the generic words "documentos"/"documento".
        ntok = set(slug.split("-")) | {"documentos", "documento"}
        # Vault-relative path (without ".md") of this organization's index note.
        own_rel = f"organizaciones/{slug}"

        # 1. Cross-references inside osint (nothing is moved).
        xrefs = []
        for n in osint_docs:
            t = os.path.basename(n)[:-3]  # drops the last 3 chars (assumes ".md")
            if not rxc.search(t) or (exclc and exclc.search(t)):
                continue
            rel = os.path.relpath(n, OSINT)[:-3]  # without ".md"
            if rel == own_rel:
                # The index note written by a previous run lives under
                # organizaciones/ and matches its own regex; linking it to
                # itself would add a bogus "Relacionado" entry and defeat the
                # "nothing to reference" skip below.
                continue
            dueno = rel.split("/")[1] if rel.startswith(("personas/", "organizaciones/")) else ""
            xrefs.append((rel, t, dueno))

        # 2. Real loose notes (only when move_loose is set).
        moved = []
        if move_loose:
            att_rel = f"attachments/organizaciones/{slug}"
            att_abs = f"{OSINT}/{att_rel}"
            os.makedirs(att_abs, exist_ok=True)
            for n in notas_rest:
                t = os.path.basename(n)[:-3]
                if not rxc.search(t) or (exclc and exclc.search(t)):
                    continue
                if slugify_obsidian_name(t) == slug:
                    # Note named exactly like the organization: meant to be the
                    # body of the index note and handled separately.
                    continue
                dn = read_obsidian_note(n)
                ds = doc_slug(t, ntok)
                # Avoid clobbering an existing document: append -2, -3, ...
                base, k = ds, 2
                while os.path.exists(f"{OSINT}/organizaciones/{slug}/{ds}.md"):
                    ds = f"{base}-{k}"
                    k += 1
                new = []
                for i, emb in enumerate(extract_obsidian_embeds(dn["body"]), 1):
                    ap = resolve_obsidian_embed(NOTAS, emb)
                    if not ap:
                        # Keep a trace of embeds that could not be resolved.
                        new.append(f"<!-- no encontrado: {emb} -->")
                        continue
                    ext = os.path.splitext(ap)[1].lower()
                    nn = f"{ds}-{i}{ext}"
                    shutil.copy2(ap, f"{att_abs}/{nn}")
                    att_del.add(ap)  # original is deleted at the end of the run
                    new.append(f"![[{att_rel}/{nn}]]")
                # Strip every original embed; the rewritten ones go after the text.
                text = re.sub(r'!\[\[[^\]]+\]\]', '', dn["body"]).strip()
                parts = [x for x in [text, "\n".join(new)] if x]
                create_obsidian_note(OSINT, f"organizaciones/{slug}/{ds}",
                                     body="\n\n".join(parts) if parts else "(sin contenido)",
                                     frontmatter={"tipo": "documento", "entidad": f"[[{slug}]]",
                                                  "fuente": "NotasDeObsidian/" + os.path.relpath(n, NOTAS)},
                                     overwrite=True)
                moved.append((t, ds))
                docs_del.add(n)

        # 3. Index note (skipped when there is nothing to reference).
        if not xrefs and not moved:
            continue
        # NOTE(review): on a re-run the loose originals are already deleted, so
        # `moved` is empty and the overwritten index note no longer carries its
        # "## Documentos" section -- confirm whether that is acceptable.
        bl = [f"Ficha de la organizacion externa {display}. Referencia de todo lo trabajado con ellos.", ""]
        if xrefs:
            bl += ["## Relacionado", ""]
            for rel, t, dueno in xrefs:
                bl.append(f"- [[{rel}|{t}]]" + (f" ({dueno})" if dueno else ""))
        if moved:
            bl += ["", "## Documentos", ""] + [f"- [[organizaciones/{slug}/{ds}|{t}]]" for t, ds in moved]
        bl += ["", "## Notas", ""]
        create_obsidian_note(OSINT, f"organizaciones/{slug}", body="\n".join(bl),
                             frontmatter={"tipo": "organizacion", "nombre": display, "slug": slug,
                                          "externa": True, "tags": ["organizacion", "externa", "osint"]},
                             overwrite=True)
        report.append((display, slug, len(xrefs), len(moved)))

    # Deletion is deferred until here so a note or attachment matched by
    # several organizations is still readable while each of them is processed.
    for p in docs_del:
        try:
            os.remove(p)
        except FileNotFoundError:
            pass
    moved_att = 0
    for ap in att_del:
        try:
            os.remove(ap)
            moved_att += 1
        except FileNotFoundError:
            pass

    print(f"fichas externas creadas: {len(report)} | sueltas movidas: {len(docs_del)} | attachments: {moved_att}\n")
    for d, s, x, m in report:
        print(f" {d:20} -> organizaciones/{s} | xref={x} sueltas_movidas={m}")
|
||||
|
||||
|
||||
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user