chore: auto-commit (6 archivos)
- CONVENTIONS.md - tools/dedup_persons.py - tools/extract_entities.py - tools/migrate_external_orgs.py - tools/normalize_person_frontmatter.py - tools/person_datapoints.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deduplica fichas de persona en osint cuando el slug de una es subconjunto estricto de
|
||||
tokens de otra (p.ej. manuel-torrubia < simon-manuel-torrubia = misma persona, nombre largo).
|
||||
|
||||
Fusiona la corta en la larga (canonica = nombre mas completo): campos no-null, aliases, docs,
|
||||
attachments y body. Borra la duplicada. Con --apply ejecuta; sin flag solo lista candidatos.
|
||||
"""
|
||||
import sys, os, glob, shutil
|
||||
|
||||
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
||||
from obsidian import read_obsidian_note, create_obsidian_note, delete_obsidian_note
|
||||
|
||||
OSINT = "/home/enmanuel/Obsidian/osint"
|
||||
|
||||
|
||||
def load():
    """Read every person note under ``{OSINT}/personas`` into a dict keyed by slug.

    The slug is the note's file name without its extension. Notes whose slug
    starts with an underscore are skipped.

    Returns:
        A dict mapping slug -> the value ``read_obsidian_note`` returns for
        that file.
    """
    notes = {}
    for path in glob.glob(f"{OSINT}/personas/*.md"):
        slug, _ext = os.path.splitext(os.path.basename(path))
        if not slug.startswith("_"):
            notes[slug] = read_obsidian_note(path)
    return notes
|
||||
|
||||
|
||||
def _as_list(value):
    """Normalise a frontmatter value to a list (None/"" -> [], scalar -> [scalar])."""
    if value is None or value == "":
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return [value]


def _move_children(src_dir, dst_dir, tag):
    """Move every entry matched by ``src_dir/*`` into ``dst_dir`` without clobbering.

    ``dst_dir`` is created on demand. When an entry with the same name already
    exists in ``dst_dir`` the incoming one is renamed to ``<stem>__<tag><ext>``
    (with a numeric suffix if that is taken too) instead of overwriting it.
    """
    for src in glob.glob(f"{src_dir}/*"):
        os.makedirs(dst_dir, exist_ok=True)
        name = os.path.basename(src)
        stem, ext = os.path.splitext(name)
        dst = os.path.join(dst_dir, name)
        attempt = 0
        while os.path.lexists(dst):
            attempt += 1
            suffix = tag if attempt == 1 else f"{tag}-{attempt}"
            dst = os.path.join(dst_dir, f"{stem}__{suffix}{ext}")
        if attempt:
            print(f" colision: {name} -> {os.path.basename(dst)}")
        shutil.move(src, dst)


def main():
    """List, and with ``--apply`` perform, merges of duplicate person notes.

    A note ``a`` is a merge candidate for note ``b`` when the set of
    dash-separated tokens of slug ``a`` is a strict subset of the tokens of
    slug ``b`` and ``a`` has at least two distinct tokens. For each short slug
    the superset with the longest slug string is chosen as the canonical note.

    Without ``--apply`` in ``sys.argv`` the candidates are only printed. With
    it, each short note is merged into its canonical note:

    * frontmatter fields that are empty in the canonical note are filled from
      the short one;
    * ``aliases`` becomes the union of both notes' aliases plus the short
      note's ``nombre``;
    * the short body is appended after a ``<!-- fusionado desde ... -->`` marker;
    * entries under ``personas/<a>/`` and ``attachments/personas/<a>/`` are
      moved to the canonical slug's directories;
    * the short note is deleted and its now-empty directories removed.
    """
    apply = "--apply" in sys.argv
    fichas = load()

    # Tokenise each slug once instead of re-splitting inside the pairwise loop.
    tokens = {slug: set(slug.split("-")) for slug in fichas}

    # For each short slug keep the superset with the longest slug string
    # (first one wins on ties). ``ta < tb`` is a strict-subset test, so a slug
    # never pairs with itself; the >=2 token floor avoids matching on a lone
    # first name.
    best = {}
    for a, ta in tokens.items():
        if len(ta) < 2:
            continue
        for b, tb in tokens.items():
            if ta < tb and (a not in best or len(b) > len(best[a])):
                best[a] = b

    print(f"candidatos a fusion: {len(best)}")
    for a, b in best.items():
        print(f" {a} -> {b}")
    if not apply:
        print("\n(dry-run; usa --apply)")
        return

    # Records slug -> slug it was merged into, so a target that has itself
    # been merged away is redirected to the note that now holds its content.
    # Targets are strict token supersets, so these chains cannot loop.
    merged_into = {}
    for a, b in best.items():
        while b in merged_into:
            b = merged_into[b]
        if a not in fichas or b not in fichas:
            continue
        fa, fb = fichas[a], fichas[b]
        fa_fm = fa["frontmatter"] or {}
        new = dict(fb["frontmatter"] or {})
        for k, v in fa_fm.items():
            if v not in (None, "", []) and new.get(k) in (None, "", []):
                new[k] = v
        # Union the aliases of both notes; the field loop above only copies
        # the short note's aliases when the canonical note has none.
        al = set(_as_list(new.get("aliases"))) | set(_as_list(fa_fm.get("aliases")))
        al.add(fa_fm.get("nombre"))
        new["aliases"] = sorted(x for x in al if x)
        body = fb["body"].rstrip() + f"\n\n<!-- fusionado desde {a} -->\n" + fa["body"].strip()
        # Move docs and attachments from the short slug to the canonical one.
        _move_children(f"{OSINT}/personas/{a}", f"{OSINT}/personas/{b}", a)
        _move_children(
            f"{OSINT}/attachments/personas/{a}", f"{OSINT}/attachments/personas/{b}", a
        )
        create_obsidian_note(OSINT, f"personas/{b}", body=body, frontmatter=new, overwrite=True)
        delete_obsidian_note(f"{OSINT}/personas/{a}.md")
        for empty in (f"{OSINT}/personas/{a}", f"{OSINT}/attachments/personas/{a}"):
            if os.path.isdir(empty) and not os.listdir(empty):
                os.rmdir(empty)
        # Keep the in-memory view in sync with disk so later merges that touch
        # ``b`` see the merged content and ``a`` is known to be gone.
        fichas[b] = {"frontmatter": new, "body": body}
        del fichas[a]
        merged_into[a] = b
        print(f" fusionado {a} -> {b}")
|
||||
|
||||
|
||||
# Script entry point: run the dedup only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user