f771c9b883
- CONVENTIONS.md - tools/dedup_persons.py - tools/extract_entities.py - tools/migrate_external_orgs.py - tools/normalize_person_frontmatter.py - tools/person_datapoints.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
63 lines
2.7 KiB
Python
63 lines
2.7 KiB
Python
#!/usr/bin/env python3
"""Report datapoints and a reliability/completeness score per person in osint.

For each note personas/<slug>.md it computes:
- completeness score: identity fields present / 7 * 100
  (sexo, fecha_nacimiento, dni, telefono, email, direccion, pais)
- total datapoints: identity fields present + number of documents
  + number of attachments + relations
- missing fields (when the score is < 100%)

Output: table sorted by ascending score (least reliable first) + global totals.
With --json it prints the detail as JSON. Read-only.
"""
|
|
import sys, os, glob, json
|
|
|
|
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
|
from obsidian import read_obsidian_note
|
|
|
|
# Root directory of the vault; person notes are globbed from
# <OSINT>/personas/*.md and attachments from <OSINT>/attachments/personas/.
OSINT = "/home/enmanuel/Obsidian/osint"
# Frontmatter keys counted as identity fields; the completeness score is the
# share of these keys that hold a non-empty value.
IDENT = ["sexo", "fecha_nacimiento", "dni", "telefono", "email", "direccion", "pais"]
|
|
|
|
|
|
def _person_row(fp, slug):
    """Build the report row (a plain dict) for the person note at ``fp``.

    ``slug`` is the note's file name without extension; it also names the
    per-person documents and attachments directories.
    """
    # Treat a note whose frontmatter is missing/None as having no fields
    # instead of crashing on ``.get``.
    fm = read_obsidian_note(fp)["frontmatter"] or {}
    present = [k for k in IDENT if fm.get(k) not in (None, "", [])]
    missing = [k for k in IDENT if k not in present]
    score = round(len(present) / len(IDENT) * 100)
    # Escape the slug so glob metacharacters in a file name ("[", "?", "*")
    # are matched literally rather than interpreted as a pattern.
    safe_slug = glob.escape(slug)
    ndocs = len(glob.glob(f"{OSINT}/personas/{safe_slug}/*.md"))
    natt = len(glob.glob(f"{OSINT}/attachments/personas/{safe_slug}/*"))
    # NOTE(review): assumes "relaciones" is a list (or absent) — confirm.
    nrel = len(fm.get("relaciones") or [])
    return {"slug": slug, "score": score,
            "datapoints": len(present) + ndocs + natt + nrel,
            "ident": len(present), "docs": ndocs, "attachments": natt,
            "relaciones": nrel, "faltan": missing}


def main():
    """Print the per-person completeness/datapoint report.

    Scans ``<OSINT>/personas/*.md`` (skipping notes whose name starts with
    ``_``), builds one row per person and prints either a text table or,
    when ``--json`` is present in ``sys.argv``, a JSON document. Rows are
    sorted by ascending score, then by descending datapoint count.
    """
    as_json = "--json" in sys.argv
    rows = []
    for fp in sorted(glob.glob(f"{OSINT}/personas/*.md")):
        slug = os.path.splitext(os.path.basename(fp))[0]
        if slug.startswith("_"):
            continue
        rows.append(_person_row(fp, slug))
    tot_dp = sum(r["datapoints"] for r in rows)

    rows.sort(key=lambda r: (r["score"], -r["datapoints"]))
    if as_json:
        print(json.dumps({"total_datapoints": tot_dp, "personas": rows}, ensure_ascii=False, indent=2))
        return

    total = len(rows)
    # With no person notes the averages are reported as 0 instead of
    # raising ZeroDivisionError.
    avg_score = round(sum(r["score"] for r in rows) / total) if total else 0
    print(f"PERSONAS: {total} | datapoints totales: {tot_dp} | "
          f"score medio: {avg_score}%\n")
    print(f"{'persona':38} {'score':>5} {'dp':>4} {'id':>3} {'doc':>4} {'att':>4} {'rel':>4} faltan")
    print("-" * 100)
    for r in rows:
        # Only incomplete notes get the trailing list of missing fields.
        flag = "" if r["score"] == 100 else " <-- " + ",".join(r["faltan"])
        print(f"{r['slug']:38} {r['score']:>4}% {r['datapoints']:>4} {r['ident']:>3} "
              f"{r['docs']:>4} {r['attachments']:>4} {r['relaciones']:>4}{flag}")
    bajo = [r for r in rows if r["score"] < 100]
    pct_bajo = round(len(bajo) / total * 100) if total else 0
    print(f"\nfichas por debajo del 100%: {len(bajo)}/{total} "
          f"({pct_bajo}%)")
|
|
|
|
|
|
# Run the report only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|