chore: auto-commit (6 archivos)
- CONVENTIONS.md - tools/dedup_persons.py - tools/extract_entities.py - tools/migrate_external_orgs.py - tools/normalize_person_frontmatter.py - tools/person_datapoints.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reporte de datapoints y score de fiabilidad/completitud por persona en osint.
|
||||
|
||||
Para cada ficha personas/<slug>.md calcula:
|
||||
- score de completitud: campos de identidad presentes / 7 * 100
|
||||
(sexo, fecha_nacimiento, dni, telefono, email, direccion, pais)
|
||||
- datapoints totales: campos de identidad presentes + nº documentos + nº attachments + relaciones
|
||||
- campos faltantes (cuando el score < 100%)
|
||||
|
||||
Salida: tabla ordenada por score asc (las menos fiables primero) + totales globales.
|
||||
Con --json imprime el detalle como JSON. Read-only.
|
||||
"""
|
||||
import sys, os, glob, json
|
||||
|
||||
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
||||
from obsidian import read_obsidian_note
|
||||
|
||||
# Root directory of the osint notes tree. main() globs person notes from
# <OSINT>/personas/*.md, per-person documents from <OSINT>/personas/<slug>/
# and attachments from <OSINT>/attachments/personas/<slug>/.
OSINT = "/home/enmanuel/Obsidian/osint"
|
||||
# Identity frontmatter keys checked on every person note. The completeness
# score is the share of these keys holding a non-empty value.
IDENT = ["sexo", "fecha_nacimiento", "dni", "telefono", "email", "direccion", "pais"]
|
||||
|
||||
|
||||
def _count_matches(directory, pattern):
    """Count the paths matching *pattern* inside *directory*.

    The directory part is escaped so that glob metacharacters appearing in a
    slug ("[", "]", "*", "?") are matched literally instead of as wildcards.
    """
    return len(glob.glob(f"{glob.escape(directory)}/{pattern}"))


def main():
    """Print a completeness/datapoint report for every person note.

    For each ``<OSINT>/personas/<slug>.md`` note (slugs starting with "_" are
    skipped) the report holds:

    - ``score``: percentage of the IDENT frontmatter keys that are non-empty;
    - ``datapoints``: identity fields present + documents + attachments
      + relations;
    - ``faltan``: the IDENT keys that are missing or empty.

    Rows are sorted by ascending score, ties broken by descending datapoints.
    With ``--json`` among the command-line arguments the detail is printed as
    JSON; otherwise a text table with global totals is printed. The function
    only reads from disk and writes to stdout.
    """
    as_json = "--json" in sys.argv
    rows = []
    tot_dp = 0
    for fp in sorted(glob.glob(f"{OSINT}/personas/*.md")):
        slug = os.path.splitext(os.path.basename(fp))[0]
        if slug.startswith("_"):
            continue
        # Fall back to an empty mapping so a note without frontmatter is
        # reported as 0% complete instead of crashing on .get().
        fm = read_obsidian_note(fp)["frontmatter"] or {}
        present = [k for k in IDENT if fm.get(k) not in (None, "", [])]
        missing = [k for k in IDENT if k not in present]
        score = round(len(present) / len(IDENT) * 100)
        ndocs = _count_matches(f"{OSINT}/personas/{slug}", "*.md")
        natt = _count_matches(f"{OSINT}/attachments/personas/{slug}", "*")
        relations = fm.get("relaciones") or []
        # If the frontmatter holds a single string instead of a list, count it
        # as one relation; len() would count its characters.
        nrel = 1 if isinstance(relations, str) else len(relations)
        dp = len(present) + ndocs + natt + nrel
        tot_dp += dp
        rows.append({"slug": slug, "score": score, "datapoints": dp,
                     "ident": len(present), "docs": ndocs, "attachments": natt,
                     "relaciones": nrel, "faltan": missing})

    # Least complete first; among equal scores, the richest notes first.
    rows.sort(key=lambda r: (r["score"], -r["datapoints"]))
    if as_json:
        print(json.dumps({"total_datapoints": tot_dp, "personas": rows}, ensure_ascii=False, indent=2))
        return

    # Guard both averages: with no person notes len(rows) is 0 and the
    # divisions would raise ZeroDivisionError.
    mean_score = round(sum(r['score'] for r in rows) / len(rows)) if rows else 0
    print(f"PERSONAS: {len(rows)} | datapoints totales: {tot_dp} | "
          f"score medio: {mean_score}%\n")
    print(f"{'persona':38} {'score':>5} {'dp':>4} {'id':>3} {'doc':>4} {'att':>4} {'rel':>4} faltan")
    print("-" * 100)
    for r in rows:
        flag = "" if r["score"] == 100 else "  <-- " + ",".join(r["faltan"])
        print(f"{r['slug']:38} {r['score']:>4}% {r['datapoints']:>4} {r['ident']:>3} "
              f"{r['docs']:>4} {r['attachments']:>4} {r['relaciones']:>4}{flag}")
    bajo = [r for r in rows if r["score"] < 100]
    bajo_pct = round(len(bajo) / len(rows) * 100) if rows else 0
    print(f"\nfichas por debajo del 100%: {len(bajo)}/{len(rows)} "
          f"({bajo_pct}%)")
|
||||
|
||||
|
||||
# Run the report only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user