Files
obsidian/tools/classify_notes.py
T
egutierrez 8641b49bee chore: auto-commit (1 archivos)
- tools/

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-11 00:16:47 +02:00

100 lines
3.6 KiB
Python

#!/usr/bin/env python3
"""Classify the loose notes of an Obsidian vault by title, via ask_llm (claude-direct).

Usage:
    classify_notes.py [Vault] [--apply]

Default vault: NotasDeObsidian.
Without --apply: writes the plan (classify_plan_<vault>.json) and prints the
distribution, without moving anything.
With --apply: moves each loose note into its category folder (in place;
Obsidian resolves links by name).
Taxonomy: estudio, tech, hacking, personal, finanzas, proyectos, otros.
"""
import sys, os, json, re, shutil
from collections import Counter
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
from core.ask_llm import ask_llm
from obsidian import list_obsidian_notes
# Directory that holds the Obsidian vaults; a vault's path is built as f"{OBS}/{vault}".
OBS = "/home/enmanuel/Obsidian"
# Accepted category labels; main() replaces any label outside this list with "otros".
CATS = ["estudio", "tech", "hacking", "personal", "finanzas", "proyectos", "otros"]
# Number of note titles main() passes to each classify_batch() call.
BATCH = 50
def root_notes(vault):
    """Return the sorted, de-duplicated titles of the notes lying in a vault's root.

    ``vault`` is the vault directory name under ``OBS``. A path yielded by
    ``list_obsidian_notes`` is kept only when its path relative to the vault
    contains no ``/`` (so it is not inside a subfolder) and the full path
    contains neither ``/.git/`` nor ``/dist/``.

    The title is the file's basename with its last three characters removed.
    NOTE(review): this presumably strips a ``.md`` suffix -- confirm that
    ``list_obsidian_notes`` only yields ``.md`` files.
    """
    vault_dir = f"{OBS}/{vault}"
    loose_titles = {
        os.path.basename(note_path)[:-3]
        for note_path in list_obsidian_notes(vault_dir)
        if "/" not in os.path.relpath(note_path, vault_dir)
        and "/.git/" not in note_path
        and "/dist/" not in note_path
    }
    return sorted(loose_titles)
def classify_batch(titles):
    """Ask the LLM to assign one category to each title and return its JSON answer.

    Args:
        titles: sequence of note titles; they are numbered 0-based in the prompt.

    Returns:
        The JSON object found in the model's reply, as a dict mapping an index
        (normally a string key such as ``"0"``) to a category label, or ``{}``
        when the reply contains no parseable JSON object. Labels are not
        validated here.

    NOTE(review): assumes ``ask_llm`` returns the reply as a ``str`` -- confirm.
    """
    listado = "\n".join(f"{i}. {t}" for i, t in enumerate(titles))
    prompt = (
        "Clasifica cada nota de Obsidian (vault personal de un dev espanol) en UNA categoria.\n"
        f"Categorias validas: {', '.join(CATS)}.\n"
        "Guia: estudio=apuntes de cursos/teoria (data science, matematicas, bases de datos, frameworks). "
        "tech=dev practico (comandos, docker, linux, git, apis, servidores, herramientas). "
        "hacking=seguridad ofensiva real (bug bounty, exploits, recon, osint tecnico, web hacking). "
        "personal=vida personal (citas, salud, tramites, viajes, familia). "
        "finanzas=banca personal, cripto, trading. proyectos=proyectos propios de software/negocio. "
        "otros=no encaja.\n"
        "Devuelve SOLO un objeto JSON {indice: categoria}, indices 0-based, sin texto extra.\n\n"
        + listado
    )
    raw = ask_llm(prompt, model="claude-haiku-4-5-20251001", echo=False)

    # Decode the first well-formed JSON object in the reply. A greedy
    # first-"{"-to-last-"}" match breaks as soon as the model adds any other
    # brace (e.g. echoing "{indice: categoria}" or appending a remark), which
    # would silently discard the whole batch. raw_decode() stops at the end of
    # the object and ignores trailing text; on failure the next "{" is tried.
    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            start = raw.find("{", start + 1)
        else:
            # Decoding began at "{", so the decoded value is always a dict.
            return parsed
    return {}
def main():
    """Build the classification plan for a vault's loose notes and optionally apply it.

    Command line: ``[Vault] [--apply]``. The first argument that does not start
    with ``--`` is the vault name (default ``NotasDeObsidian``).

    Steps:
      1. Collect root-level note titles with ``root_notes``.
      2. Classify them in chunks of ``BATCH`` via ``classify_batch``; a missing
         or unknown label becomes ``"otros"``.
      3. Write the plan (title -> category) as JSON to ``classify_plan_<vault>.json``
         and print the distribution plus up to four example titles per category.
      4. With ``--apply``, move each ``<title>.md`` from the vault root into the
         folder named after its category. A note whose source no longer exists
         or whose destination already exists is left untouched.
    """
    args = [a for a in sys.argv[1:] if not a.startswith("--")]
    vault = args[0] if args else "NotasDeObsidian"
    apply = "--apply" in sys.argv
    vp = f"{OBS}/{vault}"
    plan_path = f"/home/enmanuel/fn_registry/projects/obsidian/tools/classify_plan_{vault}.json"
    titles = root_notes(vault)
    print(f"{vault}: {len(titles)} notas sueltas en raiz")

    plan = {}
    for b in range(0, len(titles), BATCH):
        chunk = titles[b:b + BATCH]
        res = classify_batch(chunk)
        if not res:
            # An empty answer sends the whole chunk to "otros"; make that
            # visible instead of letting it look like a real classification.
            print(
                f"aviso: lote {b}-{b + len(chunk) - 1} sin clasificacion valida; "
                "se asigna 'otros'",
                file=sys.stderr,
            )
        for i, t in enumerate(chunk):
            # JSON object keys are strings, but tolerate integer keys as well.
            cat = str(res.get(str(i), res.get(i, "otros"))).strip().lower()
            plan[t] = cat if cat in CATS else "otros"

    # Context manager guarantees the plan is flushed and the handle closed.
    with open(plan_path, "w", encoding="utf-8") as plan_file:
        json.dump(plan, plan_file, ensure_ascii=False, indent=2)

    c = Counter(plan.values())
    print("distribucion:", {k: c[k] for k in CATS if c[k]})
    for cat in CATS:
        ej = [t for t, cc in plan.items() if cc == cat][:4]
        if ej:
            print(f" [{cat}] " + " | ".join(ej))

    if apply:
        moved = 0
        skipped = 0
        for t, cat in plan.items():
            src = f"{vp}/{t}.md"
            if not os.path.exists(src):
                continue
            dd = f"{vp}/{cat}"
            os.makedirs(dd, exist_ok=True)
            dst = f"{dd}/{t}.md"
            if os.path.exists(dst):
                # Never overwrite an existing note; count it so it is reported.
                skipped += 1
                continue
            shutil.move(src, dst)
            moved += 1
        print(f"\nmovidas: {moved}")
        if skipped:
            print(f"omitidas (destino ya existe): {skipped}")
# Run the classifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()