8641b49bee
- tools/ Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
100 lines
3.6 KiB
Python
100 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Clasificador de notas sueltas de un vault Obsidian por titulo, via ask_llm (claude-direct).
|
|
|
|
Uso:
|
|
classify_notes.py [Vault] [--apply]
|
|
Vault por defecto: NotasDeObsidian.
|
|
Sin --apply: genera el plan (classify_plan_<vault>.json) y muestra distribucion, sin mover.
|
|
Con --apply: mueve cada nota suelta a su carpeta (in-situ; Obsidian resuelve links por nombre).
|
|
|
|
Taxonomia: estudio, tech, hacking, personal, finanzas, proyectos, otros.
|
|
"""
|
|
import sys, os, json, re, shutil
|
|
from collections import Counter
|
|
|
|
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
|
from core.ask_llm import ask_llm
|
|
from obsidian import list_obsidian_notes
|
|
|
|
# Directory that holds the Obsidian vaults; a vault's path is built as f"{OBS}/{vault}".
OBS = "/home/enmanuel/Obsidian"
# Valid category names: listed in the LLM prompt and used to validate its answers
# (anything outside this list falls back to "otros"). Also the destination folder names.
CATS = ["estudio", "tech", "hacking", "personal", "finanzas", "proyectos", "otros"]
# Number of note titles sent to the LLM in a single request.
BATCH = 50
|
|
|
|
|
|
def root_notes(vault):
    """Return the sorted, de-duplicated titles of the notes at the vault root.

    Args:
        vault: Name of the vault directory under ``OBS``.

    Returns:
        A sorted list of note titles. A title is the file's base name with
        its last three characters removed (presumably the ``.md`` suffix --
        confirm against what ``list_obsidian_notes`` yields).
    """
    vault_dir = f"{OBS}/{vault}"
    names = set()
    for note_path in list_obsidian_notes(vault_dir):
        # A separator in the vault-relative path means the note already
        # lives in a sub-folder, so it is not a loose root note.
        in_subfolder = "/" in os.path.relpath(note_path, vault_dir)
        if in_subfolder or "/.git/" in note_path or "/dist/" in note_path:
            continue
        names.add(os.path.basename(note_path)[:-3])
    return sorted(names)
|
|
|
|
|
|
def classify_batch(titles):
    """Ask the LLM to assign one category to each note title in a batch.

    Args:
        titles: Sequence of note titles. Each title is listed in the prompt
            prefixed with its 0-based position.

    Returns:
        The JSON object parsed from the LLM reply, which the prompt asks to
        be a mapping of index to category name (JSON object keys are always
        strings). Values are not validated here. Returns an empty dict when
        ``titles`` is empty, or when no JSON object can be parsed from the
        reply; in the latter case a warning is written to stderr.
    """
    if not titles:
        # Nothing to classify: skip the LLM request entirely.
        return {}

    listado = "\n".join(f"{i}. {t}" for i, t in enumerate(titles))
    prompt = (
        "Clasifica cada nota de Obsidian (vault personal de un dev espanol) en UNA categoria.\n"
        f"Categorias validas: {', '.join(CATS)}.\n"
        "Guia: estudio=apuntes de cursos/teoria (data science, matematicas, bases de datos, frameworks). "
        "tech=dev practico (comandos, docker, linux, git, apis, servidores, herramientas). "
        "hacking=seguridad ofensiva real (bug bounty, exploits, recon, osint tecnico, web hacking). "
        "personal=vida personal (citas, salud, tramites, viajes, familia). "
        "finanzas=banca personal, cripto, trading. proyectos=proyectos propios de software/negocio. "
        "otros=no encaja.\n"
        "Devuelve SOLO un objeto JSON {indice: categoria}, indices 0-based, sin texto extra.\n\n"
        + listado
    )
    raw = ask_llm(prompt, model="claude-haiku-4-5-20251001", echo=False)

    # Greedy match from the first "{" to the last "}" so any text the model
    # adds around the JSON object is ignored. A non-string reply (the return
    # type of ask_llm is not visible here) is treated as unparseable.
    m = re.search(r'\{.*\}', raw, re.S) if isinstance(raw, str) else None
    if m is None:
        print(
            f"aviso: respuesta del LLM sin objeto JSON; lote de {len(titles)} notas sin clasificar",
            file=sys.stderr,
        )
        return {}
    try:
        return json.loads(m.group(0))
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Keep the best-effort
        # contract (empty result) but make the failure visible.
        print(
            f"aviso: JSON invalido en la respuesta del LLM; lote de {len(titles)} notas sin clasificar",
            file=sys.stderr,
        )
        return {}
|
|
|
|
|
|
def main():
    """Classify the loose root notes of a vault and optionally move them.

    Command line: ``classify_notes.py [Vault] [--apply]``. The first
    argument that does not start with ``--`` is the vault name (default
    ``NotasDeObsidian``).

    Steps:
        1. Collect the root note titles and classify them in batches of
           ``BATCH``. Missing or unknown categories fall back to "otros".
        2. Write the title -> category plan as JSON and print the
           distribution with up to four example titles per category.
        3. With ``--apply``, move each note into ``<vault>/<category>/``,
           skipping notes whose source is gone or whose destination
           already exists.
    """
    positional = [a for a in sys.argv[1:] if not a.startswith("--")]
    vault = positional[0] if positional else "NotasDeObsidian"
    do_apply = "--apply" in sys.argv
    vault_dir = f"{OBS}/{vault}"
    plan_path = f"/home/enmanuel/fn_registry/projects/obsidian/tools/classify_plan_{vault}.json"

    titles = root_notes(vault)
    print(f"{vault}: {len(titles)} notas sueltas en raiz")

    plan = {}
    for start in range(0, len(titles), BATCH):
        chunk = titles[start:start + BATCH]
        res = classify_batch(chunk)
        for i, title in enumerate(chunk):
            # Look up the index as a string first (JSON object keys), then as
            # an int; anything missing or outside CATS becomes "otros".
            cat = str(res.get(str(i), res.get(i, "otros"))).strip().lower()
            plan[title] = cat if cat in CATS else "otros"

    # NOTE(review): indentation was lost in the reviewed copy; the plan is
    # assumed to be written once, after all batches -- confirm.
    # The context manager guarantees the file is flushed and closed.
    with open(plan_path, "w", encoding="utf-8") as fh:
        json.dump(plan, fh, ensure_ascii=False, indent=2)

    counts = Counter(plan.values())
    print("distribucion:", {k: counts[k] for k in CATS if counts[k]})
    for cat in CATS:
        examples = [t for t, assigned in plan.items() if assigned == cat][:4]
        if examples:
            print(f" [{cat}] " + " | ".join(examples))

    if do_apply:
        moved = 0
        for title, cat in plan.items():
            src = f"{vault_dir}/{title}.md"
            if not os.path.exists(src):
                continue
            dest_dir = f"{vault_dir}/{cat}"
            os.makedirs(dest_dir, exist_ok=True)
            dst = f"{dest_dir}/{title}.md"
            if os.path.exists(dst):
                # Never overwrite a note that already exists in the category folder.
                continue
            shutil.move(src, dst)
            moved += 1
        print(f"\nmovidas: {moved}")
|
|
|
|
|
|
# Run the classifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|