chore: auto-commit (1 archivos)
- tools/ Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python3
"""Classify the loose notes of an Obsidian vault by title, via ask_llm (claude-direct).

Usage:
  classify_notes.py [Vault] [--apply]
  Default vault: NotasDeObsidian.
  Without --apply: writes the plan (classify_plan_<vault>.json) and prints the distribution, moving nothing.
  With --apply: moves each loose note into its category folder (in place; Obsidian resolves links by name).

Taxonomy: estudio, tech, hacking, personal, finanzas, proyectos, otros.
"""
|
||||
import sys, os, json, re, shutil
|
||||
from collections import Counter
|
||||
|
||||
sys.path.insert(0, "/home/enmanuel/fn_registry/python/functions")
|
||||
from core.ask_llm import ask_llm
|
||||
from obsidian import list_obsidian_notes
|
||||
|
||||
# Directory that holds the vaults; a vault's path is built as f"{OBS}/{vault}".
OBS = "/home/enmanuel/Obsidian"
# Allowed categories; main() maps any other answer to "otros".
CATS = ["estudio", "tech", "hacking", "personal", "finanzas", "proyectos", "otros"]
# Number of titles sent to the LLM per classify_batch() call.
BATCH = 50
|
||||
|
||||
|
||||
def root_notes(vault):
    """Return the sorted, de-duplicated titles of notes lying directly in the vault root.

    Args:
        vault: vault directory name, resolved under ``OBS``.

    Returns:
        A sorted list of titles. A note is kept only when its path relative
        to the vault has no ``/`` (it is not inside a subfolder) and its full
        path contains neither ``/.git/`` nor ``/dist/``.
    """
    vault_dir = f"{OBS}/{vault}"
    names = {
        # Drops the last three characters of the file name — presumably the
        # ".md" extension; confirm list_obsidian_notes only yields .md files.
        os.path.basename(path)[:-3]
        for path in list_obsidian_notes(vault_dir)
        if "/" not in os.path.relpath(path, vault_dir)
        and "/.git/" not in path
        and "/dist/" not in path
    }
    return sorted(names)
|
||||
|
||||
|
||||
def classify_batch(titles):
    """Ask the LLM to assign one category to each title of a batch.

    Args:
        titles: sequence of note titles. They are sent to the model as a
            numbered list whose indices are 0-based positions in ``titles``.

    Returns:
        The dict decoded from the span between the first ``{`` and the last
        ``}`` of the model's reply. It is expected to map index to category,
        but neither keys nor values are validated here. An empty dict is
        returned when ``titles`` is empty, or when the reply contains no JSON
        object or the object cannot be decoded; in the latter two cases a
        warning is printed to stderr so the failure is visible to the user.
    """
    # Nothing to classify: skip the (pointless) model call.
    if not titles:
        return {}

    listado = "\n".join(f"{i}. {t}" for i, t in enumerate(titles))
    prompt = (
        "Clasifica cada nota de Obsidian (vault personal de un dev espanol) en UNA categoria.\n"
        f"Categorias validas: {', '.join(CATS)}.\n"
        "Guia: estudio=apuntes de cursos/teoria (data science, matematicas, bases de datos, frameworks). "
        "tech=dev practico (comandos, docker, linux, git, apis, servidores, herramientas). "
        "hacking=seguridad ofensiva real (bug bounty, exploits, recon, osint tecnico, web hacking). "
        "personal=vida personal (citas, salud, tramites, viajes, familia). "
        "finanzas=banca personal, cripto, trading. proyectos=proyectos propios de software/negocio. "
        "otros=no encaja.\n"
        "Devuelve SOLO un objeto JSON {indice: categoria}, indices 0-based, sin texto extra.\n\n"
        + listado
    )
    raw = ask_llm(prompt, model="claude-haiku-4-5-20251001", echo=False)

    # Greedy match with DOTALL: tolerates text around the JSON object and
    # newlines inside it.
    m = re.search(r'\{.*\}', raw, re.S)
    if not m:
        print("aviso: la respuesta del LLM no contiene un objeto JSON; lote sin clasificar",
              file=sys.stderr)
        return {}
    try:
        return json.loads(m.group(0))
    except (ValueError, RecursionError) as exc:
        # json.JSONDecodeError is a ValueError; RecursionError covers absurdly
        # nested replies. Stay best-effort, but do not fail silently.
        print(f"aviso: JSON invalido en la respuesta del LLM ({exc}); lote sin clasificar",
              file=sys.stderr)
        return {}
|
||||
|
||||
|
||||
def main():
    """Classify the loose root notes of a vault and optionally move them into category folders.

    Command line: ``[Vault] [--apply]``. The first argument that does not
    start with ``--`` is the vault name (default ``NotasDeObsidian``).

    Steps:
      1. Collect root-level note titles with ``root_notes``.
      2. Classify them in chunks of ``BATCH`` titles with ``classify_batch``;
         a missing or unknown category becomes ``"otros"``.
      3. Write the title -> category plan as JSON and print the distribution
         plus up to four example titles per category.
      4. With ``--apply``, move each note to ``<vault>/<category>/``; notes
         whose source is gone or whose destination already exists are skipped.
    """
    positional = [a for a in sys.argv[1:] if not a.startswith("--")]
    vault = positional[0] if positional else "NotasDeObsidian"
    apply = "--apply" in sys.argv
    vp = f"{OBS}/{vault}"
    plan_path = f"/home/enmanuel/fn_registry/projects/obsidian/tools/classify_plan_{vault}.json"

    titles = root_notes(vault)
    print(f"{vault}: {len(titles)} notas sueltas en raiz")
    plan = {}
    for b in range(0, len(titles), BATCH):
        chunk = titles[b:b + BATCH]
        res = classify_batch(chunk)
        for i, t in enumerate(chunk):
            # JSON object keys decode as strings; the int lookup is a fallback.
            cat = str(res.get(str(i), res.get(i, "otros"))).strip().lower()
            plan[t] = cat if cat in CATS else "otros"
    # Context manager guarantees the plan is flushed and the handle closed
    # before anything is moved.
    with open(plan_path, "w", encoding="utf-8") as fh:
        json.dump(plan, fh, ensure_ascii=False, indent=2)

    c = Counter(plan.values())
    print("distribucion:", {k: c[k] for k in CATS if c[k]})
    for cat in CATS:
        ej = [t for t, cc in plan.items() if cc == cat][:4]
        if ej:
            print(f"  [{cat}] " + " | ".join(ej))

    if apply:
        moved = 0
        for t, cat in plan.items():
            src = f"{vp}/{t}.md"
            if not os.path.exists(src):
                continue
            dd = f"{vp}/{cat}"
            os.makedirs(dd, exist_ok=True)
            dst = f"{dd}/{t}.md"
            # Never overwrite a note that already lives in the category folder.
            if os.path.exists(dst):
                continue
            shutil.move(src, dst)
            moved += 1
        print(f"\nmovidas: {moved}")
|
||||
|
||||
|
||||
# Run the classifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user