chore: auto-commit (95 archivos)

- cmd/fn/doctor.go
- cmd/fn/main.go
- cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt
- cpp/apps/primitives_gallery/playground/tables/data_table.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.h
- cpp/apps/primitives_gallery/playground/tables/self_test.cpp
- cpp/apps/primitives_gallery/playground/tables/tql.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 00:50:34 +02:00
parent a2bbf23374
commit e3c8979e8d
189 changed files with 18964 additions and 330 deletions
@@ -0,0 +1,154 @@
"""Tests para vault_dedupe_report."""
from __future__ import annotations
import os
import sqlite3
import sys
from pathlib import Path
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from vault_dedupe_report import vault_dedupe_report
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_db(tmp_path: Path, rows: list[tuple]) -> Path:
    """Create ``vault_index.db`` inside *tmp_path* with a populated ``files`` table.

    Args:
        tmp_path: Directory in which the database file is created.
        rows: ``(rel_path, size, sha256)`` tuples to insert. The other
            columns of ``files`` are not set by the insert and stay NULL.

    Returns:
        Path of the created database file (``tmp_path / "vault_index.db"``).

    Raises:
        sqlite3.Error: If the table cannot be created or a row cannot be
            inserted (for example a repeated ``rel_path``).
    """
    db_path = tmp_path / "vault_index.db"
    conn = sqlite3.connect(str(db_path))
    # Always release the handle, even when the DDL or an insert fails, so a
    # broken fixture never leaves an open connection on the temporary file.
    try:
        conn.execute(
            """
            CREATE TABLE files (
                rel_path TEXT PRIMARY KEY,
                size INTEGER,
                mtime REAL,
                sha256 TEXT,
                mime TEXT,
                ext TEXT,
                bucket TEXT,
                sub_bucket TEXT,
                indexed_at REAL
            );
            """
        )
        conn.executemany(
            "INSERT INTO files (rel_path, size, sha256) VALUES (?, ?, ?);",
            rows,
        )
        conn.commit()
    finally:
        conn.close()
    return db_path
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_no_duplicates(tmp_path):
    """Three files with distinct sha256 values produce a report without groups."""
    unique_rows = [
        ("a/file1.txt", 100, "aaa111"),
        ("a/file2.txt", 200, "bbb222"),
        ("a/file3.txt", 300, "ccc333"),
    ]
    db_file = _make_db(tmp_path, unique_rows)

    report = vault_dedupe_report(str(tmp_path), db_path=str(db_file))

    assert report["groups"] == []
    # With no duplicate group, every aggregate counter is expected to be zero.
    for counter in ("total_groups", "total_duplicates", "total_reclaimable_bytes"):
        assert report[counter] == 0, counter
    assert report["scanned_files"] == 3
    assert report["vault_path"] == str(tmp_path)
def test_basic_duplicates(tmp_path):
    """Two files sharing one sha256 form a single group with one spare copy."""
    db_file = _make_db(
        tmp_path,
        [
            ("data/orig.jpg", 500, "deadbeef"),
            ("backup/orig.jpg", 500, "deadbeef"),
        ],
    )

    report = vault_dedupe_report(str(tmp_path), db_path=str(db_file))

    expected_totals = {
        "total_groups": 1,
        "total_duplicates": 1,
        "total_reclaimable_bytes": 500,
    }
    for key, expected in expected_totals.items():
        assert report[key] == expected, key

    group = report["groups"][0]
    expected_group = {
        "sha256": "deadbeef",
        "size": 500,
        "count": 2,
        "reclaimable_bytes": 500,
    }
    for key, expected in expected_group.items():
        assert group[key] == expected, key
    # File order inside the group is not pinned here, hence the sort.
    assert sorted(group["files"]) == ["backup/orig.jpg", "data/orig.jpg"]
def test_three_in_group(tmp_path):
    """Three copies of one file give count=3 and reclaimable bytes of size * 2."""
    size = 1000
    paths = ["a/f1.bin", "b/f2.bin", "c/f3.bin"]
    db_file = _make_db(tmp_path, [(path, size, "cafebabe") for path in paths])

    report = vault_dedupe_report(str(tmp_path), db_path=str(db_file))

    reclaimable = size * 2
    assert report["total_groups"] == 1
    assert report["total_duplicates"] == 2
    assert report["total_reclaimable_bytes"] == reclaimable

    group = report["groups"][0]
    assert group["count"] == 3
    assert group["reclaimable_bytes"] == reclaimable
    # Compared without sorting the result: the group's file list must already
    # be in sorted order.
    assert group["files"] == sorted(paths)
def test_min_size_filter(tmp_path):
    """Duplicates of 50 bytes are left out of the report when min_size=100."""
    small_duplicates = [
        ("x/small1.txt", 50, "tiny123"),
        ("y/small2.txt", 50, "tiny123"),
    ]
    db_file = _make_db(tmp_path, small_duplicates)

    report = vault_dedupe_report(
        str(tmp_path),
        min_size=100,
        db_path=str(db_file),
    )

    assert report["groups"] == []
    # Files below min_size are expected not to count as scanned either.
    for counter in ("total_groups", "total_reclaimable_bytes", "scanned_files"):
        assert report[counter] == 0, counter
def test_multiple_groups_ordered(tmp_path):
    """Two groups with different savings are listed by reclaimable bytes, descending."""
    # Group A: 2 copies of 200 bytes -> 200 reclaimable.
    group_a = [("p/a1.dat", 200, "groupA"), ("q/a2.dat", 200, "groupA")]
    # Group B: 3 copies of 500 bytes -> 1000 reclaimable, so it must come first.
    group_b = [
        ("r/b1.dat", 500, "groupB"),
        ("s/b2.dat", 500, "groupB"),
        ("t/b3.dat", 500, "groupB"),
    ]
    singleton = [("u/uniq.dat", 999, "unique1")]
    db_file = _make_db(tmp_path, group_a + group_b + singleton)

    report = vault_dedupe_report(str(tmp_path), db_path=str(db_file))

    assert report["total_groups"] == 2
    assert report["total_duplicates"] == 3  # (2 - 1) + (3 - 1)
    assert report["total_reclaimable_bytes"] == 1200  # 200 + 1000
    # All 6 rows carry a non-empty sha256, the unique file included.
    assert report["scanned_files"] == 6

    largest = report["groups"][0]
    smallest = report["groups"][1]
    # The group with the biggest saving (B: 1000) leads the list.
    assert largest["sha256"] == "groupB"
    assert largest["reclaimable_bytes"] == 1000
    assert smallest["sha256"] == "groupA"
    assert smallest["reclaimable_bytes"] == 200
@@ -0,0 +1,153 @@
"""Tests para vault_knowledge_parse."""
from __future__ import annotations
import os
import sqlite3
import sys
from pathlib import Path
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from vault_knowledge_parse import vault_knowledge_parse
def _make_vault(tmp: Path) -> tuple[Path, Path]:
    """Create a minimal vault in *tmp* containing only ``vault_index.db``.

    The database gets three objects: the ``files`` table, the contentless
    FTS5 virtual table ``files_fts`` and the ``knowledge_docs`` table.

    Args:
        tmp: Directory used as the vault root.

    Returns:
        ``(vault_root, db_path)``, where ``vault_root`` is *tmp* itself and
        ``db_path`` is ``tmp / "vault_index.db"``.

    Raises:
        sqlite3.Error: If the schema cannot be created, e.g. when the SQLite
            build does not accept the FTS5 options used here.
    """
    db = tmp / "vault_index.db"
    conn = sqlite3.connect(str(db))
    # Close the handle even if the schema script fails, so a failing fixture
    # does not keep the temporary database file open.
    try:
        conn.executescript(
            """
            CREATE TABLE IF NOT EXISTS files (
                rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                rel_path TEXT UNIQUE NOT NULL,
                size_bytes INTEGER,
                ext TEXT
            );
            CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
            USING fts5(rel_path, content_text, content='', contentless_delete=1);
            CREATE TABLE IF NOT EXISTS knowledge_docs (
                rel_path TEXT PRIMARY KEY,
                title TEXT,
                frontmatter_json TEXT,
                headings_json TEXT,
                parsed_at INTEGER
            );
            """
        )
        conn.commit()
    finally:
        conn.close()
    return tmp, db
def _insert_file_entry(db: Path, rel_path: str) -> None:
    """Register *rel_path* in the ``files`` table of the database at *db*.

    The row is stored with ``size_bytes`` 0 and ``ext`` ``'.md'``. Because the
    statement is ``INSERT OR IGNORE``, a row that conflicts with an existing
    one is silently skipped.

    Args:
        db: Path to the SQLite database file.
        rel_path: Vault-relative path to record.

    Raises:
        sqlite3.Error: If the insert cannot be executed (for example when the
            ``files`` table does not exist).
    """
    conn = sqlite3.connect(str(db))
    # Release the handle even when the insert fails.
    try:
        conn.execute(
            "INSERT OR IGNORE INTO files(rel_path, size_bytes, ext) VALUES (?, 0, '.md')",
            (rel_path,),
        )
        conn.commit()
    finally:
        conn.close()
def test_md_with_frontmatter(tmp_path):
    """Frontmatter provides title and metadata; the body text is returned and persisted."""
    vault_dir, db_file = _make_vault(tmp_path)
    rel_path = "docs/guia.md"
    source = "---\ntitle: Mi Guía\nauthor: Lucas\n---\n\n# Mi Guía\n\nContenido del documento.\n"

    note = vault_dir / rel_path
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db_file, rel_path)

    parsed = vault_knowledge_parse(str(vault_dir), rel_path, db_path=str(db_file))

    assert parsed["title"] == "Mi Guía"
    assert parsed["frontmatter"]["author"] == "Lucas"
    assert "Contenido del documento" in parsed["content_text"]
    assert parsed["persisted"] is True
def test_md_no_frontmatter(tmp_path):
    """Without frontmatter the mapping is empty and the title comes from the heading."""
    vault_dir, db_file = _make_vault(tmp_path)
    rel_path = "docs/sin_fm.md"
    source = "# Título\n\nCuerpo sin frontmatter.\n"

    note = vault_dir / rel_path
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db_file, rel_path)

    parsed = vault_knowledge_parse(str(vault_dir), rel_path, db_path=str(db_file))

    assert parsed["frontmatter"] == {}
    assert parsed["title"] == "Título"
    assert "Cuerpo sin frontmatter" in parsed["content_text"]
def test_md_title_from_h1(tmp_path):
    """The H1 heading text is reported as the document title."""
    vault_dir, db_file = _make_vault(tmp_path)
    rel_path = "docs/title_h1.md"

    note = vault_dir / rel_path
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text("# Primer H1\n\nAlgún texto.\n", encoding="utf-8")
    _insert_file_entry(db_file, rel_path)

    parsed = vault_knowledge_parse(str(vault_dir), rel_path, db_path=str(db_file))

    assert parsed["title"] == "Primer H1"
def test_md_title_from_filename(tmp_path):
    """With neither headings nor frontmatter, the title is the file name without extension."""
    vault_dir, db_file = _make_vault(tmp_path)
    rel_path = "docs/nombre_archivo.md"

    note = vault_dir / rel_path
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text("Solo texto sin headings ni frontmatter.\n", encoding="utf-8")
    _insert_file_entry(db_file, rel_path)

    parsed = vault_knowledge_parse(str(vault_dir), rel_path, db_path=str(db_file))

    assert parsed["title"] == "nombre_archivo"
def test_md_headings_levels(tmp_path):
    """Headings are returned in document order with their nesting level and text."""
    vault_dir, db_file = _make_vault(tmp_path)
    rel_path = "docs/headings.md"
    source = "# H1 Título\n\nTexto.\n\n## H2 Sección\n\n### H3 Subsección\n\n## H2 Otra\n"

    note = vault_dir / rel_path
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db_file, rel_path)

    parsed = vault_knowledge_parse(str(vault_dir), rel_path, db_path=str(db_file))

    found = parsed["headings"]
    assert len(found) == 4
    assert [item["level"] for item in found] == [1, 2, 3, 2]

    heading_texts = [item["text"] for item in found]
    for expected in ("H1 Título", "H2 Sección", "H3 Subsección"):
        assert expected in heading_texts
def test_md_persists_to_fts(tmp_path):
    """Parsing a Markdown file makes its text searchable through ``files_fts``."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/fts_md.md"
    md = vault / rel
    md.parent.mkdir(parents=True, exist_ok=True)
    md.write_text("# Documento FTS\n\nPalabra clave: xenolito.\n", encoding="utf-8")
    _insert_file_entry(db, rel)

    vault_knowledge_parse(str(vault), rel, db_path=str(db))

    conn = sqlite3.connect(str(db))
    # Close the connection even if the query itself raises, so a failing
    # lookup does not leave the temporary database open.
    try:
        # The FTS5 table is contentless, so the term is looked up with MATCH
        # instead of reading column values back directly.
        row = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'xenolito'",
        ).fetchone()
    finally:
        conn.close()
    assert row is not None, "FTS no encontró 'xenolito'"