chore: auto-commit (95 archivos)

- cmd/fn/doctor.go
- cmd/fn/main.go
- cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt
- cpp/apps/primitives_gallery/playground/tables/data_table.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.h
- cpp/apps/primitives_gallery/playground/tables/self_test.cpp
- cpp/apps/primitives_gallery/playground/tables/tql.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 00:50:34 +02:00
parent a2bbf23374
commit e3c8979e8d
189 changed files with 18964 additions and 330 deletions
@@ -0,0 +1,161 @@
"""Tests for vault_csv_profile."""
from __future__ import annotations
import os
import sqlite3
import sys
import tempfile
from pathlib import Path
import pytest
# Put the parent of this file's directory first on sys.path so that the
# `vault_csv_profile` import below is resolved from there.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from vault_csv_profile import vault_csv_profile
def _make_vault(tmp: Path) -> tuple[Path, Path]:
    """Create a minimal vault rooted at ``tmp`` for the CSV profile tests.

    A ``vault_index.db`` SQLite file is created directly inside ``tmp`` with
    three empty tables: ``files``, the contentless FTS5 table ``files_fts``
    and ``csv_profiles``.

    Args:
        tmp: Existing directory that acts as the vault root.

    Returns:
        ``(vault_root, db_path)``, i.e. ``(tmp, tmp / "vault_index.db")``.

    Raises:
        sqlite3.OperationalError: If the linked SQLite cannot create the
            schema (for example, FTS5 or ``contentless_delete`` unsupported).
    """
    db = tmp / "vault_index.db"
    conn = sqlite3.connect(str(db))
    try:
        conn.executescript(
            """
CREATE TABLE IF NOT EXISTS files (
rowid INTEGER PRIMARY KEY AUTOINCREMENT,
rel_path TEXT UNIQUE NOT NULL,
size_bytes INTEGER,
ext TEXT
);
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
USING fts5(rel_path, content_text, content='', contentless_delete=1);
CREATE TABLE IF NOT EXISTS csv_profiles (
rel_path TEXT PRIMARY KEY,
cols_json TEXT,
n_rows INTEGER,
encoding TEXT,
date_min TEXT,
date_max TEXT,
profiled_at INTEGER
);
"""
        )
        conn.commit()
    finally:
        # Close the handle even when schema creation raises, so a failed
        # setup does not leak an open connection to the database file.
        conn.close()
    return tmp, db
def _insert_file_entry(db: Path, rel_path: str) -> None:
    """Register ``rel_path`` in the ``files`` table of the index at ``db``.

    The row is stored with ``size_bytes = 0`` and ``ext = '.csv'``. Because
    the statement is ``INSERT OR IGNORE``, calling this again for a path that
    is already present leaves the table unchanged. The entry exists so that
    ``files_fts`` has a valid rowid to refer to.

    Args:
        db: Path to the SQLite index database.
        rel_path: Vault-relative path to register.
    """
    conn = sqlite3.connect(str(db))
    try:
        conn.execute(
            "INSERT OR IGNORE INTO files(rel_path, size_bytes, ext) VALUES (?, 0, '.csv')",
            (rel_path,),
        )
        conn.commit()
    finally:
        # Always release the connection, including when the insert fails.
        conn.close()
def test_csv_basic(tmp_path):
    """Profile a small UTF-8 CSV and check the summary and its persistence."""
    vault, db = _make_vault(tmp_path)
    rel = "data/basic.csv"
    csv_file = vault / rel
    csv_file.parent.mkdir(parents=True, exist_ok=True)
    csv_file.write_text(
        "nombre,edad,score\nAna,30,9.5\nBob,25,8.0\nCarla,35,7.5\n",
        encoding="utf-8",
    )
    _insert_file_entry(db, rel)

    result = vault_csv_profile(str(vault), rel, db_path=str(db))

    assert result["rel_path"] == rel
    assert result["n_rows"] == 3
    assert len(result["cols"]) == 3
    col_names = [c["name"] for c in result["cols"]]
    assert "nombre" in col_names
    assert "edad" in col_names
    assert "score" in col_names
    assert result["persisted"] is True

    # The profile must also have been written to the csv_profiles table.
    conn = sqlite3.connect(str(db))
    try:
        row = conn.execute(
            "SELECT n_rows FROM csv_profiles WHERE rel_path = ?", (rel,)
        ).fetchone()
    finally:
        # Close even if the query raises, so the failure does not leak the
        # connection to the temporary database.
        conn.close()
    assert row is not None
    assert row[0] == 3
def test_csv_date_detection(tmp_path):
    """A column of ISO dates must yield a non-null date_min / date_max range."""
    root, index_db = _make_vault(tmp_path)
    rel_path = "data/fechas.csv"
    target = root / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    content = "fecha,valor\n2023-01-01,100\n2023-06-15,200\n2023-12-31,300\n"
    target.write_text(content, encoding="utf-8")
    _insert_file_entry(index_db, rel_path)

    profile = vault_csv_profile(str(root), rel_path, db_path=str(index_db))

    assert profile["date_min"] is not None
    assert profile["date_max"] is not None
    # Plain string comparison against the first and last dates in the file.
    assert profile["date_min"] <= "2023-01-01"
    assert profile["date_max"] >= "2023-12-31"
def test_csv_encoding_latin1(tmp_path):
    """A Latin-1 encoded CSV with accented characters is profiled and persisted."""
    root, index_db = _make_vault(tmp_path)
    rel_path = "data/tildes.csv"
    target = root / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    raw = "ciudad,poblacion\nMálaga,500000\nCórdoba,320000\n".encode("latin-1")
    target.write_bytes(raw)
    _insert_file_entry(index_db, rel_path)

    profile = vault_csv_profile(str(root), rel_path, db_path=str(index_db))

    assert profile["n_rows"] == 2
    assert profile["encoding"] != "utf-8?"
    # Some non-empty encoding label must have been reported.
    assert profile["encoding"]
    assert profile["persisted"] is True
def test_csv_empty(tmp_path):
    """A zero-byte CSV produces an empty profile: no rows, columns or dates."""
    root, index_db = _make_vault(tmp_path)
    rel_path = "data/empty.csv"
    target = root / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("", encoding="utf-8")
    _insert_file_entry(index_db, rel_path)

    profile = vault_csv_profile(str(root), rel_path, db_path=str(index_db))

    assert profile["n_rows"] == 0
    assert profile["cols"] == []
    assert profile["date_min"] is None
    assert profile["date_max"] is None
def test_csv_persists_fts(tmp_path):
    """Contentless FTS5: check that the CSV column names are searchable via MATCH."""
    vault, db = _make_vault(tmp_path)
    rel = "data/fts_test.csv"
    csv_file = vault / rel
    csv_file.parent.mkdir(parents=True, exist_ok=True)
    csv_file.write_text("producto,precio\nManzana,1.5\nPera,2.0\n", encoding="utf-8")
    _insert_file_entry(db, rel)

    vault_csv_profile(str(vault), rel, db_path=str(db))

    conn = sqlite3.connect(str(db))
    try:
        # A contentless FTS5 table does not allow a direct SELECT of its
        # columns, so use MATCH to verify that the terms were indexed.
        row_prod = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'producto'",
        ).fetchone()
        row_prec = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'precio'",
        ).fetchone()
    finally:
        # Close even if a query raises, so the connection is not leaked.
        conn.close()
    assert row_prod is not None, "FTS no encontró 'producto'"
    assert row_prec is not None, "FTS no encontró 'precio'"
@@ -0,0 +1,147 @@
"""Tests for vault_pdf_extract."""
from __future__ import annotations
import os
import sqlite3
import sys
from pathlib import Path
import pytest
# Put the parent of this file's directory first on sys.path so that the
# `vault_pdf_extract` import below is resolved from there.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from vault_pdf_extract import vault_pdf_extract
def _make_vault(tmp: Path) -> tuple[Path, Path]:
    """Create a minimal vault rooted at ``tmp`` for the PDF extraction tests.

    A ``vault_index.db`` SQLite file is created directly inside ``tmp`` with
    three empty tables: ``files``, the contentless FTS5 table ``files_fts``
    and ``pdf_extracts``.

    Args:
        tmp: Existing directory that acts as the vault root.

    Returns:
        ``(vault_root, db_path)``, i.e. ``(tmp, tmp / "vault_index.db")``.

    Raises:
        sqlite3.OperationalError: If the linked SQLite cannot create the
            schema (for example, FTS5 or ``contentless_delete`` unsupported).
    """
    db = tmp / "vault_index.db"
    conn = sqlite3.connect(str(db))
    try:
        conn.executescript(
            """
CREATE TABLE IF NOT EXISTS files (
rowid INTEGER PRIMARY KEY AUTOINCREMENT,
rel_path TEXT UNIQUE NOT NULL,
size_bytes INTEGER,
ext TEXT
);
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
USING fts5(rel_path, content_text, content='', contentless_delete=1);
CREATE TABLE IF NOT EXISTS pdf_extracts (
rel_path TEXT PRIMARY KEY,
page_count INTEGER,
text_len INTEGER,
extracted_to TEXT,
extracted_at INTEGER
);
"""
        )
        conn.commit()
    finally:
        # Close the handle even when schema creation raises, so a failed
        # setup does not leak an open connection to the database file.
        conn.close()
    return tmp, db
def _insert_file_entry(db: Path, rel_path: str) -> None:
    """Register ``rel_path`` in the ``files`` table of the index at ``db``.

    The row is stored with ``size_bytes = 0`` and ``ext = '.pdf'``. Because
    the statement is ``INSERT OR IGNORE``, calling this again for a path that
    is already present leaves the table unchanged.

    Args:
        db: Path to the SQLite index database.
        rel_path: Vault-relative path to register.
    """
    conn = sqlite3.connect(str(db))
    try:
        conn.execute(
            "INSERT OR IGNORE INTO files(rel_path, size_bytes, ext) VALUES (?, 0, '.pdf')",
            (rel_path,),
        )
        conn.commit()
    finally:
        # Always release the connection, including when the insert fails.
        conn.close()
def _make_pdf(path: Path, text: str = "Hello vault PDF.\nPage two content."):
    """Write a minimal single-page PDF containing ``text`` to ``path``.

    The document is built with ``fitz``, imported lazily so it is only
    required when a test actually needs a PDF. All of ``text`` is inserted on
    the one page created here, at point (72, 72).

    Args:
        path: Destination file for the generated PDF.
        text: Text to insert on the page.
    """
    import fitz

    doc = fitz.open()
    try:
        page = doc.new_page()
        page.insert_text((72, 72), text)
        doc.save(str(path))
    finally:
        # Release the document even if inserting or saving fails.
        doc.close()
def test_pdf_extract_basic(tmp_path):
    """Extract a generated PDF and check the summary and the pdf_extracts row."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/test.pdf"
    pdf = vault / rel
    pdf.parent.mkdir(parents=True, exist_ok=True)
    _make_pdf(pdf)
    _insert_file_entry(db, rel)

    result = vault_pdf_extract(str(vault), rel, db_path=str(db))

    assert result["rel_path"] == rel
    assert result["page_count"] >= 1
    assert result["text_len"] > 0
    assert result["persisted"] is True

    # The extraction must also have been recorded in pdf_extracts.
    conn = sqlite3.connect(str(db))
    try:
        row = conn.execute(
            "SELECT page_count, text_len FROM pdf_extracts WHERE rel_path=?", (rel,)
        ).fetchone()
    finally:
        # Close even if the query raises, so the connection is not leaked.
        conn.close()
    assert row is not None
    assert row[0] >= 1
    assert row[1] > 0
def test_pdf_dump_text_creates_file(tmp_path):
    """With dump_text=True the extracted text lands in a non-empty file in the vault."""
    root, index_db = _make_vault(tmp_path)
    rel_path = "docs/dump.pdf"
    source_pdf = root / rel_path
    source_pdf.parent.mkdir(parents=True, exist_ok=True)
    _make_pdf(source_pdf, "Contenido para dump a disco.")
    _insert_file_entry(index_db, rel_path)
    # Create data/processed/ up front so that directory is the one used.
    processed_dir = root.joinpath("data", "processed")
    processed_dir.mkdir(parents=True, exist_ok=True)

    outcome = vault_pdf_extract(
        str(root), rel_path, db_path=str(index_db), dump_text=True
    )

    assert outcome["extracted_to"] is not None
    dumped = root / outcome["extracted_to"]
    assert dumped.exists()
    assert dumped.stat().st_size > 0
def test_pdf_no_dump(tmp_path):
    """With dump_text=False no output location is reported."""
    root, index_db = _make_vault(tmp_path)
    rel_path = "docs/nodump.pdf"
    source_pdf = root / rel_path
    source_pdf.parent.mkdir(parents=True, exist_ok=True)
    _make_pdf(source_pdf, "No se debe volcar a disco.")
    _insert_file_entry(index_db, rel_path)

    outcome = vault_pdf_extract(
        str(root), rel_path, db_path=str(index_db), dump_text=False
    )

    assert outcome["extracted_to"] is None
def test_pdf_persists_to_fts(tmp_path):
    """Text extracted from the PDF must be searchable in files_fts via MATCH."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/fts.pdf"
    pdf = vault / rel
    pdf.parent.mkdir(parents=True, exist_ok=True)
    _make_pdf(pdf, "Texto especial para FTS xyzpdftest.")
    _insert_file_entry(db, rel)

    vault_pdf_extract(str(vault), rel, db_path=str(db), dump_text=False)

    conn = sqlite3.connect(str(db))
    try:
        # A contentless FTS5 table does not allow a direct SELECT of its
        # columns, so look the unique token up with MATCH instead.
        row = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'xyzpdftest'",
        ).fetchone()
    finally:
        # Close even if the query raises, so the connection is not leaked.
        conn.close()
    assert row is not None, "FTS no encontró el texto del PDF"
def test_pdf_corrupt_errors(tmp_path):
    """A file that only looks like a PDF must make extraction raise RuntimeError."""
    root, index_db = _make_vault(tmp_path)
    rel_path = "docs/corrupt.pdf"
    broken_pdf = root / rel_path
    broken_pdf.parent.mkdir(parents=True, exist_ok=True)
    # A PDF header followed by junk bytes: not a parseable document.
    junk = b"%PDF-1.4 garbage bytes \x00\x01\x02 not a real pdf"
    broken_pdf.write_bytes(junk)
    _insert_file_entry(index_db, rel_path)

    with pytest.raises(RuntimeError, match="corrupto|inválido|PDF"):
        vault_pdf_extract(str(root), rel_path, db_path=str(index_db))