chore: auto-commit (95 archivos)
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
"""Tests para vault_dedupe_report."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from vault_dedupe_report import vault_dedupe_report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_db(tmp_path: Path, rows: list[tuple]) -> Path:
    """Create ``vault_index.db`` under *tmp_path* with a populated ``files`` table.

    Args:
        tmp_path: Directory in which the database file is created.
        rows: ``(rel_path, size, sha256)`` tuples to insert; every other
            column of the table is left NULL.

    Returns:
        Path of the created database file (``tmp_path / "vault_index.db"``).
    """
    db_path = tmp_path / "vault_index.db"
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            """
            CREATE TABLE files (
                rel_path TEXT PRIMARY KEY,
                size INTEGER,
                mtime REAL,
                sha256 TEXT,
                mime TEXT,
                ext TEXT,
                bucket TEXT,
                sub_bucket TEXT,
                indexed_at REAL
            );
            """
        )
        conn.executemany(
            "INSERT INTO files (rel_path, size, sha256) VALUES (?, ?, ?);",
            rows,
        )
        conn.commit()
    finally:
        # Close even when the schema or an insert fails, so a broken fixture
        # never leaves an open handle on the temporary directory.
        conn.close()
    return db_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_duplicates(tmp_path):
    """Three files with distinct sha256 values produce an empty report."""
    unique_rows = [
        ("a/file1.txt", 100, "aaa111"),
        ("a/file2.txt", 200, "bbb222"),
        ("a/file3.txt", 300, "ccc333"),
    ]
    _make_db(tmp_path, unique_rows)
    index_db = tmp_path / "vault_index.db"

    report = vault_dedupe_report(str(tmp_path), db_path=str(index_db))

    # Checked in insertion order, one key at a time.
    expected = {
        "groups": [],
        "total_groups": 0,
        "total_duplicates": 0,
        "total_reclaimable_bytes": 0,
        "scanned_files": 3,
        "vault_path": str(tmp_path),
    }
    for key, value in expected.items():
        assert report[key] == value
|
||||
|
||||
|
||||
def test_basic_duplicates(tmp_path):
    """Two files sharing a sha256 form one group: count=2, reclaimable=size."""
    duplicate_rows = [
        ("data/orig.jpg", 500, "deadbeef"),
        ("backup/orig.jpg", 500, "deadbeef"),
    ]
    _make_db(tmp_path, duplicate_rows)
    index_db = tmp_path / "vault_index.db"

    report = vault_dedupe_report(str(tmp_path), db_path=str(index_db))

    assert report["total_groups"] == 1
    assert report["total_duplicates"] == 1
    assert report["total_reclaimable_bytes"] == 500

    group = report["groups"][0]
    expected_fields = {
        "sha256": "deadbeef",
        "size": 500,
        "count": 2,
        "reclaimable_bytes": 500,
    }
    for field, value in expected_fields.items():
        assert group[field] == value
    # File order inside the group is not pinned here, so compare sorted.
    assert sorted(group["files"]) == ["backup/orig.jpg", "data/orig.jpg"]
|
||||
|
||||
|
||||
def test_three_in_group(tmp_path):
    """Three files sharing a sha256: count=3 and reclaimable=size*2."""
    size = 1000
    paths = ["a/f1.bin", "b/f2.bin", "c/f3.bin"]
    _make_db(tmp_path, [(path, size, "cafebabe") for path in paths])
    index_db = tmp_path / "vault_index.db"

    report = vault_dedupe_report(str(tmp_path), db_path=str(index_db))

    # Every copy beyond the first one is reclaimable.
    reclaimable = size * 2
    assert report["total_groups"] == 1
    assert report["total_duplicates"] == 2
    assert report["total_reclaimable_bytes"] == reclaimable

    group = report["groups"][0]
    assert group["count"] == 3
    assert group["reclaimable_bytes"] == reclaimable
    assert group["files"] == sorted(paths)
|
||||
|
||||
|
||||
def test_min_size_filter(tmp_path):
    """Duplicates of size 50 with min_size=100 produce no groups."""
    small_duplicates = [
        ("x/small1.txt", 50, "tiny123"),
        ("y/small2.txt", 50, "tiny123"),
    ]
    _make_db(tmp_path, small_duplicates)
    call_options = {
        "min_size": 100,
        "db_path": str(tmp_path / "vault_index.db"),
    }

    report = vault_dedupe_report(str(tmp_path), **call_options)

    assert report["groups"] == []
    for zero_key in ("total_groups", "total_reclaimable_bytes", "scanned_files"):
        assert report[zero_key] == 0
|
||||
|
||||
|
||||
def test_multiple_groups_ordered(tmp_path):
    """Two groups with different savings come back in descending order."""
    # Group A: 2 copies of 200 bytes -> reclaimable = 200
    # Group B: 3 copies of 500 bytes -> reclaimable = 1000
    # Group B must be listed first.
    group_a = [(path, 200, "groupA") for path in ("p/a1.dat", "q/a2.dat")]
    group_b = [
        (path, 500, "groupB") for path in ("r/b1.dat", "s/b2.dat", "t/b3.dat")
    ]
    singleton = [("u/uniq.dat", 999, "unique1")]
    _make_db(tmp_path, group_a + group_b + singleton)
    index_db = tmp_path / "vault_index.db"

    report = vault_dedupe_report(str(tmp_path), db_path=str(index_db))

    assert report["total_groups"] == 2
    assert report["total_duplicates"] == 3  # (2-1) + (3-1)
    assert report["total_reclaimable_bytes"] == 1200  # 200 + 1000
    # All 6 inserted rows carry a non-empty sha256, the unique file included.
    assert report["scanned_files"] == 6

    # The group with the larger saving (B: 1000) must come first.
    expected_order = [("groupB", 1000), ("groupA", 200)]
    for position, (sha, reclaimable) in enumerate(expected_order):
        group = report["groups"][position]
        assert group["sha256"] == sha
        assert group["reclaimable_bytes"] == reclaimable
|
||||
@@ -0,0 +1,153 @@
|
||||
"""Tests para vault_knowledge_parse."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from vault_knowledge_parse import vault_knowledge_parse
|
||||
|
||||
|
||||
def _make_vault(tmp: Path) -> tuple[Path, Path]:
    """Create a minimal vault in *tmp* containing an empty ``vault_index.db``.

    The database gets three empty objects: the ``files`` table, the
    contentless FTS5 virtual table ``files_fts`` (created with
    ``contentless_delete=1``) and the ``knowledge_docs`` table.

    Args:
        tmp: Directory that acts as the vault root.

    Returns:
        ``(vault_root, db_path)`` where ``vault_root`` is *tmp* itself and
        ``db_path`` is ``tmp / "vault_index.db"``.
    """
    db = tmp / "vault_index.db"
    conn = sqlite3.connect(str(db))
    try:
        conn.executescript(
            """
            CREATE TABLE IF NOT EXISTS files (
                rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                rel_path TEXT UNIQUE NOT NULL,
                size_bytes INTEGER,
                ext TEXT
            );
            CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
            USING fts5(rel_path, content_text, content='', contentless_delete=1);
            CREATE TABLE IF NOT EXISTS knowledge_docs (
                rel_path TEXT PRIMARY KEY,
                title TEXT,
                frontmatter_json TEXT,
                headings_json TEXT,
                parsed_at INTEGER
            );
            """
        )
        conn.commit()
    finally:
        # Close even if the script fails (for example when the SQLite build
        # rejects the FTS5 definition) so no handle is left open.
        conn.close()
    return tmp, db
|
||||
|
||||
|
||||
def _insert_file_entry(db: Path, rel_path: str) -> None:
    """Register *rel_path* in the ``files`` table of the database at *db*.

    The row is inserted with ``size_bytes=0`` and ``ext='.md'``. Because the
    statement is ``INSERT OR IGNORE``, a row that conflicts with an existing
    one is skipped instead of raising.

    Args:
        db: Path to the SQLite database file.
        rel_path: Value stored in the ``rel_path`` column.
    """
    conn = sqlite3.connect(str(db))
    try:
        conn.execute(
            "INSERT OR IGNORE INTO files(rel_path, size_bytes, ext) VALUES (?, 0, '.md')",
            (rel_path,),
        )
        conn.commit()
    finally:
        # Always release the handle, even when the insert fails.
        conn.close()
|
||||
|
||||
|
||||
def test_md_with_frontmatter(tmp_path):
    """Front matter supplies title and author; body text is returned and persisted is True."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/guia.md"
    source = "---\ntitle: Mi Guía\nauthor: Lucas\n---\n\n# Mi Guía\n\nContenido del documento.\n"

    note = vault / rel
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db, rel)

    parsed = vault_knowledge_parse(str(vault), rel, db_path=str(db))

    assert parsed["title"] == "Mi Guía"
    front_matter = parsed["frontmatter"]
    assert front_matter["author"] == "Lucas"
    body_text = parsed["content_text"]
    assert "Contenido del documento" in body_text
    assert parsed["persisted"] is True
|
||||
|
||||
|
||||
def test_md_no_frontmatter(tmp_path):
    """Without front matter the metadata dict is empty and the H1 gives the title."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/sin_fm.md"
    source = "# Título\n\nCuerpo sin frontmatter.\n"

    note = vault / rel
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db, rel)

    parsed = vault_knowledge_parse(str(vault), rel, db_path=str(db))

    assert parsed["frontmatter"] == {}
    assert parsed["title"] == "Título"
    body_text = parsed["content_text"]
    assert "Cuerpo sin frontmatter" in body_text
|
||||
|
||||
|
||||
def test_md_title_from_h1(tmp_path):
    """The first H1 heading is used as the title."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/title_h1.md"
    source = "# Primer H1\n\nAlgún texto.\n"

    note = vault / rel
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db, rel)

    parsed = vault_knowledge_parse(str(vault), rel, db_path=str(db))

    expected_title = "Primer H1"
    assert parsed["title"] == expected_title
|
||||
|
||||
|
||||
def test_md_title_from_filename(tmp_path):
    """With neither headings nor front matter the file stem becomes the title."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/nombre_archivo.md"
    source = "Solo texto sin headings ni frontmatter.\n"

    note = vault / rel
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db, rel)

    parsed = vault_knowledge_parse(str(vault), rel, db_path=str(db))

    expected_title = "nombre_archivo"
    assert parsed["title"] == expected_title
|
||||
|
||||
|
||||
def test_md_headings_levels(tmp_path):
    """Headings are reported in document order with their level and text."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/headings.md"
    source = "# H1 Título\n\nTexto.\n\n## H2 Sección\n\n### H3 Subsección\n\n## H2 Otra\n"

    note = vault / rel
    note.parent.mkdir(parents=True, exist_ok=True)
    note.write_text(source, encoding="utf-8")
    _insert_file_entry(db, rel)

    parsed = vault_knowledge_parse(str(vault), rel, db_path=str(db))

    found = parsed["headings"]
    assert len(found) == 4
    assert [entry["level"] for entry in found] == [1, 2, 3, 2]
    heading_texts = [entry["text"] for entry in found]
    for wanted in ("H1 Título", "H2 Sección", "H3 Subsección"):
        assert wanted in heading_texts
|
||||
|
||||
|
||||
def test_md_persists_to_fts(tmp_path):
    """Parsing a markdown file makes its text searchable through ``files_fts``."""
    vault, db = _make_vault(tmp_path)
    rel = "docs/fts_md.md"
    md = vault / rel
    md.parent.mkdir(parents=True, exist_ok=True)
    md.write_text("# Documento FTS\n\nPalabra clave: xenolito.\n", encoding="utf-8")
    _insert_file_entry(db, rel)

    vault_knowledge_parse(str(vault), rel, db_path=str(db))

    conn = sqlite3.connect(str(db))
    try:
        # Contentless FTS5 table: column values cannot be selected directly,
        # so the lookup goes through MATCH and reads only the rowid.
        row = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'xenolito'",
        ).fetchone()
    finally:
        # Release the handle even when the query itself raises.
        conn.close()
    assert row is not None, "FTS no encontró 'xenolito'"
|
||||
Reference in New Issue
Block a user