Files
fn_registry/python/functions/infra/scan_directory_test.py
T
egutierrez 5a324f6554 feat: funciones Python infra y tipos Python (core, datascience, infra)
Infra: cache_to_file, cache_to_sqlite, http_download_file, http_get_json,
http_post_json, read_file_with_encoding, safe_extract_zip, scan_directory,
setup_logger, normalize_zip_filenames.
Tipos: 30+ tipos core (agent_action, context, task, message, parse_result...),
6 tipos datascience (entity_candidate, extraction_result...), 2 tipos infra.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:43 +02:00

182 lines
7.0 KiB
Python

"""Tests para scan_directory."""
import os
import sys
import tempfile
from pathlib import Path
# Make the modules in this directory and the shared infra types importable
# when the test file is run outside of a package context.
_HERE = Path(__file__).parent
_TYPES_INFRA = _HERE.parent.parent / "types" / "infra"
for _p in (str(_HERE), str(_TYPES_INFRA)):
    if _p in sys.path:
        continue
    # Prepend so these locations take precedence over anything already listed.
    sys.path.insert(0, _p)
from scan_directory import scan_directory # noqa: E402
def _make_tree(base: Path, structure: dict) -> None:
    """Build a file/directory tree under *base* from ``{rel_path: content}``.

    For every entry the parent directories are created first. A ``None``
    content creates a directory at that path; any other content is written
    to the path as UTF-8 text.
    """
    for rel_name, payload in structure.items():
        target = base / rel_name
        target.parent.mkdir(parents=True, exist_ok=True)
        if content_is_text := payload is not None:
            target.write_text(payload, encoding="utf-8")
        if not content_is_text:
            # ``None`` marks an (empty) directory rather than a file.
            target.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------------------------
# Test: directory with a mix of file types
# ---------------------------------------------------------------------------
def test_directorio_con_mezcla_de_archivos():
    """Check that files are split by ``supported_extensions``.

    With ``{".pdf", ".md"}`` supported, the PDF and Markdown files must be
    listed as processable, the PNG and CSV files as unsupported, and every
    entry must carry the classification of the list it appears in.
    """
    fixture = {
        "report.pdf": "pdf content",
        "notes.md": "# Notes",
        "image.png": "png content",
        "data.csv": "a,b,c",
    }
    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        _make_tree(base, fixture)
        scan = scan_directory(str(base), supported_extensions={".pdf", ".md"})

        accepted = [entry.rel_path for entry in scan.processable]
        assert "notes.md" in accepted, f"notes.md no en processable: {accepted}"
        assert "report.pdf" in accepted, f"report.pdf no en processable: {accepted}"

        rejected = [entry.rel_path for entry in scan.unsupported]
        assert "image.png" in rejected, f"image.png no en unsupported: {rejected}"
        assert "data.csv" in rejected, f"data.csv no en unsupported: {rejected}"

        # Each entry's own classification must agree with its bucket.
        for entry in scan.processable:
            assert entry.classification == "processable"
        for entry in scan.unsupported:
            assert entry.classification == "unsupported"
# ---------------------------------------------------------------------------
# Test: directory with dot files
# ---------------------------------------------------------------------------
def test_directorio_con_dot_files():
    """Check that dot files are kept out of the classified results.

    ``.hidden`` and ``.env`` must not appear as processable or unsupported,
    ``visible.txt`` must appear, and at least one of the dot files must be
    mentioned in ``result.skipped``.
    """
    fixture = {
        "visible.txt": "content",
        ".hidden": "hidden content",
        ".env": "SECRET=x",
    }
    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        _make_tree(base, fixture)
        scan = scan_directory(str(base))

        listed = [entry.rel_path for entry in scan.processable + scan.unsupported]
        assert ".hidden" not in listed, f".hidden no deberia aparecer: {listed}"
        assert ".env" not in listed, f".env no deberia aparecer: {listed}"
        assert "visible.txt" in listed, f"visible.txt deberia aparecer: {listed}"

        # Substring search over the joined skipped entries.
        skipped_blob = " ".join(scan.skipped)
        assert any(name in skipped_blob for name in (".hidden", ".env"))
# ---------------------------------------------------------------------------
# Test: directory with ignored subdirectories
# ---------------------------------------------------------------------------
def test_directorio_con_subdirs_ignorados():
    """Check that files under ignored directories are left out.

    Nothing below ``__pycache__``, ``node_modules`` or ``.git`` may show up
    in the classified results, while ``main.py`` and ``src/utils.py`` must.
    """
    fixture = {
        "main.py": "print('hello')",
        "__pycache__/module.pyc": "bytecode",
        "node_modules/lib/index.js": "// js",
        ".git/config": "[core]",
        "src/utils.py": "def f(): pass",
    }
    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        _make_tree(base, fixture)
        scan = scan_directory(str(base))
        found = [entry.rel_path for entry in scan.processable + scan.unsupported]

        # Files inside ignored directories must not be reported.
        assert all("__pycache__" not in rel for rel in found), \
            f"__pycache__ no deberia estar en resultados: {found}"
        assert all("node_modules" not in rel for rel in found), \
            f"node_modules no deberia estar en resultados: {found}"
        assert all(".git" not in rel for rel in found), \
            f".git no deberia estar en resultados: {found}"

        # Files outside ignored directories must be reported.
        assert "main.py" in found, f"main.py deberia estar: {found}"
        assert "src/utils.py" in found, f"src/utils.py deberia estar: {found}"
# ---------------------------------------------------------------------------
# Test: include/exclude filters
# ---------------------------------------------------------------------------
def test_filtros_include_exclude():
    """Check the ``include`` and ``exclude`` filter arguments.

    ``include="*.pdf,*.md"`` must keep the PDF and Markdown files and drop
    the PNG and TMP files. ``exclude="drafts/,*.tmp"`` must drop the file
    under ``drafts/`` and the TMP file while keeping the PDF.
    """
    fixture = {
        "report.pdf": "content",
        "notes.md": "notes",
        "image.png": "image",
        "drafts/draft.md": "draft",
        "temp.tmp": "tmp",
    }
    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        _make_tree(base, fixture)

        # Include only .pdf and .md files.
        included = scan_directory(str(base), include="*.pdf,*.md")
        kept = [entry.rel_path for entry in included.processable + included.unsupported]
        assert "image.png" not in kept, f"image.png no deberia incluirse: {kept}"
        assert "temp.tmp" not in kept, f"temp.tmp no deberia incluirse: {kept}"
        assert "report.pdf" in kept
        assert "notes.md" in kept

        # Exclude the drafts/ path prefix and the .tmp extension.
        excluded = scan_directory(str(base), exclude="drafts/,*.tmp")
        remaining = [entry.rel_path for entry in excluded.processable + excluded.unsupported]
        assert "drafts/draft.md" not in remaining, \
            f"drafts/draft.md no deberia incluirse: {remaining}"
        assert "temp.tmp" not in remaining, f"temp.tmp no deberia incluirse: {remaining}"
        assert "report.pdf" in remaining
# ---------------------------------------------------------------------------
# Test: strict mode
# ---------------------------------------------------------------------------
def test_modo_strict():
    """Check that ``strict`` controls whether unsupported files raise.

    With one supported and one unsupported file, ``strict=False`` must
    return normally with exactly one unsupported entry, and ``strict=True``
    must raise ``ValueError``.
    """
    fixture = {
        "doc.pdf": "content",
        "image.png": "image",
    }
    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        _make_tree(base, fixture)

        # strict=False does not raise even though an unsupported file exists.
        lenient = scan_directory(str(base), supported_extensions={".pdf"}, strict=False)
        assert len(lenient.unsupported) == 1

        # strict=True raises ValueError.
        try:
            scan_directory(str(base), supported_extensions={".pdf"}, strict=True)
        except ValueError:
            strict_raised = True
        else:
            strict_raised = False
        assert strict_raised, "strict=True deberia lanzar ValueError cuando hay unsupported"
if __name__ == "__main__":
    # Allow running this file directly: execute every test in order and
    # print a PASS line after each one. A failing test propagates its
    # exception and stops the run.
    _runs = (
        (test_directorio_con_mezcla_de_archivos, "PASS: directorio con mezcla de archivos"),
        (test_directorio_con_dot_files, "PASS: directorio con dot files"),
        (test_directorio_con_subdirs_ignorados, "PASS: directorio con subdirs ignorados"),
        (test_filtros_include_exclude, "PASS: filtros include/exclude"),
        (test_modo_strict, "PASS: modo strict"),
    )
    for _test_fn, _pass_line in _runs:
        _test_fn()
        print(_pass_line)
    print("\nAll tests passed.")