feat(infra): auto-commit con 88 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
"""Tests para extract_pdf_metadata."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pypdf import PdfWriter
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from cybersecurity.extract_pdf_metadata import extract_pdf_metadata
|
||||
|
||||
|
||||
def _make_pdf_with_metadata(path: str) -> None:
    """Write a two-page blank PDF that carries a document-info dictionary.

    The info dictionary sets the title, author, creator, producer, and the
    creation and modification dates.

    Args:
        path: Filesystem location where the PDF is written.
    """
    doc_info = {
        "/Title": "Documento OSINT",
        "/Author": "Enmanuel G.",
        "/Creator": "PyTestRig",
        "/Producer": "pypdf",
        "/CreationDate": "D:20240311102200+01'00'",
        "/ModDate": "D:20240312113000+01'00'",
    }

    pdf = PdfWriter()
    for _ in range(2):
        pdf.add_blank_page(width=200, height=200)
    pdf.add_metadata(doc_info)

    with open(path, "wb") as sink:
        pdf.write(sink)
|
||||
|
||||
|
||||
def _make_pdf_without_metadata(path: str) -> None:
    """Write a single-page blank PDF without calling ``add_metadata``.

    Args:
        path: Filesystem location where the PDF is written.
    """
    pdf = PdfWriter()
    pdf.add_blank_page(height=100, width=100)

    with open(path, "wb") as sink:
        pdf.write(sink)
|
||||
|
||||
|
||||
def test_pdf_con_metadatos_devuelve_titulo_autor_paginas(tmp_path):
    """A PDF with document info yields its title, author and page count."""
    pdf_file = str(tmp_path / "withmeta.pdf")
    _make_pdf_with_metadata(pdf_file)

    result = extract_pdf_metadata(pdf_file)

    assert result["error"] is None

    # Checked in insertion order so the first mismatch reported is stable.
    expected_fields = {
        "title": "Documento OSINT",
        "author": "Enmanuel G.",
        "creator": "PyTestRig",
        "producer": "pypdf",
        "num_pages": 2,
    }
    for field, wanted in expected_fields.items():
        assert result[field] == wanted

    assert result["raw"]  # must not be empty
|
||||
|
||||
|
||||
def test_pdf_sin_doc_info_devuelve_none_sin_petar(tmp_path):
    """A PDF without document info yields ``None`` fields and no error."""
    pdf_file = str(tmp_path / "nometa.pdf")
    _make_pdf_without_metadata(pdf_file)

    result = extract_pdf_metadata(pdf_file)

    assert result["error"] is None
    assert result["num_pages"] == 1
    for field in ("title", "author"):
        assert result[field] is None
|
||||
|
||||
|
||||
def test_fechas_parseables_en_iso_8601(tmp_path):
    """Parseable dates are returned as ISO 8601 strings."""
    pdf_file = str(tmp_path / "dates.pdf")
    _make_pdf_with_metadata(pdf_file)

    result = extract_pdf_metadata(pdf_file)

    # pypdf parses D:YYYYMMDDHHmmSS into a datetime; isoformat() carries
    # the 'T' separator.
    created = result["creation_date"]
    assert created is not None
    assert "2024-03-11" in created
    assert "T" in created

    modified = result["mod_date"]
    assert modified is not None
    assert "2024-03-12" in modified
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allows running the checks directly, without pytest: a temporary
    # directory stands in for the ``tmp_path`` fixture.
    import tempfile
    from pathlib import Path

    with tempfile.TemporaryDirectory() as scratch:
        for check in (
            test_pdf_con_metadatos_devuelve_titulo_autor_paginas,
            test_pdf_sin_doc_info_devuelve_none_sin_petar,
            test_fechas_parseables_en_iso_8601,
        ):
            # A fresh Path per call, all pointing at the same directory.
            check(Path(scratch))
    # NOTE(review): the original indentation was lost; the message is
    # assumed to print after the temporary directory is removed — confirm.
    print("Todos los tests pasaron.")
|
||||
Reference in New Issue
Block a user