eb8dbf66a1
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
100 lines
3.3 KiB
Python
100 lines
3.3 KiB
Python
"""Extrae metadatos EXIF de una imagen (OSINT pasiva sobre documentos propios)."""
|
|
|
|
import math

from PIL import ExifTags, Image
|
|
|
|
# No reverse (name -> id) map is needed: lookups go through ExifTags.TAGS (id -> name).
_GPS_TAGS = ExifTags.GPSTAGS  # id -> name mapping for the GPS sub-IFD.
|
|
|
|
|
|
def _to_degrees(value) -> float | None:
|
|
"""Convierte una coordenada GPS en formato DMS (grados, minutos, segundos) a grados decimales.
|
|
|
|
Pillow devuelve cada componente como un IFDRational o una tupla (num, den).
|
|
"""
|
|
try:
|
|
d, m, s = value
|
|
return float(d) + float(m) / 60.0 + float(s) / 3600.0
|
|
except (TypeError, ValueError, ZeroDivisionError):
|
|
return None
|
|
|
|
|
|
def _extract_gps(gps_info: dict) -> tuple[float | None, float | None]:
|
|
"""Devuelve (lat, lon) en grados decimales desde el sub-IFD GPSInfo, o (None, None)."""
|
|
if not gps_info:
|
|
return None, None
|
|
|
|
named = {_GPS_TAGS.get(k, k): v for k, v in gps_info.items()}
|
|
|
|
lat = _to_degrees(named.get("GPSLatitude")) if "GPSLatitude" in named else None
|
|
lon = _to_degrees(named.get("GPSLongitude")) if "GPSLongitude" in named else None
|
|
|
|
if lat is not None and str(named.get("GPSLatitudeRef", "N")).upper() == "S":
|
|
lat = -lat
|
|
if lon is not None and str(named.get("GPSLongitudeRef", "E")).upper() == "W":
|
|
lon = -lon
|
|
|
|
return lat, lon
|
|
|
|
|
|
def _read_sub_ifd(exif, ifd_tag: int) -> dict:
    """Return the sub-IFD that ``ifd_tag`` points to, or ``{}`` if it cannot be read."""
    try:
        return exif.get_ifd(ifd_tag) or {}
    except (AttributeError, KeyError, ValueError):
        # Best effort: a missing API or a corrupt sub-IFD must not discard
        # the root-level tags that were already read.
        return {}


def extract_exif_metadata(image_path: str) -> dict:
    """Read the EXIF metadata of an image and return it in normalized form.

    Opens the image with Pillow, reads the root EXIF tags plus the Exif and
    GPS sub-IFDs, and exposes the fields most relevant for OSINT alongside the
    complete tag dictionary keyed by tag name.

    Args:
        image_path: path to the image file (JPEG, PNG, TIFF, ...).

    Returns:
        dict with the keys ``datetime``, ``camera_make``, ``camera_model``,
        ``software``, ``gps_lat``, ``gps_lon`` (decimal degrees or ``None``)
        and ``raw`` (dict of tag name -> value; the GPS sub-IFD, when present,
        is nested under ``raw["GPSInfo"]``). If the image has no EXIF data all
        fields are ``None`` and ``raw`` is ``{}``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file
        (e.g. ``FileNotFoundError``, ``PIL.UnidentifiedImageError``).
    """
    result = {
        "datetime": None,
        "camera_make": None,
        "camera_model": None,
        "software": None,
        "gps_lat": None,
        "gps_lon": None,
        "raw": {},
    }

    # Pointer tags of the sub-IFDs, given as numeric ids (the values of
    # ExifTags.IFD.Exif / ExifTags.IFD.GPSInfo) so that Pillow versions
    # without the ExifTags.IFD enum are handled as well.
    exif_ifd_tag = 0x8769
    gps_ifd_tag = 0x8825

    with Image.open(image_path) as img:
        exif = img.getexif()
        if not exif:
            return result

        # Root-level tags, keyed by name (unknown ids keep their numeric key).
        raw = {ExifTags.TAGS.get(tag_id, tag_id): value for tag_id, value in exif.items()}

        # get_ifd() parses sub-IFDs lazily and, for some formats (e.g. TIFF),
        # from the image's own file handle, so they must be read while the
        # image is still open. Reading them after the with-block fails on a
        # closed file and would be silently reported as "no data".
        exif_ifd = _read_sub_ifd(exif, exif_ifd_tag)
        gps_ifd = _read_sub_ifd(exif, gps_ifd_tag)

    # Exif sub-IFD: DateTimeOriginal lives here, not in the root IFD.
    for tag_id, value in exif_ifd.items():
        raw[ExifTags.TAGS.get(tag_id, tag_id)] = value

    # GPS sub-IFD: nested under its own key, with tag names resolved.
    if gps_ifd:
        raw["GPSInfo"] = {_GPS_TAGS.get(k, k): v for k, v in gps_ifd.items()}

    result["raw"] = raw
    # Prefer the capture time; fall back to the file modification timestamp tag.
    result["datetime"] = raw.get("DateTimeOriginal") or raw.get("DateTime")
    result["camera_make"] = raw.get("Make")
    result["camera_model"] = raw.get("Model")
    result["software"] = raw.get("Software")

    result["gps_lat"], result["gps_lon"] = _extract_gps(gps_ifd)

    return result
|