"""Sink comun: persiste el resultado de cualquier escaneo de red en el ecosistema OSINT. Toda funcion de scan (whois, rdap, dns, nmap, traceroute, ping) llama a esta funcion DESPUES de ejecutarse para que el resultado quede archivado y navegable. Tiene dos capas: 1. Capa nota (SIEMPRE, fuente de verdad): escribe una nota Markdown en el vault de Obsidian OSINT bajo `dominios//recon/-.md` con el raw del scan en un bloque de codigo y un frontmatter tipado. Compone create_obsidian_note del grupo obsidian. 2. Capa registro estructurado (best-effort): hace POST al service osint_db (FastAPI + DuckDB single-writer) en /api/scan para indexar el scan. Si el endpoint no existe todavia (404) o el service esta caido (ConnectionError), degrada a solo-nota con un register_warning, SIN fallar: la nota ya quedo guardada. Funcion impura: escribe en disco y hace red. No lanza; devuelve un dict de estado. """ import json import os import re import urllib.error import urllib.request from datetime import datetime from obsidian import create_obsidian_note # Tipos de scan reconocidos. scan_type es texto libre pero se sanea a slug seguro. _KNOWN_SCAN_TYPES = {"whois", "rdap", "dns", "nmap", "traceroute", "ping"} # Limite del raw embebido en la nota (caracteres). Por encima se trunca. _RAW_MAX = 200_000 def _slugify(value: str) -> str: """Normaliza un texto a slug seguro: minusculas, solo [a-z0-9._-].""" s = re.sub(r"[^a-z0-9._-]+", "-", value.strip().lower()).strip("-") return s or "unknown" def _fence(raw: str) -> str: """Envuelve raw en un bloque de codigo fenced, evitando colisionar con ``` interiores.""" # Elige un cercado con suficientes backticks para que el contenido no lo cierre. longest = 0 for run in re.findall(r"`+", raw): longest = max(longest, len(run)) fence = "`" * max(3, longest + 1) return f"{fence}text\n{raw}\n{fence}" def save_scan_to_osint( target: str, scan_type: str, raw: str, summary: dict | None = None, vault_dir: str = "~/Obsidian/osint", service_url: str = "http://127.0.0.1:8771", tool: str | None = None, ) -> dict: """Persiste un resultado de escaneo de red en el vault OSINT (nota + registro DuckDB). Args: target: objetivo del scan (dominio, host o IP). Define el slug de la carpeta. scan_type: tipo de scan (whois|rdap|dns|nmap|traceroute|ping); texto libre que se saneara a slug seguro para nombres de archivo y tags. raw: salida cruda del scan (texto). Se embebe en un bloque de codigo en la nota; si supera ~200KB se trunca dejando una marca. summary: dict opcional con campos resumidos del scan (registrar, ips, puertos, rtt, etc.). Se anade al frontmatter y se envia al registro estructurado. vault_dir: raiz del vault OSINT. Se expande ~ . Default ~/Obsidian/osint. service_url: base del service osint_db. Default http://127.0.0.1:8771. tool: nombre de la herramienta usada (nmap, dig, whois...). Si None, usa scan_type. Returns: dict de estado. Caso ok: {"status": "ok", "target": str, "slug": str, "scan_type": str, "note_path": str (rel al vault), "note_abs": str (ruta absoluta), "registered": bool, "register_warning": str | None, "scan_id": str | None} Caso error (solo si falla la escritura critica de la nota): {"status": "error", "error": str} """ try: scan_type_slug = _slugify(scan_type) slug = _slugify(target) tool_name = tool or scan_type_slug now = datetime.now() ts_compact = now.strftime("%Y%m%d-%H%M") ts_iso = now.isoformat() rel_path = f"dominios/{slug}/recon/{scan_type_slug}-{ts_compact}.md" summary = summary if isinstance(summary, dict) else {} # --- Capa nota (critica) --- frontmatter = { "tipo": "scan-red", "scan_tipo": scan_type_slug, "target": target, "slug": slug, "fecha": ts_iso, "herramienta": tool_name, "tags": ["scan-red", scan_type_slug, "recon"], } if summary: frontmatter["summary"] = summary raw_body = raw if isinstance(raw, str) else str(raw) truncated = False if len(raw_body) > _RAW_MAX: raw_body = raw_body[:_RAW_MAX] truncated = True lines = [ f"# {scan_type_slug} scan — {target}", "", f"- **Target:** {target}", f"- **Tipo:** {scan_type_slug}", f"- **Herramienta:** {tool_name}", f"- **Fecha:** {ts_iso}", ] if summary: lines.append("") lines.append("## Resumen") for k, v in summary.items(): lines.append(f"- **{k}:** {v}") lines.append("") lines.append("## Salida cruda") lines.append("") lines.append(_fence(raw_body)) if truncated: lines.append("") lines.append( f"> Salida truncada a {_RAW_MAX} caracteres (el original era mas largo)." ) body = "\n".join(lines) + "\n" note_abs = create_obsidian_note( os.path.expanduser(vault_dir), rel_path, body=body, frontmatter=frontmatter, overwrite=True, ) # --- Capa registro estructurado (best-effort) --- registered = False register_warning = None scan_id = None payload = { "target": target, "target_slug": slug, "scan_type": scan_type_slug, "tool": tool_name, "note_path": rel_path, "summary": summary, "scan_ts": ts_iso, } url = service_url.rstrip("/") + "/api/scan" try: data = json.dumps(payload).encode("utf-8") req = urllib.request.Request( url, data=data, headers={"Content-Type": "application/json"}, method="POST", ) with urllib.request.urlopen(req, timeout=5) as resp: registered = True try: raw_resp = resp.read().decode("utf-8") parsed = json.loads(raw_resp) if raw_resp else {} if isinstance(parsed, dict) and parsed.get("id") is not None: scan_id = str(parsed["id"]) except (ValueError, UnicodeDecodeError): # 2xx sin body JSON: cuenta como registrado igualmente. pass except urllib.error.HTTPError as e: register_warning = f"HTTP {e.code} desde {url}: {e.reason}" except urllib.error.URLError as e: register_warning = f"service osint_db inaccesible en {url}: {e.reason}" except Exception as e: # noqa: BLE001 - degradacion: red nunca rompe la nota register_warning = f"registro fallido: {type(e).__name__}: {e}" return { "status": "ok", "target": target, "slug": slug, "scan_type": scan_type_slug, "note_path": rel_path, "note_abs": note_abs, "registered": registered, "register_warning": register_warning, "scan_id": scan_id, } except Exception as e: # noqa: BLE001 - contrato: nunca lanzar return {"status": "error", "error": f"{type(e).__name__}: {e}"} if __name__ == "__main__": import tempfile tmp_vault = tempfile.mkdtemp() # service_url apunta a un puerto muerto para ejercitar la degradacion graceful. result = save_scan_to_osint( "example.com", "whois", "Domain: EXAMPLE.COM\nRegistrar: X", summary={"registrar": "X"}, vault_dir=tmp_vault, service_url="http://127.0.0.1:1", ) assert result["status"] == "ok", result assert result["slug"] == "example.com", result assert result["scan_type"] == "whois", result assert result["note_path"] == result["note_path"], result assert os.path.isfile(result["note_abs"]), result assert result["registered"] is False, result assert result["register_warning"], result assert result["scan_id"] is None, result content = open(result["note_abs"], encoding="utf-8").read() assert "Registrar: X" in content, content assert "scan-red" in content, content print("save_scan_to_osint smoke OK") print(f" note_path: {result['note_path']}") print(f" note_abs: {result['note_abs']}") print(f" registered: {result['registered']}") print(f" register_warning: {result['register_warning']}")