"""Enricher: Fetch HTTP headers for a URL and update entity metadata.""" import sys import json try: import httpx except ImportError: import urllib.request def main(): entity = json.load(sys.stdin) url = (entity.get("metadata") or {}).get("url") or entity.get("name", "") if not url: json.dump({"error": "No URL found in entity"}, sys.stdout) return try: try: resp = httpx.head(url, follow_redirects=True, timeout=10) headers = dict(resp.headers) status = resp.status_code except NameError: req = urllib.request.Request(url, method="HEAD") with urllib.request.urlopen(req, timeout=10) as resp: headers = dict(resp.headers) status = resp.status except Exception as e: json.dump({"error": f"Failed to fetch headers: {e}"}, sys.stdout) return # Return metadata update for the source entity result = { "entities": [], "relations": [], "metadata_update": { "entity_id": entity["id"], "metadata": { "status_code": status, "server": headers.get("server", ""), "content_type": headers.get("content-type", ""), "x_powered_by": headers.get("x-powered-by", ""), }, }, } json.dump(result, sys.stdout, ensure_ascii=False) if __name__ == "__main__": main()