"""Enricher: Extract URLs from a text node.""" import sys import json import os sys.path.insert(0, os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), "python", "functions", "cybersecurity")) from cybersecurity import extract_urls def main(): entity = json.load(sys.stdin) text = (entity.get("metadata") or {}).get("full_content", "") if not text: text = entity.get("description", "") if not text: json.dump({"error": "No text content found in entity"}, sys.stdout) return urls = extract_urls(text) # Deduplicate seen = set() unique_urls = [] for u in urls: normalized = u.rstrip("/").lower() if normalized not in seen: seen.add(normalized) unique_urls.append(u) entities = [] relations = [] for i, url in enumerate(unique_urls): # Extract domain from URL domain = "" try: from urllib.parse import urlparse domain = urlparse(url).netloc except Exception: pass entities.append({ "name": url[:80], "type_ref": "url", "description": f"URL found in text", "tags": ["extracted"], "metadata": { "url": url, "domain": domain, }, "notes": "", }) relations.append({ "name": "contains", "from_entity": "__SOURCE__", "to_entity": f"__NEW_{i}__", "description": "URL found in text", "weight": 1.0, "tags": [], "notes": "", }) json.dump({"entities": entities, "relations": relations}, sys.stdout, ensure_ascii=False) if __name__ == "__main__": main()