"""Enricher: Extract URLs from a text node."""

import sys
import json
import os

# Put <FN_REGISTRY_ROOT>/python/functions/cybersecurity at the front of the
# import search path so the `cybersecurity` import below can be resolved.
# When FN_REGISTRY_ROOT is unset the root defaults to "", which makes this a
# path relative to the current working directory.
sys.path.insert(0, os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), "python", "functions", "cybersecurity"))

from cybersecurity import extract_urls


def _dedup_key(url):
    """Return the key used to decide whether two extracted URLs are duplicates.

    Trailing slashes are ignored, and the leading scheme/authority part of the
    URL is compared case-insensitively. Everything from the first ``/``, ``?``
    or ``#`` after the authority keeps its case, because paths, queries and
    fragments are case-sensitive: ``http://h/A`` and ``http://h/a`` are
    different resources and must both be reported.
    """
    trimmed = url.rstrip("/")

    # Only treat "://" as the scheme separator when nothing URL-structural
    # precedes it; otherwise it belongs to an embedded URL inside a query
    # string (e.g. "example.com/r?u=http://x") and there is no scheme.
    scheme, sep, _ = trimmed.partition("://")
    has_scheme = bool(sep) and not any(ch in scheme for ch in "/?#")
    authority_start = len(scheme) + len(sep) if has_scheme else 0

    # The authority ends at the first path, query or fragment delimiter.
    delimiter_positions = [
        pos
        for pos in (trimmed.find(ch, authority_start) for ch in "/?#")
        if pos != -1
    ]
    authority_end = min(delimiter_positions, default=len(trimmed))

    return trimmed[:authority_end].lower() + trimmed[authority_end:]


def main():
    """Read an entity as JSON from stdin and write extracted URLs to stdout.

    The text to scan is taken from ``metadata.full_content`` and falls back
    to ``description`` when that is missing or empty. If neither yields any
    text, ``{"error": "No text content found in entity"}`` is written and
    the function returns.

    Otherwise the output is ``{"entities": [...], "relations": [...]}`` with
    one ``url`` entity and one ``contains`` relation per distinct URL. The
    relation endpoints use the placeholders ``__SOURCE__`` and ``__NEW_<i>__``,
    where ``<i>`` is the index of the matching item in ``entities``
    (presumably resolved by whatever consumes this output -- confirm against
    the caller).
    """
    # Imported once per call instead of once per URL inside the loop.
    from urllib.parse import urlparse

    entity = json.load(sys.stdin)
    # A JSON payload of `null` or a non-object carries no text; treat it like
    # an empty entity so the caller gets the regular error object.
    if not isinstance(entity, dict):
        entity = {}

    metadata = entity.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    text = metadata.get("full_content", "") or entity.get("description", "")
    if not text:
        json.dump({"error": "No text content found in entity"}, sys.stdout)
        return

    # Deduplicate while keeping first-seen order and the original spelling of
    # the first occurrence.
    seen = set()
    unique_urls = []
    for candidate in extract_urls(text):
        key = _dedup_key(candidate)
        if key not in seen:
            seen.add(key)
            unique_urls.append(candidate)

    entities = []
    relations = []

    for i, url in enumerate(unique_urls):
        # Best effort: urlparse raises ValueError for a malformed authority
        # (e.g. an unbalanced IPv6 bracket); the URL is still reported, just
        # without a domain. netloc is the full authority component, so it may
        # include userinfo and a port.
        try:
            domain = urlparse(url).netloc
        except ValueError:
            domain = ""

        entities.append({
            "name": url[:80],
            "type_ref": "url",
            "description": "URL found in text",
            "tags": ["extracted"],
            "metadata": {
                "url": url,
                "domain": domain,
            },
            "notes": "",
        })
        relations.append({
            "name": "contains",
            "from_entity": "__SOURCE__",
            # Index-based placeholder pointing at the entity appended above.
            "to_entity": f"__NEW_{i}__",
            "description": "URL found in text",
            "weight": 1.0,
            "tags": [],
            "notes": "",
        })

    json.dump({"entities": entities, "relations": relations}, sys.stdout, ensure_ascii=False)


# Run the enricher only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()