c9fd4aa84c
- Añade enricher.go + directorio enrichers/ para enriquecer entidades con fuentes externas. - Nuevos componentes frontend: IngestPanel (panel de ingesta de datos) y NodeContextMenu (menú contextual sobre nodos del grafo). - Retira SearchBar y lib/utils.ts; la búsqueda se integra dentro de los paneles existentes. - Ajusta tipos (types.go, types.ts, wailsjs/go) y theming (postcss + app.css + Mantine). - Actualiza app.go y wails.json para exponer las nuevas capacidades. - Añade directorio projects/ con estado inicial. - Rebuild del frontend (dist actualizado).
72 lines
1.7 KiB
Python
72 lines
1.7 KiB
Python
"""Enricher: Extract URLs from a text node."""
|
|
|
|
import sys
|
|
import json
|
|
import os
|
|
|
|
sys.path.insert(0, os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), "python", "functions", "cybersecurity"))
|
|
|
|
from cybersecurity import extract_urls
|
|
|
|
|
|
def main():
    """Read an entity as JSON from stdin and emit the URLs found in its text.

    The text is taken from ``metadata.full_content`` and, when that is empty
    or missing, from ``description``.  Every distinct URL returned by
    ``extract_urls`` becomes one ``url`` entity plus one ``contains``
    relation pointing at it.

    Output (JSON on stdout):
        ``{"entities": [...], "relations": [...]}`` on success, or
        ``{"error": "No text content found in entity"}`` when neither text
        field holds any content.
    """
    # Imported once here instead of on every loop iteration.
    from urllib.parse import urlparse

    entity = json.load(sys.stdin)

    # A missing, null or non-object "metadata" is treated as empty rather
    # than failing on ``.get``.
    metadata = entity.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    # Prefer the full content; fall back to the description when it is empty.
    text = metadata.get("full_content", "") or entity.get("description", "")
    if not text:
        json.dump({"error": "No text content found in entity"}, sys.stdout)
        return

    # Deduplicate ignoring letter case and trailing slashes.  The first
    # spelling encountered is the one kept, and dict insertion order keeps
    # the URLs in first-occurrence order.
    # NOTE(review): lowercasing the whole URL also merges URLs that differ
    # only in path case — confirm this is intended.
    first_seen = {}
    for candidate in extract_urls(text):
        first_seen.setdefault(candidate.rstrip("/").lower(), candidate)
    unique_urls = list(first_seen.values())

    entities = []
    relations = []

    for index, url in enumerate(unique_urls):
        try:
            domain = urlparse(url).netloc
        except ValueError:
            # urlparse rejects some malformed authorities (for example an
            # unmatched IPv6 bracket); keep the URL and leave the domain blank.
            domain = ""

        entities.append({
            "name": url[:80],  # display name is capped at 80 characters
            "type_ref": "url",
            "description": "URL found in text",
            "tags": ["extracted"],
            "metadata": {
                "url": url,  # full, untruncated URL
                "domain": domain,
            },
            "notes": "",
        })
        relations.append({
            "name": "contains",
            # Placeholder endpoints — presumably resolved by the caller to the
            # input entity and to the index-th entity emitted above; verify
            # against the consumer of this output.
            "from_entity": "__SOURCE__",
            "to_entity": f"__NEW_{index}__",
            "description": "URL found in text",
            "weight": 1.0,
            "tags": [],
            "notes": "",
        })

    json.dump(
        {"entities": entities, "relations": relations},
        sys.stdout,
        ensure_ascii=False,
    )
|
|
|
|
|
|
# Script entry point: run the enricher only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|