Files
dataforge c9fd4aa84c feat: enrichers, panel de ingest y menú contextual en el grafo
- Añade enricher.go + directorio enrichers/ para enriquecer entidades con fuentes externas.
- Nuevos componentes frontend: IngestPanel (panel de ingesta de datos) y NodeContextMenu (menú contextual sobre nodos del grafo).
- Retira SearchBar y lib/utils.ts; la búsqueda se integra dentro de los paneles existentes.
- Ajusta tipos (types.go, types.ts, wailsjs/go) y theming (postcss + app.css + Mantine).
- Actualiza app.go y wails.json para exponer las nuevas capacidades.
- Añade directorio projects/ con estado inicial.
- Rebuild del frontend (dist actualizado).
2026-04-13 23:32:55 +02:00

72 lines
1.7 KiB
Python

"""Enricher: Extract URLs from a text node."""
import sys
import json
import os
# Put <FN_REGISTRY_ROOT>/python/functions/cybersecurity at the front of the
# import search path so the `cybersecurity` import below resolves from there.
# When FN_REGISTRY_ROOT is unset the default "" makes this the relative path
# "python/functions/cybersecurity", i.e. resolved against the working directory.
sys.path.insert(0, os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), "python", "functions", "cybersecurity"))
from cybersecurity import extract_urls
def _entity_text(entity):
    """Return the text to scan for URLs, or "" when the entity has none.

    ``metadata.full_content`` is preferred; ``description`` is the fallback.
    Only non-empty strings are accepted, so malformed input (a non-dict
    entity, a non-dict ``metadata``, a numeric field, ...) ends up on the
    regular "no text" error path instead of raising.
    """
    if not isinstance(entity, dict):
        return ""
    metadata = entity.get("metadata")
    candidates = (
        metadata.get("full_content") if isinstance(metadata, dict) else None,
        entity.get("description"),
    )
    for candidate in candidates:
        if isinstance(candidate, str) and candidate:
            return candidate
    return ""


def _unique_urls(urls):
    """Return ``urls`` without duplicates, keeping first occurrences in order.

    Two URLs count as duplicates when they are equal after stripping
    trailing slashes and lower-casing. The first spelling seen is kept.
    """
    # NOTE(review): lower-casing the whole URL also merges URLs that differ
    # only in path/query case, which can be distinct resources -- confirm
    # this is the intended de-duplication rule.
    seen = set()
    unique = []
    for url in urls:
        key = url.rstrip("/").lower()
        if key not in seen:
            seen.add(key)
            unique.append(url)
    return unique


def main() -> None:
    """Read an entity as JSON from stdin and write extracted URLs to stdout.

    Output is a single JSON document on stdout:

    * ``{"error": "No text content found in entity"}`` when neither
      ``metadata.full_content`` nor ``description`` holds a non-empty string;
    * otherwise ``{"entities": [...], "relations": [...]}`` with one ``url``
      entity and one ``contains`` relation per unique URL returned by
      ``extract_urls``.

    Relations use the placeholders ``__SOURCE__`` and ``__NEW_<i>__``, where
    ``<i>`` is the index of the matching item in ``entities`` (presumably
    resolved to real entity ids by the caller -- confirm against the host).

    Raises:
        json.JSONDecodeError: if stdin does not contain valid JSON.
    """
    # Imported once here rather than on every loop iteration.
    from urllib.parse import urlparse

    entity = json.load(sys.stdin)
    text = _entity_text(entity)
    if not text:
        json.dump({"error": "No text content found in entity"}, sys.stdout)
        return

    entities = []
    relations = []
    for i, url in enumerate(_unique_urls(extract_urls(text))):
        try:
            domain = urlparse(url).netloc
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. unbalanced IPv6
            # brackets); still report the URL, just without a domain.
            domain = ""

        entities.append({
            # Display name is capped at 80 chars; the full URL is in metadata.
            "name": url[:80],
            "type_ref": "url",
            "description": "URL found in text",
            "tags": ["extracted"],
            "metadata": {
                "url": url,
                "domain": domain,
            },
            "notes": "",
        })
        relations.append({
            "name": "contains",
            "from_entity": "__SOURCE__",
            "to_entity": f"__NEW_{i}__",
            "description": "URL found in text",
            "weight": 1.0,
            "tags": [],
            "notes": "",
        })

    json.dump(
        {"entities": entities, "relations": relations},
        sys.stdout,
        ensure_ascii=False,
    )
# Run the enricher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()