package main

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	ops "fn-registry/fn_operations"
)

// EnricherDef describes a registered enricher function.
type EnricherDef struct {
	ID          string   `json:"id"`
	Label       string   `json:"label"`
	Description string   `json:"description"`
	AppliesTo   []string `json:"applies_to"` // entity type_refs this enricher works on
	Script      string   `json:"script"`     // Python script filename in enrichers/
	Icon        string   `json:"icon"`       // Tabler icon name
}

// EnricherResult is the JSON contract returned by Python enricher scripts.
//
// A non-empty Error marks the whole run as failed; runEnricherScript turns it
// into a Go error and discards the rest of the payload.
type EnricherResult struct {
	Entities       []EntityInput      `json:"entities"`
	Relations      []RelationInputDTO `json:"relations"`
	Error          string             `json:"error,omitempty"`
	MetadataUpdate *MetadataUpdate    `json:"metadata_update,omitempty"`
}

// MetadataUpdate allows enrichers to update the source entity's metadata.
//
// EntityID may be a literal entity ID or one of the placeholders understood by
// resolvePlaceholder. Keys in Metadata are merged into the entity's existing
// metadata, overwriting keys that are already present.
type MetadataUpdate struct {
	EntityID string         `json:"entity_id"`
	Metadata map[string]any `json:"metadata"`
}

// enricherRegistry is the static list of enrichers known to the application.
var enricherRegistry = []EnricherDef{
	{
		ID:          "url_to_text",
		Label:       "Fetch & Extract Text",
		Description: "Download URL content and extract text",
		AppliesTo:   []string{"url", "domain"},
		Script:      "url_to_text.py",
		Icon:        "IconWorldDownload",
	},
	{
		ID:          "document_to_text",
		Label:       "Extract Text",
		Description: "Extract text from document file",
		AppliesTo:   []string{"document"},
		Script:      "document_to_text.py",
		Icon:        "IconFileText",
	},
	{
		ID:          "text_to_entities",
		Label:       "Extract Entities (LLM)",
		Description: "Extract entities and relations using AI",
		AppliesTo:   []string{"text"},
		Script:      "text_to_entities.py",
		Icon:        "IconBrain",
	},
	{
		ID:          "text_to_urls",
		Label:       "Extract URLs",
		Description: "Find all URLs in text",
		AppliesTo:   []string{"text"},
		Script:      "text_to_urls.py",
		Icon:        "IconLink",
	},
	{
		ID:          "url_to_headers",
		Label:       "Fetch HTTP Headers",
		Description: "Retrieve HTTP headers for URL",
		AppliesTo:   []string{"url", "domain"},
		Script:      "url_to_headers.py",
		Icon:        "IconServer",
	},
}

// enrichersForType returns enrichers applicable to a given entity type.
//
// The result preserves registry order and is nil when nothing matches.
func enrichersForType(typeRef string) []EnricherDef {
	var result []EnricherDef
	for _, e := range enricherRegistry {
		for _, t := range e.AppliesTo {
			if t == typeRef {
				result = append(result, e)
				break // one match is enough; never add the same enricher twice
			}
		}
	}
	return result
}

// findEnricher looks up an enricher by ID.
//
// It returns a pointer into enricherRegistry (not a copy), or nil when no
// enricher has the given ID.
func findEnricher(id string) *EnricherDef {
	for i := range enricherRegistry {
		if enricherRegistry[i].ID == id {
			return &enricherRegistry[i]
		}
	}
	return nil
}

// runEnricherScript executes a Python enricher script and returns the parsed result.
//
// The script enrichersDir/script is run with enrichersDir as its working
// directory, entityJSON on stdin, and FN_REGISTRY_ROOT / PYTHONPATH set so it
// can import registry functions. Its stdout must be a JSON-encoded
// EnricherResult.
//
// An error is returned when the script cannot be stat'ed, the process fails
// (the captured stderr is included in the message), stdout is not valid JSON,
// or the decoded result carries a non-empty Error field.
//
// NOTE: no timeout is applied; a hanging script blocks the caller.
func runEnricherScript(registryRoot, enrichersDir, script string, entityJSON []byte) (*EnricherResult, error) {
	scriptPath := filepath.Join(enrichersDir, script)
	if _, err := os.Stat(scriptPath); err != nil {
		return nil, fmt.Errorf("enricher script not found: %s: %w", scriptPath, err)
	}

	// Find Python: prefer registry venv, then system.
	pythonPath := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
	if _, err := os.Stat(pythonPath); err != nil {
		pythonPath = "python3"
	}

	cmd := exec.Command(pythonPath, scriptPath)
	cmd.Stdin = bytes.NewReader(entityJSON)
	cmd.Dir = enrichersDir

	// Set PYTHONPATH so enricher scripts can import registry functions. Use the
	// platform's path-list separator rather than a hard-coded ":".
	pypath := strings.Join([]string{
		filepath.Join(registryRoot, "python", "functions", "core"),
		filepath.Join(registryRoot, "python", "functions", "cybersecurity"),
		filepath.Join(registryRoot, "python", "functions", "datascience"),
		filepath.Join(registryRoot, "analysis", "ontology_graph", "lib"),
	}, string(os.PathListSeparator))
	cmd.Env = append(os.Environ(),
		"FN_REGISTRY_ROOT="+registryRoot,
		"PYTHONPATH="+pypath,
	)

	output, err := cmd.Output()
	if err != nil {
		// Output captures stderr into ExitError.Stderr when cmd.Stderr is unset;
		// surface it together with the exit status.
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			return nil, fmt.Errorf("enricher %s failed: %w: %s", script, err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("enricher %s failed: %w", script, err)
	}

	var result EnricherResult
	if err := json.Unmarshal(output, &result); err != nil {
		return nil, fmt.Errorf("enricher %s: invalid JSON output: %w", script, err)
	}
	if result.Error != "" {
		return nil, fmt.Errorf("enricher %s: %s", script, result.Error)
	}
	return &result, nil
}

// insertEnricherResults inserts entities and relations from an enricher result,
// resolving __NEW_N__ and __SOURCE__ placeholders.
//
// Insertion is best-effort: a failure on an individual entity, relation or the
// metadata update is logged and skipped rather than aborting the batch. The
// only error returned is for a nil result.
func (a *App) insertEnricherResults(result *EnricherResult, sourceEntityID string) error {
	if result == nil {
		return errors.New("insertEnricherResults: nil enricher result")
	}

	// newIDs[i] is the ID derived for result.Entities[i]; __NEW_i__ resolves to it.
	newIDs := make([]string, len(result.Entities))

	// Insert entities.
	for i, ei := range result.Entities {
		id := makeEntityID(ei.Name, ei.TypeRef)
		now := time.Now()
		e := &ops.Entity{
			ID:          id,
			Name:        ei.Name,
			TypeRef:     ei.TypeRef,
			Status:      ops.StatusActive,
			Description: ei.Description,
			Domain:      "fuzzygraph",
			Tags:        ei.Tags,
			Source:      "enricher",
			Metadata:    ei.Metadata,
			Notes:       ei.Notes,
			CreatedAt:   now,
			UpdatedAt:   now,
		}

		var err error
		if a.registryDB != nil {
			// Prefer the snapshotting insert; if it fails (e.g. the entity may
			// already exist), fall back to a plain insert and report only that
			// second failure.
			if err = ops.InsertEntityWithSnapshot(a.db, a.registryDB, e); err != nil {
				err = a.db.InsertEntity(e)
			}
		} else {
			err = a.db.InsertEntity(e)
		}
		if err != nil {
			log.Printf("[insertEnricherResults] WARNING: skip entity %s: %v", id, err)
		}

		// Recorded even when the insert was skipped, so relations can still
		// reference this ID (presumably an already-existing entity — verify
		// against makeEntityID).
		newIDs[i] = id
	}

	// Insert relations with placeholder resolution.
	for _, ri := range result.Relations {
		from := resolvePlaceholder(ri.FromEntity, sourceEntityID, newIDs)
		to := resolvePlaceholder(ri.ToEntity, sourceEntityID, newIDs)
		// Drop relations with a missing endpoint and self-loops.
		if from == "" || to == "" || from == to {
			continue
		}

		now := time.Now()
		r := &ops.Relation{
			ID:          generateID(),
			Name:        ri.Name,
			FromEntity:  from,
			ToEntity:    to,
			Description: ri.Description,
			Purity:      "impure",
			Direction:   ops.DirUnidirectional,
			Weight:      ri.Weight,
			Status:      ops.RelImplemented,
			Tags:        ri.Tags,
			Notes:       ri.Notes,
			CreatedAt:   now,
			UpdatedAt:   now,
		}
		if err := a.db.InsertRelation(r); err != nil {
			log.Printf("[insertEnricherResults] WARNING: skip relation %s->%s: %v", from, to, err)
		}
	}

	// Handle metadata update on the target entity (usually the source entity).
	if mu := result.MetadataUpdate; mu != nil && mu.EntityID != "" {
		targetID := resolvePlaceholder(mu.EntityID, sourceEntityID, newIDs)
		existing, err := a.db.GetEntity(targetID)
		switch {
		case err != nil:
			log.Printf("[insertEnricherResults] WARNING: metadata update skipped for %s: %v", targetID, err)
		case existing == nil:
			log.Printf("[insertEnricherResults] WARNING: metadata update skipped for %s: entity not found", targetID)
		default:
			if existing.Metadata == nil {
				existing.Metadata = map[string]any{}
			}
			// Shallow merge: keys from the update overwrite existing keys.
			for k, v := range mu.Metadata {
				existing.Metadata[k] = v
			}
			existing.UpdatedAt = time.Now()
			if err := a.db.UpdateEntity(existing); err != nil {
				log.Printf("[insertEnricherResults] WARNING: metadata update failed: %v", err)
			}
		}
	}

	return nil
}

// resolvePlaceholder converts __SOURCE__, __NEW_0__ etc. to actual entity IDs.
//
// "__SOURCE__" resolves to sourceID. "__NEW_<n>__" resolves to newIDs[n] when
// <n> is a base-10 integer within range. Anything else — including malformed
// or out-of-range placeholders — is returned unchanged.
func resolvePlaceholder(val, sourceID string, newIDs []string) string {
	const (
		source = "__SOURCE__"
		prefix = "__NEW_"
		suffix = "__"
	)

	if val == source {
		return sourceID
	}

	// The length guard matters: in "__NEW__" the prefix and suffix overlap, and
	// slicing between them would panic.
	if len(val) < len(prefix)+len(suffix) ||
		!strings.HasPrefix(val, prefix) ||
		!strings.HasSuffix(val, suffix) {
		return val
	}

	// Atoi rejects trailing garbage such as "1abc", which Sscanf("%d") accepted.
	idx, err := strconv.Atoi(val[len(prefix) : len(val)-len(suffix)])
	if err != nil || idx < 0 || idx >= len(newIDs) {
		return val
	}
	return newIDs[idx]
}