2667af52cc
Implementa el sistema de orquestación para salas Matrix con múltiples bots. El orquestador es un "special agent" sin identidad Matrix que coordina qué bot responde y cuándo, usando LLM (Claude) para routing y evaluación de calidad.

Cambios principales:
- pkg/orchestration/task.go: tipos puros (TaskEvent, BotResponse, QualityScore, RoutingDecision)
- shell/orchestration/: runtime del orquestador (orchestrator.go, router.go, evaluator.go)
- agents/specials/orchestrator/: config + prompts (routing, quality, refinement)
- internal/config/: SpecialConfig, OrchestrationCfg, LoadSpecial()
- shell/bus/bus.go: protocolo request-reply (SendAndWait, Reply) para delegación
- shell/matrix/listener.go: InterceptFunc para interceptar eventos en salas orquestadas
- agents/runtime.go: SetBus, listenBus, handleTaskEvent para recibir tareas del orquestador
- cmd/launcher/main.go: creación de bus compartido, arranque del orquestador antes de bots

Incluye deduplicación para evitar que múltiples listeners en la misma sala disparen el orquestador más de una vez por mensaje.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
333 lines
9.3 KiB
Go
333 lines
9.3 KiB
Go
// Package orchestration implements the multi-bot orchestrator runtime.
|
|
// The orchestrator intercepts Matrix events in managed rooms and coordinates
|
|
// which bot responds via the in-process bus.
|
|
package orchestration
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/enmanuel/agents/internal/config"
|
|
"github.com/enmanuel/agents/pkg/decision"
|
|
coretypes "github.com/enmanuel/agents/pkg/llm"
|
|
"github.com/enmanuel/agents/pkg/orchestration"
|
|
"github.com/enmanuel/agents/shell/bus"
|
|
shelllm "github.com/enmanuel/agents/shell/llm"
|
|
)
|
|
|
|
// Orchestrator coordinates multi-bot rooms. It has no Matrix identity —
|
|
// it intercepts events before they reach bots and delegates via the bus.
|
|
type Orchestrator struct {
|
|
cfg *config.SpecialConfig
|
|
llm coretypes.CompleteFunc
|
|
bus *bus.Bus
|
|
managedRooms map[string][]string // roomID → []botID
|
|
participants map[string]orchestration.ParticipantInfo // botID → info
|
|
logger *slog.Logger
|
|
|
|
// Prompts loaded from files
|
|
routingPrompt string
|
|
qualityPrompt string
|
|
refinementPrompt string
|
|
|
|
// Dedup: multiple bots in the same room will each trigger Intercept().
|
|
// We use a set of "room:sender:content" keys to ensure only one fires.
|
|
seenMu sync.Mutex
|
|
seen map[string]bool
|
|
}
|
|
|
|
// New creates an Orchestrator from its config.
|
|
func New(cfg *config.SpecialConfig, agentBus *bus.Bus, logger *slog.Logger) (*Orchestrator, error) {
|
|
llmFunc, err := shelllm.FromConfig(cfg.LLM.Primary)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("orchestrator LLM: %w", err)
|
|
}
|
|
|
|
managed := make(map[string][]string)
|
|
for _, room := range cfg.Orchestration.Rooms {
|
|
managed[room.RoomID] = room.Participants
|
|
}
|
|
|
|
o := &Orchestrator{
|
|
cfg: cfg,
|
|
llm: llmFunc,
|
|
bus: agentBus,
|
|
managedRooms: managed,
|
|
participants: make(map[string]orchestration.ParticipantInfo),
|
|
logger: logger,
|
|
seen: make(map[string]bool),
|
|
}
|
|
|
|
if err := o.loadPrompts(); err != nil {
|
|
return nil, fmt.Errorf("load prompts: %w", err)
|
|
}
|
|
|
|
return o, nil
|
|
}
|
|
|
|
// RegisterParticipant adds bot metadata used for LLM routing decisions.
|
|
func (o *Orchestrator) RegisterParticipant(info orchestration.ParticipantInfo) {
|
|
o.participants[info.ID] = info
|
|
o.logger.Debug("registered participant", "bot", info.ID, "desc", info.Description)
|
|
}
|
|
|
|
// ShouldIntercept returns true if the room is managed by this orchestrator.
|
|
func (o *Orchestrator) ShouldIntercept(roomID string) bool {
|
|
_, ok := o.managedRooms[roomID]
|
|
return ok
|
|
}
|
|
|
|
// Intercept is the InterceptFunc used by bot listeners. It checks if the
|
|
// room is managed and, if so, starts the orchestration pipeline asynchronously.
|
|
// Returns true if the event was intercepted (all bots in the room should return true,
|
|
// but only the first one triggers actual routing — the rest are deduped).
|
|
func (o *Orchestrator) Intercept(ctx context.Context, msgCtx decision.MessageContext) bool {
|
|
if !o.ShouldIntercept(msgCtx.RoomID) {
|
|
return false
|
|
}
|
|
|
|
// Dedup: multiple bots receive the same event. Only route once.
|
|
key := msgCtx.RoomID + ":" + msgCtx.SenderID + ":" + msgCtx.Content
|
|
o.seenMu.Lock()
|
|
if o.seen[key] {
|
|
o.seenMu.Unlock()
|
|
return true // still intercept (don't let the bot handle it) but don't route again
|
|
}
|
|
o.seen[key] = true
|
|
o.seenMu.Unlock()
|
|
|
|
// Route asynchronously so the listener isn't blocked.
|
|
// Clean up the dedup key after routing completes.
|
|
go func() {
|
|
defer func() {
|
|
o.seenMu.Lock()
|
|
delete(o.seen, key)
|
|
o.seenMu.Unlock()
|
|
}()
|
|
if err := o.Route(ctx, msgCtx); err != nil {
|
|
o.logger.Error("orchestration failed", "room", msgCtx.RoomID, "err", err)
|
|
}
|
|
}()
|
|
return true
|
|
}
|
|
|
|
// Route is the main entry point. Called when a human posts in a managed room.
|
|
// It decides which bot(s) should respond and dispatches tasks via the bus.
|
|
func (o *Orchestrator) Route(ctx context.Context, msgCtx decision.MessageContext) error {
|
|
participants, ok := o.managedRooms[msgCtx.RoomID]
|
|
if !ok {
|
|
return fmt.Errorf("room %s is not managed", msgCtx.RoomID)
|
|
}
|
|
|
|
o.logger.Info("orchestrating message",
|
|
"room", msgCtx.RoomID,
|
|
"sender", msgCtx.SenderID,
|
|
"participants", participants,
|
|
"content_preview", truncate(msgCtx.Content, 80),
|
|
)
|
|
|
|
// Optimization: single bot → dispatch directly without LLM
|
|
if len(participants) == 1 {
|
|
o.logger.Debug("single participant, dispatching directly", "bot", participants[0])
|
|
_, err := o.dispatchAndWait(ctx, participants[0], msgCtx, 0, nil)
|
|
return err
|
|
}
|
|
|
|
var responses []orchestration.BotResponse
|
|
var lastBot string
|
|
maxIter := o.cfg.Orchestration.MaxIterations
|
|
if maxIter <= 0 {
|
|
maxIter = 3
|
|
}
|
|
|
|
for i := 0; i < maxIter; i++ {
|
|
// Route: decide which bot responds
|
|
var target string
|
|
var err error
|
|
|
|
if i == 0 {
|
|
rd, routeErr := o.routeInitial(ctx, msgCtx.Content, participants)
|
|
if routeErr != nil {
|
|
o.logger.Error("routing failed, falling back to first participant", "err", routeErr)
|
|
target = participants[0]
|
|
} else {
|
|
target = rd.TargetBotID
|
|
o.logger.Info("routed to bot",
|
|
"bot", target,
|
|
"confidence", rd.Confidence,
|
|
"reason", rd.Reason,
|
|
"iteration", i,
|
|
)
|
|
}
|
|
} else {
|
|
rd, routeErr := o.routeRefinement(ctx, msgCtx.Content, responses, participants, lastBot)
|
|
if routeErr != nil {
|
|
o.logger.Warn("refinement routing failed, stopping pipeline", "err", routeErr)
|
|
break
|
|
}
|
|
target = rd.TargetBotID
|
|
o.logger.Info("refinement routed to bot",
|
|
"bot", target,
|
|
"reason", rd.Reason,
|
|
"iteration", i,
|
|
)
|
|
}
|
|
|
|
// Dispatch: send TaskEvent to bot via bus and wait for response
|
|
response, err := o.dispatchAndWait(ctx, target, msgCtx, i, responses)
|
|
if err != nil {
|
|
o.logger.Error("dispatch failed", "bot", target, "err", err)
|
|
break
|
|
}
|
|
|
|
responses = append(responses, response)
|
|
lastBot = target
|
|
|
|
o.logger.Info("bot responded",
|
|
"bot", target,
|
|
"response_len", len(response.Text),
|
|
"iteration", i,
|
|
)
|
|
|
|
// Evaluate quality (Fase 3)
|
|
score := o.evaluate(ctx, msgCtx.Content, response)
|
|
o.logger.Info("quality evaluated",
|
|
"score", score.Score,
|
|
"continue", score.Continue,
|
|
"reason", score.Reason,
|
|
"iteration", i,
|
|
)
|
|
|
|
if score.Score >= o.cfg.Orchestration.QualityThreshold || !score.Continue {
|
|
o.logger.Info("pipeline complete",
|
|
"iterations", i+1,
|
|
"final_score", score.Score,
|
|
)
|
|
break
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// dispatchAndWait sends a TaskEvent to a bot and waits for its response.
|
|
func (o *Orchestrator) dispatchAndWait(
|
|
ctx context.Context,
|
|
botID string,
|
|
msgCtx decision.MessageContext,
|
|
iteration int,
|
|
previousResponses []orchestration.BotResponse,
|
|
) (orchestration.BotResponse, error) {
|
|
taskID := fmt.Sprintf("orch-%s-%s-%d", msgCtx.RoomID, botID, iteration)
|
|
|
|
task := orchestration.TaskEvent{
|
|
TaskID: taskID,
|
|
TargetBotID: botID,
|
|
TargetRoomID: msgCtx.RoomID,
|
|
OriginalSender: msgCtx.SenderID,
|
|
OriginalQuestion: msgCtx.Content,
|
|
Iteration: iteration,
|
|
PreviousResponses: previousResponses,
|
|
}
|
|
|
|
taskJSON, err := orchestration.MarshalTaskEvent(task)
|
|
if err != nil {
|
|
return orchestration.BotResponse{}, fmt.Errorf("marshal task: %w", err)
|
|
}
|
|
|
|
msg := bus.AgentMessage{
|
|
From: bus.AgentID(o.cfg.Special.ID),
|
|
To: bus.AgentID(botID),
|
|
Kind: bus.KindTask,
|
|
Payload: map[string]string{"task_json": taskJSON},
|
|
}
|
|
|
|
timeout := o.cfg.Orchestration.DelegationTimeout
|
|
if timeout <= 0 {
|
|
timeout = 30_000_000_000 // 30s default
|
|
}
|
|
|
|
reply, err := o.bus.SendAndWait(ctx, msg, taskID, timeout)
|
|
if err != nil {
|
|
return orchestration.BotResponse{}, err
|
|
}
|
|
|
|
resultJSON, ok := reply.Payload["result_json"]
|
|
if !ok {
|
|
return orchestration.BotResponse{}, fmt.Errorf("reply missing result_json")
|
|
}
|
|
|
|
result, err := orchestration.UnmarshalTaskResult(resultJSON)
|
|
if err != nil {
|
|
return orchestration.BotResponse{}, fmt.Errorf("unmarshal result: %w", err)
|
|
}
|
|
|
|
if result.Error != "" {
|
|
return orchestration.BotResponse{}, fmt.Errorf("bot %s error: %s", botID, result.Error)
|
|
}
|
|
|
|
return orchestration.BotResponse{
|
|
BotID: botID,
|
|
Text: result.Text,
|
|
}, nil
|
|
}
|
|
|
|
// loadPrompts reads the orchestrator's prompt files.
|
|
func (o *Orchestrator) loadPrompts() error {
|
|
base := filepath.Join("agents", "specials", "orchestrator", "prompts")
|
|
|
|
routing, err := os.ReadFile(filepath.Join(base, "routing.md"))
|
|
if err != nil {
|
|
return fmt.Errorf("routing prompt: %w", err)
|
|
}
|
|
o.routingPrompt = string(routing)
|
|
|
|
quality, err := os.ReadFile(filepath.Join(base, "quality.md"))
|
|
if err != nil {
|
|
return fmt.Errorf("quality prompt: %w", err)
|
|
}
|
|
o.qualityPrompt = string(quality)
|
|
|
|
refinement, err := os.ReadFile(filepath.Join(base, "refinement.md"))
|
|
if err != nil {
|
|
return fmt.Errorf("refinement prompt: %w", err)
|
|
}
|
|
o.refinementPrompt = string(refinement)
|
|
|
|
return nil
|
|
}
|
|
|
|
// buildParticipantsList formats participant info for LLM prompts.
|
|
func (o *Orchestrator) buildParticipantsList(botIDs []string, exclude string) string {
|
|
var sb strings.Builder
|
|
for _, id := range botIDs {
|
|
if id == exclude {
|
|
continue
|
|
}
|
|
info, ok := o.participants[id]
|
|
if !ok {
|
|
sb.WriteString(fmt.Sprintf("- %s: (no description available)\n", id))
|
|
continue
|
|
}
|
|
caps := ""
|
|
if len(info.Capabilities) > 0 {
|
|
caps = fmt.Sprintf(" (capabilities: %s)", strings.Join(info.Capabilities, ", "))
|
|
}
|
|
sb.WriteString(fmt.Sprintf("- %s: %s%s\n", info.ID, info.Description, caps))
|
|
}
|
|
return sb.String()
|
|
}
|
|
|
|
// truncate shortens s to at most n runes and appends "..." when anything was
// cut off. Strings of n runes or fewer are returned unchanged.
//
// A negative n is treated as 0 instead of panicking. The string is scanned
// only up to the cut point, so long messages are not copied into a rune slice
// just to build a short preview. The kept prefix is a byte slice of s, so any
// invalid UTF-8 in it is passed through unchanged.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	seen := 0
	// Ranging over a string yields the byte offset of each rune start, so the
	// cut always falls on a rune boundary.
	for offset := range s {
		if seen == n {
			return s[:offset] + "..."
		}
		seen++
	}
	return s
}
|