// agents_and_robots/shell/orchestration/evaluator.go

package orchestration

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	coretypes "github.com/enmanuel/agents/pkg/llm"
	"github.com/enmanuel/agents/pkg/orchestration"
)

// evaluate asks the LLM to score the quality of a bot's response.
//
// It sends the original question together with the bot's ID and text as a
// single user message, using o.qualityPrompt as the system prompt and the
// primary LLM settings from o.cfg. The reply is expected to contain a JSON
// object that decodes into an orchestration.QualityScore.
//
// evaluate never returns an error. If the LLM call fails, or the reply cannot
// be decoded, it logs the problem and returns a fallback score of 1.0 with
// Continue set to false and the failure described in Reason.
func (o *Orchestrator) evaluate(ctx context.Context, question string, response orchestration.BotResponse) orchestration.QualityScore {
	userContent := fmt.Sprintf("Question: %s\n\nResponse from %s:\n%s", question, response.BotID, response.Text)

	resp, err := o.llm(ctx, coretypes.CompletionRequest{
		Model:        o.cfg.LLM.Primary.Model,
		MaxTokens:    o.cfg.LLM.Primary.MaxTokens,
		Temperature:  o.cfg.LLM.Primary.Temperature,
		SystemPrompt: o.qualityPrompt,
		Messages: []coretypes.Message{
			{Role: coretypes.RoleUser, Content: userContent},
		},
	})
	if err != nil {
		o.logger.Error("quality evaluation LLM call failed", "err", err)
		// On LLM failure, assume quality is good enough to stop the pipeline
		return orchestration.QualityScore{
			Score:    1.0,
			Continue: false,
			Reason:   fmt.Sprintf("evaluation failed: %s, assuming good quality", err),
		}
	}

	// The model may wrap the JSON in a markdown code fence or surround it with
	// prose. Narrow the payload to the outermost {...} span so such replies
	// still decode instead of silently falling back to a perfect score. A reply
	// that is already a bare JSON object is left unchanged by this step.
	payload := strings.TrimSpace(resp.Content)
	if start, end := strings.IndexByte(payload, '{'), strings.LastIndexByte(payload, '}'); start >= 0 && end > start {
		payload = payload[start : end+1]
	}

	var qs orchestration.QualityScore
	if err := json.Unmarshal([]byte(payload), &qs); err != nil {
		o.logger.Warn("failed to parse quality score", "content", resp.Content, "err", err)
		// On parse failure, assume good quality
		return orchestration.QualityScore{
			Score:    1.0,
			Continue: false,
			Reason:   fmt.Sprintf("parse failed: %s", err),
		}
	}

	return qs
}