feat: import agents_and_robots platform as unibots (Matrix-out, unibus transport)
Reemplaza el scaffold del echobot por la plataforma completa de bots traída desde ~/DataProyects/Github/agents_and_robots tras la operación Matrix-out: los bots ya no hablan por Matrix sino por el bus unibus (modelo todo-rooms + E2E vía shell/transportunibus sobre github.com/enmanuel/unibus/pkg/client). - go.mod: replace de unibus -> ../unibus y de fn-registry -> ../../../.. (paths relativos reajustados a la nueva ubicación dentro de fn_registry). - app.md: bump a 0.2.0, descripción + arquitectura + comandos + gotchas reales. - módulo Go conservado como github.com/enmanuel/agents (sin reescribir imports). agents_and_robots queda archivado como museo de la era Matrix.
This commit is contained in:
@@ -0,0 +1,321 @@
|
||||
// Command agentctl manages Matrix agents: list, start, stop, reload, remove.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// agentctl list # all agents with their status
|
||||
// agentctl start # start all enabled agents
|
||||
// agentctl start assistant-bot # start a specific agent
|
||||
// agentctl stop # stop all running agents
|
||||
// agentctl stop assistant-bot # stop a specific agent
|
||||
// agentctl remove assistant-bot # disable agent (keeps data)
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/enmanuel/agents/shell/process"
|
||||
)
|
||||
|
||||
const (
|
||||
runDir = "run"
|
||||
agentsGlob = "agents/*/config.yaml"
|
||||
)
|
||||
|
||||
// ── entry point ───────────────────────────────────────────────────────────
|
||||
|
||||
func main() {
|
||||
var binPath string
|
||||
|
||||
mgr := process.NewManager(runDir, agentsGlob, "")
|
||||
|
||||
root := &cobra.Command{
|
||||
Use: "agentctl",
|
||||
Short: "Manage Matrix agents",
|
||||
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
|
||||
return os.MkdirAll(runDir, 0o755)
|
||||
},
|
||||
}
|
||||
|
||||
root.PersistentFlags().StringVar(&binPath, "bin", "",
|
||||
"Launcher binary path. Defaults to ./bin/launcher, falls back to 'go run ./cmd/launcher'")
|
||||
|
||||
root.AddCommand(
|
||||
listCmd(mgr),
|
||||
startCmd(mgr, &binPath),
|
||||
stopCmd(mgr),
|
||||
reloadCmd(mgr),
|
||||
removeCmd(mgr),
|
||||
)
|
||||
|
||||
if err := root.Execute(); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// ── list ──────────────────────────────────────────────────────────────────
|
||||
|
||||
func listCmd(mgr *process.Manager) *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "list",
|
||||
Short: "List all agents and their current status",
|
||||
Aliases: []string{"ls"},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
statuses, err := mgr.StatusAll()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(statuses) == 0 {
|
||||
fmt.Println("No agents found under agents/*/config.yaml")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("%-20s %-14s %-8s %-4s %s\n", "ID", "STATUS", "VERSION", "INST", "DESCRIPTION")
|
||||
fmt.Println(strings.Repeat("─", 78))
|
||||
for _, s := range statuses {
|
||||
fmt.Printf("%-20s %-14s %-8s %-4d %s\n",
|
||||
s.ID,
|
||||
statusLabel(s),
|
||||
s.Version,
|
||||
s.Instances,
|
||||
truncate(s.Desc, 32),
|
||||
)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── start ─────────────────────────────────────────────────────────────────
|
||||
|
||||
func startCmd(mgr *process.Manager, binPath *string) *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "start [agent-id...]",
|
||||
Short: "Start one or all enabled agents",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
statuses, err := mgr.StatusAll()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
targets := filterTargets(statuses, args)
|
||||
if len(targets) == 0 {
|
||||
return fmt.Errorf("no matching agents found")
|
||||
}
|
||||
|
||||
started := 0
|
||||
for _, s := range targets {
|
||||
if !s.Enabled {
|
||||
fmt.Printf("skip %-20s (disabled in config)\n", s.ID)
|
||||
continue
|
||||
}
|
||||
if err := mgr.Start(s.AgentInfo); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "fail %-20s %v\n", s.ID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("start %-20s PID %d (instances: %d) log → %s\n",
|
||||
s.ID, mgr.ReadPID(s.ID), mgr.InstanceCount(s.ID), mgr.LogPath(s.ID))
|
||||
started++
|
||||
}
|
||||
|
||||
if started == 0 {
|
||||
fmt.Println("Nothing started.")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── stop ──────────────────────────────────────────────────────────────────
|
||||
|
||||
func stopCmd(mgr *process.Manager) *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "stop [agent-id...]",
|
||||
Short: "Stop one or all running agents",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
statuses, err := mgr.StatusAll()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
targets := filterTargets(statuses, args)
|
||||
if len(targets) == 0 {
|
||||
return fmt.Errorf("no matching agents found")
|
||||
}
|
||||
|
||||
stopped := 0
|
||||
for _, s := range targets {
|
||||
if !s.Running {
|
||||
fmt.Printf("skip %-20s (not running)\n", s.ID)
|
||||
continue
|
||||
}
|
||||
pid := s.PID
|
||||
if err := mgr.Stop(s.ID); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "fail %-20s %v\n", s.ID, err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("stop %-20s stopped PID %d\n", s.ID, pid)
|
||||
stopped++
|
||||
}
|
||||
|
||||
if stopped == 0 {
|
||||
fmt.Println("Nothing stopped.")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── reload ────────────────────────────────────────────────────────────────
|
||||
|
||||
func reloadCmd(mgr *process.Manager) *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "reload [agent-id]",
|
||||
Short: "Hot-reload an agent (or all agents) without stopping the launcher",
|
||||
Long: `Sends SIGHUP to the running launcher, which triggers a hot-reload.
|
||||
If an agent-id is given, only that agent is reloaded.
|
||||
If no agent-id is given, all agents are reloaded.
|
||||
|
||||
The launcher must be running. Use 'agentctl start' first if needed.`,
|
||||
Args: cobra.MaximumNArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
pid := mgr.UnifiedPID()
|
||||
if pid <= 0 {
|
||||
return fmt.Errorf("launcher is not running")
|
||||
}
|
||||
|
||||
target := ""
|
||||
if len(args) == 1 {
|
||||
target = args[0]
|
||||
}
|
||||
|
||||
if target != "" {
|
||||
if err := os.WriteFile("run/reload.txt", []byte(target), 0o644); err != nil {
|
||||
return fmt.Errorf("write reload target: %w", err)
|
||||
}
|
||||
fmt.Printf("reload %-20s sending SIGHUP to PID %d\n", target, pid)
|
||||
} else {
|
||||
// Remove any stale reload.txt so SIGHUP reloads all agents.
|
||||
_ = os.Remove("run/reload.txt")
|
||||
fmt.Printf("reload %-20s sending SIGHUP to PID %d\n", "(all)", pid)
|
||||
}
|
||||
|
||||
if err := syscall.Kill(pid, syscall.SIGHUP); err != nil {
|
||||
return fmt.Errorf("kill -HUP %d: %w", pid, err)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── remove ────────────────────────────────────────────────────────────────
|
||||
|
||||
func removeCmd(mgr *process.Manager) *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "remove <agent-id>",
|
||||
Short: "Disable an agent (sets enabled: false). Does not delete data.",
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
id := args[0]
|
||||
|
||||
statuses, err := mgr.StatusAll()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var target *process.AgentStatus
|
||||
for i := range statuses {
|
||||
if statuses[i].ID == id {
|
||||
target = &statuses[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if target == nil {
|
||||
return fmt.Errorf("agent %q not found", id)
|
||||
}
|
||||
|
||||
if target.Running {
|
||||
if err := mgr.Stop(id); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "warn stop failed: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("stop %-20s stopped PID %d\n", id, target.PID)
|
||||
}
|
||||
}
|
||||
|
||||
if err := setEnabled(target.ConfigPath, false); err != nil {
|
||||
return fmt.Errorf("update config: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("ok %-20s marked as disabled in %s\n", id, target.ConfigPath)
|
||||
fmt.Printf(" Data preserved at agents/%s/data/\n", id)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
func filterTargets(statuses []process.AgentStatus, ids []string) []process.AgentStatus {
|
||||
if len(ids) == 0 {
|
||||
return statuses
|
||||
}
|
||||
set := make(map[string]bool, len(ids))
|
||||
for _, id := range ids {
|
||||
set[id] = true
|
||||
}
|
||||
var out []process.AgentStatus
|
||||
for _, s := range statuses {
|
||||
if set[s.ID] {
|
||||
out = append(out, s)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func statusLabel(s process.AgentStatus) string {
|
||||
switch {
|
||||
case !s.Enabled:
|
||||
return "disabled"
|
||||
case s.Running:
|
||||
if s.Instances > 1 {
|
||||
return fmt.Sprintf("● running(%d)", s.Instances)
|
||||
}
|
||||
return "● running"
|
||||
default:
|
||||
return "○ stopped"
|
||||
}
|
||||
}
|
||||
|
||||
// truncate shortens s to at most max characters for column display. Strings
// that already fit are returned unchanged; longer ones keep their first max-1
// characters followed by an ellipsis ("…").
//
// Length is measured in runes, not bytes, so multi-byte UTF-8 text is never
// cut in the middle of a character. A non-positive max yields the empty string
// instead of panicking on a negative slice bound.
func truncate(s string, max int) string {
	if max <= 0 {
		return ""
	}
	// Fast path: a string of at most max bytes has at most max runes.
	if len(s) <= max {
		return s
	}
	runes := []rune(s)
	if len(runes) <= max {
		return s
	}
	return string(runes[:max-1]) + "…"
}
|
||||
|
||||
// setEnabled rewrites the first `enabled: true` / `enabled: false` key in the
// YAML file at configPath so that it carries the requested value.
//
// The edit is textual: the file is not parsed, and the search is not scoped to
// a particular YAML section. An occurrence only counts when it starts a key of
// its own, so longer keys that merely end in "enabled" (for example
// `tls_enabled: true`) are left alone. If no key with the opposite value is
// found the file is left untouched and nil is returned.
//
// It returns any error from reading or writing the file.
func setEnabled(configPath string, enabled bool) error {
	raw, err := os.ReadFile(configPath)
	if err != nil {
		return err
	}

	current, replacement := "enabled: true", "enabled: false"
	if enabled {
		current, replacement = replacement, current
	}

	content := string(raw)
	at := indexEnabledKey(content, current)
	if at < 0 {
		return nil
	}

	updated := content[:at] + replacement + content[at+len(current):]
	return os.WriteFile(configPath, []byte(updated), 0o644)
}

// indexEnabledKey returns the byte offset of the first occurrence of key in s
// that is not the tail of a longer identifier, or -1 if there is none.
func indexEnabledKey(s, key string) int {
	for from := 0; from <= len(s); {
		rel := strings.Index(s[from:], key)
		if rel < 0 {
			return -1
		}
		at := from + rel
		if at == 0 || !isYAMLKeyByte(s[at-1]) {
			return at
		}
		from = at + 1
	}
	return -1
}

// isYAMLKeyByte reports whether b can be part of a plain YAML key name.
func isYAMLKeyByte(b byte) bool {
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
		return true
	case b == '_', b == '-', b == '.':
		return true
	}
	return false
}
|
||||
@@ -0,0 +1,85 @@
|
||||
// Command dashboard provides an interactive TUI for managing bot agents.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// dashboard # launch the interactive TUI
|
||||
// go run ./cmd/dashboard
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
|
||||
puretui "github.com/enmanuel/agents/pkg/tui"
|
||||
"github.com/enmanuel/agents/shell/process"
|
||||
shelltui "github.com/enmanuel/agents/shell/tui"
|
||||
)
|
||||
|
||||
const (
|
||||
runDir = "run"
|
||||
agentsGlob = "agents/*/config.yaml"
|
||||
)
|
||||
|
||||
func main() {
|
||||
_ = os.MkdirAll(runDir, 0o755)
|
||||
|
||||
mgr := process.NewManager(runDir, agentsGlob, "")
|
||||
adapter := shelltui.NewAdapter(mgr)
|
||||
|
||||
p := tea.NewProgram(newBridge(adapter), tea.WithAltScreen())
|
||||
if _, err := p.Run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// bridge implements tea.Model and connects the pure Update with the impure Adapter.
|
||||
type bridge struct {
|
||||
model puretui.Model
|
||||
adapter *shelltui.Adapter
|
||||
}
|
||||
|
||||
func newBridge(adapter *shelltui.Adapter) bridge {
|
||||
return bridge{
|
||||
model: puretui.InitialModel(),
|
||||
adapter: adapter,
|
||||
}
|
||||
}
|
||||
|
||||
func (b bridge) Init() tea.Cmd {
|
||||
return b.adapter.RunIntent(puretui.Intent{Kind: puretui.IntentLoadAgents})
|
||||
}
|
||||
|
||||
func (b bridge) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
// Convert tea messages to pure messages.
|
||||
var pureMsg interface{}
|
||||
switch m := msg.(type) {
|
||||
case tea.KeyMsg:
|
||||
pureMsg = puretui.KeyMsg{Str: m.String()}
|
||||
case tea.WindowSizeMsg:
|
||||
pureMsg = puretui.WindowSizeMsg{Width: m.Width, Height: m.Height}
|
||||
default:
|
||||
// MsgAgentsLoaded, MsgActionDone, MsgLogsLoaded, MsgTick pass through.
|
||||
pureMsg = msg
|
||||
}
|
||||
|
||||
// Pure update: no side effects.
|
||||
newModel, intents := puretui.Update(b.model, pureMsg)
|
||||
b.model = newModel
|
||||
|
||||
// Convert pure intents to impure tea.Cmds.
|
||||
cmds := make([]tea.Cmd, 0, len(intents))
|
||||
for _, intent := range intents {
|
||||
if cmd := b.adapter.RunIntent(intent); cmd != nil {
|
||||
cmds = append(cmds, cmd)
|
||||
}
|
||||
}
|
||||
|
||||
return b, tea.Batch(cmds...)
|
||||
}
|
||||
|
||||
func (b bridge) View() string {
|
||||
return puretui.View(b.model)
|
||||
}
|
||||
@@ -1,267 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
cs "fn-registry/functions/cybersecurity"
|
||||
|
||||
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||
"github.com/enmanuel/unibus/pkg/client"
|
||||
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||
"github.com/enmanuel/unibus/pkg/frame"
|
||||
"github.com/enmanuel/unibus/pkg/membership"
|
||||
"github.com/enmanuel/unibus/pkg/room"
|
||||
)
|
||||
|
||||
// testHarness holds the two endpoints of an isolated bus created for one test:
// an embedded NATS server and an in-process membershipd. Both listen on their
// own free ports (never the production 8470/4250, nor a locally running
// playground on 7700/8480/4260) and are torn down through test cleanup. The
// setup mirrors the unibus client_test harness so that the echobot is
// exercised against the real bus.
type testHarness struct {
	natsURL, ctrlURL string
}
|
||||
|
||||
// freePort asks the OS for an unused TCP port on 127.0.0.1 by listening on
// port 0, and returns the port that was assigned. The listener is closed
// before returning, so the port is free again (and could in principle be taken
// by another process before the caller binds it). The test fails if listening
// is not possible.
func freePort(t *testing.T) int {
	t.Helper()

	listener, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("free port: %v", listenErr)
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
|
||||
|
||||
func newHarness(t *testing.T) *testHarness {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
|
||||
ns, err := embeddednats.Start(filepath.Join(dir, "js"), freePort(t))
|
||||
if err != nil {
|
||||
t.Fatalf("embedded nats: %v", err)
|
||||
}
|
||||
|
||||
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||
if err != nil {
|
||||
ns.Shutdown()
|
||||
t.Fatalf("membership store: %v", err)
|
||||
}
|
||||
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
||||
if err != nil {
|
||||
store.Close()
|
||||
ns.Shutdown()
|
||||
t.Fatalf("blob store: %v", err)
|
||||
}
|
||||
srv := membership.NewServer(store, blobs)
|
||||
httpts := httptest.NewServer(srv)
|
||||
|
||||
t.Cleanup(func() {
|
||||
httpts.Close()
|
||||
store.Close()
|
||||
ns.Shutdown()
|
||||
ns.WaitForShutdown()
|
||||
})
|
||||
|
||||
return &testHarness{natsURL: embeddednats.ClientURL(ns), ctrlURL: httpts.URL}
|
||||
}
|
||||
|
||||
// waitHealth polls ctrlURL + "/healthz" every 50ms until it answers with HTTP
// 200, and fails the test if that has not happened within 3 seconds.
func waitHealth(t *testing.T, ctrlURL string) {
	t.Helper()

	const (
		giveUpAfter = 3 * time.Second
		pollEvery   = 50 * time.Millisecond
	)

	for stopAt := time.Now().Add(giveUpAfter); time.Now().Before(stopAt); time.Sleep(pollEvery) {
		resp, err := http.Get(ctrlURL + "/healthz")
		healthy := err == nil && resp.StatusCode == 200
		// Close the body on every response, healthy or not.
		if resp != nil {
			resp.Body.Close()
		}
		if healthy {
			return
		}
	}
	t.Fatalf("membershipd never became healthy")
}
|
||||
|
||||
func mustIdentity(t *testing.T) cs.Identity {
|
||||
t.Helper()
|
||||
id, err := cs.GenerateIdentity()
|
||||
if err != nil {
|
||||
t.Fatalf("generate identity: %v", err)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// waitFor polls every 25ms until pred accepts a copy of *slice or timeout has
// elapsed, and reports whether pred was satisfied. The copy is taken while
// holding mu; pred itself runs without the lock. With a non-positive timeout
// pred is never called and the result is false.
func waitFor(mu *sync.Mutex, slice *[]string, pred func([]string) bool, timeout time.Duration) bool {
	for stopAt := time.Now().Add(timeout); time.Now().Before(stopAt); time.Sleep(25 * time.Millisecond) {
		mu.Lock()
		view := append([]string(nil), (*slice)...)
		mu.Unlock()

		if pred(view) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// snapshot returns a copy of *slice taken while holding mu, so the caller can
// read it without racing against writers.
func snapshot(mu *sync.Mutex, slice *[]string) []string {
	mu.Lock()
	copied := append([]string(nil), (*slice)...)
	mu.Unlock()
	return copied
}
|
||||
|
||||
// contains reports whether want is one of the elements of rs.
func contains(rs []string, want string) bool {
	for i := 0; i < len(rs); i++ {
		if rs[i] == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
// startEchobot wires up the echobot's chat + rpc behaviour against the given bus,
|
||||
// using the same logic the main() entry point runs. It returns the bot client and
|
||||
// its endpoint id so callers can assert the anti-loop guard. Cleanup is registered
|
||||
// on the test.
|
||||
func startEchobot(t *testing.T, h *testHarness, roomSubject, rpcSubject string) (*client.Client, string) {
|
||||
t.Helper()
|
||||
bot, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||
if err != nil {
|
||||
t.Fatalf("connect echobot: %v", err)
|
||||
}
|
||||
selfID := bot.Endpoint().ID
|
||||
|
||||
chatRoom, err := bot.CreateRoom(roomSubject, room.ModeNATS)
|
||||
if err != nil {
|
||||
bot.Close()
|
||||
t.Fatalf("echobot create chat room: %v", err)
|
||||
}
|
||||
chatSub, err := bot.Subscribe(chatRoom, func(f frame.Frame, plaintext []byte) {
|
||||
if f.Sender == selfID {
|
||||
return // anti-loop guard
|
||||
}
|
||||
_ = bot.Publish(chatRoom, []byte("echo: "+string(plaintext)))
|
||||
})
|
||||
if err != nil {
|
||||
bot.Close()
|
||||
t.Fatalf("echobot subscribe chat: %v", err)
|
||||
}
|
||||
rpcSub, err := bot.Reply(rpcSubject, func(body []byte) []byte {
|
||||
return []byte("echo: " + string(body))
|
||||
})
|
||||
if err != nil {
|
||||
chatSub.Unsubscribe()
|
||||
bot.Close()
|
||||
t.Fatalf("echobot reply: %v", err)
|
||||
}
|
||||
|
||||
t.Cleanup(func() {
|
||||
rpcSub.Unsubscribe()
|
||||
chatSub.Unsubscribe()
|
||||
bot.Close()
|
||||
})
|
||||
return bot, selfID
|
||||
}
|
||||
|
||||
// TestChatEcho: a "human" peer publishes "hola" on the echo subject; the echobot
|
||||
// replies "echo: hola". Asserts the human receives the echo and that the echobot
|
||||
// never echoes its own messages (no infinite loop).
|
||||
func TestChatEcho(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
waitHealth(t, h.ctrlURL)
|
||||
|
||||
const subject = "room.echo.test"
|
||||
_, botID := startEchobot(t, h, subject, "rpc.echo.test")
|
||||
|
||||
human, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||
if err != nil {
|
||||
t.Fatalf("connect human: %v", err)
|
||||
}
|
||||
defer human.Close()
|
||||
|
||||
humanRoom, err := human.CreateRoom(subject, room.ModeNATS)
|
||||
if err != nil {
|
||||
t.Fatalf("human create room: %v", err)
|
||||
}
|
||||
|
||||
var mu sync.Mutex
|
||||
var received []string
|
||||
var echoSenders []string
|
||||
hsub, err := human.Subscribe(humanRoom, func(f frame.Frame, plaintext []byte) {
|
||||
mu.Lock()
|
||||
received = append(received, string(plaintext))
|
||||
if string(plaintext) == "echo: hola" {
|
||||
echoSenders = append(echoSenders, f.Sender)
|
||||
}
|
||||
mu.Unlock()
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("human subscribe: %v", err)
|
||||
}
|
||||
defer hsub.Unsubscribe()
|
||||
|
||||
// Let both subscriptions settle before publishing.
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
if err := human.Publish(humanRoom, []byte("hola")); err != nil {
|
||||
t.Fatalf("human publish: %v", err)
|
||||
}
|
||||
|
||||
if !waitFor(&mu, &received, func(rs []string) bool { return contains(rs, "echo: hola") }, 2*time.Second) {
|
||||
t.Fatalf("human never received the echo; got %v", snapshot(&mu, &received))
|
||||
}
|
||||
|
||||
// The echo must come from the bot, not the human (sanity on routing).
|
||||
mu.Lock()
|
||||
senders := append([]string(nil), echoSenders...)
|
||||
mu.Unlock()
|
||||
for _, s := range senders {
|
||||
if s != botID {
|
||||
t.Fatalf("echo came from %q, expected echobot %q", s, botID)
|
||||
}
|
||||
}
|
||||
|
||||
// Anti-loop: give the bus time to spin if the guard were broken, then assert
|
||||
// the bot did not re-echo its own "echo: hola" into "echo: echo: hola".
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
for _, r := range snapshot(&mu, &received) {
|
||||
if r == "echo: echo: hola" {
|
||||
t.Fatalf("anti-loop guard broken: bot echoed its own message (%q)", r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRPCEcho: a process peer issues Request(rpc-subject, "ping") and gets back
|
||||
// "echo: ping". The unibus client library exposes request/reply, so this mode is
|
||||
// fully supported (see client.go: Client.Request / Client.Reply).
|
||||
func TestRPCEcho(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
waitHealth(t, h.ctrlURL)
|
||||
|
||||
const rpcSubject = "rpc.echo.test"
|
||||
startEchobot(t, h, "room.echo.test", rpcSubject)
|
||||
|
||||
caller, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||
if err != nil {
|
||||
t.Fatalf("connect caller: %v", err)
|
||||
}
|
||||
defer caller.Close()
|
||||
|
||||
// Give the responder time to subscribe.
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
|
||||
resp, err := caller.Request(rpcSubject, []byte("ping"), 2*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("rpc request: %v", err)
|
||||
}
|
||||
if got, want := string(resp), "echo: ping"; got != want {
|
||||
t.Fatalf("rpc echo mismatch: got %q want %q", got, want)
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
// Command echobot is the first bot of the unibots platform: a bot WITHOUT an
|
||||
// LLM that demonstrates the two conversation patterns of the unibus bus.
|
||||
//
|
||||
// - Chat mode (bot<->human): the bot joins a cleartext room (room.ModeNATS)
|
||||
// on a shared subject and echoes back every message it sees, prefixed with
|
||||
// "echo: ". It never echoes its own messages (anti-loop guard), so two
|
||||
// echobots on the same subject do not spin forever.
|
||||
// - RPC mode (bot<->process): the bot registers a NATS request/reply
|
||||
// responder on an rpc.* subject that returns "echo: " + the request body.
|
||||
//
|
||||
// echobot is application code that consumes the unibus client library; it is
|
||||
// not a reusable registry function. The bus is the neighbouring `unibus` app.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/enmanuel/unibus/pkg/client"
|
||||
"github.com/enmanuel/unibus/pkg/frame"
|
||||
"github.com/enmanuel/unibus/pkg/room"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var (
|
||||
natsURL = flag.String("nats-url", "nats://127.0.0.1:4250", "NATS data-plane URL of the unibus bus")
|
||||
ctrlURL = flag.String("ctrl-url", "http://127.0.0.1:8470", "membershipd control-plane HTTP URL")
|
||||
roomSubject = flag.String("room-subject", "room.echo", "cleartext chat subject the bot listens on (bot<->human)")
|
||||
rpcSubject = flag.String("rpc-subject", "rpc.echo", "request/reply subject the bot responds on (bot<->process)")
|
||||
idFile = flag.String("id-file", "./local_files/echobot.id", "path to the bot's long-term identity file")
|
||||
)
|
||||
flag.Parse()
|
||||
|
||||
logger := log.New(os.Stderr, "[echobot] ", log.LstdFlags|log.Lmsgprefix)
|
||||
|
||||
id, err := client.LoadOrCreateIdentity(*idFile)
|
||||
if err != nil {
|
||||
logger.Fatalf("load/create identity %q: %v", *idFile, err)
|
||||
}
|
||||
|
||||
c, err := client.New(*natsURL, *ctrlURL, id)
|
||||
if err != nil {
|
||||
logger.Fatalf("connect to bus (nats=%s ctrl=%s): %v", *natsURL, *ctrlURL, err)
|
||||
}
|
||||
defer c.Close()
|
||||
|
||||
self := c.Endpoint()
|
||||
|
||||
// --- Chat mode (bot<->human) --------------------------------------------
|
||||
// A cleartext room mapped to the shared subject. NATS fans out by subject,
|
||||
// so the bot shares the conversation with any peer on the same subject even
|
||||
// if their room ids differ (same pattern as unibus worker/chat).
|
||||
chatRoom, err := c.CreateRoom(*roomSubject, room.ModeNATS)
|
||||
if err != nil {
|
||||
logger.Fatalf("create chat room on subject %q: %v", *roomSubject, err)
|
||||
}
|
||||
|
||||
chatSub, err := c.Subscribe(chatRoom, func(f frame.Frame, plaintext []byte) {
|
||||
// Anti-loop guard: never echo our own messages, or two echobots (or a
|
||||
// single bot seeing its own publish) would loop forever.
|
||||
if f.Sender == self.ID {
|
||||
return
|
||||
}
|
||||
reply := "echo: " + string(plaintext)
|
||||
if err := c.Publish(chatRoom, []byte(reply)); err != nil {
|
||||
logger.Printf("chat: publish echo failed: %v", err)
|
||||
return
|
||||
}
|
||||
logger.Printf("chat: echoed %q -> %q (from %s)", string(plaintext), reply, f.Sender)
|
||||
})
|
||||
if err != nil {
|
||||
logger.Fatalf("subscribe to chat room: %v", err)
|
||||
}
|
||||
defer chatSub.Unsubscribe()
|
||||
|
||||
// --- RPC mode (bot<->process) -------------------------------------------
|
||||
// NATS request/reply: a responder on the rpc subject returns "echo: " + body.
|
||||
rpcSub, err := c.Reply(*rpcSubject, func(body []byte) []byte {
|
||||
reply := "echo: " + string(body)
|
||||
logger.Printf("rpc: %q -> %q", string(body), reply)
|
||||
return []byte(reply)
|
||||
})
|
||||
if err != nil {
|
||||
logger.Fatalf("register rpc responder on %q: %v", *rpcSubject, err)
|
||||
}
|
||||
defer rpcSub.Unsubscribe()
|
||||
|
||||
logger.Printf("echobot up: endpoint=%s bus(nats=%s ctrl=%s) chat-subject=%q rpc-subject=%q",
|
||||
self.ID, *natsURL, *ctrlURL, *roomSubject, *rpcSubject)
|
||||
|
||||
// --- Loop until SIGINT/SIGTERM, then shut down cleanly ------------------
|
||||
sig := make(chan os.Signal, 1)
|
||||
signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
|
||||
s := <-sig
|
||||
logger.Printf("received %v, shutting down", s)
|
||||
}
|
||||
@@ -0,0 +1,252 @@
|
||||
// Command launcher starts one or more agents from their config files.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// go run ./cmd/launcher # auto-discovers agents/*/config.yaml
|
||||
// go run ./cmd/launcher -c agents/assistant/config.yaml
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/enmanuel/agents/agents"
|
||||
"github.com/enmanuel/agents/internal/config"
|
||||
"github.com/enmanuel/agents/pkg/decision"
|
||||
pksecurity "github.com/enmanuel/agents/pkg/security"
|
||||
"github.com/enmanuel/agents/shell/bus"
|
||||
agentlog "github.com/enmanuel/agents/shell/logger"
|
||||
shellsecurity "github.com/enmanuel/agents/shell/security"
|
||||
|
||||
// Blank imports: each agent self-registers its rules via init().
|
||||
_ "github.com/enmanuel/agents/agents/asistente-2"
|
||||
_ "github.com/enmanuel/agents/agents/assistant-bot"
|
||||
_ "github.com/enmanuel/agents/agents/meteorologo"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var (
|
||||
configPaths []string
|
||||
logLevel string
|
||||
logDir string
|
||||
)
|
||||
|
||||
root := &cobra.Command{
|
||||
Use: "launcher",
|
||||
Short: "Start Matrix agents from config files",
|
||||
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
|
||||
if len(configPaths) == 0 {
|
||||
matches, _ := filepath.Glob("agents/*/config.yaml")
|
||||
configPaths = matches
|
||||
}
|
||||
return nil
|
||||
},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
lvl := parseLogLevel(logLevel)
|
||||
|
||||
// ── Launcher-level logger ──
|
||||
logger, launcherCleanup, err := agentlog.NewAgentLogger(agentlog.LoggerConfig{
|
||||
BaseDir: logDir,
|
||||
AgentID: "launcher",
|
||||
Level: lvl,
|
||||
})
|
||||
if err != nil {
|
||||
// Fallback to stdout if file logger fails.
|
||||
logger = newLogger(logLevel)
|
||||
logger.Warn("could not create file logger, falling back to stdout", "err", err)
|
||||
launcherCleanup = func() {}
|
||||
}
|
||||
defer launcherCleanup()
|
||||
|
||||
if len(configPaths) == 0 {
|
||||
logger.Warn("no agent configs found — nothing to start")
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
// ── Load centralized security policy ──
|
||||
secPolicy, secErr := shellsecurity.Load("security/")
|
||||
if secErr != nil {
|
||||
logger.Warn("security policy load failed, using empty policy (open access)", "err", secErr)
|
||||
secPolicy = pksecurity.SecurityPolicy{}
|
||||
} else {
|
||||
logger.Info("security policy loaded",
|
||||
"user_groups", len(secPolicy.UserGroups),
|
||||
"agent_groups", len(secPolicy.AgentGroups),
|
||||
"policies", len(secPolicy.Policies),
|
||||
)
|
||||
}
|
||||
|
||||
// ── Shared bus for inter-agent communication ──
|
||||
agentBus := bus.New(logger)
|
||||
|
||||
// NOTE: the multi-bot orchestrator is parked (Matrix-out). Its room
|
||||
// discovery was Matrix-intrinsic and has been removed; it is no longer
|
||||
// wired into the launcher. Re-introducing it over unibus is a later step.
|
||||
|
||||
// ── Shared dependencies for agent registry ──
|
||||
deps := &launchDeps{
|
||||
agentBus: agentBus,
|
||||
logDir: logDir,
|
||||
logLevel: lvl,
|
||||
parentCtx: ctx,
|
||||
secPolicy: secPolicy,
|
||||
}
|
||||
registry := newAgentRegistry(deps)
|
||||
|
||||
// ── SIGHUP: hot-reload individual agent or all agents ──
|
||||
sighup := make(chan os.Signal, 1)
|
||||
signal.Notify(sighup, syscall.SIGHUP)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case _, ok := <-sighup:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
id := readReloadTarget("run/reload.txt")
|
||||
// Remove the target file after reading so it doesn't
|
||||
// affect the next SIGHUP.
|
||||
_ = os.Remove("run/reload.txt")
|
||||
if id == "" {
|
||||
logger.Info("sighup: reloading all agents")
|
||||
registry.reloadAll(rulesFor)
|
||||
} else {
|
||||
logger.Info("sighup: reloading agent", "id", id)
|
||||
registry.reload(id, rulesFor)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// ── Start normal agents ──
|
||||
for _, path := range configPaths {
|
||||
path := path
|
||||
cfg, err := config.Load(path)
|
||||
if err != nil {
|
||||
logger.Error("failed to load config", "path", path, "err", err)
|
||||
continue
|
||||
}
|
||||
if !cfg.Agent.Enabled {
|
||||
logger.Info("agent disabled, skipping", "id", cfg.Agent.ID)
|
||||
continue
|
||||
}
|
||||
if cfg.Agent.Template {
|
||||
logger.Info("agent is template, skipping", "id", cfg.Agent.ID)
|
||||
continue
|
||||
}
|
||||
|
||||
// Per-agent logger → writes to logs/<agent-id>/YYYY-MM-DD.jsonl
|
||||
agentLogger, agentCleanup, aErr := agentlog.NewAgentLogger(agentlog.LoggerConfig{
|
||||
BaseDir: logDir,
|
||||
AgentID: cfg.Agent.ID,
|
||||
Level: lvl,
|
||||
})
|
||||
if aErr != nil {
|
||||
logger.Warn("agent file logger failed, using launcher logger", "agent", cfg.Agent.ID, "err", aErr)
|
||||
agentLogger = logger.With("agent", cfg.Agent.ID)
|
||||
agentCleanup = func() {}
|
||||
}
|
||||
|
||||
// Branch: robot (command-only, lightweight) vs agent (full runtime).
|
||||
var runner agents.Runner
|
||||
|
||||
if cfg.Agent.Type == "robot" {
|
||||
robot, rErr := agents.NewRobot(cfg, agentLogger)
|
||||
if rErr != nil {
|
||||
logger.Error("failed to create robot", "id", cfg.Agent.ID, "err", rErr)
|
||||
agentCleanup()
|
||||
continue
|
||||
}
|
||||
runner = robot
|
||||
agentLogger.Info("created robot", "id", cfg.Agent.ID)
|
||||
} else {
|
||||
rules := rulesFor(cfg.Agent.ID, logger)
|
||||
|
||||
// Resolve centralized ACL for this agent
|
||||
agentACL := pksecurity.ResolveACL(cfg.Agent.ID, deps.secPolicy)
|
||||
agentLogger.Debug("resolved acl for agent",
|
||||
"agent", cfg.Agent.ID,
|
||||
"acl_empty", agentACL.Empty(),
|
||||
)
|
||||
|
||||
a, cErr := agents.New(cfg, rules, agentACL, agentLogger)
|
||||
if cErr != nil {
|
||||
logger.Error("failed to create agent", "id", cfg.Agent.ID, "err", cErr)
|
||||
agentCleanup()
|
||||
continue
|
||||
}
|
||||
|
||||
// Connect agent to the inter-agent bus.
|
||||
a.SetBus(agentBus)
|
||||
|
||||
runner = a
|
||||
}
|
||||
|
||||
registry.register(&runningAgent{
|
||||
runner: runner,
|
||||
cfg: cfg,
|
||||
cfgPath: path,
|
||||
logger: agentLogger,
|
||||
logCleanup: agentCleanup,
|
||||
})
|
||||
}
|
||||
|
||||
registry.waitAll()
|
||||
registry.cleanupLogs()
|
||||
logger.Info("all agents stopped")
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
root.Flags().StringSliceVarP(&configPaths, "config", "c", nil,
|
||||
"Agent config file(s). If omitted, discovers all agents/*/config.yaml")
|
||||
root.Flags().StringVar(&logLevel, "log-level", "info",
|
||||
"Log level: debug | info | warn | error")
|
||||
root.Flags().StringVar(&logDir, "log-dir", "logs",
|
||||
`Log directory (logs/<agent>/YYYY-MM-DD.jsonl). Use "stdout" for console only`)
|
||||
|
||||
if err := root.Execute(); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// rulesFor retrieves the rule factory for the given agent ID from the
|
||||
// global registry (populated by init() in each agent package).
|
||||
// Returns nil if no rules are registered (command-only bot).
|
||||
func rulesFor(agentID string, logger *slog.Logger) []decision.Rule {
|
||||
factory := agents.GetRules(agentID)
|
||||
if factory == nil {
|
||||
logger.Warn("no rules registered for agent, using empty ruleset (command-only)", "id", agentID)
|
||||
return nil
|
||||
}
|
||||
return factory()
|
||||
}
|
||||
|
||||
// parseLogLevel maps a textual level ("debug", "warn", "error") to its
// slog.Level. Matching is exact and case-sensitive; any other value,
// including "info" and the empty string, yields slog.LevelInfo.
func parseLogLevel(level string) slog.Level {
	known := map[string]slog.Level{
		"debug": slog.LevelDebug,
		"warn":  slog.LevelWarn,
		"error": slog.LevelError,
	}
	if parsed, ok := known[level]; ok {
		return parsed
	}
	return slog.LevelInfo
}
|
||||
|
||||
// newLogger creates a stdout-only JSON logger (fallback when file logger fails).
|
||||
func newLogger(level string) *slog.Logger {
|
||||
return slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: parseLogLevel(level)}))
|
||||
}
|
||||
@@ -0,0 +1,245 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/enmanuel/agents/agents"
|
||||
"github.com/enmanuel/agents/internal/config"
|
||||
"github.com/enmanuel/agents/pkg/decision"
|
||||
pksecurity "github.com/enmanuel/agents/pkg/security"
|
||||
"github.com/enmanuel/agents/shell/bus"
|
||||
agentlog "github.com/enmanuel/agents/shell/logger"
|
||||
)
|
||||
|
||||
// runningAgent holds a live runner (Agent or Robot) and the metadata needed to recreate it.
|
||||
type runningAgent struct {
|
||||
runner agents.Runner
|
||||
cfg *config.AgentConfig
|
||||
cfgPath string
|
||||
logger *slog.Logger
|
||||
logCleanup func()
|
||||
}
|
||||
|
||||
// launchDeps holds shared resources needed to start/reload agents.
|
||||
type launchDeps struct {
|
||||
agentBus *bus.Bus
|
||||
logDir string
|
||||
logLevel slog.Level
|
||||
parentCtx context.Context
|
||||
secPolicy pksecurity.SecurityPolicy // centralized security policy loaded from security/
|
||||
}
|
||||
|
||||
// agentRegistry tracks all running agents by ID, enabling individual hot-reload.
|
||||
type agentRegistry struct {
|
||||
mu sync.Mutex
|
||||
agents map[string]*runningAgent
|
||||
deps *launchDeps
|
||||
}
|
||||
|
||||
func newAgentRegistry(deps *launchDeps) *agentRegistry {
|
||||
return &agentRegistry{
|
||||
agents: make(map[string]*runningAgent),
|
||||
deps: deps,
|
||||
}
|
||||
}
|
||||
|
||||
// register adds a running agent/robot to the registry and starts its goroutine.
|
||||
func (r *agentRegistry) register(ra *runningAgent) {
|
||||
r.mu.Lock()
|
||||
r.agents[ra.cfg.Agent.ID] = ra
|
||||
r.mu.Unlock()
|
||||
|
||||
runtimeType := ra.cfg.Agent.Type
|
||||
if runtimeType == "" {
|
||||
runtimeType = "agent"
|
||||
}
|
||||
|
||||
go func() {
|
||||
ra.logger.Info("runner started", "type", runtimeType)
|
||||
if err := ra.runner.Run(r.deps.parentCtx); err != nil {
|
||||
ra.logger.Error("runner stopped with error", "err", err, "type", runtimeType)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// stopAndWait stops a running agent/robot and waits for it to finish.
|
||||
// Caller must NOT hold r.mu.
|
||||
func (r *agentRegistry) stopAndWait(id string) {
|
||||
r.mu.Lock()
|
||||
ra, ok := r.agents[id]
|
||||
r.mu.Unlock()
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
ra.runner.Stop()
|
||||
select {
|
||||
case <-ra.runner.Done():
|
||||
case <-time.After(10 * time.Second):
|
||||
ra.logger.Warn("runner did not stop within 10s, forcing", "id", id)
|
||||
}
|
||||
|
||||
// Unsubscribe from bus so no stale channel remains.
|
||||
r.deps.agentBus.Unsubscribe(bus.AgentID(id))
|
||||
}
|
||||
|
||||
// reload stops an agent, re-reads its config, recreates it, and restarts it.
|
||||
func (r *agentRegistry) reload(id string, rulesFor func(string, *slog.Logger) []decision.Rule) {
|
||||
r.mu.Lock()
|
||||
ra, ok := r.agents[id]
|
||||
r.mu.Unlock()
|
||||
if !ok {
|
||||
slog.Warn("reload: agent not found", "id", id)
|
||||
return
|
||||
}
|
||||
|
||||
cfgPath := ra.cfgPath
|
||||
oldCleanup := ra.logCleanup
|
||||
|
||||
ra.logger.Info("agent_reload_start", "id", id)
|
||||
|
||||
// 1. Stop current instance and wait.
|
||||
r.stopAndWait(id)
|
||||
|
||||
// 2. Cleanup old log writer.
|
||||
if oldCleanup != nil {
|
||||
oldCleanup()
|
||||
}
|
||||
|
||||
// 3. Re-read config.
|
||||
cfg, err := config.Load(cfgPath)
|
||||
if err != nil {
|
||||
slog.Error("reload: failed to load config", "path", cfgPath, "err", err)
|
||||
return
|
||||
}
|
||||
if !cfg.Agent.Enabled {
|
||||
slog.Info("reload: agent is disabled, not restarting", "id", id)
|
||||
r.mu.Lock()
|
||||
delete(r.agents, id)
|
||||
r.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// 4. New per-agent logger.
|
||||
newLogger, newCleanup, aErr := agentlog.NewAgentLogger(agentlog.LoggerConfig{
|
||||
BaseDir: r.deps.logDir,
|
||||
AgentID: cfg.Agent.ID,
|
||||
Level: r.deps.logLevel,
|
||||
})
|
||||
if aErr != nil {
|
||||
newLogger = slog.Default().With("agent", cfg.Agent.ID)
|
||||
newCleanup = func() {}
|
||||
}
|
||||
|
||||
// 5. Create new runner (validates config before discarding the old one).
|
||||
var newRunner agents.Runner
|
||||
|
||||
if cfg.Agent.Type == "robot" {
|
||||
robot, rErr := agents.NewRobot(cfg, newLogger)
|
||||
if rErr != nil {
|
||||
newLogger.Error("reload: failed to create robot", "id", id, "err", rErr)
|
||||
newCleanup()
|
||||
return
|
||||
}
|
||||
newRunner = robot
|
||||
} else {
|
||||
rules := rulesFor(cfg.Agent.ID, newLogger)
|
||||
agentACL := pksecurity.ResolveACL(cfg.Agent.ID, r.deps.secPolicy)
|
||||
newLogger.Debug("resolved acl for agent (reload)", "agent", cfg.Agent.ID, "acl_empty", agentACL.Empty())
|
||||
newAgent, aErr := agents.New(cfg, rules, agentACL, newLogger)
|
||||
if aErr != nil {
|
||||
newLogger.Error("reload: failed to create agent", "id", id, "err", aErr)
|
||||
newCleanup()
|
||||
return
|
||||
}
|
||||
|
||||
// Wire bus (orchestration is parked; only agents connect to the bus).
|
||||
newAgent.SetBus(r.deps.agentBus)
|
||||
newRunner = newAgent
|
||||
}
|
||||
|
||||
newRA := &runningAgent{
|
||||
runner: newRunner,
|
||||
cfg: cfg,
|
||||
cfgPath: cfgPath,
|
||||
logger: newLogger,
|
||||
logCleanup: newCleanup,
|
||||
}
|
||||
|
||||
r.mu.Lock()
|
||||
r.agents[id] = newRA
|
||||
r.mu.Unlock()
|
||||
|
||||
// 7. Start new goroutine.
|
||||
runtimeType := cfg.Agent.Type
|
||||
if runtimeType == "" {
|
||||
runtimeType = "agent"
|
||||
}
|
||||
go func() {
|
||||
newLogger.Info("runner started", "type", runtimeType)
|
||||
if err := newRunner.Run(r.deps.parentCtx); err != nil {
|
||||
newLogger.Error("runner stopped with error", "err", err, "type", runtimeType)
|
||||
}
|
||||
}()
|
||||
|
||||
newLogger.Info("runner_reloaded", "id", id, "type", runtimeType)
|
||||
}
|
||||
|
||||
// reloadAll reloads every registered agent sequentially.
|
||||
func (r *agentRegistry) reloadAll(rulesFor func(string, *slog.Logger) []decision.Rule) {
|
||||
r.mu.Lock()
|
||||
ids := make([]string, 0, len(r.agents))
|
||||
for id := range r.agents {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
r.mu.Unlock()
|
||||
|
||||
for _, id := range ids {
|
||||
r.reload(id, rulesFor)
|
||||
}
|
||||
}
|
||||
|
||||
// waitAll blocks until all registered runners have stopped.
|
||||
func (r *agentRegistry) waitAll() {
|
||||
r.mu.Lock()
|
||||
dones := make([]<-chan struct{}, 0, len(r.agents))
|
||||
for _, ra := range r.agents {
|
||||
dones = append(dones, ra.runner.Done())
|
||||
}
|
||||
r.mu.Unlock()
|
||||
|
||||
for _, done := range dones {
|
||||
<-done
|
||||
}
|
||||
}
|
||||
|
||||
// cleanupLogs calls every agent's log cleanup function (called on launcher shutdown).
|
||||
func (r *agentRegistry) cleanupLogs() {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
for _, ra := range r.agents {
|
||||
if ra.logCleanup != nil {
|
||||
ra.logCleanup()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readReloadTarget returns the whitespace-trimmed content of the file at path.
//
// It returns "" when the file cannot be read, when it is empty after
// trimming, or when its content is "*" (meaning reload all).
func readReloadTarget(path string) string {
	raw, err := os.ReadFile(path)
	if err != nil {
		return ""
	}
	if target := strings.TrimSpace(string(raw)); target != "*" {
		return target
	}
	return ""
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadReloadTarget_missing(t *testing.T) {
|
||||
got := readReloadTarget(filepath.Join(t.TempDir(), "reload.txt"))
|
||||
if got != "" {
|
||||
t.Fatalf("expected empty string for missing file, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadReloadTarget_empty(t *testing.T) {
|
||||
f := filepath.Join(t.TempDir(), "reload.txt")
|
||||
if err := os.WriteFile(f, []byte(""), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got := readReloadTarget(f)
|
||||
if got != "" {
|
||||
t.Fatalf("expected empty string for empty file, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadReloadTarget_star(t *testing.T) {
|
||||
f := filepath.Join(t.TempDir(), "reload.txt")
|
||||
if err := os.WriteFile(f, []byte("*\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got := readReloadTarget(f)
|
||||
if got != "" {
|
||||
t.Fatalf("expected empty string for '*', got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadReloadTarget_agentID(t *testing.T) {
|
||||
f := filepath.Join(t.TempDir(), "reload.txt")
|
||||
if err := os.WriteFile(f, []byte("assistant-bot\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got := readReloadTarget(f)
|
||||
if got != "assistant-bot" {
|
||||
t.Fatalf("expected 'assistant-bot', got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadReloadTarget_whitespace(t *testing.T) {
|
||||
f := filepath.Join(t.TempDir(), "reload.txt")
|
||||
if err := os.WriteFile(f, []byte(" asistente-2 \n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got := readReloadTarget(f)
|
||||
if got != "asistente-2" {
|
||||
t.Fatalf("expected 'asistente-2', got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"database/sql/driver"
|
||||
|
||||
moderncsqlite "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
func init() {
|
||||
// mautrix dbutil opens sqlite as "sqlite3"; register the pure-Go driver
|
||||
// under that name. We add a connection hook that sets WAL mode and a
|
||||
// busy timeout on every connection to prevent SQLITE_BUSY crashes during
|
||||
// concurrent writes (crypto store sync + memory store).
|
||||
d := &moderncsqlite.Driver{}
|
||||
d.RegisterConnectionHook(sqlitePragmaHook)
|
||||
sql.Register("sqlite3", d)
|
||||
}
|
||||
|
||||
// sqlitePragmaHook sets WAL journal mode and a 5-second busy timeout on
|
||||
// every new SQLite connection. This prevents SQLITE_BUSY errors when
|
||||
// multiple goroutines write concurrently (e.g. mautrix crypto sync +
|
||||
// memory/knowledge stores).
|
||||
func sqlitePragmaHook(conn moderncsqlite.ExecQuerierContext, _ string) error {
|
||||
ctx := context.Background()
|
||||
pragmas := []string{
|
||||
"PRAGMA journal_mode=WAL",
|
||||
"PRAGMA busy_timeout=5000",
|
||||
}
|
||||
for _, p := range pragmas {
|
||||
if _, err := conn.ExecContext(ctx, p, []driver.NamedValue{}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestSQLitePragmaHook opens a database through the registered "sqlite3"
// driver and checks that the connection reports journal_mode "wal" and a
// busy_timeout of 5000.
func TestSQLitePragmaHook(t *testing.T) {
	db, err := sql.Open("sqlite3", filepath.Join(t.TempDir(), "test.db"))
	if err != nil {
		t.Fatalf("open: %v", err)
	}
	defer db.Close()

	// sql.Open is lazy; Ping forces a real connection to be created.
	if err = db.Ping(); err != nil {
		t.Fatalf("ping: %v", err)
	}

	var mode string
	err = db.QueryRow("PRAGMA journal_mode").Scan(&mode)
	if err != nil {
		t.Fatalf("query journal_mode: %v", err)
	}
	if mode != "wal" {
		t.Errorf("journal_mode = %q, want %q", mode, "wal")
	}

	var timeoutMS int
	err = db.QueryRow("PRAGMA busy_timeout").Scan(&timeoutMS)
	if err != nil {
		t.Fatalf("query busy_timeout: %v", err)
	}
	if timeoutMS != 5000 {
		t.Errorf("busy_timeout = %d, want %d", timeoutMS, 5000)
	}
}
|
||||
|
||||
// TestSQLiteConcurrentWrites runs several goroutines that write through one
// shared *sql.DB at the same time and fails if any write returns an error
// (such as SQLITE_BUSY).
func TestSQLiteConcurrentWrites(t *testing.T) {
	db, err := sql.Open("sqlite3", filepath.Join(t.TempDir(), "concurrent.db"))
	if err != nil {
		t.Fatalf("open: %v", err)
	}
	defer db.Close()

	// Table the writers will target.
	_, err = db.Exec(`CREATE TABLE kv (k TEXT PRIMARY KEY, v TEXT)`)
	if err != nil {
		t.Fatalf("create table: %v", err)
	}

	// Scenario: multiple goroutines writing concurrently, like mautrix
	// crypto sync + memory store + knowledge store.
	const (
		writers         = 5
		writesPerWriter = 50
	)
	ctx := context.Background()

	var wg sync.WaitGroup
	errs := make(chan error, writers*writesPerWriter)

	// Every writer upserts the same key, so all writes land on one row.
	insert := func() {
		defer wg.Done()
		for n := 0; n < writesPerWriter; n++ {
			_, werr := db.ExecContext(ctx,
				`INSERT OR REPLACE INTO kv (k, v) VALUES (?, ?)`,
				"key", "value",
			)
			if werr != nil {
				errs <- werr
			}
		}
	}

	wg.Add(writers)
	for w := 0; w < writers; w++ {
		go insert()
	}

	wg.Wait()
	close(errs)

	for werr := range errs {
		t.Errorf("concurrent write error: %v", werr)
	}
}
|
||||
|
||||
// TestSQLiteConcurrentWritesSeparateConnections writes to one database file
// through two independent *sql.DB handles at once (as when crypto.db is
// opened by both mautrix and our code). It fails on any write error and then
// checks that every row was stored.
func TestSQLiteConcurrentWritesSeparateConnections(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "shared.db")

	// Two separate handles on the same file (simulates mautrix + our memory
	// store sharing a DB, or separate processes).
	db1, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		t.Fatalf("open db1: %v", err)
	}
	defer db1.Close()

	db2, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		t.Fatalf("open db2: %v", err)
	}
	defer db2.Close()

	// The table is created through db1.
	_, err = db1.Exec(`CREATE TABLE t (id INTEGER PRIMARY KEY, data TEXT)`)
	if err != nil {
		t.Fatalf("create table: %v", err)
	}

	const iterations = 100
	ctx := context.Background()

	var wg sync.WaitGroup
	errs := make(chan error, iterations*2)

	// spawn starts one writer that inserts `iterations` rows tagged with data.
	spawn := func(db *sql.DB, data string) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for n := 0; n < iterations; n++ {
				_, werr := db.ExecContext(ctx,
					`INSERT INTO t (data) VALUES (?)`, data,
				)
				if werr != nil {
					errs <- werr
				}
			}
		}()
	}

	spawn(db1, "from_crypto_sync")  // simulates mautrix SaveNextBatch
	spawn(db2, "from_memory_store") // simulates our memory store SaveMessage

	wg.Wait()
	close(errs)

	for werr := range errs {
		t.Errorf("concurrent write error (separate conns): %v", werr)
	}

	// Every insert from both writers must be present.
	var count int
	err = db1.QueryRow("SELECT COUNT(*) FROM t").Scan(&count)
	if err != nil {
		t.Fatalf("count: %v", err)
	}
	if expected := iterations * 2; count != expected {
		t.Errorf("row count = %d, want %d", count, expected)
	}
}
|
||||
|
||||
// TestSQLiteWALFileCreated writes to a fresh database and checks that a
// "-wal" file appears next to it, confirming at the filesystem level that the
// WAL pragma took effect.
func TestSQLiteWALFileCreated(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "walcheck.db")

	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		t.Fatalf("open: %v", err)
	}
	defer db.Close()

	// A table plus one row, to trigger WAL file creation.
	_, err = db.Exec(`CREATE TABLE x (id INTEGER PRIMARY KEY)`)
	if err != nil {
		t.Fatalf("create: %v", err)
	}
	_, err = db.Exec(`INSERT INTO x (id) VALUES (1)`)
	if err != nil {
		t.Fatalf("insert: %v", err)
	}

	walPath := dbPath + "-wal"
	_, statErr := os.Stat(walPath)
	if os.IsNotExist(statErr) {
		t.Errorf("WAL file not created at %s — PRAGMA journal_mode=WAL may not be taking effect", walPath)
	}
}
|
||||
@@ -0,0 +1,206 @@
|
||||
// Command register creates a Matrix bot user via the Synapse admin API
|
||||
// and outputs the access token to store in .env.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// MATRIX_ADMIN_TOKEN=syt_... go run ./cmd/register \
|
||||
// --homeserver https://matrix-af2f3d.organic-machine.com \
|
||||
// --username assistant-bot \
|
||||
// --displayname "Assistant Bot" \
|
||||
// --env-var MATRIX_TOKEN_ASSISTANT
|
||||
package main
|
||||
|
||||
import (
	"bytes"
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"

	"github.com/spf13/cobra"
)
|
||||
|
||||
func main() {
|
||||
var (
|
||||
homeserver string
|
||||
username string
|
||||
displayname string
|
||||
envVar string
|
||||
password string
|
||||
)
|
||||
|
||||
root := &cobra.Command{
|
||||
Use: "register",
|
||||
Short: "Register a Matrix bot user via Synapse admin API",
|
||||
Long: `Creates a bot user on your Synapse homeserver and prints its access token.
|
||||
|
||||
Requires MATRIX_ADMIN_TOKEN env var with an admin user's access token.
|
||||
|
||||
Example:
|
||||
MATRIX_ADMIN_TOKEN=syt_... go run ./cmd/register \
|
||||
--homeserver https://matrix.example.com \
|
||||
--username my-bot \
|
||||
--displayname "My Bot" \
|
||||
--env-var MATRIX_TOKEN_MY_BOT`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
adminToken := os.Getenv("MATRIX_ADMIN_TOKEN")
|
||||
if adminToken == "" {
|
||||
return fmt.Errorf("MATRIX_ADMIN_TOKEN env var is not set")
|
||||
}
|
||||
|
||||
// Strip trailing slash
|
||||
homeserver = strings.TrimRight(homeserver, "/")
|
||||
|
||||
// Extract server name from homeserver URL
|
||||
serverName := homeserver
|
||||
serverName = strings.TrimPrefix(serverName, "https://")
|
||||
serverName = strings.TrimPrefix(serverName, "http://")
|
||||
|
||||
userID := fmt.Sprintf("@%s:%s", username, serverName)
|
||||
|
||||
fmt.Printf("→ Registering user %s on %s\n", userID, homeserver)
|
||||
|
||||
// Generate password if not provided
|
||||
if password == "" {
|
||||
password = generatePassword()
|
||||
}
|
||||
|
||||
// Step 1: Create/update user via admin API
|
||||
if err := createUser(homeserver, adminToken, userID, displayname, password); err != nil {
|
||||
return fmt.Errorf("create user: %w", err)
|
||||
}
|
||||
fmt.Printf("✓ User %s created/updated\n", userID)
|
||||
|
||||
// Step 2: Login as the bot to get an access token
|
||||
token, deviceID, err := loginAs(homeserver, username, password)
|
||||
if err != nil {
|
||||
return fmt.Errorf("login as bot: %w", err)
|
||||
}
|
||||
fmt.Printf("✓ Logged in, device ID: %s\n", deviceID)
|
||||
|
||||
// Step 3: Generate pickle key for E2EE crypto store
|
||||
pickleKey := generatePickleKey()
|
||||
|
||||
// Derive env var prefix from envVar (e.g. MATRIX_TOKEN_FOO → FOO)
|
||||
norm := strings.TrimPrefix(envVar, "MATRIX_TOKEN_")
|
||||
|
||||
// Step 4: Print results — parseable lines for register.sh
|
||||
fmt.Println("\n─── Add to your .env ───────────────────────────────")
|
||||
fmt.Printf("%s=%s\n", envVar, token)
|
||||
fmt.Printf("MATRIX_PASSWORD_%s=%s\n", norm, password)
|
||||
fmt.Printf("PICKLE_KEY_%s=%s\n", norm, pickleKey)
|
||||
fmt.Println("────────────────────────────────────────────────────")
|
||||
fmt.Printf("\nUser ID: %s\n", userID)
|
||||
fmt.Printf("Device ID: %s\n", deviceID)
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
root.Flags().StringVar(&homeserver, "homeserver", "", "Matrix homeserver URL (required)")
|
||||
root.Flags().StringVar(&username, "username", "", "Bot username, without @ or server (required)")
|
||||
root.Flags().StringVar(&displayname, "displayname", "", "Bot display name shown in Matrix")
|
||||
root.Flags().StringVar(&envVar, "env-var", "MATRIX_TOKEN_BOT", "Name of the env var to output")
|
||||
root.Flags().StringVar(&password, "password", "", "Bot password (auto-generated if empty)")
|
||||
_ = root.MarkFlagRequired("homeserver")
|
||||
_ = root.MarkFlagRequired("username")
|
||||
|
||||
if err := root.Execute(); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// createUser sends PUT {homeserver}/_synapse/admin/v2/users/{userID} with a
// JSON body carrying the password, admin=false and deactivated=false, plus
// the display name when it is non-empty. The request is authorized with
// adminToken as a Bearer token.
//
// It returns nil on a 200 or 201 response. Any other status produces an error
// containing the status code and the response body; transport failures are
// returned wrapped with the request URL.
func createUser(homeserver, adminToken, userID, displayname, password string) error {
	payload := map[string]any{
		"password":    password,
		"admin":       false,
		"deactivated": false,
	}
	if displayname != "" {
		payload["displayname"] = displayname
	}
	encoded, _ := json.Marshal(payload)

	endpoint := homeserver + "/_synapse/admin/v2/users/" + userID
	req, err := http.NewRequest(http.MethodPut, endpoint, bytes.NewReader(encoded))
	if err != nil {
		return err
	}
	req.Header.Set("Authorization", "Bearer "+adminToken)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("HTTP PUT %s: %w", endpoint, err)
	}
	defer resp.Body.Close()

	detail, _ := io.ReadAll(resp.Body)
	switch resp.StatusCode {
	case http.StatusOK, http.StatusCreated:
		return nil
	default:
		return fmt.Errorf("admin API returned %d: %s", resp.StatusCode, detail)
	}
}
|
||||
|
||||
// loginAs posts an m.login.password request for username/password to
// {homeserver}/_matrix/client/v3/login and returns the access token and
// device ID from the JSON response.
//
// It returns an error when the request fails, when the status is not 200 (the
// error includes the status and response body), when the body cannot be read
// or parsed, or when a 200 response carries no access_token. The last check
// prevents an empty token from being reported as a successful login.
func loginAs(homeserver, username, password string) (token, deviceID string, err error) {
	body := map[string]any{
		"type": "m.login.password",
		"identifier": map[string]string{
			"type": "m.id.user",
			"user": username,
		},
		"password": password,
	}
	// Marshal cannot fail here: the payload holds only strings and string maps.
	raw, _ := json.Marshal(body)

	url := homeserver + "/_matrix/client/v3/login"
	resp, err := http.Post(url, "application/json", bytes.NewReader(raw))
	if err != nil {
		return "", "", fmt.Errorf("HTTP POST %s: %w", url, err)
	}
	defer resp.Body.Close()

	respBody, readErr := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		// Report the status first; whatever part of the body was read is
		// included for context.
		return "", "", fmt.Errorf("login returned %d: %s", resp.StatusCode, respBody)
	}
	if readErr != nil {
		return "", "", fmt.Errorf("read login response: %w", readErr)
	}

	var result struct {
		AccessToken string `json:"access_token"`
		DeviceID    string `json:"device_id"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", "", fmt.Errorf("parse login response: %w", err)
	}
	if result.AccessToken == "" {
		return "", "", fmt.Errorf("login response from %s has no access_token", url)
	}
	return result.AccessToken, result.DeviceID, nil
}
|
||||
|
||||
// generatePassword returns a password for the bot account: 24 random bytes
// from crypto/rand, hex-encoded to 48 lowercase characters.
//
// If the random source reports an error, the fixed string
// "agent-bot-default-please-change" is returned, so that a partially filled
// buffer is never used.
// NOTE(review): that fallback is a publicly known password; the signature has
// no error return, so callers cannot tell it apart. Consider failing instead.
func generatePassword() string {
	buf := make([]byte, 24)
	if _, err := rand.Read(buf); err != nil {
		return "agent-bot-default-please-change"
	}
	return hex.EncodeToString(buf)
}
|
||||
|
||||
// generatePickleKey returns a key for E2EE crypto store encryption: 32 random
// bytes from crypto/rand, hex-encoded to 64 lowercase characters.
//
// It returns "" if the random source reports an error, so that a partially
// filled buffer is never used as key material.
func generatePickleKey() string {
	buf := make([]byte, 32)
	if _, err := rand.Read(buf); err != nil {
		return ""
	}
	return hex.EncodeToString(buf)
}
|
||||
Reference in New Issue
Block a user