chore: auto-commit (27 archivos)
- .claude/CLAUDE.md - .claude/rules/create_agent.md - agents/_specials/father-bot/prompts/system.md - agents/_template/config.yaml - agents/_template_robot/config.yaml - cmd/agentctl/autoavatar.go - cmd/launcher/sqlite.go - dev-scripts/_common.sh - dev-scripts/agent/create-full.sh - dev-scripts/agent/delete-full.sh - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,302 @@
|
||||
/**
|
||||
* log-evaluator.ts — SSH to VPS + tail/grep agent JSONL logs.
|
||||
*
|
||||
* The agent-wsl-lucas runs in `agents_and_robots.service` on organic-machine.com.
|
||||
* Per-agent logs live in /home/ubuntu/CodeProyects/agents_and_robots/logs/<agent_id>/YYYY-MM-DD.jsonl
|
||||
* (slog JSON handler — one JSON object per line).
|
||||
*
|
||||
* This fixture is used by DoD Capa 2 e2e tests to *cross-check* what the bot
|
||||
* said in Matrix against what the runtime actually did. A bot can hallucinate
|
||||
* output and never invoke a tool; reading logs catches that.
|
||||
*/
|
||||
import { execFileSync } from "node:child_process";
import { readFileSync } from "node:fs";
|
||||
|
||||
/**
 * One parsed line of an agent's JSONL log (one JSON object per line).
 *
 * `time`, `level` and `msg` are declared as always present. The other named
 * fields are optional, and any further structured attribute is reachable
 * through the index signature, typed as `unknown`.
 */
export interface LogEntry {
  /** Timestamp string; readers in this file convert it with `Date.parse`. */
  time: string;
  /** Severity label, e.g. `"ERROR"`. */
  level: string;
  /** Log message text, e.g. `"executing tool"`. */
  msg: string;
  /** Agent identifier, when the entry carries one. */
  agent_id?: string;
  /** Tool name attached to `"executing tool"` entries. */
  tool?: string;
  /** Tool-call identifier attached to `"executing tool"` entries. */
  call_id?: string;
  /** Request identifier, when the entry carries one. */
  request_id?: string;
  /** Error text, when the entry carries one. */
  err?: string;
  /** Any other structured field emitted alongside the ones above. */
  [field: string]: unknown;
}
|
||||
|
||||
/**
 * A tool invocation recovered from an `"executing tool"` log entry.
 */
export interface ToolCallTrace {
  /** Name of the tool that was invoked. */
  toolName: string;
  /** Call identifier from the log entry; an empty string when the entry had none. */
  callId: string;
  /** The entry's `time` value, copied verbatim. */
  ts: string;
  /** The complete log entry this trace was built from. */
  raw: LogEntry;
}
|
||||
|
||||
/**
 * Options accepted by `fetchAgentLogs`.
 */
export interface FetchLogsOptions {
  /** Agent whose log directory is read. */
  agentId: string;
  /** SSH destination; the module-level default target is used when omitted. */
  sshTarget?: string;
  /** Only entries newer than this many minutes are kept (5 when omitted). */
  sinceMinutes?: number;
  /** Keep only entries whose `msg` contains this substring. */
  filterMsg?: string;
  /** Maximum number of entries returned, counted from the newest (200 when omitted). */
  limit?: number;
  /** Testing override: read this local file instead of going through SSH. */
  localFile?: string;
}
|
||||
|
||||
/** SSH destination used when a caller supplies none; overridable via `AGENT_LOG_SSH_TARGET`. */
const DEFAULT_SSH_TARGET = process.env["AGENT_LOG_SSH_TARGET"] ?? "organic-machine.com";

/** Remote directory holding one sub-directory per agent; overridable via `AGENT_LOG_BASE_DIR`. */
const DEFAULT_LOG_BASE =
  process.env["AGENT_LOG_BASE_DIR"] ?? "/home/ubuntu/CodeProyects/agents_and_robots/logs";
|
||||
|
||||
/**
 * Current UTC calendar date as `YYYY-MM-DD`.
 *
 * Log files are named by UTC date, so the local time zone must not leak in;
 * `toISOString()` is always UTC and starts with exactly that date.
 */
function isoToday(): string {
  return new Date().toISOString().slice(0, 10);
}
|
||||
|
||||
/**
 * UTC calendar date of the instant exactly 24 hours ago, as `YYYY-MM-DD`.
 */
function isoYesterday(): string {
  const oneDayMs = 24 * 60 * 60 * 1000;
  return new Date(Date.now() - oneDayMs).toISOString().slice(0, 10);
}
|
||||
|
||||
/**
 * Run `remoteCmd` on `sshTarget` through the local `ssh` binary and return its stdout.
 *
 * `execFileSync` is used so that no local shell interprets the arguments. The
 * remote side still hands `remoteCmd` to a shell, so callers are responsible
 * for quoting anything they interpolate into it.
 *
 * @param sshTarget - SSH destination (host or `user@host`).
 * @param remoteCmd - Command line executed by the remote shell.
 * @returns The command's stdout decoded as UTF-8.
 * @throws Error when `ssh` cannot be run, exits non-zero, or produces more than
 *   8 MiB of output; the message includes the captured stderr and stdout.
 */
function sshExec(sshTarget: string, remoteCmd: string): string {
  const sshArgs = [
    // Never prompt for a password/passphrase: fail instead of hanging a test run.
    "-o",
    "BatchMode=yes",
    "-o",
    "ConnectTimeout=5",
    // Trust a host key on first contact, but still reject a changed key.
    "-o",
    "StrictHostKeyChecking=accept-new",
    sshTarget,
    remoteCmd,
  ];

  try {
    return execFileSync("ssh", sshArgs, { encoding: "utf8", maxBuffer: 8 * 1024 * 1024 });
  } catch (err: unknown) {
    // The thrown value normally carries the child's captured streams, but
    // nothing guarantees its shape, so read every field defensively.
    const captured = err as { stderr?: unknown; stdout?: unknown } | null | undefined;
    const asText = (value: unknown): string => (value == null ? "" : String(value));
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `ssh ${sshTarget} failed: ${reason}\nstderr=${asText(captured?.stderr)}\nstdout=${asText(captured?.stdout)}`,
    );
  }
}
|
||||
|
||||
/** Single-quote `value` so a POSIX shell passes it through as one literal word. */
function quoteForRemoteShell(value: string): string {
  return `'${value.replace(/'/g, `'\\''`)}'`;
}

/**
 * Read the most recent entries from an agent's JSONL log, optionally filtered.
 *
 * Without `localFile`, yesterday's and today's (UTC) log files are tailed over
 * SSH in that order, so a time window that crosses midnight is still covered.
 * A missing file is tolerated. With `localFile`, that file is read instead and
 * no SSH connection is made. In both cases lines are parsed and filtered
 * locally.
 *
 * Lines are dropped when they are blank, are not valid JSON, do not decode to
 * a JSON object, lack a parseable string `time`, are older than the window, or
 * (when `filterMsg` is set) have a `msg` that does not contain it.
 *
 * @param opts - See `FetchLogsOptions`. `sinceMinutes` defaults to 5 and
 *   `limit` to 200; a `limit` of zero or less yields an empty array.
 * @returns Up to `limit` matching entries, newest last.
 * @throws Error when the SSH command fails or `localFile` cannot be read.
 */
export async function fetchAgentLogs(opts: FetchLogsOptions): Promise<LogEntry[]> {
  const sinceMinutes = opts.sinceMinutes ?? 5;
  const limit = opts.limit ?? 200;

  let raw: string;
  if (opts.localFile) {
    // Local override path for self-test / dev.
    raw = readFileSync(opts.localFile, "utf8");
  } else {
    const target = opts.sshTarget ?? DEFAULT_SSH_TARGET;
    // Only the caller-supplied id is quoted: it must not be able to inject
    // shell syntax into the remote command. The base directory is left as
    // configured.
    const agentDir = `${DEFAULT_LOG_BASE}/${quoteForRemoteShell(opts.agentId)}`;
    // Over-fetch per file so the time/message filters still leave `limit`
    // entries, while keeping the ssh response bounded.
    const perFileTail = Math.max(limit * 5, 1000);
    const cmd =
      // `2>/dev/null || true` so missing files don't make ssh exit non-zero
      `(tail -n ${perFileTail} ${agentDir}/${isoYesterday()}.jsonl 2>/dev/null || true; ` +
      `tail -n ${perFileTail} ${agentDir}/${isoToday()}.jsonl 2>/dev/null || true)`;
    raw = sshExec(target, cmd);
  }

  const cutoffMs = Date.now() - sinceMinutes * 60 * 1000;
  const entries: LogEntry[] = [];
  for (const line of raw.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      continue;
    }
    // `null`, numbers, strings and arrays are valid JSON but not log entries;
    // reading `.time` on `null` would throw.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;
    const entry = parsed as LogEntry;

    // Time filter
    if (typeof entry.time !== "string") continue;
    const entryMs = Date.parse(entry.time);
    if (!Number.isFinite(entryMs) || entryMs < cutoffMs) continue;

    // Message filter; a non-string `msg` can never contain the substring.
    if (opts.filterMsg) {
      if (typeof entry.msg !== "string" || !entry.msg.includes(opts.filterMsg)) continue;
    }

    entries.push(entry);
  }

  // Keep the last `limit`. `slice(-0)` would return everything, so treat a
  // non-positive limit explicitly.
  return limit > 0 ? entries.slice(-limit) : [];
}
|
||||
|
||||
/**
 * Locate the newest `"executing tool"` log entry for a given tool.
 *
 * The launcher emits `logger.Info("executing tool", "tool", tc.Name, "call_id", tc.ID)`
 * (devagents/llm.go); that entry is treated as the canonical trace of a tool call.
 *
 * @param opts - Agent to inspect, tool name to match, and optional look-back
 *   window in minutes (default 5) and SSH target.
 * @returns The trace of the most recent matching call, or `null` when the
 *   window contains none.
 */
export async function findLastToolCall(opts: {
  agentId: string;
  toolName: string;
  sinceMinutes?: number;
  sshTarget?: string;
}): Promise<ToolCallTrace | null> {
  const { agentId, toolName, sshTarget } = opts;
  const candidates = await fetchAgentLogs({
    agentId,
    sinceMinutes: opts.sinceMinutes ?? 5,
    sshTarget,
    filterMsg: "executing tool",
    limit: 500,
  });

  // Entries arrive oldest-first; scan a reversed copy so the first hit is the newest.
  const hit = [...candidates]
    .reverse()
    .find((entry) => entry.msg === "executing tool" && entry.tool === toolName);
  if (hit === undefined) return null;

  return {
    toolName,
    callId: String(hit.call_id ?? ""),
    ts: hit.time,
    raw: hit,
  };
}
|
||||
|
||||
/**
 * Collect every `"executing tool"` entry in the window, whatever the tool.
 *
 * @param opts - Agent to inspect, plus optional look-back window in minutes
 *   (default 5) and SSH target.
 * @returns One trace per entry that has a string `tool` field, in log order.
 */
export async function findAnyToolCalls(opts: {
  agentId: string;
  sinceMinutes?: number;
  sshTarget?: string;
}): Promise<ToolCallTrace[]> {
  const { agentId, sshTarget } = opts;
  const candidates = await fetchAgentLogs({
    agentId,
    sinceMinutes: opts.sinceMinutes ?? 5,
    sshTarget,
    filterMsg: "executing tool",
    limit: 500,
  });

  const traces: ToolCallTrace[] = [];
  for (const entry of candidates) {
    // The fetch filter is a substring match; require the exact message here.
    if (entry.msg !== "executing tool") continue;
    if (typeof entry.tool !== "string") continue;
    traces.push({
      toolName: String(entry.tool),
      callId: String(entry.call_id ?? ""),
      ts: entry.time,
      raw: entry,
    });
  }
  return traces;
}
|
||||
|
||||
/**
 * Fail when the agent logged any ERROR-level entry inside the window.
 *
 * @param opts - Agent to inspect, optional look-back window in minutes
 *   (default 5), optional SSH target, and `ignore`: patterns matched against
 *   `"<msg> <err>"`; an ERROR entry matching any of them is tolerated.
 * @throws Error listing up to five unexpected ERROR entries.
 */
export async function assertNoErrors(opts: {
  agentId: string;
  sinceMinutes?: number;
  sshTarget?: string;
  // Patterns on `msg` or `err` that are acceptable to ignore
  ignore?: RegExp[];
}): Promise<void> {
  const windowMinutes = opts.sinceMinutes ?? 5;
  const logs = await fetchAgentLogs({
    agentId: opts.agentId,
    sinceMinutes: windowMinutes,
    sshTarget: opts.sshTarget,
    limit: 1000,
  });

  const ignore = opts.ignore ?? [];
  const unexpected = logs.filter((e) => {
    if (e.level !== "ERROR") return false;
    const blob = `${e.msg ?? ""} ${e.err ?? ""}`;
    // `search` always scans from index 0. `RegExp.prototype.test` would carry
    // `lastIndex` from one entry to the next for patterns with the `g` or `y`
    // flag, making an ignore rule miss intermittently.
    return !ignore.some((rx) => blob.search(rx) !== -1);
  });

  if (unexpected.length > 0) {
    const sample = unexpected
      .slice(0, 5)
      .map((e) => `[${e.time}] ${e.msg} err=${e.err}`)
      .join("\n");
    throw new Error(
      `Agent log has ${unexpected.length} ERROR entries in last ` +
        `${windowMinutes}min:\n${sample}`,
    );
  }
}
|
||||
|
||||
/**
 * Best-effort reply latency, in milliseconds.
 *
 * The launcher does not emit a reply event correlated with the incoming
 * message, so this is a heuristic. Each `message_received` entry is paired with
 * the first later entry, no more than 60 s after it, whose message is
 * `"executing tool"`, `"llm response"`, `"tool_use_loop_done"`, or contains
 * `"reply"`. The log level is not considered.
 *
 * @param opts - Agent to inspect, plus optional look-back window in minutes
 *   (default 10) and SSH target.
 * @returns The delay of the last pair found in log order, or `null` when no
 *   `message_received` entry could be paired.
 */
export async function measureReplyLatency(opts: {
  agentId: string;
  sinceMinutes?: number;
  sshTarget?: string;
}): Promise<number | null> {
  const logs = await fetchAgentLogs({
    agentId: opts.agentId,
    sinceMinutes: opts.sinceMinutes ?? 10,
    sshTarget: opts.sshTarget,
    limit: 2000,
  });

  // True for messages taken as evidence that the agent started responding.
  const isReplySignal = (msg: unknown): boolean =>
    msg === "executing tool" ||
    msg === "llm response" ||
    msg === "tool_use_loop_done" ||
    (typeof msg === "string" && msg.includes("reply"));

  let latest: number | null = null;
  for (const [idx, received] of logs.entries()) {
    if (received.msg !== "message_received") continue;
    const startMs = Date.parse(received.time);

    let cursor = idx + 1;
    while (cursor < logs.length) {
      const elapsed = Date.parse(logs[cursor].time) - startMs;
      // Past the pairing window: give up on this message.
      if (elapsed > 60_000) break;
      if (isReplySignal(logs[cursor].msg)) {
        latest = elapsed;
        break;
      }
      cursor += 1;
    }
  }
  return latest;
}
|
||||
|
||||
/**
 * Best-effort uptime of a systemd unit on the remote host.
 *
 * Asks `systemctl show` for the unit's `ActiveEnterTimestamp` and measures the
 * distance from that instant to now.
 *
 * @param opts - Optional SSH target and unit name (default
 *   `agents_and_robots.service`).
 * @returns Whole seconds since the unit became active, or `null` when the SSH
 *   call fails, the output is empty, or the timestamp cannot be parsed.
 */
export async function fetchServiceUptimeSec(opts: {
  sshTarget?: string;
  unit?: string;
}): Promise<number | null> {
  const host = opts.sshTarget ?? DEFAULT_SSH_TARGET;
  const serviceUnit = opts.unit ?? "agents_and_robots.service";

  let stdout: string;
  try {
    stdout = sshExec(
      host,
      `systemctl show ${serviceUnit} --property=ActiveEnterTimestamp --value 2>/dev/null || true`,
    );
  } catch {
    return null;
  }

  // An empty string parses to NaN, so blank output takes the same exit as an
  // unparseable timestamp.
  // NOTE(review): relies on Date.parse accepting systemctl's timestamp text — confirm on the Node version in use.
  const activeSinceMs = Date.parse(stdout.trim());
  if (!Number.isFinite(activeSinceMs)) return null;

  return Math.floor((Date.now() - activeSinceMs) / 1000);
}
|
||||
Reference in New Issue
Block a user