// Package shellknowledge implements the knowledge store using files + SQLite FTS5. package shellknowledge import ( "context" "database/sql" "fmt" "log/slog" "os" "path/filepath" "regexp" "strings" "time" "github.com/enmanuel/agents/pkg/knowledge" ) const ftsSchema = ` CREATE VIRTUAL TABLE IF NOT EXISTS documents USING fts5( slug, title, content, updated_at UNINDEXED ); ` var slugRe = regexp.MustCompile(`^[a-z0-9][a-z0-9-]{0,62}[a-z0-9]$`) // ValidSlug returns true if s is a valid document slug. func ValidSlug(s string) bool { if len(s) < 2 || len(s) > 64 { return false } return slugRe.MatchString(s) } // FileStore implements knowledge.Store using markdown files + SQLite FTS5 index. type FileStore struct { dir string // path to agents//knowledge/ dbPath string // path to agents//data/knowledge.db db *sql.DB logger *slog.Logger } // New creates a FileStore. It ensures the knowledge dir and DB dir exist, // opens the SQLite database, and creates the FTS5 table if needed. func New(dir, dbPath string, logger *slog.Logger) (*FileStore, error) { log := logger.With("component", "knowledge", "dir", dir, "db_path", dbPath) if err := os.MkdirAll(dir, 0o755); err != nil { return nil, fmt.Errorf("create knowledge dir: %w", err) } if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { return nil, fmt.Errorf("create knowledge db dir: %w", err) } db, err := sql.Open("sqlite3", dbPath) if err != nil { return nil, fmt.Errorf("open knowledge db: %w", err) } // Enable WAL mode for better concurrency (allows multiple readers + single writer) if _, err := db.Exec("PRAGMA journal_mode=WAL"); err != nil { db.Close() return nil, fmt.Errorf("enable WAL mode: %w", err) } if _, err := db.Exec(ftsSchema); err != nil { db.Close() return nil, fmt.Errorf("create knowledge fts5 table: %w", err) } log.Info("knowledge_store_ready") return &FileStore{dir: dir, dbPath: dbPath, db: db, logger: log}, nil } // Sync re-indexes all .md files from disk into the FTS5 table. func (s *FileStore) Sync(ctx context.Context) error { entries, err := os.ReadDir(s.dir) if err != nil { return fmt.Errorf("read knowledge dir: %w", err) } tx, err := s.db.BeginTx(ctx, nil) if err != nil { return fmt.Errorf("begin sync tx: %w", err) } defer tx.Rollback() // Clear existing index if _, err := tx.ExecContext(ctx, `DELETE FROM documents`); err != nil { return fmt.Errorf("clear fts5 index: %w", err) } count := 0 for _, e := range entries { if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") { continue } slug := strings.TrimSuffix(e.Name(), ".md") if !ValidSlug(slug) { s.logger.Warn("skipping invalid slug", "file", e.Name()) continue } content, err := os.ReadFile(filepath.Join(s.dir, e.Name())) if err != nil { s.logger.Warn("skipping unreadable file", "file", e.Name(), "err", err) continue } info, err := e.Info() if err != nil { s.logger.Warn("skipping file without info", "file", e.Name(), "err", err) continue } title := extractTitle(string(content), slug) mtime := info.ModTime().UTC().Format(time.RFC3339) if _, err := tx.ExecContext(ctx, `INSERT INTO documents (slug, title, content, updated_at) VALUES (?, ?, ?, ?)`, slug, title, string(content), mtime, ); err != nil { s.logger.Warn("failed to index file", "slug", slug, "err", err) continue } count++ } if err := tx.Commit(); err != nil { return fmt.Errorf("commit sync tx: %w", err) } s.logger.Info("knowledge_sync", "count", count) return nil } // Search performs full-text search on the FTS5 index. func (s *FileStore) Search(ctx context.Context, query string, limit int) ([]knowledge.SearchResult, error) { if limit <= 0 { limit = 5 } rows, err := s.db.QueryContext(ctx, `SELECT slug, title, snippet(documents, 2, '**', '**', '…', 32), rank FROM documents WHERE documents MATCH ? ORDER BY rank LIMIT ?`, query, limit, ) if err != nil { return nil, fmt.Errorf("knowledge search: %w", err) } defer rows.Close() var results []knowledge.SearchResult for rows.Next() { var r knowledge.SearchResult if err := rows.Scan(&r.Slug, &r.Title, &r.Snippet, &r.Rank); err != nil { return nil, err } results = append(results, r) } return results, rows.Err() } // Get reads a document from disk by slug. func (s *FileStore) Get(ctx context.Context, slug string) (*knowledge.Document, error) { if !ValidSlug(slug) { return nil, fmt.Errorf("invalid slug: %q", slug) } path := filepath.Join(s.dir, slug+".md") content, err := os.ReadFile(path) if err != nil { if os.IsNotExist(err) { return nil, fmt.Errorf("document not found: %q", slug) } return nil, fmt.Errorf("read document: %w", err) } info, err := os.Stat(path) if err != nil { return nil, fmt.Errorf("stat document: %w", err) } return &knowledge.Document{ Slug: slug, Title: extractTitle(string(content), slug), Content: string(content), UpdatedAt: info.ModTime().UTC(), }, nil } // Put writes a document to disk and updates the FTS5 index. func (s *FileStore) Put(ctx context.Context, doc knowledge.Document) error { if !ValidSlug(doc.Slug) { return fmt.Errorf("invalid slug: %q", doc.Slug) } if len(doc.Content) > 64*1024 { return fmt.Errorf("document too large: %d bytes (max 65536)", len(doc.Content)) } path := filepath.Join(s.dir, doc.Slug+".md") if err := os.WriteFile(path, []byte(doc.Content), 0o644); err != nil { return fmt.Errorf("write document: %w", err) } title := extractTitle(doc.Content, doc.Slug) now := time.Now().UTC().Format(time.RFC3339) // Upsert: delete old + insert new (FTS5 doesn't support UPDATE well) tx, err := s.db.BeginTx(ctx, nil) if err != nil { return fmt.Errorf("begin put tx: %w", err) } defer tx.Rollback() if _, err := tx.ExecContext(ctx, `DELETE FROM documents WHERE slug = ?`, doc.Slug); err != nil { return fmt.Errorf("delete old index: %w", err) } if _, err := tx.ExecContext(ctx, `INSERT INTO documents (slug, title, content, updated_at) VALUES (?, ?, ?, ?)`, doc.Slug, title, doc.Content, now, ); err != nil { return fmt.Errorf("insert index: %w", err) } if err := tx.Commit(); err != nil { return fmt.Errorf("commit put tx: %w", err) } s.logger.Debug("knowledge_put", "slug", doc.Slug, "size", len(doc.Content)) return nil } // Delete removes a document from disk and the FTS5 index. func (s *FileStore) Delete(ctx context.Context, slug string) error { if !ValidSlug(slug) { return fmt.Errorf("invalid slug: %q", slug) } path := filepath.Join(s.dir, slug+".md") if err := os.Remove(path); err != nil && !os.IsNotExist(err) { return fmt.Errorf("remove document: %w", err) } if _, err := s.db.ExecContext(ctx, `DELETE FROM documents WHERE slug = ?`, slug); err != nil { return fmt.Errorf("delete from index: %w", err) } s.logger.Debug("knowledge_delete", "slug", slug) return nil } // List returns all documents from the FTS5 index. func (s *FileStore) List(ctx context.Context) ([]knowledge.Document, error) { rows, err := s.db.QueryContext(ctx, `SELECT slug, title, updated_at FROM documents ORDER BY slug`) if err != nil { return nil, fmt.Errorf("knowledge list: %w", err) } defer rows.Close() var docs []knowledge.Document for rows.Next() { var d knowledge.Document var updatedAt string if err := rows.Scan(&d.Slug, &d.Title, &updatedAt); err != nil { return nil, err } d.UpdatedAt, _ = time.Parse(time.RFC3339, updatedAt) docs = append(docs, d) } return docs, rows.Err() } // Close releases the SQLite database. func (s *FileStore) Close() error { s.logger.Info("knowledge_store_closed") return s.db.Close() } // extractTitle returns the first H1 heading from markdown content, or a humanized slug. func extractTitle(content, slug string) string { for _, line := range strings.SplitN(content, "\n", 20) { line = strings.TrimSpace(line) if strings.HasPrefix(line, "# ") { return strings.TrimPrefix(line, "# ") } } // Humanize slug: "go-patterns" → "Go patterns" humanized := strings.ReplaceAll(slug, "-", " ") if len(humanized) > 0 { humanized = strings.ToUpper(humanized[:1]) + humanized[1:] } return humanized }