feat: externalize apps/analysis to Gitea repos, add analysis table

- Migration 007: repo_url on apps table + analysis table with FTS5
- Analysis struct, parser, CRUD, validation, hash computation
- Selective purge: remote-only apps/analysis preserved across fn index
- CLI: fn app list/clone/pull, fn analysis list/clone/pull
- search/show/list now include analysis results
- Apps removed from git tracking (content lives in Gitea repos)
- .gitkeep for apps/ and analysis/ dirs
- Bash functions: jupyter analysis pipeline, shell utilities
- Browser domain: CDP functions moved from infra to browser

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-01 04:23:51 +02:00
parent 8f24157096
commit d7f2c00d7b
111 changed files with 2766 additions and 5043 deletions
+18 -2
View File
@@ -60,13 +60,25 @@ func ComputeAppHash(a *App) string {
fmt.Fprintf(h, "|%s", marshalStrings(a.Tags))
fmt.Fprintf(h, "|%s", marshalStrings(a.UsesFunctions))
fmt.Fprintf(h, "|%s", marshalStrings(a.UsesTypes))
fmt.Fprintf(h, "|%s|%s|%s|%s|%s", a.Framework, a.EntryPoint, a.Documentation, a.Notes, a.DirPath)
fmt.Fprintf(h, "|%s|%s|%s|%s|%s|%s", a.Framework, a.EntryPoint, a.Documentation, a.Notes, a.DirPath, a.RepoURL)
return fmt.Sprintf("%x", h.Sum(nil))
}
// ComputeAnalysisHash returns the hex-encoded SHA-256 digest of an Analysis's
// content fields. The fields are hashed in a fixed order, separated by "|";
// list fields are serialized with marshalStrings first. ContentHash, CreatedAt
// and UpdatedAt are not part of the digest.
func ComputeAnalysisHash(a *Analysis) string {
	// Order matters: changing it changes every stored hash.
	fields := []string{
		a.ID,
		a.Name,
		a.Lang,
		a.Domain,
		a.Description,
		marshalStrings(a.Tags),
		marshalStrings(a.UsesFunctions),
		marshalStrings(a.UsesTypes),
		a.Framework,
		a.EntryPoint,
		a.Documentation,
		a.Notes,
		a.DirPath,
		a.RepoURL,
	}

	digest := sha256.New()
	for i, field := range fields {
		if i > 0 {
			digest.Write([]byte("|"))
		}
		digest.Write([]byte(field))
	}
	return fmt.Sprintf("%x", digest.Sum(nil))
}
// LoadTimestamps reads existing id → {created_at, updated_at, content_hash} from all tables.
// Called before Purge so we can preserve dates across reindexing.
func (db *DB) LoadTimestamps() (funcs, types, apps map[string]timestampRecord, err error) {
func (db *DB) LoadTimestamps() (funcs, types, apps, analysis map[string]timestampRecord, err error) {
funcs, err = loadTable(db, "functions")
if err != nil {
return
@@ -76,6 +88,10 @@ func (db *DB) LoadTimestamps() (funcs, types, apps map[string]timestampRecord, e
return
}
apps, err = loadTable(db, "apps")
if err != nil {
return
}
analysis, err = loadTable(db, "analysis")
return
}
+47 -6
View File
@@ -13,6 +13,7 @@ type IndexResult struct {
Functions int
Types int
Apps int
Analysis int
ValidationErrors []string
Errors []string
}
@@ -26,15 +27,11 @@ type IndexResult struct {
// directories (e.g. python/functions/, python/types/).
func Index(db *DB, root string) (*IndexResult, error) {
// Load existing timestamps before purging so we can preserve created_at
oldFuncs, oldTypes, oldApps, err := db.LoadTimestamps()
oldFuncs, oldTypes, oldApps, oldAnalysis, err := db.LoadTimestamps()
if err != nil {
return nil, fmt.Errorf("loading timestamps: %w", err)
}
if err := db.Purge(); err != nil {
return nil, fmt.Errorf("purging database: %w", err)
}
result := &IndexResult{}
// Pass 1: parse everything from all source directories
@@ -42,7 +39,6 @@ func Index(db *DB, root string) (*IndexResult, error) {
var types []*Type
// Directories to scan for functions and types.
// Base dirs + language-specific dirs discovered automatically.
funcDirs := []string{filepath.Join(root, "functions")}
typeDirs := []string{filepath.Join(root, "types")}
@@ -86,6 +82,7 @@ func Index(db *DB, root string) (*IndexResult, error) {
// Parse apps from apps/*/app.md
var apps []*App
localAppIDs := make(map[string]bool)
appsDir := filepath.Join(root, "apps")
if fi, err := os.Stat(appsDir); err == nil && fi.IsDir() {
entries, _ := os.ReadDir(appsDir)
@@ -103,9 +100,39 @@ func Index(db *DB, root string) (*IndexResult, error) {
continue
}
apps = append(apps, a)
localAppIDs[a.ID] = true
}
}
// Parse analysis from analysis/*/analysis.md
var analyses []*Analysis
localAnalysisIDs := make(map[string]bool)
analysisDir := filepath.Join(root, "analysis")
if fi, err := os.Stat(analysisDir); err == nil && fi.IsDir() {
entries, _ := os.ReadDir(analysisDir)
for _, e := range entries {
if !e.IsDir() {
continue
}
analysisMD := filepath.Join(analysisDir, e.Name(), "analysis.md")
if _, err := os.Stat(analysisMD); err != nil {
continue
}
an, err := ParseAnalysisMD(analysisMD, root)
if err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", analysisMD, err))
continue
}
analyses = append(analyses, an)
localAnalysisIDs[an.ID] = true
}
}
// Selective purge: preserve remote-only apps/analysis (have repo_url, not cloned locally)
if err := db.PurgeLocalOnly(localAppIDs, localAnalysisIDs); err != nil {
return nil, fmt.Errorf("purging database: %w", err)
}
// Build known ID sets
knownFunctions := make(map[string]bool, len(functions))
for _, f := range functions {
@@ -161,6 +188,20 @@ func Index(db *DB, root string) (*IndexResult, error) {
result.Apps++
}
for _, an := range analyses {
if verr := ValidateAnalysis(an, knownFunctions, knownTypes); verr != nil {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
an.ContentHash = ComputeAnalysisHash(an)
applyTimestamps(&an.CreatedAt, &an.UpdatedAt, an.ContentHash, oldAnalysis[an.ID], now)
if err := db.InsertAnalysis(an); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", an.ID, err))
continue
}
result.Analysis++
}
return result, nil
}
@@ -0,0 +1,54 @@
-- Externalize apps and analysis to Gitea repositories.
-- Adds repo_url to apps, creates analysis table with FTS5.

-- Existing app rows receive the empty-string default, i.e. "no remote repo".
-- NOTE(review): unlike the CREATE ... IF NOT EXISTS statements in this file,
-- this ALTER has no existence guard — confirm the migration runner applies
-- this file only once.
ALTER TABLE apps ADD COLUMN repo_url TEXT NOT NULL DEFAULT '';
-- Analysis table: independent Jupyter/data explorations tracked in the registry.
--
-- Column order is significant: the Go layer reads rows with SELECT * and scans
-- them positionally (see scanAnalysis), so columns must not be reordered.
-- tags, uses_functions and uses_types hold string lists serialized to text
-- (default '[]'); created_at and updated_at are written as RFC3339 strings.
CREATE TABLE IF NOT EXISTS analysis (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
lang TEXT NOT NULL,
domain TEXT NOT NULL,
description TEXT NOT NULL,
tags TEXT NOT NULL DEFAULT '[]',
uses_functions TEXT NOT NULL DEFAULT '[]',
uses_types TEXT NOT NULL DEFAULT '[]',
framework TEXT NOT NULL DEFAULT '',
entry_point TEXT NOT NULL DEFAULT '',
documentation TEXT NOT NULL DEFAULT '',
notes TEXT NOT NULL DEFAULT '',
repo_url TEXT NOT NULL DEFAULT '',
dir_path TEXT NOT NULL DEFAULT '',
content_hash TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
-- Full-text index over the searchable text columns of analysis.
-- Declared with content='analysis' / content_rowid='rowid', so the index refers
-- to rows of the analysis table by rowid and must be kept in sync by the
-- triggers below.
CREATE VIRTUAL TABLE IF NOT EXISTS analysis_fts USING fts5(
id,
name,
description,
tags,
domain,
documentation,
notes,
content='analysis',
content_rowid='rowid'
);
-- After INSERT: index the new row under its rowid.
CREATE TRIGGER IF NOT EXISTS analysis_ai AFTER INSERT ON analysis BEGIN
INSERT INTO analysis_fts(rowid, id, name, description, tags, domain, documentation, notes)
VALUES (new.rowid, new.id, new.name, new.description, new.tags, new.domain, new.documentation, new.notes);
END;
-- After DELETE: issue the FTS5 'delete' command with the old column values so
-- the removed row's entries are dropped from the index.
CREATE TRIGGER IF NOT EXISTS analysis_ad AFTER DELETE ON analysis BEGIN
INSERT INTO analysis_fts(analysis_fts, rowid, id, name, description, tags, domain, documentation, notes)
VALUES ('delete', old.rowid, old.id, old.name, old.description, old.tags, old.domain, old.documentation, old.notes);
END;
-- After UPDATE: remove the entries for the old values, then index the new ones.
CREATE TRIGGER IF NOT EXISTS analysis_au AFTER UPDATE ON analysis BEGIN
INSERT INTO analysis_fts(analysis_fts, rowid, id, name, description, tags, domain, documentation, notes)
VALUES ('delete', old.rowid, old.id, old.name, old.description, old.tags, old.domain, old.documentation, old.notes);
INSERT INTO analysis_fts(rowid, id, name, description, tags, domain, documentation, notes)
VALUES (new.rowid, new.id, new.name, new.description, new.tags, new.domain, new.documentation, new.notes);
END;
+22
View File
@@ -118,6 +118,28 @@ type App struct {
Notes string `json:"notes"`
DirPath string `json:"dir_path"`
ContentHash string `json:"content_hash"`
RepoURL string `json:"repo_url"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// Analysis represents an entry in the analysis table. Each field maps to the
// column named in its json tag.
type Analysis struct {
// ID is generated from Name, Lang and Domain via GenerateID when the entry
// is parsed, or on insert if it is still empty.
ID string `json:"id"`
Name string `json:"name"`
Lang string `json:"lang"`
Domain string `json:"domain"`
Description string `json:"description"`
// Tags, UsesFunctions and UsesTypes are string lists; they are serialized
// with marshalStrings before being stored or hashed.
Tags []string `json:"tags"`
// UsesFunctions and UsesTypes are checked against the known function and
// type IDs by ValidateAnalysis.
UsesFunctions []string `json:"uses_functions"`
UsesTypes []string `json:"uses_types"`
Framework string `json:"framework"`
EntryPoint string `json:"entry_point"`
// Documentation and Notes are taken from sections of the .md body rather
// than from the YAML frontmatter.
Documentation string `json:"documentation"`
Notes string `json:"notes"`
// RepoURL is the repo_url frontmatter value; rows with a non-empty value
// that are not present locally survive PurgeLocalOnly.
RepoURL string `json:"repo_url"`
// DirPath must not start with "/" (enforced by ValidateAnalysis).
DirPath string `json:"dir_path"`
// ContentHash is the value produced by ComputeAnalysisHash at index time.
ContentHash string `json:"content_hash"`
// CreatedAt and UpdatedAt are persisted as RFC3339 strings.
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
+63
View File
@@ -74,6 +74,22 @@ type rawApp struct {
Framework string `yaml:"framework"`
EntryPoint string `yaml:"entry_point"`
DirPath string `yaml:"dir_path"`
RepoURL string `yaml:"repo_url"`
}
// rawAnalysis mirrors the YAML frontmatter of an analysis .md file.
// It is the decoding target used by ParseAnalysisMD. The ID, documentation and
// notes of the resulting Analysis are not read from the frontmatter: the ID is
// generated and the other two come from the file body.
type rawAnalysis struct {
// Name and Description are required; ParseAnalysisMD rejects files where
// either is empty.
Name string `yaml:"name"`
Lang string `yaml:"lang"`
Domain string `yaml:"domain"`
Description string `yaml:"description"`
Tags []string `yaml:"tags"`
UsesFunctions []string `yaml:"uses_functions"`
UsesTypes []string `yaml:"uses_types"`
Framework string `yaml:"framework"`
EntryPoint string `yaml:"entry_point"`
DirPath string `yaml:"dir_path"`
RepoURL string `yaml:"repo_url"`
}
// extractFrontmatter splits a .md file into YAML frontmatter and body.
@@ -266,11 +282,58 @@ func ParseAppMD(path string, root string) (*App, error) {
Documentation: sections.documentation,
Notes: sections.notes,
DirPath: raw.DirPath,
RepoURL: raw.RepoURL,
}
return a, nil
}
// ParseAnalysisMD reads the analysis .md file at path and converts it into an
// Analysis. Metadata comes from the YAML frontmatter; Documentation and Notes
// come from sections of the body. The ID is generated from name, lang and
// domain.
//
// An error is returned if the file cannot be read, the frontmatter cannot be
// extracted or decoded, or the name or description is empty. The root
// parameter is accepted but not used by this function.
func ParseAnalysisMD(path string, root string) (*Analysis, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %w", path, err)
	}

	frontmatter, body, err := extractFrontmatter(content)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %w", path, err)
	}

	var meta rawAnalysis
	if err = yaml.Unmarshal(frontmatter, &meta); err != nil {
		return nil, fmt.Errorf("parsing YAML in %s: %w", path, err)
	}

	// Required frontmatter fields; name is checked before description.
	switch {
	case meta.Name == "":
		return nil, fmt.Errorf("%s: name is required", path)
	case meta.Description == "":
		return nil, fmt.Errorf("%s: description is required", path)
	}

	parsed := extractSections(body)

	return &Analysis{
		ID:            GenerateID(meta.Name, meta.Lang, meta.Domain),
		Name:          meta.Name,
		Lang:          meta.Lang,
		Domain:        meta.Domain,
		Description:   meta.Description,
		Tags:          meta.Tags,
		UsesFunctions: meta.UsesFunctions,
		UsesTypes:     meta.UsesTypes,
		Framework:     meta.Framework,
		EntryPoint:    meta.EntryPoint,
		Documentation: parsed.documentation,
		Notes:         parsed.notes,
		DirPath:       meta.DirPath,
		RepoURL:       meta.RepoURL,
	}, nil
}
// bodySections holds the extracted sections from a .md body.
type bodySections struct {
example string // content under ## Ejemplo
+159 -4
View File
@@ -288,11 +288,12 @@ func (db *DB) InsertApp(a *App) error {
INSERT OR REPLACE INTO apps (
id, name, lang, domain, description, tags,
uses_functions, uses_types, framework, entry_point,
documentation, notes, dir_path, content_hash, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
documentation, notes, dir_path, content_hash, created_at, updated_at, repo_url
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
a.ID, a.Name, a.Lang, a.Domain, a.Description, marshalStrings(a.Tags),
marshalStrings(a.UsesFunctions), marshalStrings(a.UsesTypes), a.Framework, a.EntryPoint,
a.Documentation, a.Notes, a.DirPath, a.ContentHash, a.CreatedAt.Format(time.RFC3339), a.UpdatedAt.Format(time.RFC3339),
a.RepoURL,
)
return err
}
@@ -359,6 +360,7 @@ func scanApps(rows interface{ Next() bool; Scan(...any) error }) ([]App, error)
&a.ID, &a.Name, &a.Lang, &a.Domain, &a.Description, &tagsJSON,
&usesFnJSON, &usesTypJSON, &a.Framework, &a.EntryPoint,
&a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt, &a.ContentHash,
&a.RepoURL,
)
if err != nil {
return nil, fmt.Errorf("scanning app: %w", err)
@@ -375,7 +377,7 @@ func scanApps(rows interface{ Next() bool; Scan(...any) error }) ([]App, error)
return result, nil
}
// Purge deletes all data from functions, types and apps. Used before re-indexing.
// Purge deletes all data from functions, types, apps and analysis. Used before re-indexing.
func (db *DB) Purge() error {
if _, err := db.conn.Exec("DELETE FROM functions"); err != nil {
return err
@@ -383,10 +385,163 @@ func (db *DB) Purge() error {
if _, err := db.conn.Exec("DELETE FROM types"); err != nil {
return err
}
_, err := db.conn.Exec("DELETE FROM apps")
if _, err := db.conn.Exec("DELETE FROM apps"); err != nil {
return err
}
_, err := db.conn.Exec("DELETE FROM analysis")
return err
}
// PurgeLocalOnly clears the tables that are rebuilt from disk while keeping
// remote-only apps and analysis.
//
// It deletes every row of functions and types. For apps and then analysis it
// deletes each row whose id is a key of the corresponding local ID map,
// followed by every row with an empty (or NULL) repo_url. Rows that have a
// repo_url and are not in the local map are left untouched. The first failing
// statement aborts the purge and its error is returned as-is.
func (db *DB) PurgeLocalOnly(localAppIDs, localAnalysisIDs map[string]bool) error {
	// run executes a single statement and discards the result.
	run := func(stmt string, args ...any) error {
		_, err := db.conn.Exec(stmt, args...)
		return err
	}

	// Functions and types are always fully re-indexed from disk.
	for _, stmt := range []string{"DELETE FROM functions", "DELETE FROM types"} {
		if err := run(stmt); err != nil {
			return err
		}
	}

	// Tables holding entries that may live only in a remote repository.
	selective := []struct {
		deleteByID   string          // removes one locally scanned entry
		deleteNoRepo string          // removes legacy entries that have no repo_url
		localIDs     map[string]bool // IDs found on disk during this run
	}{
		{
			deleteByID:   "DELETE FROM apps WHERE id = ?",
			deleteNoRepo: "DELETE FROM apps WHERE repo_url = '' OR repo_url IS NULL",
			localIDs:     localAppIDs,
		},
		{
			deleteByID:   "DELETE FROM analysis WHERE id = ?",
			deleteNoRepo: "DELETE FROM analysis WHERE repo_url = '' OR repo_url IS NULL",
			localIDs:     localAnalysisIDs,
		},
	}

	for _, table := range selective {
		for id := range table.localIDs {
			if err := run(table.deleteByID, id); err != nil {
				return err
			}
		}
		if err := run(table.deleteNoRepo); err != nil {
			return err
		}
	}
	return nil
}
// --- Analysis CRUD ---
// InsertAnalysis inserts an analysis entry, or overwrites every column of the
// existing row that has the same id.
//
// Before writing, zero CreatedAt/UpdatedAt values are set to the current UTC
// time and an empty ID is generated from Name, Lang and Domain; these changes
// are made on a itself. Timestamps are stored as RFC3339 strings and list
// fields are serialized with marshalStrings.
//
// The write is an upsert (INSERT ... ON CONFLICT DO UPDATE) rather than
// INSERT OR REPLACE. REPLACE resolves the conflict by deleting the old row,
// and SQLite fires delete triggers for that implicit delete only when
// recursive triggers are enabled; otherwise the old row's entries would be
// left behind in the analysis_fts index. An upsert performs a real UPDATE, so
// the AFTER UPDATE trigger keeps the index in sync and the rowid is preserved.
func (db *DB) InsertAnalysis(a *Analysis) error {
	now := time.Now().UTC()
	if a.CreatedAt.IsZero() {
		a.CreatedAt = now
	}
	if a.UpdatedAt.IsZero() {
		a.UpdatedAt = now
	}
	if a.ID == "" {
		a.ID = GenerateID(a.Name, a.Lang, a.Domain)
	}

	_, err := db.conn.Exec(`
		INSERT INTO analysis (
			id, name, lang, domain, description, tags,
			uses_functions, uses_types, framework, entry_point,
			documentation, notes, repo_url, dir_path, content_hash, created_at, updated_at
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		ON CONFLICT(id) DO UPDATE SET
			name = excluded.name,
			lang = excluded.lang,
			domain = excluded.domain,
			description = excluded.description,
			tags = excluded.tags,
			uses_functions = excluded.uses_functions,
			uses_types = excluded.uses_types,
			framework = excluded.framework,
			entry_point = excluded.entry_point,
			documentation = excluded.documentation,
			notes = excluded.notes,
			repo_url = excluded.repo_url,
			dir_path = excluded.dir_path,
			content_hash = excluded.content_hash,
			created_at = excluded.created_at,
			updated_at = excluded.updated_at`,
		a.ID, a.Name, a.Lang, a.Domain, a.Description, marshalStrings(a.Tags),
		marshalStrings(a.UsesFunctions), marshalStrings(a.UsesTypes), a.Framework, a.EntryPoint,
		a.Documentation, a.Notes, a.RepoURL, a.DirPath, a.ContentHash,
		a.CreatedAt.Format(time.RFC3339), a.UpdatedAt.Format(time.RFC3339),
	)
	return err
}
// GetAnalysis looks up one analysis entry by its ID. It returns an error if
// the query or row scan fails, or if no row has that ID.
func (db *DB) GetAnalysis(id string) (*Analysis, error) {
	rows, queryErr := db.conn.Query("SELECT * FROM analysis WHERE id = ?", id)
	if queryErr != nil {
		return nil, queryErr
	}
	defer rows.Close()

	matches, scanErr := scanAnalysis(rows)
	switch {
	case scanErr != nil:
		return nil, scanErr
	case len(matches) == 0:
		return nil, fmt.Errorf("analysis %q not found", id)
	}
	return &matches[0], nil
}
// SearchAnalysis returns analysis entries ordered by name, narrowed by any
// combination of three optional filters: a full-text query matched against
// analysis_fts, an exact lang, and an exact domain. An empty string disables
// the corresponding filter; with all three empty every entry is returned.
func (db *DB) SearchAnalysis(query string, lang, domain string) ([]Analysis, error) {
	// Each filter contributes its clause and bound value only when non-empty.
	filters := []struct {
		clause string
		value  string
	}{
		{"a.id IN (SELECT id FROM analysis_fts WHERE analysis_fts MATCH ?)", query},
		{"a.lang = ?", lang},
		{"a.domain = ?", domain},
	}

	var conds []string
	var params []any
	for _, f := range filters {
		if f.value == "" {
			continue
		}
		conds = append(conds, f.clause)
		params = append(params, f.value)
	}

	stmt := "SELECT * FROM analysis a"
	if len(conds) > 0 {
		stmt += " WHERE " + strings.Join(conds, " AND ")
	}
	stmt += " ORDER BY a.name"

	rows, err := db.conn.Query(stmt, params...)
	if err != nil {
		return nil, fmt.Errorf("search analysis: %w", err)
	}
	defer rows.Close()

	return scanAnalysis(rows)
}
// ListAllAnalysis returns every analysis entry by running SearchAnalysis with
// all three filters left empty.
func (db *DB) ListAllAnalysis() ([]Analysis, error) {
	const unfiltered = ""
	return db.SearchAnalysis(unfiltered, unfiltered, unfiltered)
}
// ListAllApps returns every app entry by running SearchApps with all three
// filters left empty.
func (db *DB) ListAllApps() ([]App, error) {
	const unfiltered = ""
	return db.SearchApps(unfiltered, unfiltered, unfiltered)
}
// scanAnalysis drains rows into a slice of Analysis values.
//
// Each row must provide the 17 analysis columns in table order (id through
// updated_at). List columns are decoded with unmarshalStrings and timestamps
// are parsed as RFC3339. It returns a nil slice when rows is empty.
//
// An error is returned if a row cannot be scanned or, when rows also exposes
// an Err() method (as *sql.Rows does), if iteration stopped because of an
// error rather than because the result set was exhausted.
func scanAnalysis(rows interface{ Next() bool; Scan(...any) error }) ([]Analysis, error) {
	var result []Analysis
	for rows.Next() {
		var a Analysis
		var tagsJSON, usesFnJSON, usesTypJSON string
		var createdAt, updatedAt string
		err := rows.Scan(
			&a.ID, &a.Name, &a.Lang, &a.Domain, &a.Description, &tagsJSON,
			&usesFnJSON, &usesTypJSON, &a.Framework, &a.EntryPoint,
			&a.Documentation, &a.Notes, &a.RepoURL, &a.DirPath, &a.ContentHash,
			&createdAt, &updatedAt,
		)
		if err != nil {
			return nil, fmt.Errorf("scanning analysis: %w", err)
		}
		a.Tags = unmarshalStrings(tagsJSON)
		a.UsesFunctions = unmarshalStrings(usesFnJSON)
		a.UsesTypes = unmarshalStrings(usesTypJSON)
		// Best effort: a malformed timestamp leaves the zero time instead of
		// failing the whole scan.
		a.CreatedAt, _ = time.Parse(time.RFC3339, createdAt)
		a.UpdatedAt, _ = time.Parse(time.RFC3339, updatedAt)
		result = append(result, a)
	}

	// Next() returning false means either "no more rows" or "iteration
	// failed"; only Err() can tell them apart. The parameter type does not
	// require Err, so it is checked only when the concrete value provides it.
	if failable, ok := rows.(interface{ Err() error }); ok {
		if err := failable.Err(); err != nil {
			return nil, fmt.Errorf("iterating analysis rows: %w", err)
		}
	}
	return result, nil
}
func scanFunctions(rows interface{ Next() bool; Scan(...any) error }) ([]Function, error) {
var result []Function
for rows.Next() {
+38
View File
@@ -199,6 +199,44 @@ func ValidateApp(a *App, knownFunctions, knownTypes map[string]bool) *Validation
return nil
}
// ValidateAnalysis applies the integrity rules for an analysis entry and
// collects every violation instead of stopping at the first one.
//
// Rules: name, lang, domain and description must be non-empty; dir_path must
// not start with "/"; every entry of UsesFunctions must be a key set to true
// in knownFunctions, and every entry of UsesTypes likewise in knownTypes.
//
// It returns nil when all rules pass, otherwise a *ValidationError carrying
// the entry's ID and the list of messages.
func ValidateAnalysis(a *Analysis, knownFunctions, knownTypes map[string]bool) *ValidationError {
	var problems []string

	required := []struct {
		label string
		value string
	}{
		{"name", a.Name},
		{"lang", a.Lang},
		{"domain", a.Domain},
		{"description", a.Description},
	}
	for _, field := range required {
		if field.value == "" {
			problems = append(problems, field.label+" is required")
		}
	}

	// An empty dir_path has no "/" prefix, so it passes this check.
	if strings.HasPrefix(a.DirPath, "/") {
		problems = append(problems, "dir_path must be relative to registry root")
	}

	for _, fn := range a.UsesFunctions {
		if known := knownFunctions[fn]; !known {
			problems = append(problems, fmt.Sprintf("uses_functions references unknown function: %s", fn))
		}
	}
	for _, typ := range a.UsesTypes {
		if known := knownTypes[typ]; !known {
			problems = append(problems, fmt.Sprintf("uses_types references unknown type: %s", typ))
		}
	}

	if len(problems) == 0 {
		return nil
	}
	return &ValidationError{ID: a.ID, Errors: problems}
}
// ValidateType checks integrity rules for types.
func ValidateType(t *Type, knownTypes map[string]bool) *ValidationError {
var errs []string