Files
fn_registry/bash/functions/pipelines/propose_capability_groups.sh
T
egutierrez 47fac22230 chore: auto-commit (799 archivos)
- .claude/CLAUDE.md
- .claude/commands/subagentes.md
- .claude/rules/INDEX.md
- .mcp.json
- bash/functions/cybersecurity/analyze_dns.md
- bash/functions/cybersecurity/audit_http_headers.md
- bash/functions/cybersecurity/audit_ssh_config.md
- bash/functions/cybersecurity/check_firewall.md
- bash/functions/cybersecurity/detect_suspicious_users.md
- bash/functions/cybersecurity/encrypt_file.md
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 00:28:20 +02:00

347 lines
12 KiB
Bash

#!/usr/bin/env bash
# propose_capability_groups — analyzes tags that are candidates for becoming a capability group (issue 0086)
# Filters candidates through a blocklist plus a cap on the number of domains.
# Lists the candidates, or promotes one of them when called with --apply.
# Strict mode: abort on command failure, on unset variables, and on a failure in any pipeline stage.
set -euo pipefail
# ---------------------------------------------------------------------------
# Blocklist: generic tags that are never capability groups
# ---------------------------------------------------------------------------
declare -a BLOCKLIST=()
# languages
BLOCKLIST+=(go py bash ps ts python cpp)
# domains
BLOCKLIST+=(core infra finance datascience cybersecurity shell tui pipelines browser)
# kind / purity
BLOCKLIST+=(function pipeline component pure impure)
# generic CRUD
BLOCKLIST+=(add create delete list update get set remove insert)
# overly generic verbs
BLOCKLIST+=(compose convert combine append empty exists check find format parse render)
# structural
BLOCKLIST+=(generic helper utility wrapper test)
# primitives
BLOCKLIST+=(string number int float array slice map dict value key)
# states
BLOCKLIST+=(pending-usar pendiente-usar)
# ---------------------------------------------------------------------------
# Resolve the registry root (walk up until a registry.db is found)
# ---------------------------------------------------------------------------
#######################################
# Walks up from the directory containing this script and prints the first
# ancestor directory that holds a registry.db file. The filesystem root
# itself ("/") is never inspected.
# Outputs: the directory path on stdout; an error message on stderr when
#          no registry.db is found.
# Returns: 0 when found, 1 otherwise.
#######################################
find_registry_root() {
  local current
  current="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

  until [[ "$current" == "/" ]]; do
    if [[ -f "${current}/registry.db" ]]; then
      echo "$current"
      return 0
    fi
    current="$(dirname "$current")"
  done

  echo "ERROR: registry.db no encontrado en ningún directorio padre" >&2
  return 1
}
# ---------------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------------
# Kept in their own variables so the help text can never drift from the
# values actually used.
DEFAULT_MIN_COUNT=3
DEFAULT_MAX_DOMAINS=4
MIN_COUNT="$DEFAULT_MIN_COUNT"
MAX_DOMAINS="$DEFAULT_MAX_DOMAINS"
JSON_MODE=false
APPLY_TAG=""

# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------

# Prints the usage text to stdout.
print_usage() {
  echo "Uso: propose_capability_groups [--min-count N] [--max-domains M] [--json] [--apply <tag>]"
  echo ""
  echo " --min-count N Minimo de funciones con el tag (default: ${DEFAULT_MIN_COUNT})"
  echo " --max-domains M Maximo de dominios distintos (default: ${DEFAULT_MAX_DOMAINS})"
  echo " --json Salida JSON"
  echo " --apply <tag> Promociona el tag a capability group"
}

# Exits with an error unless the option named $1 is followed by a value.
# $2 is the number of arguments still pending, the option itself included.
require_option_value() {
  if (( $2 < 2 )); then
    echo "ERROR: $1 requiere un valor" >&2
    exit 1
  fi
}

# Exits with an error unless $2 (the value given to option $1) is a
# non-negative integer. These values are later interpolated into SQL and
# used in arithmetic tests, so anything else must be rejected here.
require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "ERROR: $1 requiere un entero no negativo, recibido: '$2'" >&2
    exit 1
  fi
}

#######################################
# Parses the command line into the script's option globals.
# Globals:   MIN_COUNT, MAX_DOMAINS, JSON_MODE, APPLY_TAG (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on stdout for --help; errors on stderr
# Returns:   exits 0 after --help, exits 1 on an unknown or malformed option
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --min-count)
        require_option_value "$1" "$#"
        require_uint "$1" "$2"
        # 10# forces base 10 so values such as "08" are not read as octal.
        MIN_COUNT="$(( 10#$2 ))"
        shift 2
        ;;
      --max-domains)
        require_option_value "$1" "$#"
        require_uint "$1" "$2"
        MAX_DOMAINS="$(( 10#$2 ))"
        shift 2
        ;;
      --json)
        JSON_MODE=true
        shift
        ;;
      --apply)
        require_option_value "$1" "$#"
        APPLY_TAG="$2"
        shift 2
        ;;
      --help|-h)
        print_usage
        exit 0
        ;;
      *)
        echo "ERROR: argumento desconocido: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Locate the registry once; the database and the capability index both live
# at fixed paths below that root.
REGISTRY_ROOT="$(find_registry_root)"
DB="${REGISTRY_ROOT}/registry.db"
INDEX_MD="${REGISTRY_ROOT}/docs/capabilities/INDEX.md"
# ---------------------------------------------------------------------------
# Build the blocked tags as a quoted CSV list for use in SQL IN (...)
# ---------------------------------------------------------------------------
#######################################
# Prints every BLOCKLIST entry as a single-quoted SQL string literal, joined
# by commas (e.g. 'go','py'). Single quotes inside an entry are doubled so
# the literal stays well-formed.
# Globals:   BLOCKLIST (read)
# Outputs:   the CSV list on stdout (an empty line for an empty blocklist)
#######################################
build_blocklist_sql() {
  # Loop variable is local so a caller's own `tag` variable is never clobbered.
  local sq="'" entry csv=""
  for entry in "${BLOCKLIST[@]}"; do
    csv+="'${entry//$sq/$sq$sq}',"
  done
  # Drop the trailing comma.
  printf '%s\n' "${csv%,}"
}
# ---------------------------------------------------------------------------
# Parse the tags already present in INDEX.md: extract slugs from [tag](tag.md)
# ---------------------------------------------------------------------------
#######################################
# Prints one slug per line for every markdown link in INDEX_MD whose label
# equals its target minus the ".md" suffix. Prints nothing when the file is
# missing or has no such links.
# Globals:   INDEX_MD (read)
# Returns:   always 0
#######################################
get_existing_groups() {
  [[ -f "$INDEX_MD" ]] || return 0

  # Self-referencing link, e.g. [pdf](pdf.md); \1 ties the target to the label.
  local link_re='\[([^\]]+)\]\(\1\.md\)'
  # The bracketed label part of such a link.
  local label_re='\[([^\]]+)\]'

  # grep exits non-zero when nothing matches; that is not an error here.
  {
    grep -oP "$link_re" "$INDEX_MD" \
      | grep -oP "$label_re" \
      | tr -d '[]'
  } || true
}
# ---------------------------------------------------------------------------
# Check whether a tag is in the blocklist
# ---------------------------------------------------------------------------
#######################################
# Tests a tag for exact membership in BLOCKLIST.
# Globals:   BLOCKLIST (read)
# Arguments: $1 - tag to look up
# Returns:   0 if the tag is blocked, 1 otherwise
#######################################
in_blocklist() {
  # Both variables are local: this function runs in the caller's shell, so an
  # undeclared loop variable would overwrite a global of the same name.
  local tag="$1" blocked
  for blocked in "${BLOCKLIST[@]}"; do
    if [[ "$blocked" == "$tag" ]]; then
      return 0
    fi
  done
  return 1
}
# ---------------------------------------------------------------------------
# --apply MODE: promote a tag to a capability group
# ---------------------------------------------------------------------------
#######################################
# Validates a tag and promotes it to a capability group:
#   1. rejects blocklisted tags,
#   2. checks the tag's function count and domain spread against the limits,
#   3. runs generate_capability_doc.sh for the tag,
#   4. adds a row for the tag to INDEX.md unless one is already there.
# Globals:   DB, REGISTRY_ROOT, INDEX_MD, MIN_COUNT, MAX_DOMAINS (read)
# Arguments: $1 - tag to promote
# Outputs:   progress and follow-up instructions on stdout; errors on stderr
# Returns:   exits 1 on any validation or lookup failure
#######################################
apply_tag() {
  local tag="$1"

  # Validate: not in the blocklist.
  if in_blocklist "$tag"; then
    echo "ERROR: '$tag' está en la blocklist de tags genericos. No se puede promocionar." >&2
    exit 1
  fi

  # The tag comes straight from the command line: double any single quote so
  # it stays a well-formed SQL string literal.
  local sq="'"
  local sql_tag="${tag//$sq/$sq$sq}"

  # Validate: count >= min and domains <= max.
  local row
  if ! row="$(sqlite3 "$DB" "
SELECT COUNT(*) AS cnt, COUNT(DISTINCT f.domain) AS domains
FROM functions f, json_each(f.tags) j
WHERE j.value = '${sql_tag}'
GROUP BY j.value;
")"; then
    # A failing query is a different problem from a tag without functions;
    # report it as such instead of claiming the tag does not exist.
    echo "ERROR: fallo la consulta a '$DB'." >&2
    exit 1
  fi
  if [[ -z "$row" ]]; then
    echo "ERROR: tag '$tag' no encontrado en el registry o no tiene funciones." >&2
    exit 1
  fi

  # sqlite3 prints the two columns separated by '|'.
  local cnt domains
  IFS='|' read -r cnt domains <<< "$row"

  if [[ "$cnt" -lt "$MIN_COUNT" ]]; then
    echo "ERROR: tag '$tag' tiene $cnt funciones, minimo requerido es $MIN_COUNT." >&2
    exit 1
  fi
  if [[ "$domains" -gt "$MAX_DOMAINS" ]]; then
    echo "ERROR: tag '$tag' aparece en $domains dominios distintos (maximo $MAX_DOMAINS). Probablemente es generico." >&2
    exit 1
  fi

  echo "Promocionando tag '$tag' a capability group..."
  echo " funciones: $cnt dominios: $domains"
  echo ""

  # Step 1: call generate_capability_doc.
  local gen_script="$REGISTRY_ROOT/bash/functions/pipelines/generate_capability_doc.sh"
  if [[ ! -f "$gen_script" ]]; then
    echo "ERROR: no se encontro generate_capability_doc.sh en $gen_script" >&2
    exit 1
  fi
  echo "=> Generando docs/capabilities/${tag}.md ..."
  bash "$gen_script" "$tag"
  echo " OK"

  # Step 2: add a row to INDEX.md (idempotent).
  if [[ ! -f "$INDEX_MD" ]]; then
    echo "ERROR: no se encontro INDEX.md en $INDEX_MD" >&2
    exit 1
  fi

  # grep -F matches the text literally, so the brackets must NOT be
  # backslash-escaped; an escaped pattern never matches and every run would
  # append a duplicate row.
  local row_pattern="| [${tag}](${tag}.md)"
  if grep -qF -- "$row_pattern" "$INDEX_MD"; then
    echo "=> Fila para '$tag' ya existe en INDEX.md — sin cambios."
  else
    echo "=> Anadiendo fila a INDEX.md ..."
    # The row goes right after the |---|---|---| header separator of the
    # table that follows the "## Grupos vigentes" heading.
    local new_row="| [${tag}](${tag}.md) | ${cnt} | _(editar — promovido automaticamente)_ |"
    # Python performs the relative insertion; path and row are passed as
    # arguments so no shell text is interpolated into the program.
    python3 - "$INDEX_MD" "$new_row" <<'PYEOF'
import sys

index_path = sys.argv[1]
new_row = sys.argv[2]

with open(index_path, "r") as f:
    lines = f.readlines()

# Find the "Grupos vigentes" section and then its |---|---|---| separator line.
in_section = False
insert_after = -1
for i, line in enumerate(lines):
    if "## Grupos vigentes" in line:
        in_section = True
    if in_section and line.strip().startswith("|---|"):
        insert_after = i
        break

if insert_after == -1:
    print("ERROR: no se encontro la tabla 'Grupos vigentes' en INDEX.md", file=sys.stderr)
    sys.exit(1)

lines.insert(insert_after + 1, new_row + "\n")
with open(index_path, "w") as f:
    f.writelines(lines)
print(f" Fila insertada en posicion {insert_after + 1}")
PYEOF
    echo " OK"
  fi

  echo ""
  echo "HECHO. Pasos manuales pendientes:"
  echo " 1. Editar docs/capabilities/${tag}.md:"
  echo " - Anadir parrafo de descripcion del grupo."
  echo " - Completar seccion 'Ejemplo canonico' con codigo real."
  echo " - Completar seccion 'Fronteras' (que NO hace el grupo)."
  echo " - Anadir 'Notas' si aplica."
  echo " 2. Actualizar la frase descripcion en docs/capabilities/INDEX.md"
  echo " (reemplazar el placeholder con descripcion real)."
}
# ---------------------------------------------------------------------------
# LIST MODE: analyze candidates
# ---------------------------------------------------------------------------

# Escapes $1 for use inside a JSON string literal (backslash, double quote,
# tab, carriage return) and prints it without a trailing newline.
_json_escape() {
  local s="$1"
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\t'/\\t}"
  s="${s//$'\r'/\\r}"
  printf '%s' "$s"
}

#######################################
# Lists the tags that qualify as capability-group candidates: not blocked,
# used by at least MIN_COUNT functions and spread over at most MAX_DOMAINS
# domains. Each candidate is flagged when INDEX.md already lists it.
# Globals:   DB, MIN_COUNT, MAX_DOMAINS, JSON_MODE (read)
# Outputs:   a JSON array when JSON_MODE is "true" ("[]" when nothing
#            qualifies), otherwise a text table followed by a summary
# Returns:   exits 0 when there are no candidates; exits 1 on invalid limits
#            or when the query fails
#######################################
list_candidates() {
  # Both limits are interpolated into the SQL below, so they must be plain
  # non-negative integers.
  if [[ ! "$MIN_COUNT" =~ ^[0-9]+$ || ! "$MAX_DOMAINS" =~ ^[0-9]+$ ]]; then
    echo "ERROR: min-count y max-domains deben ser enteros no negativos." >&2
    exit 1
  fi

  local blocklist_sql
  blocklist_sql="$(build_blocklist_sql)"

  # Query: tags with enough functions and not too many domains.
  local query="
SELECT
j.value AS tag,
COUNT(*) AS cnt,
COUNT(DISTINCT f.domain) AS domains,
GROUP_CONCAT(DISTINCT f.domain) AS domain_list,
GROUP_CONCAT(f.id) AS function_ids
FROM functions f, json_each(f.tags) j
WHERE j.value NOT IN (${blocklist_sql})
GROUP BY j.value
HAVING cnt >= ${MIN_COUNT} AND domains <= ${MAX_DOMAINS}
ORDER BY cnt DESC;
"

  local raw_results
  if ! raw_results="$(sqlite3 "$DB" "$query")"; then
    # A broken query or database must not be reported as "no candidates".
    echo "ERROR: fallo la consulta a '$DB'." >&2
    exit 1
  fi

  if [[ -z "$raw_results" ]]; then
    if [[ "$JSON_MODE" == true ]]; then
      # Keep stdout parseable for JSON consumers.
      echo "[]"
    else
      echo "No se encontraron candidatos con min-count=${MIN_COUNT} y max-domains=${MAX_DOMAINS}."
    fi
    exit 0
  fi

  # Groups already present in INDEX.md, one slug per line.
  local existing_groups
  existing_groups="$(get_existing_groups)"

  local tag cnt domains domain_list function_ids i
  local -a ids

  if [[ "$JSON_MODE" == true ]]; then
    local first=true already_group samples_json
    echo "["
    while IFS='|' read -r tag cnt domains domain_list function_ids; do
      [[ -z "$tag" ]] && continue

      already_group="false"
      if grep -qxF -- "$tag" <<< "$existing_groups"; then
        already_group="true"
      fi

      # Up to 3 sample ids, each emitted as an escaped JSON string.
      IFS=',' read -r -a ids <<< "$function_ids"
      samples_json=""
      for (( i = 0; i < ${#ids[@]} && i < 3; i++ )); do
        if (( i > 0 )); then
          samples_json+=","
        fi
        samples_json+="\"$(_json_escape "${ids[i]}")\""
      done

      # Objects are separated by a comma placed before every object but the first.
      if [[ "$first" == true ]]; then
        first=false
      else
        echo ","
      fi
      printf ' {"tag":"%s","count":%s,"domains":%s,"domain_list":"%s","already_group":%s,"samples":[%s]}' \
        "$(_json_escape "$tag")" "$cnt" "$domains" \
        "$(_json_escape "$domain_list")" "$already_group" "$samples_json"
    done <<< "$raw_results"
    echo ""
    echo "]"
    return
  fi

  # Human-readable output. Rows are printed as they are read; the totals are
  # only needed for the summary line that follows the table.
  local total=0 already=0 nuevos=0 already_flag samples
  printf "%-22s %-6s %-25s %-13s %s\n" "TAG" "COUNT" "DOMAINS" "ALREADY_GROUP" "SAMPLES"
  printf "%-22s %-6s %-25s %-13s %s\n" "----------------------" "------" "-------------------------" "-------------" "-------"
  while IFS='|' read -r tag cnt domains domain_list function_ids; do
    [[ -z "$tag" ]] && continue

    if grep -qxF -- "$tag" <<< "$existing_groups"; then
      already_flag="yes"
      already=$(( already + 1 ))
    else
      already_flag="no"
      nuevos=$(( nuevos + 1 ))
    fi
    total=$(( total + 1 ))

    # Up to 3 sample ids, comma-joined.
    IFS=',' read -r -a ids <<< "$function_ids"
    samples=""
    for (( i = 0; i < ${#ids[@]} && i < 3; i++ )); do
      if (( i > 0 )); then
        samples+=","
      fi
      samples+="${ids[i]}"
    done

    printf "%-22s %-6s %-25s %-13s %s\n" "$tag" "$cnt" "$domain_list" "$already_flag" "$samples"
  done <<< "$raw_results"

  echo ""
  echo "${total} candidatos. ${already} ya son grupo. ${nuevos} son nuevos."
  echo "Promociona con: bash bash/functions/pipelines/propose_capability_groups.sh --apply <tag>"
}
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
# Without --apply the script only lists candidates; with a tag it promotes it.
if [[ -z "$APPLY_TAG" ]]; then
  list_candidates
else
  apply_tag "$APPLY_TAG"
fi