47fac22230
- .claude/CLAUDE.md - .claude/commands/subagentes.md - .claude/rules/INDEX.md - .mcp.json - bash/functions/cybersecurity/analyze_dns.md - bash/functions/cybersecurity/audit_http_headers.md - bash/functions/cybersecurity/audit_ssh_config.md - bash/functions/cybersecurity/check_firewall.md - bash/functions/cybersecurity/detect_suspicious_users.md - bash/functions/cybersecurity/encrypt_file.md - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
347 lines
12 KiB
Bash
347 lines
12 KiB
Bash
#!/usr/bin/env bash
# propose_capability_groups — analyzes tags that are candidates for becoming a
# capability group (issue 0086).
#
# Candidates are filtered through a blocklist plus a cap on the number of
# domains. The script lists candidates or, with --apply, promotes one.
#
# Usage:
#   propose_capability_groups [--min-count N] [--max-domains M] [--json] [--apply <tag>]

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail
|
|
|
|
# ---------------------------------------------------------------------------
# Blocklist: generic tags that are never capability groups
# ---------------------------------------------------------------------------
# Read-only: the list is a constant consulted by the filtering helpers.
readonly -a BLOCKLIST=(
  # language
  go py bash ps ts python cpp
  # domain
  core infra finance datascience cybersecurity shell tui pipelines browser
  # kind / purity
  function pipeline component pure impure
  # generic CRUD
  add create delete list update get set remove insert
  # overly generic verbs
  compose convert combine append empty exists check find format parse render
  # structural
  generic helper utility wrapper test
  # primitives
  string number int float array slice map dict value key
  # states
  pending-usar pendiente-usar
)
|
|
|
|
# ---------------------------------------------------------------------------
# Resolve the registry root (walk up until registry.db is found)
# ---------------------------------------------------------------------------
# Arguments: $1 - optional start directory; defaults to the directory that
#                 contains this script.
# Outputs:   the first directory (the start directory or one of its
#            ancestors, "/" included) that contains a registry.db file.
# Returns:   0 when found; 1 with a message on stderr otherwise.
find_registry_root() {
  local start="${1:-}"
  local dir

  if [[ -z "$start" ]]; then
    start="$(dirname "${BASH_SOURCE[0]}")"
  fi
  # Normalize to an absolute path so dirname can walk it upwards.
  dir="$(cd -- "$start" >/dev/null && pwd)" || return 1

  while :; do
    if [[ -f "$dir/registry.db" ]]; then
      printf '%s\n' "$dir"
      return 0
    fi
    # "/" is its own parent: once it has been checked there is nowhere left.
    if [[ "$dir" == "/" ]]; then
      break
    fi
    dir="$(dirname "$dir")"
  done

  echo "ERROR: registry.db no encontrado en ningún directorio padre" >&2
  return 1
}
|
|
|
|
# ---------------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------------
MIN_COUNT=3      # minimum number of functions that must carry a tag
MAX_DOMAINS=4    # maximum number of distinct domains a tag may span
JSON_MODE=false  # true => list mode prints JSON
APPLY_TAG=""     # non-empty => promote this tag instead of listing

# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------

# Prints the usage text to stdout. Keep the defaults shown here in sync with
# the assignments above.
print_usage() {
  echo "Uso: propose_capability_groups [--min-count N] [--max-domains M] [--json] [--apply <tag>]"
  echo ""
  echo " --min-count N Minimo de funciones con el tag (default: 3)"
  echo " --max-domains M Maximo de dominios distintos (default: 4)"
  echo " --json Salida JSON"
  echo " --apply <tag> Promociona el tag a capability group"
}

# Exits with an error unless $2 is a non-negative integer.
# Arguments: $1 - option name (for the message), $2 - value to check.
# An absent value arrives as the empty string and is rejected as well.
require_uint() {
  local opt="$1"
  local value="$2"
  if ! [[ "$value" =~ ^[0-9]+$ ]]; then
    echo "ERROR: ${opt} requiere un entero no negativo (recibido: '${value}')" >&2
    exit 1
  fi
}

# Parses the command line into MIN_COUNT, MAX_DOMAINS, JSON_MODE and APPLY_TAG.
# Arguments: the script's positional parameters.
# Exits:     0 after printing help; 1 on an unknown option or a bad value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --min-count)
        require_uint "$1" "${2-}"
        # Force base 10 so a value such as "08" is not read as invalid octal.
        MIN_COUNT=$((10#$2))
        shift 2
        ;;
      --max-domains)
        require_uint "$1" "${2-}"
        MAX_DOMAINS=$((10#$2))
        shift 2
        ;;
      --json)
        JSON_MODE=true
        shift
        ;;
      --apply)
        if [[ -z "${2-}" ]]; then
          echo "ERROR: --apply requiere un tag" >&2
          exit 1
        fi
        APPLY_TAG="$2"
        shift 2
        ;;
      --help | -h)
        print_usage
        exit 0
        ;;
      *)
        echo "ERROR: argumento desconocido: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# Locate the registry once; the paths below are derived from it.
# Fail explicitly here instead of relying on `set -e` to abort the assignment.
REGISTRY_ROOT="$(find_registry_root)" || exit 1
DB="$REGISTRY_ROOT/registry.db"
INDEX_MD="$REGISTRY_ROOT/docs/capabilities/INDEX.md"
|
|
|
|
# ---------------------------------------------------------------------------
# Build the blocked tags as a quoted CSV list for use inside SQL IN (...)
# ---------------------------------------------------------------------------
# Globals: BLOCKLIST (read)
# Outputs: one line such as 'go','py','bash' (no trailing comma)
build_blocklist_sql() {
  local tag
  local quote="'"
  local csv=""

  for tag in "${BLOCKLIST[@]}"; do
    # Double any single quote so each entry stays a valid SQL string literal.
    csv+="'${tag//$quote/$quote$quote}',"
  done

  # Drop the trailing comma.
  printf '%s\n' "${csv%,}"
}
|
|
|
|
# ---------------------------------------------------------------------------
# Parse the tags already present in INDEX.md: extract slugs from [tag](tag.md)
# ---------------------------------------------------------------------------
# Globals: INDEX_MD (read)
# Outputs: one slug per line for every markdown link whose text equals its
#          target minus the ".md" suffix; nothing when INDEX_MD is missing.
# Returns: 0 always (best effort).
#
# Implemented with bash string operations instead of `grep -P`, which is a GNU
# extension and is unavailable in BSD/macOS and busybox grep.
get_existing_groups() {
  if [[ ! -f "$INDEX_MD" || ! -r "$INDEX_MD" ]]; then
    return 0
  fi

  local line rest slug suffix after
  while IFS= read -r line || [[ -n "$line" ]]; do
    rest="$line"
    while [[ "$rest" == *'['* ]]; do
      # Move just past the next '[' and take everything up to the next ']'.
      rest="${rest#*'['}"
      if [[ "$rest" != *']'* ]]; then
        break
      fi
      slug="${rest%%']'*}"
      if [[ -z "$slug" ]]; then
        continue
      fi
      suffix="](${slug}.md)"
      after="${rest:${#slug}}"
      if [[ "$after" == "$suffix"* ]]; then
        # Strip any '[' inside the slug, as the original `tr -d '[]'` did.
        printf '%s\n' "${slug//\[/}"
        rest="${after:${#suffix}}"
      fi
    done
  done < "$INDEX_MD"

  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
# Check whether a tag is in the blocklist
# ---------------------------------------------------------------------------
# Globals:   BLOCKLIST (read)
# Arguments: $1 - tag to look up (exact, case-sensitive comparison)
# Returns:   0 if the tag is blocked, 1 otherwise
in_blocklist() {
  local tag="$1"
  local blocked

  for blocked in "${BLOCKLIST[@]}"; do
    if [[ "$blocked" == "$tag" ]]; then
      return 0
    fi
  done
  return 1
}
|
|
|
|
# ---------------------------------------------------------------------------
# --apply MODE: promote a tag to capability group
# ---------------------------------------------------------------------------
# Globals:   DB, MIN_COUNT, MAX_DOMAINS, REGISTRY_ROOT, INDEX_MD (all read)
# Arguments: $1 - tag to promote
# Outputs:   progress messages on stdout, errors on stderr
# Exits:     1 when the tag is blocked, fails the thresholds, or when the
#            query, the doc generator or the INDEX.md update fails.
# Side effects: runs generate_capability_doc.sh for the tag and inserts one
#            row into INDEX_MD (skipped when the row is already there).
apply_tag() {
  local tag="$1"

  # Validate: not in the blocklist.
  if in_blocklist "$tag"; then
    echo "ERROR: '$tag' está en la blocklist de tags genericos. No se puede promocionar." >&2
    exit 1
  fi

  # Validate: count >= min and domains <= max.
  # The tag comes from the command line, so double its single quotes before
  # embedding it in the SQL string literal.
  local quote="'"
  local tag_sql="${tag//$quote/$quote$quote}"
  local row
  if ! row="$(sqlite3 "$DB" "
    SELECT COUNT(*) AS cnt, COUNT(DISTINCT f.domain) AS domains
    FROM functions f, json_each(f.tags) j
    WHERE j.value = '${tag_sql}'
    GROUP BY j.value;
  ")"; then
    # Report a failing query as such rather than as "tag not found".
    echo "ERROR: fallo la consulta a $DB" >&2
    exit 1
  fi

  if [[ -z "$row" ]]; then
    echo "ERROR: tag '$tag' no encontrado en el registry o no tiene funciones." >&2
    exit 1
  fi

  # sqlite3 prints the two columns separated by '|'.
  local cnt domains
  IFS='|' read -r cnt domains <<< "$row"

  if [[ "$cnt" -lt "$MIN_COUNT" ]]; then
    echo "ERROR: tag '$tag' tiene $cnt funciones, minimo requerido es $MIN_COUNT." >&2
    exit 1
  fi
  if [[ "$domains" -gt "$MAX_DOMAINS" ]]; then
    echo "ERROR: tag '$tag' aparece en $domains dominios distintos (maximo $MAX_DOMAINS). Probablemente es generico." >&2
    exit 1
  fi

  echo "Promocionando tag '$tag' a capability group..."
  echo " funciones: $cnt dominios: $domains"
  echo ""

  # Step 1: call generate_capability_doc.
  local gen_script="$REGISTRY_ROOT/bash/functions/pipelines/generate_capability_doc.sh"
  if [[ ! -f "$gen_script" ]]; then
    echo "ERROR: no se encontro generate_capability_doc.sh en $gen_script" >&2
    exit 1
  fi
  echo "=> Generando docs/capabilities/${tag}.md ..."
  bash "$gen_script" "$tag" || exit $?
  echo " OK"

  # Step 2: add a row to INDEX.md (idempotent).
  if [[ ! -f "$INDEX_MD" ]]; then
    echo "ERROR: no se encontro INDEX.md en $INDEX_MD" >&2
    exit 1
  fi

  # grep -F matches the pattern literally, so the brackets must NOT be
  # backslash-escaped; an escaped pattern never matches and duplicates the row.
  local row_pattern="| [${tag}](${tag}.md)"
  if grep -qF -- "$row_pattern" "$INDEX_MD"; then
    echo "=> Fila para '$tag' ya existe en INDEX.md — sin cambios."
  else
    echo "=> Anadiendo fila a INDEX.md ..."
    # Insert right after the |---|---|---| separator row of the table that
    # follows the "## Grupos vigentes" heading.
    local new_row="| [${tag}](${tag}.md) | ${cnt} | _(editar — promovido automaticamente)_ |"
    # The quoted delimiter keeps the shell from expanding anything in the
    # Python source; the path and the row travel as argv.
    python3 - "$INDEX_MD" "$new_row" <<'PYEOF' || exit 1
import sys

index_path = sys.argv[1]
new_row = sys.argv[2]

with open(index_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Find the "Grupos vigentes" section, then its |---|---|---| separator row.
in_section = False
insert_after = -1
for i, line in enumerate(lines):
    if "## Grupos vigentes" in line:
        in_section = True
    if in_section and line.strip().startswith("|---|"):
        insert_after = i
        break

if insert_after == -1:
    print("ERROR: no se encontro la tabla 'Grupos vigentes' en INDEX.md", file=sys.stderr)
    sys.exit(1)

# If the separator is the last line and lacks a newline, add one so the new
# row does not get glued onto it.
if not lines[insert_after].endswith("\n"):
    lines[insert_after] += "\n"

lines.insert(insert_after + 1, new_row + "\n")

with open(index_path, "w", encoding="utf-8") as f:
    f.writelines(lines)

print(f" Fila insertada en posicion {insert_after + 1}")
PYEOF
    echo " OK"
  fi

  echo ""
  echo "HECHO. Pasos manuales pendientes:"
  echo " 1. Editar docs/capabilities/${tag}.md:"
  echo " - Anadir parrafo de descripcion del grupo."
  echo " - Completar seccion 'Ejemplo canonico' con codigo real."
  echo " - Completar seccion 'Fronteras' (que NO hace el grupo)."
  echo " - Anadir 'Notas' si aplica."
  echo " 2. Actualizar la frase descripcion en docs/capabilities/INDEX.md"
  echo " (reemplazar el placeholder con descripcion real)."
}
|
|
|
|
# ---------------------------------------------------------------------------
# LIST MODE: analyze candidates
# ---------------------------------------------------------------------------

# Escapes backslashes and double quotes so a value can sit inside a JSON string.
# Arguments: $1 - raw text. Outputs: the escaped text, without a newline.
json_escape() {
  local text="$1"
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

# Lists the tags that pass the blocklist and the count/domain thresholds.
# Globals:  DB, MIN_COUNT, MAX_DOMAINS, JSON_MODE (all read)
# Outputs:  a JSON array when JSON_MODE is "true" ("[]" when nothing
#           qualifies); otherwise a text table followed by a summary line.
# Returns:  0 on success; 1 when a threshold is not an integer or the query
#           fails.
list_candidates() {
  # Both thresholds are interpolated into the SQL below: accept digits only.
  if ! [[ "$MIN_COUNT" =~ ^[0-9]+$ && "$MAX_DOMAINS" =~ ^[0-9]+$ ]]; then
    echo "ERROR: min-count y max-domains deben ser enteros no negativos" >&2
    return 1
  fi

  local blocklist_sql
  blocklist_sql="$(build_blocklist_sql)"

  # Query: tags with enough functions and not too many domains.
  local query="
    SELECT
      j.value AS tag,
      COUNT(*) AS cnt,
      COUNT(DISTINCT f.domain) AS domains,
      GROUP_CONCAT(DISTINCT f.domain) AS domain_list,
      GROUP_CONCAT(f.id) AS function_ids
    FROM functions f, json_each(f.tags) j
    WHERE j.value NOT IN (${blocklist_sql})
    GROUP BY j.value
    HAVING cnt >= ${MIN_COUNT} AND domains <= ${MAX_DOMAINS}
    ORDER BY cnt DESC;
  "

  local raw_results
  if ! raw_results="$(sqlite3 "$DB" "$query")"; then
    # A failing query must not be reported as "no candidates".
    echo "ERROR: fallo la consulta a $DB" >&2
    return 1
  fi

  local json=false
  if [[ "$JSON_MODE" == true ]]; then
    json=true
  fi

  if [[ -z "$raw_results" ]]; then
    if $json; then
      # Keep the output parseable for JSON consumers.
      echo "[]"
    else
      echo "No se encontraron candidatos con min-count=${MIN_COUNT} y max-domains=${MAX_DOMAINS}."
    fi
    return 0
  fi

  # Groups that already exist, one slug per line.
  local existing_groups
  existing_groups="$(get_existing_groups)"

  local tag cnt domains domain_list function_ids
  local is_group samples_csv samples_json i
  local separator=""
  local total=0 already=0 nuevos=0
  local -a ids

  if $json; then
    echo "["
  else
    printf "%-22s %-6s %-25s %-13s %s\n" "TAG" "COUNT" "DOMAINS" "ALREADY_GROUP" "SAMPLES"
    printf "%-22s %-6s %-25s %-13s %s\n" "----------------------" "------" "-------------------------" "-------------" "-------"
  fi

  # sqlite3 separates columns with '|'; the last variable takes the remainder.
  while IFS='|' read -r tag cnt domains domain_list function_ids; do
    if [[ -z "$tag" ]]; then
      continue
    fi

    # A here-string (not `echo | grep -q`) cannot die of SIGPIPE under
    # pipefail; `--` protects tags that start with '-'.
    is_group=false
    if grep -qxF -- "$tag" <<< "$existing_groups"; then
      is_group=true
      already=$((already + 1))
    else
      nuevos=$((nuevos + 1))
    fi
    total=$((total + 1))

    # Take up to 3 sample ids, splitting in-shell rather than with a
    # `tr | head` pipeline that can fail under pipefail on long lists.
    IFS=',' read -r -a ids <<< "$function_ids"
    samples_csv=""
    samples_json=""
    for ((i = 0; i < ${#ids[@]} && i < 3; i++)); do
      if ((i > 0)); then
        samples_csv+=","
        samples_json+=","
      fi
      samples_csv+="${ids[i]}"
      samples_json+="\"$(json_escape "${ids[i]}")\""
    done

    if $json; then
      # The separator is empty before the first object and ",\n" afterwards.
      printf '%s {"tag":"%s","count":%s,"domains":%s,"domain_list":"%s","already_group":%s,"samples":[%s]}' \
        "$separator" "$(json_escape "$tag")" "$cnt" "$domains" \
        "$(json_escape "$domain_list")" "$is_group" "$samples_json"
      separator=$',\n'
    else
      if $is_group; then
        is_group="yes"
      else
        is_group="no"
      fi
      printf "%-22s %-6s %-25s %-13s %s\n" "$tag" "$cnt" "$domain_list" "$is_group" "$samples_csv"
    fi
  done <<< "$raw_results"

  if $json; then
    echo ""
    echo "]"
    return 0
  fi

  echo ""
  echo "${total} candidatos. ${already} ya son grupo. ${nuevos} son nuevos."
  echo "Promociona con: bash bash/functions/pipelines/propose_capability_groups.sh --apply <tag>"
}
|
|
|
|
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
# A non-empty APPLY_TAG selects promotion mode; otherwise list the candidates.
if [[ -n "$APPLY_TAG" ]]; then
  apply_tag "$APPLY_TAG"
else
  list_candidates
fi
|