#!/usr/bin/env bash
# propose_capability_groups — analyzes tags that are candidates for becoming a
# capability group (issue 0086).
#
# Candidates are tags that are not in the generic-tag blocklist, are attached
# to at least --min-count functions, and appear in at most --max-domains
# distinct domains. Without --apply the candidates are listed (text or JSON);
# with --apply <tag> the tag is promoted: its capability doc is generated and
# a row is added to docs/capabilities/INDEX.md.
#
# Requires: sqlite3 (with the JSON1 json_each function), GNU grep (-P), and
# python3 (only for --apply).
set -euo pipefail

# ---------------------------------------------------------------------------
# Blocklist: generic tags that are never capability groups
# ---------------------------------------------------------------------------
BLOCKLIST=(
  # language
  go py bash ps ts python cpp
  # domain
  core infra finance datascience cybersecurity shell tui pipelines browser
  # kind / purity
  function pipeline component pure impure
  # generic CRUD
  add create delete list update get set remove insert
  # overly generic verbs
  compose convert combine append empty exists check find format parse render
  # structural
  generic helper utility wrapper test
  # primitives
  string number int float array slice map dict value key
  # states
  pending-usar pendiente-usar
)
readonly BLOCKLIST

# ---------------------------------------------------------------------------
# Defaults (also shown in --help, so the help text cannot drift from the code)
# ---------------------------------------------------------------------------
readonly DEFAULT_MIN_COUNT=3
readonly DEFAULT_MAX_DOMAINS=4

MIN_COUNT="$DEFAULT_MIN_COUNT"
MAX_DOMAINS="$DEFAULT_MAX_DOMAINS"
JSON_MODE=false
APPLY_TAG=""

# Set by main() once the registry root has been located.
REGISTRY_ROOT=""
DB=""
INDEX_MD=""

#######################################
# Locate the registry root by walking up from this script's directory until a
# directory containing registry.db is found (the filesystem root included).
# Outputs:  the registry root directory on stdout
# Returns:  0 if found, 1 (with a message on stderr) otherwise
#######################################
find_registry_root() {
  local dir
  dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  while true; do
    if [[ -f "$dir/registry.db" ]]; then
      printf '%s\n' "$dir"
      return 0
    fi
    if [[ "$dir" == "/" ]]; then
      break
    fi
    dir="$(dirname "$dir")"
  done
  echo "ERROR: registry.db no encontrado en ningún directorio padre" >&2
  return 1
}

#######################################
# Print the command-line help.
#######################################
usage() {
  echo "Uso: propose_capability_groups [--min-count N] [--max-domains M] [--json] [--apply <tag>]"
  echo ""
  echo " --min-count N Minimo de funciones con el tag (default: ${DEFAULT_MIN_COUNT})"
  echo " --max-domains M Maximo de dominios distintos (default: ${DEFAULT_MAX_DOMAINS})"
  echo " --json Salida JSON"
  echo " --apply <tag> Promociona el tag a capability group"
}

#######################################
# Abort unless the option currently being parsed is followed by a value.
# Arguments: $1 - option name, $2 - number of arguments still unparsed
#######################################
require_value() {
  if (( $2 < 2 )); then
    echo "ERROR: la opcion $1 requiere un valor" >&2
    exit 1
  fi
}

#######################################
# Abort unless the value is a non-negative decimal integer. The values are
# later interpolated into SQL and used in arithmetic, so anything else is
# rejected up front.
# Arguments: $1 - option name, $2 - value to validate
#######################################
require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "ERROR: $1 debe ser un entero no negativo (recibido: '$2')" >&2
    exit 1
  fi
}

#######################################
# Parse the command line into MIN_COUNT, MAX_DOMAINS, JSON_MODE, APPLY_TAG.
# --help prints the usage and exits 0; unknown arguments exit 1.
# Arguments: the script's positional parameters
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --min-count)
        require_value "$1" "$#"
        require_uint "$1" "$2"
        # 10# forces base 10 so values such as "08" are not read as octal.
        MIN_COUNT=$((10#$2))
        shift 2
        ;;
      --max-domains)
        require_value "$1" "$#"
        require_uint "$1" "$2"
        MAX_DOMAINS=$((10#$2))
        shift 2
        ;;
      --json)
        JSON_MODE=true
        shift
        ;;
      --apply)
        require_value "$1" "$#"
        if [[ -z "$2" ]]; then
          echo "ERROR: la opcion $1 requiere un tag no vacio" >&2
          exit 1
        fi
        APPLY_TAG="$2"
        shift 2
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "ERROR: argumento desconocido: $1" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Build the blocklist as a quoted CSV suitable for a SQL IN (...) clause.
# Outputs: 'tag1','tag2',... on stdout
#######################################
build_blocklist_sql() {
  local csv="" tag
  for tag in "${BLOCKLIST[@]}"; do
    csv+="'${tag}',"
  done
  # Drop the trailing comma.
  printf '%s\n' "${csv%,}"
}

#######################################
# Escape a string for use inside a single-quoted SQL literal by doubling
# every single quote.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
sql_escape() {
  local q="'"
  printf '%s' "${1//$q/$q$q}"
}

#######################################
# Escape backslashes and double quotes so a string can be placed inside a
# JSON string literal.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
  local s="$1"
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  printf '%s' "$s"
}

#######################################
# List the groups already present in INDEX.md by extracting the slug of every
# [tag](tag.md) link. Prints nothing when INDEX.md is missing or has no links.
# Outputs: one slug per line on stdout
#######################################
get_existing_groups() {
  if [[ ! -f "$INDEX_MD" ]]; then
    return 0
  fi
  # grep exits 1 when nothing matches; that is not an error here.
  grep -oP '\[([^\]]+)\]\(\1\.md\)' "$INDEX_MD" \
    | grep -oP '\[([^\]]+)\]' \
    | tr -d '[]' || true
}

#######################################
# Check whether a tag is in the blocklist.
# Arguments: $1 - tag
# Returns:   0 if blocked, 1 otherwise
#######################################
in_blocklist() {
  local tag="$1" blocked
  for blocked in "${BLOCKLIST[@]}"; do
    if [[ "$blocked" == "$tag" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Check whether a tag appears as a whole line in a newline-separated list.
# Arguments: $1 - tag, $2 - newline-separated list of existing group slugs
# Returns:   0 if present, 1 otherwise
#######################################
is_existing_group() {
  grep -qxF -- "$1" <<< "$2"
}

#######################################
# Return up to the first three ids of a comma-separated id list.
# Arguments: $1 - comma-separated ids
# Outputs:   up to three ids, comma-separated, on stdout (no trailing newline)
#######################################
first_samples() {
  local -a ids=()
  local out="" i
  IFS=',' read -r -a ids <<< "$1" || true
  for (( i = 0; i < ${#ids[@]} && i < 3; i++ )); do
    out+="${out:+,}${ids[i]}"
  done
  printf '%s' "$out"
}

#######################################
# Run a query against the registry database, aborting the script with an
# explicit message when sqlite3 fails, so that a broken query is never
# mistaken for an empty result.
# Arguments: $1 - SQL text
# Outputs:   sqlite3's output on stdout
#######################################
run_query() {
  if ! sqlite3 "$DB" "$1"; then
    echo "ERROR: fallo la consulta a $DB" >&2
    exit 1
  fi
}

# ---------------------------------------------------------------------------
# --apply mode: promote a tag to capability group
# ---------------------------------------------------------------------------
#######################################
# Validate a tag (not blocklisted, count >= MIN_COUNT, domains <= MAX_DOMAINS),
# generate its capability doc through generate_capability_doc.sh, and add its
# row to INDEX.md unless a row for it is already there.
# Arguments: $1 - tag to promote
# Exits:     1 on any validation or I/O failure
#######################################
apply_tag() {
  local tag="$1"

  # Validate: not in the blocklist.
  if in_blocklist "$tag"; then
    echo "ERROR: '$tag' está en la blocklist de tags genericos. No se puede promocionar." >&2
    exit 1
  fi

  # Validate: count >= min and domains <= max. The tag comes from the command
  # line, so it is escaped before being placed in the SQL literal.
  local tag_sql row
  tag_sql="$(sql_escape "$tag")"
  row="$(run_query "
    SELECT COUNT(*) AS cnt, COUNT(DISTINCT f.domain) AS domains
    FROM functions f, json_each(f.tags) j
    WHERE j.value = '${tag_sql}'
    GROUP BY j.value;
  ")"

  if [[ -z "$row" ]]; then
    echo "ERROR: tag '$tag' no encontrado en el registry o no tiene funciones." >&2
    exit 1
  fi

  local cnt domains
  IFS='|' read -r cnt domains <<< "$row"

  if (( cnt < MIN_COUNT )); then
    echo "ERROR: tag '$tag' tiene $cnt funciones, minimo requerido es $MIN_COUNT." >&2
    exit 1
  fi

  if (( domains > MAX_DOMAINS )); then
    echo "ERROR: tag '$tag' aparece en $domains dominios distintos (maximo $MAX_DOMAINS). Probablemente es generico." >&2
    exit 1
  fi

  echo "Promocionando tag '$tag' a capability group..."
  echo " funciones: $cnt dominios: $domains"
  echo ""

  # Step 1: call generate_capability_doc.
  local gen_script="$REGISTRY_ROOT/bash/functions/pipelines/generate_capability_doc.sh"
  if [[ ! -f "$gen_script" ]]; then
    echo "ERROR: no se encontro generate_capability_doc.sh en $gen_script" >&2
    exit 1
  fi

  echo "=> Generando docs/capabilities/${tag}.md ..."
  bash "$gen_script" "$tag"
  echo " OK"

  # Step 2: add the row to INDEX.md (idempotent).
  if [[ ! -f "$INDEX_MD" ]]; then
    echo "ERROR: no se encontro INDEX.md en $INDEX_MD" >&2
    exit 1
  fi

  # grep -F matches the pattern literally, so the brackets must NOT be
  # backslash-escaped: inside double quotes "\[" keeps its backslash and the
  # pattern would never match an existing row.
  local row_pattern="| [${tag}](${tag}.md)"

  if grep -qF -- "$row_pattern" "$INDEX_MD"; then
    echo "=> Fila para '$tag' ya existe en INDEX.md — sin cambios."
  else
    echo "=> Anadiendo fila a INDEX.md ..."
    # Insert right after the |---|---|---| header separator of the table that
    # follows the "## Grupos vigentes" heading.
    local new_row="| [${tag}](${tag}.md) | ${cnt} | _(editar — promovido automaticamente)_ |"
    # Python is used to insert the line portably (position-relative insertion
    # is awkward in awk).
    python3 - "$INDEX_MD" "$new_row" <<'PYEOF'
import sys

index_path = sys.argv[1]
new_row = sys.argv[2]

# Explicit UTF-8: the inserted row contains a non-ASCII dash.
with open(index_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Find the "Grupos vigentes" section, then its |---|---|---| separator line.
in_section = False
insert_after = -1
for i, line in enumerate(lines):
    if "## Grupos vigentes" in line:
        in_section = True
    if in_section and line.strip().startswith("|---|"):
        insert_after = i
        break

if insert_after == -1:
    print("ERROR: no se encontro la tabla 'Grupos vigentes' en INDEX.md", file=sys.stderr)
    sys.exit(1)

lines.insert(insert_after + 1, new_row + "\n")

with open(index_path, "w", encoding="utf-8") as f:
    f.writelines(lines)

print(f" Fila insertada en posicion {insert_after + 1}")
PYEOF
    echo " OK"
  fi

  echo ""
  echo "HECHO. Pasos manuales pendientes:"
  echo " 1. Editar docs/capabilities/${tag}.md:"
  echo " - Anadir parrafo de descripcion del grupo."
  echo " - Completar seccion 'Ejemplo canonico' con codigo real."
  echo " - Completar seccion 'Fronteras' (que NO hace el grupo)."
  echo " - Anadir 'Notas' si aplica."
  echo " 2. Actualizar la frase descripcion en docs/capabilities/INDEX.md"
  echo " (reemplazar el placeholder con descripcion real)."
}

# ---------------------------------------------------------------------------
# List mode: analyze candidates
# ---------------------------------------------------------------------------
#######################################
# Query the registry for non-blocklisted tags with at least MIN_COUNT
# functions and at most MAX_DOMAINS distinct domains, and print them either
# as a JSON array (--json) or as a human-readable table with a summary.
# Outputs: candidate list on stdout
#######################################
list_candidates() {
  local blocklist_sql
  blocklist_sql="$(build_blocklist_sql)"

  # Tags with enough functions and not too many domains. MIN_COUNT and
  # MAX_DOMAINS were validated as integers during argument parsing.
  local query="
    SELECT
      j.value AS tag,
      COUNT(*) AS cnt,
      COUNT(DISTINCT f.domain) AS domains,
      GROUP_CONCAT(DISTINCT f.domain) AS domain_list,
      GROUP_CONCAT(f.id) AS function_ids
    FROM functions f, json_each(f.tags) j
    WHERE j.value NOT IN (${blocklist_sql})
    GROUP BY j.value
    HAVING cnt >= ${MIN_COUNT} AND domains <= ${MAX_DOMAINS}
    ORDER BY cnt DESC;
  "

  local raw_results
  raw_results="$(run_query "$query")"

  if [[ -z "$raw_results" ]]; then
    if $JSON_MODE; then
      # Keep stdout parseable for JSON consumers.
      echo "[]"
    else
      echo "No se encontraron candidatos con min-count=${MIN_COUNT} y max-domains=${MAX_DOMAINS}."
    fi
    exit 0
  fi

  # Groups that already exist in INDEX.md.
  local existing_groups
  existing_groups="$(get_existing_groups)"

  local tag cnt domains domain_list function_ids samples

  if $JSON_MODE; then
    local first=true already_group samples_json sample_id
    local -a sample_ids=()
    echo "["
    while IFS='|' read -r tag cnt domains domain_list function_ids; do
      [[ -z "$tag" ]] && continue

      already_group="false"
      if is_existing_group "$tag" "$existing_groups"; then
        already_group="true"
      fi

      # Up to 3 sample function ids, each emitted as a JSON string.
      samples="$(first_samples "$function_ids")"
      sample_ids=()
      IFS=',' read -r -a sample_ids <<< "$samples" || true
      samples_json=""
      for sample_id in ${sample_ids[@]+"${sample_ids[@]}"}; do
        samples_json+="${samples_json:+,}\"$(json_escape "$sample_id")\""
      done

      if $first; then
        first=false
      else
        echo ","
      fi
      printf ' {"tag":"%s","count":%s,"domains":%s,"domain_list":"%s","already_group":%s,"samples":[%s]}' \
        "$(json_escape "$tag")" "$cnt" "$domains" \
        "$(json_escape "$domain_list")" "$already_group" "$samples_json"
    done <<< "$raw_results"
    echo ""
    echo "]"
    return
  fi

  # Human-readable text output.
  local total=0 already=0 nuevos=0 already_label

  printf "%-22s %-6s %-25s %-13s %s\n" "TAG" "COUNT" "DOMAINS" "ALREADY_GROUP" "SAMPLES"
  printf "%-22s %-6s %-25s %-13s %s\n" "----------------------" "------" "-------------------------" "-------------" "-------"

  while IFS='|' read -r tag cnt domains domain_list function_ids; do
    [[ -z "$tag" ]] && continue

    if is_existing_group "$tag" "$existing_groups"; then
      already_label="yes"
      already=$((already + 1))
    else
      already_label="no"
      nuevos=$((nuevos + 1))
    fi
    total=$((total + 1))

    # Up to 3 sample function ids.
    samples="$(first_samples "$function_ids")"

    printf "%-22s %-6s %-25s %-13s %s\n" \
      "$tag" "$cnt" "$domain_list" "$already_label" "$samples"
  done <<< "$raw_results"

  echo ""
  echo "${total} candidatos. ${already} ya son grupo. ${nuevos} son nuevos."
  echo "Promociona con: bash bash/functions/pipelines/propose_capability_groups.sh --apply <tag>"
}

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
#######################################
# Parse arguments, locate the registry, and dispatch to apply or list mode.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"

  if ! command -v sqlite3 >/dev/null 2>&1; then
    echo "ERROR: sqlite3 no esta instalado o no esta en PATH" >&2
    exit 1
  fi

  REGISTRY_ROOT="$(find_registry_root)"
  DB="$REGISTRY_ROOT/registry.db"
  INDEX_MD="$REGISTRY_ROOT/docs/capabilities/INDEX.md"

  if [[ -n "$APPLY_TAG" ]]; then
    apply_tag "$APPLY_TAG"
  else
    list_candidates
  fi
}

main "$@"