"""Split text into overlapping chunks with sentence-boundary awareness."""


def split_text_into_chunks(
    text: str, chunk_size: int = 500, overlap: int = 50
) -> list[str]:
    """Split text into overlapping chunks, preferring sentence boundaries.

    Each chunk spans at most ``chunk_size`` characters of ``text``. When a
    sentence separator occurs inside the current window, and past the first
    30% of it, the chunk is cut right after the latest such separator;
    otherwise the window is cut at ``chunk_size``. Chunks are stripped of
    surrounding whitespace and empty chunks are dropped.

    Args:
        text: Text to split.
        chunk_size: Maximum size of each chunk, in characters.
        overlap: Number of characters shared between consecutive chunks.
            A non-positive overlap, or one so large that the next chunk
            would not start after the current one, is treated as no overlap
            for that step so the scan always moves forward.

    Returns:
        The list of chunks. Empty if the text is empty or whitespace only.

    Raises:
        ValueError: If ``text`` is non-empty and ``chunk_size`` is not
            positive.
    """
    if not text:
        return []

    if chunk_size <= 0:
        # A non-positive window can never consume the text.
        raise ValueError("chunk_size must be a positive integer")

    if len(text) <= chunk_size:
        stripped = text.strip()
        return [stripped] if stripped else []

    # Sentence separators. Order does not matter: the latest acceptable cut
    # inside the window wins, whichever separator produced it.
    separators = ("。", "！", "？", ".\n", "!\n", "?\n", "\n\n", ". ", "! ", "? ")
    # A cut is only accepted past the first 30% of the window, so that a
    # chunk is never shrunk to a tiny fragment.
    min_offset = int(chunk_size * 0.30)

    chunks: list[str] = []
    start = 0
    text_len = len(text)

    while start < text_len:
        end = start + chunk_size

        if end < text_len:
            min_pos = start + min_offset
            best_end = None

            for sep in separators:
                # Last occurrence fully contained in text[start:end].
                pos = text.rfind(sep, start, end)
                if pos == -1:
                    continue
                cut = pos + len(sep)
                if cut > min_pos and (best_end is None or cut > best_end):
                    best_end = cut

            if best_end is not None:
                end = best_end

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)

        if end >= text_len:
            # This chunk already reaches the end of the text; stepping back
            # by ``overlap`` would only emit a suffix of it again.
            break

        next_start = end - overlap
        # Guarantee forward progress: fall back to a plain, non-overlapping
        # step when the overlap is non-positive or would move the start
        # backwards (or leave it in place).
        if next_start <= start or next_start > end:
            next_start = end
        start = next_start

    return chunks