feat(eda): negrita inline real (**bold**) en renderers AutomaticEDA

El render de Markdown del motor AutomaticEDA quitaba los marcadores **negrita** sin aplicar estilo. Ahora los spans **bold**/__bold__ se renderizan en negrita real, de forma aditiva y sin romper el anti-corte: - text_layout.py: parse_inline_bold() tokeniza spans preservando el texto visible (== strip_inline_md) y wrap_rich() envuelve por palabras a max_chars conservando el flag de negrita por segmento (la anchura visible no cambia, así que la paginación es idéntica). - render_pdf_impl.py: _place_rich_lines() dibuja cada segmento con su fontweight avanzando x por el mismo grid de caracteres que usa el wrap (párrafos+bullets). - render_pptx_impl.py: _add_rich_text() usa runs nativos de python-pptx con font.bold por segmento (negrita real de PowerPoint). - bold_render_test.py: helpers puros (no-overflow, bold preservado, marcadores desbalanceados) + e2e que abre el .pptx y confirma un run con font.bold True. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 16:08:16 +02:00
parent 5eaf3f662e
commit f5b30b23dc
4 changed files with 334 additions and 18 deletions
@@ -15,8 +15,15 @@ overflowing — that is wrapping, not loss: every character is still rendered.

 from __future__ import annotations

+import re
 import textwrap

+# Inline span markers: ``**bold**`` / ``__bold__`` (rendered bold) and
+# `` `code` `` (markers removed, not styled). Matched non-greedily so the
+# shortest balanced pair wins. Unbalanced leftovers are stripped afterwards so
+# the visible text matches ``strip_inline_md`` exactly.
+_INLINE_SPAN_RE = re.compile(r"(\*\*.+?\*\*|__.+?__|`.+?`)")
+

 def avg_char_width_in(fontsize_pt: float) -> float:
    """Approximate average glyph width in inches for a sans-serif font.
@@ -84,6 +91,137 @@ def strip_inline_md(text: str) -> str:
    return s


+def _strip_leftover_markers(s: str) -> str:
+    """Drop any unbalanced inline markers from a plain (non-span) fragment.
+
+    Keeps the visible text identical to :func:`strip_inline_md` even when a
+    ``**`` / ``__`` / `` ` `` has no matching closing marker.
+    """
+    for marker in ("**", "__", "`"):
+        s = s.replace(marker, "")
+    return s
+
+
+def parse_inline_bold(text: str):
+    """Split ``text`` into ``[(fragment, is_bold), ...]`` preserving order.
+
+    ``**...**`` and ``__...__`` spans become bold fragments (markers removed);
+    `` `code` `` keeps its text without the backticks and is not bold; any other
+    text is emitted verbatim with unbalanced markers stripped. The concatenation
+    of all fragment texts equals :func:`strip_inline_md` of the input — so the
+    *visible* characters (and therefore line wrapping) are unchanged; only the
+    bold flag is added. Adjacent fragments of the same weight are merged.
+    """
+    s = "" if text is None else str(text)
+    if not s:
+        return []
+    out = []
+
+    def _emit(fragment: str, bold: bool) -> None:
+        if fragment == "":
+            return
+        if out and out[-1][1] == bold:
+            out[-1] = (out[-1][0] + fragment, bold)
+        else:
+            out.append((fragment, bold))
+
+    pos = 0
+    for m in _INLINE_SPAN_RE.finditer(s):
+        if m.start() > pos:
+            _emit(_strip_leftover_markers(s[pos:m.start()]), False)
+        tok = m.group(0)
+        if tok.startswith("**") and tok.endswith("**"):
+            _emit(tok[2:-2], True)
+        elif tok.startswith("__") and tok.endswith("__"):
+            _emit(tok[2:-2], True)
+        else:  # `code`
+            _emit(tok[1:-1], False)
+        pos = m.end()
+    if pos < len(s):
+        _emit(_strip_leftover_markers(s[pos:]), False)
+    return out
+
+
+def _hard_split(word: str, max_chars: int):
+    """Split a single long token into <= max_chars chunks (never loses chars)."""
+    return [word[i:i + max_chars] for i in range(0, len(word), max_chars)] or [""]
+
+
+def wrap_rich(text: str, max_chars: int):
+    """Word-wrap ``text`` to ``max_chars`` while preserving inline bold spans.
+
+    Returns ``list[list[(fragment, is_bold)]]`` — one inner list of styled
+    fragments per output line; concatenating an inner list's fragment texts is
+    the visible line. Wrapping is word-aware and hard-splits over-long tokens, so
+    no line exceeds ``max_chars`` (the renderers measure these very lines, so the
+    no-cut guarantee holds). Bold spans never widen a line: only the bold flag is
+    carried, the visible width is identical to :func:`wrap`.
+    """
+    if max_chars < 1:
+        max_chars = 1
+    spans = parse_inline_bold(text)
+    if not spans:
+        return [[("", False)]]
+
+    # Flatten to (word, is_bold) tokens, honoring hard newlines as line breaks.
+    # A token list of None marks a forced line break.
+    tokens = []  # each: (word, bold) or ("\n", None)
+    for frag, bold in spans:
+        parts = frag.split("\n")
+        for pi, part in enumerate(parts):
+            if pi > 0:
+                tokens.append(("\n", None))
+            for word in part.split(" "):
+                if word == "":
+                    continue
+                tokens.append((word, bold))
+
+    lines = []          # list[list[(seg, bold)]]
+    cur = []            # list[(word, bold)]
+    cur_len = 0
+
+    def _flush():
+        nonlocal cur, cur_len
+        # Merge adjacent same-weight words (with separating spaces) into segments.
+        merged = []
+        for k, (word, bold) in enumerate(cur):
+            piece = word if k == 0 else " " + word
+            if merged and merged[-1][1] == bold:
+                merged[-1] = (merged[-1][0] + piece, bold)
+            else:
+                merged.append((piece, bold))
+        lines.append(merged or [("", False)])
+        cur = []
+        cur_len = 0
+
+    for word, bold in tokens:
+        if bold is None:  # forced newline
+            _flush()
+            continue
+        if len(word) > max_chars:
+            if cur:
+                _flush()
+            chunks = _hard_split(word, max_chars)
+            for ci, chunk in enumerate(chunks):
+                if ci < len(chunks) - 1:
+                    lines.append([(chunk, bold)])
+                else:
+                    cur = [(chunk, bold)]
+                    cur_len = len(chunk)
+            continue
+        add = len(word) if cur_len == 0 else cur_len + 1 + len(word)
+        if cur_len != 0 and add > max_chars:
+            _flush()
+            cur = [(word, bold)]
+            cur_len = len(word)
+        else:
+            cur.append((word, bold))
+            cur_len = add
+    if cur:
+        _flush()
+    return lines or [[("", False)]]
+
+
 def parse_md_table(lines: list):
    """Parse consecutive ``| a | b |`` lines into ``(header, rows)`` or None.