f5b30b23dc
El render de Markdown del motor AutomaticEDA quitaba los marcadores **negrita** sin aplicar estilo. Ahora los spans **bold**/__bold__ se renderizan en negrita real, de forma aditiva y sin romper el anti-corte: - text_layout.py: parse_inline_bold() tokeniza spans preservando el texto visible (== strip_inline_md) y wrap_rich() envuelve por palabras a max_chars conservando el flag de negrita por segmento (la anchura visible no cambia, así que la paginación es idéntica). - render_pdf_impl.py: _place_rich_lines() dibuja cada segmento con su fontweight avanzando x por el mismo grid de caracteres que usa el wrap (párrafos+bullets). - render_pptx_impl.py: _add_rich_text() usa runs nativos de python-pptx con font.bold por segmento (negrita real de PowerPoint). - bold_render_test.py: helpers puros (no-overflow, bold preservado, marcadores desbalanceados) + e2e que abre el .pptx y confirma un run con font.bold True. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
246 lines
8.7 KiB
Python
246 lines
8.7 KiB
Python
"""Shared text-measurement helpers for the AutomaticEDA renderers.
|
|
|
|
Both renderers flow content top-to-bottom and must know, *before* placing a
|
|
block, how much vertical space it will take — that is what guarantees nothing is
|
|
cut: a unit either fits in the remaining space or moves to the next page/slide
|
|
whole. Measuring proportional text exactly in matplotlib/pptx is impractical, so
|
|
we use a deterministic character-grid estimate (chars-per-line from an average
|
|
glyph width) which slightly over-estimates and is therefore safe: it never
|
|
claims something fits when it would overflow.
|
|
|
|
Wrapping is word-aware (``textwrap``) and additionally hard-splits any single
|
|
token longer than the line so a 200-character value still wraps instead of
|
|
overflowing — that is wrapping, not loss: every character is still rendered.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import textwrap
|
|
|
|
# Inline span markers: ``**bold**`` / ``__bold__`` (rendered bold) and
# `` `code` `` (markers removed, not styled). Each alternative is matched
# non-greedily, so the shortest balanced pair wins; ``.`` does not match a
# newline, so a span never crosses a hard line break. Unbalanced leftovers are
# stripped afterwards so the visible text matches ``strip_inline_md``.
|
|
_INLINE_SPAN_RE = re.compile(r"(\*\*.+?\*\*|__.+?__|`.+?`)")
|
|
|
|
|
|
def avg_char_width_in(fontsize_pt: float) -> float:
    """Estimate the mean glyph advance, in inches, of a sans-serif font.

    The mean advance is taken as half the point size (a conservative figure
    for proportional sans fonts); the result is then converted from points to
    inches at 72 points per inch.
    """
    mean_advance_pt = 0.5 * fontsize_pt
    return mean_advance_pt / 72.0
|
|
|
|
|
|
def line_height_in(fontsize_pt: float, leading: float = 1.32) -> float:
    """Return the height of one text line in inches.

    The height is the font size scaled by ``leading`` (a multiplier of the
    point size), converted from points to inches at 72 points per inch.
    """
    height_pt = leading * fontsize_pt
    return height_pt / 72.0
|
|
|
|
|
|
def chars_per_line(width_in: float, fontsize_pt: float) -> int:
    """Return how many average-width glyphs fit in ``width_in`` inches.

    The glyph width comes from :func:`avg_char_width_in`. The result is always
    at least 1, so callers never get a zero-width column.
    """
    glyph_width = avg_char_width_in(fontsize_pt)
    if glyph_width <= 0:
        # Zero or negative font size: no meaningful glyph width, so fall back
        # to a fixed 80-character column instead of dividing by it.
        return 80
    fit = int(width_in / glyph_width)
    return fit if fit > 1 else 1
|
|
|
|
|
|
def wrap(text: str, max_chars: int) -> list:
    """Break ``text`` into lines no longer than ``max_chars`` characters.

    Wrapping happens at word boundaries; a token longer than the column is
    hard-split so it cannot overflow. Each ``"\\n"`` in the input is a forced
    line break. ``None`` is treated as empty text, and empty input (or an
    empty/blank input line) still yields one ``""`` row so callers can reserve
    space for it. ``max_chars`` below 1 is treated as 1.
    """
    column = max(max_chars, 1)
    source = "" if text is None else str(text)

    # One wrapper serves every input line; break_long_words keeps any single
    # token from exceeding the column.
    wrapper = textwrap.TextWrapper(
        width=column,
        break_long_words=True,
        break_on_hyphens=False,
        replace_whitespace=True,
        drop_whitespace=True,
    )

    result: list = []
    for paragraph in source.split("\n"):
        # The wrapper returns [] for empty or whitespace-only input; keep a
        # blank row in that case.
        result.extend(wrapper.wrap(paragraph) or [""])
    return result or [""]
|
|
|
|
|
|
def strip_inline_md(text: str) -> str:
    """Remove inline markdown markers from ``text``, keeping the content.

    Only the ``**`` and ``__`` (bold) and `` ` `` (code) markers are removed,
    in that order. Single ``*`` / ``_`` characters are left untouched, so
    ``*em*`` and expressions such as ``a * b`` pass through unchanged. Nothing
    but the markers themselves is dropped.

    ``None`` yields ``""``. Any other value is converted with ``str()`` — the
    same coercion the other helpers in this module apply — so a falsy
    non-string value such as ``0`` is kept as ``"0"`` instead of vanishing.
    """
    if text is None:
        return ""
    s = str(text)
    for marker in ("**", "__", "`"):
        s = s.replace(marker, "")
    return s
|
|
|
|
|
|
def _strip_leftover_markers(s: str) -> str:
|
|
"""Drop any unbalanced inline markers from a plain (non-span) fragment.
|
|
|
|
Keeps the visible text identical to :func:`strip_inline_md` even when a
|
|
``**`` / ``__`` / `` ` `` has no matching closing marker.
|
|
"""
|
|
for marker in ("**", "__", "`"):
|
|
s = s.replace(marker, "")
|
|
return s
|
|
|
|
|
|
def parse_inline_bold(text: str) -> list:
    """Split ``text`` into ``[(fragment, is_bold), ...]`` preserving order.

    ``**...**`` and ``__...__`` spans become bold fragments; `` `code` ``
    spans keep their text and are not bold; everything else is plain. The
    delimiting markers are removed, and so are any markers left *inside* a
    fragment — unbalanced ones in plain text as well as ones nested in a span
    (``**a `b` c**`` yields ``"a b c"``, not ``"a `b` c"``). The visible
    characters are therefore meant to equal :func:`strip_inline_md` of the
    input, with only the bold flag added.

    Adjacent fragments of the same weight are merged and empty fragments are
    dropped. ``None`` or empty input returns ``[]``.

    NOTE(review): degenerate marker runs (e.g. six consecutive ``*``) can still
    tokenize differently from ``strip_inline_md``; ordinary text is unaffected.
    """
    s = "" if text is None else str(text)
    if not s:
        return []
    out = []

    def _emit(fragment: str, bold: bool) -> None:
        # Strip here, for every fragment, so span contents get the same
        # marker removal as the plain text around them.
        fragment = _strip_leftover_markers(fragment)
        if not fragment:
            return
        if out and out[-1][1] == bold:
            # Same weight as the previous fragment: extend it.
            out[-1] = (out[-1][0] + fragment, bold)
        else:
            out.append((fragment, bold))

    pos = 0
    for m in _INLINE_SPAN_RE.finditer(s):
        # Plain text between the previous span and this one (may be empty).
        _emit(s[pos:m.start()], False)
        tok = m.group(0)
        if tok.startswith("`"):
            # `code`: one-character delimiters, never bold.
            _emit(tok[1:-1], False)
        else:
            # **bold** or __bold__: two-character delimiters.
            _emit(tok[2:-2], True)
        pos = m.end()
    # Trailing plain text after the last span.
    _emit(s[pos:], False)
    return out
|
|
|
|
|
|
def _hard_split(word: str, max_chars: int):
|
|
"""Split a single long token into <= max_chars chunks (never loses chars)."""
|
|
return [word[i:i + max_chars] for i in range(0, len(word), max_chars)] or [""]
|
|
|
|
|
|
def _split_styled_word(pieces, max_chars):
    """Cut one over-long word into chunks of at most ``max_chars`` characters.

    ``pieces`` is the word as ``[(text, is_bold), ...]``. Each returned chunk
    has that same shape, so a cut landing inside or between pieces keeps every
    character together with its bold flag. ``max_chars`` must be >= 1.
    """
    chunks = [[]]
    room = max_chars
    for text, bold in pieces:
        while text:
            if room == 0:
                # Current chunk is full: open the next one.
                chunks.append([])
                room = max_chars
            head, text = text[:room], text[room:]
            chunks[-1].append((head, bold))
            room -= len(head)
    return chunks


def _join_styled_words(words):
    """Turn one line's words into merged ``(segment, is_bold)`` runs.

    ``words`` is a list of words, each a list of ``(text, is_bold)`` pieces.
    A single space is put before every word except the first and takes the
    weight of that word's first piece. Pieces of the same word are joined
    with no space. Adjacent runs of equal weight are merged. An empty line
    yields ``[("", False)]``.
    """
    merged = []
    for word_index, word in enumerate(words):
        for piece_index, (text, bold) in enumerate(word):
            starts_new_word = word_index > 0 and piece_index == 0
            piece = " " + text if starts_new_word else text
            if merged and merged[-1][1] == bold:
                merged[-1] = (merged[-1][0] + piece, bold)
            else:
                merged.append((piece, bold))
    return merged or [("", False)]


def wrap_rich(text: str, max_chars: int):
    """Word-wrap ``text`` to ``max_chars`` while preserving inline bold spans.

    Returns ``list[list[(fragment, is_bold)]]`` — one inner list of styled
    fragments per output line; concatenating an inner list's fragment texts
    gives the visible line. Inline markers are removed by
    :func:`parse_inline_bold` before measuring, so they never count toward a
    line's width.

    Wrapping is word-aware: a word is a run of characters between spaces or
    hard newlines and may cross a bold boundary (``**Label**:`` is the single
    word ``Label:``), so no space is invented where the source had none. A
    word longer than ``max_chars`` starts on its own line and is hard-split;
    no line exceeds ``max_chars``. Each ``"\\n"`` forces a line break. Empty
    input yields ``[[("", False)]]``. ``max_chars`` below 1 is treated as 1.
    """
    if max_chars < 1:
        max_chars = 1
    spans = parse_inline_bold(text)
    if not spans:
        return [[("", False)]]

    # Flatten the spans into tokens: a word (list of (text, bold) pieces) or
    # None for a forced line break. ``glued`` is True while no space or newline
    # has been seen since the last piece, i.e. the next piece continues the
    # same word even though it comes from a different span.
    tokens = []
    glued = False
    for frag, bold in spans:
        for part_index, part in enumerate(frag.split("\n")):
            if part_index > 0:
                tokens.append(None)
                glued = False
            for piece_index, piece in enumerate(part.split(" ")):
                if piece_index > 0:
                    # A space separated this piece from the previous one.
                    glued = False
                if piece == "":
                    continue
                if glued:
                    tokens[-1].append((piece, bold))
                else:
                    tokens.append([(piece, bold)])
                glued = True

    lines = []    # finished lines: list[list[(segment, bold)]]
    cur = []      # words on the line being filled
    cur_len = 0   # visible width of ``cur`` including separating spaces

    for word in tokens:
        if word is None:
            # Forced newline: emit the current line (blank if nothing on it).
            lines.append(_join_styled_words(cur))
            cur, cur_len = [], 0
            continue

        word_len = sum(len(piece_text) for piece_text, _ in word)

        if word_len > max_chars:
            # Over-long word: close the current line, give every full chunk
            # its own line, and leave the last chunk open so following words
            # may still share its line.
            if cur:
                lines.append(_join_styled_words(cur))
            *full_chunks, last_chunk = _split_styled_word(word, max_chars)
            lines.extend(_join_styled_words([chunk]) for chunk in full_chunks)
            cur = [last_chunk]
            cur_len = sum(len(piece_text) for piece_text, _ in last_chunk)
            continue

        needed = word_len if not cur else cur_len + 1 + word_len
        if cur and needed > max_chars:
            lines.append(_join_styled_words(cur))
            cur, cur_len = [word], word_len
        else:
            cur.append(word)
            cur_len = needed

    if cur:
        lines.append(_join_styled_words(cur))
    return lines or [[("", False)]]
|
|
|
|
|
|
def parse_md_table(lines: list):
    """Parse consecutive ``| a | b |`` lines into ``(header, rows)`` or None.

    Every line, after trimming surrounding whitespace, must start and end with
    ``|``; otherwise the input is not treated as a pipe table and None is
    returned. None is also returned for an empty ``lines``.

    The first line is the header. A separator row (``|---|---|``, optionally
    with ``:`` alignment markers) directly after the header is dropped. Cell
    text is whitespace-trimmed. Exactly one delimiting pipe is removed from
    each edge of a row, so empty first/last cells written without padding
    (``|1|2||``) are kept and rows stay aligned with the header.

    Returns:
        ``(header, body)`` where ``header`` is a list of cell strings and
        ``body`` is a list of such lists, or None.
    """
    cells_rows = []
    for ln in lines:
        s = ln.strip()
        if not (s.startswith("|") and s.endswith("|")):
            return None
        # Slice off one pipe per side; str.strip("|") would also swallow the
        # pipes that delimit empty edge cells.
        cells_rows.append([c.strip() for c in s[1:-1].split("|")])
    if not cells_rows:
        return None
    header = cells_rows[0]
    body = cells_rows[1:]
    # Drop a markdown separator row (all cells are dashes/colons).
    if body and all(set(c) <= set("-: ") and "-" in c for c in body[0]):
        body = body[1:]
    return header, body
|