"""Shared text-measurement helpers for the AutomaticEDA renderers.

Both renderers flow content top-to-bottom and must know, *before* placing a
block, how much vertical space it will take — that is what guarantees nothing
is cut: a unit either fits in the remaining space or moves to the next
page/slide whole.

Measuring proportional text exactly in matplotlib/pptx is impractical, so we
use a deterministic character-grid estimate (chars-per-line from an average
glyph width) which slightly over-estimates and is therefore safe: it never
claims something fits when it would overflow.

Wrapping is word-aware (``textwrap``) and additionally hard-splits any single
token longer than the line so a 200-character value still wraps instead of
overflowing — that is wrapping, not loss: every character is still rendered.
"""

from __future__ import annotations

import re
import textwrap

# Inline span markers: ``**bold**`` / ``__bold__`` (rendered bold) and
# `` `code` `` (markers removed, not styled). Matched non-greedily so the
# shortest balanced pair wins. Unbalanced leftovers are stripped afterwards so
# the visible text matches ``strip_inline_md``.
_INLINE_SPAN_RE = re.compile(r"(\*\*.+?\*\*|__.+?__|`.+?`)")

# Glossary term span: ``[[term:key]]visible text[[/term]]``. The visible text
# (which may itself contain ``**bold**``) is kept and tagged with ``key`` so the
# renderers can turn each appearance into a clickable jump to the glossary entry.
_TERM_SPAN_RE = re.compile(r"\[\[term:([A-Za-z0-9_]+)\]\](.*?)\[\[/term\]\]", re.S)
_TERM_OPEN_RE = re.compile(r"\[\[term:[A-Za-z0-9_]+\]\]")

# Literal marker strings removed from visible text, in removal order.
_INLINE_MARKERS = ("**", "__", "`")
_TERM_CLOSE = "[[/term]]"


def avg_char_width_in(fontsize_pt: float) -> float:
    """Approximate average glyph width in inches for a sans-serif font.

    ~0.5 of the point size is a conservative mean advance width for
    proportional sans fonts; dividing by 72 converts points to inches.
    """
    return 0.5 * fontsize_pt / 72.0


def line_height_in(fontsize_pt: float, leading: float = 1.32) -> float:
    """Line height in inches for a given font size and leading."""
    return leading * fontsize_pt / 72.0


def chars_per_line(width_in: float, fontsize_pt: float) -> int:
    """How many average glyphs fit in ``width_in`` at ``fontsize_pt``.

    Returns 80 when the glyph width is not positive (e.g. a zero font size),
    and never less than 1 so callers can always wrap.
    """
    cw = avg_char_width_in(fontsize_pt)
    if cw <= 0:
        return 80
    return max(1, int(width_in / cw))


def wrap(text: str, max_chars: int) -> list[str]:
    """Word-wrap ``text`` to lines of at most ``max_chars``, never losing chars.

    Long tokens (no spaces) are hard-split so they cannot overflow. Existing
    newlines are honored as hard breaks. Empty input yields a single empty
    line so callers can still reserve a row.
    """
    if max_chars < 1:
        max_chars = 1
    s = "" if text is None else str(text)
    out: list[str] = []
    for raw_line in s.split("\n"):
        # textwrap with break_long_words so no token overflows the column.
        # It returns [] for empty / whitespace-only input; keep one empty row.
        wrapped = textwrap.wrap(
            raw_line,
            width=max_chars,
            break_long_words=True,
            break_on_hyphens=False,
            replace_whitespace=True,
            drop_whitespace=True,
        )
        out.extend(wrapped or [""])
    return out or [""]


def strip_inline_md(text: str) -> str:
    """Strip a tiny subset of inline markdown markers, keeping the text.

    Removes ``**bold**`` / ``__bold__`` / `` `code` `` markers and glossary
    term markers (``[[term:key]]`` / ``[[/term]]``) so the content is
    preserved without trying to style spans. Single-asterisk ``*em*`` markers
    are *not* removed. Nothing is dropped except the markers themselves.
    Falsy input returns ``""``.
    """
    if not text:
        return ""
    s = str(text)
    # Drop glossary term markers, keeping the visible inner text.
    s = _TERM_SPAN_RE.sub(lambda m: m.group(2), s)
    s = _TERM_OPEN_RE.sub("", s)  # leftover unbalanced open marker.
    s = s.replace(_TERM_CLOSE, "")  # leftover unbalanced close marker.
    for marker in _INLINE_MARKERS:
        s = s.replace(marker, "")
    return s


def _strip_term_markers(s: str) -> str:
    """Remove any (balanced or leftover) glossary term markers, keeping text."""
    s = _TERM_OPEN_RE.sub("", s)
    return s.replace(_TERM_CLOSE, "")


def _strip_leftover_markers(s: str) -> str:
    """Drop every ``**`` / ``__`` / `` ` `` marker from a fragment.

    Keeps the visible text identical to :func:`strip_inline_md` even when a
    marker has no matching closing marker.
    """
    for marker in _INLINE_MARKERS:
        s = s.replace(marker, "")
    return s


def parse_inline_bold(text: str) -> list[tuple[str, bool]]:
    """Split ``text`` into ``[(fragment, is_bold), ...]`` preserving order.

    ``**...**`` and ``__...__`` spans become bold fragments (markers removed);
    `` `code` `` keeps its text without the backticks and is not bold; any
    other text is emitted with unbalanced markers stripped.

    Glossary term markers and markers nested *inside* a matched span (e.g. the
    backticks in ``**a`b`c**``) are stripped as well, so that the
    concatenation of all fragment texts matches :func:`strip_inline_md` of the
    input — the *visible* characters (and therefore line wrapping) are
    unchanged; only the bold flag is added. Adjacent fragments of the same
    weight are merged. ``None`` or empty input returns ``[]``.
    """
    s = "" if text is None else str(text)
    # strip_inline_md removes term markers, so this parser must too.
    s = _strip_term_markers(s)
    if not s:
        return []

    out: list[tuple[str, bool]] = []

    def _emit(fragment: str, bold: bool) -> None:
        if fragment == "":
            return
        if out and out[-1][1] == bold:
            out[-1] = (out[-1][0] + fragment, bold)
        else:
            out.append((fragment, bold))

    pos = 0
    for m in _INLINE_SPAN_RE.finditer(s):
        if m.start() > pos:
            _emit(_strip_leftover_markers(s[pos:m.start()]), False)
        tok = m.group(0)
        if tok.startswith(("**", "__")):
            inner, bold = tok[2:-2], True
        else:  # `code`
            inner, bold = tok[1:-1], False
        # Nested markers inside the span are not visible text either.
        _emit(_strip_leftover_markers(inner), bold)
        pos = m.end()
    if pos < len(s):
        _emit(_strip_leftover_markers(s[pos:]), False)
    return out


def _hard_split(word: str, max_chars: int) -> list[str]:
    """Split a single long token into <= max_chars chunks (never loses chars).

    NOTE(review): the wrapping core below no longer calls this; it is kept so
    any external importer keeps working.
    """
    return [word[i:i + max_chars] for i in range(0, len(word), max_chars)] or [""]


def _merge_pieces(pieces):
    """Merge adjacent ``(text, style)`` pieces that share the same style."""
    merged = []
    for text, style in pieces:
        if merged and merged[-1][1] == style:
            merged[-1] = (merged[-1][0] + text, style)
        else:
            merged.append((text, style))
    return merged


def _styled_words(spans):
    """Turn ``(text, style)`` spans into words made of styled pieces.

    Returns a list whose items are either a word — a non-empty list of
    ``(text, style)`` pieces — or ``None`` for a forced line break. Only a
    space or a newline ends a word, so a style change in the middle of a word
    (``**Name**:``) yields ONE word with several pieces instead of two words
    that would later be joined by a spurious space.
    """
    words = []
    pending = []  # pieces of the word currently being assembled

    def _close() -> None:
        nonlocal pending
        if pending:
            words.append(pending)
            pending = []

    for text, style in spans:
        for line_no, line in enumerate(text.split("\n")):
            if line_no > 0:
                _close()
                words.append(None)
            for chunk_no, chunk in enumerate(line.split(" ")):
                if chunk_no > 0:  # a space was crossed: the word is over
                    _close()
                if chunk:
                    pending.append((chunk, style))
    _close()
    return words


def _split_pieces(pieces, max_chars: int):
    """Cut one over-long word into rows of at most ``max_chars`` characters.

    Each row is a list of ``(text, style)`` pieces; no character is lost and
    every piece keeps its style across the cut.
    """
    rows = []
    row = []
    room = max_chars
    for text, style in pieces:
        while text:
            head, text = text[:room], text[room:]
            row.append((head, style))
            room -= len(head)
            if room == 0:
                rows.append(row)
                row = []
                room = max_chars
    if row:
        rows.append(row)
    return rows


def _wrap_styled(spans, max_chars: int, blank):
    """Shared wrapping core for :func:`wrap_rich` and :func:`wrap_rich_terms`.

    ``spans`` is a list of ``(text, style)`` where ``style`` is a tuple of the
    extra per-fragment fields; each output fragment is ``(text, *style)``.
    ``blank`` is the fragment used for an empty line. The separating space
    between two words is attached to the *following* word's fragment.
    """
    lines = []
    cur = []  # pieces of the line being built, separating spaces included
    cur_len = 0

    def _render(pieces):
        return [(text, *style) for text, style in _merge_pieces(pieces)] or [blank]

    def _flush() -> None:
        nonlocal cur, cur_len
        lines.append(_render(cur))
        cur = []
        cur_len = 0

    for word in _styled_words(spans):
        if word is None:  # forced newline (an empty line if nothing pending)
            _flush()
            continue
        width = sum(len(text) for text, _ in word)
        if width > max_chars:
            # Over-long token: start on a fresh line and hard-split it; the
            # last (possibly short) row stays open for following words.
            if cur:
                _flush()
            rows = _split_pieces(word, max_chars)
            for row in rows[:-1]:
                lines.append(_render(row))
            cur = list(rows[-1])
            cur_len = sum(len(text) for text, _ in cur)
            continue
        if cur and cur_len + 1 + width > max_chars:
            _flush()
        if cur:
            first_text, first_style = word[0]
            cur.append((" " + first_text, first_style))
            cur.extend(word[1:])
            cur_len += 1 + width
        else:
            cur = list(word)
            cur_len = width
    if cur:
        _flush()
    return lines or [[blank]]


def wrap_rich(text: str, max_chars: int) -> list[list[tuple[str, bool]]]:
    """Word-wrap ``text`` to ``max_chars`` while preserving inline bold spans.

    Returns ``list[list[(fragment, is_bold)]]`` — one inner list of styled
    fragments per output line; concatenating an inner list's fragment texts is
    the visible line. Wrapping is word-aware and hard-splits over-long tokens,
    so no line exceeds ``max_chars`` (the renderers measure these very lines,
    so the no-cut guarantee holds). Bold spans never widen a line: a style
    change inside a word does not introduce a space, only the bold flag is
    carried. Empty or ``None`` input yields ``[[("", False)]]``.
    """
    if max_chars < 1:
        max_chars = 1
    spans = [(frag, (bold,)) for frag, bold in parse_inline_bold(text)]
    if not spans:
        return [[("", False)]]
    return _wrap_styled(spans, max_chars, ("", False))


def parse_inline_rich(text: str) -> list[tuple[str, bool, str | None]]:
    """Split ``text`` into ``[(fragment, is_bold, term_key), ...]``.

    Extends :func:`parse_inline_bold` with glossary term spans
    ``[[term:key]]visible[[/term]]``: the inner ``visible`` text is parsed for
    ``**bold**`` as usual and every resulting fragment carries ``term_key`` so
    the renderers can make it clickable. Text outside a term span gets
    ``term_key = None``. Unbalanced term markers are stripped, as in
    :func:`strip_inline_md`. Only the bold flag and the term key are added to
    the visible characters. Adjacent fragments with the same (bold, term) are
    merged. ``None`` or empty input returns ``[]``.
    """
    s = "" if text is None else str(text)
    if not s:
        return []

    out: list[tuple[str, bool, str | None]] = []

    def _emit(fragment: str, bold: bool, term) -> None:
        if fragment == "":
            return
        if out and out[-1][1] == bold and out[-1][2] == term:
            out[-1] = (out[-1][0] + fragment, bold, term)
        else:
            out.append((fragment, bold, term))

    def _emit_bolded(segment: str, term) -> None:
        # The bold parser also strips any leftover (unbalanced) term markers.
        for frag, bold in parse_inline_bold(segment):
            _emit(frag, bold, term)

    pos = 0
    for m in _TERM_SPAN_RE.finditer(s):
        if m.start() > pos:
            _emit_bolded(s[pos:m.start()], None)
        _emit_bolded(m.group(2), m.group(1))
        pos = m.end()
    if pos < len(s):
        _emit_bolded(s[pos:], None)
    return out


def wrap_rich_terms(
    text: str, max_chars: int
) -> list[list[tuple[str, bool, str | None]]]:
    """Like :func:`wrap_rich` but preserving glossary term keys per fragment.

    Returns ``list[list[(fragment, is_bold, term_key)]]`` — one inner list per
    output line. Wrapping is word-aware and hard-splits over-long tokens so no
    line exceeds ``max_chars`` (the renderers measure these very lines). Term
    and bold flags never widen a line: punctuation glued to a term
    (``[[term:k]]mean[[/term]],``) stays glued. Empty or ``None`` input yields
    ``[[("", False, None)]]``.
    """
    if max_chars < 1:
        max_chars = 1
    spans = [(frag, (bold, term)) for frag, bold, term in parse_inline_rich(text)]
    if not spans:
        return [[("", False, None)]]
    return _wrap_styled(spans, max_chars, ("", False, None))


def parse_md_table(lines: list) -> tuple[list[str], list[list[str]]] | None:
    """Parse consecutive ``| a | b |`` lines into ``(header, rows)`` or None.

    Accepts an optional separator row (``|---|---|``) right after the header,
    which is ignored. Returns None if ``lines`` is empty or any line does not
    both start and end with a pipe.
    """
    cells_rows = []
    for ln in lines:
        s = ln.strip()
        if not (s.startswith("|") and s.endswith("|")):
            return None
        cells_rows.append([c.strip() for c in s.strip("|").split("|")])
    if not cells_rows:
        return None
    header, body = cells_rows[0], cells_rows[1:]
    # Drop a markdown separator row (all cells are dashes/colons).
    if body and all(set(c) <= set("-: ") and "-" in c for c in body[0]):
        body = body[1:]
    return header, body