"""Impure EDA helper: rasterize a join graph to a matplotlib Figure (`eda` group).

Takes the join graph produced by ``build_join_graph`` (inter-table FK
relations) and draws it as a directed node-link diagram on a
ready-to-rasterize ``matplotlib.figure.Figure``. Hub tables (the ones with the
highest out-degree, candidate fact tables of a star schema) are highlighted in
a warm accent colour; the rest use a neutral colour. Directed edges carry a
``from_col→to_col`` label (plus the cardinality when present).

This is the *drawn* counterpart of the Mermaid string that ``build_join_graph``
also emits: the relations chapter of an AutomaticEDA report can show a real
picture instead of only the pasteable Mermaid block.

Impure because it touches matplotlib's rendering machinery. It pins the
headless Agg backend and a deterministic ``spring_layout`` seed so the output
is reproducible. It never raises: on any internal failure (or empty input) it
returns a ``Figure`` carrying a centered message, so the lazy render of the
document is never broken.
"""

import logging
from typing import Optional

import matplotlib

matplotlib.use("Agg")

import matplotlib.pyplot as plt  # noqa: E402
import networkx as nx  # noqa: E402

_LOG = logging.getLogger(__name__)

# Warm accent reserved for hub tables (candidate fact tables / star-schema cores).
_HUB_COLOR = "#DD8452"
# Neutral blue for every other table.
_NODE_COLOR = "#4C72B0"
# Muted gray for the empty/error message text.
_MUTED_TEXT = "#5f6b7a"
# Edge colour and label colour.
_EDGE_COLOR = "#7a7a7a"
_EDGE_LABEL_COLOR = "#34495e"
# Constant node size; shared with the edge drawing so arrowheads stop at the
# node boundary instead of being hidden under the marker.
_NODE_SIZE = 2200


def _text_figure(message: str) -> "matplotlib.figure.Figure":
    """Return a blank Figure carrying a single centered message.

    Used both for the "no relations" case and as the never-raise fallback.

    Args:
        message: Text drawn at the center of the (axis-less) Axes.

    Returns:
        A 7x5 ``matplotlib.figure.Figure`` with one hidden-axis Axes.
    """
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.axis("off")
    ax.text(
        0.5,
        0.5,
        message,
        ha="center",
        va="center",
        fontsize=12,
        color=_MUTED_TEXT,
        transform=ax.transAxes,
    )
    fig.tight_layout()
    return fig


def _edge_label(edge: dict) -> str:
    """Build the ``from_col→to_col`` label of an edge, appending cardinality.

    Args:
        edge: Edge dict; the keys ``from_col``, ``to_col`` and ``cardinality``
            are read and all of them are optional.

    Returns:
        ``"from→to"`` when both columns are present, the single available
        column when only one is, and ``""`` when neither is. A truthy
        cardinality is appended as ``" (card)"``, or returned alone when there
        is no column part.
    """
    columns = (edge.get("from_col"), edge.get("to_col"))
    label = "→".join(str(col) for col in columns if col is not None)
    card = edge.get("cardinality")
    if card:
        label = f"{label} ({card})" if label else str(card)
    return label


def _build_graph(nodes, edges):
    """Build the directed graph and the per-edge label mapping.

    Nodes are added in a stable order (declared nodes first, then edge
    endpoints in edge order) because the seeded ``spring_layout`` depends on
    node order. ``DiGraph.add_node`` is idempotent, so repeated names are
    deduplicated without changing that order.

    Args:
        nodes: Iterable of node items; non-dict items and items without a
            ``table`` value are skipped.
        edges: Iterable of edge items; non-dict items are skipped. An edge
            missing one endpoint still contributes the endpoint it has as a
            node, but no arrow.

    Returns:
        A ``(graph, edge_labels)`` tuple where ``edge_labels`` maps
        ``(from_table, to_table)`` to the label text. Only non-empty labels
        are included.
    """
    graph = nx.DiGraph()

    for node in nodes:
        if isinstance(node, dict):
            table = node.get("table")
            if table is not None:
                graph.add_node(table)

    labels_by_pair: dict = {}
    for edge in edges:
        if not isinstance(edge, dict):
            continue
        src = edge.get("from_table")
        dst = edge.get("to_table")
        # Defensive: a graph with edges but no explicit nodes still draws.
        for endpoint in (src, dst):
            if endpoint is not None:
                graph.add_node(endpoint)
        if src is None or dst is None:
            continue
        graph.add_edge(src, dst)
        # A DiGraph keeps one arrow per ordered table pair, so several FK
        # relations between the same two tables share it. Accumulate their
        # labels instead of letting the last relation overwrite the others.
        parts = labels_by_pair.setdefault((src, dst), [])
        label = _edge_label(edge)
        if label and label not in parts:
            parts.append(label)

    edge_labels = {
        pair: "\n".join(parts) for pair, parts in labels_by_pair.items() if parts
    }
    return graph, edge_labels


def draw_join_graph_figure(
    join_graph: dict, title: Optional[str] = None
) -> "matplotlib.figure.Figure":
    """Rasterize a join graph to a matplotlib Figure.

    Builds a ``networkx.DiGraph`` from the graph's nodes and edges, lays it out
    with a deterministic ``spring_layout`` (``seed=42``) and draws it on a
    ``matplotlib.figure.Figure``: tables as labelled circular nodes (hubs in a
    warm accent, the rest neutral) and FK relations as directed arrows labelled
    ``from_col→to_col`` (plus cardinality when available). When several
    relations join the same ordered pair of tables, their labels are stacked on
    the shared arrow, one per line.

    The function never raises. On empty/``None`` input it returns a Figure with
    a centered "Sin relaciones FK detectadas." message; on any internal failure
    it logs a warning, closes the partially drawn figure and returns a Figure
    with a generic centered message. It never shows the figure nor writes it to
    disk — the document renderer rasterizes it.

    Args:
        join_graph: Dict produced by ``build_join_graph`` with keys ``nodes``
            (list of ``{table, out_degree, in_degree, role}``), ``edges`` (list
            of ``{from_table, from_col, to_table, to_col, cardinality?,
            inclusion?}``) and ``hubs`` (list of hub table names to highlight).
            Missing keys, non-dict items, ``None`` or ``{}`` are all tolerated.
        title: Optional title drawn above the diagram. When omitted (or
            empty), the title defaults to "Join graph".

    Returns:
        A ``matplotlib.figure.Figure`` (figsize 7x5) with a single Axes holding
        the node-link diagram. The caller rasterizes/closes it.
    """
    # Tracked outside the ``try`` so the handler can release a half-drawn
    # figure; pyplot keeps every figure it creates alive until it is closed.
    fig = None
    try:
        jg = join_graph if isinstance(join_graph, dict) else {}
        hubs = {h for h in (jg.get("hubs") or []) if h is not None}
        graph, edge_labels = _build_graph(
            jg.get("nodes") or [], jg.get("edges") or []
        )

        if graph.number_of_nodes() == 0:
            return _text_figure("Sin relaciones FK detectadas.")

        fig, ax = plt.subplots(figsize=(7, 5))

        # Deterministic layout. Fixed positions for trivial graphs so a single
        # node sits centered instead of at an arbitrary spring-layout point.
        if graph.number_of_nodes() <= 1:
            pos = {name: (0.5, 0.5) for name in graph.nodes()}
        else:
            pos = nx.spring_layout(graph, seed=42)

        node_colors = [
            _HUB_COLOR if name in hubs else _NODE_COLOR for name in graph.nodes()
        ]

        nx.draw_networkx_nodes(
            graph,
            pos,
            ax=ax,
            node_color=node_colors,
            node_size=_NODE_SIZE,
            node_shape="o",
            edgecolors="white",
            linewidths=1.5,
        )
        nx.draw_networkx_labels(
            graph,
            pos,
            ax=ax,
            font_size=9,
            font_color="white",
            font_weight="bold",
        )
        nx.draw_networkx_edges(
            graph,
            pos,
            ax=ax,
            arrows=True,
            arrowstyle="-|>",
            arrowsize=18,
            edge_color=_EDGE_COLOR,
            width=1.4,
            connectionstyle="arc3,rad=0.06",
            node_size=_NODE_SIZE,
        )

        if edge_labels:
            nx.draw_networkx_edge_labels(
                graph,
                pos,
                edge_labels=edge_labels,
                ax=ax,
                font_size=7,
                font_color=_EDGE_LABEL_COLOR,
                bbox={
                    "boxstyle": "round,pad=0.2",
                    "fc": "white",
                    "ec": "none",
                    "alpha": 0.7,
                },
            )

        ax.set_title(title or "Join graph", fontsize=13)
        ax.axis("off")
        fig.tight_layout()
        return fig
    except Exception:
        # Never raise — the document render is lazy and must not be broken.
        # Keep the failure visible in the logs instead of dropping it silently.
        _LOG.warning(
            "draw_join_graph_figure failed; returning fallback figure",
            exc_info=True,
        )
        if fig is not None:
            # Release the abandoned figure so repeated failures do not pile up
            # open figures in pyplot's registry.
            plt.close(fig)
        return _text_figure("No se pudo dibujar el join graph.")