9886e2905d
draw_join_graph_figure (datascience, grupo eda): dibuja el join graph de la base como una matplotlib Figure real (networkx spring_layout seed=42, nodos = tablas, hubs destacados, flechas dirigidas con etiqueta from_col->to_col + cardinalidad). Nunca lanza: devuelve una Figure de error si algo falla; entrada vacia -> Figure 'Sin relaciones FK detectadas'. render_automatic_eda_folder ahora inserta esa Figure (bloque Figure lazy via make) en el capitulo de relaciones cuando hay edges, ademas del texto Mermaid (util para el MD/LLM). Antes solo se volcaba el texto del grafo; ahora el PDF/PPTX muestran el diagrama dibujado. Tests nuevos: la Figure real se construye con edges y se omite sin edges. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
215 lines
7.4 KiB
Python
215 lines
7.4 KiB
Python
"""Impure EDA helper: rasterize a join graph to a matplotlib Figure (`eda` group).
|
|
|
|
Takes the join graph produced by ``build_join_graph`` (inter-table FK relations)
|
|
and draws it as a directed node-link diagram on a ready-to-rasterize
|
|
``matplotlib.figure.Figure``. Hub tables (the ones with the highest out-degree,
|
|
candidate fact tables of a star schema) are highlighted in a warm accent colour;
|
|
the rest use a neutral colour. Directed edges carry a ``from_col→to_col`` label
|
|
(plus the cardinality when present).
|
|
|
|
This is the *drawn* counterpart of the Mermaid string that ``build_join_graph``
|
|
also emits: the relations chapter of an AutomaticEDA report can show a real
|
|
picture instead of only the pasteable Mermaid block.
|
|
|
|
Impure because it touches matplotlib's rendering machinery. It pins the headless
|
|
Agg backend and a deterministic ``spring_layout`` seed so the output is
|
|
reproducible. It never raises: on any internal failure (or empty input) it
|
|
returns a ``Figure`` carrying a centered message, so the lazy render of the
|
|
document is never broken.
|
|
"""
|
|
|
|
import matplotlib
|
|
|
|
matplotlib.use("Agg")
|
|
|
|
import matplotlib.pyplot as plt # noqa: E402
|
|
import networkx as nx # noqa: E402
|
|
|
|
# Warm accent reserved for hub tables (candidate fact tables / star-schema cores).
|
|
_HUB_COLOR = "#DD8452"
|
|
# Neutral blue for every other table.
|
|
_NODE_COLOR = "#4C72B0"
|
|
# Muted gray for the empty/error message text.
|
|
_MUTED_TEXT = "#5f6b7a"
|
|
# Edge colour and label colour.
|
|
_EDGE_COLOR = "#7a7a7a"
|
|
_EDGE_LABEL_COLOR = "#34495e"
|
|
# Constant node size; shared with the edge drawing so arrowheads stop at the
|
|
# node boundary instead of being hidden under the marker.
|
|
_NODE_SIZE = 2200
|
|
|
|
|
|
def _text_figure(message: str) -> "matplotlib.figure.Figure":
    """Build an axis-less Figure whose only content is ``message``, centered.

    Serves as the placeholder when there is no join graph to draw and as the
    fallback returned when drawing fails.

    Args:
        message: Text to place in the middle of the figure.

    Returns:
        A 7x5 ``matplotlib.figure.Figure`` with one hidden Axes holding the
        text.
    """
    figure, axes = plt.subplots(figsize=(7, 5))
    axes.axis("off")
    # Axes-fraction coordinates: (0.5, 0.5) is the center whatever the data
    # limits of the (empty) Axes happen to be.
    text_style = {
        "ha": "center",
        "va": "center",
        "fontsize": 12,
        "color": _MUTED_TEXT,
        "transform": axes.transAxes,
    }
    axes.text(0.5, 0.5, message, **text_style)
    figure.tight_layout()
    return figure
|
|
|
|
|
|
def _edge_label(edge: dict) -> str:
|
|
"""Build the ``from_col→to_col`` label of an edge, appending cardinality."""
|
|
fc = edge.get("from_col")
|
|
tc = edge.get("to_col")
|
|
if fc is not None and tc is not None:
|
|
label = f"{fc}→{tc}"
|
|
elif fc is not None:
|
|
label = str(fc)
|
|
elif tc is not None:
|
|
label = str(tc)
|
|
else:
|
|
label = ""
|
|
card = edge.get("cardinality")
|
|
if card:
|
|
label = f"{label} ({card})" if label else str(card)
|
|
return label
|
|
|
|
|
|
def draw_join_graph_figure(
    join_graph: dict, title: str = None
) -> "matplotlib.figure.Figure":
    """Rasterize a join graph to a matplotlib Figure.

    Builds a ``networkx.DiGraph`` from the graph's nodes and edges, lays it out
    with a deterministic ``spring_layout`` (``seed=42``) and draws it on a
    ``matplotlib.figure.Figure``: tables as labelled circular nodes (hubs in a
    warm accent, the rest neutral) and FK relations as directed arrows labelled
    ``from_col→to_col`` (plus cardinality when available).

    A ``DiGraph`` holds a single edge per ``(from_table, to_table)`` pair, so
    when several relations link the same two tables in the same direction one
    arrow is drawn and their distinct labels are stacked on separate lines.

    The function never shows the figure nor writes it to disk. Failures inside
    the drawing code are caught: if a Figure had already been created it is
    closed (so it does not stay registered in pyplot) and a Figure with a
    generic centered message is returned instead.

    Args:
        join_graph: Dict with keys ``nodes`` (list of dicts carrying a
            ``table`` name), ``edges`` (list of dicts with ``from_table``,
            ``to_table`` and optionally ``from_col``, ``to_col``,
            ``cardinality``) and ``hubs`` (table names to highlight).
            Missing keys, non-dict items, ``None`` or ``{}`` are tolerated.
        title: Optional title drawn above the diagram. When omitted or
            empty, the title is "Join graph".

    Returns:
        A ``matplotlib.figure.Figure`` (figsize 7x5) with a single Axes. It
        holds the node-link diagram, the message "Sin relaciones FK
        detectadas." when no table name could be collected, or the message
        "No se pudo dibujar el join graph." when drawing failed.
    """
    # Tracked outside the ``try`` so the handler can release a half-drawn
    # Figure; ``plt.subplots`` registers it with pyplot's figure manager.
    fig = None
    try:
        jg = join_graph if isinstance(join_graph, dict) else {}
        nodes = jg.get("nodes") or []
        edges = jg.get("edges") or []
        hubs = {h for h in (jg.get("hubs") or []) if h is not None}

        # Collect node names from the declared nodes and, defensively, from
        # the edges (so a graph with edges but no explicit nodes still draws).
        # First-seen order is kept so the layout input is deterministic.
        node_names: list = []
        seen: set = set()

        def _register(name) -> None:
            if name is not None and name not in seen:
                seen.add(name)
                node_names.append(name)

        for n in nodes:
            if isinstance(n, dict):
                _register(n.get("table"))
        for e in edges:
            if isinstance(e, dict):
                _register(e.get("from_table"))
                _register(e.get("to_table"))

        if not node_names:
            return _text_figure("Sin relaciones FK detectadas.")

        graph = nx.DiGraph()
        graph.add_nodes_from(node_names)

        edge_labels: dict = {}
        for e in edges:
            if not isinstance(e, dict):
                continue
            ft = e.get("from_table")
            tt = e.get("to_table")
            if ft is None or tt is None:
                continue
            graph.add_edge(ft, tt)
            label = _edge_label(e)
            previous = edge_labels.get((ft, tt))
            if not previous:
                # First relation for this pair, or nothing useful stored yet.
                edge_labels[(ft, tt)] = label
            elif label and label not in previous.split("\n"):
                # Another relation between the same two tables: keep every
                # distinct label instead of overwriting with the last one.
                edge_labels[(ft, tt)] = f"{previous}\n{label}"

        fig, ax = plt.subplots(figsize=(7, 5))

        # Deterministic layout. Fixed positions for trivial graphs so a single
        # node sits centered instead of at an arbitrary spring-layout point.
        if graph.number_of_nodes() <= 1:
            pos = {name: (0.5, 0.5) for name in graph.nodes()}
        else:
            pos = nx.spring_layout(graph, seed=42)

        node_colors = [
            _HUB_COLOR if name in hubs else _NODE_COLOR for name in graph.nodes()
        ]
        nx.draw_networkx_nodes(
            graph,
            pos,
            ax=ax,
            node_color=node_colors,
            node_size=_NODE_SIZE,
            node_shape="o",
            edgecolors="white",
            linewidths=1.5,
        )
        nx.draw_networkx_labels(
            graph,
            pos,
            ax=ax,
            font_size=9,
            font_color="white",
            font_weight="bold",
        )
        # ``node_size`` is repeated here so the arrowheads stop at the node
        # boundary instead of being hidden under the marker.
        nx.draw_networkx_edges(
            graph,
            pos,
            ax=ax,
            arrows=True,
            arrowstyle="-|>",
            arrowsize=18,
            edge_color=_EDGE_COLOR,
            width=1.4,
            connectionstyle="arc3,rad=0.06",
            node_size=_NODE_SIZE,
        )
        # Skip the label pass entirely when every label is empty.
        if any(edge_labels.values()):
            nx.draw_networkx_edge_labels(
                graph,
                pos,
                edge_labels=edge_labels,
                ax=ax,
                font_size=7,
                font_color=_EDGE_LABEL_COLOR,
                bbox={
                    "boxstyle": "round,pad=0.2",
                    "fc": "white",
                    "ec": "none",
                    "alpha": 0.7,
                },
            )

        ax.set_title(title if title else "Join graph", fontsize=13)
        ax.axis("off")
        fig.tight_layout()
        return fig
    except Exception:
        # Never raise — the document render is lazy and must not be broken.
        # Release the partially drawn Figure first; otherwise it would stay
        # alive in pyplot's registry with no caller holding it to close it.
        if fig is not None:
            plt.close(fig)
        return _text_figure("No se pudo dibujar el join graph.")
|