import marimo

__generated_with = "0.15.2"
app = marimo.App(width="medium")


@app.cell
def _():
    # Cell 1 — Imports and utilities
    import marimo as mo
    return (mo,)


@app.cell
def _(mo):
    mo.md(r"""# PyGWalker: EDA rapido e interactivo con pygwalker""")
    return


@app.cell
def _():
    import os
    import pandas as pd
    import numpy as np
    import altair as alt
    import pygwalker as pyg
    from typing import Dict

    # vega_datasets is optional: when it cannot be imported the loader cell
    # falls back to reading the same datasets from remote URLs.
    try:
        from vega_datasets import data as vega_data
    except Exception:
        vega_data = None

    # Datasets that are always fetched by URL, keyed by their dropdown label.
    DATASET_URLS: Dict[str, str] = {
        "Bike Sharing (DC)": "https://kanaries-app.s3.ap-northeast-1.amazonaws.com/public-datasets/bike_sharing_dc.csv",
    }
    return DATASET_URLS, pd, pyg, vega_data


@app.cell
def _(mo):
    # Dropdown labels; each one is matched by name in the loader cell.
    options = [
        "cars",
        "iris",
        "seattle-weather",
        "stocks",
        "Bike Sharing (DC)",
    ]

    dataset_selector = mo.ui.dropdown(options=options, value="cars", label="Dataset")
    sample_toggle = mo.ui.checkbox(label="Muestrear filas", value=False)
    sample_size = mo.ui.number(value=1000, label="Filas a tomar")
    dark_mode = mo.ui.radio(options=["media", "light", "dark"], value="media", label="Tema")
    spec_path_input = mo.ui.text(value="", label="Ruta de spec (opcional)", full_width=True)

    # Control panel: heading, dataset/sampling row, theme row, spec path.
    mo.vstack([
        mo.md("## 1) Selecciona un dataset y preferencias"),
        mo.hstack([dataset_selector, sample_toggle, sample_size]),
        mo.hstack([dark_mode]),
        spec_path_input,
    ])
    return (
        dark_mode,
        dataset_selector,
        sample_size,
        sample_toggle,
        spec_path_input,
    )


@app.cell
def _(
    dark_mode,
    dataset_selector,
    sample_size,
    sample_toggle,
    spec_path_input,
):
    # Unwrap the current widget values into plain Python values so that the
    # cells below depend on the values rather than on the UI elements.
    selected_name = dataset_selector.value
    use_sample = sample_toggle.value
    sample_n = sample_size.value
    selected_theme = dark_mode.value
    spec_path_val = spec_path_input.value.strip()
    return sample_n, selected_name, selected_theme, spec_path_val, use_sample


@app.cell
def _(
    DATASET_URLS: "Dict[str, str]",
    pd,
    sample_n,
    selected_name,
    use_sample,
    vega_data,
):
    def _load_dataset(name: str) -> pd.DataFrame:
        """Return the dataset selected in the dropdown as a DataFrame.

        "Bike Sharing (DC)" is always read from its URL in DATASET_URLS.
        The other datasets come from vega_datasets when that package was
        imported, otherwise from a remote CSV/JSON copy. An unrecognised
        name yields an empty DataFrame.
        """
        if name == "Bike Sharing (DC)":
            return pd.read_csv(DATASET_URLS["Bike Sharing (DC)"], parse_dates=["date"])
        if name == "cars":
            if vega_data is not None:
                return vega_data.cars()
            return pd.read_json("https://raw.githubusercontent.com/vega/vega-datasets/master/data/cars.json")
        if name == "iris":
            if vega_data is not None:
                return vega_data.iris()
            return pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv")
        if name == "seattle-weather":
            if vega_data is not None:
                return vega_data.seattle_weather()
            return pd.read_csv(
                "https://raw.githubusercontent.com/vega/vega-datasets/master/data/seattle-weather.csv",
                parse_dates=["date"],
            )
        if name == "stocks":
            if vega_data is not None:
                return vega_data.stocks()
            return pd.read_csv(
                "https://raw.githubusercontent.com/vega/vega-datasets/master/data/stocks.csv",
                parse_dates=["date"],
            )
        return pd.DataFrame()

    _df_raw = _load_dataset(selected_name)

    # Require at least one whole row: a fractional request below 1 would
    # truncate to 0 and silently produce an empty frame. The isinstance
    # check also rejects a non-numeric value such as None.
    if use_sample and isinstance(sample_n, (int, float)) and sample_n >= 1:
        # Clamp to the row count before converting so that an oversized (or
        # infinite) request simply takes every row.
        _n = int(min(sample_n, len(_df_raw)))
        # Fixed seed keeps the sample stable across re-runs of the cell.
        df = _df_raw.sample(n=_n, random_state=42).reset_index(drop=True)
    else:
        df = _df_raw
    df
    return (df,)


@app.cell
def _(df, mo):
    # Built-in marimo explorer for the loaded (possibly sampled) data.
    explorer = mo.ui.data_explorer(df)
    explorer
    return


@app.cell
def _(df, mo, pyg, selected_theme, spec_path_val):
    _gw_gid = "gwalker-main"
    # NOTE(review): newer pygwalker releases appear to name the theme keyword
    # `appearance` instead of `dark` — confirm against the installed version.
    _html = pyg.to_html(
        df,
        gid=_gw_gid,
        dark=selected_theme,
        # An empty string means "no spec", so the stripped input is passed as-is.
        spec=spec_path_val,
    )
    mo.Html(_html)
    return


if __name__ == "__main__":
    app.run()