f3d427d9e4
Conecta el motor AutomaticEDA con los datos crudos para que los 4 capítulos
dependientes de ctx (modelos, timeseries, geospatial, agregacion) salgan
POBLADOS en vez de degradar a una nota.
- build_eda_render_ctx (datascience, impure, dict-no-throw): dado db_path+table
y el TableProfile agregado, construye el ctx con los datos crudos que el
perfil no incluye: raw_numeric {col:[float|None]} alineado por fila (modelos /
geospatial), timeseries_raw {time_col,t,series} vía extract_timeseries_raw,
geo_points {lats,lons} desde el par lat/lon detectado, y db_path/table para el
groupby/pivot push-down de agregacion. Muestrea con LIMIT (no trae la tabla
entera a RAM). Compone detect_time_column / extract_timeseries_raw /
detect_latlon_columns / duckdb_query_readonly (imports lazy para evitar ciclo).
- render_automatic_eda (pipeline): one-shot perfil -> ctx -> PDF + PPTX con los
11 capítulos poblados; devuelve rutas + manifest de versiones por capítulo.
- profile_table: flag aditivo emit_automatic=True emite el AutomaticEDA PDF+PPTX
además del flujo legacy (emit_pdf/render_eda_pdf intacto). Nuevas claves de
retorno aeda_pdf_path / aeda_pptx_path / aeda_manifest_path.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
143 lines
4.9 KiB
Python
143 lines
4.9 KiB
Python
from .datascience import (
|
|
pearson,
|
|
standardize,
|
|
min_max_scale,
|
|
clip,
|
|
detect_outliers,
|
|
impute,
|
|
histogram,
|
|
rolling_window,
|
|
autocorrelation,
|
|
linspace,
|
|
)
|
|
from .scrape_amazon_bestsellers import scrape_amazon_bestsellers
|
|
from .scrape_google_trends import scrape_google_trends
|
|
from .scrape_competitor_prices import scrape_competitor_prices
|
|
from .scrape_tiktok_creative import scrape_tiktok_creative
|
|
from .scrape_aliexpress_trending import scrape_aliexpress_trending
|
|
from .fetch_reddit_search import fetch_reddit_search
|
|
from .fetch_hackernews_search import fetch_hackernews_search
|
|
from .score_demand_signal import score_demand_signal
|
|
from .pull_gsc_search_analytics import pull_gsc_search_analytics
|
|
from .summarize_table_duckdb import summarize_table_duckdb
|
|
from .summarize_table_pg import summarize_table_pg
|
|
from .describe_numeric import describe_numeric
|
|
from .summarize_categorical import summarize_categorical
|
|
from .infer_semantic_type import infer_semantic_type
|
|
from .column_quality_score import column_quality_score
|
|
from .select_groupby_keys import select_groupby_keys
|
|
from .render_eda_markdown import render_eda_markdown
|
|
from .detect_distribution_type import detect_distribution_type
|
|
from .spearman_corr import spearman_corr
|
|
from .cramers_v import cramers_v
|
|
from .theils_u import theils_u
|
|
from .correlation_ratio import correlation_ratio
|
|
from .mutual_info_columns import mutual_info_columns
|
|
from .infer_fk_containment_duckdb import infer_fk_containment_duckdb
|
|
from .build_join_graph import build_join_graph
|
|
from .association_matrix import association_matrix
|
|
from .correlation_matrix_duckdb import correlation_matrix_duckdb
|
|
from .pivot_table_duckdb import pivot_table_duckdb
|
|
from .groupby_stats_duckdb import groupby_stats_duckdb
|
|
from .pca_explained import pca_explained
|
|
from .kmeans_segments import kmeans_segments
|
|
from .isolation_forest_outliers import isolation_forest_outliers
|
|
from .normality_tests import normality_tests
|
|
from .trend_slope import trend_slope
|
|
from .run_eda_models import run_eda_models
|
|
from .project_clusters_2d import project_clusters_2d
|
|
from .describe_clusters_llm import describe_clusters_llm
|
|
from .detect_latlon_columns import detect_latlon_columns
|
|
from .analyze_geo_extent import analyze_geo_extent
|
|
from .build_geo_scatter import build_geo_scatter
|
|
from .eda_llm_insights import eda_llm_insights
|
|
from .build_eda_notebook import build_eda_notebook
|
|
from .decode_qr_image import decode_qr_image
|
|
from .adf_kpss_stationarity import adf_kpss_stationarity
|
|
from .acf_pacf import acf_pacf
|
|
from .stl_decompose import stl_decompose
|
|
from .to_returns import to_returns
|
|
from .fdr_correction import fdr_correction
|
|
from .suggest_reexpression import suggest_reexpression
|
|
from .exploratory_caveats import exploratory_caveats
|
|
from .render_eda_pdf import render_eda_pdf, render_eda_pdf_relational
|
|
from .render_automatic_eda_pdf import render_automatic_eda_pdf
|
|
from .render_automatic_eda_pptx import render_automatic_eda_pptx
|
|
from .detect_time_column import detect_time_column
|
|
from .extract_timeseries_raw import extract_timeseries_raw
|
|
from .build_eda_render_ctx import build_eda_render_ctx
|
|
from .profile_datetime import profile_datetime
|
|
from .resample_timeseries import resample_timeseries
|
|
|
|
# Public API of this package: the names exported by ``from <package> import *``.
# Every entry below is bound by one of the relative imports at the top of this
# module; keep the two lists in sync when adding or removing a re-export.
__all__ = [
    # Re-exported from sibling modules (one callable per module, except
    # ``render_eda_pdf_relational`` which is imported from ``.render_eda_pdf``).
    "detect_time_column",
    "extract_timeseries_raw",
    "build_eda_render_ctx",
    "profile_datetime",
    "resample_timeseries",
    "render_automatic_eda_pdf",
    "render_automatic_eda_pptx",
    "decode_qr_image",
    "adf_kpss_stationarity",
    "acf_pacf",
    "stl_decompose",
    "to_returns",
    "fdr_correction",
    "suggest_reexpression",
    "exploratory_caveats",
    "render_eda_pdf",
    "render_eda_pdf_relational",
    "summarize_table_duckdb",
    "summarize_table_pg",
    "spearman_corr",
    "cramers_v",
    "theils_u",
    "correlation_ratio",
    "mutual_info_columns",
    "infer_fk_containment_duckdb",
    "build_join_graph",
    "association_matrix",
    "correlation_matrix_duckdb",
    "pivot_table_duckdb",
    "groupby_stats_duckdb",
    "pca_explained",
    "kmeans_segments",
    "isolation_forest_outliers",
    "normality_tests",
    "trend_slope",
    "run_eda_models",
    "project_clusters_2d",
    "describe_clusters_llm",
    "detect_latlon_columns",
    "analyze_geo_extent",
    "build_geo_scatter",
    "eda_llm_insights",
    "build_eda_notebook",
    "describe_numeric",
    "summarize_categorical",
    "infer_semantic_type",
    "column_quality_score",
    "select_groupby_keys",
    "render_eda_markdown",
    "detect_distribution_type",
    "pull_gsc_search_analytics",
    "scrape_amazon_bestsellers",
    "scrape_google_trends",
    "scrape_competitor_prices",
    "scrape_tiktok_creative",
    "scrape_aliexpress_trending",
    "fetch_reddit_search",
    "fetch_hackernews_search",
    "score_demand_signal",
    # Re-exported from the ``.datascience`` module.
    "pearson",
    "standardize",
    "min_max_scale",
    "clip",
    "detect_outliers",
    "impute",
    "histogram",
    "rolling_window",
    "autocorrelation",
    "linspace",
]
|