Files
fn_registry/python/functions/datascience/__init__.py
T
egutierrez 9c1b7dd0f3 feat(papers): render_paper_pdf (Markdown IMRaD → PDF) + agente paper-reviewer
Subsistema papers/: pieza de entrega + control de calidad.

- render_paper_pdf_py_datascience (Python, impure, dominio datascience, grupo
  `papers`): convierte papers/<slug>/paper.md (frontmatter YAML + cuerpo IMRaD)
  en papers/<slug>/out/paper.pdf. Reutiliza el motor de paginación de flujo del
  paquete automatic_eda (matplotlib PdfPages, el mismo PDF móvil A5 de los
  informes EDA) — no reimplementa paginación ni toca matplotlib, y no añade
  dependencias. Cada sección IMRaD (# H1) → un Chapter en página nueva; portada
  desde el frontmatter (title/authors/date europea/abstract); detecta las
  imágenes Markdown ![alt](src) que el motor no entiende y las parte en bloques
  Image resueltos contra base_dir y base_dir/figures/. dict-no-throw estricto.
  5 tests verdes (golden + edges: sin frontmatter, path inexistente, figura
  inexistente, ruta directa al .md).

- .claude/agents/paper-reviewer: revisor académico adversarial read-only (gate
  anti paper-mill). Puntúa novedad/rigor/reproducibilidad/validez (0-5), intenta
  refutar cada claim contra la evidencia citada, detecta HARKing contra el
  preregistration.md, exige limitaciones declaradas y claims ≤ evidencia, y
  emite veredicto estructurado JSON (accept|major_revision|reject) con default
  conservador. Tools: Read, Grep, Glob, Bash (sin Edit/Write: solo juzga).

Diseño completo: reports/0001-2026-06-30-papers-system-design.md (agente C).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 20:39:59 +02:00

153 lines
5.4 KiB
Python

from .datascience import (
pearson,
standardize,
min_max_scale,
clip,
detect_outliers,
impute,
histogram,
rolling_window,
autocorrelation,
linspace,
)
from .scrape_amazon_bestsellers import scrape_amazon_bestsellers
from .scrape_google_trends import scrape_google_trends
from .scrape_competitor_prices import scrape_competitor_prices
from .scrape_tiktok_creative import scrape_tiktok_creative
from .scrape_aliexpress_trending import scrape_aliexpress_trending
from .fetch_reddit_search import fetch_reddit_search
from .fetch_hackernews_search import fetch_hackernews_search
from .score_demand_signal import score_demand_signal
from .pull_gsc_search_analytics import pull_gsc_search_analytics
from .summarize_table_duckdb import summarize_table_duckdb
from .summarize_table_pg import summarize_table_pg
from .describe_numeric import describe_numeric
from .summarize_categorical import summarize_categorical
from .infer_semantic_type import infer_semantic_type
from .column_quality_score import column_quality_score
from .select_groupby_keys import select_groupby_keys
from .render_eda_markdown import render_eda_markdown
from .detect_distribution_type import detect_distribution_type
from .spearman_corr import spearman_corr
from .cramers_v import cramers_v
from .theils_u import theils_u
from .correlation_ratio import correlation_ratio
from .mutual_info_columns import mutual_info_columns
from .infer_fk_containment_duckdb import infer_fk_containment_duckdb
from .detect_declared_keys_duckdb import detect_declared_keys_duckdb
from .build_join_graph import build_join_graph
from .association_matrix import association_matrix
from .correlation_matrix_duckdb import correlation_matrix_duckdb
from .pivot_table_duckdb import pivot_table_duckdb
from .groupby_stats_duckdb import groupby_stats_duckdb
from .pca_explained import pca_explained
from .kmeans_segments import kmeans_segments
from .isolation_forest_outliers import isolation_forest_outliers
from .normality_tests import normality_tests
from .trend_slope import trend_slope
from .run_eda_models import run_eda_models
from .project_clusters_2d import project_clusters_2d
from .describe_clusters_llm import describe_clusters_llm
from .detect_latlon_columns import detect_latlon_columns
from .analyze_geo_extent import analyze_geo_extent
from .build_geo_scatter import build_geo_scatter
from .eda_llm_insights import eda_llm_insights
from .build_eda_notebook import build_eda_notebook
from .decode_qr_image import decode_qr_image
from .adf_kpss_stationarity import adf_kpss_stationarity
from .acf_pacf import acf_pacf
from .stl_decompose import stl_decompose
from .to_returns import to_returns
from .fdr_correction import fdr_correction
from .suggest_reexpression import suggest_reexpression
from .exploratory_caveats import exploratory_caveats
from .render_eda_pdf import render_eda_pdf, render_eda_pdf_relational
from .render_automatic_eda_pdf import render_automatic_eda_pdf
from .render_automatic_eda_pptx import render_automatic_eda_pptx
from .render_automatic_eda_markdown import render_automatic_eda_markdown
from .detect_time_column import detect_time_column
from .extract_timeseries_raw import extract_timeseries_raw
from .build_eda_render_ctx import build_eda_render_ctx
from .profile_datetime import profile_datetime
from .resample_timeseries import resample_timeseries
from .add_pdf_internal_links import add_pdf_internal_links
from .suggest_intratable_fk_candidates import suggest_intratable_fk_candidates
from .render_paper_pdf import render_paper_pdf
# Public API of this package: the names bound by the relative imports above,
# listed explicitly so that ``from <package> import *`` exposes exactly these.
# Kept as a single literal list so static tooling can read it; when adding an
# import above, add the matching string here as well.
__all__ = [
    "render_paper_pdf",
    "suggest_intratable_fk_candidates",
    "detect_time_column",
    "extract_timeseries_raw",
    "build_eda_render_ctx",
    "add_pdf_internal_links",
    "profile_datetime",
    "resample_timeseries",
    "render_automatic_eda_pdf",
    "render_automatic_eda_pptx",
    "render_automatic_eda_markdown",
    "decode_qr_image",
    "adf_kpss_stationarity",
    "acf_pacf",
    "stl_decompose",
    "to_returns",
    "fdr_correction",
    "suggest_reexpression",
    "exploratory_caveats",
    # Both names below come from the single .render_eda_pdf module.
    "render_eda_pdf",
    "render_eda_pdf_relational",
    "summarize_table_duckdb",
    "summarize_table_pg",
    "spearman_corr",
    "cramers_v",
    "theils_u",
    "correlation_ratio",
    "mutual_info_columns",
    "infer_fk_containment_duckdb",
    "detect_declared_keys_duckdb",
    "build_join_graph",
    "association_matrix",
    "correlation_matrix_duckdb",
    "pivot_table_duckdb",
    "groupby_stats_duckdb",
    "pca_explained",
    "kmeans_segments",
    "isolation_forest_outliers",
    "normality_tests",
    "trend_slope",
    "run_eda_models",
    "project_clusters_2d",
    "describe_clusters_llm",
    "detect_latlon_columns",
    "analyze_geo_extent",
    "build_geo_scatter",
    "eda_llm_insights",
    "build_eda_notebook",
    "describe_numeric",
    "summarize_categorical",
    "infer_semantic_type",
    "column_quality_score",
    "select_groupby_keys",
    "render_eda_markdown",
    "detect_distribution_type",
    "pull_gsc_search_analytics",
    "scrape_amazon_bestsellers",
    "scrape_google_trends",
    "scrape_competitor_prices",
    "scrape_tiktok_creative",
    "scrape_aliexpress_trending",
    "fetch_reddit_search",
    "fetch_hackernews_search",
    "score_demand_signal",
    # The remaining names are all imported from the .datascience submodule.
    "pearson",
    "standardize",
    "min_max_scale",
    "clip",
    "detect_outliers",
    "impute",
    "histogram",
    "rolling_window",
    "autocorrelation",
    "linspace",
]