# fn_registry/python/functions/datascience/best_central_tendency.py

"""best_central_tendency — Select the best central tendency measure for a distribution type."""

import math
import numpy as np

try:
    from .geometric_mean import geometric_mean
    from .trimmed_mean import trimmed_mean
except ImportError:
    from geometric_mean import geometric_mean  # type: ignore
    from trimmed_mean import trimmed_mean  # type: ignore


def best_central_tendency(values: list[float], dist_type: str) -> tuple[str, float]:
    """Return the most appropriate central tendency measure for a distribution type.

    Mapping from ``dist_type`` to the returned ``(label, value)``:
      "normal-ish"     -> ("mean",            arithmetic mean)
      "lognormal-ish"  -> ("geometric_mean",  result of geometric_mean(values))
      "heavy-tail"     -> ("trimmed_mean_5%", result of trimmed_mean(values, trim=0.05))
      anything else    -> ("median",          median)

    The fallback covers "right-skewed", "left-skewed" and any label that is
    not listed above (e.g. "other", "too_few_samples", unknown strings).

    Args:
        values: Numeric values. A list is the documented input; tuples and
            array-likes with a length (e.g. NumPy arrays) are accepted too.
        dist_type: Distribution type string (from detect_distribution_type).

    Returns:
        Tuple ``(label, value)``. For empty (or ``None``) input the result is
        always ``("median", math.nan)``, regardless of ``dist_type``.
    """
    # Use len() rather than truthiness: `not values` raises ValueError for
    # NumPy arrays / pandas Series ("truth value ... is ambiguous").
    if values is None or len(values) == 0:
        return ("median", math.nan)

    arr = np.asarray(values, dtype=float)

    if dist_type == "normal-ish":
        return ("mean", float(np.mean(arr)))

    if dist_type in ("lognormal-ish", "heavy-tail"):
        # Lists/tuples are forwarded untouched so existing callers see the
        # exact same helper behaviour; other array-likes are converted to a
        # plain list of floats before being handed to the helpers.
        data = values if isinstance(values, (list, tuple)) else arr.tolist()
        if dist_type == "lognormal-ish":
            return ("geometric_mean", geometric_mean(data))
        return ("trimmed_mean_5%", trimmed_mean(data, trim=0.05))

    # right-skewed, left-skewed, other, too_few_samples, unknown
    return ("median", float(np.median(arr)))