feat: funciones Python para core, cybersecurity, datascience y finance

Agrega funciones Python reutilizables organizadas por dominio: - core: composicion funcional (pipe, compose, map, filter, reduce, etc.) - cybersecurity: analisis de amenazas y puertos - datascience: estadisticas y deteccion de outliers - finance: indicadores tecnicos y analisis financiero
2026-03-29 00:13:50 +01:00
parent ac71d4b079
commit eaed99e52c
55 changed files with 2237 additions and 0 deletions
@@ -0,0 +1,25 @@
+from .datascience import (
+    pearson,
+    standardize,
+    min_max_scale,
+    clip,
+    detect_outliers,
+    impute,
+    histogram,
+    rolling_window,
+    autocorrelation,
+    linspace,
+)
+
+# Names exported by a star-import of this package; this is the same set of
+# functions that is imported from .datascience above.
+__all__ = [
+    "pearson",
+    "standardize",
+    "min_max_scale",
+    "clip",
+    "detect_outliers",
+    "impute",
+    "histogram",
+    "rolling_window",
+    "autocorrelation",
+    "linspace",
+]
@@ -0,0 +1,32 @@
+---
+name: autocorrelation
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def autocorrelation(data: list, lag: int) -> float"
+description: "Calcula la autocorrelacion de una serie temporal para un lag dado."
+tags: [statistics, timeseries, correlation, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+autocorrelation([1, 2, 3, 4, 5, 4, 3, 2, 1], 1)
+# ~0.540
+```
+
+## Notas
+
+Autocorrelacion normalizada por la varianza. Retorna 0.0 si lag es invalido o la varianza es cero.
@@ -0,0 +1,32 @@
+---
+name: clip
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def clip(data: list, lo: float, hi: float) -> list"
+description: "Recorta los valores de la lista al rango [lo, hi]."
+tags: [clipping, bounds, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+clip([1, 5, 10, -3], 0, 7)
+# [1, 5, 7, 0]
+```
+
+## Notas
+
+Funcion pura sin dependencias. Equivalente a numpy.clip pero sin numpy.
@@ -0,0 +1,123 @@
+"""Pure datascience utilities — statistics and numerical functions.
+
+Uses only math stdlib. No external dependencies.
+"""
+
+import math
+
+
+def pearson(xs: list, ys: list) -> float:
+    """Pearson correlation coefficient between two lists of floats."""
+    n = len(xs)
+    if n != len(ys) or n == 0:
+        return 0.0
+    mean_x = sum(xs) / n
+    mean_y = sum(ys) / n
+    num = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
+    den_x = math.sqrt(sum((x - mean_x) ** 2 for x in xs))
+    den_y = math.sqrt(sum((y - mean_y) ** 2 for y in ys))
+    if den_x == 0.0 or den_y == 0.0:
+        return 0.0
+    return num / (den_x * den_y)
+
+
+def standardize(data: list) -> list:
+    """Z-score standardization (mean=0, std=1)."""
+    n = len(data)
+    if n == 0:
+        return []
+    mean = sum(data) / n
+    std = math.sqrt(sum((x - mean) ** 2 for x in data) / n)
+    if std == 0.0:
+        return [0.0] * n
+    return [(x - mean) / std for x in data]
+
+
+def min_max_scale(data: list) -> list:
+    """Scale values to [0, 1] range."""
+    if not data:
+        return []
+    lo = min(data)
+    hi = max(data)
+    if hi == lo:
+        return [0.0] * len(data)
+    return [(x - lo) / (hi - lo) for x in data]
+
+
+def clip(data: list, lo: float, hi: float) -> list:
+    """Clip values to [lo, hi]."""
+    return [max(lo, min(hi, x)) for x in data]
+
+
+def detect_outliers(data: list, threshold: float) -> list:
+    """Returns list of bools, True where |z-score| > threshold."""
+    n = len(data)
+    if n == 0:
+        return []
+    mean = sum(data) / n
+    std = math.sqrt(sum((x - mean) ** 2 for x in data) / n)
+    if std == 0.0:
+        return [False] * n
+    return [abs((x - mean) / std) > threshold for x in data]
+
+
+def impute(data: list) -> list:
+    """Replace None/NaN with mean of non-null values."""
+    valid = [x for x in data if x is not None and not (isinstance(x, float) and math.isnan(x))]
+    if not valid:
+        return [0.0] * len(data)
+    mean = sum(valid) / len(valid)
+    return [
+        mean if (x is None or (isinstance(x, float) and math.isnan(x))) else x
+        for x in data
+    ]
+
+
+def histogram(data: list, buckets: int) -> list:
+    """Returns list of counts per bucket."""
+    if not data or buckets <= 0:
+        return []
+    lo = min(data)
+    hi = max(data)
+    if hi == lo:
+        counts = [0] * buckets
+        counts[0] = len(data)
+        return counts
+    width = (hi - lo) / buckets
+    counts = [0] * buckets
+    for x in data:
+        idx = int((x - lo) / width)
+        if idx >= buckets:
+            idx = buckets - 1
+        counts[idx] += 1
+    return counts
+
+
+def rolling_window(xs: list, size: int) -> list:
+    """Returns list of sublists (sliding windows of given size)."""
+    if size <= 0 or size > len(xs):
+        return []
+    return [xs[i : i + size] for i in range(len(xs) - size + 1)]
+
+
+def autocorrelation(data: list, lag: int) -> float:
+    """Autocorrelation at given lag."""
+    n = len(data)
+    if lag < 0 or lag >= n or n == 0:
+        return 0.0
+    mean = sum(data) / n
+    var = sum((x - mean) ** 2 for x in data) / n
+    if var == 0.0:
+        return 0.0
+    cov = sum((data[i] - mean) * (data[i + lag] - mean) for i in range(n - lag)) / n
+    return cov / var
+
+
+def linspace(start: float, stop: float, num: int) -> list:
+    """Generate evenly spaced values from start to stop (inclusive)."""
+    if num <= 0:
+        return []
+    if num == 1:
+        return [start]
+    step = (stop - start) / (num - 1)
+    return [start + i * step for i in range(num)]
@@ -0,0 +1,32 @@
+---
+name: detect_outliers
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def detect_outliers(data: list, threshold: float) -> list"
+description: "Detecta outliers por z-score. Retorna lista de bools, True donde |z-score| > threshold."
+tags: [statistics, outliers, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: [math]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+detect_outliers([1, 2, 3, 100, 2, 3], 2.0)
+# [False, False, False, True, False, False]
+```
+
+## Notas
+
+Usa z-score poblacional. Threshold tipico: 2.0 o 3.0. Si la desviacion es cero, no hay outliers.
@@ -0,0 +1,32 @@
+---
+name: histogram
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def histogram(data: list, buckets: int) -> list"
+description: "Calcula histograma con N buckets. Retorna lista de conteos por bucket."
+tags: [statistics, histogram, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+histogram([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 5)
+# [2, 2, 2, 2, 2]
+```
+
+## Notas
+
+Los buckets cubren el rango [min, max] uniformemente. El ultimo bucket incluye el valor maximo. Si todos los valores son iguales, todos caen en el primer bucket.
@@ -0,0 +1,32 @@
+---
+name: impute
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def impute(data: list) -> list"
+description: "Reemplaza None y NaN con la media de los valores validos."
+tags: [imputation, missing, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: [math]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+impute([1.0, None, 3.0, float('nan'), 5.0])
+# [1.0, 3.0, 3.0, 3.0, 5.0]
+```
+
+## Notas
+
+Detecta tanto None como float('nan'). Si no hay valores validos, rellena con 0.0.
@@ -0,0 +1,32 @@
+---
+name: linspace
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def linspace(start: float, stop: float, num: int) -> list"
+description: "Genera una lista de valores equiespaciados entre start y stop (inclusivos)."
+tags: [numeric, range, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+linspace(0, 1, 5)
+# [0.0, 0.25, 0.5, 0.75, 1.0]
+```
+
+## Notas
+
+Equivalente a numpy.linspace pero sin numpy. Si num=1, retorna [start]. Si num<=0, retorna lista vacia.
@@ -0,0 +1,32 @@
+---
+name: min_max_scale
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def min_max_scale(data: list) -> list"
+description: "Escala los valores al rango [0, 1] usando min-max normalization."
+tags: [normalization, scaling, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+min_max_scale([2, 4, 6, 8, 10])
+# [0.0, 0.25, 0.5, 0.75, 1.0]
+```
+
+## Notas
+
+Si todos los valores son iguales, retorna lista de ceros. No requiere imports externos.
@@ -0,0 +1,32 @@
+---
+name: pearson
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def pearson(xs: list, ys: list) -> float"
+description: "Calcula el coeficiente de correlacion de Pearson entre dos listas de floats."
+tags: [statistics, correlation, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: [math]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+r = pearson([1, 2, 3], [2, 4, 6])
+# r = 1.0
+```
+
+## Notas
+
+Usa solo math stdlib. No requiere numpy. Retorna 0.0 si las listas tienen longitud diferente, estan vacias, o la desviacion es cero.
@@ -0,0 +1,32 @@
+---
+name: rolling_window
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def rolling_window(xs: list, size: int) -> list"
+description: "Genera ventanas deslizantes de tamanio fijo sobre una lista."
+tags: [windowing, timeseries, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+rolling_window([1, 2, 3, 4, 5], 3)
+# [[1, 2, 3], [2, 3, 4], [3, 4, 5]]
+```
+
+## Notas
+
+Retorna lista vacia si size <= 0 o size > len(xs). Util para calcular medias moviles u otras metricas sobre ventanas.
@@ -0,0 +1,32 @@
+---
+name: standardize
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def standardize(data: list) -> list"
+description: "Estandarizacion Z-score: transforma los datos a media=0 y desviacion=1."
+tags: [statistics, normalization, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: [math]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/datascience/datascience.py"
+---
+
+## Ejemplo
+
+```python
+standardize([10, 20, 30])
+# [-1.2247..., 0.0, 1.2247...]
+```
+
+## Notas
+
+Si la desviacion estandar es cero, retorna lista de ceros. Usa desviacion poblacional (N, no N-1).