feat: funciones Python para core, cybersecurity, datascience y finance
Agrega funciones Python reutilizables organizadas por dominio: - core: composición funcional (pipe, compose, map, filter, reduce, etc.) - cybersecurity: análisis de amenazas y puertos - datascience: estadísticas y detección de outliers - finance: indicadores técnicos y análisis financiero
This commit is contained in:
@@ -0,0 +1,123 @@
|
||||
"""Pure datascience utilities — statistics and numerical functions.
|
||||
|
||||
Uses only math stdlib. No external dependencies.
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
|
||||
def pearson(xs: list, ys: list) -> float:
    """Pearson correlation coefficient between two lists of floats.

    Args:
        xs: First sample.
        ys: Second sample; must have the same length as ``xs``.

    Returns:
        The correlation, limited to ``[-1.0, 1.0]``. Returns ``0.0`` when the
        inputs are empty, differ in length, or either one is constant
        (zero variance).
    """
    n = len(xs)
    if n != len(ys) or n == 0:
        return 0.0
    # Detect constant inputs by comparing the values themselves. The computed
    # mean can differ from the values by a rounding error (e.g. for
    # [0.1, 0.1, 0.1]), which leaves a tiny non-zero deviation that slips past
    # the zero-denominator guard below and yields a bogus correlation.
    first_x = xs[0]
    first_y = ys[0]
    if all(x == first_x for x in xs) or all(y == first_y for y in ys):
        return 0.0
    mean_x = sum(xs) / n
    mean_y = sum(ys) / n
    num = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
    den_x = math.sqrt(sum((x - mean_x) ** 2 for x in xs))
    den_y = math.sqrt(sum((y - mean_y) ** 2 for y in ys))
    # Still needed: squared deviations of very small values can underflow to 0.
    if den_x == 0.0 or den_y == 0.0:
        return 0.0
    r = num / (den_x * den_y)
    # Rounding can push the ratio marginally outside [-1, 1]; limit it with
    # explicit comparisons so that a NaN result is passed through unchanged.
    if r > 1.0:
        return 1.0
    if r < -1.0:
        return -1.0
    return r
|
||||
|
||||
|
||||
def standardize(data: list) -> list:
    """Z-score standardization (mean=0, std=1).

    Uses the population standard deviation (divides by ``n``).

    Args:
        data: Numeric values to standardize.

    Returns:
        A new list of z-scores with the same length as ``data``. Returns
        ``[]`` for empty input and all zeros when the data is constant.
    """
    n = len(data)
    if n == 0:
        return []
    # Compare the values directly instead of relying on std == 0.0: the mean
    # of identical floats can be off by a rounding error (e.g. 0.1 repeated
    # three times), giving a tiny non-zero std and z-scores of +/-1.
    first = data[0]
    if all(x == first for x in data):
        return [0.0] * n
    mean = sum(data) / n
    std = math.sqrt(sum((x - mean) ** 2 for x in data) / n)
    # Still needed: squared deviations of very small values can underflow to 0.
    if std == 0.0:
        return [0.0] * n
    return [(x - mean) / std for x in data]
|
||||
|
||||
|
||||
def min_max_scale(data: list) -> list:
    """Rescale values linearly so the minimum maps to 0 and the maximum to 1.

    Returns an empty list for empty input, and a list of zeros when every
    value is identical (the range would otherwise be zero).
    """
    if len(data) == 0:
        return []
    smallest, largest = min(data), max(data)
    if smallest == largest:
        return [0.0 for _ in data]
    span = largest - smallest
    scaled = []
    for value in data:
        scaled.append((value - smallest) / span)
    return scaled
|
||||
|
||||
|
||||
def clip(data: list, lo: float, hi: float) -> list:
    """Limit every value to the closed interval [lo, hi].

    Each value is first capped at ``hi`` and the result is then raised to at
    least ``lo``. Returns a new list; ``data`` is not modified.
    """
    clipped = []
    for value in data:
        capped = min(hi, value)
        clipped.append(max(lo, capped))
    return clipped
|
||||
|
||||
|
||||
def detect_outliers(data: list, threshold: float) -> list:
    """Flag values whose absolute z-score exceeds ``threshold``.

    Z-scores use the population standard deviation (divides by ``n``).

    Args:
        data: Numeric values to inspect.
        threshold: Cut-off for ``|z-score|``; values strictly above it are
            flagged.

    Returns:
        A list of bools aligned with ``data``, True where the value is an
        outlier. Returns ``[]`` for empty input and all False when the data
        is constant.
    """
    n = len(data)
    if n == 0:
        return []
    # Compare the values directly instead of relying on std == 0.0: the mean
    # of identical floats can be off by a rounding error (e.g. 0.1 repeated
    # three times), giving every point |z| == 1 and flagging all of them
    # whenever threshold < 1.
    first = data[0]
    if all(x == first for x in data):
        return [False] * n
    mean = sum(data) / n
    std = math.sqrt(sum((x - mean) ** 2 for x in data) / n)
    # Still needed: squared deviations of very small values can underflow to 0.
    if std == 0.0:
        return [False] * n
    return [abs((x - mean) / std) > threshold for x in data]
|
||||
|
||||
|
||||
def impute(data: list) -> list:
    """Fill missing entries (None or float NaN) with the mean of the rest.

    When no entry is usable, a list of zeros with the same length as
    ``data`` is returned. The input list is not modified.
    """

    def _is_missing(value) -> bool:
        # An entry counts as missing when it is None or a float NaN.
        if value is None:
            return True
        return isinstance(value, float) and math.isnan(value)

    observed = [value for value in data if not _is_missing(value)]
    if len(observed) == 0:
        return [0.0] * len(data)
    fill = sum(observed) / len(observed)
    filled = []
    for value in data:
        filled.append(fill if _is_missing(value) else value)
    return filled
|
||||
|
||||
|
||||
def histogram(data: list, buckets: int) -> list:
|
||||
"""Returns list of counts per bucket."""
|
||||
if not data or buckets <= 0:
|
||||
return []
|
||||
lo = min(data)
|
||||
hi = max(data)
|
||||
if hi == lo:
|
||||
counts = [0] * buckets
|
||||
counts[0] = len(data)
|
||||
return counts
|
||||
width = (hi - lo) / buckets
|
||||
counts = [0] * buckets
|
||||
for x in data:
|
||||
idx = int((x - lo) / width)
|
||||
if idx >= buckets:
|
||||
idx = buckets - 1
|
||||
counts[idx] += 1
|
||||
return counts
|
||||
|
||||
|
||||
def rolling_window(xs: list, size: int) -> list:
    """Collect every contiguous slice of ``xs`` that has length ``size``.

    Windows are returned in order of their start index. An empty list is
    returned when ``size`` is not positive or exceeds ``len(xs)``.
    """
    total = len(xs)
    if not 0 < size <= total:
        return []
    windows = []
    for start in range(total - size + 1):
        windows.append(xs[start : start + size])
    return windows
|
||||
|
||||
|
||||
def autocorrelation(data: list, lag: int) -> float:
    """Autocorrelation of ``data`` at the given lag.

    Computed as the lagged covariance divided by the variance, both
    normalized by ``n``.

    Args:
        data: Numeric series.
        lag: Non-negative offset, strictly less than ``len(data)``.

    Returns:
        The autocorrelation coefficient. Returns ``0.0`` when ``data`` is
        empty, ``lag`` is out of range, or the series is constant.
    """
    n = len(data)
    if lag < 0 or lag >= n or n == 0:
        return 0.0
    # Compare the values directly instead of relying on var == 0.0: the mean
    # of identical floats can be off by a rounding error (e.g. 0.1 repeated
    # three times), leaving a tiny non-zero variance and a meaningless ratio.
    first = data[0]
    if all(x == first for x in data):
        return 0.0
    mean = sum(data) / n
    var = sum((x - mean) ** 2 for x in data) / n
    # Still needed: squared deviations of very small values can underflow to 0.
    if var == 0.0:
        return 0.0
    cov = sum((data[i] - mean) * (data[i + lag] - mean) for i in range(n - lag)) / n
    return cov / var
|
||||
|
||||
|
||||
def linspace(start: float, stop: float, num: int) -> list:
    """Generate evenly spaced values from start to stop (inclusive).

    Args:
        start: First value of the sequence.
        stop: Last value of the sequence.
        num: Number of values to generate.

    Returns:
        A list of ``num`` values. Returns ``[]`` when ``num`` is not positive
        and ``[start]`` when ``num`` is 1. For ``num >= 2`` the final element
        is exactly ``float(stop)``.
    """
    if num <= 0:
        return []
    if num == 1:
        return [start]
    step = (stop - start) / (num - 1)
    values = [start + i * step for i in range(num - 1)]
    # start + (num - 1) * step can miss stop by a rounding error (e.g.
    # 49 * (1 / 49) == 0.9999999999999999), so pin the endpoint to honour the
    # "inclusive" contract.
    values.append(float(stop))
    return values
|
||||
Reference in New Issue
Block a user