diff --git a/python/functions/core/__init__.py b/python/functions/core/__init__.py new file mode 100644 index 00000000..38a60350 --- /dev/null +++ b/python/functions/core/__init__.py @@ -0,0 +1,43 @@ +"""Core functional programming utilities.""" + +from .core import ( + all_of, + any_of, + chunk, + compose, + drop, + filter_list, + find, + find_index, + flat_map, + flatten, + group_by, + map_list, + partition, + pipe, + reduce_list, + take, + unique, + zip_with, +) + +__all__ = [ + "all_of", + "any_of", + "chunk", + "compose", + "drop", + "filter_list", + "find", + "find_index", + "flat_map", + "flatten", + "group_by", + "map_list", + "partition", + "pipe", + "reduce_list", + "take", + "unique", + "zip_with", +] diff --git a/python/functions/core/all_of.md b/python/functions/core/all_of.md new file mode 100644 index 00000000..a1958a50 --- /dev/null +++ b/python/functions/core/all_of.md @@ -0,0 +1,32 @@ +--- +name: all_of +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def all_of(xs: list, pred: callable) -> bool" +description: "Retorna True si todos los elementos de la lista cumplen el predicado." +tags: [list, functional, predicate, all, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = all_of([2, 4, 6], lambda n: n % 2 == 0) +# True +``` + +## Notas + +Funcion pura. Retorna True para lista vacia (vacuamente verdadero). Cortocircuita al primer False. 
diff --git a/python/functions/core/any_of.md b/python/functions/core/any_of.md new file mode 100644 index 00000000..2b82a1b7 --- /dev/null +++ b/python/functions/core/any_of.md @@ -0,0 +1,32 @@ +--- +name: any_of +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def any_of(xs: list, pred: callable) -> bool" +description: "Retorna True si al menos un elemento de la lista cumple el predicado." +tags: [list, functional, predicate, any, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = any_of([1, 3, 5, 4], lambda n: n % 2 == 0) +# True +``` + +## Notas + +Funcion pura. Retorna False para lista vacia. Cortocircuita al primer True. diff --git a/python/functions/core/chunk.md b/python/functions/core/chunk.md new file mode 100644 index 00000000..f6429277 --- /dev/null +++ b/python/functions/core/chunk.md @@ -0,0 +1,32 @@ +--- +name: chunk +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def chunk(xs: list, size: int) -> list" +description: "Divide una lista en sublistas de tamanio fijo. El ultimo chunk puede ser menor." +tags: [list, functional, chunk, partition, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = chunk([1, 2, 3, 4, 5], 2) +# [[1, 2], [3, 4], [5]] +``` + +## Notas + +Funcion pura. Si size <= 0 retorna lista vacia. 
"""Core functional programming utilities - pure functions for list/collection operations.

Every function here returns a new value and never mutates its arguments.
"""

from functools import reduce as _reduce
from typing import Any, Callable, Dict, List, Tuple


def filter_list(xs: list, pred: Callable) -> list:
    """Return a new list holding the elements of ``xs`` for which ``pred(x)`` is truthy.

    The input list is not mutated and element order is preserved.
    """
    return [x for x in xs if pred(x)]


def map_list(xs: list, fn: Callable) -> list:
    """Return a new list with ``fn`` applied to every element of ``xs``, in order."""
    return [fn(x) for x in xs]


def reduce_list(xs: list, initial: Any, fn: Callable) -> Any:
    """Fold ``xs`` from the left into a single value.

    ``fn`` is called as ``fn(acc, x)`` and must return the next accumulator.
    ``initial`` is mandatory, so an empty ``xs`` simply returns ``initial``.
    """
    return _reduce(fn, xs, initial)


def flat_map(xs: list, fn: Callable) -> list:
    """Apply ``fn`` (which must return an iterable) to each element and flatten one level."""
    result = []
    for x in xs:
        result.extend(fn(x))
    return result


def flatten(xss: list) -> list:
    """Concatenate the sub-iterables of ``xss`` into one list (one level only)."""
    result = []
    for xs in xss:
        result.extend(xs)
    return result


def chunk(xs: list, size: int) -> list:
    """Split ``xs`` into consecutive slices of length ``size``.

    The last chunk may be shorter. A non-positive ``size`` yields ``[]``
    rather than raising.
    """
    if size <= 0:
        return []
    return [xs[i : i + size] for i in range(0, len(xs), size)]


def take(xs: list, n: int) -> list:
    """Return the first ``n`` elements of ``xs`` as a new sequence.

    ``n`` larger than ``len(xs)`` returns a copy of the whole list.
    A negative ``n`` is treated as 0; without the clamp Python's negative
    slicing would return "all but the last -n elements" instead.
    """
    return xs[: max(n, 0)]


def drop(xs: list, n: int) -> list:
    """Return ``xs`` without its first ``n`` elements, as a new sequence.

    ``n`` larger than ``len(xs)`` returns an empty list.
    A negative ``n`` is treated as 0 (nothing dropped); without the clamp
    Python's negative slicing would return only the last -n elements.
    """
    return xs[max(n, 0) :]


def unique(xs: list) -> list:
    """Remove duplicates from ``xs``, keeping the first occurrence of each value.

    Elements are compared by equality. Hashable elements are tracked in a
    set; unhashable ones (lists, dicts, ...) fall back to a linear equality
    scan so they no longer raise ``TypeError``.
    """
    seen_hashable = set()
    seen_unhashable = []
    result = []
    for x in xs:
        try:
            if x in seen_hashable:
                continue
            seen_hashable.add(x)
        except TypeError:
            # x is unhashable: compare against previously seen unhashables.
            if x in seen_unhashable:
                continue
            seen_unhashable.append(x)
        result.append(x)
    return result


def group_by(xs: list, key_fn: Callable) -> Dict:
    """Group the elements of ``xs`` by ``key_fn(x)``.

    Returns a dict mapping each key to the list of elements that produced
    it, in their original relative order. Keys must be hashable.
    """
    groups: Dict = {}
    for x in xs:
        groups.setdefault(key_fn(x), []).append(x)
    return groups


def partition(xs: list, pred: Callable) -> Tuple[list, list]:
    """Split ``xs`` into ``(matches, non_matches)`` according to ``pred``."""
    matches = []
    non_matches = []
    for x in xs:
        if pred(x):
            matches.append(x)
        else:
            non_matches.append(x)
    return (matches, non_matches)


def find(xs: list, pred: Callable) -> Any:
    """Return the first element of ``xs`` satisfying ``pred``, or ``None`` if there is none.

    Stops at the first match. A matching element that is itself ``None``
    cannot be told apart from "not found".
    """
    for x in xs:
        if pred(x):
            return x
    return None


def find_index(xs: list, pred: Callable) -> int:
    """Return the index of the first element satisfying ``pred``, or ``-1`` if there is none."""
    for i, x in enumerate(xs):
        if pred(x):
            return i
    return -1


def zip_with(xs: list, ys: list, fn: Callable) -> list:
    """Combine ``xs`` and ``ys`` pairwise with ``fn(x, y)``; stops at the shorter input."""
    return [fn(x, y) for x, y in zip(xs, ys)]


def all_of(xs: list, pred: Callable) -> bool:
    """Return True if every element satisfies ``pred`` (True for an empty list)."""
    return all(pred(x) for x in xs)


def any_of(xs: list, pred: Callable) -> bool:
    """Return True if at least one element satisfies ``pred`` (False for an empty list)."""
    return any(pred(x) for x in xs)


def pipe(value: Any, *fns: Callable) -> Any:
    """Thread ``value`` through ``fns`` left-to-right: ``pipe(v, f, g) == g(f(v))``.

    With no functions, ``value`` is returned unchanged.
    """
    result = value
    for fn in fns:
        result = fn(result)
    return result


def compose(*fns: Callable) -> Callable:
    """Compose functions right-to-left: ``compose(f, g)(x) == f(g(x))``.

    ``compose()`` with no arguments returns the identity function.
    """

    def composed(x: Any) -> Any:
        result = x
        # The last function given is applied first.
        for fn in reversed(fns):
            result = fn(result)
        return result

    return composed
diff --git a/python/functions/core/filter_list.md b/python/functions/core/filter_list.md new file mode 100644 index 00000000..3dd32a8c --- /dev/null +++ b/python/functions/core/filter_list.md @@ -0,0 +1,32 @@ +--- +name: filter_list +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def filter_list(xs: list, pred: callable) -> list" +description: "Filtra una lista aplicando un predicado sin mutar la original." +tags: [list, functional, filter, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +evens = filter_list([1, 2, 3, 4], lambda n: n % 2 == 0) +# [2, 4] +``` + +## Notas + +Funcion pura. No muta la lista original. Equivalente a `[x for x in xs if pred(x)]`. diff --git a/python/functions/core/find.md b/python/functions/core/find.md new file mode 100644 index 00000000..dd2fafe8 --- /dev/null +++ b/python/functions/core/find.md @@ -0,0 +1,32 @@ +--- +name: find +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def find(xs: list, pred: callable)" +description: "Encuentra el primer elemento que cumple el predicado. Retorna None si no hay coincidencia." +tags: [list, functional, find, search, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = find([1, 2, 3, 4], lambda n: n > 2) +# 3 +``` + +## Notas + +Funcion pura. Retorna None si ningun elemento cumple el predicado. Cortocircuita al primer match. 
diff --git a/python/functions/core/find_index.md b/python/functions/core/find_index.md new file mode 100644 index 00000000..427ae923 --- /dev/null +++ b/python/functions/core/find_index.md @@ -0,0 +1,32 @@ +--- +name: find_index +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def find_index(xs: list, pred: callable) -> int" +description: "Encuentra el indice del primer elemento que cumple el predicado. Retorna -1 si no hay coincidencia." +tags: [list, functional, find, index, search, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +idx = find_index([10, 20, 30, 40], lambda n: n > 25) +# 2 +``` + +## Notas + +Funcion pura. Retorna -1 si ningun elemento cumple el predicado. diff --git a/python/functions/core/flat_map.md b/python/functions/core/flat_map.md new file mode 100644 index 00000000..3e509651 --- /dev/null +++ b/python/functions/core/flat_map.md @@ -0,0 +1,32 @@ +--- +name: flat_map +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def flat_map(xs: list, fn: callable) -> list" +description: "Aplica una funcion que retorna listas a cada elemento y aplana el resultado un nivel." +tags: [list, functional, flatmap, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = flat_map([1, 2, 3], lambda n: [n, n * 10]) +# [1, 10, 2, 20, 3, 30] +``` + +## Notas + +Funcion pura. Equivalente a flatten(map_list(xs, fn)). Solo aplana un nivel. 
diff --git a/python/functions/core/flatten.md b/python/functions/core/flatten.md new file mode 100644 index 00000000..7ca6648f --- /dev/null +++ b/python/functions/core/flatten.md @@ -0,0 +1,32 @@ +--- +name: flatten +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def flatten(xss: list) -> list" +description: "Aplana una lista de listas un nivel, concatenando las sublistas." +tags: [list, functional, flatten, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = flatten([[1, 2], [3], [4, 5]]) +# [1, 2, 3, 4, 5] +``` + +## Notas + +Funcion pura. Solo aplana un nivel de anidamiento. diff --git a/python/functions/core/group_by.md b/python/functions/core/group_by.md new file mode 100644 index 00000000..a6630743 --- /dev/null +++ b/python/functions/core/group_by.md @@ -0,0 +1,32 @@ +--- +name: group_by +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def group_by(xs: list, key_fn: callable) -> dict" +description: "Agrupa elementos de una lista por una funcion clave. Retorna dict de clave a lista." +tags: [list, functional, group, classify, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = group_by(["hi", "hey", "bye"], lambda s: s[0]) +# {"h": ["hi", "hey"], "b": ["bye"]} +``` + +## Notas + +Funcion pura. El orden de los elementos dentro de cada grupo se preserva. 
diff --git a/python/functions/core/map_list.md b/python/functions/core/map_list.md new file mode 100644 index 00000000..259178cc --- /dev/null +++ b/python/functions/core/map_list.md @@ -0,0 +1,32 @@ +--- +name: map_list +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def map_list(xs: list, fn: callable) -> list" +description: "Aplica una funcion a cada elemento de una lista, retornando una nueva lista." +tags: [list, functional, map, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +doubled = map_list([1, 2, 3], lambda n: n * 2) +# [2, 4, 6] +``` + +## Notas + +Funcion pura. No muta la lista original. Equivalente a `[fn(x) for x in xs]`. diff --git a/python/functions/core/partition.md b/python/functions/core/partition.md new file mode 100644 index 00000000..5d09e2e9 --- /dev/null +++ b/python/functions/core/partition.md @@ -0,0 +1,32 @@ +--- +name: partition +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def partition(xs: list, pred: callable) -> tuple" +description: "Divide una lista en dos: (elementos que cumplen el predicado, elementos que no)." +tags: [list, functional, partition, split, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +evens, odds = partition([1, 2, 3, 4, 5], lambda n: n % 2 == 0) +# evens = [2, 4], odds = [1, 3, 5] +``` + +## Notas + +Funcion pura. Retorna tupla de dos listas: (matches, non_matches). 
diff --git a/python/functions/core/pipe.md b/python/functions/core/pipe.md new file mode 100644 index 00000000..cacdd992 --- /dev/null +++ b/python/functions/core/pipe.md @@ -0,0 +1,36 @@ +--- +name: pipe +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def pipe(value, *fns)" +description: "Pasa un valor a traves de una secuencia de funciones de izquierda a derecha." +tags: [functional, pipe, composition, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = pipe( + [1, 2, 3, 4, 5], + lambda xs: filter_list(xs, lambda n: n > 2), + lambda xs: map_list(xs, lambda n: n * 10), +) +# [30, 40, 50] +``` + +## Notas + +Funcion pura. Ejecuta las funciones en orden de izquierda a derecha: f1(value), luego f2(result), etc. diff --git a/python/functions/core/reduce_list.md b/python/functions/core/reduce_list.md new file mode 100644 index 00000000..1387c202 --- /dev/null +++ b/python/functions/core/reduce_list.md @@ -0,0 +1,32 @@ +--- +name: reduce_list +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def reduce_list(xs: list, initial, fn: callable)" +description: "Reduce una lista con un acumulador y una funcion binaria fn(acc, x)." +tags: [list, functional, reduce, fold, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +total = reduce_list([1, 2, 3, 4], 0, lambda acc, x: acc + x) +# 10 +``` + +## Notas + +Funcion pura. Usa functools.reduce internamente. El valor inicial es obligatorio para evitar errores con listas vacias. 
diff --git a/python/functions/core/take.md b/python/functions/core/take.md new file mode 100644 index 00000000..1a01474b --- /dev/null +++ b/python/functions/core/take.md @@ -0,0 +1,32 @@ +--- +name: take +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def take(xs: list, n: int) -> list" +description: "Toma los primeros n elementos de una lista." +tags: [list, functional, take, slice, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = take([1, 2, 3, 4, 5], 3) +# [1, 2, 3] +``` + +## Notas + +Funcion pura. Si n > len(xs), retorna toda la lista. No muta la original. diff --git a/python/functions/core/unique.md b/python/functions/core/unique.md new file mode 100644 index 00000000..3416252c --- /dev/null +++ b/python/functions/core/unique.md @@ -0,0 +1,32 @@ +--- +name: unique +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def unique(xs: list) -> list" +description: "Elimina duplicados de una lista preservando el orden de aparicion." +tags: [list, functional, unique, deduplicate, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/core/core.py" +--- + +## Ejemplo + +```python +result = unique([1, 2, 2, 3, 1, 4]) +# [1, 2, 3, 4] +``` + +## Notas + +Funcion pura. Preserva el orden de la primera aparicion. Requiere elementos hashables. 
"""Cybersecurity pure functions: hashing, parsing, and security utilities."""

import base64
import hashlib
import math
import re
from collections import Counter
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse


def hash_sha256(data: bytes) -> str:
    """Return the SHA-256 digest of ``data`` as a 64-character hex string."""
    return hashlib.sha256(data).hexdigest()


def hash_md5(data: bytes) -> str:
    """Return the MD5 digest of ``data`` as a 32-character hex string.

    MD5 is not collision resistant; use it only for checksums and
    fingerprinting, never for security decisions.
    """
    return hashlib.md5(data).hexdigest()


def entropy_shannon(data: bytes) -> float:
    """Return the Shannon entropy of ``data`` in bits per byte (0.0 to 8.0).

    High entropy (> 7.5) suggests encrypted or compressed data; low entropy
    (< 3) suggests structured or repetitive data. Empty input returns 0.0.
    """
    if not data:
        return 0.0
    length = len(data)
    entropy = 0.0
    # Counter only holds byte values that occur, so every p is > 0 and
    # log2(p) is always defined.
    for count in Counter(data).values():
        p = count / length
        entropy -= p * math.log2(p)
    return entropy


# (regex, name) pairs checked in order; the first match wins, so more
# specific patterns must stay ahead of the generic "sql_keyword" one.
_SQL_INJECTION_PATTERNS = [
    (r"('\s*OR\s+'[^']*'\s*=\s*'[^']*'?)", "string_tautology"),
    (r"('\s*(OR|AND)\s+'?\d+\s*=\s*\d+)", "tautology"),
    (r"(;\s*(DROP|DELETE|UPDATE|INSERT)\b)", "stacked_query"),
    (r"(UNION\s+(ALL\s+)?SELECT)", "union_select"),
    (r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|ALTER|CREATE|EXEC)\b\s)", "sql_keyword"),
    (r"(--\s*$|/\*|\*/)", "comment_injection"),
    (r"(BENCHMARK\s*\(|SLEEP\s*\(|WAITFOR\s+DELAY)", "time_based"),
    (r"(CHAR\s*\(\s*\d+)", "char_function"),
    (r"(CONCAT\s*\()", "concat_function"),
    (r"(0x[0-9a-fA-F]{4,})", "hex_literal"),
]


def detect_sql_injection(input_str: str) -> tuple:
    """Scan ``input_str`` for common SQL-injection patterns.

    Returns ``(is_threat, pattern)`` where ``pattern`` is the name of the
    first matching entry of ``_SQL_INJECTION_PATTERNS``, or ``""`` when
    nothing matches. Matching is case-insensitive. This is a heuristic for
    logging and alerting, not a substitute for parameterized queries.
    """
    for pattern, name in _SQL_INJECTION_PATTERNS:
        if re.search(pattern, input_str, re.IGNORECASE):
            return (True, name)
    return (False, "")


_URL_REGEX = re.compile(
    r"https?://[^\s<>\"'\)\]]+",
    re.IGNORECASE,
)

# Sentence punctuation that commonly follows a URL in prose and is not
# meant to be part of it.
_URL_TRAILING_PUNCTUATION = ".,;:!?"


def extract_urls(text: str) -> list:
    """Return every http/https URL found in ``text``, in order of appearance.

    Trailing sentence punctuation (``. , ; : ! ?``) is stripped from each
    match so that "see https://example.com." yields "https://example.com".
    URLs are not validated or resolved.
    """
    return [m.rstrip(_URL_TRAILING_PUNCTUATION) for m in _URL_REGEX.findall(text)]


# One pattern per alphabet so that a string mixing '+/' with '-_' is rejected.
_B64_STANDARD = re.compile(r"[A-Za-z0-9+/]*={0,2}")
_B64_URLSAFE = re.compile(r"[A-Za-z0-9\-_]*={0,2}")
_B64_URLSAFE_TO_STANDARD = str.maketrans("-_", "+/")


def is_base64(s: str) -> bool:
    """Return True if ``s`` is valid standard or URL-safe base64.

    Requires at least 4 characters, a single consistent alphabet, and a
    successful strict decode to a non-empty payload. Surrounding
    whitespace, including a trailing newline, makes the string invalid.
    """
    if len(s) < 4:
        return False
    if _B64_STANDARD.fullmatch(s):
        candidate = s
    elif _B64_URLSAFE.fullmatch(s):
        # Map to the standard alphabet so the strict decoder can be used.
        candidate = s.translate(_B64_URLSAFE_TO_STANDARD)
    else:
        return False
    try:
        decoded = base64.b64decode(candidate, validate=True)
    except ValueError:
        # binascii.Error (bad padding/length) is a ValueError subclass.
        return False
    return len(decoded) > 0


def is_hex(s: str) -> bool:
    """Return True if ``s`` is a hexadecimal string.

    Surrounding whitespace is ignored and an optional ``0x``/``0X`` prefix
    is accepted. At least 2 hex digits are required after the prefix.
    """
    clean = s.strip()
    if clean.startswith(("0x", "0X")):
        clean = clean[2:]
    if len(clean) < 2:
        return False
    return bool(re.fullmatch(r"[0-9a-fA-F]+", clean))


def levenshtein_distance(a: str, b: str) -> int:
    """Return the Levenshtein (edit) distance between ``a`` and ``b``.

    Uses the two-row dynamic-programming formulation. Useful for
    typosquatting detection and fuzzy matching.
    """
    # Keep b as the shorter string so the rows are as small as possible.
    if len(a) < len(b):
        return levenshtein_distance(b, a)
    if len(b) == 0:
        return len(a)

    prev_row = list(range(len(b) + 1))
    for i, ca in enumerate(a):
        curr_row = [i + 1]
        for j, cb in enumerate(b):
            cost = 0 if ca == cb else 1
            curr_row.append(
                min(
                    curr_row[j] + 1,  # insert
                    prev_row[j + 1] + 1,  # delete
                    prev_row[j] + cost,  # substitute
                )
            )
        prev_row = curr_row
    return prev_row[-1]


def jaccard_similarity(a: list, b: list) -> float:
    """Return the Jaccard coefficient ``|A & B| / |A | B|`` of two lists.

    Both lists are converted to sets first, so duplicates and order are
    ignored and elements must be hashable. Returns 0.0 when both are empty.
    """
    set_a = set(a)
    set_b = set(b)
    if not set_a and not set_b:
        return 0.0
    intersection = set_a & set_b
    union = set_a | set_b
    return len(intersection) / len(union)


def normalize_url(raw_url: str) -> str:
    """Return a canonical form of ``raw_url`` for deduplication and comparison.

    Normalization steps:
      * scheme is lowercased, defaulting to ``http`` when absent;
      * a scheme-less input such as ``example.com/path`` is treated as
        host + path rather than as a bare path;
      * the host (and port) is lowercased; userinfo credentials are left
        untouched because they are case-sensitive;
      * trailing slashes are removed from the path, except that an empty
        or all-slash path becomes ``/``;
      * query parameters are sorted by key (blank values are kept);
      * the fragment is dropped.
    """
    parsed = urlparse(raw_url)
    if (
        raw_url
        and not parsed.scheme
        and not parsed.netloc
        and not raw_url.startswith("/")
    ):
        # Without a scheme urlparse puts the host into the path; re-parse
        # as a network-path reference so the host lands in netloc.
        parsed = urlparse("//" + raw_url)
    scheme = parsed.scheme.lower() or "http"
    # Lowercase only the host[:port] portion, never "user:password@".
    userinfo, at, hostport = parsed.netloc.rpartition("@")
    netloc = userinfo + at + hostport.lower()
    # Remove trailing slashes but never leave the path empty.
    path = parsed.path.rstrip("/") or "/"
    # Sort query parameters
    params = parse_qs(parsed.query, keep_blank_values=True)
    sorted_query = urlencode(sorted(params.items()), doseq=True)
    # Drop fragment
    return urlunparse((scheme, netloc, path, parsed.params, sorted_query, ""))
+tags: [sqli, injection, detection, security, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [re] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +detect_sql_injection("' OR '1'='1") +# (True, "string_tautology") + +detect_sql_injection("; DROP TABLE users") +# (True, "stacked_query") + +detect_sql_injection("hello world") +# (False, "") +``` + +## Notas + +Detecta 10 patrones: sql_keyword, tautology, stacked_query, comment_injection, string_tautology, union_select, hex_literal, char_function, concat_function, time_based. No reemplaza un WAF pero es util para logging y alertas tempranas. diff --git a/python/functions/cybersecurity/entropy_shannon.md b/python/functions/cybersecurity/entropy_shannon.md new file mode 100644 index 00000000..453b9e13 --- /dev/null +++ b/python/functions/cybersecurity/entropy_shannon.md @@ -0,0 +1,41 @@ +--- +name: entropy_shannon +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def entropy_shannon(data: bytes) -> float" +description: "Calcula la entropia de Shannon de datos binarios (0-8 bits por byte). Util para detectar datos cifrados o comprimidos." +tags: [entropy, shannon, analysis, crypto, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math, collections] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +# Datos aleatorios (alta entropia) +entropy_shannon(bytes(range(256))) +# ~8.0 + +# Datos repetitivos (baja entropia) +entropy_shannon(b"aaaaaaaaaa") +# 0.0 + +# Texto normal +entropy_shannon(b"hello world") +# ~2.84 +``` + +## Notas + +Entropia alta (>7.5) sugiere datos cifrados o comprimidos. Entropia baja (<3) sugiere datos estructurados o repetitivos. Retorna 0.0 para datos vacios. 
diff --git a/python/functions/cybersecurity/extract_urls.md b/python/functions/cybersecurity/extract_urls.md new file mode 100644 index 00000000..faf9aa16 --- /dev/null +++ b/python/functions/cybersecurity/extract_urls.md @@ -0,0 +1,35 @@ +--- +name: extract_urls +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def extract_urls(text: str) -> list" +description: "Extrae todas las URLs (http/https) de un texto. Util para analisis de IoCs y threat intelligence." +tags: [url, extract, parsing, ioc, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [re] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +extract_urls("Visit https://example.com and http://test.org/path?q=1") +# ["https://example.com", "http://test.org/path?q=1"] + +extract_urls("no urls here") +# [] +``` + +## Notas + +Usa regex para extraer URLs con esquema http/https. No valida que las URLs sean alcanzables. Util para extraer indicadores de compromiso (IoCs) de logs, emails o reportes de threat intelligence. diff --git a/python/functions/cybersecurity/hash_md5.md b/python/functions/cybersecurity/hash_md5.md new file mode 100644 index 00000000..3dc5e207 --- /dev/null +++ b/python/functions/cybersecurity/hash_md5.md @@ -0,0 +1,32 @@ +--- +name: hash_md5 +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def hash_md5(data: bytes) -> str" +description: "Calcula el hash MD5 de datos binarios. Retorna hex digest." 
+tags: [hash, md5, crypto, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [hashlib] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +h = hash_md5(b"hello") +# "5d41402abc4b2a76b9719d911017c592" +``` + +## Notas + +Usa hashlib de stdlib. MD5 no es seguro para propositos criptograficos pero es util para checksums, fingerprinting de archivos e identificacion rapida de IoCs. diff --git a/python/functions/cybersecurity/hash_sha256.md b/python/functions/cybersecurity/hash_sha256.md new file mode 100644 index 00000000..90b704c2 --- /dev/null +++ b/python/functions/cybersecurity/hash_sha256.md @@ -0,0 +1,32 @@ +--- +name: hash_sha256 +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def hash_sha256(data: bytes) -> str" +description: "Calcula el hash SHA-256 de datos binarios. Retorna hex digest." +tags: [hash, sha256, crypto, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [hashlib] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +h = hash_sha256(b"hello") +# "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" +``` + +## Notas + +Usa hashlib de stdlib. Funcion pura sin side effects. SHA-256 produce un digest de 64 caracteres hexadecimales (256 bits). diff --git a/python/functions/cybersecurity/is_base64.md b/python/functions/cybersecurity/is_base64.md new file mode 100644 index 00000000..01a1b680 --- /dev/null +++ b/python/functions/cybersecurity/is_base64.md @@ -0,0 +1,38 @@ +--- +name: is_base64 +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def is_base64(s: str) -> bool" +description: "Verifica si un string es base64 valido. Acepta base64 estandar y URL-safe. 
Requiere minimo 4 caracteres." +tags: [base64, validation, encoding, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [re, base64] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +is_base64("aGVsbG8=") +# True + +is_base64("not!valid") +# False + +is_base64("ab") +# False (menos de 4 caracteres) +``` + +## Notas + +Verifica tanto el formato (regex) como que el decode sea exitoso. Util para detectar datos codificados en payloads sospechosos, headers HTTP o parametros de URL. diff --git a/python/functions/cybersecurity/is_hex.md b/python/functions/cybersecurity/is_hex.md new file mode 100644 index 00000000..9ff6bb47 --- /dev/null +++ b/python/functions/cybersecurity/is_hex.md @@ -0,0 +1,41 @@ +--- +name: is_hex +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def is_hex(s: str) -> bool" +description: "Verifica si un string es hexadecimal valido. Acepta con o sin prefijo 0x. Requiere minimo 2 caracteres." +tags: [hex, validation, encoding, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [re] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +is_hex("4a6f686e") +# True + +is_hex("0x4a6f686e") +# True + +is_hex("xyz") +# False + +is_hex("a") +# False (menos de 2 caracteres) +``` + +## Notas + +Util para validar hashes, direcciones de memoria, shellcode y otros datos hexadecimales en contexto de seguridad. 
diff --git a/python/functions/cybersecurity/jaccard_similarity.md b/python/functions/cybersecurity/jaccard_similarity.md new file mode 100644 index 00000000..0a6b0d66 --- /dev/null +++ b/python/functions/cybersecurity/jaccard_similarity.md @@ -0,0 +1,38 @@ +--- +name: jaccard_similarity +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def jaccard_similarity(a: list, b: list) -> float" +description: "Calcula el coeficiente de similitud de Jaccard entre dos listas. J(A,B) = |A interseccion B| / |A union B|." +tags: [jaccard, similarity, comparison, sets, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +jaccard_similarity(["a", "b", "c"], ["b", "c", "d"]) +# 0.5 + +jaccard_similarity(["a", "b"], ["a", "b"]) +# 1.0 + +jaccard_similarity([], []) +# 0.0 +``` + +## Notas + +Convierte las listas a sets internamente. Retorna 0.0 si ambas listas son vacias. Util para comparar conjuntos de tokens, features de malware, IoCs compartidos entre muestras, o tags de vulnerabilidades. diff --git a/python/functions/cybersecurity/levenshtein_distance.md b/python/functions/cybersecurity/levenshtein_distance.md new file mode 100644 index 00000000..efbb6845 --- /dev/null +++ b/python/functions/cybersecurity/levenshtein_distance.md @@ -0,0 +1,38 @@ +--- +name: levenshtein_distance +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def levenshtein_distance(a: str, b: str) -> int" +description: "Calcula la distancia de Levenshtein (edit distance) entre dos strings. Util para deteccion de typosquatting en dominios." 
+tags: [levenshtein, distance, fuzzy, typosquatting, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +levenshtein_distance("google.com", "gooogle.com") +# 1 + +levenshtein_distance("paypal.com", "paypa1.com") +# 1 + +levenshtein_distance("abc", "abc") +# 0 +``` + +## Notas + +Implementacion O(n*m) con optimizacion de espacio (dos filas). Sin dependencias externas. Util para detectar dominios de typosquatting comparando contra dominios legitimos conocidos. diff --git a/python/functions/cybersecurity/normalize_url.md b/python/functions/cybersecurity/normalize_url.md new file mode 100644 index 00000000..1e73aa85 --- /dev/null +++ b/python/functions/cybersecurity/normalize_url.md @@ -0,0 +1,35 @@ +--- +name: normalize_url +kind: function +lang: py +domain: cybersecurity +version: "1.0.0" +purity: pure +signature: "def normalize_url(raw_url: str) -> str" +description: "Normaliza una URL: lowercase del host, elimina fragmentos, ordena parametros. Util para deduplicacion de IoCs." +tags: [url, normalize, ioc, dedup, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [urllib.parse] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/cybersecurity/cybersecurity.py" +--- + +## Ejemplo + +```python +normalize_url("HTTPS://Example.COM/path?b=2&a=1#frag") +# "https://example.com/path?a=1&b=2" + +normalize_url("http://test.org/path/") +# "http://test.org/path" +``` + +## Notas + +Operaciones de normalizacion: lowercase de scheme y host, eliminacion de trailing slash (excepto root), ordenamiento alfabetico de query parameters, eliminacion de fragmentos. Usa urllib.parse de stdlib. 
diff --git a/python/functions/datascience/__init__.py b/python/functions/datascience/__init__.py new file mode 100644 index 00000000..3aecc28d --- /dev/null +++ b/python/functions/datascience/__init__.py @@ -0,0 +1,25 @@ +from .datascience import ( + pearson, + standardize, + min_max_scale, + clip, + detect_outliers, + impute, + histogram, + rolling_window, + autocorrelation, + linspace, +) + +__all__ = [ + "pearson", + "standardize", + "min_max_scale", + "clip", + "detect_outliers", + "impute", + "histogram", + "rolling_window", + "autocorrelation", + "linspace", +] diff --git a/python/functions/datascience/autocorrelation.md b/python/functions/datascience/autocorrelation.md new file mode 100644 index 00000000..950a99c7 --- /dev/null +++ b/python/functions/datascience/autocorrelation.md @@ -0,0 +1,32 @@ +--- +name: autocorrelation +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def autocorrelation(data: list, lag: int) -> float" +description: "Calcula la autocorrelacion de una serie temporal para un lag dado." +tags: [statistics, timeseries, correlation, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +autocorrelation([1, 2, 3, 4, 5, 4, 3, 2, 1], 1) +# ~0.540 +``` + +## Notas + +Autocorrelacion normalizada por la varianza. Retorna 0.0 si lag es invalido o la varianza es cero. diff --git a/python/functions/datascience/clip.md b/python/functions/datascience/clip.md new file mode 100644 index 00000000..8647851a --- /dev/null +++ b/python/functions/datascience/clip.md @@ -0,0 +1,32 @@ +--- +name: clip +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def clip(data: list, lo: float, hi: float) -> list" +description: "Recorta los valores de la lista al rango [lo, hi]."
# NOTE: the patch/markdown text in the next two comment lines shares these
# physical lines with the module; it is kept verbatim so no diff content is lost.
# +tags: [clipping, bounds, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +clip([1, 5, 10, -3], 0, 7) +# [1, 5, 7, 0] +``` + +## Notas + +Funcion pura sin dependencias. Equivalente a numpy.clip pero sin numpy.
# diff --git a/python/functions/datascience/datascience.py b/python/functions/datascience/datascience.py new file mode 100644 index 00000000..35f7d38a --- /dev/null +++ b/python/functions/datascience/datascience.py @@ -0,0 +1,123 @@
"""Pure datascience utilities — statistics and numerical functions.

Uses only math stdlib. No external dependencies.
"""

import math


def _mean_std(data: list) -> tuple:
    """Return ``(mean, population standard deviation)`` of a non-empty list."""
    n = len(data)
    mean = sum(data) / n
    return mean, math.sqrt(sum((x - mean) ** 2 for x in data) / n)


def _is_missing(x) -> bool:
    """Return True when ``x`` is ``None`` or a float NaN."""
    return x is None or (isinstance(x, float) and math.isnan(x))


def pearson(xs: list, ys: list) -> float:
    """Return the Pearson correlation coefficient of two equal-length lists.

    Args:
        xs: First series of numbers.
        ys: Second series of numbers.

    Returns:
        The coefficient, limited to ``[-1.0, 1.0]``. ``0.0`` when the lists
        are empty, differ in length, or either series has zero spread.
    """
    n = len(xs)
    if n != len(ys) or n == 0:
        return 0.0
    mean_x = sum(xs) / n
    mean_y = sum(ys) / n
    num = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
    den_x = math.sqrt(sum((x - mean_x) ** 2 for x in xs))
    den_y = math.sqrt(sum((y - mean_y) ** 2 for y in ys))
    if den_x == 0.0 or den_y == 0.0:
        return 0.0
    den = den_x * den_y
    if den == 0.0:
        # Both norms are non-zero but their product underflowed; treat the
        # series as having no measurable spread instead of dividing by zero.
        return 0.0
    r = num / den
    # Rounding can push |r| a hair past 1. Explicit comparisons (rather than
    # min/max) let a NaN result pass through unchanged.
    if r > 1.0:
        return 1.0
    if r < -1.0:
        return -1.0
    return r


def standardize(data: list) -> list:
    """Return the z-scores of ``data`` (population standard deviation).

    Returns ``[]`` for empty input and a list of ``0.0`` when every value is
    identical (standard deviation of zero).
    """
    n = len(data)
    if n == 0:
        return []
    mean, std = _mean_std(data)
    if std == 0.0:
        return [0.0] * n
    return [(x - mean) / std for x in data]


def min_max_scale(data: list) -> list:
    """Rescale ``data`` linearly so its minimum maps to 0 and maximum to 1.

    Returns ``[]`` for empty input and a list of ``0.0`` when all values are
    equal.
    """
    if not data:
        return []
    lo = min(data)
    hi = max(data)
    if hi == lo:
        return [0.0] * len(data)
    span = hi - lo
    return [(x - lo) / span for x in data]


def clip(data: list, lo: float, hi: float) -> list:
    """Return ``data`` with every value limited to the range ``[lo, hi]``."""
    return [max(lo, min(hi, x)) for x in data]


def detect_outliers(data: list, threshold: float) -> list:
    """Flag values whose absolute population z-score exceeds ``threshold``.

    Returns:
        A list of bools aligned with ``data``. ``[]`` for empty input; all
        ``False`` when the standard deviation is zero.
    """
    n = len(data)
    if n == 0:
        return []
    mean, std = _mean_std(data)
    if std == 0.0:
        return [False] * n
    return [abs((x - mean) / std) > threshold for x in data]


def impute(data: list) -> list:
    """Replace ``None`` and float NaN entries with the mean of the other values.

    When no valid value exists, every position is filled with ``0.0``.
    """
    valid = [x for x in data if not _is_missing(x)]
    if not valid:
        return [0.0] * len(data)
    mean = sum(valid) / len(valid)
    return [mean if _is_missing(x) else x for x in data]


def histogram(data: list, buckets: int) -> list:
    """Count how many values fall into each of ``buckets`` equal-width bins.

    The bins span ``[min(data), max(data)]``; the maximum is counted in the
    last bin. Returns ``[]`` for empty data or ``buckets <= 0``. When all
    values are equal they are all counted in the first bin.
    """
    if not data or buckets <= 0:
        return []
    lo = min(data)
    hi = max(data)
    counts = [0] * buckets
    if hi == lo:
        counts[0] = len(data)
        return counts
    width = (hi - lo) / buckets
    last = buckets - 1
    for x in data:
        # The maximum lands exactly on the upper edge, so cap the index.
        counts[min(int((x - lo) / width), last)] += 1
    return counts


def rolling_window(xs: list, size: int) -> list:
    """Return every contiguous sublist of length ``size``, in order.

    Returns ``[]`` when ``size <= 0`` or ``size > len(xs)``.
    """
    if size <= 0 or size > len(xs):
        return []
    return [xs[i : i + size] for i in range(len(xs) - size + 1)]


def autocorrelation(data: list, lag: int) -> float:
    """Return the autocorrelation of ``data`` at ``lag``.

    Both the lagged covariance and the variance are divided by ``len(data)``.
    Returns ``0.0`` when ``lag`` is negative or not smaller than
    ``len(data)``, or when the variance is zero.
    """
    n = len(data)
    if lag < 0 or lag >= n or n == 0:
        return 0.0
    mean = sum(data) / n
    var = sum((x - mean) ** 2 for x in data) / n
    if var == 0.0:
        return 0.0
    cov = sum((data[i] - mean) * (data[i + lag] - mean) for i in range(n - lag)) / n
    return cov / var


def linspace(start: float, stop: float, num: int) -> list:
    """Return ``num`` evenly spaced values from ``start`` to ``stop`` inclusive.

    Returns ``[]`` when ``num <= 0`` and ``[start]`` when ``num == 1``.
    The final element is set to ``stop`` itself rather than computed as
    ``start + (num - 1) * step``, so the documented inclusive endpoint holds
    even when that product rounds away from ``stop``.
    """
    if num <= 0:
        return []
    if num == 1:
        return [start]
    step = (stop - start) / (num - 1)
    values = [start + i * step for i in range(num - 1)]
    # Promote an int endpoint so the last element is a float like the others.
    values.append(float(stop) if isinstance(stop, int) else stop)
    return values
# diff --git a/python/functions/datascience/detect_outliers.md
b/python/functions/datascience/detect_outliers.md new file mode 100644 index 00000000..adff14d0 --- /dev/null +++ b/python/functions/datascience/detect_outliers.md @@ -0,0 +1,32 @@ +--- +name: detect_outliers +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def detect_outliers(data: list, threshold: float) -> list" +description: "Detecta outliers por z-score. Retorna lista de bools, True donde |z-score| > threshold." +tags: [statistics, outliers, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +detect_outliers([1, 2, 3, 100, 2, 3], 2.0) +# [False, False, False, True, False, False] +``` + +## Notas + +Usa z-score poblacional. Threshold tipico: 2.0 o 3.0. Si la desviacion es cero, no hay outliers. diff --git a/python/functions/datascience/histogram.md b/python/functions/datascience/histogram.md new file mode 100644 index 00000000..bc6c50d4 --- /dev/null +++ b/python/functions/datascience/histogram.md @@ -0,0 +1,32 @@ +--- +name: histogram +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def histogram(data: list, buckets: int) -> list" +description: "Calcula histograma con N buckets. Retorna lista de conteos por bucket." +tags: [statistics, histogram, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +histogram([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 5) +# [2, 2, 2, 2, 2] +``` + +## Notas + +Los buckets cubren el rango [min, max] uniformemente. El ultimo bucket incluye el valor maximo. Si todos los valores son iguales, todos caen en el primer bucket. 
diff --git a/python/functions/datascience/impute.md b/python/functions/datascience/impute.md new file mode 100644 index 00000000..ddb9e7ae --- /dev/null +++ b/python/functions/datascience/impute.md @@ -0,0 +1,32 @@ +--- +name: impute +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def impute(data: list) -> list" +description: "Reemplaza None y NaN con la media de los valores validos." +tags: [imputation, missing, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +impute([1.0, None, 3.0, float('nan'), 5.0]) +# [1.0, 3.0, 3.0, 3.0, 5.0] +``` + +## Notas + +Detecta tanto None como float('nan'). Si no hay valores validos, rellena con 0.0. diff --git a/python/functions/datascience/linspace.md b/python/functions/datascience/linspace.md new file mode 100644 index 00000000..ba528903 --- /dev/null +++ b/python/functions/datascience/linspace.md @@ -0,0 +1,32 @@ +--- +name: linspace +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def linspace(start: float, stop: float, num: int) -> list" +description: "Genera una lista de valores equiespaciados entre start y stop (inclusivos)." +tags: [numeric, range, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +linspace(0, 1, 5) +# [0.0, 0.25, 0.5, 0.75, 1.0] +``` + +## Notas + +Equivalente a numpy.linspace pero sin numpy. Si num=1, retorna [start]. Si num<=0, retorna lista vacia. 
diff --git a/python/functions/datascience/min_max_scale.md b/python/functions/datascience/min_max_scale.md new file mode 100644 index 00000000..ba4e2247 --- /dev/null +++ b/python/functions/datascience/min_max_scale.md @@ -0,0 +1,32 @@ +--- +name: min_max_scale +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def min_max_scale(data: list) -> list" +description: "Escala los valores al rango [0, 1] usando min-max normalization." +tags: [normalization, scaling, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +min_max_scale([2, 4, 6, 8, 10]) +# [0.0, 0.25, 0.5, 0.75, 1.0] +``` + +## Notas + +Si todos los valores son iguales, retorna lista de ceros. No requiere imports externos. diff --git a/python/functions/datascience/pearson.md b/python/functions/datascience/pearson.md new file mode 100644 index 00000000..de3b0bf8 --- /dev/null +++ b/python/functions/datascience/pearson.md @@ -0,0 +1,32 @@ +--- +name: pearson +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def pearson(xs: list, ys: list) -> float" +description: "Calcula el coeficiente de correlacion de Pearson entre dos listas de floats." +tags: [statistics, correlation, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +r = pearson([1, 2, 3], [2, 4, 6]) +# r = 1.0 +``` + +## Notas + +Usa solo math stdlib. No requiere numpy. Retorna 0.0 si las listas tienen longitud diferente, estan vacias, o la desviacion es cero. 
diff --git a/python/functions/datascience/rolling_window.md b/python/functions/datascience/rolling_window.md new file mode 100644 index 00000000..39f5d0cf --- /dev/null +++ b/python/functions/datascience/rolling_window.md @@ -0,0 +1,32 @@ +--- +name: rolling_window +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def rolling_window(xs: list, size: int) -> list" +description: "Genera ventanas deslizantes de tamanio fijo sobre una lista." +tags: [windowing, timeseries, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +rolling_window([1, 2, 3, 4, 5], 3) +# [[1, 2, 3], [2, 3, 4], [3, 4, 5]] +``` + +## Notas + +Retorna lista vacia si size <= 0 o size > len(xs). Util para calcular medias moviles u otras metricas sobre ventanas. diff --git a/python/functions/datascience/standardize.md b/python/functions/datascience/standardize.md new file mode 100644 index 00000000..5ae7928b --- /dev/null +++ b/python/functions/datascience/standardize.md @@ -0,0 +1,32 @@ +--- +name: standardize +kind: function +lang: py +domain: datascience +version: "1.0.0" +purity: pure +signature: "def standardize(data: list) -> list" +description: "Estandarizacion Z-score: transforma los datos a media=0 y desviacion=1." +tags: [statistics, normalization, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/datascience/datascience.py" +--- + +## Ejemplo + +```python +standardize([10, 20, 30]) +# [-1.2247..., 0.0, 1.2247...] +``` + +## Notas + +Si la desviacion estandar es cero, retorna lista de ceros. Usa desviacion poblacional (N, no N-1). 
diff --git a/python/functions/finance/__init__.py b/python/functions/finance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/functions/finance/annualized_volatility.md b/python/functions/finance/annualized_volatility.md new file mode 100644 index 00000000..5c40a757 --- /dev/null +++ b/python/functions/finance/annualized_volatility.md @@ -0,0 +1,35 @@ +--- +name: annualized_volatility +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def annualized_volatility(returns: list, periods_per_year: float) -> float" +description: "Calcula la volatilidad anualizada de una serie de retornos." +tags: [finance, volatility, risk, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +daily_returns = [0.01, -0.005, 0.008, 0.003, -0.002, 0.006, 0.004] +vol = annualized_volatility(daily_returns, 252.0) +# Volatilidad anualizada (std * sqrt(252)) +``` + +## Notas + +Formula: std_muestral(returns) * sqrt(periods_per_year). +Usa desviacion estandar muestral (n-1) para ser consistente con la practica financiera. +Retorna 0.0 si hay menos de 2 retornos o periods_per_year es menor o igual a cero. diff --git a/python/functions/finance/bollinger_bands.md b/python/functions/finance/bollinger_bands.md new file mode 100644 index 00000000..502e9f96 --- /dev/null +++ b/python/functions/finance/bollinger_bands.md @@ -0,0 +1,35 @@ +--- +name: bollinger_bands +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def bollinger_bands(data: list, period: int, num_std: float) -> tuple" +description: "Calcula las Bandas de Bollinger (upper, middle, lower) de una serie de precios." 
+tags: [finance, bollinger, volatility, indicator, python] +uses_functions: [sma_py_finance] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +prices = [10, 11, 12, 13, 14, 15, 14, 13, 12, 11] +upper, middle, lower = bollinger_bands(prices, 5, 2.0) +# middle es la SMA(5), upper/lower son middle +/- 2*std +``` + +## Notas + +Retorna tupla de tres listas (upper, middle, lower). Cada lista tiene len(data) - period + 1 elementos. +La desviacion estandar se calcula sobre la ventana de tamanio period (poblacional, no muestral). +Usa internamente la funcion sma para la banda media. diff --git a/python/functions/finance/ema.md b/python/functions/finance/ema.md new file mode 100644 index 00000000..ee192902 --- /dev/null +++ b/python/functions/finance/ema.md @@ -0,0 +1,34 @@ +--- +name: ema +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def ema(data: list, period: int) -> list" +description: "Calcula la media movil exponencial (EMA) de una serie de precios." +tags: [finance, moving-average, exponential, indicator, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +prices = [10, 11, 12, 13, 14, 15] +result = ema(prices, 3) +# [11.0, 12.0, 13.0, 14.0] +``` + +## Notas + +El primer valor de la EMA es el SMA del primer periodo. El multiplicador es 2 / (period + 1). +Retorna len(data) - period + 1 elementos. Lista vacia si period invalido.
# NOTE: the patch header in the next comment line shares these physical lines
# with the module; it is kept verbatim so no diff content is lost.
# diff --git a/python/functions/finance/finance.py b/python/functions/finance/finance.py new file mode 100644 index 00000000..a4870d21 --- /dev/null +++ b/python/functions/finance/finance.py @@ -0,0 +1,137 @@
"""Finance domain — pure functions for financial indicators and calculations."""

import math


def _rsi_value(avg_gain: float, avg_loss: float) -> float:
    """Convert average gain and average loss into an RSI reading."""
    if avg_loss == 0:
        # No losses in the window: RSI is defined as 100 here.
        return 100.0
    rs = avg_gain / avg_loss
    return 100.0 - 100.0 / (1.0 + rs)


def sma(data: list, period: int) -> list:
    """Return the simple moving average (SMA) of a price series.

    Args:
        data: Price series.
        period: Window length.

    Returns:
        ``len(data) - period + 1`` averages, one per full window, or ``[]``
        when ``period <= 0`` or ``period > len(data)``.
    """
    if period <= 0 or period > len(data):
        return []
    return [sum(data[i : i + period]) / period for i in range(len(data) - period + 1)]


def ema(data: list, period: int) -> list:
    """Return the exponential moving average (EMA) of a price series.

    The first value is the SMA of the first ``period`` prices; each later
    value moves toward the new price by the factor ``2 / (period + 1)``.

    Returns:
        ``len(data) - period + 1`` values, or ``[]`` when ``period <= 0`` or
        ``period > len(data)``.
    """
    if period <= 0 or period > len(data):
        return []
    multiplier = 2.0 / (period + 1)
    # Seed the recursion with the SMA of the first window.
    result = [sum(data[:period]) / period]
    for price in data[period:]:
        previous = result[-1]
        result.append((price - previous) * multiplier + previous)
    return result


def rsi(data: list, period: int) -> list:
    """Return the Relative Strength Index (RSI) of a price series.

    The first reading uses the plain average of the first ``period`` gains
    and losses; later readings update those averages as
    ``(previous * (period - 1) + current) / period``.

    Returns:
        ``len(data) - period`` readings, or ``[]`` when ``period <= 0`` or
        fewer than ``period + 1`` prices are supplied. A reading is ``100.0``
        when the average loss is zero.
    """
    if period <= 0 or len(data) < period + 1:
        return []
    deltas = [data[i] - data[i - 1] for i in range(1, len(data))]
    gains = [d if d > 0 else 0.0 for d in deltas]
    losses = [-d if d < 0 else 0.0 for d in deltas]

    avg_gain = sum(gains[:period]) / period
    avg_loss = sum(losses[:period]) / period
    result = [_rsi_value(avg_gain, avg_loss)]

    for gain, loss in zip(gains[period:], losses[period:]):
        avg_gain = (avg_gain * (period - 1) + gain) / period
        avg_loss = (avg_loss * (period - 1) + loss) / period
        result.append(_rsi_value(avg_gain, avg_loss))

    return result


def bollinger_bands(data: list, period: int, num_std: float) -> tuple:
    """Return the Bollinger Bands as ``(upper, middle, lower)`` lists.

    ``middle`` is ``sma(data, period)``. ``upper`` and ``lower`` are the
    middle band plus/minus ``num_std`` times the population standard
    deviation of each window.

    Returns:
        Three lists of ``len(data) - period + 1`` values, or ``([], [], [])``
        when ``period <= 0`` or ``period > len(data)``.
    """
    if period <= 0 or period > len(data):
        return ([], [], [])
    middle = sma(data, period)
    upper = []
    lower = []
    for i, mean in enumerate(middle):
        window = data[i : i + period]
        std = math.sqrt(sum((x - mean) ** 2 for x in window) / period)
        upper.append(mean + num_std * std)
        lower.append(mean - num_std * std)
    return (upper, middle, lower)


def sharpe_ratio(
    returns: list,
    risk_free_rate: float,
    periods_per_year: float,
    *,
    ddof: int = 0,
) -> float:
    """Return the annualized Sharpe ratio of a series of per-period returns.

    Args:
        returns: Per-period returns.
        risk_free_rate: Risk-free rate; it is divided by ``periods_per_year``
            before being subtracted from the mean return.
        periods_per_year: Number of return periods in a year.
        ddof: Delta degrees of freedom for the standard deviation. The
            default ``0`` (population) keeps the original behaviour;
            ``ddof=1`` (sample) matches ``annualized_volatility``.

    Returns:
        ``(excess / std) * sqrt(periods_per_year)``, or ``0.0`` when
        ``returns`` is empty, ``periods_per_year <= 0``, there are not more
        than ``ddof`` returns, or the standard deviation is zero.
    """
    n = len(returns)
    if n == 0 or periods_per_year <= 0:
        return 0.0
    if n - ddof <= 0:
        return 0.0
    mean_return = sum(returns) / n
    excess = mean_return - risk_free_rate / periods_per_year
    variance = sum((r - mean_return) ** 2 for r in returns) / (n - ddof)
    std = math.sqrt(variance)
    if std == 0:
        return 0.0
    return (excess / std) * math.sqrt(periods_per_year)


def max_drawdown(values: list) -> tuple:
    """Return the maximum drawdown and the indices where it starts and ends.

    Returns:
        ``(max_dd, start_idx, end_idx)``: ``max_dd`` is the largest fractional
        drop ``(peak - value) / peak`` from a running peak, ``start_idx`` is
        the index of that peak and ``end_idx`` the index of the low.
        ``(0.0, 0, 0)`` when fewer than two values are supplied.
    """
    if len(values) < 2:
        return (0.0, 0, 0)
    peak = values[0]
    peak_idx = 0
    max_dd = 0.0
    dd_start = 0
    dd_end = 0
    for i, value in enumerate(values):
        if value > peak:
            peak = value
            peak_idx = i
        # A zero peak gives no defined fractional drop; count it as none.
        dd = (peak - value) / peak if peak != 0 else 0.0
        if dd > max_dd:
            max_dd = dd
            dd_start = peak_idx
            dd_end = i
    return (max_dd, dd_start, dd_end)


def vwap(prices: list, volumes: list) -> float:
    """Return the Volume-Weighted Average Price (VWAP).

    Computed as ``sum(price * volume) / sum(volume)``. Returns ``0.0`` when
    the lists are empty, differ in length, or the total volume is zero.
    """
    if len(prices) == 0 or len(prices) != len(volumes):
        return 0.0
    total_volume = sum(volumes)
    if total_volume == 0:
        return 0.0
    return sum(p * v for p, v in zip(prices, volumes)) / total_volume


def log_return(price_start: float, price_end: float) -> float:
    """Return the logarithmic return ``ln(price_end / price_start)``.

    Returns ``0.0`` when either price is less than or equal to zero.
    """
    if price_start <= 0 or price_end <= 0:
        return 0.0
    return math.log(price_end / price_start)


def annualized_volatility(returns: list, periods_per_year: float) -> float:
    """Return the annualized volatility of a series of per-period returns.

    Computed as the sample standard deviation (divisor ``n - 1``) times
    ``sqrt(periods_per_year)``. Returns ``0.0`` when fewer than two returns
    are supplied or ``periods_per_year <= 0``.
    """
    if len(returns) < 2 or periods_per_year <= 0:
        return 0.0
    n = len(returns)
    mean = sum(returns) / n
    variance = sum((r - mean) ** 2 for r in returns) / (n - 1)
    return math.sqrt(variance) * math.sqrt(periods_per_year)
# NOTE: the patch/markdown text in the next two comment lines shares these
# physical lines with the module; it is kept verbatim so no diff content is lost.
# diff --git a/python/functions/finance/log_return.md b/python/functions/finance/log_return.md new file mode 100644 index 00000000..75ab1897 --- /dev/null +++ b/python/functions/finance/log_return.md @@ -0,0 +1,34 @@ +--- +name: log_return +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def log_return(price_start: float, price_end: float) -> float" +description: "Calcula el retorno logaritmico entre dos precios." +tags: [finance, return, logarithmic, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +r = log_return(100.0, 110.0) +# 0.09531... (aprox 9.53%) +``` + +## Notas + +Formula: ln(price_end / price_start). +Retorna 0.0 si alguno de los precios es menor o igual a cero. +Los retornos logaritmicos son aditivos en el tiempo, a diferencia de los retornos simples.
# diff --git a/python/functions/finance/max_drawdown.md b/python/functions/finance/max_drawdown.md new file mode 100644 index 00000000..63a42294 --- /dev/null +++ b/python/functions/finance/max_drawdown.md @@ -0,0 +1,35 @@ +--- +name: max_drawdown +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def max_drawdown(values: list) -> tuple" +description: "Calcula el maximo drawdown y los indices de inicio y fin del peor periodo."
+tags: [finance, drawdown, risk, performance, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +portfolio = [100, 110, 105, 95, 102, 108, 90, 95] +dd, start, end = max_drawdown(portfolio) +# dd = 0.1818..., start = 1, end = 6 (de 110 a 90) +``` + +## Notas + +Retorna tupla (max_dd, start_idx, end_idx) donde max_dd es fraccion (0.0 a 1.0). +start_idx es el indice del pico previo, end_idx es el indice del valle. +Retorna (0.0, 0, 0) si la lista tiene menos de 2 elementos. diff --git a/python/functions/finance/rsi.md b/python/functions/finance/rsi.md new file mode 100644 index 00000000..4a417831 --- /dev/null +++ b/python/functions/finance/rsi.md @@ -0,0 +1,36 @@ +--- +name: rsi +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def rsi(data: list, period: int) -> list" +description: "Calcula el Relative Strength Index (RSI) de una serie de precios." +tags: [finance, rsi, momentum, indicator, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +prices = [44, 44.34, 44.09, 43.61, 44.33, 44.83, 45.10, 45.42, 45.84, 46.08, + 45.89, 46.03, 45.61, 46.28, 46.28, 46.00, 46.03, 46.41, 46.22, 45.64] +result = rsi(prices, 14) +# Lista de valores RSI entre 0 y 100 +``` + +## Notas + +Usa el metodo de suavizado de Wilder (media exponencial modificada). +Requiere al menos period + 1 datos de entrada. Retorna len(data) - period valores. +RSI = 100 si no hay perdidas en el periodo (avg_loss == 0). 
diff --git a/python/functions/finance/sharpe_ratio.md b/python/functions/finance/sharpe_ratio.md new file mode 100644 index 00000000..e411fba4 --- /dev/null +++ b/python/functions/finance/sharpe_ratio.md @@ -0,0 +1,35 @@ +--- +name: sharpe_ratio +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def sharpe_ratio(returns: list, risk_free_rate: float, periods_per_year: float) -> float" +description: "Calcula el Sharpe Ratio anualizado de una serie de retornos." +tags: [finance, sharpe, risk, performance, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [math] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +daily_returns = [0.01, -0.005, 0.008, 0.003, -0.002, 0.006, 0.004] +sr = sharpe_ratio(daily_returns, 0.02, 252.0) +# Sharpe ratio anualizado +``` + +## Notas + +risk_free_rate es la tasa anual (ej: 0.02 para 2%). Se convierte a tasa por periodo internamente. +periods_per_year indica la frecuencia de los retornos (252 para diarios, 12 para mensuales). +Retorna 0.0 si la desviacion estandar es cero o la lista esta vacia. diff --git a/python/functions/finance/sma.md b/python/functions/finance/sma.md new file mode 100644 index 00000000..9bc76159 --- /dev/null +++ b/python/functions/finance/sma.md @@ -0,0 +1,34 @@ +--- +name: sma +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def sma(data: list, period: int) -> list" +description: "Calcula la media movil simple (SMA) de una serie de precios." 
+tags: [finance, moving-average, indicator, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +prices = [10, 11, 12, 13, 14, 15] +result = sma(prices, 3) +# [11.0, 12.0, 13.0, 14.0] +``` + +## Notas + +Retorna lista mas corta que la entrada (len - period + 1 elementos). +Si period es mayor que len(data) o menor/igual a 0, retorna lista vacia. diff --git a/python/functions/finance/vwap.md b/python/functions/finance/vwap.md new file mode 100644 index 00000000..f8ef0656 --- /dev/null +++ b/python/functions/finance/vwap.md @@ -0,0 +1,35 @@ +--- +name: vwap +kind: function +lang: py +domain: finance +version: "1.0.0" +purity: pure +signature: "def vwap(prices: list, volumes: list) -> float" +description: "Calcula el Volume-Weighted Average Price (VWAP)." +tags: [finance, vwap, volume, indicator, python] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/finance/finance.py" +--- + +## Ejemplo + +```python +prices = [100.0, 101.0, 102.0, 101.5] +volumes = [1000, 1500, 1200, 800] +result = vwap(prices, volumes) +# 101.1333... +``` + +## Notas + +Formula: sum(price_i * volume_i) / sum(volume_i). +Retorna 0.0 si las listas estan vacias, tienen distinto tamanio, o el volumen total es cero.