# agent_coding_eval/challenges.py

"""
challenges.py — Production-grade programming challenges.

Inspired by real functions from the fn_registry. They test:
- Functional programming (pipe, compose, curry, combinators)
- Data processing (coercion, parsing, normalization)
- Non-trivial algorithms (trees, graphs, scheduling)
- Real-world patterns (retry, rate limiting, schema validation)
"""

from eval_runner import Challenge

# ══════════════════════════════════════════════════════════════
#  FUNCTIONAL PROGRAMMING
# ══════════════════════════════════════════════════════════════

# Functional-programming challenges.
#
# Every entry pairs a natural-language `prompt` with assert-based `test_code`.
# The test code calls the requested function by name without defining it, so
# it is presumably executed in a namespace that already contains the
# candidate solution (NOTE(review): confirm against eval_runner).
FUNCTIONAL = [
    # pipe_safe: left-to-right pipeline that reports failures as
    # (None, error_string) instead of raising.
    Challenge(
        id="fn_pipe",
        name="Pipe with error propagation",
        category="functional",
        difficulty="medium",
        prompt="""Write a Python function:

def pipe_safe(value, *fns):
    \"\"\"Pipe a value through functions left-to-right. If any function raises,
    return a tuple (None, error_string). On success return (result, None).\"\"\"

Example:
    pipe_safe(5, lambda x: x*2, lambda x: x+1) == (11, None)
    pipe_safe(0, lambda x: 10/x) == (None, "division by zero")  # or similar
""",
        test_code="""
# Success cases
assert pipe_safe(5, lambda x: x*2, lambda x: x+1) == (11, None)
assert pipe_safe("hello", str.upper, lambda s: s + "!") == ("HELLO!", None)
assert pipe_safe(42) == (42, None)  # no functions
assert pipe_safe([3,1,2], sorted, lambda x: x[0]) == (1, None)

# Error propagation
result, err = pipe_safe(0, lambda x: 10/x)
assert result is None
assert err is not None and "division" in err.lower()

result, err = pipe_safe("abc", lambda x: x*2, int)
assert result is None
assert err is not None

print("PASS: pipe_safe")
""",
    ),

    # group_by_transform: grouping with an optional per-value transform.
    Challenge(
        id="fn_group_by_multi",
        name="Group by with transform",
        category="functional",
        difficulty="medium",
        prompt="""Write a Python function:

def group_by_transform(xs: list, key_fn, value_fn=None) -> dict:
    \"\"\"Group elements by key_fn. Optionally transform values with value_fn.
    If value_fn is None, store raw elements. Preserves insertion order within groups.\"\"\"

Example:
    group_by_transform(["hello", "hi", "bye"], lambda s: s[0])
    # => {"h": ["hello", "hi"], "b": ["bye"]}

    group_by_transform(["hello", "hi", "bye"], lambda s: s[0], str.upper)
    # => {"h": ["HELLO", "HI"], "b": ["BYE"]}
""",
        test_code="""
# Basic grouping
r = group_by_transform(["hello", "hi", "bye"], lambda s: s[0])
assert r == {"h": ["hello", "hi"], "b": ["bye"]}

# With value transform
r = group_by_transform(["hello", "hi", "bye"], lambda s: s[0], str.upper)
assert r == {"h": ["HELLO", "HI"], "b": ["BYE"]}

# Numbers
r = group_by_transform([1,2,3,4,5,6], lambda x: x % 2, lambda x: x**2)
assert r == {1: [1, 9, 25], 0: [4, 16, 36]}

# Empty
assert group_by_transform([], lambda x: x) == {}

# Single element
assert group_by_transform([42], lambda x: "k") == {"k": [42]}

print("PASS: group_by_transform")
""",
    ),

    # memoize: FIFO-bounded cache decorator.
    # The prompt defines the cache key as (args, sorted kwargs). Under that
    # key f(1, 2) and f(1, y=2) are *different* entries, so the test must not
    # expect a positional call and a keyword call to share a cache slot; it
    # only checks that a repeated keyword call is served from the cache.
    Challenge(
        id="fn_memoize",
        name="Memoize decorator with max size",
        category="functional",
        difficulty="hard",
        prompt="""Write a Python function:

def memoize(max_size: int = 128):
    \"\"\"Decorator that memoizes function results. When cache exceeds max_size,
    evict the oldest entry (FIFO). The key is (args, tuple(sorted(kwargs.items()))).
    Must work with both positional and keyword arguments.\"\"\"

Usage:
    @memoize(max_size=3)
    def add(a, b):
        return a + b
""",
        test_code="""
call_count = 0

@memoize(max_size=3)
def expensive(x, y=0):
    global call_count
    call_count += 1
    return x + y

# First call — computes
call_count = 0
assert expensive(1, 2) == 3
assert call_count == 1

# Cached — no recompute
assert expensive(1, 2) == 3
assert call_count == 1

# Different args
assert expensive(3, 4) == 7
assert call_count == 2

# Kwargs are part of the key: first call computes, repeat is cached
assert expensive(5, y=1) == 6
assert call_count == 3
assert expensive(5, y=1) == 6
assert call_count == 3

# Cache is now full (max_size=3); a new key evicts the oldest entry (1, 2)
assert expensive(10) == 10   # call 4, evicts (1, 2)
assert call_count == 4

# (1,2) was evicted, must recompute
assert expensive(1, 2) == 3
assert call_count == 5

print("PASS: memoize")
""",
        max_tokens=1500,
    ),

    # multi_partition: first-match bucketing over N predicates.
    # NOTE(review): the id says "compose_async" but the task is
    # multi_partition. The id is left unchanged because ids may be used as
    # keys in stored results — confirm before renaming.
    Challenge(
        id="fn_compose_async",
        name="Partition with multiple predicates",
        category="functional",
        difficulty="medium",
        prompt="""Write a Python function:

def multi_partition(xs: list, *predicates) -> list[list]:
    \"\"\"Partition a list into N+1 buckets where N is the number of predicates.
    Each element goes into the bucket of the FIRST predicate it satisfies.
    Elements matching no predicate go into the last bucket.
    Returns list of N+1 lists. Does not mutate input.\"\"\"

Example:
    multi_partition([1,2,3,4,5,6,7,8,9,10],
                    lambda x: x % 3 == 0,
                    lambda x: x % 2 == 0)
    # => [[3,6,9], [2,4,8,10], [1,5,7]]
    # 6 goes to first bucket (div by 3) even though also div by 2
""",
        test_code="""
# Basic
r = multi_partition([1,2,3,4,5,6,7,8,9,10], lambda x: x%3==0, lambda x: x%2==0)
assert r == [[3,6,9], [2,4,8,10], [1,5,7]], f"got {r}"

# No predicates — everything in remainder
assert multi_partition([1,2,3]) == [[1,2,3]]

# One predicate
r = multi_partition(["a","bb","ccc"], lambda s: len(s) > 1)
assert r == [["bb","ccc"], ["a"]]

# All match first
r = multi_partition([2,4,6], lambda x: x%2==0, lambda x: x>0)
assert r == [[2,4,6], [], []]

# Empty
r = multi_partition([], lambda x: True)
assert r == [[], []]

print("PASS: multi_partition")
""",
    ),
]

# ══════════════════════════════════════════════════════════════
#  DATA PROCESSING
# ══════════════════════════════════════════════════════════════

# Data-processing challenges: type coercion, frontmatter parsing, JSON
# recovery from LLM output, and text chunking.
#
# Inside `test_code`, "\\n" is written with a doubled backslash so that the
# newline escape survives the outer (non-raw) string and is interpreted when
# the test code itself is executed.
DATA_PROCESSING = [
    # coerce_types: schema-driven coercion that collects warnings instead of
    # raising.
    Challenge(
        id="dp_coerce",
        name="Type coercion with schema",
        category="data_processing",
        difficulty="hard",
        prompt="""Write a Python function:

def coerce_types(data: dict, schema: dict[str, str]) -> tuple[dict, list[str]]:
    \"\"\"Coerce dict values to types specified in schema. Never mutate original.

    Schema maps field names to type strings: "int", "float", "str", "bool", "list[str]".

    Rules:
    - str → int: parse via float first (handle "3.0" → 3), warn if lossy ("3.7" → 3)
    - str → float: standard float()
    - str → bool: "true/1/yes" → True, "false/0/no" → False (case-insensitive)
    - str → list[str]: split by "," and strip whitespace from each item
    - Fields not in schema: pass through unchanged
    - Fields in schema but not in data: skip
    - Failed coercion: keep original value, add warning string to list

    Returns (new_dict, warnings_list).\"\"\"
""",
        test_code="""
# Basic coercions
d, w = coerce_types({"age": "25", "score": "3.14", "active": "yes"}, {"age": "int", "score": "float", "active": "bool"})
assert d == {"age": 25, "score": 3.14, "active": True}, f"got {d}"
assert w == []

# Lossy int coercion
d, w = coerce_types({"x": "3.7"}, {"x": "int"})
assert d["x"] == 3
assert len(w) == 1 and ("lossy" in w[0].lower() or "3.7" in w[0])

# Bool variants
d, _ = coerce_types({"a": "TRUE", "b": "0", "c": "no"}, {"a": "bool", "b": "bool", "c": "bool"})
assert d == {"a": True, "b": False, "c": False}

# list[str]
d, _ = coerce_types({"tags": "a, b , c"}, {"tags": "list[str]"})
assert d == {"tags": ["a", "b", "c"]}

# Pass through unknown fields
d, _ = coerce_types({"name": "test", "age": "5"}, {"age": "int"})
assert d == {"name": "test", "age": 5}

# Failed coercion
d, w = coerce_types({"x": "not_a_number"}, {"x": "int"})
assert d["x"] == "not_a_number"  # kept original
assert len(w) == 1

# No mutation
original = {"x": "5"}
d, _ = coerce_types(original, {"x": "int"})
assert original["x"] == "5"
assert d["x"] == 5

print("PASS: coerce_types")
""",
        max_tokens=2048,
    ),

    # extract_frontmatter: hand-rolled parser for '---' delimited key: value
    # headers.
    Challenge(
        id="dp_frontmatter",
        name="Extract YAML frontmatter",
        category="data_processing",
        difficulty="medium",
        prompt="""Write a Python function:

def extract_frontmatter(content: str) -> tuple[str, dict | None]:
    \"\"\"Extract YAML-like frontmatter delimited by '---' from start of markdown.

    Frontmatter format:
    ---
    key: value
    another: something
    ---

    Rest of content here.

    Parse simple key:value pairs (no nested YAML needed). Values are always strings.
    Do NOT use the yaml library.

    Returns (content_without_frontmatter, parsed_dict_or_None).
    If no frontmatter found, return (original_content, None).\"\"\"
""",
        test_code="""
# Basic frontmatter
content = "---\\nname: test\\nversion: 1.0\\n---\\n\\nHello world"
body, meta = extract_frontmatter(content)
assert meta == {"name": "test", "version": "1.0"}, f"got {meta}"
assert body.strip() == "Hello world"

# No frontmatter
body, meta = extract_frontmatter("Just text")
assert meta is None
assert body == "Just text"

# Empty frontmatter
body, meta = extract_frontmatter("---\\n---\\nContent")
assert meta == {}
assert "Content" in body

# Values with colons
body, meta = extract_frontmatter("---\\nurl: http://example.com\\n---\\nBody")
assert meta["url"] == "http://example.com"

# Frontmatter must be at start
body, meta = extract_frontmatter("Some text\\n---\\nkey: val\\n---")
assert meta is None

print("PASS: extract_frontmatter")
""",
    ),

    # extract_json_from_llm: tolerant JSON extraction (code fences, trailing
    # commas, Python None).
    Challenge(
        id="dp_json_extract",
        name="Extract JSON from LLM response",
        category="data_processing",
        difficulty="hard",
        prompt="""Write a Python function:

def extract_json_from_llm(content: str) -> dict:
    \"\"\"Extract and parse JSON from messy LLM responses.

    Must handle:
    1. JSON inside ```json ... ``` code blocks
    2. JSON inside ``` ... ``` blocks (no language tag)
    3. Raw JSON with surrounding text
    4. Trailing commas: {"a": 1,} or [1, 2,]
    5. Python None instead of null
    6. Single-quoted strings converted to double quotes

    Returns parsed dict. Returns empty dict {} on failure.
    Use only stdlib (json, re).\"\"\"
""",
        test_code="""
import json

# Clean JSON block
assert extract_json_from_llm('```json\\n{"name": "test"}\\n```') == {"name": "test"}

# Block without language tag
assert extract_json_from_llm('```\\n{"x": 1}\\n```') == {"x": 1}

# JSON with surrounding text
r = extract_json_from_llm('Here is the result: {"count": 42} hope that helps!')
assert r == {"count": 42}

# Trailing commas (objects and nested arrays)
assert extract_json_from_llm('{"a": 1, "b": 2,}') == {"a": 1, "b": 2}
assert extract_json_from_llm('{"items": [1,2,]}') == {"items": [1, 2]}

# Top-level arrays: return value is left unspecified, but the call must not raise
extract_json_from_llm('[1, 2, 3,]')

# Python None → null
assert extract_json_from_llm('{"value": None}') == {"value": None}

# Garbage input
assert extract_json_from_llm("no json here at all") == {}
assert extract_json_from_llm("") == {}

print("PASS: extract_json_from_llm")
""",
        max_tokens=1500,
    ),

    # smart_split: paragraph → sentence → hard-cut chunking with overlap.
    Challenge(
        id="dp_smart_split",
        name="Smart text splitter with token budget",
        category="data_processing",
        difficulty="hard",
        prompt="""Write a Python function:

def smart_split(text: str, max_chars: int = 500, overlap: int = 50) -> list[str]:
    \"\"\"Split text into chunks respecting max_chars with overlap between chunks.

    Rules:
    - Split at paragraph boundaries (double newline) when possible
    - If a single paragraph exceeds max_chars, split at sentence boundaries (. ! ?)
    - If a single sentence exceeds max_chars, hard-cut at max_chars
    - Each chunk (except the first) starts with the last `overlap` characters of the previous chunk
    - Strip leading/trailing whitespace from each chunk
    - Never return empty chunks

    Returns list of string chunks.\"\"\"
""",
        test_code="""
# Simple paragraphs within budget
text = "First paragraph.\\n\\nSecond paragraph.\\n\\nThird paragraph."
chunks = smart_split(text, max_chars=100)
assert len(chunks) == 1
assert text.strip() in chunks[0]

# Force split between paragraphs
text = "A" * 100 + "\\n\\n" + "B" * 100
chunks = smart_split(text, max_chars=120, overlap=10)
assert len(chunks) >= 2
assert "A" * 100 in chunks[0]
assert "B" * 100 in chunks[-1]

# Overlap present
text = "Hello world this is text.\\n\\nAnother paragraph here."
chunks = smart_split(text, max_chars=30, overlap=5)
assert len(chunks) >= 2
for c in chunks:
    assert len(c.strip()) > 0  # no empty chunks

# Very long single paragraph splits at sentence
text = "Short sentence. " * 50  # ~850 chars
chunks = smart_split(text, max_chars=200, overlap=20)
assert all(len(c) <= 220 for c in chunks)  # max_chars + overlap tolerance

# Hard cut when no sentence boundary
text = "A" * 600
chunks = smart_split(text, max_chars=200, overlap=20)
assert len(chunks) >= 3
assert all(len(c) <= 220 for c in chunks)

# Empty/whitespace: the spec forbids empty chunks, so nothing is returned
assert smart_split("") == []
assert smart_split("   \\n\\n   ") == []

print("PASS: smart_split")
""",
        max_tokens=2048,
    ),
]

# ══════════════════════════════════════════════════════════════
#  ALGORITHMS
# ══════════════════════════════════════════════════════════════

# Algorithm challenges: topological sorting, weighted interval scheduling and
# a small suite of tree operations.
ALGORITHMS = [
    # topo_sort: Kahn's algorithm with lexicographic tie-breaking and a cycle
    # flag.
    Challenge(
        id="alg_topo_sort",
        name="Topological sort with cycle detection",
        category="algorithm",
        difficulty="hard",
        prompt="""Write a Python function:

def topo_sort(graph: dict[str, list[str]]) -> tuple[list[str], bool]:
    \"\"\"Topological sort of a directed acyclic graph using Kahn's algorithm.

    graph is adjacency list: {"a": ["b", "c"]} means a → b, a → c.
    Nodes with no edges should also be included.

    Returns (sorted_list, has_cycle).
    - If no cycle: (topologically_sorted_nodes, False)
    - If cycle detected: (partial_result, True)

    When multiple valid orderings exist, prefer lexicographic order.\"\"\"
""",
        test_code="""
# Simple DAG
order, cycle = topo_sort({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []})
assert not cycle
assert order.index("a") < order.index("b")
assert order.index("a") < order.index("c")
assert order.index("b") < order.index("d")
assert order.index("c") < order.index("d")

# Lexicographic preference
order, cycle = topo_sort({"c": [], "b": [], "a": []})
assert not cycle
assert order == ["a", "b", "c"]

# Cycle detection
_, cycle = topo_sort({"a": ["b"], "b": ["c"], "c": ["a"]})
assert cycle

# Single node
order, cycle = topo_sort({"x": []})
assert order == ["x"]
assert not cycle

# Empty graph
order, cycle = topo_sort({})
assert order == []
assert not cycle

# Linear chain
order, cycle = topo_sort({"a": ["b"], "b": ["c"], "c": []})
assert order == ["a", "b", "c"]
assert not cycle

print("PASS: topo_sort")
""",
    ),

    # schedule_intervals: weighted interval scheduling over half-open
    # intervals.
    # The first test case has two optimal answers ([b] and [a, c], both with
    # total priority 5), so the test accepts either instead of demanding "b".
    # NOTE(review): the id says "interval_merge" although the task is
    # scheduling; the id is left unchanged because ids may be used as keys in
    # stored results — confirm before renaming.
    Challenge(
        id="alg_interval_merge",
        name="Interval scheduler with priorities",
        category="algorithm",
        difficulty="hard",
        prompt="""Write a Python function:

def schedule_intervals(intervals: list[dict]) -> list[dict]:
    \"\"\"Schedule non-overlapping intervals maximizing total priority.

    Each interval is {"id": str, "start": int, "end": int, "priority": int}.
    Intervals are half-open: [start, end). Two intervals [1,3) and [3,5) do NOT overlap.

    Use weighted interval scheduling (dynamic programming).
    Returns list of selected intervals sorted by start time.\"\"\"
""",
        test_code="""
# Basic: b overlaps both a and c, while a and c fit together
result = schedule_intervals([
    {"id": "a", "start": 0, "end": 3, "priority": 2},
    {"id": "b", "start": 1, "end": 4, "priority": 5},
    {"id": "c", "start": 3, "end": 6, "priority": 3},
])
ids = sorted(r["id"] for r in result)
# [b] (5) and [a, c] (2 + 3 = 5) are both optimal, so accept either
assert ids in (["b"], ["a", "c"]), f"got {ids}"
total = sum(r["priority"] for r in result)
assert total == 5, f"got total={total}"

# Non-overlapping, take all
result = schedule_intervals([
    {"id": "a", "start": 0, "end": 2, "priority": 3},
    {"id": "b", "start": 2, "end": 4, "priority": 3},
    {"id": "c", "start": 4, "end": 6, "priority": 3},
])
assert len(result) == 3
assert sum(r["priority"] for r in result) == 9

# Empty
assert schedule_intervals([]) == []

# Single
result = schedule_intervals([{"id": "x", "start": 0, "end": 10, "priority": 7}])
assert len(result) == 1 and result[0]["id"] == "x"

# Prefer two small over one big
result = schedule_intervals([
    {"id": "big", "start": 0, "end": 10, "priority": 5},
    {"id": "s1", "start": 0, "end": 5, "priority": 3},
    {"id": "s2", "start": 5, "end": 10, "priority": 3},
])
total = sum(r["priority"] for r in result)
assert total == 6  # s1 + s2 beats big

# Result sorted by start
for i in range(len(result) - 1):
    assert result[i]["start"] <= result[i+1]["start"]

print("PASS: schedule_intervals")
""",
        max_tokens=2048,
    ),

    # flatten_tree / find_path / map_tree over dict nodes with an optional
    # 'children' list.
    Challenge(
        id="alg_tree_ops",
        name="Tree operations suite",
        category="algorithm",
        difficulty="expert",
        prompt="""Write three Python functions for tree manipulation:

1. def flatten_tree(tree: dict) -> list[dict]:
   \"\"\"Flatten nested tree to list. Each node is a dict with optional 'children' key.
   DFS pre-order. Remove 'children' key from output nodes. Deep copy nodes.\"\"\"

2. def find_path(tree: dict, target_id: str) -> list[str] | None:
   \"\"\"Find path from root to node with given 'id' field. Returns list of ids
   from root to target (inclusive), or None if not found.\"\"\"

3. def map_tree(tree: dict, fn) -> dict:
   \"\"\"Apply fn to each node (excluding 'children' key), return new tree with
   same structure. fn receives a dict without 'children' and returns a new dict.
   Must not mutate original.\"\"\"
""",
        test_code="""
import copy

tree = {
    "id": "root", "name": "Root",
    "children": [
        {"id": "a", "name": "A", "children": [
            {"id": "a1", "name": "A1"},
            {"id": "a2", "name": "A2"},
        ]},
        {"id": "b", "name": "B"},
    ]
}
original = copy.deepcopy(tree)

# flatten_tree
flat = flatten_tree(tree)
ids = [n["id"] for n in flat]
assert ids == ["root", "a", "a1", "a2", "b"], f"got {ids}"
assert all("children" not in n for n in flat)
assert tree == original  # no mutation

# find_path
assert find_path(tree, "a2") == ["root", "a", "a2"]
assert find_path(tree, "root") == ["root"]
assert find_path(tree, "b") == ["root", "b"]
assert find_path(tree, "nonexistent") is None

# map_tree
result = map_tree(tree, lambda n: {**n, "name": n["name"].lower()})
assert result["name"] == "root"
assert result["children"][0]["name"] == "a"
assert result["children"][0]["children"][0]["name"] == "a1"
assert tree == original  # no mutation
assert result["id"] == "root"

# Edge: leaf node
leaf = {"id": "solo", "val": 1}
flat = flatten_tree(leaf)
assert flat == [{"id": "solo", "val": 1}]
assert find_path(leaf, "solo") == ["solo"]

print("PASS: tree_ops")
""",
        max_tokens=2048,
    ),
]

# ══════════════════════════════════════════════════════════════
#  REAL-WORLD PATTERNS
# ══════════════════════════════════════════════════════════════

# Real-world pattern challenges: retry/backoff, schema validation, rate
# limiting and line diffing.
#
# The prompts spell out every requirement the test code asserts (message
# keywords, initial bucket state, diff ordering) so that a solution written
# from the prompt alone can pass.
REAL_WORLD = [
    # compute_backoff_delays + classify_error.
    # 408 is listed as transient, so it is excluded from the "permanent" 4xx
    # range together with 429.
    Challenge(
        id="rw_retry",
        name="Retry with exponential backoff",
        category="real_world",
        difficulty="hard",
        prompt="""Write a Python function:

def compute_backoff_delays(max_retries: int, base_delay: float = 1.0,
                           max_delay: float = 60.0, jitter: bool = False) -> list[float]:
    \"\"\"Compute the sequence of backoff delays for retry logic.

    Formula: delay = min(base_delay * 2^attempt, max_delay)
    If jitter=True, multiply each delay by a factor between 0.5 and 1.0
    (use deterministic half-jitter: factor = 0.75 for testability).

    attempt starts at 0.
    Returns list of `max_retries` delay values.\"\"\"

Also write:

def classify_error(status_code: int) -> str:
    \"\"\"Classify HTTP status code for retry decisions.
    Returns: 'permanent' (4xx except 408 and 429), 'transient' (5xx, 429, 408), or 'success' (2xx).
    Any other code returns 'unknown'.\"\"\"
""",
        test_code="""
# Basic exponential backoff
delays = compute_backoff_delays(5, base_delay=1.0, max_delay=60.0)
assert delays == [1.0, 2.0, 4.0, 8.0, 16.0], f"got {delays}"

# Capped at max_delay
delays = compute_backoff_delays(4, base_delay=10.0, max_delay=30.0)
assert delays == [10.0, 20.0, 30.0, 30.0], f"got {delays}"

# With jitter (deterministic 0.75 factor)
delays = compute_backoff_delays(3, base_delay=4.0, jitter=True)
assert delays == [3.0, 6.0, 12.0], f"got {delays}"

# Zero retries
assert compute_backoff_delays(0) == []

# Error classification
assert classify_error(200) == "success"
assert classify_error(201) == "success"
assert classify_error(400) == "permanent"
assert classify_error(403) == "permanent"
assert classify_error(404) == "permanent"
assert classify_error(429) == "transient"  # rate limit
assert classify_error(408) == "transient"  # timeout
assert classify_error(500) == "transient"
assert classify_error(503) == "transient"
assert classify_error(100) == "unknown"
assert classify_error(302) == "unknown"

print("PASS: retry_backoff")
""",
    ),

    # validate: rule-based dict validation.
    # The tests look for the rule keyword inside each error message, so the
    # prompt states which keywords are expected.
    Challenge(
        id="rw_schema_validate",
        name="Schema validator for dicts",
        category="real_world",
        difficulty="expert",
        prompt="""Write a Python function:

def validate(data: dict, schema: dict) -> list[str]:
    \"\"\"Validate a dict against a schema. Return list of error strings (empty = valid).

    Schema format — each key maps to a rule dict:
    {
        "field_name": {
            "type": "str" | "int" | "float" | "bool" | "list" | "dict",
            "required": True | False,  # default False
            "min": number,             # minimum value (for int/float) or min length (for str/list)
            "max": number,             # maximum value or max length
            "choices": [...],          # allowed values
            "pattern": "regex",        # regex pattern (for str only)
        }
    }

    Error messages should be descriptive: "field_name: expected type str, got int"
    Each message must contain the field name plus the violated rule's keyword:
    "required", "type", "choices" or "pattern" (min/max messages are free-form).
    Check in order: required → type → min/max → choices → pattern.\"\"\"
""",
        test_code="""
import re

schema = {
    "name": {"type": "str", "required": True, "min": 1, "max": 50},
    "age": {"type": "int", "required": True, "min": 0, "max": 150},
    "email": {"type": "str", "pattern": r".+@.+\\..+"},
    "role": {"type": "str", "choices": ["admin", "user", "guest"]},
    "tags": {"type": "list", "max": 5},
}

# Valid data
errors = validate({"name": "Alice", "age": 30, "email": "a@b.com", "role": "admin", "tags": ["a"]}, schema)
assert errors == [], f"got {errors}"

# Missing required
errors = validate({"age": 25}, schema)
assert any("name" in e and "required" in e.lower() for e in errors), f"got {errors}"

# Wrong type
errors = validate({"name": 123, "age": 25}, schema)
assert any("name" in e and "type" in e.lower() for e in errors)

# Min/max violation
errors = validate({"name": "", "age": 25}, schema)
assert any("name" in e for e in errors)  # min length 1

errors = validate({"name": "Bob", "age": -5}, schema)
assert any("age" in e for e in errors)  # min 0

# Invalid choice
errors = validate({"name": "X", "age": 1, "role": "superuser"}, schema)
assert any("role" in e and "choices" in e.lower() for e in errors)

# Pattern mismatch
errors = validate({"name": "X", "age": 1, "email": "invalid"}, schema)
assert any("email" in e and "pattern" in e.lower() for e in errors)

# Extra fields ignored (no error)
errors = validate({"name": "X", "age": 1, "extra": "ok"}, schema)
assert not any("extra" in e for e in errors)

# Optional missing is fine
errors = validate({"name": "Test", "age": 50}, schema)
assert not any("email" in e for e in errors)

print("PASS: schema_validate")
""",
        max_tokens=2500,
    ),

    # TokenBucket: rate limiter with an injectable clock.
    # The tests start at current_time=0 and expect a full bucket, so the
    # prompt states the initial state and how the refill clock is anchored.
    Challenge(
        id="rw_rate_limiter",
        name="Token bucket rate limiter",
        category="real_world",
        difficulty="expert",
        prompt="""Write a Python class:

class TokenBucket:
    \"\"\"Token bucket rate limiter (non-threaded, for testing).

    Args:
        capacity: Maximum tokens in bucket.
        refill_rate: Tokens added per second.

    The bucket starts full. The refill clock starts at the first timestamp any
    method observes, so an injected current_time is never compared against the
    wall clock.

    Methods:
        consume(tokens: int = 1, current_time: float = None) -> bool:
            Try to consume tokens. Returns True if allowed, False if not enough tokens.
            current_time is injectable for testing (defaults to time.time()).
            Before checking, refill based on elapsed time since last refill.

        tokens_available(current_time: float = None) -> float:
            Return current token count after refill.

        wait_time(tokens: int = 1, current_time: float = None) -> float:
            Return seconds to wait before `tokens` would be available.
            Returns 0.0 if tokens are already available.
    \"\"\"
""",
        test_code="""
# Basic usage
bucket = TokenBucket(capacity=10, refill_rate=1.0)

# Starts full
assert bucket.tokens_available(current_time=0) == 10

# Consume some
assert bucket.consume(3, current_time=0) == True
assert bucket.tokens_available(current_time=0) == 7

# Consume more than available
assert bucket.consume(8, current_time=0) == False
assert bucket.tokens_available(current_time=0) == 7  # unchanged

# Refill over time
assert bucket.tokens_available(current_time=2) == 9  # 7 + 2*1.0

# Consume after refill
assert bucket.consume(9, current_time=2) == True
assert bucket.tokens_available(current_time=2) == 0

# Don't exceed capacity
assert bucket.tokens_available(current_time=100) == 10  # capped at capacity

# Wait time
bucket2 = TokenBucket(capacity=5, refill_rate=2.0)
bucket2.consume(5, current_time=0)
assert bucket2.tokens_available(current_time=0) == 0
wt = bucket2.wait_time(4, current_time=0)
assert abs(wt - 2.0) < 0.01  # need 4 tokens at 2/s = 2s

# Already available
bucket3 = TokenBucket(capacity=10, refill_rate=1.0)
assert bucket3.wait_time(5, current_time=0) == 0.0

# Consume more than capacity
assert bucket3.consume(11, current_time=0) == False

print("PASS: token_bucket")
""",
        max_tokens=1500,
    ),

    # line_diff: LCS-based line diff.
    # The tests compare exact output lists, which fixes the order inside a
    # changed run (deletions first); the prompt states that order.
    Challenge(
        id="rw_diff",
        name="Simple line differ",
        category="real_world",
        difficulty="expert",
        prompt="""Write a Python function:

def line_diff(old: str, new: str) -> list[str]:
    \"\"\"Compute line-by-line diff between old and new text.

    Returns list of diff lines:
    - Lines only in old: prefixed with "- "
    - Lines only in new: prefixed with "+ "
    - Common lines: prefixed with "  " (two spaces)

    Use longest common subsequence (LCS) to produce minimal diff.
    Within a run of changed lines, emit all "- " lines before the "+ " lines.
    Split input on newlines. Empty string = no lines.\"\"\"
""",
        test_code="""
# No changes
result = line_diff("a\\nb\\nc", "a\\nb\\nc")
assert result == ["  a", "  b", "  c"]

# Addition
result = line_diff("a\\nc", "a\\nb\\nc")
assert result == ["  a", "+ b", "  c"], f"got {result}"

# Deletion
result = line_diff("a\\nb\\nc", "a\\nc")
assert result == ["  a", "- b", "  c"], f"got {result}"

# Replacement
result = line_diff("a\\nb\\nc", "a\\nX\\nc")
assert result == ["  a", "- b", "+ X", "  c"], f"got {result}"

# Complete change
result = line_diff("a\\nb", "c\\nd")
assert result == ["- a", "- b", "+ c", "+ d"]

# Empty inputs
assert line_diff("", "") == []
assert line_diff("a", "") == ["- a"]
assert line_diff("", "a") == ["+ a"]

# Multiple additions and deletions
result = line_diff("a\\nb\\nc\\nd", "a\\nc\\nd\\ne")
assert "- b" in result
assert "+ e" in result
assert "  a" in result
assert "  c" in result
assert "  d" in result

print("PASS: line_diff")
""",
        max_tokens=2048,
    ),
]


# ── All ───────────────────────────────────────────────────

# Flat list of every challenge, concatenated in category order:
# functional, data processing, algorithms, real-world.
ALL_CHALLENGES = [*FUNCTIONAL, *DATA_PROCESSING, *ALGORITHMS, *REAL_WORLD]