"""Tests para missingness_overview.""" import sys import os import pytest sys.path.insert(0, os.path.dirname(__file__)) from missingness_overview import missingness_overview # Output contract: every call returns exactly these 9 keys. EXPECTED_KEYS = { "n_rows", "n_cols", "n_cols_with_null", "n_missing_cells", "missing_cell_pct", "complete_rows", "incomplete_rows", "complete_pct", "incomplete_pct", } def test_cooccurrence_three_cols_exact(): # 3 columns, 5 rows. Hand-computed expectations: # col a missing at rows 0, 4 -> 2 # col b missing at rows 0, 2 -> 2 # col c missing at row 4 -> 1 # n_missing_cells = 5, total_cells = 5*3 = 15 -> 33.333...% # row 0 (a&b co-occur) -> incomplete # row 1 (all present) -> complete # row 2 (b only) -> incomplete # row 3 (all present) -> complete # row 4 (a&c co-occur) -> incomplete mask = { "a": [1, 0, 0, 0, 1], "b": [1, 0, 1, 0, 0], "c": [0, 0, 0, 0, 1], } out = missingness_overview(mask) assert out["n_rows"] == 5 assert out["n_cols"] == 3 assert out["n_cols_with_null"] == 3 assert out["n_missing_cells"] == 5 assert out["missing_cell_pct"] == pytest.approx(33.33333333, abs=1e-6) assert out["complete_rows"] == 2 assert out["incomplete_rows"] == 3 assert out["complete_pct"] == pytest.approx(40.0) assert out["incomplete_pct"] == pytest.approx(60.0) def test_empty_dict_all_zero(): out = missingness_overview({}) assert out == { "n_rows": 0, "n_cols": 0, "n_cols_with_null": 0, "n_missing_cells": 0, "missing_cell_pct": 0.0, "complete_rows": 0, "incomplete_rows": 0, "complete_pct": 0.0, "incomplete_pct": 0.0, } def test_output_keys_contract(): # The 9-key contract holds even for the garbage/zero path. assert set(missingness_overview({}).keys()) == EXPECTED_KEYS assert set(missingness_overview({"a": [1, 0]}).keys()) == EXPECTED_KEYS def test_not_a_dict_returns_zero(): for bad in (None, [1, 0, 1], 42, "nope", 3.14): out = missingness_overview(bad) assert out["n_rows"] == 0 assert out["n_cols"] == 0 assert out["n_missing_cells"] == 0 assert out["missing_cell_pct"] == 0.0 def test_no_nulls_all_complete(): mask = {"a": [0, 0, 0], "b": [0, 0, 0]} out = missingness_overview(mask) assert out["n_rows"] == 3 assert out["n_cols"] == 2 assert out["n_cols_with_null"] == 0 assert out["n_missing_cells"] == 0 assert out["missing_cell_pct"] == 0.0 assert out["complete_rows"] == 3 assert out["incomplete_rows"] == 0 assert out["complete_pct"] == pytest.approx(100.0) assert out["incomplete_pct"] == pytest.approx(0.0) def test_none_values_treated_as_present(): # None and other non-1 values count as present (0). mask = {"a": [None, 1, None, "x", 0]} out = missingness_overview(mask) assert out["n_rows"] == 5 assert out["n_cols"] == 1 assert out["n_missing_cells"] == 1 # only the explicit 1 at row 1 assert out["n_cols_with_null"] == 1 assert out["complete_rows"] == 4 assert out["incomplete_rows"] == 1 def test_unequal_lengths_pad_with_max(): # Ragged lists: n_rows = max length; shorter column padded as present. # a = [1, 1] -> missing at rows 0, 1 # b = [0] -> row 1 padded to present # n_rows = 2, n_cols = 2, total_cells = 4, n_missing_cells = 2 -> 50% mask = {"a": [1, 1], "b": [0]} out = missingness_overview(mask) assert out["n_rows"] == 2 assert out["n_cols"] == 2 assert out["n_cols_with_null"] == 1 assert out["n_missing_cells"] == 2 assert out["missing_cell_pct"] == pytest.approx(50.0) assert out["complete_rows"] == 0 assert out["incomplete_rows"] == 2 assert out["incomplete_pct"] == pytest.approx(100.0) def test_columns_present_but_no_rows(): # Columns exist but all empty -> zero metrics, n_cols preserved. out = missingness_overview({"a": [], "b": []}) assert out["n_rows"] == 0 assert out["n_cols"] == 2 assert out["n_missing_cells"] == 0 assert out["missing_cell_pct"] == 0.0 assert out["complete_pct"] == 0.0 def test_never_raises_on_garbage(): # Non-list column values, mixed junk -> must not raise. mask = {"a": "not a list", "b": 123, "c": [1, 0, 1]} out = missingness_overview(mask) assert set(out.keys()) == EXPECTED_KEYS assert out["n_rows"] == 3 assert out["n_cols"] == 3 assert out["n_missing_cells"] == 2 # only col c contributes assert out["n_cols_with_null"] == 1