"""build_geo_scatter — prepare points for a geographic scatter (EDA `geospatial`). Pure function: no I/O, deterministic. Takes two parallel lists of latitudes and longitudes and returns the data a caller needs to draw a geographic scatter in an equirectangular projection: cleaned points in [lon, lat] order, a bounding box, a projection aspect ratio and a suggested axis padding. It NEVER draws anything (no matplotlib) — the chapter that consumes this output is responsible for the rendering. Reading is defensive throughout and the function NEVER raises: malformed pairs (None, NaN, infinity or out-of-range coordinates) are silently dropped and an empty/valid result is always returned. To keep the rendered PDF/PPTX light on phones, when the number of valid pairs exceeds `max_points` the points are down-sampled DETERMINISTICALLY by a fixed step (`pairs[::step]`), never randomly, so the result is reproducible. """ import math # Minimum axis padding (in degrees) so a single point or a zero-range cloud is # never drawn glued to the axis border (it would collapse to a line). _MIN_PAD = 0.01 # Aspect ratio clamp. 1/cos(lat) blows up near the poles; clamp keeps the render # sane (Tufte: do not let the projection stretch the cloud out of proportion). _ASPECT_MIN = 0.3 _ASPECT_MAX = 5.0 def _coord(value): """Coerce to a finite float defensively; return None for invalid coordinates. bool is a subclass of int, but a real latitude/longitude is never a bool, so True/False are treated as missing instead of coercing to 1.0/0.0. NaN and +/-infinity are never valid coordinates either. """ if value is None or isinstance(value, bool): return None try: coord = float(value) except (TypeError, ValueError): return None if math.isnan(coord) or math.isinf(coord): return None return coord def build_geo_scatter(lats: list, lons: list, max_points: int = 2000) -> dict: """Prepare the data for a geographic scatter in equirectangular projection. Pairs `lats` and `lons` by index, drops invalid pairs, optionally down-samples deterministically, and derives the geometry (bbox, aspect, pad) a caller needs to draw the cloud. No raw rendering is performed. Args: lats: List (or tuple) of latitudes in degrees. Paired by index with `lons`. A value that is None, NaN, infinite, bool or outside [-90, 90] discards that pair. Read defensively. lons: List (or tuple) of longitudes in degrees, parallel to `lats`. A value outside [-180, 180] (or None/NaN/inf/bool) discards that pair. max_points: Cap on the number of points returned. When the number of valid pairs exceeds this cap, the points are down-sampled by a fixed step `ceil(n_total / max_points)` taking `pairs[::step]` — DETERMINISTIC, not random, so the output is reproducible. A non-positive or non-int value disables down-sampling. Returns: Dict ready for a caller's ax.scatter: {points: [[lon, lat], ...] (x=lon, y=lat order), n_total: valid pairs before down-sampling, n_shown: points returned, downsampled: bool, bbox: {lat_min, lat_max, lon_min, lon_max} or None, aspect: 1/cos(centroid lat) clamped to [0.3, 5.0], pad: {lon, lat} ~5% of each range with a small floor}. When there are no valid pairs returns points=[], n_total=0, n_shown=0, downsampled=False, bbox=None, aspect=1.0, pad={lon:0.0, lat:0.0}. """ pairs = [] # each item is (lon, lat) — already in [x, y] order if isinstance(lats, (list, tuple)) and isinstance(lons, (list, tuple)): n = min(len(lats), len(lons)) for i in range(n): lat = _coord(lats[i]) lon = _coord(lons[i]) if lat is None or lon is None: continue if lat < -90.0 or lat > 90.0: continue if lon < -180.0 or lon > 180.0: continue pairs.append((lon, lat)) n_total = len(pairs) if n_total == 0: return { "points": [], "n_total": 0, "n_shown": 0, "downsampled": False, "bbox": None, "aspect": 1.0, "pad": {"lon": 0.0, "lat": 0.0}, } # Deterministic down-sampling by a fixed step. Reproducible: same input -> # same output, no randomness. if ( isinstance(max_points, int) and not isinstance(max_points, bool) and max_points > 0 and n_total > max_points ): step = math.ceil(n_total / max_points) sampled = pairs[::step] else: sampled = pairs points = [[lon, lat] for (lon, lat) in sampled] n_shown = len(points) downsampled = n_shown < n_total lons_s = [p[0] for p in sampled] lats_s = [p[1] for p in sampled] lon_min, lon_max = min(lons_s), max(lons_s) lat_min, lat_max = min(lats_s), max(lats_s) bbox = { "lat_min": lat_min, "lat_max": lat_max, "lon_min": lon_min, "lon_max": lon_max, } # Aspect for an equirectangular projection: stretch the x axis by 1/cos(lat) # at the cloud centroid so a degree of longitude reads at its real width. centroid_lat = sum(lats_s) / len(lats_s) cos_lat = math.cos(math.radians(centroid_lat)) if cos_lat < 1e-12: # centroid at (or numerically at) a pole aspect = _ASPECT_MAX else: aspect = 1.0 / cos_lat aspect = max(_ASPECT_MIN, min(_ASPECT_MAX, aspect)) # Padding ~5% of each range, with a small floor so a zero-range cloud (single # point / all identical) still gets a non-zero margin. pad_lon = max(0.05 * (lon_max - lon_min), _MIN_PAD) pad_lat = max(0.05 * (lat_max - lat_min), _MIN_PAD) return { "points": points, "n_total": n_total, "n_shown": n_shown, "downsampled": downsampled, "bbox": bbox, "aspect": aspect, "pad": {"lon": pad_lon, "lat": pad_lat}, }