# Defensive programming demo (run-all ready)

A small notebook that demonstrates run-all hygiene, schema checks, and logging on a toy patient intake sheet. Use it in class to show how guardrails turn silent failures into actionable errors.


In [None]:
from __future__ import annotations

import logging
from pathlib import Path

import pandas as pd

# Emit INFO-level progress as "LEVEL:message" so run-all output stays compact.
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")

# Anchor every path to the demo's own folder. A script defines __file__, so use
# the directory that contains it. Interactive sessions typically do not define
# __file__; fall back to the working directory itself. (The earlier fallback of
# "." followed by .parent resolved to the *parent* of the working directory,
# which moved DATA_DIR one level too high.)
DEMO_DIR = (
    Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
)
DATA_DIR = DEMO_DIR / "data"

# Columns every intake CSV must provide; load_intake_data rejects files that
# lack any of them.
REQUIRED_COLUMNS = [
    "patient_id",
    "weight_kg",
    "height_cm",
    "age",
    "sex",
]

# (lower, upper) plausibility limits per numeric column. validate_values checks
# them with Series.between, which includes both endpoints by default.
BOUNDS = {
    "weight_kg": (30, 250),
    "height_cm": (120, 230),
    "age": (0, 110),
}

logging.info("Data directory: %s", DATA_DIR)


## Helpers: load, validate, and enrich

These helpers keep the notebook run-all ready by validating the input before doing any work.


In [None]:
def load_intake_data(csv_path: Path) -> pd.DataFrame:
    """Read the intake CSV and return just the required columns.

    Args:
        csv_path: Location of the CSV file to read.

    Returns:
        A fresh copy of the data restricted to ``REQUIRED_COLUMNS``, in that
        order.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        ValueError: If one or more required columns are absent from the file.
    """
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
    else:
        raise FileNotFoundError(f"Missing input: {csv_path}")

    # Report every absent column at once rather than stopping at the first.
    absent = []
    for name in REQUIRED_COLUMNS:
        if name not in frame.columns:
            absent.append(name)
    if absent:
        raise ValueError(f"Missing columns: {absent}")

    # Drop any extra columns and hand back an independent copy.
    selected = frame[REQUIRED_COLUMNS]
    return selected.copy()


def validate_values(
    df: pd.DataFrame,
    bounds: dict[str, tuple[float, float]] | None = None,
) -> pd.DataFrame:
    """Catch obvious data issues before computing BMI.

    Args:
        df: Intake rows with ``patient_id``, ``weight_kg``, ``height_cm`` and
            ``age`` columns.
        bounds: Optional mapping of column name to an inclusive
            ``(lower, upper)`` range. Defaults to the module-level ``BOUNDS``.

    Returns:
        A copy of ``df`` whose numeric columns are coerced to numbers, plus a
        ``height_m`` column equal to ``height_cm / 100``. The input frame is
        not modified.

    Raises:
        ValueError: If a numeric column holds a missing or non-numeric value,
            or if any value lies outside its allowed range.
    """
    if bounds is None:
        bounds = BOUNDS

    df = df.copy()

    numeric_cols = ["weight_kg", "height_cm", "age"]
    # Unparseable entries become NaN here so the next check can reject them.
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")

    if df[numeric_cols].isna().any().any():
        raise ValueError("Non-numeric values found in numeric columns")

    for col, (lower, upper) in bounds.items():
        # between() is inclusive on both ends; negate it to find offenders.
        out_of_bounds = ~df[col].between(lower, upper)
        if out_of_bounds.any():
            bad_rows = df.loc[out_of_bounds, ["patient_id", col]]
            # The escaped newline puts the offending rows on their own lines.
            raise ValueError(
                f"Column {col} failed bounds check:\n{bad_rows}"
            )

    df["height_m"] = df["height_cm"] / 100
    return df


def add_bmi_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with ``bmi`` and ``bmi_category`` columns added.

    BMI is ``weight_kg / height_m ** 2`` rounded to one decimal place. The
    category is derived from the rounded value using left-closed bins:
    below 18.5 is Underweight, 18.5 up to 25 is Normal, 25 up to 30 is
    Overweight, and 30 or more is Obese. The input frame is left untouched.
    """
    enriched = df.copy()

    height_squared = enriched["height_m"].pow(2)
    enriched["bmi"] = enriched["weight_kg"].div(height_squared).round(1)

    # right=False makes each bin include its lower edge, so exactly 25.0
    # counts as Overweight rather than Normal.
    cutoffs = [0, 18.5, 25, 30, float("inf")]
    category_names = ["Underweight", "Normal", "Overweight", "Obese"]
    enriched["bmi_category"] = pd.cut(
        enriched["bmi"],
        bins=cutoffs,
        labels=category_names,
        right=False,
    )
    return enriched


## Happy path: pass schema + bounds and compute BMI

Run-all should be boring. We log progress and show the enriched DataFrame.


In [None]:
# Input for the happy path; it is expected to pass the schema and bounds checks.
good_path = DATA_DIR / "patient_intake.csv"
logging.info("Loading %s", good_path.name)

# Run the stages in order: load + schema check, value checks, BMI enrichment.
intake = load_intake_data(good_path)
validated = validate_values(intake)
hardened = add_bmi_columns(validated)

# Bare trailing expression so the notebook displays it as the cell output.
hardened[["patient_id", "bmi", "bmi_category"]]


## Failure modes: missing columns and impossible values

Intentionally run the same pipeline on messy inputs to show how defensive checks fail fast.


In [None]:
# Feed deliberately broken inputs through the same pipeline. Each failure is
# logged instead of propagating, so the loop moves on to the next file and
# run-all still reaches the end of the notebook.
for messy_name in [
    "patient_intake_missing_height.csv",
    "patient_intake_bad_values.csv",
]:
    path = DATA_DIR / messy_name
    # Leading newline visually separates each attempt in the log output.
    logging.info("\nTrying %s", path.name)
    try:
        df = load_intake_data(path)
        df = validate_values(df)
        _ = add_bmi_columns(df)
    # Deliberately broad: this cell exists to show whichever guard trips.
    except Exception as err:  # noqa: BLE001
        logging.error("Run-all guard tripped for %s: %s", path.name, err)
