Coverage for pybmc/sampling_utils.py: 100%

31 statements  

« prev     ^ index     » next       coverage.py v7.10.0, created at 2025-07-27 15:48 +0000

1import numpy as np 

2 

3 

def coverage(percentiles, rndm_m, models_output, truth_column):
    """
    Calculate empirical coverage of central credible intervals.

    For each percentile p, the central p% credible interval for every data
    point is taken from the sorted posterior samples, and the function
    reports how often the true value falls inside that interval.

    Args:
        percentiles (list[int]): Percentiles to evaluate (e.g., `[5, 10, ..., 95]`).
        rndm_m (numpy.ndarray): Posterior samples of predictions, shape
            (num_samples, num_data_points) — columns are data points.
        models_output (pandas.DataFrame): DataFrame containing true values.
        truth_column (str): Name of column with true values.

    Returns:
        list[float]: Coverage percentages (0-100) for each percentile.
    """
    # How often the model's credible intervals actually contain the true value
    data_total = rndm_m.shape[1]  # Number of data points
    M_evals = rndm_m.shape[0]  # Number of samples per data point
    data_true = np.asarray(models_output[truth_column].tolist())

    # The sort does not depend on p, so hoist it out of the percentile
    # loop (the original re-sorted every data point for every percentile).
    # Sorting along axis 0 sorts each data point's samples independently.
    sorted_evals = np.sort(rndm_m, axis=0)

    coverage_results = []

    for p in percentiles:
        # Index bounds of the central p% interval in the sorted samples
        # (same integer-truncation rule as the original implementation).
        lower_idx = int((0.5 - p / 200) * M_evals)
        upper_idx = int((0.5 + p / 200) * M_evals) - 1
        # A data point is covered when its true value lies within the
        # interval; the comparison is vectorized across all points.
        covered = (sorted_evals[lower_idx] <= data_true) & (
            data_true <= sorted_evals[upper_idx]
        )
        count_covered = int(np.count_nonzero(covered))
        coverage_results.append(count_covered / data_total * 100)

    return coverage_results

38 

39 

def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat):
    """
    Generate posterior predictive samples and credible intervals.

    Args:
        filtered_model_predictions (numpy.ndarray): Model predictions,
            shape (num_data_points, num_models).
        samples (numpy.ndarray): Gibbs samples `[beta, sigma]`, one row per
            draw; the last column is the noise standard deviation.
        Vt_hat (numpy.ndarray): Normalized right singular vectors,
            shape (num_models - 1, num_models).

    Returns:
        tuple[numpy.ndarray, list[numpy.ndarray]]:
            - `rndm_m` (numpy.ndarray): Posterior predictive samples,
              shape (10000, num_data_points).
            - `[lower, median, upper]` (list[numpy.ndarray]): Pointwise
              2.5%, 50%, and 97.5% credible-interval arrays.
    """
    # BUG FIX: the original called np.random.seed(142858) and then drew
    # from np.random.default_rng(), which ignores the legacy global seed
    # entirely — the output was nondeterministic. Seeding the Generator
    # directly makes the subsampling and noise reproducible as intended.
    rng = np.random.default_rng(142858)

    # Subsample 10,000 posterior draws (rows) without replacement.
    theta_rand_selected = rng.choice(samples, 10000, replace=False)

    # Extract betas and noise std deviations
    betas = theta_rand_selected[:, :-1]  # shape: (10000, num_models - 1)
    noise_stds = theta_rand_selected[:, -1]  # shape: (10000,)

    # Compute model weights: equal default weights plus a correction in
    # the span of Vt_hat's rows. Shape (10000, num_models).
    default_weights = np.full(Vt_hat.shape[1], 1 / Vt_hat.shape[1])
    model_weights_random = (
        betas @ Vt_hat + default_weights
    )  # broadcasting default_weights

    # Generate noiseless predictions: shape (10000, num_data_points)
    yvals_rand_radius = (
        model_weights_random @ filtered_model_predictions.T
    )  # dot product

    # Add Gaussian noise with std = noise_stds (assume diagonal covariance)
    # Broadcasting: noise_stds[:, None] * standard normal noise
    noise = rng.standard_normal(yvals_rand_radius.shape) * noise_stds[:, None]
    rndm_m = yvals_rand_radius + noise

    # Pointwise 95% credible interval bounds and median across draws.
    lower_radius = np.percentile(rndm_m, 2.5, axis=0)
    median_radius = np.percentile(rndm_m, 50, axis=0)
    upper_radius = np.percentile(rndm_m, 97.5, axis=0)

    return rndm_m, [lower_radius, median_radius, upper_radius]