Coverage for pybmc/sampling_utils.py: 100% (31 statements)
import numpy as np


def coverage(percentiles, rndm_m, models_output, truth_column):
    """
    Calculates coverage percentages for credible intervals.

    Args:
        percentiles (list[int]): Percentiles to evaluate (e.g., `[5, 10, ..., 95]`).
        rndm_m (numpy.ndarray): Posterior samples of predictions, with shape
            (num_samples, num_data_points).
        models_output (pandas.DataFrame): DataFrame containing the true values.
        truth_column (str): Name of the column with true values.

    Returns:
        list[float]: Coverage percentages for each percentile.
    """
    # Measures how often the model's credible intervals actually contain the true value.
    data_total = len(rndm_m.T)  # Number of data points
    M_evals = len(rndm_m)  # Number of posterior samples
    data_true = models_output[truth_column].tolist()

    coverage_results = []

    for p in percentiles:
        count_covered = 0
        for i in range(data_total):
            # Sort the posterior samples for the i-th data point
            sorted_evals = np.sort(rndm_m.T[i])
            # Index bounds of the central p% credible interval, i.e. the
            # (50 - p/2)th through (50 + p/2)th percentiles of the samples
            lower_idx = int((0.5 - p / 200) * M_evals)
            upper_idx = int((0.5 + p / 200) * M_evals) - 1
            # Check if the true value y[i] is within this interval
            if sorted_evals[lower_idx] <= data_true[i] <= sorted_evals[upper_idx]:
                count_covered += 1
        coverage_results.append(count_covered / data_total * 100)

    return coverage_results
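
A minimal calibration sketch for `coverage` (everything below is synthetic and illustrative: the column name `y_true`, the shapes, and the Gaussian data are assumptions, not pybmc fixtures):

import pandas as pd

rng = np.random.default_rng(0)
n_samples, n_points = 5000, 200
mu = rng.normal(size=n_points)  # latent means
truth = mu + rng.normal(size=n_points)  # truths drawn from the predictive distribution
rndm_m_demo = mu + rng.normal(size=(n_samples, n_points))  # posterior-style samples
df = pd.DataFrame({"y_true": truth})

print(coverage([10, 50, 90], rndm_m_demo, df, "y_true"))
# Because each truth is an independent draw from the same distribution as the
# samples, each reported coverage should land near its nominal level
# (10, 50, 90), up to Monte Carlo noise.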
def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat):
    """
    Generates posterior predictive samples and credible intervals.

    Args:
        filtered_model_predictions (numpy.ndarray): Model predictions, with shape
            (num_data_points, num_models).
        samples (numpy.ndarray): Gibbs samples; each row concatenates the beta
            coefficients with the noise standard deviation, `[beta, sigma]`.
        Vt_hat (numpy.ndarray): Normalized right singular vectors.

    Returns:
        tuple[numpy.ndarray, list[numpy.ndarray]]:
            - `rndm_m` (numpy.ndarray): Posterior predictive samples.
            - `[lower, median, upper]` (list[numpy.ndarray]): 95% credible interval
              arrays (the 2.5th, 50th, and 97.5th percentiles).
    """
    # Seed the Generator directly: np.random.seed() only affects the legacy
    # global RNG and would leave default_rng() unseeded.
    rng = np.random.default_rng(142858)

    # Draw 10,000 distinct Gibbs samples (requires len(samples) >= 10000)
    theta_rand_selected = rng.choice(samples, 10000, replace=False)

    # Extract betas and noise standard deviations
    betas = theta_rand_selected[:, :-1]  # shape: (10000, num_models - 1)
    noise_stds = theta_rand_selected[:, -1]  # shape: (10000,)

    # Compute model weights: shape (10000, num_models)
    default_weights = np.full(Vt_hat.shape[1], 1 / Vt_hat.shape[1])
    model_weights_random = (
        betas @ Vt_hat + default_weights
    )  # default_weights broadcasts across rows

    # Generate noiseless predictions: shape (10000, num_data_points)
    yvals_rand_radius = model_weights_random @ filtered_model_predictions.T

    # Add Gaussian noise with std = noise_stds (diagonal covariance assumed),
    # broadcasting each sample's std across its row of predictions
    noise = rng.standard_normal(yvals_rand_radius.shape) * noise_stds[:, None]
    rndm_m = yvals_rand_radius + noise

    # Compute the 95% credible interval bounds and the median
    lower_radius = np.percentile(rndm_m, 2.5, axis=0)
    median_radius = np.percentile(rndm_m, 50, axis=0)
    upper_radius = np.percentile(rndm_m, 97.5, axis=0)

    return rndm_m, [lower_radius, median_radius, upper_radius]
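
An end-to-end wiring sketch for `rndm_m_random_calculator` (assumed shapes only: the SVD-derived `Vt_hat` and the synthetic Gibbs samples below are illustrative placeholders, not how pybmc constructs these inputs):

rng = np.random.default_rng(1)
n_points, n_models, n_gibbs = 40, 4, 20000  # n_gibbs must be >= 10000 for rng.choice

# Synthetic stand-ins for the real inputs
filtered_model_predictions = rng.normal(size=(n_points, n_models))
Vt_hat = np.linalg.svd(filtered_model_predictions, full_matrices=False)[2][:-1]  # (n_models - 1, n_models)
gibbs_samples = np.column_stack([
    rng.normal(scale=0.1, size=(n_gibbs, n_models - 1)),  # beta coefficients
    np.abs(rng.normal(loc=1.0, scale=0.1, size=n_gibbs)),  # sigma (positive)
])

rndm_m, (lower, median, upper) = rndm_m_random_calculator(
    filtered_model_predictions, gibbs_samples, Vt_hat
)
print(rndm_m.shape)  # (10000, n_points)
print(lower.shape, median.shape, upper.shape)  # each (n_points,)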