Coverage for pybmc/sampling_utils.py: 100%

31 statements  

« prev     ^ index     » next       coverage.py v7.10.0, created at 2025-07-27 15:48 +0000

1import numpy as np 

2 

3 

def coverage(percentiles, rndm_m, models_output, truth_column):
    """
    Calculate empirical coverage of central credible intervals.

    For each percentile p, the central p% credible interval for every data
    point is taken from the sorted posterior samples, and the function
    reports how often the true value falls inside that interval.

    Args:
        percentiles (list[int]): Percentiles to evaluate (e.g., `[5, 10, ..., 95]`).
        rndm_m (numpy.ndarray): Posterior samples of predictions, shape
            (num_samples, num_data_points) — columns are data points.
        models_output (pandas.DataFrame): DataFrame containing true values.
        truth_column (str): Name of column with true values.

    Returns:
        list[float]: Coverage percentages (0-100) for each percentile.
    """
    # How often the model's credible intervals actually contain the true value
    data_total = rndm_m.shape[1]  # Number of data points
    M_evals = rndm_m.shape[0]  # Number of samples per data point
    data_true = np.asarray(models_output[truth_column].tolist())

    # The sort does not depend on p, so hoist it out of the percentile
    # loop (the original re-sorted every data point for every percentile).
    # Sorting along axis 0 sorts each data point's samples independently.
    sorted_evals = np.sort(rndm_m, axis=0)

    coverage_results = []

    for p in percentiles:
        # Index bounds of the central p% interval in the sorted samples
        # (same integer-truncation rule as the original implementation).
        lower_idx = int((0.5 - p / 200) * M_evals)
        upper_idx = int((0.5 + p / 200) * M_evals) - 1
        # A data point is covered when its true value lies within the
        # interval; the comparison is vectorized across all points.
        covered = (sorted_evals[lower_idx] <= data_true) & (
            data_true <= sorted_evals[upper_idx]
        )
        count_covered = int(np.count_nonzero(covered))
        coverage_results.append(count_covered / data_total * 100)

    return coverage_results

38 

39 

def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat):
    """
    Generate posterior predictive samples and credible intervals.

    Args:
        filtered_model_predictions (numpy.ndarray): Model predictions,
            shape (num_data_points, num_models).
        samples (numpy.ndarray): Gibbs samples `[beta, sigma]`, one row per
            draw; the last column is the noise standard deviation.
        Vt_hat (numpy.ndarray): Normalized right singular vectors,
            shape (num_models - 1, num_models).

    Returns:
        tuple[numpy.ndarray, list[numpy.ndarray]]:
            - `rndm_m` (numpy.ndarray): Posterior predictive samples,
              shape (10000, num_data_points).
            - `[lower, median, upper]` (list[numpy.ndarray]): Pointwise
              2.5%, 50%, and 97.5% credible-interval arrays.
    """
    # BUG FIX: the original called np.random.seed(142858) and then drew
    # from np.random.default_rng(), which ignores the legacy global seed
    # entirely — the output was nondeterministic. Seeding the Generator
    # directly makes the subsampling and noise reproducible as intended.
    rng = np.random.default_rng(142858)

    # Subsample 10,000 posterior draws (rows) without replacement.
    theta_rand_selected = rng.choice(samples, 10000, replace=False)

    # Extract betas and noise std deviations
    betas = theta_rand_selected[:, :-1]  # shape: (10000, num_models - 1)
    noise_stds = theta_rand_selected[:, -1]  # shape: (10000,)

    # Compute model weights: equal default weights plus a correction in
    # the span of Vt_hat's rows. Shape (10000, num_models).
    default_weights = np.full(Vt_hat.shape[1], 1 / Vt_hat.shape[1])
    model_weights_random = (
        betas @ Vt_hat + default_weights
    )  # broadcasting default_weights

    # Generate noiseless predictions: shape (10000, num_data_points)
    yvals_rand_radius = (
        model_weights_random @ filtered_model_predictions.T
    )  # dot product

    # Add Gaussian noise with std = noise_stds (assume diagonal covariance)
    # Broadcasting: noise_stds[:, None] * standard normal noise
    noise = rng.standard_normal(yvals_rand_radius.shape) * noise_stds[:, None]
    rndm_m = yvals_rand_radius + noise

    # Pointwise 95% credible interval bounds and median across draws.
    lower_radius = np.percentile(rndm_m, 2.5, axis=0)
    median_radius = np.percentile(rndm_m, 50, axis=0)
    upper_radius = np.percentile(rndm_m, 97.5, axis=0)

    return rndm_m, [lower_radius, median_radius, upper_radius]