From 6355f33ab9c1d6e58befc5a05659a660a533e8dd Mon Sep 17 00:00:00 2001 From: Maria Fernanda Morales <65073126+mfmo45@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:41:47 +0200 Subject: [PATCH] Added ToDos to SequentialDesign --- .../surrogate_models/sequential_design.py | 82 ++++++++++++++----- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py index 322ebbbce..abe3f1a13 100644 --- a/src/bayesvalidrox/surrogate_models/sequential_design.py +++ b/src/bayesvalidrox/surrogate_models/sequential_design.py @@ -209,12 +209,13 @@ class SequentialDesign: return Xnew, None - # Generate needed Exploration class - explore = Exploration(self.ExpDesign, n_candidates) - explore.w = 100 # * ndim #500 # TODO: where does this value come from? - - # Select criterion (mc-intersite-proj-th, mc-intersite-proj) - explore.mc_criterion = 'mc-intersite-proj' + # ------- Calculate Exploration weight ------- + # Compute exploration weight based on trade off scheme + explore_w, exploit_w = self.tradeoff_weights(tradeoff_scheme, + old_EDX, + old_EDY) + print(f"\n Exploration weight={explore_w:0.3f} " + f"Exploitation weight={exploit_w:0.3f}\n") # Generate the candidate samples # TODO: here use the sampling method provided by the expdesign? @@ -231,6 +232,9 @@ class SequentialDesign: # ----------------------------------------- # ---------- EXPLORATION METHODS ---------- # ----------------------------------------- + # ToDo: Move this if/else into its own function called "do_exploration", which should select the + # exploration samples, and assign exploration scores. We should send it explore_score, for if/else stmts + # ToDo: Check if explore_scores can be nan, and remove them from any score normalization if explore_method == 'LOOCV': # ----------------------------------------------------------------- # TODO: LOOCV model construnction based on Feng et al. 
(2020) @@ -256,14 +260,16 @@ class SequentialDesign: else: # ------- EXPLORATION: SPACE-FILLING DESIGN ------- + # ToDo: Remove Exploration class and merge the functions into SequentialDesign class # Generate candidate samples from Exploration class explore = Exploration(self.ExpDesign, n_candidates) - explore.w = 100 # * ndim #500 + explore.w = 100 # * ndim #500 # TODO: where does this value come from? # Select criterion (mc-intersite-proj-th, mc-intersite-proj) explore.mc_criterion = 'mc-intersite-proj' allCandidates, scoreExploration = explore.get_exploration_samples() # Temp: ---- Plot all candidates ----- + # ToDo: Make its own function, called inside of the select_exploration_samples function. if ndim == 2: def plotter(points, allCandidates, Method, scoreExploration=None): @@ -313,23 +319,19 @@ class SequentialDesign: if exploit_method.lower() == 'bayesoptdesign' or \ exploit_method.lower() == 'bayesactdesign': - # ------- Calculate Exoploration weight ------- - # Compute exploration weight based on trade off scheme - explore_w, exploit_w = self.tradeoff_weights(tradeoff_scheme, - old_EDX, - old_EDY) - print(f"\n Exploration weight={explore_w:0.3f} " - f"Exploitation weight={exploit_w:0.3f}\n") - # ------- EXPLOITATION: BayesOptDesign & ActiveLearning ------- if explore_w != 1.0: # Check if all needed properties are set if not hasattr(self.ExpDesign, 'max_func_itr'): raise AttributeError('max_func_itr not given to the experimental design') + # Create a sample pool for rejection sampling + # ToDo: remove from here, add only to BayesOptDesign option MCsize = 15000 X_MC = self.ExpDesign.generate_samples(MCsize, 'random') + + # ToDo: Get samples from the "do_exploration" candidates = self.ExpDesign.generate_samples( n_candidates, 'latin_hypercube') @@ -350,6 +352,7 @@ class SequentialDesign: results.append(self.run_util_func(exploit_method, split_cand[i], i, sigma2, var, X_MC)) # Retrieve the results and append them + # ToDo: Rename U_J_D (here and everywhere) to 
something more representative U_J_d = np.concatenate([results[NofE][1] for NofE in range(n_cand_groups)]) @@ -363,21 +366,28 @@ class SequentialDesign: U_J_d = np.mean(U_J_d.reshape(-1, n_candidates), axis=1) # Normalize U_J_d + # ToDO: Check if this is working for the case where the util_func should be minimized (e.g. IE) + # norm_U_J_D = U_J_d / np.nansum(np.abs(U_J_d)) # Possible solution norm_U_J_d = U_J_d / np.sum(U_J_d) + else: norm_U_J_d = np.zeros((len(scoreExploration))) # ------- Calculate Total score ------- + # ToDo: This should be outside of the exploration/exploitation if/else part # ------- Trade off between EXPLORATION & EXPLOITATION ------- # Accumulate the samples - finalCandidates = np.concatenate((allCandidates, candidates), axis=0) - finalCandidates = np.unique(finalCandidates, axis=0) + # ToDo: Stop assuming 2 sets of samples (should only be 1) + finalCandidates = np.concatenate((allCandidates, candidates), axis=0) # ToDo: Remove + finalCandidates = np.unique(finalCandidates, axis=0) # ToDo: Remove # Calculations take into account both exploration and exploitation # samples without duplicates totalScore = np.zeros(finalCandidates.shape[0]) # self.totalScore = totalScore + # ToDo: Simplify (remove loop) for only one set of samples + # final_weights = explore_score*explore_weights + exploit_score*exploit_weight for cand_idx in range(finalCandidates.shape[0]): # find candidate indices idx1 = np.where(allCandidates == finalCandidates[cand_idx])[0] @@ -406,7 +416,7 @@ class SequentialDesign: # find an optimal point subset to add to the initial design by # maximization of the utility score and taking care of NaN values temp = totalScore.copy() - temp[np.isnan(totalScore)] = -np.inf + temp[np.isnan(totalScore)] = -np.inf # Since we are maximizing sorted_idxtotalScore = np.argsort(temp)[::-1] bestIdx = sorted_idxtotalScore[:n_new_samples] @@ -426,7 +436,6 @@ class SequentialDesign: # TODO: still not changed for e.g. 
'Voronoi' Xnew = finalCandidates[sorted_idxtotalScore[:n_new_samples]] - elif exploit_method.lower() == 'varoptdesign': # ------- EXPLOITATION: VarOptDesign ------- UtilMethod = var @@ -458,6 +467,7 @@ class SequentialDesign: ExploitScore = np.max(np.max(allModifiedLOO, axis=1), axis=1) elif UtilMethod in ['EIGF', 'ALM']: + # ToDo: Check the methods it actually can receive (ALC is missing from conditional list and code) # ----- All other in ['EIGF', 'ALM'] ----- # Initilize the ExploitScore array # ExploitScore = np.zeros((len(old_EDX), len(OutputNames))) @@ -508,6 +518,7 @@ class SequentialDesign: # maximization of the utility score and taking care of NaN values # Total score # Normalize U_J_d + # ToDo: Move this out of the exploitation if/else part (same as with Bayesian approaches) ExploitScore = ExploitScore / np.sum(ExploitScore) totalScore = exploit_w * ExploitScore # print(totalScore.shape) @@ -534,11 +545,16 @@ class SequentialDesign: # select the requested number of samples Xnew[i] = newSamples[np.argmax(maxminScore)] + # ToDo: For these 2 last methods, we should find better ways elif exploit_method.lower() == 'alphabetic': + # ToDo: Check function to see what it does for scores/how it chooses points, so it gives as an output the + # scores. See how it works with exploration_scores. + # Todo: Check if it is a minimization or maximization. (We think it is minimization) # ------- EXPLOITATION: ALPHABETIC ------- Xnew = self.util_AlphOptDesign(allCandidates, var) elif exploit_method == 'Space-filling': + # ToDo: Set exploitation score to 0, so we can do tradeoff outside of if/else # ------- EXPLOITATION: SPACE-FILLING ------- totalScore = scoreExploration @@ -703,6 +719,7 @@ class SequentialDesign: y_hat, std, sigma2Dict, var) elif method.lower() == 'bayesoptdesign': + # ToDo: Create X_MC here, since it is not used in the other active learning approaches. 
NCandidate = candidates.shape[0] U_J_d = np.zeros(NCandidate) for idx, X_can in tqdm(enumerate(candidates), ascii=True, @@ -1714,3 +1731,30 @@ class SequentialDesign: ) return RMSE_Mean, RMSE_std + + def _select_indexes(self, prior_samples, collocation_points): + """ + ToDo: This function will be used to check the user-input exploration samples, remove training points that + were already used, and select the first mc_samples samples that have not yet been used for training. It should also + assign an exploration score of 0 to all samples. + Args: + prior_samples: array [mc_size, n_params] + Pre-defined samples from the parameter space, out of which the sample sets should be extracted. + collocation_points: [tp_size, n_params] + array with training points which were already used to train the surrogate model, and should therefore + not be re-explored. + + Returns: array[self.mc_samples,] + With indexes of the new candidate parameter sets, to be read from the prior_samples array + + """ + n_tp = collocation_points.shape[0] + # a) get index of elements that have already been used + aux1_ = np.where((prior_samples[:self.mc_samples + n_tp, :] == collocation_points[:, None]).all(-1))[1] + # b) give each element in the prior a True if it has not been used before + aux2_ = np.invert(np.in1d(np.arange(prior_samples[:self.mc_samples + n_tp, :].shape[0]), aux1_)) + # c) Select the first self.mc_samples elements in prior_sample that have not been used before + al_unique_index = np.arange(prior_samples[:self.mc_samples + n_tp, :].shape[0])[aux2_] + al_unique_index = al_unique_index[:self.mc_samples] + + return al_unique_index \ No newline at end of file -- GitLab