From f60fa754e1835890ed43b53ee8c534e3e28346ee Mon Sep 17 00:00:00 2001
From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com>
Date: Wed, 10 Jul 2024 14:36:55 +0200
Subject: [PATCH] Small fixes, update of engine

---
 ...mple_analytical_function_testSequential.py |  2 +-
 src/bayesvalidrox/surrogate_models/engine.py  | 52 +++++++++++++++----
 .../surrogate_models/sequential_design.py     | 42 +++++++--------
 3 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/examples/analytical-function/example_analytical_function_testSequential.py b/examples/analytical-function/example_analytical_function_testSequential.py
index 06dd3e25d..7015d2699 100644
--- a/examples/analytical-function/example_analytical_function_testSequential.py
+++ b/examples/analytical-function/example_analytical_function_testSequential.py
@@ -21,7 +21,7 @@ import pandas as pd
 import sys
 import joblib
 import matplotlib
-matplotlib.use('agg')
+#matplotlib.use('agg')
 
 # import bayesvalidrox
 # Add BayesValidRox path
diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py
index 24aa0c0d7..a6531b978 100644
--- a/src/bayesvalidrox/surrogate_models/engine.py
+++ b/src/bayesvalidrox/surrogate_models/engine.py
@@ -23,6 +23,8 @@ from tqdm import tqdm
 from bayesvalidrox.bayes_inference.bayes_inference import BayesInference
 from bayesvalidrox.bayes_inference.discrepancy import Discrepancy
 from .exploration import Exploration
+from.surrogate_models import MetaModel as MM
+from.surrogate_models import create_psi
 from .sequential_design import SequentialDesign
 
 
@@ -153,6 +155,7 @@ class Engine:
         self.SeqModifiedLOO = None
         self.valid_likelihoods = None
         self._y_hat_prev = None
+        self.emulator = False
 
     def start_engine(self) -> None:
         """
@@ -164,7 +167,13 @@ class Engine:
 
         """
         self.out_names = self.Model.Output.names
-        self.MetaModel.out_names = self.out_names
+        if isinstance(self.MetaModel, MM):
+            self.emulator = True
+            self.MetaModel.out_names = self.out_names
+            print('MetaModel has been given, `emulator` will be set to `True`')
+        else:
+            self.emulator = False
+            print('MetaModel has not been given, `emulator` will be set to `False`')
 
     def train_normal(self, parallel=False, verbose=False, save=False) -> None:
         """
@@ -179,8 +188,7 @@ class Engine:
         None
 
         """
-        if self.out_names == 'None':
-            self.start_engine()
+        self.start_engine()
 
         ExpDesign = self.ExpDesign
         MetaModel = self.MetaModel
@@ -199,9 +207,13 @@ class Engine:
 
         # Prepare X samples 
         # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on 
+        if self.emulator:
+            maxdeg = np.max(MetaModel.pce_deg)
+        else:
+            maxdeg = 1
         ExpDesign.generate_ED(ExpDesign.n_init_samples,
-                              transform=True,
-                              max_pce_deg=np.max(MetaModel.pce_deg))
+                              #transform=True,
+                              max_pce_deg=maxdeg)
 
         # Run simulations at X 
         if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None:
@@ -222,11 +234,14 @@ class Engine:
             print('No x_values are given, this might lead to issues during PostProcessing')
 
         # Fit the surrogate
-        MetaModel.fit(ExpDesign.X, ExpDesign.Y, parallel, verbose)
+        if self.emulator:
+            MetaModel.fit(ExpDesign.X, ExpDesign.Y, parallel, verbose)
 
         # Save what there is to save
         if save:
             # Save surrogate
+            if not os.path.exists('surrogates/'):
+                os.makedirs('surrogates/')
             with open(f'surrogates/surrogate_{self.Model.name}.pk1', 'wb') as output:
                 joblib.dump(MetaModel, output, 2)
 
@@ -255,7 +270,8 @@ class Engine:
 
     # -------------------------------------------------------------------------
     def eval_metamodel(self, samples=None, nsamples=None,
-                       sampling_method='random', return_samples=False):
+                       sampling_method='random', return_samples=False,
+                       parallel = False):
         """
         Evaluates metamodel at the requested samples. One can also generate
         nsamples.
@@ -272,6 +288,9 @@ class Engine:
             'random'.
         return_samples : bool, optional
             Retun samples, if no `samples` is provided. The default is False.
+        parallel : bool, optional
+            Set to true if the evaluations should be done in parallel.
+            The default is False.
 
         Returns
         -------
@@ -290,12 +309,22 @@ class Engine:
 
         # Transformation to other space is to be done in the MetaModel
         # TODO: sort the transformations better
-        mean_pred, std_pred = self.MetaModel.eval_metamodel(samples)
+        if self.emulator:
+            mean_pred, std_pred = self.MetaModel.eval_metamodel(samples)
+        else:
+            mean_pred , X = self.Model.run_model_parallel(samples, mp=parallel)
 
         if return_samples:
-            return mean_pred, std_pred, samples
+            if self.emulator:
+                return mean_pred, std_pred, samples
+            else:
+                return mean_pred, samples
         else:
-            return mean_pred, std_pred
+            if self.emulator:
+                return mean_pred, std_pred
+            else:
+                return mean_pred, None
+                
 
     # -------------------------------------------------------------------------
     def train_seq_design(self, parallel=False, verbose=False):
@@ -348,6 +377,7 @@ class Engine:
 
         # Setup the Sequential Design object
         self.SeqDes = SequentialDesign(self.MetaModel, self.Model, self.ExpDesign)
+        self.SeqDes.out_names = self.out_names
 
         # Handle if only one UtilityFunctions is provided
         if not isinstance(util_func, list):
@@ -485,7 +515,7 @@ class Engine:
 
                     # Optimal Bayesian Design
                     # self.MetaModel.ExpDesignFlag = 'sequential'
-                    Xnew, updatedPrior = self.SeqDesign.choose_next_sample(TotalSigma2,
+                    Xnew, updatedPrior = self.SeqDes.choose_next_sample(TotalSigma2,
                                                                  n_canddidate,
                                                                  util_f)
 
diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py
index abe3f1a13..e020f689f 100644
--- a/src/bayesvalidrox/surrogate_models/sequential_design.py
+++ b/src/bayesvalidrox/surrogate_models/sequential_design.py
@@ -332,12 +332,12 @@ class SequentialDesign:
                 X_MC = self.ExpDesign.generate_samples(MCsize, 'random')
 
                 # ToDo: Get samples from the "do_exploration"
-                candidates = self.ExpDesign.generate_samples(
-                    n_candidates, 'latin_hypercube')
+                #candidates = self.ExpDesign.generate_samples(
+                #    n_candidates, 'latin_hypercube')
 
                 # Split the candidates in groups for multiprocessing
                 split_cand = np.array_split(
-                    candidates, n_cand_groups, axis=0
+                    allCandidates, n_cand_groups, axis=0
                 )
                 # print(candidates)
                 # print(split_cand)
@@ -368,43 +368,43 @@ class SequentialDesign:
                 # Normalize U_J_d
                 # ToDO: Check if this is working for the case where the util_func should be minimized (e.g. IE)
                 # norm_U_J_D = U_J_d / np.nansum(np.abs(U_J_d))  # Possible solution
-                norm_U_J_d = U_J_d / np.sum(U_J_d)
+                norm_scoreExploitation = U_J_d / np.sum(U_J_d)
 
             else:
-                norm_U_J_d = np.zeros((len(scoreExploration)))
+                norm_scoreExploitation = np.zeros((len(scoreExploration)))
 
             # ------- Calculate Total score -------
             # ToDo: This should be outside of the exploration/exploitation if/else part
             # ------- Trade off between EXPLORATION & EXPLOITATION -------
             # Accumulate the samples
             # ToDo: Stop assuming 2 sets of samples (should only be 1)
-            finalCandidates = np.concatenate((allCandidates, candidates), axis=0)  # ToDo: Remove
-            finalCandidates = np.unique(finalCandidates, axis=0)                   # ToDo: Remove
+            #finalCandidates = np.concatenate((allCandidates, candidates), axis=0)  # ToDo: Remove
+            finalCandidates = allCandidates#np.unique(finalCandidates, axis=0)                   # ToDo: Remove
 
             # Calculations take into account both exploration and exploitation
             # samples without duplicates
-            totalScore = np.zeros(finalCandidates.shape[0])
+            #totalScore = np.zeros(finalCandidates.shape[0])
             # self.totalScore = totalScore
 
             # ToDo: Simplify (remove loop) for only one set of samples
             # final_weights = explore_score*explore_weights + exploit_score*exploit_weight
-            for cand_idx in range(finalCandidates.shape[0]):
-                # find candidate indices
-                idx1 = np.where(allCandidates == finalCandidates[cand_idx])[0]
-                idx2 = np.where(candidates == finalCandidates[cand_idx])[0]
+            #for cand_idx in range(finalCandidates.shape[0]):
+            #    # find candidate indices
+            #    idx1 = np.where(allCandidates == finalCandidates[cand_idx])[0]
+            #    idx2 = np.where(candidates == finalCandidates[cand_idx])[0]#
 
                 # exploration
-                if idx1.shape[0] > 0:
-                    idx1 = idx1[0]
-                    totalScore[cand_idx] += explore_w * scoreExploration[idx1]
+            #    if idx1.shape[0] > 0:
+            #        idx1 = idx1[0]
+            #        totalScore[cand_idx] += explore_w * scoreExploration[idx1]
 
-                # exploitation
-                if idx2.shape[0] > 0:
-                    idx2 = idx2[0]
-                    totalScore[cand_idx] += exploit_w * norm_U_J_d[idx2]
+            #    # exploitation
+            #    if idx2.shape[0] > 0:
+            #        idx2 = idx2[0]
+            #        totalScore[cand_idx] += exploit_w * norm_U_J_d[idx2]
 
             # Total score
-            totalScore = exploit_w * norm_U_J_d
+            totalScore = exploit_w * norm_scoreExploitation
             totalScore += explore_w * scoreExploration
 
             # temp: Plot
@@ -821,7 +821,7 @@ class SequentialDesign:
         """
 
         # Get the data
-        obs_data = self.observations
+        obs_data = self.Model.observations
         # TODO: this should be optimizable to be calculated explicitly
         if hasattr(self.Model, 'n_obs'):
             n_obs = self.Model.n_obs
-- 
GitLab