diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py index 91692669be7f23eb660d40917100000d2c030f0b..abafb123d266f7cb95d51ba38be0ab8b66b56415 100644 --- a/src/bayesvalidrox/post_processing/post_processing.py +++ b/src/bayesvalidrox/post_processing/post_processing.py @@ -47,7 +47,7 @@ class PostProcessing: # Initialize attributes self.mc_reference = None - self.total_sobol = None + #self.total_sobol = None self.valid_error = None self.rmse = None self.model_out_dict = {} @@ -90,7 +90,7 @@ class PostProcessing: x_values_orig = self.engine.ExpDesign.x_values # Compute the moments with the PCEModel object - self.means, self.stds = self.engine.MetaModel.compute_moments() + self.means, self.stds = self.engine.MetaModel.calculate_moments() # Plot the best fit line, set the linewidth (lw), color and # transparency (alpha) of the line @@ -522,7 +522,7 @@ class PostProcessing: # ------------------------------------------------------------------------- - def sobol_indices(self, xlabel: str = 'Time [s]', plot_type: str = None): + def sobol_indices(self, xlabel: str = 'Time [s]', plot_type: str = None, save:bool=True): """ Provides Sobol indices as a sensitivity measure to infer the importance of the input parameters. See Eq. 27 in [1] for more details. For the @@ -545,6 +545,9 @@ class PostProcessing: plot_type : str, optional Plot type. The default is `None`. This corresponds to line plot. Bar chart can be selected by `bar`. + save : bool, optional + Write out the inidces as csv files if set to True. The default + is True Raises ------ @@ -567,279 +570,152 @@ class PostProcessing: raise AttributeError('Sobol indices only support PCE-type models!') # Extract the necessary variables - basis_dict = metamod._basis_dict - coeffs_dict = metamod._coeffs_dict - n_params = metamod.ndim max_order = np.max(metamod._pce_deg) - sobol_cell_b = {} - total_sobol_b = {} - cov_Z_p_q = np.zeros((n_params)) - - for b_i in range(metamod.n_bootstrap_itrs): - - sobol_cell_, total_sobol_ = {}, {} - - for output in self.engine.out_names: - - n_meas_points = len(coeffs_dict[f'b_{b_i+1}'][output]) - - # Initialize the (cell) array containing the (total) Sobol indices. - sobol_array = dict.fromkeys(range(1, max_order+1), []) - sobol_cell_array = dict.fromkeys(range(1, max_order+1), []) - - for i_order in range(1, max_order+1): - n_comb = math.comb(n_params, i_order) - - sobol_cell_array[i_order] = np.zeros( - (n_comb, n_meas_points)) - - total_sobol_array = np.zeros((n_params, n_meas_points)) - - # Initialize the cell to store the names of the variables - TotalVariance = np.zeros((n_meas_points)) - # Loop over all measurement points and calculate sobol indices - for pIdx in range(n_meas_points): - - # Extract the basis indices (alpha) and coefficients - Basis = basis_dict[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] - - try: - clf_poly = metamod._clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] - PCECoeffs = clf_poly.coef_ - except: - PCECoeffs = coeffs_dict[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] - - # Compute total variance - TotalVariance[pIdx] = np.sum(np.square(PCECoeffs[1:])) - - nzidx = np.where(PCECoeffs != 0)[0] - # Set all the Sobol indices equal to zero in the presence of a - # null output. - if len(nzidx) == 0: - # This is buggy. - for i_order in range(1, max_order+1): - sobol_cell_array[i_order][:, pIdx] = 0 - - # Otherwise compute them by summing well-chosen coefficients - else: - nz_basis = Basis[nzidx] - for i_order in range(1, max_order+1): - idx = np.where( - np.sum(nz_basis > 0, axis=1) == i_order) - subbasis = nz_basis[idx] - Z = np.array( - list(combinations(range(n_params), i_order))) - - for q in range(Z.shape[0]): - Zq = Z[q] - subsubbasis = subbasis[:, Zq] - subidx = np.prod(subsubbasis, axis=1) > 0 - sum_ind = nzidx[idx[0][subidx]] - if TotalVariance[pIdx] == 0.0: - sobol_cell_array[i_order][q, pIdx] = 0.0 - else: - sobol = np.sum( - np.square(PCECoeffs[sum_ind])) - sobol /= TotalVariance[pIdx] - sobol_cell_array[i_order][q, pIdx] = sobol - - # Compute the TOTAL Sobol indices. - for ParIdx in range(n_params): - idx = nz_basis[:, ParIdx] > 0 - sum_ind = nzidx[idx] - - if TotalVariance[pIdx] == 0.0: - total_sobol_array[ParIdx, pIdx] = 0.0 - else: - sobol = np.sum(np.square(PCECoeffs[sum_ind])) - sobol /= TotalVariance[pIdx] - total_sobol_array[ParIdx, pIdx] = sobol - - # ----- if PCA selected: Compute covariance ----- - if metamod.dim_red_method.lower() == 'pca': - # Extract the basis indices (alpha) and coefficients for - # next component - if pIdx < n_meas_points-1: - nextBasis = basis_dict[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] - if metamod.bootstrap_method != 'fast' or b_i == 0: - clf_poly = metamod._clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] - nextPCECoeffs = clf_poly.coef_ - else: - nextPCECoeffs = coeffs_dict[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] - - # Choose the common non-zero basis - mask = (Basis[:, None] == - nextBasis).all(-1).any(-1) - n_mask = (nextBasis[:, None] == - Basis).all(-1).any(-1) - - # Compute the covariance in Eq 17. - for ParIdx in range(n_params): - idx = (mask) & (Basis[:, ParIdx] > 0) - n_idx = (n_mask) & (nextBasis[:, ParIdx] > 0) - try: - cov_Z_p_q[ParIdx] += np.sum(np.dot( - PCECoeffs[idx], nextPCECoeffs[n_idx]) - ) - except: - pass - - # Compute the sobol indices according to Ref. 2 - if metamod.dim_red_method.lower() == 'pca': - n_c_points = self.engine.ExpDesign.Y[output].shape[1] - PCA = metamod.pca[f'b_{b_i+1}'][output] - compPCA = PCA.components_ - nComp = compPCA.shape[0] - var_Z_p = PCA.explained_variance_ - - # Extract the sobol index of the components - for i_order in range(1, max_order+1): - n_comb = math.comb(n_params, i_order) - sobol_array[i_order] = np.zeros((n_comb, n_c_points)) - Z = np.array( - list(combinations(range(n_params), i_order))) - - # Loop over parameters - for q in range(Z.shape[0]): - S_Z_i = sobol_cell_array[i_order][q] - - for tIdx in range(n_c_points): - var_Y_t = np.var( - self.engine.ExpDesign.Y[output][:, tIdx]) - if var_Y_t == 0.0: - term1, term2 = 0.0, 0.0 - else: - # Eq. 17 - term1 = 0.0 - for i in range(nComp): - a = S_Z_i[i] * var_Z_p[i] - a *= compPCA[i, tIdx]**2 - term1 += a - - # TODO: Term 2 - # term2 = 0.0 - # for i in range(nComp-1): - # term2 += cov_Z_p_q[q] * compPCA[i, tIdx] - # term2 *= compPCA[i+1, tIdx] - # term2 *= 2 - - sobol_array[i_order][q, - tIdx] = term1 # + term2 - - # Devide over total output variance Eq. 18 - sobol_array[i_order][q, tIdx] /= var_Y_t - - # Compute the TOTAL Sobol indices. - total_sobol = np.zeros((n_params, n_c_points)) - for ParIdx in range(n_params): - S_Z_i = total_sobol_array[ParIdx] - - for tIdx in range(n_c_points): - var_Y_t = np.var( - self.engine.ExpDesign.Y[output][:, tIdx]) - if var_Y_t == 0.0: - term1, term2 = 0.0, 0.0 - else: - term1 = 0 - for i in range(nComp): - term1 += S_Z_i[i] * var_Z_p[i] * \ - (compPCA[i, tIdx]**2) - - # Term 2 - term2 = 0 - for i in range(nComp-1): - term2 += cov_Z_p_q[ParIdx] * compPCA[i, tIdx] \ - * compPCA[i+1, tIdx] - term2 *= 2 - - total_sobol[ParIdx, tIdx] = term1 # + term2 - - # Devide over total output variance Eq. 18 - total_sobol[ParIdx, tIdx] /= var_Y_t - - sobol_cell_[output] = sobol_array - total_sobol_[output] = total_sobol - else: - sobol_cell_[output] = sobol_cell_array - total_sobol_[output] = total_sobol_array - - # Save for each bootsrtap iteration - sobol_cell_b[b_i] = sobol_cell_ - total_sobol_b[b_i] = total_sobol_ - - # Average total sobol indices - total_sobol_all = {} - for i in sorted(total_sobol_b): - for k, v in total_sobol_b[i].items(): - if k not in total_sobol_all: - total_sobol_all[k] = [None] * len(total_sobol_b) - total_sobol_all[k][i] = v - - self.total_sobol = {} - for output in self.engine.out_names: - self.total_sobol[output] = np.mean(total_sobol_all[output], axis=0) - - # ---------------- Plot ----------------------- + outputs = self.engine.Model.Output.names + if metamod.sobol is None: + metamod.calculate_sobol() + sobol_all, total_sobol_all = metamod.sobol, metamod.total_sobol par_names = self.engine.ExpDesign.par_names x_values_orig = self.engine.ExpDesign.x_values + # Save indices + if save: + for _, output in enumerate(outputs): + total_sobol = total_sobol_all[output] + np.savetxt( + f"{self.out_dir}totalsobol_" + output.replace("/", "_") + ".csv", + total_sobol.T, + delimiter=",", + header=",".join(par_names), + comments="", + ) + + for i_order in range(1, max_order + 1): + sobol = sobol_all[i_order][output][0] + np.savetxt( + f"{self.out_dir}sobol_{i_order}_" + + output.replace("/", "_") + + ".csv", + sobol.T, + delimiter=",", + header=",".join(par_names), + comments="", + ) + + # Check if the x_values match the number of metamodel outputs + # TODO: How relevant is this check? + if (np.array(x_values_orig).shape[0] != total_sobol_all[outputs[0]].shape[1]): + print( + "The number of MetaModel outputs does not match the x_values" + " specified in ExpDesign. Images are created with " + "equidistant numbers on the x-axis" + ) + x_values_orig = np.arange(0, 1, total_sobol_all[output].shape[0]) + + # Plot Sobol' indices fig = plt.figure() + for i_order in range(1, max_order + 1): + # Create labels for higher order indices + if i_order == 1: + par_names_i = par_names + else: + par_names_i = list(combinations(par_names, i_order)) + + for _, output in enumerate(outputs): + x = x_values_orig[output] if (type(x_values_orig) is dict) else x_values_orig + sobol = sobol_all[i_order][output][0] + + # Compute quantiles + q_5 = np.quantile(sobol_all[i_order][output], q=0.05, axis=0) + q_97_5 = np.quantile(sobol_all[i_order][output], q=0.975, axis=0) + + if plot_type is "bar": + ax = fig.add_axes([0, 0, 1, 1]) + dict1 = {xlabel: x} + dict2 = { + param: sobolIndices + for param, sobolIndices in zip(par_names_i, sobol) + } + + df = pd.DataFrame({**dict1, **dict2}) + df.plot( + x=xlabel, + y=par_names_i, + kind="bar", + ax=ax, + rot=0, + colormap="Dark2", + yerr=q_97_5 - q_5, + ) + ax.set_ylabel("Sobol indices, $S^T$") - for outIdx, output in enumerate(self.engine.out_names): + else: + for i, sobolIndices in enumerate(sobol): + plt.plot( + x, + sobolIndices, + label=par_names_i[i], + marker="x", + lw=2.5, + ) + plt.fill_between(x, q_5[i], q_97_5[i], alpha=0.15) + + plt.ylabel("Sobol indices, $S^T$") + plt.xlabel(xlabel) + plt.legend(loc="best", frameon=True) + + plt.title(f"{i_order} degree Sensitivity analysis of {output}") + fig.savefig( + f"{self.out_dir}Sobol_indices_{i_order}_{output}.pdf", + bbox_inches="tight", + ) + plt.clf() - # Extract total Sobol indices - total_sobol = self.total_sobol[output] + # Plot the Total Sobol' indices + fig = plt.figure() + for _, output in enumerate(outputs): + x = x_values_orig[output] if (type(x_values_orig) is dict) else x_values_orig + total_sobol = total_sobol_all[output] # Compute quantiles q_5 = np.quantile(total_sobol_all[output], q=0.05, axis=0) q_97_5 = np.quantile(total_sobol_all[output], q=0.975, axis=0) - # Extract a list of x values - if type(x_values_orig) is dict: - x = x_values_orig[output] - else: - x = x_values_orig - - if plot_type == 'bar': + if plot_type is "bar": ax = fig.add_axes([0, 0, 1, 1]) dict1 = {xlabel: x} - dict2 = {param: sobolIndices for param, sobolIndices - in zip(par_names, total_sobol)} + dict2 = { + param: sobolIndices + for param, sobolIndices in zip(par_names, total_sobol) + } df = pd.DataFrame({**dict1, **dict2}) - df.plot(x=xlabel, y=par_names, kind="bar", ax=ax, rot=0, - colormap='Dark2', yerr=q_97_5-q_5) - ax.set_ylabel('Total Sobol indices, $S^T$') + df.plot( + x=xlabel, + y=par_names, + kind="bar", + ax=ax, + rot=0, + colormap="Dark2", + yerr=q_97_5 - q_5, + ) + ax.set_ylabel("Total Sobol indices, $S^T$") else: for i, sobolIndices in enumerate(total_sobol): - plt.plot(x, sobolIndices, label=par_names[i], - marker='x', lw=2.5) + plt.plot(x, sobolIndices, label=par_names[i], marker="x", lw=2.5) plt.fill_between(x, q_5[i], q_97_5[i], alpha=0.15) - plt.ylabel('Total Sobol indices, $S^T$') + plt.ylabel("Total Sobol indices, $S^T$") plt.xlabel(xlabel) + plt.legend(loc="best", frameon=True) - plt.title(f'Sensitivity analysis of {output}') - if plot_type != 'bar': - plt.legend(loc='best', frameon=True) - - # Save indices - np.savetxt(f'./{self.out_dir}totalsobol_' + - output.replace('/', '_') + '.csv', - total_sobol.T, delimiter=',', - header=','.join(par_names), comments='') - - # save the current figure + plt.title(f"Sensitivity analysis of {output}") fig.savefig( - f'./{self.out_dir}Sobol_indices_{output}.pdf', - bbox_inches='tight' + f"{self.out_dir}TotalSobol_indices_{output}.pdf", bbox_inches="tight" ) + plt.clf() - # Destroy the current plot - plt.close() - - return self.total_sobol # ------------------------------------------------------------------------- def check_reg_quality(self, n_samples: int = 1000, samples=None, outputs: dict = None) -> None: @@ -1052,7 +928,7 @@ class PostProcessing: Dictionary of results. """ - samples = self._get_sample() + #samples = self._get_sample() model_outs, _ = self.engine.Model.run_model_parallel( samples, key_str=key_str) diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py index 592652e2abcd2fcabb09392e800c02b5fe27f655..498cb76e6d4fe23c7845dfd0a5aab616aa3cc013 100644 --- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py +++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py @@ -129,6 +129,9 @@ class PCE(MetaModel): self._q_norm_dict = None self._deg_dict = None + self.sobol = None + self.total_sobol = None + # Initialize the regression options as a dictionary self.set_regression_options() @@ -930,7 +933,7 @@ class PCE(MetaModel): return pce_means, pce_stds - def calculate_sobol(self): + def calculate_sobol(self, save = True): """ Calculates Sobol' indices of all possible orders and the Total Sobol' indices based on the coefficients in the PCE. @@ -1175,168 +1178,4 @@ class PCE(MetaModel): self.total_sobol[output] = np.mean(total_sobol_all[output], axis=0) return self.sobol, self.total_sobol - - def plot_sobol(self): - - # ---------------- Plot ----------------------- - par_names = self.engine.ExpDesign.par_names - x_values_orig = self.engine.ExpDesign.x_values - - # Check if the x_values match the number of metamodel outputs - if ( - not np.array(x_values_orig).shape[0] - == self.total_sobol[outputs[0]].shape[1] - ): - print( - "The number of MetaModel outputs does not match the x_values" - " specified in ExpDesign. Images are created with " - "equidistant numbers on the x-axis" - ) - x_values_orig = np.arange(0, 1, self.total_sobol[output].shape[0]) - - if 1: - fig = plt.figure() - - for i_order in range(1, max_order + 1): - # Change labels to combined params sets for higher order indices - if i_order == 1: - par_names_i = par_names - else: - par_names_i = list(combinations(par_names, i_order)) - for outIdx, output in enumerate(outputs): - # Extract total Sobol indices - sobol = self.sobol[i_order][output][0] - - # Compute quantiles - q_5 = np.quantile(sobol_all[i_order][output], q=0.05, axis=0) - q_97_5 = np.quantile(sobol_all[i_order][output], q=0.975, axis=0) - - # Extract a list of x values - if type(x_values_orig) is dict: - x = x_values_orig[output] - else: - x = x_values_orig - - if plot_type == "bar": - ax = fig.add_axes([0, 0, 1, 1]) - dict1 = {xlabel: x} - dict2 = { - param: sobolIndices - for param, sobolIndices in zip(par_names_i, sobol) - } - - df = pd.DataFrame({**dict1, **dict2}) - df.plot( - x=xlabel, - y=par_names_i, - kind="bar", - ax=ax, - rot=0, - colormap="Dark2", - yerr=q_97_5 - q_5, - ) - ax.set_ylabel("Sobol indices, $S^T$") - - else: - for i, sobolIndices in enumerate(sobol): - plt.plot( - x, - sobolIndices, - label=par_names_i[i], - marker="x", - lw=2.5, - ) - plt.fill_between(x, q_5[i], q_97_5[i], alpha=0.15) - - plt.ylabel("Sobol indices, $S^T$") - plt.xlabel(xlabel) - - plt.title(f"{i_order} degree Sensitivity analysis of {output}") - if plot_type != "bar": - plt.legend(loc="best", frameon=True) - - # Save indices - np.savetxt( - f"{self.out_dir}sobol_{i_order}_" - + output.replace("/", "_") - + ".csv", - sobol.T, - delimiter=",", - header=",".join(par_names), - comments="", - ) - - # save the current figure - # print(f'{self.out_dir}/{newpath}Sobol_indices_{i_order}_{output}.pdf') - fig.savefig( - f"{self.out_dir}Sobol_indices_{i_order}_{output}.pdf", - bbox_inches="tight", - ) - - # Destroy the current plot - plt.clf() - - fig = plt.figure() - - for outIdx, output in enumerate(outputs): - # Extract total Sobol indices - total_sobol = self.total_sobol[output] - - # Compute quantiles - q_5 = np.quantile(total_sobol_all[output], q=0.05, axis=0) - q_97_5 = np.quantile(total_sobol_all[output], q=0.975, axis=0) - - # Extract a list of x values - if type(x_values_orig) is dict: - x = x_values_orig[output] - else: - x = x_values_orig - - if plot_type == "bar": - ax = fig.add_axes([0, 0, 1, 1]) - dict1 = {xlabel: x} - dict2 = { - param: sobolIndices - for param, sobolIndices in zip(par_names, total_sobol) - } - - df = pd.DataFrame({**dict1, **dict2}) - df.plot( - x=xlabel, - y=par_names, - kind="bar", - ax=ax, - rot=0, - colormap="Dark2", - yerr=q_97_5 - q_5, - ) - ax.set_ylabel("Total Sobol indices, $S^T$") - - else: - for i, sobolIndices in enumerate(total_sobol): - plt.plot(x, sobolIndices, label=par_names[i], marker="x", lw=2.5) - plt.fill_between(x, q_5[i], q_97_5[i], alpha=0.15) - - plt.ylabel("Total Sobol indices, $S^T$") - plt.xlabel(xlabel) - - plt.title(f"Sensitivity analysis of {output}") - if plot_type != "bar": - plt.legend(loc="best", frameon=True) - - # Save indices - np.savetxt( - f"{self.out_dir}totalsobol_" + output.replace("/", "_") + ".csv", - total_sobol.T, - delimiter=",", - header=",".join(par_names), - comments="", - ) - - # save the current figure - fig.savefig( - f"{self.out_dir}TotalSobol_indices_{output}.pdf", bbox_inches="tight" - ) - - # Destroy the current plot - plt.clf() + \ No newline at end of file