Source code for dlmfg.core.metrics_eval

""" Contains classes and methods to obtain various regression based metrics to evaluate"""
from sklearn import metrics
import numpy as np
import pandas as pd
import math
import sys
sys.path.append("../config")

class MetricsEval:
    """Collection of evaluation routines that compute regression and
    classification metrics to assess model performance.
    """
[docs] def metrics_eval_base(self,predicted_y, test_y,logs_path,run_id=0): """Get predicted and actual value for all KCCs and return regression metrics namely: Mean Absolute Error, Mean Squared Error, Root Mean Squared Error, R-Squared Value :param predicted_y: predicted values for the process parameters :type conn_str: numpy.array [test_samples*kccs] (required) :param predicted_y: actual values for the process parameters :type conn_str: numpy.array [test_samples*kccs] (required) :param logs_path: Logs path to save the evaluation metrics :type logs_path: str (required) :returns: dictionary of all metrics for each KCC :rtype: dict :returns: dataframe of all metrics for each KCC :rtype: pandas.dataframe """ kcc_dim=test_y.shape[1] import kcc_config as kcc_config #kcc_struct=kcc_config.get_kcc_struct() kcc_struct=kcc_config.kcc_struct # Calculating Regression Based Evaluation Metrics mae_KCCs=np.zeros((kcc_dim)) mse_KCCs=np.zeros((kcc_dim)) r2_KCCs=np.zeros((kcc_dim)) #print(kcc_struct) kcc_id=[] for kcc in kcc_struct: if(kcc['kcc_type']==1): kcc_name=kcc['kcc_id'] kcc_id.append(kcc_name) mae_KCCs=metrics.mean_absolute_error(predicted_y, test_y,multioutput='raw_values') mse_KCCs=metrics.mean_squared_error(predicted_y, test_y,multioutput='raw_values') r2_KCCs = metrics.r2_score(predicted_y, test_y,multioutput='raw_values') #print(kcc_id) rmse_KCCs=np.sqrt(mse_KCCs) eval_metrics= { "KCC_ID":kcc_id, "Mean Absolute Error" : mae_KCCs, "Mean Squared Error" : mse_KCCs, "Root Mean Squared Error" : rmse_KCCs, "R Squared" : r2_KCCs } #print(len(kcc_id),len(mae_KCCs),len(mae_KCCs),len(rmse_KCCs),len(r2_KCCs)) #print(eval_metrics) accuracy_metrics_df=pd.DataFrame.from_dict(eval_metrics) accuracy_metrics_df=accuracy_metrics_df.set_index('KCC_ID') #accuracy_metrics_df.to_csv(logs_path+'/metrics.csv') #moved to function call return eval_metrics,accuracy_metrics_df
[docs] def metrics_eval_classification(self,y_pred, y_true,logs_path,run_id=0): """Get predicted and actual value for all KCCs and return regression metrics namely: Mean Absolute Error, Mean Squared Error, Root Mean Squared Error, R-Squared Value :param predicted_y: predicted values for the process parameters :type conn_str: numpy.array [test_samples*kccs] (required) :param predicted_y: actual values for the process parameters :type conn_str: numpy.array [test_samples*kccs] (required) :param logs_path: Logs path to save the evaluation metrics :type logs_path: str (required) :returns: dictionary of all metrics for each KCC :rtype: dict :returns: dataframe of all metrics for each KCC :rtype: pandas.dataframe """ kcc_dim=y_true.shape[1] import kcc_config as kcc_config kcc_struct=kcc_config.get_kcc_struct() # Calculating Regression Based Evaluation Metrics kcc_id=[] for kcc in kcc_struct: if(kcc['kcc_type']==1): kcc_name=kcc['kcc_id'] kcc_id.append(kcc_name) acc_kccs=[] f1_kccs=[] pre_kccs=[] recall_kccs=[] roc_auc_kccs=[] kappa_kccs=[] from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score,roc_auc_score,cohen_kappa_score for i in range(y_true.shape[1]): #Binary Prediction arrray y_pred_bin=np.where(y_pred[:,i] > 0.5, 1, 0) acc_kccs.append(accuracy_score(y_true[:,i],y_pred_bin)) f1_kccs.append(f1_score(y_true[:,i],y_pred_bin)) pre_kccs.append(precision_score(y_true[:,i],y_pred_bin)) recall_kccs.append(recall_score(y_true[:,i],y_pred_bin)) kappa_kccs.append(cohen_kappa_score(y_true[:,i],y_pred_bin)) #Probablity based Scoring roc_auc_kccs.append(roc_auc_score(y_true[:,i],y_pred[:,i])) eval_metrics= { "KCC_ID":kcc_id, "Accuracy" : acc_kccs, "F1" : f1_kccs, "Precision" : pre_kccs, "Recall" : recall_kccs, "ROC_AUC":roc_auc_kccs, "Kappa":kappa_kccs } accuracy_metrics_df=pd.DataFrame.from_dict(eval_metrics) accuracy_metrics_df=accuracy_metrics_df.set_index('KCC_ID') #accuracy_metrics_df.to_csv(logs_path+'/metrics.csv') #moved to function call return 
eval_metrics,accuracy_metrics_df
[docs] def metrics_eval_cop(self,predicted_y, test_y,logs_path,run_id=0): """Get predicted and actual value for all KCCs and return regression metrics namely: Mean Absolute Error, Mean Squared Error, Root Mean Squared Error, R-Squared Value :param predicted_y: predicted values for the process parameters :type conn_str: numpy.array [test_samples*kccs] (required) :param predicted_y: actual values for the process parameters :type conn_str: numpy.array [test_samples*kccs] (required) :param logs_path: Logs path to save the evaluation metrics :type logs_path: str (required) :returns: dictionary of all metrics for each KCC :rtype: dict :returns: dataframe of all metrics for each KCC :rtype: pandas.dataframe """ kcc_dim=test_y.shape[1] mae_KCCs=np.zeros((kcc_dim)) mse_KCCs=np.zeros((kcc_dim)) r2_KCCs=np.zeros((kcc_dim)) mae_KCCs=metrics.mean_absolute_error(predicted_y, test_y,multioutput='raw_values') mse_KCCs=metrics.mean_squared_error(predicted_y, test_y,multioutput='raw_values') r2_KCCs = metrics.r2_score(predicted_y, test_y,multioutput='raw_values') rmse_KCCs=np.sqrt(mse_KCCs) r2_adjusted=np.zeros(kcc_dim) from tqdm import tqdm for i in tqdm(range(kcc_dim)): y_cop_test_flat=test_y[:,i] y_cop_pred_flat=predicted_y[:,i] combined_array=np.stack([y_cop_test_flat,y_cop_pred_flat],axis=1) filtered_array=combined_array[np.where(abs(combined_array[:,0]) >= 0)] y_cop_test_vector=filtered_array[:,0:1] y_cop_pred_vector=filtered_array[:,1:2] #print(y_cop_pred_vector.shape) r2_adjusted[i] = metrics.r2_score(y_cop_test_vector,y_cop_pred_vector,multioutput='raw_values')[0] eval_metrics= { "Mean Absolute Error" : mae_KCCs, "Mean Squared Error" : mse_KCCs, "Root Mean Squared Error" : rmse_KCCs, "R Squared" : r2_KCCs, "R Squared Adjusted" : r2_adjusted } accuracy_metrics_df=pd.DataFrame({'MAE':mae_KCCs,'MSE':mse_KCCs,'RMSE':rmse_KCCs,'R2':r2_KCCs,"R2_Adjusted":r2_adjusted},columns=['MAE','MSE','RMSE','R2',"R2_Adjusted"]) #accuracy_metrics_df.to_csv(logs_path+'/metrics.csv') #moved to 
function call return eval_metrics,accuracy_metrics_df
def metrics_eval_aleatoric_model(self,predicted_y, test_y,logs_path): kcc_dim=test_y.shape[1] log_variance=y_pred[:,kcc_dim] variance=np.exp(log_variance) predicted_y_sub=predicted_y[:,0:(kcc_dim-1)] standard_deviation=np.sqrt(variance) avg_aleatoric_SD=np.mean(standard_deviation) # Calculating Regression Based Evaluation Metrics mae_KCCs=np.zeros((kcc_dim)) mse_KCCs=np.zeros((kcc_dim)) r2_KCCs=np.zeros((kcc_dim)) kcc_id=[] for i in range(kcc_dim): kcc_name="KCC_"+str(i+1) kcc_id.append(kcc_name) mae_KCCs=metrics.mean_absolute_error(predicted_y_sub, test_y,multioutput='raw_values') mse_KCCs=metrics.mean_squared_error(predicted_y_sub, test_y,multioutput='raw_values') r2_KCCs = metrics.r2_score(predicted_y_sub, test_y,multioutput='raw_values') rmse_KCCs=sqrt(mse_KCCs) eval_metrics= { "Mean Absolute Error" : mae_KCCs, "Mean Squared Error" : mse_KCCs, "Root Mean Squared Error" : rmse_KCCs, "R Squared" : r2_KCCs, "Aleatoric Standard Deviation":avg_aleatoric_SD } accuracy_metrics_df=pd.DataFrame({'KCC_ID':kcc_id,'MAE':mae_KCCs,'MSE':mse_KCCs,'RMSE':rmse_KCCs,'R2':r2_KCCs}) accuracy_metrics_df.columns = ['KCC_ID','MAE','MSE','RMSE','R2'] accuracy_metrics_df.to_csv(logs_path+'/metrics.csv') return eval_metrics