# test/property_prediction_helpers.py
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import xgboost as xgb
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
# Restrict OpenMP to a single level of nested parallelism before importing
# the fm4m model wrappers.
os.environ["OMP_MAX_ACTIVE_LEVELS"] = "1"
import models.fm4m as fm4m
# Function to create a model based on user input. Note that this returns a
# display string of the form "<model name> * <hyperparameter dict>" (or an
# error message), not the estimator itself; display_eval() parses that
# string back apart later.
def _create_model(
    model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None
):
    if model_name == "XGBClassifier":
        model = xgb.XGBClassifier(
            objective='binary:logistic',
            eval_metric='auc',
            max_depth=max_depth,
            n_estimators=n_estimators,
            alpha=alpha,
        )
    elif model_name == "SVR":
        model = SVR(degree=degree, kernel=kernel)
    elif model_name == "Kernel Ridge":
        model = KernelRidge(alpha=alpha, degree=degree, kernel=kernel)
    elif model_name == "Linear Regression":
        model = LinearRegression()
    elif model_name == "Default - Auto":
        return "Default Settings"
    else:
        return "Model not supported."
    return f"{model_name} * {model.get_params()}"
# Function to route model creation, forwarding only the hyperparameters
# relevant to the chosen model.
def create_downstream_model(model_name, max_depth, n_estimators, alpha, degree, kernel):
    if model_name == "XGBClassifier":
        return _create_model(
            model_name,
            max_depth=max_depth,
            n_estimators=n_estimators,
            alpha=alpha,
        )
    elif model_name == "SVR":
        return _create_model(model_name, degree=degree, kernel=kernel)
    elif model_name == "Kernel Ridge":
        return _create_model(model_name, alpha=alpha, degree=degree, kernel=kernel)
    elif model_name == "Linear Regression":
        return _create_model(model_name)
    elif model_name == "Default - Auto":
        return _create_model(model_name)
    else:
        # Unknown names fall through to _create_model's error message.
        return _create_model(model_name)
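# Illustrative sketch (not part of the original module): how the
# "<model name> * <hyperparameter dict>" string round-trips. All values
# below are hypothetical.
def _example_model_spec_roundtrip():
    spec = create_downstream_model("Kernel Ridge", None, None, 1.0, 3, "rbf")
    # spec looks like "Kernel Ridge * {'alpha': 1.0, 'degree': 3, ...}"
    name = spec.split("*")[0].strip()
    params = eval(spec.split("*")[-1].strip().replace("nan", "float('nan')"))
    return name, params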
# Function to display the evaluation score. Parses the downstream-model
# string ("<model name> * <hyperparameter dict>") produced by
# create_downstream_model(), then runs the fm4m pipeline.
def display_eval(selected_models, dataset, task_type, downstream, fusion_type, state):
    result = None
    try:
        downstream_model = downstream.split("*")[0].strip()
        hyp_param = downstream.split("*")[-1].strip()
        # get_params() renders NaN defaults as the bare token "nan"; make the
        # dict literal eval-able before reconstructing it.
        hyp_param = hyp_param.replace("nan", "float('nan')")
        params = eval(hyp_param)
    except Exception:
        # "Default Settings" and error strings carry no parameter dict.
        downstream_model = downstream.split("*")[0].strip()
        params = None
    try:
        if not selected_models:
            return "Please select at least one enabled model."
        # Classification runs populate ROC state; regression runs populate
        # parity-plot state. Both fm4m entry points return the score followed
        # by the five values stored under `keys`.
        if task_type == "Classification":
            keys = ("roc_auc", "fpr", "tpr", "x_batch", "y_batch")
            default_model = "DefaultClassifier"
        elif task_type == "Regression":
            keys = ("RMSE", "y_batch_test", "y_prob", "x_batch", "y_batch")
            default_model = "DefaultRegressor"
        else:
            return "Data & Model Setting is incorrect"
        if downstream_model == "Default Settings":
            downstream_model = default_model
            params = None
        if len(selected_models) > 1:
            result, *values = fm4m.multi_modal(
                model_list=selected_models,
                downstream_model=downstream_model,
                params=params,
                dataset=dataset,
            )
        else:
            result, *values = fm4m.single_modal(
                model=selected_models[0],
                downstream_model=downstream_model,
                params=params,
                dataset=dataset,
            )
        state.update(zip(keys, values))
    except Exception as e:
        return f"An error occurred: {e}"
    return result or "Data & Model Setting is incorrect"
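# Illustrative sketch (not part of the original module): one end-to-end call.
# The encoder name "MHG-GED" and dataset "bace" are hypothetical stand-ins;
# fm4m defines the real identifiers and the expected dataset format.
def _example_display_eval():
    state = {}
    spec = create_downstream_model("Linear Regression", None, None, None, None, None)
    score = display_eval(
        selected_models=["MHG-GED"],  # hypothetical model name
        dataset="bace",               # hypothetical dataset identifier
        task_type="Regression",
        downstream=spec,
        fusion_type=None,
        state=state,                  # filled with parity-plot data on success
    )
    return score, state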
# Function to handle plot display
def display_plot(plot_type, state):
    fig, ax = plt.subplots()
    if plot_type == "Latent Space":
        x_batch, y_batch = state.get("x_batch"), state.get("y_batch")
        # x_batch holds the 2-D latent (t-SNE) coordinates and y_batch the
        # class labels; split the points by label before scattering them.
        x_batch = np.asarray(x_batch)
        y_batch = np.asarray(y_batch)
        class_0 = x_batch[y_batch == 0]
        class_1 = x_batch[y_batch == 1]
        ax.scatter(class_1[:, 0], class_1[:, 1], c='red', label='Class 1')
        ax.scatter(class_0[:, 0], class_0[:, 1], c='blue', label='Class 0')
        ax.set_xlabel('Feature 1')
        ax.set_ylabel('Feature 2')
        ax.set_title('t-SNE Plot of the Dataset Distribution')
        ax.legend(loc='best')
elif plot_type == "ROC-AUC":
roc_auc, fpr, tpr = state.get("roc_auc"), state.get("fpr"), state.get("tpr")
ax.set_title("ROC-AUC Curve")
try:
ax.plot(
fpr,
tpr,
color='darkorange',
lw=2,
label=f'ROC curve (area = {roc_auc:.4f})',
)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
except:
pass
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc='lower right')
elif plot_type == "Parity Plot":
RMSE, y_batch_test, y_prob = (
state.get("RMSE"),
state.get("y_batch_test"),
state.get("y_prob"),
)
ax.set_title("Parity plot")
# change format
try:
print(y_batch_test)
print(y_prob)
y_batch_test = np.array(y_batch_test, dtype=float)
y_prob = np.array(y_prob, dtype=float)
ax.scatter(
y_batch_test,
y_prob,
color="blue",
label=f"Predicted vs Actual (RMSE: {RMSE:.4f})",
)
min_val = min(min(y_batch_test), min(y_prob))
max_val = max(max(y_batch_test), max(y_prob))
ax.plot([min_val, max_val], [min_val, max_val], 'r-')
except:
y_batch_test = []
y_prob = []
RMSE = None
print(y_batch_test)
print(y_prob)
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.legend(loc='lower right')
return fig
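# Illustrative sketch (not part of the original module): rendering a plot
# from a pre-populated state. The ROC values below are hypothetical.
def _example_display_plot():
    state = {
        "roc_auc": 0.91,
        "fpr": [0.0, 0.2, 1.0],
        "tpr": [0.0, 0.8, 1.0],
    }
    fig = display_plot("ROC-AUC", state)
    fig.savefig("roc_auc.png")  # or hand the figure to the UI layer
    return fig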
# Function to handle evaluation and logging
def evaluate_and_log(models, dataset, task_type, eval_output, state):
    task_dic = {'Classification': 'CLS', 'Regression': 'RGR'}
    result = eval_output.replace(" Score", "")
    new_entry = {
        "Selected Models": str(models),
        "Dataset": dataset,
        "Task": task_dic[task_type],
        "Result": result,
    }
    # Prepend the newest run so the most recent result appears first.
    new_entry_df = pd.DataFrame([new_entry])
    state["log_df"] = pd.concat([new_entry_df, state["log_df"]], ignore_index=True)
    return state["log_df"]
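# Illustrative sketch (not part of the original module): evaluate_and_log
# expects state["log_df"] to already exist, so initialize it with an empty
# DataFrame carrying the same columns. The values passed are hypothetical.
def _example_evaluate_and_log():
    state = {
        "log_df": pd.DataFrame(
            columns=["Selected Models", "Dataset", "Task", "Result"]
        )
    }
    return evaluate_and_log(
        models=["MHG-GED"],          # hypothetical model name
        dataset="bace",              # hypothetical dataset name
        task_type="Classification",
        eval_output="0.9123 Score",
        state=state,
    )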